diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs index 4dec19f..3e0c53d 100644 --- a/crates/kebab-app/src/app.rs +++ b/crates/kebab-app/src/app.rs @@ -53,10 +53,18 @@ use kebab_store_vector::LanceVectorStore; /// p9-fb-34: top-level wrapper around a paginated, budget-limited /// search result. Mirrors the wire `search_response.v1` shape. /// -/// `next_cursor` is `Some(_)` when the retriever returned a full -/// `k_effective` page (more hits may exist) or when the budget loop -/// truncated mid-page; the caller threads it back through -/// [`SearchOpts::cursor`] on the next call. +/// `next_cursor` is non-null whenever more hits may be reachable — +/// either the retriever filled the page (more behind it), or the +/// budget loop popped hits (those popped hits remain fetchable +/// from `offset + returned`). It is null only when the retriever +/// returned fewer hits than requested AND nothing was popped — i.e. +/// the corpus has nothing more for this query. +/// +/// `truncated` is independent of `next_cursor`: it signals that +/// the budget loop modified the page (snippet shorten or k pop). +/// Caller may either widen `max_tokens` (and re-issue the same +/// query) or follow `next_cursor` (to advance through more hits) +/// or both. #[derive(Clone, Debug)] pub struct SearchResponse { pub hits: Vec, @@ -289,21 +297,12 @@ impl App { } /// p9-fb-34: budget-aware search facade. Returns hits trimmed to - /// `opts.max_tokens` (chars/4 approximation of the wire JSON), - /// honors a `snippet_chars` override, and threads an opaque - /// pagination cursor through `corpus_revision`. + /// `opts.max_tokens` (chars/4 approximation) plus pagination + /// metadata. `App::search` is now a thin wrapper that drops the + /// metadata for backwards compat. /// - /// Budget loop: - /// 1. Shorten snippets progressively (halve cap, floor at 60 - /// chars) until the estimated wire-JSON char total fits or the - /// floor is reached. - /// 2. Pop hits off the end until the budget fits, but always - /// retain ≥ 1 hit (the spec floor). - /// - /// `next_cursor` is set when the retriever returned a full page - /// (more results may exist) or the budget truncated mid-page. - /// `App::search` is unchanged and remains the cache-served fast - /// path used by the existing TUI / kebab-rag callers. + /// `SearchResponse.next_cursor` and `truncated` are independent + /// signals — see `SearchResponse` doc for details. pub fn search_with_opts( &self, query: SearchQuery, @@ -390,27 +389,30 @@ impl App { } } - // Compute next_cursor. Only emit on the full-page path: - // - We returned a full `k_effective` page → more hits may - // remain in the original retriever set; the cursor is - // speculative (the next call falls through to an empty - // page if nothing's left, which is fine). + // p9-fb-34: emit cursor whenever more hits may be reachable. + // Three cases produce a non-null cursor: + // (a) returned == k_effective: retriever filled the page; there + // may be more behind it. Speculative — next call may return + // an empty page if nothing remains. + // (b) truncated by k-pop: returned < k_effective because we + // popped hits to fit the budget. Those popped hits live at + // offset+returned..; next call (with same or wider budget) + // resumes from there. + // (c) truncated by snippet-only shrink: returned == k_effective, + // falls under (a). Cursor lets caller paginate; widening + // --max-tokens lets caller re-fetch fuller snippets at the + // same offset. // - // p9-fb-34 round-1 review: the previous "truncated mid-page" - // branch was misleading — when the budget loop only shrank - // snippets (no hits popped), `next_cursor` would point at the - // page *after* the current hits, but the caller often wants - // *fuller snippets for the same hits* (i.e. widen `max_tokens`) - // not the next page. We now only emit a cursor when k was - // actually reduced (k-pop case) or the page was naturally full; - // both produce `returned == k_effective`. For snippet-only - // shrinkage with `truncated: true` and `next_cursor: null`, the - // documented guidance is "widen max_tokens". + // No cursor when neither (a) nor (b) applies — i.e. the retriever + // returned fewer than k_effective AND we didn't pop. That means + // end of available results. let returned = hits.len(); - let next_cursor = if returned == k_effective - && offset.saturating_add(returned) > 0 - { - Some(cursor::encode(offset + returned, &corpus_revision)) + let next_cursor = if returned == k_effective || truncated { + if offset.saturating_add(returned) > 0 { + Some(cursor::encode(offset + returned, &corpus_revision)) + } else { + None + } } else { None }; diff --git a/crates/kebab-app/tests/search_budget_integration.rs b/crates/kebab-app/tests/search_budget_integration.rs index c961d35..42ad346 100644 --- a/crates/kebab-app/tests/search_budget_integration.rs +++ b/crates/kebab-app/tests/search_budget_integration.rs @@ -152,4 +152,10 @@ fn max_tokens_zero_returns_one_hit_truncated() { .unwrap(); assert_eq!(resp.hits.len(), 1, "max_tokens=0 collapses to 1-hit floor"); assert!(resp.truncated); + // p9-fb-34 R2: cursor IS emitted on k-pop case so the popped + // hits remain reachable. + assert!( + resp.next_cursor.is_some(), + "k-pop truncation must still emit next_cursor; popped hits at offset+returned" + ); } diff --git a/docs/wire-schema/v1/search_response.schema.json b/docs/wire-schema/v1/search_response.schema.json index 8c13355..20e6eb8 100644 --- a/docs/wire-schema/v1/search_response.schema.json +++ b/docs/wire-schema/v1/search_response.schema.json @@ -9,6 +9,6 @@ "schema_version": { "const": "search_response.v1" }, "hits": { "type": "array", "description": "search_hit.v1[]" }, "next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." }, - "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. When `next_cursor` is also non-null, k was reduced — paginate via cursor. When `next_cursor` is null but `truncated: true`, only snippets were shortened — widen `max_tokens` to get fuller snippets." } + "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." } } } diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md index 2c7a53f..34d5ef6 100644 --- a/integrations/claude-code/kebab/SKILL.md +++ b/integrations/claude-code/kebab/SKILL.md @@ -54,7 +54,7 @@ Input: - **`max_tokens` / `snippet_chars` / `cursor` (p9-fb-34)** — agent budget controls. Set `max_tokens` to cap result wire size (chars/4 estimate); set `cursor` to the previous response's `next_cursor` to fetch the next page. - Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`. - Cite back to the user as `doc_path § heading_path[-1]` so they can open the source. -- When `truncated: true` and `next_cursor` is non-null, k was reduced — paginate via cursor. When `truncated: true` and `next_cursor` is null, only snippets were shortened — widen `max_tokens` to get fuller snippets. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one. +- When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one. ### `mcp__kebab__ask` — when you need the answer @@ -106,7 +106,7 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across - MCP tools return JSON content blocks; CLI prints **one JSON value to stdout**, progress / warnings to stderr. Capture stdout only: `kebab search ... --json 2>/dev/null`. - `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.hits[] | {rank, doc_path, heading: .heading_path[-1], snippet}'` (note: `.hits[]`, not `.[]` — fb-34 wrapped the array). Use `--max-tokens N` (CLI) / `max_tokens` (MCP) to cap wire size in advance. - Pagination: `search_response.v1.next_cursor` is opaque base64 — pass back as `--cursor` (CLI) or `cursor` (MCP) for the next page. `null` means no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue search to obtain a fresh cursor. -- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. When `next_cursor` is also non-null, k was reduced — paginate via cursor. When `next_cursor` is null but `truncated: true`, only snippets were shortened — widen `max_tokens` to get fuller snippets. +- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. Independent of `next_cursor`: widen `max_tokens` for fuller snippets, follow `next_cursor` for more hits, or both. - `ask`'s `citations[]` mirrors `search_hit.v1` minus retrieval internals — same `doc_path` / `citation` shape. - Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear. - `search_hit.v1` and `answer.v1.citations[]` carry `indexed_at` (RFC3339) + `stale` (bool). When `stale == true`, the source doc hasn't been re-processed since `config.search.stale_threshold_days`. Surface this caveat to the user when summarizing — the cited snapshot may not reflect current reality.