From e084b306e54712d6e370698f963cf643968d9516 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sat, 9 May 2026 21:07:04 +0900
Subject: [PATCH] fix(fb-34): align next_cursor semantics with docs (PR #125
round 2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Previous round-1 fix dropped the speculative cursor branch on
the truncated path, leaving a contradiction with the docs:
- snippet-only shrunk → cursor emitted (returned == k_effective)
- k-popped → cursor null (returned < k_effective)
But docs promised the opposite.
R2 resolution: emit cursor whenever more hits may be reachable
(either retriever filled the page OR budget popped hits — the
popped ones remain fetchable from offset+returned). Drop the
artificial "widen vs paginate" copy; truncated and next_cursor
are now independent signals — caller may do either or both.
Updates: app.rs::search_with_opts logic + SearchResponse doc +
schema description + SKILL.md two bullets + max_tokens=0 test
asserts cursor IS emitted on k-pop case.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/app.rs | 76 ++++++++++---------
.../tests/search_budget_integration.rs | 6 ++
.../v1/search_response.schema.json | 2 +-
integrations/claude-code/kebab/SKILL.md | 4 +-
4 files changed, 48 insertions(+), 40 deletions(-)
diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs
index 4dec19f..3e0c53d 100644
--- a/crates/kebab-app/src/app.rs
+++ b/crates/kebab-app/src/app.rs
@@ -53,10 +53,18 @@ use kebab_store_vector::LanceVectorStore;
/// p9-fb-34: top-level wrapper around a paginated, budget-limited
/// search result. Mirrors the wire `search_response.v1` shape.
///
-/// `next_cursor` is `Some(_)` when the retriever returned a full
-/// `k_effective` page (more hits may exist) or when the budget loop
-/// truncated mid-page; the caller threads it back through
-/// [`SearchOpts::cursor`] on the next call.
+/// `next_cursor` is non-null whenever more hits may be reachable —
+/// either the retriever filled the page (more behind it), or the
+/// budget loop popped hits (those popped hits remain fetchable
+/// from `offset + returned`). It is null only when the retriever
+/// returned fewer hits than requested AND nothing was popped — i.e.
+/// the corpus has nothing more for this query.
+///
+/// `truncated` is independent of `next_cursor`: it signals that
+/// the budget loop modified the page (snippet shorten or k pop).
+/// Caller may either widen `max_tokens` (and re-issue the same
+/// query) or follow `next_cursor` (to advance through more hits)
+/// or both.
#[derive(Clone, Debug)]
pub struct SearchResponse {
pub hits: Vec,
@@ -289,21 +297,12 @@ impl App {
}
/// p9-fb-34: budget-aware search facade. Returns hits trimmed to
- /// `opts.max_tokens` (chars/4 approximation of the wire JSON),
- /// honors a `snippet_chars` override, and threads an opaque
- /// pagination cursor through `corpus_revision`.
+ /// `opts.max_tokens` (chars/4 approximation) plus pagination
+ /// metadata. `App::search` is now a thin wrapper that drops the
+ /// metadata for backwards compat.
///
- /// Budget loop:
- /// 1. Shorten snippets progressively (halve cap, floor at 60
- /// chars) until the estimated wire-JSON char total fits or the
- /// floor is reached.
- /// 2. Pop hits off the end until the budget fits, but always
- /// retain ≥ 1 hit (the spec floor).
- ///
- /// `next_cursor` is set when the retriever returned a full page
- /// (more results may exist) or the budget truncated mid-page.
- /// `App::search` is unchanged and remains the cache-served fast
- /// path used by the existing TUI / kebab-rag callers.
+ /// `SearchResponse.next_cursor` and `truncated` are independent
+ /// signals — see `SearchResponse` doc for details.
pub fn search_with_opts(
&self,
query: SearchQuery,
@@ -390,27 +389,30 @@ impl App {
}
}
- // Compute next_cursor. Only emit on the full-page path:
- // - We returned a full `k_effective` page → more hits may
- // remain in the original retriever set; the cursor is
- // speculative (the next call falls through to an empty
- // page if nothing's left, which is fine).
+ // p9-fb-34: emit cursor whenever more hits may be reachable.
+ // Three cases produce a non-null cursor:
+ // (a) returned == k_effective: retriever filled the page; there
+ // may be more behind it. Speculative — next call may return
+ // an empty page if nothing remains.
+ // (b) truncated by k-pop: returned < k_effective because we
+ // popped hits to fit the budget. Those popped hits live at
+ // offset+returned..; next call (with same or wider budget)
+ // resumes from there.
+ // (c) truncated by snippet-only shrink: returned == k_effective,
+ // falls under (a). Cursor lets caller paginate; widening
+ // --max-tokens lets caller re-fetch fuller snippets at the
+ // same offset.
//
- // p9-fb-34 round-1 review: the previous "truncated mid-page"
- // branch was misleading — when the budget loop only shrank
- // snippets (no hits popped), `next_cursor` would point at the
- // page *after* the current hits, but the caller often wants
- // *fuller snippets for the same hits* (i.e. widen `max_tokens`)
- // not the next page. We now only emit a cursor when k was
- // actually reduced (k-pop case) or the page was naturally full;
- // both produce `returned == k_effective`. For snippet-only
- // shrinkage with `truncated: true` and `next_cursor: null`, the
- // documented guidance is "widen max_tokens".
+ // No cursor when neither (a) nor (b) applies — i.e. the retriever
+ // returned fewer than k_effective AND we didn't pop. That means
+ // end of available results.
let returned = hits.len();
- let next_cursor = if returned == k_effective
- && offset.saturating_add(returned) > 0
- {
- Some(cursor::encode(offset + returned, &corpus_revision))
+ let next_cursor = if returned == k_effective || truncated {
+ if offset.saturating_add(returned) > 0 {
+ Some(cursor::encode(offset + returned, &corpus_revision))
+ } else {
+ None
+ }
} else {
None
};
diff --git a/crates/kebab-app/tests/search_budget_integration.rs b/crates/kebab-app/tests/search_budget_integration.rs
index c961d35..42ad346 100644
--- a/crates/kebab-app/tests/search_budget_integration.rs
+++ b/crates/kebab-app/tests/search_budget_integration.rs
@@ -152,4 +152,10 @@ fn max_tokens_zero_returns_one_hit_truncated() {
.unwrap();
assert_eq!(resp.hits.len(), 1, "max_tokens=0 collapses to 1-hit floor");
assert!(resp.truncated);
+ // p9-fb-34 R2: cursor IS emitted on k-pop case so the popped
+ // hits remain reachable.
+ assert!(
+ resp.next_cursor.is_some(),
+ "k-pop truncation must still emit next_cursor; popped hits at offset+returned"
+ );
}
diff --git a/docs/wire-schema/v1/search_response.schema.json b/docs/wire-schema/v1/search_response.schema.json
index 8c13355..20e6eb8 100644
--- a/docs/wire-schema/v1/search_response.schema.json
+++ b/docs/wire-schema/v1/search_response.schema.json
@@ -9,6 +9,6 @@
"schema_version": { "const": "search_response.v1" },
"hits": { "type": "array", "description": "search_hit.v1[]" },
"next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." },
- "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. When `next_cursor` is also non-null, k was reduced — paginate via cursor. When `next_cursor` is null but `truncated: true`, only snippets were shortened — widen `max_tokens` to get fuller snippets." }
+ "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." }
}
}
diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md
index 2c7a53f..34d5ef6 100644
--- a/integrations/claude-code/kebab/SKILL.md
+++ b/integrations/claude-code/kebab/SKILL.md
@@ -54,7 +54,7 @@ Input:
- **`max_tokens` / `snippet_chars` / `cursor` (p9-fb-34)** — agent budget controls. Set `max_tokens` to cap result wire size (chars/4 estimate); set `cursor` to the previous response's `next_cursor` to fetch the next page.
- Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`.
- Cite back to the user as `doc_path § heading_path[-1]` so they can open the source.
-- When `truncated: true` and `next_cursor` is non-null, k was reduced — paginate via cursor. When `truncated: true` and `next_cursor` is null, only snippets were shortened — widen `max_tokens` to get fuller snippets. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one.
+- When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one.
### `mcp__kebab__ask` — when you need the answer
@@ -106,7 +106,7 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across
- MCP tools return JSON content blocks; CLI prints **one JSON value to stdout**, progress / warnings to stderr. Capture stdout only: `kebab search ... --json 2>/dev/null`.
- `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.hits[] | {rank, doc_path, heading: .heading_path[-1], snippet}'` (note: `.hits[]`, not `.[]` — fb-34 wrapped the array). Use `--max-tokens N` (CLI) / `max_tokens` (MCP) to cap wire size in advance.
- Pagination: `search_response.v1.next_cursor` is opaque base64 — pass back as `--cursor` (CLI) or `cursor` (MCP) for the next page. `null` means no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue search to obtain a fresh cursor.
-- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. When `next_cursor` is also non-null, k was reduced — paginate via cursor. When `next_cursor` is null but `truncated: true`, only snippets were shortened — widen `max_tokens` to get fuller snippets.
+- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. Independent of `next_cursor`: widen `max_tokens` for fuller snippets, follow `next_cursor` for more hits, or both.
- `ask`'s `citations[]` mirrors `search_hit.v1` minus retrieval internals — same `doc_path` / `citation` shape.
- Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear.
- `search_hit.v1` and `answer.v1.citations[]` carry `indexed_at` (RFC3339) + `stale` (bool). When `stale == true`, the source doc hasn't been re-processed since `config.search.stale_threshold_days`. Surface this caveat to the user when summarizing — the cited snapshot may not reflect current reality.