diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs index 4f3b64a..4dec19f 100644 --- a/crates/kebab-app/src/app.rs +++ b/crates/kebab-app/src/app.rs @@ -313,8 +313,12 @@ impl App { let corpus_revision = self.sqlite.corpus_revision().to_string(); let offset = match opts.cursor.as_ref() { + // p9-fb-34: wrap the typed ErrorV1 in StructuredError so + // anyhow carries the structured payload all the way to + // `classify` — string formatting here would degrade + // `code = "stale_cursor"` to `code = "generic"` on the wire. Some(c) => cursor::decode(c, &corpus_revision) - .map_err(|e| anyhow!("stale_cursor: {}", e.message))?, + .map_err(|e| anyhow::Error::new(crate::error_wire::StructuredError(e)))?, None => 0, }; @@ -386,19 +390,26 @@ impl App { } } - // Compute next_cursor. Two paths produce one: + // Compute next_cursor. Only emit on the full-page path: // - We returned a full `k_effective` page → more hits may // remain in the original retriever set; the cursor is // speculative (the next call falls through to an empty // page if nothing's left, which is fine). - // - The budget loop truncated mid-page → resume from where - // we stopped so the caller can fetch the rest with a - // bigger budget. + // + // p9-fb-34 round-1 review: the previous "truncated mid-page" + // branch was misleading — when the budget loop only shrank + // snippets (no hits popped), `next_cursor` would point at the + // page *after* the current hits, but the caller often wants + // *fuller snippets for the same hits* (i.e. widen `max_tokens`) + // not the next page. We now only emit a cursor when k was + // actually reduced (k-pop case) or the page was naturally full; + // both produce `returned == k_effective`. For snippet-only + // shrinkage with `truncated: true` and `next_cursor: null`, the + // documented guidance is "widen max_tokens". let returned = hits.len(); - let full_page = returned == k_effective - && offset.saturating_add(returned) > 0; - let mid_page_truncation = truncated && returned > 0; - let next_cursor = if full_page || mid_page_truncation { + let next_cursor = if returned == k_effective + && offset.saturating_add(returned) > 0 + { Some(cursor::encode(offset + returned, &corpus_revision)) } else { None @@ -782,11 +793,10 @@ fn trim_to_chars(s: &str, n: usize) -> String { out } -/// p9-fb-34: estimate the wire-JSON char cost of a hit list. Used by -/// the budget loop in `App::search_with_opts`. `serde_json::to_string` -/// failures fall back to 0 so a single broken hit never makes the -/// loop loop forever; in practice the hit struct serializes -/// infallibly. +/// p9-fb-34: estimate wire JSON char cost of the hit list. Returns 0 +/// per-hit when serialization fails — a SearchHit serialization +/// failure is an invariant violation; we degrade gracefully (loop +/// terminates early) rather than panic in the budget loop. fn estimate_chars(hits: &[SearchHit]) -> usize { hits.iter() .map(|h| serde_json::to_string(h).map(|s| s.len()).unwrap_or(0)) diff --git a/crates/kebab-app/src/cursor.rs b/crates/kebab-app/src/cursor.rs index 4463116..52b02af 100644 --- a/crates/kebab-app/src/cursor.rs +++ b/crates/kebab-app/src/cursor.rs @@ -35,28 +35,41 @@ pub fn encode(offset: usize, corpus_revision: &str) -> String { // after monomorphization with Value + String fields). Boxing here // would force every call site to deref through a Box for no win — // the err-path is rare. Single allow at the function level. +// +// p9-fb-34 round-1 review: differentiate the three failure modes +// (base64 / JSON / revision mismatch) with distinct messages — all +// keep `code = "stale_cursor"` so the agent's branching logic stays +// the same, but humans reading the message get a precise hint. #[allow(clippy::result_large_err)] pub fn decode(s: &str, expected_revision: &str) -> Result { - let bytes = URL_SAFE_NO_PAD - .decode(s.as_bytes()) - .map_err(|_| stale("", expected_revision))?; - let payload: Payload = serde_json::from_slice(&bytes) - .map_err(|_| stale("", expected_revision))?; + let bytes = URL_SAFE_NO_PAD.decode(s.as_bytes()).map_err(|_| ErrorV1 { + schema_version: "error.v1".to_string(), + code: "stale_cursor".to_string(), + message: "cursor is not valid base64. Re-issue search to obtain a fresh cursor." + .to_string(), + details: Value::Null, + hint: None, + })?; + let payload: Payload = serde_json::from_slice(&bytes).map_err(|_| ErrorV1 { + schema_version: "error.v1".to_string(), + code: "stale_cursor".to_string(), + message: "cursor payload is malformed. Re-issue search to obtain a fresh cursor." + .to_string(), + details: Value::Null, + hint: None, + })?; if payload.corpus_revision != expected_revision { - return Err(stale(&payload.corpus_revision, expected_revision)); + return Err(ErrorV1 { + schema_version: "error.v1".to_string(), + code: "stale_cursor".to_string(), + message: format!( + "cursor was issued against corpus_revision '{}'; current revision is \ + '{}'. Re-issue search to obtain a fresh cursor.", + payload.corpus_revision, expected_revision + ), + details: Value::Null, + hint: None, + }); } Ok(payload.offset) } - -fn stale(found: &str, expected: &str) -> ErrorV1 { - ErrorV1 { - schema_version: "error.v1".to_string(), - code: "stale_cursor".to_string(), - message: format!( - "cursor was issued against corpus_revision '{found}'; current revision is \ - '{expected}'. Re-issue search to obtain a fresh cursor." - ), - details: Value::Null, - hint: None, - } -} diff --git a/crates/kebab-app/src/error_wire.rs b/crates/kebab-app/src/error_wire.rs index 85717b4..9ded9d3 100644 --- a/crates/kebab-app/src/error_wire.rs +++ b/crates/kebab-app/src/error_wire.rs @@ -12,8 +12,10 @@ use serde_json::{Value, json}; use crate::error_signal::{ConfigInvalid, LlmError, NotIndexed}; // p9-fb-34: `stale_cursor` is constructed directly by `cursor::decode` -// instead of routed through `classify`. Keep that contract — adding a -// classify branch would create two sources of truth for the same code. +// and surfaced through `StructuredError` (an anyhow-friendly wrapper +// that carries the typed `ErrorV1` payload without lossy string +// formatting). `classify` short-circuits on it at the top of the +// function so the typed `code = "stale_cursor"` reaches the wire. /// Wire schema id for [`ErrorV1`]. Single source of truth — kebab-cli /// + kebab-mcp use this via `kebab_app::ERROR_V1_ID`. @@ -28,7 +30,29 @@ pub struct ErrorV1 { pub hint: Option, } +/// p9-fb-34: typed wrapper around an [`ErrorV1`] so callers that +/// surface `anyhow::Error` can downcast back to the structured wire +/// payload instead of losing it to string formatting. Constructed by +/// the cursor code path (`cursor::decode` → `App::search_with_opts`) +/// and short-circuited inside [`classify`]. +#[derive(Debug)] +pub struct StructuredError(pub ErrorV1); + +impl std::fmt::Display for StructuredError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[{}] {}", self.0.code, self.0.message) + } +} + +impl std::error::Error for StructuredError {} + pub fn classify(err: &anyhow::Error, verbose: bool) -> ErrorV1 { + // p9-fb-34: structured wrapper short-circuits — preserves the + // typed payload that callers (cursor::decode) constructed + // instead of falling through to `code = "generic"`. + if let Some(s) = err.downcast_ref::() { + return s.0.clone(); + } if let Some(s) = err.downcast_ref::() { return ErrorV1 { schema_version: ERROR_V1_ID.to_string(), @@ -208,9 +232,29 @@ mod tests { let err: anyhow::Error = anyhow!("stale_cursor: rev mismatch"); let v1 = classify(&err, false); // p9-fb-34: stale_cursor is constructed directly by cursor::decode - // (single source of truth). classify routes anyhow strings to the - // generic "unknown" code. This test pins that contract — adding a - // classify branch for stale_cursor would create two sources. - assert_ne!(v1.code, "stale_cursor", "classify must not produce stale_cursor — cursor::decode is sole source"); + // (single source of truth). classify must not pattern-match on + // anyhow string contents — that would create two sources of + // truth. The bare anyhow string falls through to "generic". + assert_ne!(v1.code, "stale_cursor", "classify must not produce stale_cursor from bare anyhow string"); + } + + #[test] + fn stale_cursor_propagates_through_structured_wrapper() { + // p9-fb-34: positive-side contract for the structured-wrapper + // path. cursor::decode constructs a typed ErrorV1, the call site + // wraps it in `StructuredError`, anyhow carries it, and classify + // short-circuits via downcast — preserving the typed code + + // message instead of falling through to "generic". + let original = ErrorV1 { + schema_version: ERROR_V1_ID.to_string(), + code: "stale_cursor".to_string(), + message: "test stale cursor".to_string(), + details: Value::Null, + hint: None, + }; + let err: anyhow::Error = anyhow::Error::new(StructuredError(original)); + let v1 = classify(&err, false); + assert_eq!(v1.code, "stale_cursor"); + assert_eq!(v1.message, "test stale cursor"); } } diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 45ba594..66c38ad 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -69,7 +69,7 @@ mod staleness; pub use app::{App, SearchResponse}; pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown}; pub use reset::{ResetReport, ResetScope}; -pub use error_wire::{ERROR_V1_ID, ErrorV1, classify}; +pub use error_wire::{ERROR_V1_ID, ErrorV1, StructuredError, classify}; pub use schema::{Capabilities, Models, SCHEMA_V1_ID, SchemaV1, Stats, WireBlock, schema_with_config}; pub use staleness::{compute_stale, mark_stale_in_place}; diff --git a/crates/kebab-app/tests/search_budget_integration.rs b/crates/kebab-app/tests/search_budget_integration.rs index bded4af..c961d35 100644 --- a/crates/kebab-app/tests/search_budget_integration.rs +++ b/crates/kebab-app/tests/search_budget_integration.rs @@ -98,24 +98,58 @@ fn cursor_rejected_after_corpus_revision_bump() { let page1 = app .search_with_opts(lex("apples", 1), SearchOpts::default()) .unwrap(); - let cursor = page1.next_cursor; + // p9-fb-34 round-1 review: replaced silent `if let Some(c) = ...` + // with `.expect(...)` so a fixture regression that breaks the + // cursor-emission contract fails loudly instead of passing vacuously. + let c = page1 + .next_cursor + .expect("k=1 page must emit next_cursor — fixture too small if this fails"); - if let Some(c) = cursor { - common::ingest_md(&env, "b.md", "# B\n\nbananas\n"); - let app2 = env.app(); + common::ingest_md(&env, "b.md", "# B\n\nbananas\n"); + let app2 = env.app(); - let result = app2.search_with_opts( - lex("apples", 1), - SearchOpts { - max_tokens: None, - snippet_chars: None, - cursor: Some(c), - }, - ); - let err = result.unwrap_err(); - assert!( - err.to_string().contains("stale_cursor"), - "must surface stale_cursor: {err}" + let result = app2.search_with_opts( + lex("apples", 1), + SearchOpts { + max_tokens: None, + snippet_chars: None, + cursor: Some(c), + }, + ); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("stale_cursor"), + "must surface stale_cursor: {err}" + ); +} + +#[test] +fn max_tokens_zero_returns_one_hit_truncated() { + // p9-fb-34 round-1 review: pin the documented "≥1 hit floor" + // contract — even with `max_tokens=0` (an absurdly tight budget) + // the budget loop must keep one hit and flip `truncated: true`. + // Fixture intentionally seeds multiple matches so step 2 of the + // budget loop (pop hits to 1) actually fires. + let env = common::TestEnv::new(); + for i in 0..3 { + common::ingest_md( + &env, + &format!("d{i}.md"), + &format!("# T{i}\n\napples are red {i}\n"), ); } + let app = env.app(); + + let resp = app + .search_with_opts( + lex("apples", 5), + SearchOpts { + max_tokens: Some(0), + snippet_chars: None, + cursor: None, + }, + ) + .unwrap(); + assert_eq!(resp.hits.len(), 1, "max_tokens=0 collapses to 1-hit floor"); + assert!(resp.truncated); } diff --git a/crates/kebab-cli/tests/wire_search_response.rs b/crates/kebab-cli/tests/wire_search_response.rs index ab65f29..60e1c0f 100644 --- a/crates/kebab-cli/tests/wire_search_response.rs +++ b/crates/kebab-cli/tests/wire_search_response.rs @@ -134,6 +134,79 @@ fn search_json_cursor_paginates() { ); } +#[test] +fn search_stale_cursor_returns_error_v1_with_stale_cursor_code() { + // p9-fb-34 round-1 review: end-to-end wire contract — when the + // corpus_revision bumps between cursor issuance and the cursored + // search, `kebab --json search --cursor ` must emit an + // `error.v1` ndjson line on stderr with `code = "stale_cursor"`. + // Pre-fix this returned `code = "generic"` because + // `App::search_with_opts` string-formatted the typed payload into + // anyhow, losing the structured wrapper. + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 30); + fs::write(workspace.join("a.md"), "# T\n\napples\n").unwrap(); + common::ingest(&cfg, &workspace); + + // Get a valid cursor first. + let (page1_stdout, _) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--json", "--k", "1", "apples"], + ); + let v1: Value = serde_json::from_str(page1_stdout.trim()).expect("json"); + let cursor = v1["next_cursor"] + .as_str() + .expect("k=1 page must emit next_cursor — fixture too small if this fails") + .to_string(); + + // Bump corpus_revision by ingesting a second doc. + fs::write(workspace.join("b.md"), "# B\n\nbananas\n").unwrap(); + common::ingest(&cfg, &workspace); + + // Use the now-stale cursor. Direct invocation (not via the + // success-asserting helper) so we can read stderr on failure. + let exe = env!("CARGO_BIN_EXE_kebab"); + let cfg_str = cfg.to_str().expect("utf8"); + let out = std::process::Command::new(exe) + .args([ + "--config", + cfg_str, + "--json", + "search", + "--mode", + "lexical", + "--json", + "--cursor", + &cursor, + "apples", + ]) + .output() + .expect("kebab search --cursor"); + + let stderr = String::from_utf8_lossy(&out.stderr); + // Find the error.v1 ndjson line on stderr (one event per line). + let err_line = stderr + .lines() + .find(|l| { + serde_json::from_str::(l) + .ok() + .and_then(|v| { + v.get("schema_version") + .and_then(|s| s.as_str()) + .map(String::from) + }) + .as_deref() + == Some("error.v1") + }) + .unwrap_or_else(|| panic!("no error.v1 line on stderr: {stderr:?}")); + + let v: Value = serde_json::from_str(err_line).expect("error.v1 json"); + assert_eq!( + v["code"], "stale_cursor", + "code must be stale_cursor: {err_line}" + ); +} + #[test] fn search_plain_emits_truncated_hint_to_stderr() { let dir = tempfile::tempdir().unwrap(); diff --git a/docs/wire-schema/v1/search_response.schema.json b/docs/wire-schema/v1/search_response.schema.json index 156a218..8c13355 100644 --- a/docs/wire-schema/v1/search_response.schema.json +++ b/docs/wire-schema/v1/search_response.schema.json @@ -9,6 +9,6 @@ "schema_version": { "const": "search_response.v1" }, "hits": { "type": "array", "description": "search_hit.v1[]" }, "next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." }, - "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Caller can request next page via next_cursor or pass higher k." } + "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. When `next_cursor` is also non-null, k was reduced — paginate via cursor. When `next_cursor` is null but `truncated: true`, only snippets were shortened — widen `max_tokens` to get fuller snippets." } } } diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md index ff81091..2c7a53f 100644 --- a/integrations/claude-code/kebab/SKILL.md +++ b/integrations/claude-code/kebab/SKILL.md @@ -54,7 +54,7 @@ Input: - **`max_tokens` / `snippet_chars` / `cursor` (p9-fb-34)** — agent budget controls. Set `max_tokens` to cap result wire size (chars/4 estimate); set `cursor` to the previous response's `next_cursor` to fetch the next page. - Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`. - Cite back to the user as `doc_path § heading_path[-1]` so they can open the source. -- When `truncated: true`, either widen `max_tokens` or paginate via `next_cursor`. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one. +- When `truncated: true` and `next_cursor` is non-null, k was reduced — paginate via cursor. When `truncated: true` and `next_cursor` is null, only snippets were shortened — widen `max_tokens` to get fuller snippets. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one. ### `mcp__kebab__ask` — when you need the answer @@ -106,7 +106,7 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across - MCP tools return JSON content blocks; CLI prints **one JSON value to stdout**, progress / warnings to stderr. Capture stdout only: `kebab search ... --json 2>/dev/null`. - `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.hits[] | {rank, doc_path, heading: .heading_path[-1], snippet}'` (note: `.hits[]`, not `.[]` — fb-34 wrapped the array). Use `--max-tokens N` (CLI) / `max_tokens` (MCP) to cap wire size in advance. - Pagination: `search_response.v1.next_cursor` is opaque base64 — pass back as `--cursor` (CLI) or `cursor` (MCP) for the next page. `null` means no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue search to obtain a fresh cursor. -- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. Either widen the budget or paginate via `next_cursor`. +- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. When `next_cursor` is also non-null, k was reduced — paginate via cursor. When `next_cursor` is null but `truncated: true`, only snippets were shortened — widen `max_tokens` to get fuller snippets. - `ask`'s `citations[]` mirrors `search_hit.v1` minus retrieval internals — same `doc_path` / `citation` shape. - Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear. - `search_hit.v1` and `answer.v1.citations[]` carry `indexed_at` (RFC3339) + `stale` (bool). When `stale == true`, the source doc hasn't been re-processed since `config.search.stale_threshold_days`. Surface this caveat to the user when summarizing — the cited snapshot may not reflect current reality. diff --git a/tasks/HOTFIXES.md b/tasks/HOTFIXES.md index a7cae46..2e69e84 100644 --- a/tasks/HOTFIXES.md +++ b/tasks/HOTFIXES.md @@ -27,6 +27,8 @@ git history. **영향 받는 consumer**: kebab-tui (Search 패널 — 변경 불필요, App::search 시그니처 보존), kebab-mcp (search tool — 같은 PR 에서 갱신), Claude Code skill (같은 PR 에서 갱신). 외부 producer/consumer 없음. +**`--no-cache` 의미 변화**: fb-34 이전 `--no-cache` 는 `search_uncached_with_config` 로 cache 자체를 우회. fb-34 는 cached path 위에 `clear_search_cache()` 호출 후 search 실행 — long-lived process (TUI / MCP) 에서는 clear 와 fetch 사이 race window 가 있음. CLI (fresh App per call) 에서는 무영향. 후속 fb-3X 에서 `search_with_opts_uncached` 추가로 격리. + ## 2026-05-09 — p9-fb-33: AskOpts.stream_sink type widened to StreamEvent **무엇이 바뀌었나**: `kebab_rag::AskOpts.stream_sink` 의 타입이 `Option>` 에서 `Option>` 로 변경됨. `kebab_app::StreamEvent` 가 새 re-export.