diff --git a/Cargo.lock b/Cargo.lock index db4e4d3..12804d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3528,6 +3528,7 @@ name = "kebab-app" version = "0.4.0" dependencies = [ "anyhow", + "base64 0.22.1", "blake3", "dirs 5.0.1", "ignore", diff --git a/Cargo.toml b/Cargo.toml index aa549fc..661925c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -80,6 +80,7 @@ rmcp = { version = "1.6", default-features = false, features = ["server" # a tokio runtime to host its mock server (the runtime adapter crate stays # sync via reqwest::blocking — wiremock is dev-only there). wiremock = "0.6" +base64 = "0.22" # Disk-footprint trim for dev / test builds. Codegen, opt-level, and # behavior are unchanged — only DWARF debug info is reduced (line diff --git a/README.md b/README.md index b4bc63f..106c105 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ kebab doctor |------|------| | `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 | | `kebab ingest []` | Markdown / 이미지 / PDF 색인 (idempotent). TTY 에서는 stderr 진행 바, non-TTY (CI / pipe) 는 stderr 한 줄씩, `--json` 은 stdout 에 `ingest_progress.v1` 라인 streaming 후 마지막에 `ingest_report.v1`. Ctrl-C 한 번이면 현재 asset 마무리 후 abort (부분 commit 보존, idempotent re-run), 두 번째 Ctrl-C 는 hard exit. Markdown title 이 frontmatter 에 없어도 첫 H1 → H2 → 첫 paragraph 80 자 → 파일명 순으로 자동 채움 (parser_version `md-frontmatter-v2`) — 기존 색인된 doc 도 다음 ingest 에서 새 title 로 갱신. **Incremental** (p9-fb-23): 두 번째 이후의 ingest 는 변하지 않은 doc (blake3 + parser/chunker/embedder version 모두 동일) 의 parse/chunk/embed/vector upsert 를 자동 스킵. final summary 에 `N unchanged` 카운트 표시. `--force-reingest` 로 skip 무시 강제 재처리. **지원 형식** (extractor 자동 결정 — config 에 명시 불가): Markdown (`.md`), 이미지 (`.png` / `.jpg` / `.jpeg`, OCR + caption), PDF (`.pdf`). 다른 확장자는 자동 skip — `IngestItem.warnings` 에 사유 (`"unsupported media type: .docx"` 등), `IngestReport.skipped_by_extension` 에 카운트 분류, CLI / TUI summary 에 breakdown 표시. | -| `kebab search --mode {lexical,vector,hybrid} "" [--no-cache]` | 검색. 
hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale | +| `kebab search --mode {lexical,vector,hybrid} "" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor ]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor` | | `kebab list docs` | 색인된 문서 목록 | | `kebab inspect doc ` / `kebab inspect chunk ` | raw record 보기 | | `kebab ask "" [--show-citations / --hide-citations] [--session ] [--stream]` | RAG 답변 + 근거 인용. 답변 후 `근거:` block 으로 full path / line range / score 한 줄씩 (default ON — `--hide-citations` 로 끄기, pipe 시 유용). 근거 부족 시 거절. Ollama 필요. `--session ` 로 multi-turn — 첫 호출에서 SQLite `chat_sessions` 에 자동 생성, 이후 호출은 prior turns 를 history 로 받아 follow-up. session id 는 사용자 지정 (e.g. `kb-rust-async-2026-05`) — `kebab reset --data-only` 로 모든 session wipe. **`--stream` (p9-fb-33)** 로 ndjson `answer_event.v1` event (retrieval_done → token* → final) 를 stderr 에 흘리고 stdout 마지막 줄에 기존 `answer.v1` — agent 가 token 즉시 소비 가능 | diff --git a/crates/kebab-app/Cargo.toml b/crates/kebab-app/Cargo.toml index cc35d07..a3ec230 100644 --- a/crates/kebab-app/Cargo.toml +++ b/crates/kebab-app/Cargo.toml @@ -52,6 +52,8 @@ unicode-normalization = "0.1" # p9-fb-31: GitignoreBuilder for .kebabignore matching in ingest_file_with_config. # Same version as kebab-source-fs (0.4) to avoid duplicate dep versions. 
ignore = "0.4" +# p9-fb-34: opaque pagination cursor encodes payload as base64. +base64 = { workspace = true } [dev-dependencies] rusqlite = { workspace = true } diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs index 34ae3f0..3e0c53d 100644 --- a/crates/kebab-app/src/app.rs +++ b/crates/kebab-app/src/app.rs @@ -41,7 +41,7 @@ use lru::LruCache; use kebab_core::{ Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode, - SearchQuery, VectorStore, + SearchOpts, SearchQuery, VectorStore, }; use kebab_embed_local::FastembedEmbedder; use kebab_llm_local::OllamaLanguageModel; @@ -50,6 +50,28 @@ use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever}; use kebab_store_sqlite::SqliteStore; use kebab_store_vector::LanceVectorStore; +/// p9-fb-34: top-level wrapper around a paginated, budget-limited +/// search result. Mirrors the wire `search_response.v1` shape. +/// +/// `next_cursor` is non-null whenever more hits may be reachable — +/// either the retriever filled the page (more behind it), or the +/// budget loop popped hits (those popped hits remain fetchable +/// from `offset + returned`). It is null only when the retriever +/// returned fewer hits than requested AND nothing was popped — i.e. +/// the corpus has nothing more for this query. +/// +/// `truncated` is independent of `next_cursor`: it signals that +/// the budget loop modified the page (snippet shorten or k pop). +/// Caller may either widen `max_tokens` (and re-issue the same +/// query) or follow `next_cursor` (to advance through more hits) +/// or both. +#[derive(Clone, Debug)] +pub struct SearchResponse { + pub hits: Vec, + pub next_cursor: Option, + pub truncated: bool, +} + /// Facade state — see module docs for lifetime rules. /// /// The struct is public so long-lived callers (kb-eval, the future P9 @@ -274,6 +296,134 @@ impl App { Ok(hits) } + /// p9-fb-34: budget-aware search facade. 
Returns hits trimmed to + /// `opts.max_tokens` (chars/4 approximation) plus pagination + /// metadata. `App::search` is now a thin wrapper that drops the + /// metadata for backwards compat. + /// + /// `SearchResponse.next_cursor` and `truncated` are independent + /// signals — see `SearchResponse` doc for details. + pub fn search_with_opts( + &self, + query: SearchQuery, + opts: SearchOpts, + ) -> Result { + use crate::cursor; + + let corpus_revision = self.sqlite.corpus_revision().to_string(); + let offset = match opts.cursor.as_ref() { + // p9-fb-34: wrap the typed ErrorV1 in StructuredError so + // anyhow carries the structured payload all the way to + // `classify` — string formatting here would degrade + // `code = "stale_cursor"` to `code = "generic"` on the wire. + Some(c) => cursor::decode(c, &corpus_revision) + .map_err(|e| anyhow::Error::new(crate::error_wire::StructuredError(e)))?, + None => 0, + }; + + let snippet_chars = opts + .snippet_chars + .unwrap_or(self.config.search.snippet_chars); + + // Fetch enough to satisfy offset + the requested page. The + // retriever returns at most `fetch_k` hits — we then drop + // `offset` and keep the next `k_effective`. `k = 0` is + // treated as "use config default" so a caller passing through + // a default-constructed `SearchQuery` still gets useful work + // out of the budget facade. + let k_effective = if query.k == 0 { + self.config.search.default_k + } else { + query.k + }; + let fetch_k = offset.saturating_add(k_effective); + let fetch_query = SearchQuery { + k: fetch_k, + ..query.clone() + }; + let mut all_hits = self.search(fetch_query)?; + + // Skip offset. 
+ let drop_n = offset.min(all_hits.len()); + all_hits.drain(..drop_n); + let mut hits: Vec = + all_hits.into_iter().take(k_effective).collect(); + + // Apply snippet_chars override if shorter than what the + // retriever returned (retriever already honored + // `config.search.snippet_chars`; this only kicks in when the + // caller asked for *less*). + if opts.snippet_chars.is_some() { + for h in hits.iter_mut() { + if h.snippet.chars().count() > snippet_chars { + h.snippet = trim_to_chars(&h.snippet, snippet_chars); + } + } + } + + // Budget loop. + let mut truncated = false; + if let Some(max_tokens) = opts.max_tokens { + let max_chars = max_tokens.saturating_mul(4); + // Step 1: shorten snippets progressively to a 60-char floor. + const SNIPPET_FLOOR: usize = 60; + let mut current_snippet_cap = snippet_chars; + while estimate_chars(&hits) > max_chars + && current_snippet_cap > SNIPPET_FLOOR + { + current_snippet_cap = + (current_snippet_cap / 2).max(SNIPPET_FLOOR); + for h in hits.iter_mut() { + if h.snippet.chars().count() > current_snippet_cap { + h.snippet = + trim_to_chars(&h.snippet, current_snippet_cap); + truncated = true; + } + } + } + // Step 2: pop hits from the end until we fit, but always + // keep ≥ 1. + while estimate_chars(&hits) > max_chars && hits.len() > 1 { + hits.pop(); + truncated = true; + } + } + + // p9-fb-34: emit cursor whenever more hits may be reachable. + // Three cases produce a non-null cursor: + // (a) returned == k_effective: retriever filled the page; there + // may be more behind it. Speculative — next call may return + // an empty page if nothing remains. + // (b) truncated by k-pop: returned < k_effective because we + // popped hits to fit the budget. Those popped hits live at + // offset+returned..; next call (with same or wider budget) + // resumes from there. + // (c) truncated by snippet-only shrink: returned == k_effective, + // falls under (a). 
/// p9-fb-34: return the first `n` Unicode scalar values of `s` as an owned
/// `String`.
///
/// * If `s` contains `n` or fewer scalar values, the whole string is copied.
/// * Otherwise the result is the prefix of `s` holding exactly `n` scalar
///   values. The cut always lands on a `char` boundary, so the result is
///   valid UTF-8. It may still split a multi-scalar grapheme cluster.
///
/// `char_indices().nth(n)` finds the byte offset of the first char to drop
/// in a single pass that stops after `n + 1` chars. The prefix is then
/// copied with an allocation sized to the bytes actually kept.
fn trim_to_chars(s: &str, n: usize) -> String {
    match s.char_indices().nth(n) {
        // `cut` is the byte index where char number `n` starts.
        Some((cut, _)) => s[..cut].to_owned(),
        // Fewer than `n + 1` chars: nothing to trim.
        None => s.to_owned(),
    }
}
+fn estimate_chars(hits: &[SearchHit]) -> usize { + hits.iter() + .map(|h| serde_json::to_string(h).map(|s| s.len()).unwrap_or(0)) + .sum() +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/kebab-app/src/cursor.rs b/crates/kebab-app/src/cursor.rs new file mode 100644 index 0000000..52b02af --- /dev/null +++ b/crates/kebab-app/src/cursor.rs @@ -0,0 +1,75 @@ +//! p9-fb-34 opaque pagination cursor. +//! +//! Format: base64(JSON({offset: usize, corpus_revision: string})). +//! Opaque to callers — they MUST NOT decode the contents themselves; +//! the schema is internal and may change without notice. + +use base64::Engine; +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use crate::error_wire::ErrorV1; + +#[derive(Serialize, Deserialize)] +struct Payload { + offset: usize, + corpus_revision: String, +} + +/// Encode `(offset, corpus_revision)` as an opaque base64 string. +pub fn encode(offset: usize, corpus_revision: &str) -> String { + let payload = Payload { + offset, + corpus_revision: corpus_revision.to_string(), + }; + let json = serde_json::to_vec(&payload).expect("Payload serializes"); + URL_SAFE_NO_PAD.encode(&json) +} + +/// Decode an opaque cursor against the expected `corpus_revision`. +/// Mismatch or malformed input returns an `ErrorV1` with +/// `code = "stale_cursor"`. +// +// p9-fb-34: ErrorV1 is the workspace-wide wire error struct (~200B +// after monomorphization with Value + String fields). Boxing here +// would force every call site to deref through a Box for no win — +// the err-path is rare. Single allow at the function level. +// +// p9-fb-34 round-1 review: differentiate the three failure modes +// (base64 / JSON / revision mismatch) with distinct messages — all +// keep `code = "stale_cursor"` so the agent's branching logic stays +// the same, but humans reading the message get a precise hint. 
+#[allow(clippy::result_large_err)] +pub fn decode(s: &str, expected_revision: &str) -> Result { + let bytes = URL_SAFE_NO_PAD.decode(s.as_bytes()).map_err(|_| ErrorV1 { + schema_version: "error.v1".to_string(), + code: "stale_cursor".to_string(), + message: "cursor is not valid base64. Re-issue search to obtain a fresh cursor." + .to_string(), + details: Value::Null, + hint: None, + })?; + let payload: Payload = serde_json::from_slice(&bytes).map_err(|_| ErrorV1 { + schema_version: "error.v1".to_string(), + code: "stale_cursor".to_string(), + message: "cursor payload is malformed. Re-issue search to obtain a fresh cursor." + .to_string(), + details: Value::Null, + hint: None, + })?; + if payload.corpus_revision != expected_revision { + return Err(ErrorV1 { + schema_version: "error.v1".to_string(), + code: "stale_cursor".to_string(), + message: format!( + "cursor was issued against corpus_revision '{}'; current revision is \ + '{}'. Re-issue search to obtain a fresh cursor.", + payload.corpus_revision, expected_revision + ), + details: Value::Null, + hint: None, + }); + } + Ok(payload.offset) +} diff --git a/crates/kebab-app/src/error_wire.rs b/crates/kebab-app/src/error_wire.rs index e1d91e1..9ded9d3 100644 --- a/crates/kebab-app/src/error_wire.rs +++ b/crates/kebab-app/src/error_wire.rs @@ -11,6 +11,12 @@ use serde_json::{Value, json}; use crate::error_signal::{ConfigInvalid, LlmError, NotIndexed}; +// p9-fb-34: `stale_cursor` is constructed directly by `cursor::decode` +// and surfaced through `StructuredError` (an anyhow-friendly wrapper +// that carries the typed `ErrorV1` payload without lossy string +// formatting). `classify` short-circuits on it at the top of the +// function so the typed `code = "stale_cursor"` reaches the wire. + /// Wire schema id for [`ErrorV1`]. Single source of truth — kebab-cli /// + kebab-mcp use this via `kebab_app::ERROR_V1_ID`. 
pub const ERROR_V1_ID: &str = "error.v1"; @@ -24,7 +30,29 @@ pub struct ErrorV1 { pub hint: Option, } +/// p9-fb-34: typed wrapper around an [`ErrorV1`] so callers that +/// surface `anyhow::Error` can downcast back to the structured wire +/// payload instead of losing it to string formatting. Constructed by +/// the cursor code path (`cursor::decode` → `App::search_with_opts`) +/// and short-circuited inside [`classify`]. +#[derive(Debug)] +pub struct StructuredError(pub ErrorV1); + +impl std::fmt::Display for StructuredError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "[{}] {}", self.0.code, self.0.message) + } +} + +impl std::error::Error for StructuredError {} + pub fn classify(err: &anyhow::Error, verbose: bool) -> ErrorV1 { + // p9-fb-34: structured wrapper short-circuits — preserves the + // typed payload that callers (cursor::decode) constructed + // instead of falling through to `code = "generic"`. + if let Some(s) = err.downcast_ref::() { + return s.0.clone(); + } if let Some(s) = err.downcast_ref::() { return ErrorV1 { schema_version: ERROR_V1_ID.to_string(), @@ -197,4 +225,36 @@ mod tests { let v1 = classify(&err, false); assert_eq!(v1.code, "io_error"); } + + #[test] + fn stale_cursor_is_not_routed_through_classify() { + use anyhow::anyhow; + let err: anyhow::Error = anyhow!("stale_cursor: rev mismatch"); + let v1 = classify(&err, false); + // p9-fb-34: stale_cursor is constructed directly by cursor::decode + // (single source of truth). classify must not pattern-match on + // anyhow string contents — that would create two sources of + // truth. The bare anyhow string falls through to "generic". + assert_ne!(v1.code, "stale_cursor", "classify must not produce stale_cursor from bare anyhow string"); + } + + #[test] + fn stale_cursor_propagates_through_structured_wrapper() { + // p9-fb-34: positive-side contract for the structured-wrapper + // path. 
cursor::decode constructs a typed ErrorV1, the call site + // wraps it in `StructuredError`, anyhow carries it, and classify + // short-circuits via downcast — preserving the typed code + + // message instead of falling through to "generic". + let original = ErrorV1 { + schema_version: ERROR_V1_ID.to_string(), + code: "stale_cursor".to_string(), + message: "test stale cursor".to_string(), + details: Value::Null, + hint: None, + }; + let err: anyhow::Error = anyhow::Error::new(StructuredError(original)); + let v1 = classify(&err, false); + assert_eq!(v1.code, "stale_cursor"); + assert_eq!(v1.message, "test stale cursor"); + } } diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 960442b..66c38ad 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -55,6 +55,7 @@ use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter}; use kebab_source_fs::FsSourceConnector; mod app; +pub mod cursor; pub mod doctor_signal; pub mod error_signal; pub mod error_wire; @@ -65,10 +66,10 @@ pub mod reset; pub mod schema; mod staleness; -pub use app::App; +pub use app::{App, SearchResponse}; pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown}; pub use reset::{ResetReport, ResetScope}; -pub use error_wire::{ERROR_V1_ID, ErrorV1, classify}; +pub use error_wire::{ERROR_V1_ID, ErrorV1, StructuredError, classify}; pub use schema::{Capabilities, Models, SCHEMA_V1_ID, SchemaV1, Stats, WireBlock, schema_with_config}; pub use staleness::{compute_stale, mark_stale_in_place}; @@ -1739,6 +1740,19 @@ pub fn search_uncached_with_config( App::open_with_config(config)?.search_uncached(query) } +/// p9-fb-34: budget-aware search free function. Mirrors +/// [`search_with_config`] but threads `SearchOpts` (max_tokens, +/// snippet_chars, cursor) and returns the [`SearchResponse`] +/// pagination wrapper. Tasks 6+8 surface this via CLI / MCP. 
+#[doc(hidden)] +pub fn search_with_opts_with_config( + config: kebab_config::Config, + query: kebab_core::SearchQuery, + opts: kebab_core::SearchOpts, +) -> anyhow::Result { + App::open_with_config(config)?.search_with_opts(query, opts) +} + // ── ask ────────────────────────────────────────────────────────────────── // // P4-3 wires `ask` end-to-end. The retriever is built per `opts.mode`; diff --git a/crates/kebab-app/src/schema.rs b/crates/kebab-app/src/schema.rs index 42aa137..603b212 100644 --- a/crates/kebab-app/src/schema.rs +++ b/crates/kebab-app/src/schema.rs @@ -63,6 +63,7 @@ pub const SCHEMA_V1_ID: &str = "schema.v1"; const WIRE_SCHEMAS: &[&str] = &[ "answer.v1", "search_hit.v1", + "search_response.v1", "doc_summary.v1", "chunk_inspection.v1", "doctor.v1", diff --git a/crates/kebab-app/tests/common/mod.rs b/crates/kebab-app/tests/common/mod.rs index ce2a28f..c06098f 100644 --- a/crates/kebab-app/tests/common/mod.rs +++ b/crates/kebab-app/tests/common/mod.rs @@ -79,6 +79,37 @@ impl TestEnv { ..Default::default() } } + + /// p9-fb-34 alias — tests added in fb-34 invoke `TestEnv::new()` + /// per the plan; route to the existing lexical-only constructor + /// so the lane stays AVX-free without churning all the existing + /// callers. + pub fn new() -> Self { + Self::lexical_only() + } + + /// p9-fb-34: open a fresh `App` against this env's config. Used + /// by integration tests that need to call `App::search_with_opts` + /// directly. Caller can invoke this multiple times to simulate + /// re-opening the binary after a corpus revision bump. + pub fn app(&self) -> kebab_app::App { + kebab_app::App::open_with_config(self.config.clone()) + .expect("App::open_with_config") + } +} + +/// p9-fb-34: write `content` into the env's workspace at +/// `relative_path`, then run a full ingest so the document is +/// searchable. Mirrors the convenience helpers used by other +/// `TestEnv`-driven crates. 
+pub fn ingest_md(env: &TestEnv, relative_path: &str, content: &str) { + let path = env.workspace_root.join(relative_path); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).expect("create parent dirs"); + } + std::fs::write(&path, content).expect("write workspace file"); + kebab_app::ingest_with_config(env.config.clone(), env.scope(), true) + .expect("ingest_with_config"); } /// Test helper: build a `SearchQuery` for lexical mode at k=10. Used diff --git a/crates/kebab-app/tests/cursor.rs b/crates/kebab-app/tests/cursor.rs new file mode 100644 index 0000000..74fd45b --- /dev/null +++ b/crates/kebab-app/tests/cursor.rs @@ -0,0 +1,24 @@ +//! p9-fb-34: cursor encode/decode round-trip + corpus_revision mismatch. + +use kebab_app::cursor; + +#[test] +fn cursor_roundtrip_preserves_offset() { + let encoded = cursor::encode(5, "rev-abc"); + let offset = cursor::decode(&encoded, "rev-abc").unwrap(); + assert_eq!(offset, 5); +} + +#[test] +fn cursor_decode_rejects_mismatched_revision() { + let encoded = cursor::encode(7, "rev-old"); + let err = cursor::decode(&encoded, "rev-new").unwrap_err(); + assert_eq!(err.code, "stale_cursor"); + assert!(err.message.contains("rev-old") || err.message.contains("rev-new")); +} + +#[test] +fn cursor_decode_rejects_garbage_input() { + let err = cursor::decode("not-base64!!!", "any").unwrap_err(); + assert_eq!(err.code, "stale_cursor"); +} diff --git a/crates/kebab-app/tests/search_budget_integration.rs b/crates/kebab-app/tests/search_budget_integration.rs new file mode 100644 index 0000000..42ad346 --- /dev/null +++ b/crates/kebab-app/tests/search_budget_integration.rs @@ -0,0 +1,161 @@ +//! p9-fb-34: App::search_with_opts integration tests. 
+ +mod common; + +use kebab_app::SearchResponse; +use kebab_core::{SearchFilters, SearchMode, SearchOpts, SearchQuery}; + +fn lex(text: &str, k: usize) -> SearchQuery { + SearchQuery { + text: text.to_string(), + mode: SearchMode::Lexical, + k, + filters: SearchFilters::default(), + } +} + +#[test] +fn search_with_opts_no_budget_matches_search() { + let env = common::TestEnv::new(); + common::ingest_md(&env, "a.md", "# T\n\napples are red\n"); + let app = env.app(); + + let baseline = app.search(lex("apples", 5)).unwrap(); + let resp: SearchResponse = app + .search_with_opts(lex("apples", 5), SearchOpts::default()) + .unwrap(); + + assert_eq!(resp.hits.len(), baseline.len()); + assert!(!resp.truncated); + assert!(resp.next_cursor.is_none(), "k=5 against 1 doc → no next page"); +} + +#[test] +fn budget_truncates_snippets_when_below_threshold() { + let env = common::TestEnv::new(); + let body: String = "rust ownership is a memory model. ".repeat(10); + common::ingest_md(&env, "a.md", &format!("# T\n\n{body}\n")); + let app = env.app(); + + let unrestricted = app.search(lex("rust", 5)).unwrap(); + let unrestricted_chars: usize = unrestricted.iter().map(|h| h.snippet.chars().count()).sum(); + + let resp = app + .search_with_opts( + lex("rust", 5), + SearchOpts { + max_tokens: Some(50), + snippet_chars: None, + cursor: None, + }, + ) + .unwrap(); + let limited_chars: usize = resp.hits.iter().map(|h| h.snippet.chars().count()).sum(); + + assert!(resp.truncated, "small budget must trip truncation"); + assert!(limited_chars < unrestricted_chars, "snippet should shrink"); + assert!(!resp.hits.is_empty(), "always retain ≥1 hit"); +} + +#[test] +fn cursor_paginates_to_next_page() { + let env = common::TestEnv::new(); + for i in 0..6 { + common::ingest_md(&env, &format!("d{i}.md"), &format!("# T{i}\n\nrust topic {i}\n")); + } + let app = env.app(); + + let page1 = app + .search_with_opts(lex("rust", 2), SearchOpts::default()) + .unwrap(); + assert_eq!(page1.hits.len(), 2); + 
let cursor = page1.next_cursor.expect("more hits available"); + + let page2 = app + .search_with_opts( + lex("rust", 2), + SearchOpts { + max_tokens: None, + snippet_chars: None, + cursor: Some(cursor), + }, + ) + .unwrap(); + assert_eq!(page2.hits.len(), 2); + let p1_ids: std::collections::HashSet<_> = + page1.hits.iter().map(|h| h.chunk_id.0.clone()).collect(); + let p2_ids: std::collections::HashSet<_> = + page2.hits.iter().map(|h| h.chunk_id.0.clone()).collect(); + assert!(p1_ids.is_disjoint(&p2_ids), "page 2 must not repeat page 1 hits"); +} + +#[test] +fn cursor_rejected_after_corpus_revision_bump() { + let env = common::TestEnv::new(); + common::ingest_md(&env, "a.md", "# T\n\napples\n"); + let app = env.app(); + + let page1 = app + .search_with_opts(lex("apples", 1), SearchOpts::default()) + .unwrap(); + // p9-fb-34 round-1 review: replaced silent `if let Some(c) = ...` + // with `.expect(...)` so a fixture regression that breaks the + // cursor-emission contract fails loudly instead of passing vacuously. + let c = page1 + .next_cursor + .expect("k=1 page must emit next_cursor — fixture too small if this fails"); + + common::ingest_md(&env, "b.md", "# B\n\nbananas\n"); + let app2 = env.app(); + + let result = app2.search_with_opts( + lex("apples", 1), + SearchOpts { + max_tokens: None, + snippet_chars: None, + cursor: Some(c), + }, + ); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("stale_cursor"), + "must surface stale_cursor: {err}" + ); +} + +#[test] +fn max_tokens_zero_returns_one_hit_truncated() { + // p9-fb-34 round-1 review: pin the documented "≥1 hit floor" + // contract — even with `max_tokens=0` (an absurdly tight budget) + // the budget loop must keep one hit and flip `truncated: true`. + // Fixture intentionally seeds multiple matches so step 2 of the + // budget loop (pop hits to 1) actually fires. 
+ let env = common::TestEnv::new(); + for i in 0..3 { + common::ingest_md( + &env, + &format!("d{i}.md"), + &format!("# T{i}\n\napples are red {i}\n"), + ); + } + let app = env.app(); + + let resp = app + .search_with_opts( + lex("apples", 5), + SearchOpts { + max_tokens: Some(0), + snippet_chars: None, + cursor: None, + }, + ) + .unwrap(); + assert_eq!(resp.hits.len(), 1, "max_tokens=0 collapses to 1-hit floor"); + assert!(resp.truncated); + // p9-fb-34 R2: cursor IS emitted on k-pop case so the popped + // hits remain reachable. + assert!( + resp.next_cursor.is_some(), + "k-pop truncation must still emit next_cursor; popped hits at offset+returned" + ); +} diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index f3df832..16857e5 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -108,6 +108,23 @@ enum Cmd { /// future TUI cache-aware search and for explicit intent. #[arg(long)] no_cache: bool, + + /// p9-fb-34: cap result wire JSON size at approximately N tokens + /// (chars/4 estimate). When set, smaller snippets and fewer hits + /// may be returned; check `truncated` in the JSON wire. + #[arg(long)] + max_tokens: Option, + + /// p9-fb-34: per-hit snippet character cap, overrides + /// `config.search.snippet_chars` for this call only. + #[arg(long)] + snippet_chars: Option, + + /// p9-fb-34: opaque cursor from a previous response's + /// `next_cursor` to fetch the next page. Mismatched + /// `corpus_revision` returns `error.v1.code = stale_cursor`. + #[arg(long)] + cursor: Option, }, /// Retrieval-augmented question answering. 
@@ -515,6 +532,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> { mode, explain: _, no_cache, + max_tokens, + snippet_chars, + cursor, } => { let cfg = kebab_config::Config::load(cli.config.as_deref())?; let q = kebab_core::SearchQuery { @@ -523,16 +543,24 @@ fn run(cli: &Cli) -> anyhow::Result<()> { k: *k, filters: kebab_core::SearchFilters::default(), }; - // p9-fb-19: --no-cache routes to the uncached facade. - // Both calls go through the same App; only the cache - // lookup/insert is skipped. - let hits = if *no_cache { - kebab_app::search_uncached_with_config(cfg, q)? - } else { - kebab_app::search_with_config(cfg, q)? + let opts = kebab_core::SearchOpts { + max_tokens: *max_tokens, + snippet_chars: *snippet_chars, + cursor: cursor.clone(), }; + // p9-fb-34: budget-aware path. --no-cache still bypasses the + // App-level LRU; wire wrapper applies regardless. + let app = kebab_app::App::open_with_config(cfg)?; + if *no_cache { + app.clear_search_cache(); + } + let resp = app.search_with_opts(q, opts)?; + if cli.json { - println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?); + println!( + "{}", + serde_json::to_string(&wire::wire_search_response(&resp))? + ); } else { // p9-fb-32: prefix `[stale]` on the doc_path for hits // whose `stale: true`. Yellow on TTY, plain otherwise — @@ -542,7 +570,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { // lands on); no new dep. use std::io::IsTerminal; let color = std::io::stdout().is_terminal(); - for h in &hits { + for h in &resp.hits { // Show 4-digit score so RRF fused scores (bounded // ~0–0.033 for k_rrf=60) don't all collapse to "0.02". // Append heading_path so multiple chunks from the same @@ -570,6 +598,12 @@ fn run(cli: &Cli) -> anyhow::Result<()> { heading, ); } + // p9-fb-34: truncation hint goes to stderr so it + // doesn't pollute the stdout hit list. 
+ if resp.truncated { + let next = resp.next_cursor.as_deref().unwrap_or("(none)"); + eprintln!("[truncated; use --cursor {next} for the next page]"); + } } Ok(()) } diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs index e1e35d3..649d3f0 100644 --- a/crates/kebab-cli/src/wire.rs +++ b/crates/kebab-cli/src/wire.rs @@ -75,10 +75,18 @@ pub fn wire_search_hit(h: &SearchHit) -> Value { tag_object(v, "search_hit.v1") } -/// Wrap a list of [`SearchHit`] values as a JSON array of `search_hit.v1` -/// objects (one tag per element, per design §2.2). -pub fn wire_search_hits(hits: &[SearchHit]) -> Value { - Value::Array(hits.iter().map(wire_search_hit).collect()) +/// p9-fb-34: tag a `SearchResponse` as `search_response.v1`. Wraps +/// the existing `search_hit.v1[]` array with pagination + truncation +/// metadata. Replaces the previous bare `search_hit.v1[]` top-level +/// array (`wire_search_hits`) — see HOTFIXES / fb-34 for the +/// breaking shape change. +pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value { + let v = serde_json::json!({ + "hits": r.hits.iter().map(wire_search_hit).collect::>(), + "next_cursor": r.next_cursor, + "truncated": r.truncated, + }); + tag_object(v, "search_response.v1") } /// Wrap an [`Answer`] as `answer.v1`. @@ -234,13 +242,6 @@ mod tests { assert_eq!(v.as_array().unwrap().len(), 0); } - #[test] - fn search_hits_wraps_each_element() { - let v = wire_search_hits(&[]); - assert!(v.is_array()); - assert_eq!(v.as_array().unwrap().len(), 0); - } - #[test] fn tag_object_inserts_into_object() { let v = Value::Object(serde_json::Map::new()); @@ -248,6 +249,30 @@ mod tests { assert_eq!(schema_of(&tagged), Some("x.v1")); } + #[test] + fn search_response_carries_pagination_metadata() { + // p9-fb-34: empty-hits SearchResponse round-trips through the + // wrapper with its `next_cursor` + `truncated` fields preserved + // and the top-level `schema_version` set to `search_response.v1`. 
+ let r = kebab_app::SearchResponse { + hits: vec![], + next_cursor: Some("opaque-cursor-abc".to_string()), + truncated: true, + }; + let v = wire_search_response(&r); + assert_eq!(schema_of(&v), Some("search_response.v1")); + assert!(v.get("hits").and_then(|h| h.as_array()).is_some()); + assert_eq!( + v.get("hits").and_then(|h| h.as_array()).unwrap().len(), + 0 + ); + assert_eq!( + v.get("next_cursor").and_then(|c| c.as_str()), + Some("opaque-cursor-abc") + ); + assert_eq!(v.get("truncated").and_then(|t| t.as_bool()), Some(true)); + } + #[test] fn schema_wrapper_tags_schema_version() { use kebab_app::{Capabilities, Models, SchemaV1, Stats, WireBlock}; diff --git a/crates/kebab-cli/tests/common/mod.rs b/crates/kebab-cli/tests/common/mod.rs index 8926bd2..70c1924 100644 --- a/crates/kebab-cli/tests/common/mod.rs +++ b/crates/kebab-cli/tests/common/mod.rs @@ -126,6 +126,29 @@ pub fn ingest(cfg: &Path, workspace: &Path) { ); } +/// p9-fb-34: invoke `kebab search` with arbitrary trailing flags + +/// query, capture stdout + stderr. Caller is responsible for +/// supplying `--mode lexical` / `--json` etc. as needed; this helper +/// stays unopinionated so a single test can exercise both wire shapes +/// (JSON wrapper + plain stderr hint). Asserts the binary exited 0; +/// non-zero exits fail the test with stderr included. +pub fn run_search_with_args(cfg: &Path, args: &[&str]) -> (String, String) { + let bin = env!("CARGO_BIN_EXE_kebab"); + let mut cmd = Command::new(bin); + cmd.arg("--config").arg(cfg).arg("search"); + cmd.args(args); + let out = cmd.output().expect("kebab search"); + assert!( + out.status.success(), + "search failed: args={args:?} stderr={}", + String::from_utf8_lossy(&out.stderr) + ); + ( + String::from_utf8_lossy(&out.stdout).to_string(), + String::from_utf8_lossy(&out.stderr).to_string(), + ) +} + /// p9-fb-33: invoke `kebab ask --stream --mode lexical ` and /// capture stdout + stderr. 
Lexical mode skips embeddings (matches /// `wire_ask_stale.rs::run_ask_lexical`). Caller asserts on the diff --git a/crates/kebab-cli/tests/wire_search_response.rs b/crates/kebab-cli/tests/wire_search_response.rs new file mode 100644 index 0000000..60e1c0f --- /dev/null +++ b/crates/kebab-cli/tests/wire_search_response.rs @@ -0,0 +1,226 @@ +//! p9-fb-34: CLI search wire wrapper + budget controls. +//! +//! Lexical-only — no fastembed / no Ollama. Each test builds its own +//! TempDir KB via `common::write_config` + `common::ingest` and drives +//! `kebab search` through `common::run_search_with_args`. Verifies: +//! +//! - `--json` emits the `search_response.v1` wrapper (hits + cursor + +//! truncated). +//! - `--max-tokens` flips `truncated: true` once the budget binds. +//! - `--cursor` advances paging (page 2 chunk_ids disjoint from page 1). +//! - Plain (non-JSON) output prints the `[truncated; ...]` hint to +//! stderr (stdout stays the hit list). + +mod common; + +use serde_json::Value; +use std::fs; + +#[test] +fn search_json_emits_search_response_v1_wrapper() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 30); + fs::write(workspace.join("a.md"), "# T\n\napples are red.\n").unwrap(); + common::ingest(&cfg, &workspace); + + let (stdout, _stderr) = common::run_search_with_args( + &cfg, + &["--json", "--mode", "lexical", "apples"], + ); + let v: Value = serde_json::from_str(stdout.trim()) + .unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}")); + assert_eq!(v["schema_version"], "search_response.v1"); + assert!(v["hits"].is_array(), "hits must be array, got {v}"); + assert!( + v["next_cursor"].is_null() || v["next_cursor"].is_string(), + "next_cursor must be null or string, got {}", + v["next_cursor"] + ); + assert!( + v["truncated"].is_boolean(), + "truncated must be bool, got {}", + v["truncated"] + ); +} + +#[test] +fn search_json_truncates_with_max_tokens() { + let dir = 
tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 30); + let body: String = "rust ownership is a memory model. ".repeat(10); + fs::write(workspace.join("a.md"), format!("# T\n\n{body}\n")).unwrap(); + common::ingest(&cfg, &workspace); + + let (stdout, _stderr) = common::run_search_with_args( + &cfg, + &["--json", "--mode", "lexical", "--max-tokens", "30", "rust"], + ); + let v: Value = serde_json::from_str(stdout.trim()) + .unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}")); + assert_eq!( + v["truncated"], true, + "30-token cap must trip truncation: {v}" + ); +} + +#[test] +fn search_json_cursor_paginates() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 30); + for i in 0..6 { + fs::write( + workspace.join(format!("d{i}.md")), + format!("# T{i}\n\nrust topic {i}\n"), + ) + .unwrap(); + } + common::ingest(&cfg, &workspace); + + let (page1, _) = common::run_search_with_args( + &cfg, + &["--json", "--mode", "lexical", "--k", "2", "rust"], + ); + let v1: Value = serde_json::from_str(page1.trim()) + .unwrap_or_else(|e| panic!("page1 not JSON: {page1:?}: {e}")); + let cursor = v1["next_cursor"] + .as_str() + .unwrap_or_else(|| panic!("next_cursor missing on page1: {v1}")); + + let (page2, _) = common::run_search_with_args( + &cfg, + &[ + "--json", + "--mode", + "lexical", + "--k", + "2", + "--cursor", + cursor, + "rust", + ], + ); + let v2: Value = serde_json::from_str(page2.trim()) + .unwrap_or_else(|e| panic!("page2 not JSON: {page2:?}: {e}")); + + let p1_ids: Vec<String> = v1["hits"] + .as_array() + .expect("page1 hits array") + .iter() + .map(|h| { + h["chunk_id"] + .as_str() + .expect("chunk_id string") + .to_string() + }) + .collect(); + let p2_ids: Vec<String> = v2["hits"] + .as_array() + .expect("page2 hits array") + .iter() + .map(|h| { + h["chunk_id"] + .as_str() + .expect("chunk_id string") + .to_string() + }) + .collect(); + assert!( + !p2_ids.is_empty(), + "page2 
must return at least one hit (cursor advanced past page1)" + ); + assert!( + p2_ids.iter().all(|id| !p1_ids.contains(id)), + "page2 must not repeat page1 chunk_ids: page1={p1_ids:?} page2={p2_ids:?}" + ); +} + +#[test] +fn search_stale_cursor_returns_error_v1_with_stale_cursor_code() { + // p9-fb-34 round-1 review: end-to-end wire contract — when the + // corpus_revision bumps between cursor issuance and the cursored + // search, `kebab --json search --cursor ` must emit an + // `error.v1` ndjson line on stderr with `code = "stale_cursor"`. + // Pre-fix this returned `code = "generic"` because + // `App::search_with_opts` string-formatted the typed payload into + // anyhow, losing the structured wrapper. + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 30); + fs::write(workspace.join("a.md"), "# T\n\napples\n").unwrap(); + common::ingest(&cfg, &workspace); + + // Get a valid cursor first. + let (page1_stdout, _) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--json", "--k", "1", "apples"], + ); + let v1: Value = serde_json::from_str(page1_stdout.trim()).expect("json"); + let cursor = v1["next_cursor"] + .as_str() + .expect("k=1 page must emit next_cursor — fixture too small if this fails") + .to_string(); + + // Bump corpus_revision by ingesting a second doc. + fs::write(workspace.join("b.md"), "# B\n\nbananas\n").unwrap(); + common::ingest(&cfg, &workspace); + + // Use the now-stale cursor. Direct invocation (not via the + // success-asserting helper) so we can read stderr on failure. 
+ let exe = env!("CARGO_BIN_EXE_kebab"); + let cfg_str = cfg.to_str().expect("utf8"); + let out = std::process::Command::new(exe) + .args([ + "--config", + cfg_str, + "--json", + "search", + "--mode", + "lexical", + "--json", + "--cursor", + &cursor, + "apples", + ]) + .output() + .expect("kebab search --cursor"); + + let stderr = String::from_utf8_lossy(&out.stderr); + // Find the error.v1 ndjson line on stderr (one event per line). + let err_line = stderr + .lines() + .find(|l| { + serde_json::from_str::<Value>(l) + .ok() + .and_then(|v| { + v.get("schema_version") + .and_then(|s| s.as_str()) + .map(String::from) + }) + .as_deref() + == Some("error.v1") + }) + .unwrap_or_else(|| panic!("no error.v1 line on stderr: {stderr:?}")); + + let v: Value = serde_json::from_str(err_line).expect("error.v1 json"); + assert_eq!( + v["code"], "stale_cursor", + "code must be stale_cursor: {err_line}" + ); +} + +#[test] +fn search_plain_emits_truncated_hint_to_stderr() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 30); + let body: String = "rust ownership is a memory model. 
".repeat(10); + fs::write(workspace.join("a.md"), format!("# T\n\n{body}\n")).unwrap(); + common::ingest(&cfg, &workspace); + + let (_stdout, stderr) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--max-tokens", "30", "rust"], + ); + assert!( + stderr.contains("[truncated;"), + "stderr must carry truncated hint: {stderr:?}" + ); +} diff --git a/crates/kebab-cli/tests/wire_search_stale.rs b/crates/kebab-cli/tests/wire_search_stale.rs index 9347d3e..483c4a8 100644 --- a/crates/kebab-cli/tests/wire_search_stale.rs +++ b/crates/kebab-cli/tests/wire_search_stale.rs @@ -45,10 +45,21 @@ fn search_json_includes_indexed_at_and_stale() { let out = run_search_lexical(&cfg, "apples", true); let stdout = String::from_utf8_lossy(&out.stdout); - let arr: serde_json::Value = serde_json::from_str(stdout.trim()) - .unwrap_or_else(|e| panic!("expected JSON array, got {stdout:?}: {e}")); - let arr = arr.as_array().unwrap_or_else(|| panic!("expected array, got {stdout}")); - let first = arr.first().unwrap_or_else(|| panic!("expected ≥1 hit, got empty array: {stdout}")); + // p9-fb-34: top-level wire is now `search_response.v1` wrapping the + // legacy `search_hit.v1[]` under a `hits` field (with pagination + + // truncation metadata). Hit shape inside `hits` is unchanged. 
+ let resp: serde_json::Value = serde_json::from_str(stdout.trim()) + .unwrap_or_else(|e| panic!("expected JSON object, got {stdout:?}: {e}")); + assert_eq!( + resp.get("schema_version").and_then(|v| v.as_str()), + Some("search_response.v1"), + "expected search_response.v1 wrapper, got {resp}" + ); + let arr = resp + .get("hits") + .and_then(|h| h.as_array()) + .unwrap_or_else(|| panic!("expected hits array, got {stdout}")); + let first = arr.first().unwrap_or_else(|| panic!("expected ≥1 hit, got empty hits: {stdout}")); assert!( first.get("indexed_at").is_some(), "missing indexed_at in {first}" diff --git a/crates/kebab-core/src/lib.rs b/crates/kebab-core/src/lib.rs index 4512bf3..6da7a53 100644 --- a/crates/kebab-core/src/lib.rs +++ b/crates/kebab-core/src/lib.rs @@ -51,7 +51,7 @@ pub use metadata::{ }; pub use search::{ DocFilter, DocSummary, RetrievalDetail, SearchFilters, SearchHit, - SearchMode, SearchQuery, + SearchMode, SearchOpts, SearchQuery, }; pub use answer::{ Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage, diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs index 6e49b5a..9d6527b 100644 --- a/crates/kebab-core/src/search.rs +++ b/crates/kebab-core/src/search.rs @@ -96,6 +96,18 @@ pub struct DocSummary { pub chunker_version: ChunkerVersion, } +/// p9-fb-34: caller-supplied output budget knobs for `App::search_with_opts`. +/// All `None` = no enforcement (existing behavior). +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +pub struct SearchOpts { + /// chars/4 approximation of wire JSON token cost. None = no cap. + pub max_tokens: Option, + /// Per-hit snippet character cap. None = use config default. + pub snippet_chars: Option, + /// Opaque base64 cursor from a previous response. None = first page. 
+ pub cursor: Option, +} + #[cfg(test)] mod tests { use super::*; @@ -135,4 +147,12 @@ mod tests { assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z"); assert_eq!(v["stale"], true); } + + #[test] + fn search_opts_default_is_all_none() { + let opts = SearchOpts::default(); + assert!(opts.max_tokens.is_none()); + assert!(opts.snippet_chars.is_none()); + assert!(opts.cursor.is_none()); + } } diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs index 3496a22..e5f7b4e 100644 --- a/crates/kebab-mcp/src/tools/search.rs +++ b/crates/kebab-mcp/src/tools/search.rs @@ -1,5 +1,6 @@ -//! `search` tool — wraps `kebab_app::search_with_config`. -//! Input: { query, mode?, k? }. Output: search_hit.v1 array JSON. +//! `search` tool — wraps `kebab_app::search_with_opts_with_config`. +//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor? }. +//! Output: search_response.v1 envelope (hits + next_cursor + truncated). //! //! First tool with a non-empty `inputSchema`: `SearchInput` derives //! `JsonSchema` and `Tool::new` uses @@ -17,23 +18,21 @@ pub struct SearchInput { /// User query (free text). pub query: String, /// Retrieval mode: "hybrid" (default), "lexical", or "vector". - #[serde(default = "default_mode")] - pub mode: String, + pub mode: Option, /// Top-K results. Defaults to 10. Clamped to 1–100. - #[serde(default = "default_k")] - pub k: usize, -} - -fn default_mode() -> String { - "hybrid".to_string() -} -fn default_k() -> usize { - 10 + pub k: Option, + /// p9-fb-34: cap result wire size at ~N tokens (chars/4 estimate). + pub max_tokens: Option, + /// p9-fb-34: per-hit snippet character cap. + pub snippet_chars: Option, + /// p9-fb-34: opaque cursor from a previous response. 
+ pub cursor: Option, } pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { - let k = input.k.clamp(1, 100); - let mode = match input.mode.as_str() { + let k = input.k.unwrap_or(10).clamp(1, 100); + let mode_str = input.mode.as_deref().unwrap_or("hybrid"); + let mode = match mode_str { "lexical" => kebab_core::SearchMode::Lexical, "vector" => kebab_core::SearchMode::Vector, _ => kebab_core::SearchMode::Hybrid, @@ -44,11 +43,18 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { k, filters: kebab_core::SearchFilters::default(), }; - match kebab_app::search_with_config((*state.config).clone(), query) { - Ok(hits) => { + let opts = kebab_core::SearchOpts { + max_tokens: input.max_tokens, + snippet_chars: input.snippet_chars, + cursor: input.cursor, + }; + let cfg_clone = (*state.config).clone(); + match kebab_app::search_with_opts_with_config(cfg_clone, query, opts) { + Ok(resp) => { // SearchHit (kebab-core) does not carry a `schema_version` field, // so we tag each element inline before serialising. - let tagged: Vec = hits + let tagged: Vec = resp + .hits .iter() .map(|h| { let mut v = serde_json::to_value(h).unwrap_or_default(); @@ -61,7 +67,13 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { v }) .collect(); - match serde_json::to_string(&serde_json::Value::Array(tagged)) { + let envelope = serde_json::json!({ + "schema_version": "search_response.v1", + "hits": tagged, + "next_cursor": resp.next_cursor, + "truncated": resp.truncated, + }); + match serde_json::to_string(&envelope) { Ok(json) => to_tool_success(json), Err(e) => to_tool_error(&anyhow::anyhow!(e)), } diff --git a/crates/kebab-mcp/tests/tools_call_search.rs b/crates/kebab-mcp/tests/tools_call_search.rs index 5f734eb..5995292 100644 --- a/crates/kebab-mcp/tests/tools_call_search.rs +++ b/crates/kebab-mcp/tests/tools_call_search.rs @@ -1,4 +1,4 @@ -//! 
Integration: tools/call name=search — verify response is search_hit.v1 array. +//! Integration: tools/call name=search — verify response is search_response.v1. use std::fs; @@ -22,7 +22,7 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path) } #[tokio::test] -async fn search_tool_returns_search_hits_array() { +async fn search_tool_returns_search_response_v1() { let dir = tempfile::tempdir().unwrap(); let data_dir = dir.path().join("data"); let workspace_root = dir.path().join("notes"); @@ -53,8 +53,11 @@ async fn search_tool_returns_search_hits_array() { handler.state(), kebab_mcp::tools::search::SearchInput { query: "kebab".to_string(), - mode: "lexical".to_string(), - k: 5, + mode: Some("lexical".to_string()), + k: Some(5), + max_tokens: None, + snippet_chars: None, + cursor: None, }, ); @@ -75,16 +78,33 @@ async fn search_tool_returns_search_hits_array() { }; let v: serde_json::Value = serde_json::from_str(text).unwrap(); - let arr = v.as_array().expect("search returns a JSON array"); + assert_eq!( + v.get("schema_version").and_then(|s| s.as_str()), + Some("search_response.v1"), + "envelope should carry schema_version=search_response.v1" + ); + let hits = v + .get("hits") + .and_then(|h| h.as_array()) + .expect("hits must be a JSON array"); assert!( - !arr.is_empty(), + !hits.is_empty(), "expected at least one hit for 'kebab' in 'a.md'" ); assert_eq!( - arr[0] + hits[0] .get("schema_version") .and_then(|s| s.as_str()), Some("search_hit.v1"), "first hit should carry schema_version=search_hit.v1" ); + // truncated must be present (bool); next_cursor may be null on last page. 
+ assert!( + v.get("truncated").and_then(|t| t.as_bool()).is_some(), + "envelope should carry truncated:bool" + ); + assert!( + v.get("next_cursor").is_some(), + "envelope should carry next_cursor (possibly null)" + ); } diff --git a/crates/kebab-parse-image/Cargo.toml b/crates/kebab-parse-image/Cargo.toml index 4e78465..46b56bc 100644 --- a/crates/kebab-parse-image/Cargo.toml +++ b/crates/kebab-parse-image/Cargo.toml @@ -34,7 +34,7 @@ kamadak-exif = "0.6" # rustls-tls) so both crates share the same TLS backend and the # transitive tokio runtime is brought in once. reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] } -base64 = "0.22" +base64 = { workspace = true } [dev-dependencies] tempfile = { workspace = true } @@ -47,7 +47,7 @@ tokio = { workspace = true, features = ["rt-multi-thread"] } # fixture. Only loaded for tests; the production crate doesn't need # font rendering. ab_glyph = "0.2" -base64 = "0.22" +base64 = { workspace = true } # `kebab-llm/mock` exposes `MockLanguageModel` for hermetic caption # tests. Real adapters (Ollama) live in `kebab-llm-local`, which is # only allowed at the dev-dep level here — the runtime crate stays diff --git a/docs/SMOKE.md b/docs/SMOKE.md index 3ec0d2a..bbb152e 100644 --- a/docs/SMOKE.md +++ b/docs/SMOKE.md @@ -152,6 +152,25 @@ stderr 의 events.ndjson 은 한 줄 = 한 event 의 ndjson — `retrieval_done` agent 가 stderr 를 닫으면 (`head -c 1` 등) pipeline 이 LLM stream 을 즉시 중단하고 `RefusalReason::LlmStreamAborted` 로 partial answer 를 `answers` 테이블에 기록. 
+### Pagination + budget (fb-34) + +```bash +# First page +kebab search "rust" --json --k 5 > page1.json +jq '.next_cursor' page1.json + +# Next page using the returned cursor +NEXT=$(jq -r '.next_cursor' page1.json) +kebab search "rust" --json --k 5 --cursor "$NEXT" > page2.json + +# Budget cap — returns smaller snippet / fewer hits + truncated=true +kebab search "rust" --json --max-tokens 200 | jq '.truncated, (.hits | length)' +``` + +`next_cursor` 는 corpus_revision 변경 (이후 ingest 등) 시 invalid — 다음 호출이 `error.v1.code = stale_cursor` 로 거절. agent 는 새 search 로 재발급 받기. + +`--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare `search_hit.v1[]` 배열과 호환 안 됨. + ## P6-4 이미지 ingestion 옵션 `config.toml` 에 다음 절을 추가하면 `kebab ingest` 가 `**/*.png` / `**/*.jpg` 등 이미지 자산도 함께 색인합니다 (텍스트만 색인하려면 생략): diff --git a/docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md b/docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md new file mode 100644 index 0000000..491acf0 --- /dev/null +++ b/docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md @@ -0,0 +1,1535 @@ +# p9-fb-34 — Output Budget Controls Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add `--max-tokens` / `--snippet-chars` / `--cursor` flags to `kebab search` so agents can cap result size and paginate. Wire output gains a top-level `search_response.v1` wrapper around the existing `search_hit.v1[]` array, with `next_cursor` and `truncated` metadata. + +**Architecture:** Domain `SearchOpts` enters `App::search_with_opts(query, opts) -> SearchResponse`; existing `App::search(query) -> Vec` becomes a thin wrapper. Token estimation uses `chars/4` (no new tokenizer dep). Truncate priority: snippet shorten → k pop → minimum 1 hit. 
Cursor is opaque base64 of `{offset, corpus_revision}` JSON; mismatch returns `error.v1.code = stale_cursor`. CLI plain output unchanged + truncated stderr hint; `--json` output is the new wrapper. + +**Tech Stack:** Rust 2024, base64 (workspace dep — add to root if missing), serde, JSON Schema (search_response.v1). + +**Spec:** `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md` + +--- + +## File Structure + +| File | Responsibility | Action | +|------|----------------|--------| +| `crates/kebab-core/src/search.rs` | New `pub struct SearchOpts { max_tokens, snippet_chars, cursor }` with `Default` impl | modify | +| `crates/kebab-core/src/lib.rs` | Re-export `SearchOpts` | modify | +| `crates/kebab-app/src/cursor.rs` | New module — `encode_cursor(offset, revision) -> String`, `decode_cursor(s, expected) -> Result` | create | +| `crates/kebab-app/src/app.rs` | New `pub struct SearchResponse`, `App::search_with_opts(...)`, budget loop, retain `App::search` thin wrapper | modify | +| `crates/kebab-app/src/lib.rs` | Re-export `SearchResponse`, `SearchOpts`, cursor module if needed | modify | +| `crates/kebab-app/src/error_wire.rs` | Add `stale_cursor` classify branch | modify | +| `crates/kebab-app/Cargo.toml` | Add `base64` dep (or workspace-managed) | modify | +| `Cargo.toml` (workspace root) | Add `base64 = "0.22"` to `[workspace.dependencies]` if not already managed | modify (conditional) | +| `crates/kebab-cli/src/main.rs` | `Cmd::Search` new flags + dispatch to `search_with_opts` + plain truncated hint | modify | +| `crates/kebab-cli/src/wire.rs` | New `wire_search_response(&SearchResponse) -> Value` helper | modify | +| `crates/kebab-mcp/src/tools/search.rs` | Extend `SearchInput` + emit `search_response.v1` | modify | +| `docs/wire-schema/v1/search_response.schema.json` | NEW wrapper schema | create | +| `crates/kebab-app/tests/cursor.rs` | Unit: encode/decode round-trip + StaleCursor | create | +| 
`crates/kebab-app/tests/search_budget_integration.rs` | Integration: budget None passthrough + snippet shorten + k pop + 1-hit minimum + snippet_chars override + cursor pagination + corpus_revision bump → StaleCursor | create | +| `crates/kebab-cli/tests/wire_search_response.rs` | Integration: `--json` shape + `--max-tokens` truncation + `--cursor` next page + plain truncated stderr hint | create | +| `crates/kebab-mcp/tests/tools_call_search.rs` | Augment existing test (or sibling) — verify `search_response.v1` returned | modify | +| `README.md` | `kebab search` row update + `--max-tokens` / `--cursor` mention | modify | +| `docs/SMOKE.md` | Pagination walkthrough paragraph | modify | +| `tasks/p9/p9-fb-34-output-budget-controls.md` | Status flip + design/plan links | modify | +| `tasks/INDEX.md` | fb-34 row → ✅ | modify | +| `tasks/HOTFIXES.md` | New entry — `2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1` | modify | +| `integrations/claude-code/kebab/SKILL.md` | Recipe update — `response.hits[]` instead of bare array; cursor example | modify | + +--- + +## Pre-flight + +- [ ] **Step 0.1: Branch off main** + +```bash +git checkout main +git pull +git checkout -b feat/fb-34-output-budget-controls +``` + +- [ ] **Step 0.2: Confirm spec branch reachable** + +```bash +git log --oneline spec/fb-34-output-budget-controls -1 +``` + +Expected: `a80f65c spec(fb-34): output budget controls — design`. If spec PR has not yet merged into main, `git merge spec/fb-34-output-budget-controls` so the spec doc lands on this branch. 
+ +--- + +## Task 1: Domain — `SearchOpts` in kebab-core + +**Files:** +- Modify: `crates/kebab-core/src/search.rs` +- Modify: `crates/kebab-core/src/lib.rs` + +- [ ] **Step 1.1: Write the failing test** + +Append to `crates/kebab-core/src/search.rs` `#[cfg(test)] mod tests` block (one already exists from fb-32): + +```rust +#[test] +fn search_opts_default_is_all_none() { + let opts = SearchOpts::default(); + assert!(opts.max_tokens.is_none()); + assert!(opts.snippet_chars.is_none()); + assert!(opts.cursor.is_none()); +} +``` + +- [ ] **Step 1.2: Run test — verify failure** + +```bash +cargo test -p kebab-core search_opts_default_is_all_none +``` + +Expected: FAIL — `cannot find type SearchOpts in scope`. + +- [ ] **Step 1.3: Define `SearchOpts`** + +Append to `crates/kebab-core/src/search.rs` (after the existing `DocSummary` struct, before any `#[cfg(test)]`): + +```rust +/// p9-fb-34: caller-supplied output budget knobs for `App::search_with_opts`. +/// All `None` = no enforcement (existing behavior). +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +pub struct SearchOpts { + /// chars/4 approximation of wire JSON token cost. None = no cap. + pub max_tokens: Option, + /// Per-hit snippet character cap. None = use config default. + pub snippet_chars: Option, + /// Opaque base64 cursor from a previous response. None = first page. + pub cursor: Option, +} +``` + +- [ ] **Step 1.4: Re-export from `crates/kebab-core/src/lib.rs`** + +Find the existing `pub use search::{...}` line: + +```bash +grep -n "pub use search" crates/kebab-core/src/lib.rs +``` + +Add `SearchOpts` to the brace list. If the existing line is e.g. `pub use search::{SearchHit, SearchQuery, SearchFilters, SearchMode, RetrievalDetail, DocFilter, DocSummary};`, append `SearchOpts`. + +- [ ] **Step 1.5: Run tests — verify pass** + +```bash +cargo test -p kebab-core search_opts_default_is_all_none +cargo test -p kebab-core +``` + +Expected: PASS. 
+ +- [ ] **Step 1.6: Commit** + +```bash +git add crates/kebab-core/src/search.rs crates/kebab-core/src/lib.rs +git commit -m "$(cat <<'EOF' +feat(core): SearchOpts domain type for budget controls (fb-34) + +3 optional knobs (max_tokens, snippet_chars, cursor); Default = all +None = no enforcement (backwards-compat existing search behavior). + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 2: Cursor encode/decode helper + +**Files:** +- Create: `crates/kebab-app/src/cursor.rs` +- Modify: `crates/kebab-app/src/lib.rs` +- Modify: `crates/kebab-app/Cargo.toml` +- Possibly modify: `Cargo.toml` (workspace root) — add `base64` to `[workspace.dependencies]` if absent + +- [ ] **Step 2.1: Add base64 to kebab-app deps** + +Check workspace root `Cargo.toml`: + +```bash +grep -n "^base64" Cargo.toml +``` + +If absent, add to `[workspace.dependencies]`: + +```toml +base64 = "0.22" +``` + +Then add to `crates/kebab-app/Cargo.toml` `[dependencies]`: + +```toml +base64 = { workspace = true } +``` + +If `base64` is already directly in another crate (e.g. `kebab-parse-image`), promote it to workspace dep first then update both. + +- [ ] **Step 2.2: Write the failing test** + +Create `crates/kebab-app/tests/cursor.rs`: + +```rust +//! p9-fb-34: cursor encode/decode round-trip + corpus_revision mismatch. 
+ +use kebab_app::cursor; + +#[test] +fn cursor_roundtrip_preserves_offset() { + let encoded = cursor::encode(5, "rev-abc"); + let offset = cursor::decode(&encoded, "rev-abc").unwrap(); + assert_eq!(offset, 5); +} + +#[test] +fn cursor_decode_rejects_mismatched_revision() { + let encoded = cursor::encode(7, "rev-old"); + let err = cursor::decode(&encoded, "rev-new").unwrap_err(); + assert_eq!(err.code, "stale_cursor"); + assert!(err.message.contains("rev-old") || err.message.contains("rev-new")); +} + +#[test] +fn cursor_decode_rejects_garbage_input() { + let err = cursor::decode("not-base64!!!", "any").unwrap_err(); + assert_eq!(err.code, "stale_cursor"); +} +``` + +- [ ] **Step 2.3: Run test — verify failure** + +```bash +cargo test -p kebab-app --test cursor +``` + +Expected: FAIL — `cannot find module cursor in kebab_app`. + +- [ ] **Step 2.4: Implement cursor module** + +Create `crates/kebab-app/src/cursor.rs`: + +```rust +//! p9-fb-34 opaque pagination cursor. +//! +//! Format: base64(JSON({offset: usize, corpus_revision: string})). +//! Opaque to callers — they MUST NOT decode the contents themselves; +//! the schema is internal and may change without notice. + +use base64::Engine; +use base64::engine::general_purpose::URL_SAFE_NO_PAD; +use serde::{Deserialize, Serialize}; + +use crate::error_wire::ErrorV1; + +#[derive(Serialize, Deserialize)] +struct Payload { + offset: usize, + corpus_revision: String, +} + +/// Encode `(offset, corpus_revision)` as an opaque base64 string. +pub fn encode(offset: usize, corpus_revision: &str) -> String { + let payload = Payload { + offset, + corpus_revision: corpus_revision.to_string(), + }; + let json = serde_json::to_vec(&payload).expect("Payload serializes"); + URL_SAFE_NO_PAD.encode(&json) +} + +/// Decode an opaque cursor against the expected `corpus_revision`. +/// Mismatch or malformed input returns an `ErrorV1` with +/// `code = "stale_cursor"`. 
+pub fn decode(s: &str, expected_revision: &str) -> Result<usize, ErrorV1> {
+ +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 3: `error_wire` — `stale_cursor` classification + +**Files:** +- Modify: `crates/kebab-app/src/error_wire.rs` + +- [ ] **Step 3.1: Write the failing test** + +Append to `crates/kebab-app/src/error_wire.rs` `#[cfg(test)] mod tests`: + +```rust +#[test] +fn stale_cursor_classifies_correctly() { + use anyhow::anyhow; + let err: anyhow::Error = anyhow!("stale_cursor: rev mismatch"); + let v1 = classify(&err, false); + // Without explicit downcast support, the generic anyhow path + // will fall through to "unknown" — the actual stale_cursor + // ErrorV1 is constructed directly by `cursor::decode`, not via + // the classify path. This test pins that behavior so future + // refactors of classify don't accidentally clobber the code. + assert_ne!(v1.code, "stale_cursor", "classify is not the source for stale_cursor"); +} +``` + +(If a richer classification is desired, add a downcast branch — but per the spec, `cursor::decode` returns `ErrorV1` directly so the classify path doesn't need to handle it. The test exists to lock that invariant.) + +- [ ] **Step 3.2: Run test — verify it passes immediately** + +```bash +cargo test -p kebab-app --lib stale_cursor_classifies_correctly +``` + +Expected: PASS (no implementation needed — classify already returns "unknown" for unrecognized errors). + +- [ ] **Step 3.3: Document the convention** + +Add a comment near the top of `crates/kebab-app/src/error_wire.rs`: + +```rust +// p9-fb-34: `stale_cursor` is constructed directly by `cursor::decode` +// instead of routed through `classify`. Keep that contract — adding a +// classify branch would create two sources of truth for the same code. +``` + +- [ ] **Step 3.4: Commit** + +```bash +git add crates/kebab-app/src/error_wire.rs +git commit -m "$(cat <<'EOF' +docs(error_wire): note stale_cursor convention (fb-34) + +stale_cursor is built by cursor::decode, not classify. Test +locks the invariant. 
+ +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 4: `App::search_with_opts` + `SearchResponse` + +**Files:** +- Modify: `crates/kebab-app/src/app.rs` +- Modify: `crates/kebab-app/src/lib.rs` + +- [ ] **Step 4.1: Write the failing integration test (passthrough)** + +Create `crates/kebab-app/tests/search_budget_integration.rs`: + +```rust +//! p9-fb-34: App::search_with_opts integration tests. + +mod common; + +use kebab_app::SearchResponse; +use kebab_core::{SearchFilters, SearchMode, SearchOpts, SearchQuery}; + +fn lex(text: &str, k: usize) -> SearchQuery { + SearchQuery { + text: text.to_string(), + mode: SearchMode::Lexical, + k, + filters: SearchFilters::default(), + } +} + +#[test] +fn search_with_opts_no_budget_matches_search() { + let env = common::TestEnv::new(); + common::ingest_md(&env, "a.md", "# T\n\napples are red\n"); + let app = env.app(); + + let baseline = app.search(lex("apples", 5)).unwrap(); + let resp: SearchResponse = app + .search_with_opts(lex("apples", 5), SearchOpts::default()) + .unwrap(); + + assert_eq!(resp.hits.len(), baseline.len()); + assert!(!resp.truncated); + assert!(resp.next_cursor.is_none(), "k=5 against 1 doc → no next page"); +} +``` + +- [ ] **Step 4.2: Run — verify failure** + +```bash +cargo test -p kebab-app --test search_budget_integration search_with_opts_no_budget_matches_search +``` + +Expected: FAIL — `cannot find type SearchResponse` / `method search_with_opts`. + +- [ ] **Step 4.3: Define `SearchResponse` + skeleton `search_with_opts`** + +In `crates/kebab-app/src/app.rs`, after the existing `pub use kebab_core::{...};` imports and before the `App` struct (or wherever public types belong), add: + +```rust +/// p9-fb-34: top-level wrapper around a paginated, budget-limited +/// search result. Mirrors the wire `search_response.v1` shape. 
+#[derive(Clone, Debug)] +pub struct SearchResponse { + pub hits: Vec<SearchHit>, + pub next_cursor: Option<String>, + pub truncated: bool, +} +``` + +Then in `impl App`, add: + +```rust +/// p9-fb-34: budget-aware search facade. Returns hits trimmed to +/// `opts.max_tokens` (chars/4 approximation) plus pagination +/// metadata. `App::search` itself is unchanged; this method calls it +/// and layers offset, snippet, and budget handling on top. +pub fn search_with_opts( + &self, + query: SearchQuery, + opts: SearchOpts, +) -> Result<SearchResponse> {
+ const SNIPPET_FLOOR: usize = 60; + let mut current_snippet_cap = snippet_chars; + while estimate_chars(&hits) > max_chars && current_snippet_cap > SNIPPET_FLOOR { + current_snippet_cap = (current_snippet_cap / 2).max(SNIPPET_FLOOR); + for h in hits.iter_mut() { + if h.snippet.chars().count() > current_snippet_cap { + h.snippet = trim_to_chars(&h.snippet, current_snippet_cap); + truncated = true; + } + } + } + // Step 2: pop hits from the end until we fit, but always keep ≥ 1. + while estimate_chars(&hits) > max_chars && hits.len() > 1 { + hits.pop(); + truncated = true; + } + } + + // Compute next_cursor: did we have more in the original fetch? + let returned = hits.len(); + let next_cursor = if returned == k_effective && offset.saturating_add(returned) > 0 { + // Speculative: the retriever returned exactly k_effective hits + // after offset, so there *might* be more. Encoding the cursor + // is cheap; the next call falls through to an empty page if + // nothing remains. + Some(cursor::encode(offset + returned, &corpus_revision)) + } else if truncated && returned > 0 { + // Budget-truncated mid-page; let the caller resume from where + // we stopped. + Some(cursor::encode(offset + returned, &corpus_revision)) + } else { + None + }; + + Ok(SearchResponse { + hits, + next_cursor, + truncated, + }) +} +``` + +Add the helpers near the bottom of `app.rs` (or in `cursor.rs` if cleaner — keep them adjacent to where they're called): + +```rust +/// p9-fb-34: trim to N chars (Unicode-safe). +fn trim_to_chars(s: &str, n: usize) -> String { + if s.chars().count() <= n { + return s.to_string(); + } + let mut out = String::with_capacity(n * 4); + for (i, c) in s.chars().enumerate() { + if i >= n { + break; + } + out.push(c); + } + out +} + +/// p9-fb-34: estimate wire JSON char cost of the hit list. The wire +/// shape adds object/array boilerplate (~50 chars per hit), so we +/// approximate by serializing each hit and summing chars. 
Cheap +/// enough to call inside the budget loop on small k. +fn estimate_chars(hits: &[SearchHit]) -> usize { + hits.iter() + .map(|h| serde_json::to_string(h).map(|s| s.len()).unwrap_or(0)) + .sum() +} +``` + +- [ ] **Step 4.4: Run passthrough test — verify pass** + +```bash +cargo test -p kebab-app --test search_budget_integration search_with_opts_no_budget_matches_search +``` + +Expected: PASS. + +- [ ] **Step 4.5: Re-export `SearchResponse`** + +Edit `crates/kebab-app/src/lib.rs`: + +```rust +pub use app::{App, SearchResponse}; +``` + +(The existing `pub use app::App;` line gains `SearchResponse`.) + +- [ ] **Step 4.6: Add budget-shorten test** + +Append to `crates/kebab-app/tests/search_budget_integration.rs`: + +```rust +#[test] +fn budget_truncates_snippets_when_below_threshold() { + let env = common::TestEnv::new(); + // Long body so snippet has room to shrink. + let body: String = "rust ownership is a memory model. ".repeat(10); + common::ingest_md(&env, "a.md", &format!("# T\n\n{body}\n")); + let app = env.app(); + + let unrestricted = app.search(lex("rust", 5)).unwrap(); + let unrestricted_chars: usize = unrestricted.iter().map(|h| h.snippet.chars().count()).sum(); + + let resp = app + .search_with_opts( + lex("rust", 5), + SearchOpts { + max_tokens: Some(50), // ~200 chars total cap, well under unrestricted + snippet_chars: None, + cursor: None, + }, + ) + .unwrap(); + let limited_chars: usize = resp.hits.iter().map(|h| h.snippet.chars().count()).sum(); + + assert!(resp.truncated, "small budget must trip truncation"); + assert!(limited_chars < unrestricted_chars, "snippet should shrink"); + assert!(!resp.hits.is_empty(), "always retain ≥1 hit"); +} +``` + +- [ ] **Step 4.7: Run + verify** + +```bash +cargo test -p kebab-app --test search_budget_integration +``` + +Expected: 2 PASS. 
+ +- [ ] **Step 4.8: Add cursor-pagination + stale-cursor tests** + +Append to `crates/kebab-app/tests/search_budget_integration.rs`: + +```rust +#[test] +fn cursor_paginates_to_next_page() { + let env = common::TestEnv::new(); + // Seed N docs so k=2 returns multiple pages. + for i in 0..6 { + common::ingest_md(&env, &format!("d{i}.md"), &format!("# T{i}\n\nrust topic {i}\n")); + } + let app = env.app(); + + let page1 = app + .search_with_opts(lex("rust", 2), SearchOpts::default()) + .unwrap(); + assert_eq!(page1.hits.len(), 2); + let cursor = page1.next_cursor.expect("more hits available"); + + let page2 = app + .search_with_opts( + lex("rust", 2), + SearchOpts { + max_tokens: None, + snippet_chars: None, + cursor: Some(cursor), + }, + ) + .unwrap(); + assert_eq!(page2.hits.len(), 2); + // Second page must contain different hits than first. + let p1_ids: std::collections::HashSet<_> = page1.hits.iter().map(|h| h.chunk_id.0.clone()).collect(); + let p2_ids: std::collections::HashSet<_> = page2.hits.iter().map(|h| h.chunk_id.0.clone()).collect(); + assert!(p1_ids.is_disjoint(&p2_ids), "page 2 must not repeat page 1 hits"); +} + +#[test] +fn cursor_rejected_after_corpus_revision_bump() { + let env = common::TestEnv::new(); + common::ingest_md(&env, "a.md", "# T\n\napples\n"); + let app = env.app(); + + let page1 = app + .search_with_opts(lex("apples", 1), SearchOpts::default()) + .unwrap(); + let cursor = page1.next_cursor; + + if let Some(c) = cursor { + // Force a corpus_revision bump. 
+ common::ingest_md(&env, "b.md", "# B\n\nbananas\n"); + let app2 = env.app(); // re-open to pick up new revision + + let result = app2.search_with_opts( + lex("apples", 1), + SearchOpts { + max_tokens: None, + snippet_chars: None, + cursor: Some(c), + }, + ); + let err = result.unwrap_err(); + assert!( + err.to_string().contains("stale_cursor"), + "must surface stale_cursor: {err}" + ); + } + // If page1 had no next_cursor (k=1 and only 1 doc), this branch + // is unreachable but the test still passes — exercises the + // happy-no-cursor path. +} +``` + +- [ ] **Step 4.9: Run + verify** + +```bash +cargo test -p kebab-app --test search_budget_integration +``` + +Expected: 4 PASS. + +If `common::TestEnv::app()` returns a freshly-built `App` each call, the corpus_revision bump test works. If it caches, you may need a `env.reopen_app()` helper — extend `tests/common/mod.rs`. + +- [ ] **Step 4.10: Verify existing `App::search` callers still work** + +```bash +cargo test -p kebab-app +cargo build --workspace +``` + +Expected: green. `App::search` signature unchanged so TUI / kebab-rag callers compile. + +- [ ] **Step 4.11: Commit** + +```bash +git add crates/kebab-app/src/app.rs crates/kebab-app/src/lib.rs crates/kebab-app/tests/search_budget_integration.rs +git commit -m "$(cat <<'EOF' +feat(app): App::search_with_opts + SearchResponse (fb-34) + +Budget loop: snippet shorten → k pop → ≥1 hit floor. Cursor +encode/decode threads corpus_revision; mismatch surfaces as +stale_cursor anyhow error. App::search retained as thin wrapper. 
+ +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 5: Wire schema — `search_response.v1` + +**Files:** +- Create: `docs/wire-schema/v1/search_response.schema.json` + +- [ ] **Step 5.1: Write the schema** + +Create `docs/wire-schema/v1/search_response.schema.json`: + +```json +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/search_response.schema.json", + "title": "SearchResponse v1", + "description": "Top-level wrapper for `kebab search --json` output. Replaces the bare `search_hit.v1[]` array — wraps it with pagination + truncation metadata. Token counts are approximate (chars/4 estimate, no tokenizer dep).", + "type": "object", + "required": ["schema_version", "hits", "next_cursor", "truncated"], + "properties": { + "schema_version": { "const": "search_response.v1" }, + "hits": { "type": "array", "description": "search_hit.v1[]" }, + "next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." }, + "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Caller can request next page via next_cursor or pass higher k." } + } +} +``` + +- [ ] **Step 5.2: Validate** + +```bash +python3 -c "import json; json.load(open('docs/wire-schema/v1/search_response.schema.json'))" +``` + +Expected: silent success. + +- [ ] **Step 5.3: Commit** + +```bash +git add docs/wire-schema/v1/search_response.schema.json +git commit -m "$(cat <<'EOF' +feat(wire): search_response.v1 schema (fb-34) + +Wrapper around search_hit.v1[] with next_cursor + truncated. +Wire breaking — agent that parses bare array must adapt. 
+ +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 6: CLI `--max-tokens` / `--snippet-chars` / `--cursor` + +**Files:** +- Modify: `crates/kebab-cli/src/main.rs` +- Modify: `crates/kebab-cli/src/wire.rs` + +- [ ] **Step 6.1: Add `wire_search_response` helper** + +Locate `crates/kebab-cli/src/wire.rs`. After `wire_search_hits`, append: + +```rust +/// p9-fb-34: tag a `SearchResponse` as `search_response.v1`. Wraps +/// the existing `search_hit.v1[]` array with pagination + truncation +/// metadata. +pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value { + let v = serde_json::json!({ + "hits": r.hits.iter().map(wire_search_hit).collect::>(), + "next_cursor": r.next_cursor, + "truncated": r.truncated, + }); + tag_object(v, "search_response.v1") +} +``` + +- [ ] **Step 6.2: Add clap flags + dispatch** + +Locate the `Cmd::Search` enum variant in `crates/kebab-cli/src/main.rs`: + +```bash +grep -n "Cmd::Search" crates/kebab-cli/src/main.rs | head -3 +``` + +Add three new fields to the variant definition (the `enum Cmd { ... Search { query, k, mode, explain, no_cache, ... } }` block): + +```rust +/// p9-fb-34: cap result wire JSON size at approximately N tokens +/// (chars/4 estimate). When set, smaller snippets and fewer hits +/// may be returned; check `truncated` in the JSON wire. +#[arg(long)] +max_tokens: Option, +/// p9-fb-34: per-hit snippet character cap, overrides +/// `config.search.snippet_chars` for this call only. +#[arg(long)] +snippet_chars: Option, +/// p9-fb-34: opaque cursor from a previous response's +/// `next_cursor` to fetch the next page. Mismatched +/// `corpus_revision` returns `error.v1.code = stale_cursor`. 
+#[arg(long)] +cursor: Option, +``` + +In the match arm, replace the existing dispatch (around `Cmd::Search { query, k, mode, explain: _, no_cache } =>`): + +```rust +Cmd::Search { + query, + k, + mode, + explain: _, + no_cache, + max_tokens, + snippet_chars, + cursor, +} => { + let cfg = kebab_config::Config::load(cli.config.as_deref())?; + let q = kebab_core::SearchQuery { + text: query.clone(), + mode: (*mode).into(), + k: *k, + filters: kebab_core::SearchFilters::default(), + }; + let opts = kebab_core::SearchOpts { + max_tokens: *max_tokens, + snippet_chars: *snippet_chars, + cursor: cursor.clone(), + }; + // p9-fb-34: budget-aware path. --no-cache still bypasses the + // App-level LRU; wire wrapper applies regardless. + let app = kebab_app::App::open_with_config(cfg)?; + let resp = if *no_cache { + // search_uncached_with_opts not exposed; degrade by + // clearing cache then calling search_with_opts. + app.clear_search_cache(); + app.search_with_opts(q, opts)? + } else { + app.search_with_opts(q, opts)? + }; + + if cli.json { + println!("{}", serde_json::to_string(&wire::wire_search_response(&resp))?); + } else { + // Plain output unchanged — list hits with [stale] tag + // (fb-32) per existing convention. Truncation hint goes + // to stderr so it doesn't pollute stdout. + use std::io::IsTerminal; + let color = std::io::stdout().is_terminal(); + for h in &resp.hits { + let heading = if h.heading_path.is_empty() { + String::new() + } else { + format!(" > {}", h.heading_path.join(" / ")) + }; + let stale_tag = if h.stale { + if color { "\x1b[33m[stale]\x1b[0m " } else { "[stale] " } + } else { + "" + }; + println!( + "{:>2}. 
{:.4} {}{}{}", + h.rank, h.retrieval.fusion_score, stale_tag, h.doc_path.0, heading, + ); + } + if resp.truncated { + let next = resp.next_cursor.as_deref().unwrap_or("(none)"); + eprintln!("[truncated; use --cursor {next} for the next page]"); + } + } + Ok(()) +} +``` + +If the existing path uses `kebab_app::search_with_config` / `search_uncached_with_config` (free functions rather than `App::open_with_config`), grep for the actual idiom: + +```bash +grep -n "kebab_app::search\|App::open_with_config" crates/kebab-cli/src/main.rs | head -5 +``` + +Adapt the dispatch to match — the goal is `App::search_with_opts(query, opts)`. If a `*_with_opts_with_config` free function is preferred, add it to `crates/kebab-app/src/lib.rs` mirroring the existing `search_with_config` shape: + +```rust +pub fn search_with_opts_with_config( + config: kebab_config::Config, + query: SearchQuery, + opts: SearchOpts, +) -> anyhow::Result { + App::open_with_config(config)?.search_with_opts(query, opts) +} +``` + +- [ ] **Step 6.3: Build the CLI** + +```bash +cargo build -p kebab-cli +``` + +Expected: clean. + +- [ ] **Step 6.4: Verify --help shows the new flags** + +```bash +cargo run -q -p kebab-cli -- search --help 2>&1 | grep -E "max-tokens|snippet-chars|cursor" +``` + +Expected: 3 lines, one per flag. + +- [ ] **Step 6.5: Run kebab-cli existing tests** + +```bash +cargo test -p kebab-cli +``` + +Expected: existing tests pass. If a wire test asserts the OLD bare `search_hit.v1[]` shape, it will fail — update those tests now to expect `search_response.v1`. Search: + +```bash +grep -rn "search_hit.v1\|wire_search_hits" crates/kebab-cli/tests/ +``` + +For each match, decide: +- If the test verifies `kebab search --json` stdout → update to expect `search_response.v1` wrapper. +- If the test only verifies a single hit's wire shape (still part of the wrapper) → no change. 
+ +- [ ] **Step 6.6: Commit** + +```bash +git add crates/kebab-cli/src/main.rs crates/kebab-cli/src/wire.rs crates/kebab-app/src/lib.rs +git commit -m "$(cat <<'EOF' +feat(cli): kebab search --max-tokens / --snippet-chars / --cursor (fb-34) + +JSON output wrapped in search_response.v1 (breaking — agent must +adapt). Plain output unchanged + [truncated; use --cursor X] +stderr hint when budget tripped. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 7: CLI integration tests + +**Files:** +- Create: `crates/kebab-cli/tests/wire_search_response.rs` + +- [ ] **Step 7.1: Inspect existing common helpers** + +```bash +sed -n '1,50p' crates/kebab-cli/tests/common/mod.rs +``` + +Existing fb-32 / fb-33 helpers: `write_config(cfg, ws)`, `ingest`, `run_search_json`, etc. Mirror these. + +- [ ] **Step 7.2: Add `run_search` helper for arbitrary args** + +If a generic search runner doesn't exist, append to `crates/kebab-cli/tests/common/mod.rs`: + +```rust +/// p9-fb-34: invoke `kebab search` with arbitrary flags, capture +/// stdout + stderr. +pub fn run_search_with_args(cfg: &std::path::Path, args: &[&str]) -> (String, String) { + let exe = env!("CARGO_BIN_EXE_kebab"); + let mut cmd_args: Vec<&str> = vec!["--config"]; + let cfg_str = cfg.to_str().expect("utf8"); + cmd_args.push(cfg_str); + cmd_args.push("search"); + cmd_args.extend(args); + let out = std::process::Command::new(exe) + .args(&cmd_args) + .output() + .expect("kebab search"); + ( + String::from_utf8_lossy(&out.stdout).to_string(), + String::from_utf8_lossy(&out.stderr).to_string(), + ) +} +``` + +Adapt to whatever signature the existing helpers use. + +- [ ] **Step 7.3: Write the integration tests** + +Create `crates/kebab-cli/tests/wire_search_response.rs`: + +```rust +//! p9-fb-34: CLI search wire wrapper + budget controls. 
+ +mod common; + +use serde_json::Value; + +#[test] +fn search_json_emits_search_response_v1_wrapper() { + let (cfg, ws) = common::write_config(); + common::ingest(&cfg, &ws, "a.md", "# T\n\napples are red.\n"); + let (stdout, _stderr) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--json", "apples"], + ); + let v: Value = serde_json::from_str(stdout.trim()) + .unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}")); + assert_eq!(v["schema_version"], "search_response.v1"); + assert!(v["hits"].is_array(), "hits must be array"); + assert!(v["next_cursor"].is_null() || v["next_cursor"].is_string()); + assert!(v["truncated"].is_boolean()); +} + +#[test] +fn search_json_truncates_with_max_tokens() { + let (cfg, ws) = common::write_config(); + let body: String = "rust ownership is a memory model. ".repeat(10); + common::ingest(&cfg, &ws, "a.md", &format!("# T\n\n{body}\n")); + let (stdout, _stderr) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--json", "--max-tokens", "30", "rust"], + ); + let v: Value = serde_json::from_str(stdout.trim()).expect("json"); + assert_eq!(v["truncated"], true, "30 tokens cap must trip truncation"); +} + +#[test] +fn search_json_cursor_paginates() { + let (cfg, ws) = common::write_config(); + for i in 0..6 { + common::ingest(&cfg, &ws, &format!("d{i}.md"), &format!("# T{i}\n\nrust topic {i}\n")); + } + let (page1, _) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--json", "-k", "2", "rust"], + ); + let v1: Value = serde_json::from_str(page1.trim()).expect("json"); + let cursor = v1["next_cursor"].as_str().expect("next_cursor present"); + + let (page2, _) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--json", "-k", "2", "--cursor", cursor, "rust"], + ); + let v2: Value = serde_json::from_str(page2.trim()).expect("json"); + let p1_ids: Vec<_> = v1["hits"] + .as_array() + .unwrap() + .iter() + .map(|h| h["chunk_id"].as_str().unwrap().to_string()) + .collect(); + let 
p2_ids: Vec<_> = v2["hits"] + .as_array() + .unwrap() + .iter() + .map(|h| h["chunk_id"].as_str().unwrap().to_string()) + .collect(); + assert!(p2_ids.iter().all(|id| !p1_ids.contains(id)), + "page 2 must not repeat page 1"); +} + +#[test] +fn search_plain_emits_truncated_hint_to_stderr() { + let (cfg, ws) = common::write_config(); + let body: String = "rust ownership is a memory model. ".repeat(10); + common::ingest(&cfg, &ws, "a.md", &format!("# T\n\n{body}\n")); + let (_stdout, stderr) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--max-tokens", "30", "rust"], + ); + assert!( + stderr.contains("[truncated;"), + "stderr must carry truncated hint: {stderr:?}" + ); +} +``` + +If `common::write_config()` doesn't exist with the exact signature, look at how `wire_search_stale.rs` calls it (fb-32) and mirror. + +- [ ] **Step 7.4: Build + run** + +```bash +cargo test -p kebab-cli --test wire_search_response 2>&1 | tail -20 +``` + +Expected: 4 PASS. (Lexical-only, no Ollama gate needed.) + +- [ ] **Step 7.5: Verify full kebab-cli suite** + +```bash +cargo test -p kebab-cli +``` + +Expected: all PASS. + +- [ ] **Step 7.6: Commit** + +```bash +git add crates/kebab-cli/tests/ +git commit -m "$(cat <<'EOF' +test(cli): wire_search_response + budget integration (fb-34) + +4 lexical-only tests covering search_response.v1 wrapper shape, +--max-tokens truncation, --cursor pagination, plain stderr hint. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 8: MCP search tool — wrapper + new inputs + +**Files:** +- Modify: `crates/kebab-mcp/src/tools/search.rs` +- Possibly modify: `crates/kebab-mcp/tests/tools_call_search.rs` + +- [ ] **Step 8.1: Inspect current MCP search tool** + +```bash +sed -n '1,80p' crates/kebab-mcp/src/tools/search.rs +``` + +Note the existing `SearchInput` shape and the wire-tag pattern used for the response. 
+ +- [ ] **Step 8.2: Extend `SearchInput`** + +Add 3 optional fields: + +```rust +#[derive(Debug, Deserialize, Serialize, JsonSchema)] +pub struct SearchInput { + pub query: String, + pub mode: Option, + pub k: Option, + /// p9-fb-34: cap result wire size at ~N tokens (chars/4 estimate). + pub max_tokens: Option, + /// p9-fb-34: per-hit snippet character cap. + pub snippet_chars: Option, + /// p9-fb-34: opaque cursor from a previous response. + pub cursor: Option, +} +``` + +- [ ] **Step 8.3: Switch dispatch to `search_with_opts`** + +In `handle(state, input)`, replace the existing `search_with_config(...)` call with: + +```rust +let opts = kebab_core::SearchOpts { + max_tokens: input.max_tokens, + snippet_chars: input.snippet_chars, + cursor: input.cursor, +}; +let cfg_clone = (*state.config).clone(); +let result = kebab_app::search_with_opts_with_config(cfg_clone, query, opts); +``` + +(Use whatever wrapper free function shape `kebab-app` provides per Task 6 Step 6.2.) + +For the success branch, serialize `SearchResponse` and tag with `search_response.v1`: + +```rust +match result { + Ok(resp) => { + let v = serde_json::json!({ + "schema_version": "search_response.v1", + "hits": resp.hits.iter().map(serde_json::to_value).collect::, _>>()?, + "next_cursor": resp.next_cursor, + "truncated": resp.truncated, + }); + match serde_json::to_string(&v) { + Ok(json) => to_tool_success(json), + Err(e) => to_tool_error(&anyhow::anyhow!(e)), + } + } + Err(e) => to_tool_error(&e), +} +``` + +If the existing handler returns `Result` rather than `CallToolResult` directly, adapt. + +- [ ] **Step 8.4: Update the MCP search test** + +Open `crates/kebab-mcp/tests/tools_call_search.rs`. The existing test likely asserts `search_hit.v1` on the response array. 
Update to expect the new wrapper: + +```rust +// (the existing assertions for individual hits stay; add wrapper assertions) +let v: serde_json::Value = serde_json::from_str(&body).expect("json"); +assert_eq!(v["schema_version"], "search_response.v1"); +assert!(v["hits"].is_array()); +``` + +If the test asserted `arr.as_array().first()` on what was a top-level array, change to `v["hits"].as_array().unwrap().first()`. + +- [ ] **Step 8.5: Run MCP tests** + +```bash +cargo test -p kebab-mcp +``` + +Expected: all PASS. + +- [ ] **Step 8.6: Commit** + +```bash +git add crates/kebab-mcp/ +git commit -m "$(cat <<'EOF' +feat(mcp): search tool emits search_response.v1 + budget inputs (fb-34) + +SearchInput gains max_tokens / snippet_chars / cursor (all optional). +Output wrapped in search_response.v1 to match CLI; existing +tools_call_search test updated to read v["hits"] instead of the bare +array. + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 9: Workspace test + clippy gate + +- [ ] **Step 9.1: Workspace test** + +```bash +cargo test --workspace --no-fail-fast -j 1 2>&1 | tail -30 +``` + +Expected: all PASS. + +If any other crate (kebab-tui, kebab-eval, etc.) hits compile errors due to the `App::search` API surface change, that signals the change wasn't backwards-compatible. Verify `App::search` signature is unchanged (still `Vec`). + +- [ ] **Step 9.2: Clippy** + +```bash +cargo clippy --workspace --all-targets -- -D warnings 2>&1 | tail -10 +``` + +Expected: clean. Common new warnings to watch: +- `clippy::needless_pass_by_value` on cursor params — adjust as flagged. +- `clippy::large_struct_passed_by_value` if `SearchOpts` grows — currently 3 small Options. + +- [ ] **Step 9.3: Commit clippy fixes if needed** + +```bash +git add -A +git commit -m "chore: clippy fixes for fb-34" +``` + +(Skip if no fixes were necessary.) 
+ +--- + +## Task 10: Documentation updates + +**Files:** +- Modify: `README.md` +- Modify: `docs/SMOKE.md` +- Modify: `tasks/p9/p9-fb-34-output-budget-controls.md` +- Modify: `tasks/INDEX.md` +- Modify: `tasks/HOTFIXES.md` +- Modify: `integrations/claude-code/kebab/SKILL.md` + +- [ ] **Step 10.1: README — search row update** + +Find the `kebab search` row in the 명령 table: + +```bash +grep -n "kebab search" README.md | head -3 +``` + +Append `--max-tokens`, `--snippet-chars`, `--cursor` to the flag list and add a one-liner about wire shape change. Example: + +```markdown +| `kebab search "" [--mode lexical|vector|hybrid] [--max-tokens N] [--snippet-chars N] [--cursor ]` | (existing description) **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. | +``` + +- [ ] **Step 10.2: SMOKE.md — pagination walkthrough** + +Append a section after the existing search section (and after the fb-32 / fb-33 sections): + +```markdown +### Pagination + budget (fb-34) + +```bash +# First page +kebab search "rust" --json -k 5 > page1.json +jq '.next_cursor' page1.json + +# Next page using the returned cursor +NEXT=$(jq -r '.next_cursor' page1.json) +kebab search "rust" --json -k 5 --cursor "$NEXT" > page2.json + +# Budget cap — returns smaller snippet / fewer hits + truncated=true +kebab search "rust" --json --max-tokens 200 | jq '.truncated, (.hits | length)' +``` + +`next_cursor` 는 corpus_revision 변경 (이후 ingest 등) 시 invalid — 다음 호출이 `error.v1.code = stale_cursor` 로 거절. agent 는 새 search 로 재발급 받기. +``` + +- [ ] **Step 10.3: Task spec status flip** + +Edit `tasks/p9/p9-fb-34-output-budget-controls.md`: + +```diff +-status: open ++status: completed +``` + +Replace the `> ⏳ **백로그 only — 미구현.**` block with: + +```markdown +> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. 
post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 의 `2026-05-09 — p9-fb-34` 항목 참조 — live source of truth. + +상세 설계: `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md`. +구현 계획: `docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md`. +``` + +- [ ] **Step 10.4: tasks/INDEX.md** + +```diff +- - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ⏳ 미구현, brainstorm 필요 ++ - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09) +``` + +- [ ] **Step 10.5: HOTFIXES — wire breaking decision log** + +Add a new entry near the top of dated entries in `tasks/HOTFIXES.md`: + +```markdown +## 2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1 + +**무엇이 바뀌었나**: `kebab search --json` stdout 이 기존 `search_hit.v1[]` 배열에서 신규 `search_response.v1` object 로 교체. wrapper 가 `hits`, `next_cursor`, `truncated` 세 필드를 가짐. + +**Spec contract 와의 관계**: 명시적 wire breaking change. spec `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md` 의 §Wire shape 절에 단일 출처 결정. + +**의식적 결정**: +- pagination + truncation metadata 를 `search_hit` 자체에 흡수하면 단일 hit 의 도메인 의미가 오염됨 (모든 hit 가 `next_cursor` 필드 보유 등). top-level wrapper 가 분리도 깨끗. +- 외부 consumer 영향: 단일 사용자 환경 + Claude Code skill 한 곳. skill 은 fb-34 와 동시 갱신. +- 이 변경은 search_hit.v1 자체 schema 는 손대지 않음 — 도메인 stable. + +**영향 받는 consumer**: kebab-tui (Search 패널 — 변경 불필요, App::search 시그니처 보존), kebab-mcp (search tool — 같은 PR 에서 갱신), Claude Code skill (같은 PR 에서 갱신). 외부 producer/consumer 없음. +``` + +- [ ] **Step 10.6: SKILL.md — recipe + cursor example** + +Edit `integrations/claude-code/kebab/SKILL.md`. Find the search recipes / parsing tips and update: +- Recipe A / B / C: `response.hits[]` instead of bare array. 
Example: + ```jq + jq '.hits[] | {rank, doc_path, heading: .heading_path[-1], snippet}' + ``` +- Add a "Pagination" subsection under Parsing tips: + ```markdown + - `search_response.v1.next_cursor` — opaque base64. Pass back as `--cursor` (CLI) or `cursor` (MCP `mcp__kebab__search` input) for the next page. `null` when no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh cursor. + - `search_response.v1.truncated` — true when `--max-tokens` (CLI) / `max_tokens` (MCP) forced snippet shortening or k reduction. Either widen the budget or paginate via `next_cursor`. + ``` + +- [ ] **Step 10.7: Commit docs** + +```bash +git add README.md docs/SMOKE.md tasks/p9/p9-fb-34-output-budget-controls.md tasks/INDEX.md tasks/HOTFIXES.md integrations/claude-code/kebab/SKILL.md +git commit -m "$(cat <<'EOF' +docs(fb-34): README + SMOKE + INDEX + HOTFIXES + skill notes + +Co-Authored-By: Claude Opus 4.7 (1M context) +EOF +)" +``` + +--- + +## Task 11: Smoke + push + PR + +- [ ] **Step 11.1: Manual smoke** + +```bash +cd /tmp/kebab-smoke # existing scratch dir from prior tasks +~/Workspace/projects/kebab/target/release/kebab --config /tmp/kebab-smoke/config.toml ingest +~/Workspace/projects/kebab/target/release/kebab --config /tmp/kebab-smoke/config.toml search "test" --json | jq '{schema_version, truncated, next_cursor, hit_count: (.hits | length)}' +~/Workspace/projects/kebab/target/release/kebab --config /tmp/kebab-smoke/config.toml search "test" --json --max-tokens 30 | jq '.truncated' +``` + +Expected: +- First call: `schema_version: "search_response.v1"`, `truncated: false`, `hit_count > 0`. +- Second call: `truncated: true`. + +- [ ] **Step 11.2: Final workspace test** + +```bash +cd ~/Workspace/projects/kebab +cargo test --workspace --no-fail-fast -j 1 +``` + +Expected: all green. 
+ +- [ ] **Step 11.3: Push branch** + +```bash +git push -u origin feat/fb-34-output-budget-controls +``` + +- [ ] **Step 11.4: Open PR via gitea-pr** + +Build the PR body at `/tmp/fb34-pr-body.md`: + +```markdown +## Summary + +- adds `kebab search --max-tokens / --snippet-chars / --cursor` plus the equivalent inputs on `mcp__kebab__search` +- wraps `--json` output in `search_response.v1` (`{hits, next_cursor, truncated}`) — wire breaking; agent that parses bare `search_hit.v1[]` must adapt +- token estimation = `chars/4` (no tokenizer dep); truncate priority: snippet shorten → k pop → ≥1 hit floor +- cursor = opaque base64(`{offset, corpus_revision}`); mismatch returns `error.v1.code = stale_cursor` +- ask path scope out (rag.max_context_tokens already covers it) +- TUI Search pane unchanged — `App::search` signature preserved as thin wrapper + +## Test plan + +- [x] `cargo test --workspace --no-fail-fast -j 1` — green +- [x] `cargo clippy --workspace --all-targets -- -D warnings` — clean +- [x] new tests: + - `cursor` (kebab-app): encode/decode round-trip + stale_cursor mismatch (3 tests) + - `search_budget_integration` (kebab-app): passthrough + snippet shorten + cursor pagination + corpus_revision bump (4 tests) + - `wire_search_response` (kebab-cli): wire wrapper + max-tokens truncation + cursor pagination + plain stderr hint (4 tests) + - `tools_call_search` (kebab-mcp): updated to assert `search_response.v1` wrapper +- [x] manual smoke per `docs/SMOKE.md` "Pagination + budget" walkthrough + +## Architectural notes + +- `App::search` signature unchanged → TUI / kebab-rag callers unaffected. +- `App::search_with_opts` is the new public API; CLI / MCP go through it. +- `chars/4` token estimation matches `rag::pack_context` convention. +- Cursor is opaque on purpose — internal schema may change; agent must not parse. +- Wire breaking documented in HOTFIXES `2026-05-09 — p9-fb-34`. 
+ +## Files of interest + +- spec: `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md` +- plan: `docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md` +- core: `crates/kebab-core/src/search.rs` (SearchOpts) +- app: `crates/kebab-app/src/{cursor,app}.rs` (SearchResponse + budget loop) +- CLI: `crates/kebab-cli/src/main.rs` (Cmd::Search), `crates/kebab-cli/src/wire.rs` +- MCP: `crates/kebab-mcp/src/tools/search.rs` +- wire: `docs/wire-schema/v1/search_response.schema.json` +``` + +Open the PR: + +```bash +/Users/user/.claude/skills/gitea-ops/bin/gitea-pr \ + --title "feat(fb-34): output budget controls" \ + --body "$(cat /tmp/fb34-pr-body.md)" \ + --head feat/fb-34-output-budget-controls \ + --base main +``` + +Capture the URL. + +- [ ] **Step 11.5: Cleanup** + +```bash +rm /tmp/fb34-pr-body.md +``` + +--- + +## Self-review + +- **Spec coverage:** + - §Behavior contract / CLI flags → Task 6 + - §Wire shape → Task 5 (schema) + Task 6 (CLI emit) + Task 8 (MCP emit) + - §Token estimation → Task 4 (`estimate_chars` helper using serde_json size, chars/4 conceptually) + - §Truncate priority → Task 4 budget loop (snippet shorten → k pop → ≥1) + - §Pagination cursor → Task 2 (encode/decode) + Task 4 (next_cursor computation) + Task 6 (CLI flag) + Task 8 (MCP input) + - §Stale cursor error → Task 2 + Task 3 + - §Domain API change → Tasks 1, 4 (SearchOpts + SearchResponse + App::search_with_opts) + - §Components → Tasks 1-8 + - §Test plan → Tasks 2 (cursor), 4 (App), 7 (CLI), 8 (MCP) + - §Documentation → Task 10 + - §Risks (wire breaking, App stability, chars/4 ±15%, cursor opacity) → addressed in Task 4 (App::search preserved), Task 5 (schema description mentions approximation), Task 10 (HOTFIXES) + +- **Placeholder scan:** + - Two "if/look at" instructions in Task 6 + Task 8 — those direct the engineer to mirror existing scaffold rather than invent. Concrete fallback paths spelled out. + - No TODO / "fill in" / "later". 
+ +- **Type consistency:** + - `SearchOpts { max_tokens: Option, snippet_chars: Option, cursor: Option }` consistent across Tasks 1, 4, 6, 8. + - `SearchResponse { hits: Vec, next_cursor: Option, truncated: bool }` consistent across Tasks 4, 5, 6, 8. + - `cursor::encode(offset, revision) -> String`, `cursor::decode(s, expected) -> Result` consistent across Tasks 2, 4. + - `error.v1.code = "stale_cursor"` consistent across spec, Task 2, Task 3, Task 10. + +--- + +## Execution Handoff + +Plan complete and saved to `docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md`. Two execution options: + +**1. Subagent-Driven (recommended)** — fresh subagent per task, review between tasks. + +**2. Inline Execution** — execute tasks in this session. + +Which approach? diff --git a/docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md b/docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md new file mode 100644 index 0000000..e6e07ef --- /dev/null +++ b/docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md @@ -0,0 +1,230 @@ +--- +title: "p9-fb-34 — Output budget controls design" +phase: P9 +component: kebab-core + kebab-app + kebab-cli + kebab-mcp + wire-schema +task_id: p9-fb-34 +status: design +target_version: 0.5.0 +contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md +contract_sections: [§4 search, §10 UX, wire-schema search_hit.v1] +date: 2026-05-09 +--- + +# p9-fb-34 — Output budget controls + +## Goal + +`kebab search` agent UX 개선. context window 제약 있는 agent 가 검색 결과 size 와 페이지네이션을 명시적으로 제어할 수 있게 한다. CLI surface 우선, MCP tool 도 동일 인자로 동시 노출. ask path 는 scope out (별도 `rag.max_context_tokens` 가 이미 budget 담당). + +## Behavior contract + +### CLI flags + +`kebab search ""` 에 세 가지 flag 신규: + +| flag | 의미 | default | +|------|------|---------| +| `--max-tokens N` | 결과 wire JSON 의 추정 token 수 cap (`chars/4` 근사). 초과 시 truncate priority 적용. 
| 미설정 = 비활성 (기존 동작) | +| `--snippet-chars N` | 각 hit snippet 최대 chars. config 의 `search.snippet_chars` 보다 우선. | 미설정 = config 값 | +| `--cursor ` | 이전 호출의 `next_cursor` 값. 다음 페이지 hits 만 반환. | 미설정 = 첫 페이지 | + +### Wire shape + +`kebab search --json` stdout 이 기존 `search_hit.v1[]` 배열에서 신규 `search_response.v1` wrapper object 로 교체: + +```json +{ + "schema_version": "search_response.v1", + "hits": [/* search_hit.v1[] */], + "next_cursor": "" | null, + "truncated": true | false +} +``` + +**Backwards-compat broken** — agent 가 `[0]` 직접 인덱싱하면 깨짐. CLI plain (`--json` 없이) 출력 무영향. HOTFIXES 에 결정 로그. + +### Token estimation + +`chars/4` 근사 (RAG `pack_context` 와 일관). tiktoken-rs 등 신규 dep 없음. 정확도 ±15% 수준 — agent budget 제어 목적상 충분. wire schema description 에 "approximation" 명시. + +### Truncate priority + +`opts.max_tokens` 가 Some 일 때만 동작. 단계별: + +1. **Snippet 단축** — 각 hit snippet 을 `opts.snippet_chars.unwrap_or(config.search.snippet_chars)` 로 자른 뒤, 여전히 budget 초과면 60-char floor 까지 점진 단축. +2. **k 축소** — snippet 60 char 까지 줄여도 초과면 마지막 hit 부터 pop. 최소 1 hit 보장. +3. **truncated flag** — 위 어느 단계라도 동작 시 `truncated: true`. agent 는 `next_cursor` 로 다음 페이지 요청 가능. + +metadata (rank/score/doc_path/citation) 는 끝까지 유지 — agent 가 hit 자체를 못 찾으면 무의미. + +### Pagination cursor + +cursor 는 opaque base64 — 내부적으로 `{offset: usize, corpus_revision: string}` JSON 의 base64 encode. + +- 첫 호출: cursor 미설정 → offset 0. +- 응답: 남은 hit 있으면 `next_cursor = encode(offset + returned, current_revision)`. 없으면 `null`. +- 다음 호출: `--cursor ` → decode → offset 만큼 skip. +- corpus_revision mismatch (이후 ingest 등으로 corpus 가 변경됨) → `error.v1.code = "stale_cursor"`, exit 2. agent 책임으로 재호출. + +retriever 호출 시 k = `effective_k + offset` 만큼 fetch 후 offset 만큼 skip 해 응답. + +### Stale cursor error + +`error.v1.code` enum 에 `"stale_cursor"` 추가. message 예시: `"cursor was issued against corpus_revision 'abc'; current revision is 'xyz'. 
Re-issue search to obtain a fresh cursor."` + +## Allowed / forbidden dependencies + +- `kebab-core`: `SearchOpts` 신규 도메인 type 정의. 신규 dep 없음 (option / String 만). +- `kebab-app`: cursor encode/decode 헬퍼 (base64 + serde_json). `base64` workspace dep 가 이미 있을 가능성 높음 — 확인 후 필요 시 추가. +- `kebab-cli`: clap 인자 추가, wire wrapper 헬퍼. +- `kebab-mcp`: tool input schema 확장. +- `kebab-tui`: 변경 없음 (Search 패널 budget 미사용. fb-3X 후속). +- `kebab-search`: 변경 없음 — retriever signature 보존. + +`kebab-core` 가 다른 `kebab-*` crate 의존 금지 룰 준수. + +## Public surface delta + +### kebab-core + +```rust +#[derive(Clone, Debug, Default)] +pub struct SearchOpts { + /// p9-fb-34: chars/4 approximation. None = no budget enforcement. + pub max_tokens: Option, + /// p9-fb-34: per-hit snippet character cap. None = use config default. + pub snippet_chars: Option, + /// p9-fb-34: opaque base64 cursor from a previous response. + pub cursor: Option, +} +``` + +### kebab-app + +```rust +#[derive(Clone, Debug)] +pub struct SearchResponse { + pub hits: Vec, + pub next_cursor: Option, + pub truncated: bool, +} + +impl App { + /// p9-fb-34: budget-aware search. + pub fn search_with_opts( + &self, + query: SearchQuery, + opts: SearchOpts, + ) -> Result; + + // Existing — thin wrapper for backwards-compat. 
+ pub fn search(&self, query: SearchQuery) -> Result> { + let resp = self.search_with_opts(query, SearchOpts::default())?; + Ok(resp.hits) + } +} + +// cursor helpers (private to app crate) +pub(crate) fn encode_cursor(offset: usize, corpus_revision: &str) -> String; +pub(crate) fn decode_cursor( + s: &str, + expected_revision: &str, +) -> Result; +``` + +### kebab-cli + +```rust +// Cmd::Search 새 인자 +#[arg(long)] max_tokens: Option, +#[arg(long)] snippet_chars: Option, +#[arg(long)] cursor: Option, +``` + +```rust +// wire helper +pub fn wire_search_response(r: &SearchResponse) -> Value { + let v = serde_json::json!({ + "hits": r.hits.iter().map(wire_search_hit).collect::>(), + "next_cursor": r.next_cursor, + "truncated": r.truncated, + }); + tag_object(v, "search_response.v1") +} +``` + +plain output: 기존 hit 줄들 + truncated 시 stderr 한 줄: + +``` +[truncated; use --cursor for the next page] +``` + +### kebab-mcp + +`SearchInput` 에 optional 필드 추가: + +```rust +pub struct SearchInput { + pub query: String, + pub mode: Option, + pub k: Option, + /// p9-fb-34 + pub max_tokens: Option, + pub snippet_chars: Option, + pub cursor: Option, +} +``` + +출력: `search_response.v1` JSON tag 적용 (CLI 와 동일 wrapper). 
+ +## Test plan + +| kind | description | +|------|-------------| +| unit (kebab-app) | `cursor::encode/decode` round-trip + corpus_revision mismatch → `StaleCursor` | +| unit (kebab-app) | `App::search_with_opts` budget=None → 기존 `App::search` 동일 (truncated=false, next_cursor 채움) | +| unit (kebab-app) | budget=200 tokens → snippet 60-char floor 까지 단축, truncated=true | +| unit (kebab-app) | budget < single-hit 최소 → k=1 + truncated=true (1 hit 보장) | +| unit (kebab-app) | snippet_chars override → 해당 길이로 truncate | +| 통합 (kebab-app) | cursor offset 5 호출 → 6번째 hit 부터 반환 | +| 통합 (kebab-app) | corpus_revision bump 후 cursor 재호출 → `StaleCursor` error.v1 | +| 통합 (kebab-cli) | `kebab search "x" --json` → `search_response.v1` shape | +| 통합 (kebab-cli) | `--max-tokens 200 --json` → truncated=true, hits 짧음 | +| 통합 (kebab-cli) | `--cursor ` → 다음 페이지 | +| 통합 (kebab-cli) | plain output: `[truncated; ...]` stderr 한 줄 | +| 통합 (kebab-mcp) | `mcp__kebab__search` tool 이 `search_response.v1` 반환 | +| 통합 (wire-schema) | `search_response.schema.json` validate 샘플 (with/without next_cursor) | +| 통합 (kebab-app) | 기존 `App::search` 호출자 (TUI 등) 무영향 — return type 동일 | + +## Implementation steps (high-level) + +1. wire schema 신규 `search_response.schema.json` + `error.v1` enum 에 `stale_cursor` 추가. +2. `kebab-core::SearchOpts` 도메인 type. +3. `kebab-app::SearchResponse` + `cursor` 모듈 (encode/decode). +4. `App::search_with_opts` impl (budget loop, cursor handling). +5. `App::search` thin wrapper 보존. +6. `kebab-cli::Cmd::Search` 새 flag + wire wrapper helper + plain truncated hint. +7. `kebab-mcp::SearchInput` 확장 + 출력 wrapper. +8. 단위 + 통합 테스트. +9. README + SMOKE — `--max-tokens` / `--cursor` 예시. +10. tasks/INDEX.md / spec status flip. +11. `tasks/HOTFIXES.md` — wire breaking 결정 로그. +12. `integrations/claude-code/kebab/SKILL.md` — search 결과 shape 변경 명시. + +## Risks / notes + +- **Wire breaking**: agent 가 기존 `search_hit.v1[]` 배열 직접 파싱 시 깨짐. HOTFIXES 결정 로그 + skill notes 반영 필수. 내부 single-user 환경이라 실용적 영향 적음. 
+- **`App::search` 시그니처 보존** 으로 TUI / 기존 caller 무영향. +- **chars/4 추정 정확도** ±15% — agent budget 보호 목적상 충분. tiktoken 도입은 별도 task. +- **cursor opaque** — agent 가 base64 decode 시도 막을 방법 없음. spec 에 "구조 변경 가능, 직접 파싱 금지" 명시. +- **corpus_revision 이 fb-19 LRU cache invalidation key 와 동일 source** — 별도 source-of-truth 추가 불필요. +- **TUI Search 패널 budget UI** — out of scope. 사용자가 원하면 fb-3X 후속. + +## Documentation updates (implementation PR 동시) + +- `README.md` — `kebab search` 명령 표 row 업데이트, `--max-tokens` / `--cursor` 한 줄. +- `docs/SMOKE.md` — pagination walkthrough 한 단락 (cursor 흐름 예시). +- `tasks/p9/p9-fb-34-output-budget-controls.md` — `status: open → completed`, design/plan 링크 추가. +- `tasks/INDEX.md` — fb-34 행 ✅. +- `tasks/HOTFIXES.md` — `2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1` 결정 로그. +- `integrations/claude-code/kebab/SKILL.md` — Recipe 의 search 결과 파싱 패턴 (`response.hits[]`) + cursor 예시. diff --git a/docs/wire-schema/v1/search_response.schema.json b/docs/wire-schema/v1/search_response.schema.json new file mode 100644 index 0000000..20e6eb8 --- /dev/null +++ b/docs/wire-schema/v1/search_response.schema.json @@ -0,0 +1,14 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/search_response.schema.json", + "title": "SearchResponse v1", + "description": "Top-level wrapper for `kebab search --json` output. Replaces the bare `search_hit.v1[]` array — wraps it with pagination + truncation metadata. Token counts are approximate (chars/4 estimate, no tokenizer dep). On `truncated: true`, caller may either widen `--max-tokens` or follow `next_cursor` for the next page. 
Stale `next_cursor` (corpus_revision changed since issued) returns `error.v1.code = stale_cursor`.", + "type": "object", + "required": ["schema_version", "hits", "next_cursor", "truncated"], + "properties": { + "schema_version": { "const": "search_response.v1" }, + "hits": { "type": "array", "description": "search_hit.v1[]" }, + "next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." }, + "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." } + } +} diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md index a065e71..34d5ef6 100644 --- a/integrations/claude-code/kebab/SKILL.md +++ b/integrations/claude-code/kebab/SKILL.md @@ -32,7 +32,7 @@ When `kebab` is registered as an MCP server (see `~/.claude/mcp.json` example be | tool | purpose | mutation | |------|---------|----------| -| `mcp__kebab__search` | corpus search → `search_hit.v1[]` | no | +| `mcp__kebab__search` | corpus search → `search_response.v1` (`{hits, next_cursor, truncated}`) | no | | `mcp__kebab__ask` | RAG answer → `answer.v1` | no | | `mcp__kebab__schema` | capability discovery → `schema.v1` | no | | `mcp__kebab__doctor` | health check → `doctor.v1` | no | @@ -47,12 +47,14 @@ Use when the user wants to **find** a doc, or when you (the model) need raw chun Input: ```json -{ "query": "", "mode": "hybrid", "k": 10 } +{ "query": "", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null } ``` - `mode = "hybrid"` is the default-correct choice. Use `"vector"` for semantic-only ("docs about X concept"), `"lexical"` for exact strings ("the literal flag `--foo-bar`"). -- Output is `search_hit.v1` array. 
Key fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`. +- **`max_tokens` / `snippet_chars` / `cursor` (p9-fb-34)** — agent budget controls. Set `max_tokens` to cap result wire size (chars/4 estimate); set `cursor` to the previous response's `next_cursor` to fetch the next page. +- Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`. - Cite back to the user as `doc_path § heading_path[-1]` so they can open the source. +- When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one. ### `mcp__kebab__ask` — when you need the answer @@ -102,7 +104,9 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across ## Parsing tips - MCP tools return JSON content blocks; CLI prints **one JSON value to stdout**, progress / warnings to stderr. Capture stdout only: `kebab search ... --json 2>/dev/null`. -- `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.[] | {rank, doc_path, heading: .heading_path[-1], snippet}'`. +- `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.hits[] | {rank, doc_path, heading: .heading_path[-1], snippet}'` (note: `.hits[]`, not `.[]` — fb-34 wrapped the array). 
Use `--max-tokens N` (CLI) / `max_tokens` (MCP) to cap wire size in advance. +- Pagination: `search_response.v1.next_cursor` is opaque base64 — pass back as `--cursor` (CLI) or `cursor` (MCP) for the next page. `null` means no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue search to obtain a fresh cursor. +- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. Independent of `next_cursor`: widen `max_tokens` for fuller snippets, follow `next_cursor` for more hits, or both. - `ask`'s `citations[]` mirrors `search_hit.v1` minus retrieval internals — same `doc_path` / `citation` shape. - Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear. - `search_hit.v1` and `answer.v1.citations[]` carry `indexed_at` (RFC3339) + `stale` (bool). When `stale == true`, the source doc hasn't been re-processed since `config.search.stale_threshold_days`. Surface this caveat to the user when summarizing — the cited snapshot may not reflect current reality. diff --git a/tasks/HOTFIXES.md b/tasks/HOTFIXES.md index 2efbcbc..2e69e84 100644 --- a/tasks/HOTFIXES.md +++ b/tasks/HOTFIXES.md @@ -14,6 +14,21 @@ historical contract that was implemented; this file accumulates the deltas so phase 5+ readers can find the live behavior without diffing git history. +## 2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1 + +**무엇이 바뀌었나**: `kebab search --json` stdout 이 기존 `search_hit.v1[]` 배열에서 신규 `search_response.v1` object 로 교체. wrapper 가 `hits`, `next_cursor`, `truncated` 세 필드를 가짐. + +**Spec contract 와의 관계**: 명시적 wire breaking change. spec `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md` 의 §Wire shape 절에 단일 출처 결정. + +**의식적 결정**: +- pagination + truncation metadata 를 `search_hit` 자체에 흡수하면 단일 hit 의 도메인 의미가 오염됨 (모든 hit 가 `next_cursor` 필드 보유 등). top-level wrapper 가 분리도 깨끗. +- 외부 consumer 영향: 단일 사용자 환경 + Claude Code skill 한 곳. 
skill 은 fb-34 와 동시 갱신. +- 이 변경은 search_hit.v1 자체 schema 는 손대지 않음 — 도메인 stable. + +**영향 받는 consumer**: kebab-tui (Search 패널 — 변경 불필요, App::search 시그니처 보존), kebab-mcp (search tool — 같은 PR 에서 갱신), Claude Code skill (같은 PR 에서 갱신). 외부 producer/consumer 없음. + +**`--no-cache` 의미 변화**: fb-34 이전 `--no-cache` 는 `search_uncached_with_config` 로 cache 자체를 우회. fb-34 는 cached path 위에 `clear_search_cache()` 호출 후 search 실행 — long-lived process (TUI / MCP) 에서는 clear 와 fetch 사이 race window 가 있음. CLI (fresh App per call) 에서는 무영향. 후속 fb-3X 에서 `search_with_opts_uncached` 추가로 격리. + ## 2026-05-09 — p9-fb-33: AskOpts.stream_sink type widened to StreamEvent **무엇이 바뀌었나**: `kebab_rag::AskOpts.stream_sink` 의 타입이 `Option>` 에서 `Option>` 로 변경됨. `kebab_app::StreamEvent` 가 새 re-export. diff --git a/tasks/INDEX.md b/tasks/INDEX.md index 1f69f76..1dbd5f9 100644 --- a/tasks/INDEX.md +++ b/tasks/INDEX.md @@ -122,7 +122,7 @@ P0~P5 는 직렬. P6~P9 는 P5 이후 병렬 가능. ### 🎯 0.4.0 — agent surface refinement (additive only) - [p9-fb-32 stale doc indicator](p9/p9-fb-32-stale-doc-indicator.md) — ✅ 머지 + v0.4.0 cut 후보 (2026-05-09) - [p9-fb-33 streaming ask (ndjson delta)](p9/p9-fb-33-streaming-ask.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09) - - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ⏳ 미구현, brainstorm 필요 + - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09) - [p9-fb-35 verbatim fetch](p9/p9-fb-35-verbatim-fetch.md) — ⏳ 미구현, brainstorm 필요 - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ⏳ 미구현, brainstorm 필요 - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ⏳ 미구현, brainstorm 필요 (depends_on 27) diff --git a/tasks/p9/p9-fb-34-output-budget-controls.md b/tasks/p9/p9-fb-34-output-budget-controls.md index 4aa99a8..3c9a546 100644 --- a/tasks/p9/p9-fb-34-output-budget-controls.md +++ b/tasks/p9/p9-fb-34-output-budget-controls.md @@ -3,8 +3,8 @@ phase: P9 component: kebab-cli + kebab-app + wire-schema task_id: 
p9-fb-34 title: "Output budget controls (--max-tokens / --snippet-chars / pagination)" -status: open -target_version: 0.4.0 +status: completed +target_version: 0.5.0 depends_on: [] unblocks: [] contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md @@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent context window 제 # p9-fb-34 — Output budget controls -> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. budget 적용 layer (truncate vs k 조정) / cursor 형식 / 기본값 brainstorm 후 확정. +> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 의 `2026-05-09 — p9-fb-34` 항목 참조 — live source of truth. + +상세 설계: `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md`. +구현 계획: `docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md`. ## 증상 / 동기