diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs
index 34ae3f0..4f3b64a 100644
--- a/crates/kebab-app/src/app.rs
+++ b/crates/kebab-app/src/app.rs
@@ -41,7 +41,7 @@ use lru::LruCache;
 
 use kebab_core::{
     Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode,
-    SearchQuery, VectorStore,
+    SearchOpts, SearchQuery, VectorStore,
 };
 use kebab_embed_local::FastembedEmbedder;
 use kebab_llm_local::OllamaLanguageModel;
@@ -50,6 +50,20 @@ use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
 use kebab_store_sqlite::SqliteStore;
 use kebab_store_vector::LanceVectorStore;
 
+/// p9-fb-34: top-level wrapper around a paginated, budget-limited
+/// search result. Mirrors the wire `search_response.v1` shape.
+/// `truncated` is set when the budget loop shortened a snippet or
+/// dropped a hit. `next_cursor` is `Some(_)` when a full, non-empty
+/// `k_effective` page was returned (more hits may exist) or when
+/// the budget loop dropped hits; the caller threads it back
+/// through [`SearchOpts::cursor`] on the next call.
+#[derive(Clone, Debug)]
+pub struct SearchResponse {
+    pub hits: Vec<SearchHit>,
+    pub next_cursor: Option<String>,
+    pub truncated: bool,
+}
+
 /// Facade state — see module docs for lifetime rules.
 ///
 /// The struct is public so long-lived callers (kb-eval, the future P9
@@ -274,6 +288,129 @@ impl App {
         Ok(hits)
     }
 
+    /// p9-fb-34: budget-aware search facade. Returns hits trimmed to
+    /// `opts.max_tokens` (chars/4 approximation of the wire JSON),
+    /// honors a `snippet_chars` override, and threads an opaque
+    /// pagination cursor through `corpus_revision`; a cursor that
+    /// fails to decode is reported as a `stale_cursor: …` error.
+    ///
+    /// Budget loop: (1) shorten snippets progressively (halve cap,
+    /// floor at 60 chars) until the estimated wire-JSON size fits or
+    /// the floor is reached; (2) pop hits off the end until the
+    /// budget fits, but always retain ≥ 1 hit (the spec floor).
+    ///
+    /// `next_cursor` is set when a full, non-empty page was returned
+    /// (more results may exist) or step 2 dropped hits; shortening
+    /// snippets alone never produces one. `App::search` is unchanged
+    /// and remains the cache-served fast path used by the existing
+    /// TUI / kebab-rag callers.
+    pub fn search_with_opts(
+        &self,
+        query: SearchQuery,
+        opts: SearchOpts,
+    ) -> Result<SearchResponse> {
+        use crate::cursor;
+
+        let corpus_revision = self.sqlite.corpus_revision().to_string();
+        let offset = match opts.cursor.as_ref() {
+            Some(c) => cursor::decode(c, &corpus_revision)
+                .map_err(|e| anyhow!("stale_cursor: {}", e.message))?,
+            None => 0,
+        };
+
+        let snippet_chars = opts
+            .snippet_chars
+            .unwrap_or(self.config.search.snippet_chars);
+
+        // Fetch enough to satisfy offset + the requested page. The
+        // retriever returns at most `fetch_k` hits — we then drop
+        // `offset` and keep the next `k_effective`. `k = 0` is
+        // treated as "use config default" so a caller passing through
+        // a default-constructed `SearchQuery` still gets useful work
+        // out of the budget facade.
+        let k_effective = if query.k == 0 {
+            self.config.search.default_k
+        } else {
+            query.k
+        };
+        let fetch_k = offset.saturating_add(k_effective);
+        let fetch_query = SearchQuery {
+            k: fetch_k,
+            ..query
+        };
+        let mut all_hits = self.search(fetch_query)?;
+
+        // Skip offset.
+        let drop_n = offset.min(all_hits.len());
+        all_hits.drain(..drop_n);
+        let mut hits: Vec<SearchHit> =
+            all_hits.into_iter().take(k_effective).collect();
+
+        // Apply snippet_chars override if shorter than what the
+        // retriever returned (retriever already honored
+        // `config.search.snippet_chars`; this only kicks in when the
+        // caller asked for *less*).
+        if opts.snippet_chars.is_some() {
+            for h in hits.iter_mut() {
+                if h.snippet.chars().count() > snippet_chars {
+                    h.snippet = trim_to_chars(&h.snippet, snippet_chars);
+                }
+            }
+        }
+
+        // Budget loop; `dropped_hits` is set only when step 2 pops.
+        let mut truncated = false;
+        let mut dropped_hits = false;
+        if let Some(max_tokens) = opts.max_tokens {
+            let max_chars = max_tokens.saturating_mul(4);
+            // Step 1: shorten snippets progressively to a 60-char floor.
+            const SNIPPET_FLOOR: usize = 60;
+            let mut current_snippet_cap = snippet_chars;
+            while estimate_chars(&hits) > max_chars
+                && current_snippet_cap > SNIPPET_FLOOR
+            {
+                current_snippet_cap =
+                    (current_snippet_cap / 2).max(SNIPPET_FLOOR);
+                for h in hits.iter_mut() {
+                    if h.snippet.chars().count() > current_snippet_cap {
+                        h.snippet =
+                            trim_to_chars(&h.snippet, current_snippet_cap);
+                        truncated = true;
+                    }
+                }
+            }
+            // Step 2: pop hits from the end until we fit, but always
+            // keep ≥ 1.
+            while estimate_chars(&hits) > max_chars && hits.len() > 1 {
+                hits.pop();
+                truncated = true;
+                dropped_hits = true;
+            }
+        }
+
+        // Compute next_cursor. Two paths produce one:
+        //   - We returned a full, non-empty `k_effective` page →
+        //     more hits may remain; the cursor is speculative (the
+        //     next call falls through to an empty page if nothing's
+        //     left, which is fine).
+        //   - The budget loop dropped hits → resume right after the
+        //     last hit we kept so the caller can fetch the rest.
+        //     Snippet-only shortening never sets a cursor by itself.
+        let returned = hits.len();
+        let full_page = returned > 0 && returned == k_effective;
+        let next_cursor = if full_page || dropped_hits {
+            Some(cursor::encode(offset + returned, &corpus_revision))
+        } else {
+            None
+        };
+
+        Ok(SearchResponse {
+            hits,
+            next_cursor,
+            truncated,
+        })
+    }
+
     /// Run a RAG `ask` against the configured retriever + LLM. Reuses
     /// the memoized embedder / vector / LLM where applicable.
     pub fn ask(&self, query: &str, opts: AskOpts) -> Result {
@@ -627,6 +764,35 @@ fn blake3_truncate(input: &str) -> u128 {
     u128::from_be_bytes(buf)
 }
 
+/// p9-fb-34: trim `s` to at most `n` Unicode scalar chars.
+/// Equivalent to `.chars().take(n).collect::<String>()`, but
+/// reserves capacity up front (UTF-8 worst case, 4 bytes / char)
+/// so the inner push never re-allocates.
+fn trim_to_chars(s: &str, n: usize) -> String {
+    if s.chars().count() <= n {
+        return s.to_owned();
+    }
+    // Past the guard `n` is below the char count of `s`, so the
+    // result is a strict prefix: it can never need more bytes than
+    // `s.len()`, and never more than 4 bytes per kept char. Taking
+    // the smaller bound keeps the single-allocation guarantee while
+    // avoiding a 4x over-reservation on ASCII-heavy snippets.
+    let mut out = String::with_capacity(n.saturating_mul(4).min(s.len()));
+    out.extend(s.chars().take(n));
+    out
+}
+
+/// p9-fb-34: estimate the wire-JSON cost of a hit list, measured as
+/// the UTF-8 byte length of each serialized hit (equal to the char
+/// count for ASCII). Used by the budget loop in
+/// `App::search_with_opts`. Serialization failures count as 0 so a
+/// single broken hit never makes that loop spin forever.
+fn estimate_chars(hits: &[SearchHit]) -> usize {
+    hits.iter()
+        .map(|h| serde_json::to_string(h).map_or(0, |s| s.len()))
+        .sum()
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index 8bbc5d2..45ba594 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -66,7 +66,7 @@ pub mod reset;
 pub mod schema;
 mod staleness;
 
-pub use app::App;
+pub use app::{App, SearchResponse};
 pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
 pub use reset::{ResetReport, ResetScope};
 pub use error_wire::{ERROR_V1_ID, ErrorV1, classify};
@@ -1740,6 +1740,19 @@ pub fn search_uncached_with_config(
     App::open_with_config(config)?.search_uncached(query)
 }
 
+/// p9-fb-34: budget-aware search free function. Mirrors
+/// [`search_with_config`] but threads `SearchOpts` (max_tokens,
+/// snippet_chars, cursor) and returns the [`SearchResponse`]
+/// pagination wrapper. Tasks 6+8 surface this via CLI / MCP.
+#[doc(hidden)]
+pub fn search_with_opts_with_config(
+    config: kebab_config::Config,
+    query: kebab_core::SearchQuery,
+    opts: kebab_core::SearchOpts,
+) -> anyhow::Result<SearchResponse> {
+    App::open_with_config(config)?.search_with_opts(query, opts)
+}
+
 // ── ask ──────────────────────────────────────────────────────────────────
 //
 // P4-3 wires `ask` end-to-end.
The retriever is built per `opts.mode`;
diff --git a/crates/kebab-app/tests/common/mod.rs b/crates/kebab-app/tests/common/mod.rs
index ce2a28f..c06098f 100644
--- a/crates/kebab-app/tests/common/mod.rs
+++ b/crates/kebab-app/tests/common/mod.rs
@@ -79,6 +79,37 @@ impl TestEnv {
             ..Default::default()
         }
     }
+
+    /// p9-fb-34: plain constructor. The fb-34 tests call
+    /// `TestEnv::new()` per the plan, so this simply delegates to
+    /// the existing lexical-only constructor — the lane stays
+    /// AVX-free and no existing caller has to change.
+    pub fn new() -> Self {
+        TestEnv::lexical_only()
+    }
+
+    /// p9-fb-34: build a new `App` from a clone of this env's config,
+    /// panicking if it cannot be opened. Integration tests use it to
+    /// reach `App::search_with_opts` directly, and may call it again
+    /// to simulate re-opening the binary after a corpus revision bump.
+    pub fn app(&self) -> kebab_app::App {
+        let config = self.config.clone();
+        kebab_app::App::open_with_config(config).expect("App::open_with_config")
+    }
+}
+
+/// p9-fb-34: write `content` to `relative_path` under the env's
+/// workspace root (creating parent directories as needed), then run
+/// an ingest via `kebab_app::ingest_with_config` so the document
+/// becomes searchable. Panics on any I/O or ingest failure.
+pub fn ingest_md(env: &TestEnv, relative_path: &str, content: &str) {
+    let target = env.workspace_root.join(relative_path);
+    if let Some(dir) = target.parent() {
+        std::fs::create_dir_all(dir).expect("create parent dirs");
+    }
+    std::fs::write(&target, content).expect("write workspace file");
+    let outcome = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true);
+    outcome.expect("ingest_with_config");
 }
 
 /// Test helper: build a `SearchQuery` for lexical mode at k=10. Used
diff --git a/crates/kebab-app/tests/search_budget_integration.rs b/crates/kebab-app/tests/search_budget_integration.rs
new file mode 100644
index 0000000..bded4af
--- /dev/null
+++ b/crates/kebab-app/tests/search_budget_integration.rs
@@ -0,0 +1,121 @@
+//! p9-fb-34: App::search_with_opts integration tests.
+
+mod common;
+
+use kebab_app::SearchResponse;
+use kebab_core::{SearchFilters, SearchMode, SearchOpts, SearchQuery};
+
+fn lex(text: &str, k: usize) -> SearchQuery {
+    SearchQuery {
+        text: text.to_string(),
+        mode: SearchMode::Lexical,
+        k,
+        filters: SearchFilters::default(),
+    }
+}
+
+#[test]
+fn search_with_opts_no_budget_matches_search() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "a.md", "# T\n\napples are red\n");
+    let app = env.app();
+
+    let baseline = app.search(lex("apples", 5)).unwrap();
+    let resp: SearchResponse = app
+        .search_with_opts(lex("apples", 5), SearchOpts::default())
+        .unwrap();
+
+    assert_eq!(resp.hits.len(), baseline.len());
+    assert!(!resp.truncated);
+    assert!(resp.next_cursor.is_none(), "k=5 against 1 doc → no next page");
+}
+
+#[test]
+fn budget_truncates_snippets_when_below_threshold() {
+    let env = common::TestEnv::new();
+    let body: String = "rust ownership is a memory model. ".repeat(10);
+    common::ingest_md(&env, "a.md", &format!("# T\n\n{body}\n"));
+    let app = env.app();
+
+    let unrestricted = app.search(lex("rust", 5)).unwrap();
+    let unrestricted_chars: usize = unrestricted.iter().map(|h| h.snippet.chars().count()).sum();
+
+    let resp = app
+        .search_with_opts(
+            lex("rust", 5),
+            SearchOpts {
+                max_tokens: Some(50),
+                snippet_chars: None,
+                cursor: None,
+            },
+        )
+        .unwrap();
+    let limited_chars: usize = resp.hits.iter().map(|h| h.snippet.chars().count()).sum();
+
+    assert!(resp.truncated, "small budget must trip truncation");
+    assert!(limited_chars < unrestricted_chars, "snippet should shrink");
+    assert!(!resp.hits.is_empty(), "always retain ≥1 hit");
+}
+
+#[test]
+fn cursor_paginates_to_next_page() {
+    let env = common::TestEnv::new();
+    for i in 0..6 {
+        common::ingest_md(&env, &format!("d{i}.md"), &format!("# T{i}\n\nrust topic {i}\n"));
+    }
+    let app = env.app();
+
+    let page1 = app
+        .search_with_opts(lex("rust", 2), SearchOpts::default())
+        .unwrap();
+    assert_eq!(page1.hits.len(), 2);
+    let cursor = page1.next_cursor.expect("more hits available");
+
+    let page2 = app
+        .search_with_opts(
+            lex("rust", 2),
+            SearchOpts {
+                max_tokens: None,
+                snippet_chars: None,
+                cursor: Some(cursor),
+            },
+        )
+        .unwrap();
+    assert_eq!(page2.hits.len(), 2);
+    let p1_ids: std::collections::HashSet<_> =
+        page1.hits.iter().map(|h| h.chunk_id.0.clone()).collect();
+    let p2_ids: std::collections::HashSet<_> =
+        page2.hits.iter().map(|h| h.chunk_id.0.clone()).collect();
+    assert!(p1_ids.is_disjoint(&p2_ids), "page 2 must not repeat page 1 hits");
+}
+
+#[test]
+fn cursor_rejected_after_corpus_revision_bump() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "a.md", "# T\n\napples\n");
+    let app = env.app();
+
+    let page1 = app
+        .search_with_opts(lex("apples", 1), SearchOpts::default())
+        .unwrap();
+    // k=1 against a matching doc fills the page, so a cursor must
+    // exist; `expect` stops the test from passing vacuously.
+    let cursor = page1.next_cursor.expect("full page must yield a cursor");
+
+    common::ingest_md(&env, "b.md", "# B\n\nbananas\n");
+    let app2 = env.app();
+
+    let result = app2.search_with_opts(
+        lex("apples", 1),
+        SearchOpts {
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: Some(cursor),
+        },
+    );
+    let err = result.unwrap_err();
+    assert!(
+        err.to_string().contains("stale_cursor"),
+        "must surface stale_cursor: {err}"
+    );
+}