From 603061fb8650abcb4e5ab3d5040d70ab6d8a5421 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sat, 9 May 2026 20:09:01 +0900
Subject: [PATCH] test(cli): wire_search_response + budget integration (fb-34)

4 lexical-only tests covering search_response.v1 wrapper shape,
--max-tokens truncation, --cursor pagination, plain stderr hint.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/kebab-cli/tests/common/mod.rs          |  23 +++
 .../kebab-cli/tests/wire_search_response.rs   | 153 ++++++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 crates/kebab-cli/tests/wire_search_response.rs

diff --git a/crates/kebab-cli/tests/common/mod.rs b/crates/kebab-cli/tests/common/mod.rs
index 8926bd2..70c1924 100644
--- a/crates/kebab-cli/tests/common/mod.rs
+++ b/crates/kebab-cli/tests/common/mod.rs
@@ -126,6 +126,29 @@ pub fn ingest(cfg: &Path, workspace: &Path) {
     );
 }
 
+/// p9-fb-34: invoke `kebab search` with arbitrary trailing flags +
+/// query, capture stdout + stderr. Caller is responsible for
+/// supplying `--mode lexical` / `--json` etc. as needed; this helper
+/// stays unopinionated so a single test can exercise both wire shapes
+/// (JSON wrapper + plain stderr hint). Asserts the binary exited 0;
+/// non-zero exits fail the test with stderr included.
+pub fn run_search_with_args(cfg: &Path, args: &[&str]) -> (String, String) {
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let mut cmd = Command::new(bin);
+    cmd.arg("--config").arg(cfg).arg("search");
+    cmd.args(args);
+    let out = cmd.output().expect("kebab search");
+    assert!(
+        out.status.success(),
+        "search failed: args={args:?} stderr={}",
+        String::from_utf8_lossy(&out.stderr)
+    );
+    (
+        String::from_utf8_lossy(&out.stdout).to_string(),
+        String::from_utf8_lossy(&out.stderr).to_string(),
+    )
+}
+
 /// p9-fb-33: invoke `kebab ask --stream --mode lexical ` and
 /// capture stdout + stderr. Lexical mode skips embeddings (matches
 /// `wire_ask_stale.rs::run_ask_lexical`). Caller asserts on the
diff --git a/crates/kebab-cli/tests/wire_search_response.rs b/crates/kebab-cli/tests/wire_search_response.rs
new file mode 100644
index 0000000..ab65f29
--- /dev/null
+++ b/crates/kebab-cli/tests/wire_search_response.rs
@@ -0,0 +1,153 @@
+//! p9-fb-34: CLI search wire wrapper + budget controls.
+//!
+//! Lexical-only — no fastembed / no Ollama. Each test builds its own
+//! TempDir KB via `common::write_config` + `common::ingest` and drives
+//! `kebab search` through `common::run_search_with_args`. Verifies:
+//!
+//! - `--json` emits the `search_response.v1` wrapper (hits + cursor +
+//!   truncated).
+//! - `--max-tokens` flips `truncated: true` once the budget binds.
+//! - `--cursor` advances paging (page 2 chunk_ids disjoint from page 1).
+//! - Plain (non-JSON) output prints the `[truncated; ...]` hint to
+//!   stderr (stdout stays the hit list).
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+
+#[test]
+fn search_json_emits_search_response_v1_wrapper() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    fs::write(workspace.join("a.md"), "# T\n\napples are red.\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let (stdout, _stderr) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "apples"],
+    );
+    let v: Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}"));
+    assert_eq!(v["schema_version"], "search_response.v1");
+    assert!(v["hits"].is_array(), "hits must be array, got {v}");
+    assert!(
+        v["next_cursor"].is_null() || v["next_cursor"].is_string(),
+        "next_cursor must be null or string, got {}",
+        v["next_cursor"]
+    );
+    assert!(
+        v["truncated"].is_boolean(),
+        "truncated must be bool, got {}",
+        v["truncated"]
+    );
+}
+
+#[test]
+fn search_json_truncates_with_max_tokens() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    let body: String = "rust ownership is a memory model. ".repeat(10);
+    fs::write(workspace.join("a.md"), format!("# T\n\n{body}\n")).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let (stdout, _stderr) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "--max-tokens", "30", "rust"],
+    );
+    let v: Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}"));
+    assert_eq!(
+        v["truncated"], true,
+        "30-token cap must trip truncation: {v}"
+    );
+}
+
+#[test]
+fn search_json_cursor_paginates() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    for i in 0..6 {
+        fs::write(
+            workspace.join(format!("d{i}.md")),
+            format!("# T{i}\n\nrust topic {i}\n"),
+        )
+        .unwrap();
+    }
+    common::ingest(&cfg, &workspace);
+
+    let (page1, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "--k", "2", "rust"],
+    );
+    let v1: Value = serde_json::from_str(page1.trim())
+        .unwrap_or_else(|e| panic!("page1 not JSON: {page1:?}: {e}"));
+    let cursor = v1["next_cursor"]
+        .as_str()
+        .unwrap_or_else(|| panic!("next_cursor missing on page1: {v1}"));
+
+    let (page2, _) = common::run_search_with_args(
+        &cfg,
+        &[
+            "--json",
+            "--mode",
+            "lexical",
+            "--k",
+            "2",
+            "--cursor",
+            cursor,
+            "rust",
+        ],
+    );
+    let v2: Value = serde_json::from_str(page2.trim())
+        .unwrap_or_else(|e| panic!("page2 not JSON: {page2:?}: {e}"));
+
+    let p1_ids: Vec<String> = v1["hits"]
+        .as_array()
+        .expect("page1 hits array")
+        .iter()
+        .map(|h| {
+            h["chunk_id"]
+                .as_str()
+                .expect("chunk_id string")
+                .to_string()
+        })
+        .collect();
+    let p2_ids: Vec<String> = v2["hits"]
+        .as_array()
+        .expect("page2 hits array")
+        .iter()
+        .map(|h| {
+            h["chunk_id"]
+                .as_str()
+                .expect("chunk_id string")
+                .to_string()
+        })
+        .collect();
+    assert!(
+        !p2_ids.is_empty(),
+        "page2 must return at least one hit (cursor advanced past page1)"
+    );
+    assert!(
+        p2_ids.iter().all(|id| !p1_ids.contains(id)),
+        "page2 must not repeat page1 chunk_ids: page1={p1_ids:?} page2={p2_ids:?}"
+    );
+}
+
+#[test]
+fn search_plain_emits_truncated_hint_to_stderr() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    let body: String = "rust ownership is a memory model. ".repeat(10);
+    fs::write(workspace.join("a.md"), format!("# T\n\n{body}\n")).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let (_stdout, stderr) = common::run_search_with_args(
+        &cfg,
+        &["--mode", "lexical", "--max-tokens", "30", "rust"],
+    );
+    assert!(
+        stderr.contains("[truncated;"),
+        "stderr must carry truncated hint: {stderr:?}"
+    );
+}