test(cli): wire_search_response + budget integration (fb-34)

Four lexical-only tests covering the search_response.v1 wrapper shape, --max-tokens truncation, --cursor pagination, and the plain-output stderr hint. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 20:09:01 +09:00
parent 21220f6d39
commit 603061fb86
2 changed files with 176 additions and 0 deletions
--- a/crates/kebab-cli/tests/common/mod.rs
+++ b/crates/kebab-cli/tests/common/mod.rs
@@ -126,6 +126,29 @@ pub fn ingest(cfg: &Path, workspace: &Path) {
    );
 }

+/// p9-fb-34: run `kebab search` against `cfg`, forwarding `args`
+/// verbatim (flags and the query alike), and hand back the captured
+/// `(stdout, stderr)` pair. No `--mode` / `--json` defaults are
+/// injected here, so the same helper serves both the JSON wrapper
+/// tests and the plain-output stderr-hint test. Panics (failing the
+/// test) when the binary cannot be spawned or exits non-zero; the
+/// panic message carries the args and the captured stderr.
+pub fn run_search_with_args(cfg: &Path, args: &[&str]) -> (String, String) {
+    let output = Command::new(env!("CARGO_BIN_EXE_kebab"))
+        .arg("--config")
+        .arg(cfg)
+        .arg("search")
+        .args(args)
+        .output()
+        .expect("kebab search");
+    // Decode both streams once; stderr doubles as failure context.
+    let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
+    let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
+    assert!(
+        output.status.success(),
+        "search failed: args={args:?} stderr={}",
+        stderr
+    );
+    (stdout, stderr)
+}
+
 /// p9-fb-33: invoke `kebab ask --stream --mode lexical <query>` and
 /// capture stdout + stderr. Lexical mode skips embeddings (matches
 /// `wire_ask_stale.rs::run_ask_lexical`). Caller asserts on the
--- a/crates/kebab-cli/tests/wire_search_response.rs
+++ b/crates/kebab-cli/tests/wire_search_response.rs
@@ -0,0 +1,153 @@
+//! p9-fb-34: CLI search wire wrapper + budget controls.
+//!
+//! Lexical-only — no fastembed / no Ollama. Each test builds its own
+//! TempDir KB via `common::write_config` + `common::ingest` and drives
+//! `kebab search` through `common::run_search_with_args`. Verifies:
+//!
+//! - `--json` emits the `search_response.v1` wrapper (hits + cursor +
+//!   truncated).
+//! - `--max-tokens` flips `truncated: true` once the budget binds.
+//! - `--cursor` advances paging (page 2 chunk_ids disjoint from page 1).
+//! - Plain (non-JSON) output prints the `[truncated; ...]` hint to
+//!   stderr (stdout stays the hit list).
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+
+/// `--json` output must be a single `search_response.v1` object
+/// exposing `hits` (array), `next_cursor` (null or string) and
+/// `truncated` (bool).
+#[test]
+fn search_json_emits_search_response_v1_wrapper() {
+    // Single-document KB; lexical mode keeps the run free of embedding
+    // backends.
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+    let doc_path = workspace.join("a.md");
+    fs::write(doc_path, "# T\n\napples are red.\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let argv = ["--json", "--mode", "lexical", "apples"];
+    let (stdout, _stderr) = common::run_search_with_args(&cfg, &argv);
+    let v: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON: {stdout:?}: {e}"),
+    };
+
+    assert_eq!(v["schema_version"], "search_response.v1");
+    assert!(v["hits"].is_array(), "hits must be array, got {v}");
+
+    let next_cursor = &v["next_cursor"];
+    assert!(
+        matches!(next_cursor, Value::Null | Value::String(_)),
+        "next_cursor must be null or string, got {}",
+        next_cursor
+    );
+
+    let truncated = &v["truncated"];
+    assert!(
+        truncated.is_boolean(),
+        "truncated must be bool, got {}",
+        truncated
+    );
+}
+
+/// With `--max-tokens 30` the JSON wrapper must report
+/// `truncated: true` for a document built from a repeated sentence.
+#[test]
+fn search_json_truncates_with_max_tokens() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // One sentence repeated ten times; the test expects this to
+    // overflow the 30-token budget requested below.
+    let body = "rust ownership is a memory model. ".repeat(10);
+    let doc_path = workspace.join("a.md");
+    fs::write(doc_path, format!("# T\n\n{body}\n")).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let argv = ["--json", "--mode", "lexical", "--max-tokens", "30", "rust"];
+    let (stdout, _stderr) = common::run_search_with_args(&cfg, &argv);
+    let v: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON: {stdout:?}: {e}"),
+    };
+
+    assert_eq!(
+        v["truncated"], true,
+        "30-token cap must trip truncation: {v}"
+    );
+}
+
+/// Feeding page 1's `next_cursor` back via `--cursor` must yield a
+/// non-empty page 2 whose `chunk_id`s do not overlap page 1's.
+#[test]
+fn search_json_cursor_paginates() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // Six small documents that all mention "rust", so a `--k 2` query
+    // is expected to leave results over for a second page.
+    for i in 0..6 {
+        let doc_path = workspace.join(format!("d{i}.md"));
+        let text = format!("# T{i}\n\nrust topic {i}\n");
+        fs::write(doc_path, text).unwrap();
+    }
+    common::ingest(&cfg, &workspace);
+
+    // Page 1: plain query, no cursor.
+    let page1_args = ["--json", "--mode", "lexical", "--k", "2", "rust"];
+    let (page1, _) = common::run_search_with_args(&cfg, &page1_args);
+    let v1: Value = match serde_json::from_str(page1.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("page1 not JSON: {page1:?}: {e}"),
+    };
+    let cursor = match v1["next_cursor"].as_str() {
+        Some(token) => token,
+        None => panic!("next_cursor missing on page1: {v1}"),
+    };
+
+    // Page 2: identical query, resumed from page 1's cursor.
+    let page2_args = ["--json", "--mode", "lexical", "--k", "2", "--cursor", cursor, "rust"];
+    let (page2, _) = common::run_search_with_args(&cfg, &page2_args);
+    let v2: Value = match serde_json::from_str(page2.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("page2 not JSON: {page2:?}: {e}"),
+    };
+
+    // Collect every hit's `chunk_id` from a response; `missing_hits`
+    // is the panic text used when `hits` is not an array.
+    let chunk_ids = |response: &Value, missing_hits: &str| -> Vec<String> {
+        response["hits"]
+            .as_array()
+            .expect(missing_hits)
+            .iter()
+            .map(|h| h["chunk_id"].as_str().expect("chunk_id string").to_string())
+            .collect()
+    };
+    let p1_ids = chunk_ids(&v1, "page1 hits array");
+    let p2_ids = chunk_ids(&v2, "page2 hits array");
+
+    assert!(
+        !p2_ids.is_empty(),
+        "page2 must return at least one hit (cursor advanced past page1)"
+    );
+    let repeats_page1 = p2_ids.iter().any(|id| p1_ids.contains(id));
+    assert!(
+        !repeats_page1,
+        "page2 must not repeat page1 chunk_ids: page1={p1_ids:?} page2={p2_ids:?}"
+    );
+}
+
+/// Without `--json`, a search run under `--max-tokens 30` must write
+/// the `[truncated;` hint to stderr.
+#[test]
+fn search_plain_emits_truncated_hint_to_stderr() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // Same repeated-sentence document as the JSON truncation test.
+    let body = "rust ownership is a memory model. ".repeat(10);
+    let doc_path = workspace.join("a.md");
+    fs::write(doc_path, format!("# T\n\n{body}\n")).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // No `--json` here: only the stderr stream is inspected.
+    let argv = ["--mode", "lexical", "--max-tokens", "30", "rust"];
+    let (_stdout, stderr) = common::run_search_with_args(&cfg, &argv);
+
+    let has_hint = stderr.contains("[truncated;");
+    assert!(has_hint, "stderr must carry truncated hint: {stderr:?}");
+}