diff --git a/crates/kebab-cli/tests/common/mod.rs b/crates/kebab-cli/tests/common/mod.rs
index 70c1924..27e2d3b 100644
--- a/crates/kebab-cli/tests/common/mod.rs
+++ b/crates/kebab-cli/tests/common/mod.rs
@@ -196,6 +196,30 @@ pub fn run_ask_json(cfg: &Path, query: &str) -> String {
     String::from_utf8_lossy(&out.stdout).to_string()
 }
 
+/// p9-fb-35: invoke `kebab fetch` with arbitrary trailing flags,
+/// capture stdout + stderr. Caller is responsible for supplying
+/// `--json` (global flag) before the subcommand position via the
+/// `args` slice (e.g. `&["--json", "chunk", &id]`). Asserts the
+/// binary exited 0; non-zero exits fail the test with stderr
+/// included — for negative-path tests (unknown chunk_id etc.) drive
+/// the binary directly via `std::process::Command`.
+pub fn run_fetch_with_args(cfg: &Path, args: &[&str]) -> (String, String) {
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let mut cmd = Command::new(bin);
+    cmd.arg("--config").arg(cfg).arg("fetch");
+    cmd.args(args);
+    let out = cmd.output().expect("kebab fetch");
+    assert!(
+        out.status.success(),
+        "fetch failed: args={args:?} stderr={}",
+        String::from_utf8_lossy(&out.stderr)
+    );
+    (
+        String::from_utf8_lossy(&out.stdout).to_string(),
+        String::from_utf8_lossy(&out.stderr).to_string(),
+    )
+}
+
 /// Rewrite `documents.updated_at` for one workspace path to
 /// `now - days_ago` (RFC3339 UTC). Mirrors
 /// `kebab-app/tests/common/mod.rs::backdate_document_updated_at`.
diff --git a/crates/kebab-cli/tests/wire_fetch.rs b/crates/kebab-cli/tests/wire_fetch.rs
new file mode 100644
index 0000000..550720b
--- /dev/null
+++ b/crates/kebab-cli/tests/wire_fetch.rs
@@ -0,0 +1,130 @@
+//! p9-fb-35: CLI fetch wire shape + plain output + exit codes.
+//!
+//! Lexical-only — no fastembed / no Ollama. Each test builds its own
+//! TempDir KB via `common::write_config` + `common::ingest` and drives
+//! `kebab fetch` through `common::run_fetch_with_args`. Verifies:
+//!
+//! - `--json fetch chunk <id>` emits the `fetch_result.v1` wrapper
+//!   with `kind = "chunk"` and a populated `chunk` object.
+//! - `--json fetch doc <id> --max-tokens N` flips `truncated: true`
+//!   once the budget binds.
+//! - Unknown `chunk_id` exits non-zero and emits an `error.v1`
+//!   ndjson line on stderr with `code = "chunk_not_found"`.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+
+#[test]
+fn fetch_chunk_json_emits_fetch_result_v1() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    fs::write(workspace.join("a.md"), "# T\n\napples are red.\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Find chunk_id via search.
+    let (search_stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "--k", "1", "apples"],
+    );
+    let search: Value = serde_json::from_str(search_stdout.trim())
+        .unwrap_or_else(|e| panic!("search not JSON: {search_stdout:?}: {e}"));
+    let chunk_id = search["hits"][0]["chunk_id"]
+        .as_str()
+        .expect("chunk_id on first hit")
+        .to_string();
+
+    let (stdout, _) = common::run_fetch_with_args(
+        &cfg,
+        &["--json", "chunk", &chunk_id],
+    );
+    let v: Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("fetch not JSON: {stdout:?}: {e}"));
+    assert_eq!(v["schema_version"], "fetch_result.v1");
+    assert_eq!(v["kind"], "chunk");
+    assert!(
+        v["chunk"].is_object(),
+        "target chunk must be populated: {v}"
+    );
+    assert_eq!(v["truncated"], false);
+}
+
+#[test]
+fn fetch_doc_json_with_max_tokens_truncates() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    let body: String = "Lorem ipsum dolor sit amet. ".repeat(20);
+    fs::write(workspace.join("big.md"), format!("# Big\n\n{body}\n")).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Find doc_id via search.
+    let (search_stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "--k", "1", "Lorem"],
+    );
+    let search: Value = serde_json::from_str(search_stdout.trim())
+        .unwrap_or_else(|e| panic!("search not JSON: {search_stdout:?}: {e}"));
+    let doc_id = search["hits"][0]["doc_id"]
+        .as_str()
+        .expect("doc_id on first hit")
+        .to_string();
+
+    let (stdout, _) = common::run_fetch_with_args(
+        &cfg,
+        &["--json", "doc", &doc_id, "--max-tokens", "20"],
+    );
+    let v: Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("fetch not JSON: {stdout:?}: {e}"));
+    assert_eq!(v["kind"], "doc");
+    assert_eq!(
+        v["truncated"], true,
+        "20-token cap must trip truncation: {v}"
+    );
+}
+
+#[test]
+fn fetch_chunk_unknown_id_exits_with_error_v1() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, _workspace, _data) = common::write_config(dir.path(), 30);
+
+    // Direct invocation (not via the success-asserting helper) so we
+    // can read stderr on failure — mirrors the stale_cursor test in
+    // `wire_search_response.rs`.
+    let exe = env!("CARGO_BIN_EXE_kebab");
+    let cfg_str = cfg.to_str().expect("utf8");
+    let out = std::process::Command::new(exe)
+        .args([
+            "--config",
+            cfg_str,
+            "--json",
+            "fetch",
+            "chunk",
+            "nonexistent",
+        ])
+        .output()
+        .expect("kebab fetch");
+
+    assert_ne!(out.status.code(), Some(0), "must exit non-zero");
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    let err_line = stderr
+        .lines()
+        .find(|l| {
+            serde_json::from_str::<Value>(l)
+                .ok()
+                .and_then(|v| {
+                    v.get("schema_version")
+                        .and_then(|s| s.as_str())
+                        .map(String::from)
+                })
+                .as_deref()
+                == Some("error.v1")
+        })
+        .unwrap_or_else(|| panic!("no error.v1 line on stderr: {stderr:?}"));
+
+    let v: Value = serde_json::from_str(err_line).expect("error.v1 json");
+    assert_eq!(
+        v["code"], "chunk_not_found",
+        "code must be chunk_not_found: {err_line}"
+    );
+}