diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index 5ed1325..935ff81 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -250,6 +250,18 @@ enum Cmd {
         /// `answer.v1`. Off by default to preserve final-only behavior.
         #[arg(long)]
         stream: bool,
+
+        /// p9-fb-41: route this ask through the multi-hop pipeline
+        /// — the query is decomposed into sub-questions, each
+        /// retrieved independently, then synthesized over the
+        /// merged chunk pool. Cost trade-off: 2–5× LLM calls
+        /// (decompose + 0..N decide + synthesize) vs. single-pass.
+        /// Worth it for compound questions (X 와 Y 의 관계, prereq
+        /// chain, cross-doc reasoning); single-pass is faster for
+        /// simple fact lookups. The full per-hop trace is exposed
+        /// on `Answer.hops` in `--json` mode.
+        #[arg(long)]
+        multi_hop: bool,
     },
 
     /// Wipe XDG data dirs (and optionally the Lance vector store) so the
@@ -973,6 +985,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
             hide_citations,
             session,
             stream,
+            multi_hop,
         } => {
             let cfg = kebab_config::Config::load(cli.config.as_deref())?;
             if *stream {
@@ -999,7 +1012,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
                     history: Vec::new(),
                     conversation_id: None,
                     turn_index: None,
-                    multi_hop: false,
+                    multi_hop: *multi_hop,
                 };
                 let cfg2 = cfg.clone();
                 let q = query.clone();
@@ -1075,7 +1088,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
                 history: Vec::new(),
                 conversation_id: None,
                 turn_index: None,
-                multi_hop: false,
+                multi_hop: *multi_hop,
             };
             let ans = match session.as_deref() {
                 Some(sid) => kebab_app::ask_with_session_with_config(cfg, sid, query, opts)?,
diff --git a/crates/kebab-cli/tests/wire_ask_multi_hop.rs b/crates/kebab-cli/tests/wire_ask_multi_hop.rs
new file mode 100644
index 0000000..ab65443
--- /dev/null
+++ b/crates/kebab-cli/tests/wire_ask_multi_hop.rs
@@ -0,0 +1,166 @@
+//! p9-fb-41 PR-4: CLI `--multi-hop` flag wiring + answer.v1 / error.v1
+//! schema additivity.
+//!
+//! Four Ollama-free pins:
+//!
+//! 1. `--multi-hop` is exposed on `kebab ask --help` so users can
+//!    discover the flag at the CLI surface (clap-level smoke).
+//! 2. `answer.schema.json` parses as valid JSON and declares a
+//!    `hops` property whose items `$ref` a `HopRecord` `$defs` entry —
+//!    guards against accidental schema deletion / typo in future edits.
+//! 3. `answer.schema.json`'s `refusal_reason` enum lists
+//!    `multi_hop_decompose_failed` — agents validating against
+//!    the schema accept the new variant on refusal answers.
+//! 4. `error.schema.json`'s `code` enum lists
+//!    `multi_hop_decompose_failed` — forward-looking enum extension
+//!    documented in PR-4.
+//!
+//! End-to-end multi-hop ask against a live Ollama lands in a
+//! follow-up `#[ignore]` test (same pattern as `wire_ask_stale.rs`).
+
+use std::path::PathBuf;
+use std::process::Command;
+
+/// Resolves `docs/wire-schema/v1/<name>`, two directories above this
+/// crate's manifest directory.
+fn schema_path(name: &str) -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("..")
+        .join("..")
+        .join("docs")
+        .join("wire-schema")
+        .join("v1")
+        .join(name)
+}
+
+/// Reads the schema file `name` and parses it as JSON, panicking with
+/// the file name in the message when it is unreadable or malformed.
+fn parse_schema(name: &str) -> serde_json::Value {
+    let text = std::fs::read_to_string(schema_path(name))
+        .unwrap_or_else(|e| panic!("read {name}: {e}"));
+    serde_json::from_str(&text)
+        .unwrap_or_else(|e| panic!("{name} must parse as valid JSON: {e}"))
+}
+
+#[test]
+fn cli_ask_help_advertises_multi_hop_flag() {
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let out = Command::new(bin)
+        .args(["ask", "--help"])
+        .output()
+        .unwrap_or_else(|e| panic!("failed to spawn `{bin} ask --help`: {e}"));
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    // Exit status and stderr ride along in the failure message: if the
+    // binary fails before printing help, stdout alone would not explain
+    // why.
+    assert!(
+        stdout.contains("--multi-hop"),
+        "`kebab ask --help` must advertise --multi-hop so users can discover it \
+         ({}):\nstdout:\n{stdout}\nstderr:\n{}",
+        out.status,
+        String::from_utf8_lossy(&out.stderr)
+    );
+}
+
+#[test]
+fn answer_schema_declares_hops_property_with_hop_record_defs() {
+    let schema = parse_schema("answer.schema.json");
+    assert!(
+        schema["properties"]["hops"].is_object(),
+        "`hops` property must be declared on answer.v1"
+    );
+    // `hops` allows array-or-null (single-pass omits the field;
+    // multi-hop emits a non-empty array).
+    let hops_any_of = schema["properties"]["hops"]["anyOf"]
+        .as_array()
+        .expect("hops must declare anyOf (array | null)");
+    let array_branch = hops_any_of
+        .iter()
+        .find(|v| v["type"] == "array")
+        .expect("hops anyOf must include array shape");
+    // Declaring `$defs.HopRecord` is not enough on its own: a typo'd or
+    // dropped `$ref` would leave the array items unconstrained.
+    assert_eq!(
+        array_branch["items"]["$ref"],
+        "#/$defs/HopRecord",
+        "hops array items must $ref #/$defs/HopRecord"
+    );
+    assert!(
+        hops_any_of.iter().any(|v| v["type"] == "null"),
+        "hops anyOf must include null (single-pass omits the field)"
+    );
+
+    // HopRecord $defs entry — guards against accidental deletion or
+    // structural drift in future schema edits.
+    let hop_record = &schema["$defs"]["HopRecord"];
+    assert!(
+        hop_record.is_object(),
+        "$defs.HopRecord must be declared so `hops.items` can $ref it"
+    );
+    let kind_enum = hop_record["properties"]["kind"]["enum"]
+        .as_array()
+        .expect("HopRecord.kind must be an enum");
+    let kinds: Vec<&str> = kind_enum.iter().filter_map(|v| v.as_str()).collect();
+    for needed in ["decompose", "decide", "synthesize"] {
+        assert!(
+            kinds.contains(&needed),
+            "HopRecord.kind enum must include {needed:?}, got {kinds:?}"
+        );
+    }
+}
+
+#[test]
+fn answer_schema_refusal_reason_enum_includes_multi_hop_decompose_failed() {
+    let schema = parse_schema("answer.schema.json");
+    let refusal_any_of = schema["properties"]["refusal_reason"]["anyOf"]
+        .as_array()
+        .expect("refusal_reason must declare anyOf");
+    let enum_arr = refusal_any_of
+        .iter()
+        .find_map(|v| v["enum"].as_array())
+        .expect("one of refusal_reason.anyOf entries must declare an enum");
+    let values: Vec<&str> = enum_arr.iter().filter_map(|v| v.as_str()).collect();
+    assert!(
+        values.contains(&"multi_hop_decompose_failed"),
+        "refusal_reason enum must include `multi_hop_decompose_failed`, got {values:?}"
+    );
+    // All earlier RefusalReason wire values remain on the enum —
+    // guards against an accidental rewrite dropping old variants.
+    for needed in [
+        "score_gate",
+        "llm_self_judge",
+        "no_index",
+        "no_chunks",
+        "llm_stream_aborted",
+    ] {
+        assert!(
+            values.contains(&needed),
+            "refusal_reason enum must keep prior variant {needed:?}, got {values:?}"
+        );
+    }
+}
+
+#[test]
+fn error_schema_code_enum_includes_multi_hop_decompose_failed() {
+    let schema = parse_schema("error.schema.json");
+    let code_enum = schema["properties"]["code"]["enum"]
+        .as_array()
+        .expect("error.v1 must declare code.enum");
+    let values: Vec<&str> = code_enum.iter().filter_map(|v| v.as_str()).collect();
+    assert!(
+        values.contains(&"multi_hop_decompose_failed"),
+        "error.v1 code enum must include forward-looking `multi_hop_decompose_failed`, got {values:?}"
+    );
+    // Existing codes remain — guards against accidental deletion.
+    for needed in [
+        "config_invalid",
+        "not_indexed",
+        "model_unreachable",
+        "generic",
+    ] {
+        assert!(
+            values.contains(&needed),
+            "error.v1 code enum must keep prior code {needed:?}, got {values:?}"
+        );
+    }
+}
diff --git a/docs/wire-schema/v1/answer.schema.json b/docs/wire-schema/v1/answer.schema.json
index 4901bb0..ecb2c5f 100644
--- a/docs/wire-schema/v1/answer.schema.json
+++ b/docs/wire-schema/v1/answer.schema.json
@@ -20,7 +20,23 @@
     "answer": { "type": "string" },
     "citations": { "type": "array" },
     "grounded": { "type": "boolean" },
-    "refusal_reason": { "type": ["string", "null"] },
+    "refusal_reason": {
+      "anyOf": [
+        {
+          "type": "string",
+          "enum": [
+            "score_gate",
+            "llm_self_judge",
+            "no_index",
+            "no_chunks",
+            "llm_stream_aborted",
+            "multi_hop_decompose_failed"
+          ]
+        },
+        { "type": "null" }
+      ],
+      "description": "p9-fb-41: `multi_hop_decompose_failed` added in PR-3a for the multi-hop pipeline (only emitted when AskOpts.multi_hop = true and the decompose LLM call fails to parse). Other variants are unchanged from earlier phases."
+    },
     "model": { "type": "object" },
     "embedding": { "type": ["object", "null"] },
     "prompt_template_version": { "type": "string" },
@@ -35,6 +51,52 @@
       "type": ["integer", "null"],
       "minimum": 0,
       "description": "p9-fb-15: 같은 conversation 안 0-based 순서. null 이면 single-shot."
+    },
+    "hops": {
+      "anyOf": [
+        {
+          "type": "array",
+          "items": { "$ref": "#/$defs/HopRecord" }
+        },
+        { "type": "null" }
+      ],
+      "description": "p9-fb-41 multi-hop trace. Present (non-null array) only when the ask routed through the multi-hop pipeline (`AskOpts.multi_hop = true`); single-pass answers omit the field entirely (serde `skip_serializing_if = None`). Each entry records one LLM hop — decompose / decide / synthesize — with sub-queries, retrieval count, and per-hop latency. Wire-additive: pre-fb-41 readers tolerate the missing field; new readers branch on its presence to render the per-hop trace."
+    }
+  },
+  "$defs": {
+    "HopRecord": {
+      "type": "object",
+      "required": ["iter", "kind", "context_chunks_added", "forced_stop", "llm_call_ms"],
+      "properties": {
+        "iter": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "0-based hop index. iter=0 is always the initial decompose; subsequent iters are decide calls; the final iter is the synthesize call."
+        },
+        "kind": {
+          "type": "string",
+          "enum": ["decompose", "decide", "synthesize"]
+        },
+        "sub_queries": {
+          "type": "array",
+          "items": { "type": "string" },
+          "description": "Per-kind semantics. Decompose: the initial sub-queries the LLM produced. Decide: the *new* sub-queries to retrieve next iter (empty when the LLM signalled stop or when forced_stop=true). Synthesize: always empty."
+        },
+        "context_chunks_added": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Number of *new* chunks the retrieval round contributed to the pool (deduped by chunk_id). 0 for decompose / synthesize hops."
+        },
+        "forced_stop": {
+          "type": "boolean",
+          "description": "True when the pipeline cut the loop short due to a safety cap (max_depth / max_pool_chunks) rather than the LLM's own stop signal. Tracing signal, not a refusal."
+        },
+        "llm_call_ms": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "Wall-clock latency of the LLM call for this hop. `0` is overloaded — means 'no LLM call happened' when (a) the Decide hop was skipped due to forced_stop or (b) the pool was empty before any decide could run. Treat 0 as absent or instantaneous."
+        }
+      }
     }
   }
 }
diff --git a/docs/wire-schema/v1/error.schema.json b/docs/wire-schema/v1/error.schema.json
index a186e28..d113b65 100644
--- a/docs/wire-schema/v1/error.schema.json
+++ b/docs/wire-schema/v1/error.schema.json
@@ -16,14 +16,15 @@
         "model_not_pulled",
         "timeout",
         "io_error",
-        "generic"
+        "generic",
+        "multi_hop_decompose_failed"
       ]
     },
     "message": { "type": "string" },
     "details": {
       "type": "object",
       "additionalProperties": true,
-      "description": "Per-code free-form context. config_invalid: { path, cause }. not_indexed: { expected, found }. model_unreachable: { endpoint, source }. model_not_pulled: { model }. timeout: { source }. io_error: { kind }. generic: { chain (when --verbose) }."
+      "description": "Per-code free-form context. config_invalid: { path, cause }. not_indexed: { expected, found }. model_unreachable: { endpoint, source }. model_not_pulled: { model }. timeout: { source }. io_error: { kind }. generic: { chain (when --verbose) }. multi_hop_decompose_failed: {} (reserved — currently emitted as Answer.refusal_reason on stdout, NOT as error.v1 on stderr; the enum member is forward-looking for a future RefusalReason → error_wire promotion)."
     },
     "hint": {
       "anyOf": [