Merge pull request 'feat(cli): fb-41 PR-4 CLI --multi-hop flag + answer.v1 / error.v1 wire' (#171) from feat/fb-41-pr-4-cli-multi-hop-flag into main
This commit was merged in pull request #171.
This commit is contained in:
@@ -250,6 +250,18 @@ enum Cmd {
|
||||
/// `answer.v1`. Off by default to preserve final-only behavior.
|
||||
#[arg(long)]
|
||||
stream: bool,
|
||||
|
||||
/// p9-fb-41: route this ask through the multi-hop pipeline
|
||||
/// — the query is decomposed into sub-questions, each
|
||||
/// retrieved independently, then synthesized over the
|
||||
/// merged chunk pool. Cost trade-off: 2–5× LLM calls
|
||||
/// (decompose + 0..N decide + synthesize) vs. single-pass.
|
||||
/// Worth it for compound questions (X 와 Y 의 관계, prereq
|
||||
/// chain, cross-doc reasoning); single-pass is faster for
|
||||
/// simple fact lookups. The full per-hop trace is exposed
|
||||
/// on `Answer.hops` in `--json` mode.
|
||||
#[arg(long)]
|
||||
multi_hop: bool,
|
||||
},
|
||||
|
||||
/// Wipe XDG data dirs (and optionally the Lance vector store) so the
|
||||
@@ -973,6 +985,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
hide_citations,
|
||||
session,
|
||||
stream,
|
||||
multi_hop,
|
||||
} => {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
if *stream {
|
||||
@@ -999,7 +1012,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
history: Vec::new(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
multi_hop: false,
|
||||
multi_hop: *multi_hop,
|
||||
};
|
||||
let cfg2 = cfg.clone();
|
||||
let q = query.clone();
|
||||
@@ -1075,7 +1088,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
history: Vec::new(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
multi_hop: false,
|
||||
multi_hop: *multi_hop,
|
||||
};
|
||||
let ans = match session.as_deref() {
|
||||
Some(sid) => kebab_app::ask_with_session_with_config(cfg, sid, query, opts)?,
|
||||
|
||||
146
crates/kebab-cli/tests/wire_ask_multi_hop.rs
Normal file
146
crates/kebab-cli/tests/wire_ask_multi_hop.rs
Normal file
@@ -0,0 +1,146 @@
|
||||
//! p9-fb-41 PR-4: CLI `--multi-hop` flag wiring + answer.v1 / error.v1
|
||||
//! schema additivity.
|
||||
//!
|
||||
//! Four Ollama-free pins:
|
||||
//!
|
||||
//! 1. `--multi-hop` is exposed on `kebab ask --help` so users can
|
||||
//! discover the flag at the CLI surface (clap-level smoke).
|
||||
//! 2. `answer.schema.json` parses as valid JSON and declares a
|
||||
//! `hops` property with a `HopRecord` `$defs` entry — guards
|
||||
//! against accidental schema deletion / typo in future edits.
|
||||
//! 3. `answer.schema.json`'s `refusal_reason` enum lists
|
||||
//! `multi_hop_decompose_failed` — agents validating against
|
||||
//! the schema accept the new variant on refusal answers.
|
||||
//! 4. `error.schema.json`'s `code` enum lists
|
||||
//! `multi_hop_decompose_failed` — forward-looking enum extension
|
||||
//! documented in PR-4.
|
||||
//!
|
||||
//! End-to-end multi-hop ask against a live Ollama lands in a
|
||||
//! follow-up `#[ignore]` test (same pattern as `wire_ask_stale.rs`).
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::process::Command;
|
||||
|
||||
fn schema_path(name: &str) -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("..")
|
||||
.join("..")
|
||||
.join("docs")
|
||||
.join("wire-schema")
|
||||
.join("v1")
|
||||
.join(name)
|
||||
}
|
||||
|
||||
fn parse_schema(name: &str) -> serde_json::Value {
|
||||
let text = std::fs::read_to_string(schema_path(name))
|
||||
.unwrap_or_else(|e| panic!("read {name}: {e}"));
|
||||
serde_json::from_str(&text)
|
||||
.unwrap_or_else(|e| panic!("{name} must parse as valid JSON: {e}"))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cli_ask_help_advertises_multi_hop_flag() {
|
||||
let bin = env!("CARGO_BIN_EXE_kebab");
|
||||
let out = Command::new(bin).args(["ask", "--help"]).output().unwrap();
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
assert!(
|
||||
stdout.contains("--multi-hop"),
|
||||
"`kebab ask --help` must advertise --multi-hop so users can discover it:\n{stdout}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn answer_schema_declares_hops_property_with_hop_record_defs() {
|
||||
let schema = parse_schema("answer.schema.json");
|
||||
assert!(
|
||||
schema["properties"]["hops"].is_object(),
|
||||
"`hops` property must be declared on answer.v1"
|
||||
);
|
||||
// `hops` allows array-or-null (single-pass omits the field;
|
||||
// multi-hop emits a non-empty array).
|
||||
let hops_any_of = schema["properties"]["hops"]["anyOf"]
|
||||
.as_array()
|
||||
.expect("hops must declare anyOf (array | null)");
|
||||
assert!(
|
||||
hops_any_of.iter().any(|v| v["type"] == "array"),
|
||||
"hops anyOf must include array shape"
|
||||
);
|
||||
assert!(
|
||||
hops_any_of.iter().any(|v| v["type"] == "null"),
|
||||
"hops anyOf must include null (single-pass omits the field)"
|
||||
);
|
||||
|
||||
// HopRecord $defs entry — guards against accidental deletion or
|
||||
// structural drift in future schema edits.
|
||||
let hop_record = &schema["$defs"]["HopRecord"];
|
||||
assert!(
|
||||
hop_record.is_object(),
|
||||
"$defs.HopRecord must be declared so `hops.items` can $ref it"
|
||||
);
|
||||
let kind_enum = hop_record["properties"]["kind"]["enum"]
|
||||
.as_array()
|
||||
.expect("HopRecord.kind must be an enum");
|
||||
let kinds: Vec<&str> = kind_enum.iter().filter_map(|v| v.as_str()).collect();
|
||||
for needed in ["decompose", "decide", "synthesize"] {
|
||||
assert!(
|
||||
kinds.contains(&needed),
|
||||
"HopRecord.kind enum must include {needed:?}, got {kinds:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn answer_schema_refusal_reason_enum_includes_multi_hop_decompose_failed() {
|
||||
let schema = parse_schema("answer.schema.json");
|
||||
let refusal_any_of = schema["properties"]["refusal_reason"]["anyOf"]
|
||||
.as_array()
|
||||
.expect("refusal_reason must declare anyOf");
|
||||
let enum_arr = refusal_any_of
|
||||
.iter()
|
||||
.find_map(|v| v["enum"].as_array())
|
||||
.expect("one of refusal_reason.anyOf entries must declare an enum");
|
||||
let values: Vec<&str> = enum_arr.iter().filter_map(|v| v.as_str()).collect();
|
||||
assert!(
|
||||
values.contains(&"multi_hop_decompose_failed"),
|
||||
"refusal_reason enum must include `multi_hop_decompose_failed`, got {values:?}"
|
||||
);
|
||||
// All earlier RefusalReason wire values remain on the enum —
|
||||
// guards against an accidental rewrite dropping old variants.
|
||||
for needed in [
|
||||
"score_gate",
|
||||
"llm_self_judge",
|
||||
"no_index",
|
||||
"no_chunks",
|
||||
"llm_stream_aborted",
|
||||
] {
|
||||
assert!(
|
||||
values.contains(&needed),
|
||||
"refusal_reason enum must keep prior variant {needed:?}, got {values:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error_schema_code_enum_includes_multi_hop_decompose_failed() {
|
||||
let schema = parse_schema("error.schema.json");
|
||||
let code_enum = schema["properties"]["code"]["enum"]
|
||||
.as_array()
|
||||
.expect("error.v1 must declare code.enum");
|
||||
let values: Vec<&str> = code_enum.iter().filter_map(|v| v.as_str()).collect();
|
||||
assert!(
|
||||
values.contains(&"multi_hop_decompose_failed"),
|
||||
"error.v1 code enum must include forward-looking `multi_hop_decompose_failed`, got {values:?}"
|
||||
);
|
||||
// Existing codes remain — guards against accidental deletion.
|
||||
for needed in [
|
||||
"config_invalid",
|
||||
"not_indexed",
|
||||
"model_unreachable",
|
||||
"generic",
|
||||
] {
|
||||
assert!(
|
||||
values.contains(&needed),
|
||||
"error.v1 code enum must keep prior code {needed:?}, got {values:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -20,7 +20,23 @@
|
||||
"answer": { "type": "string" },
|
||||
"citations": { "type": "array" },
|
||||
"grounded": { "type": "boolean" },
|
||||
"refusal_reason": { "type": ["string", "null"] },
|
||||
"refusal_reason": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"score_gate",
|
||||
"llm_self_judge",
|
||||
"no_index",
|
||||
"no_chunks",
|
||||
"llm_stream_aborted",
|
||||
"multi_hop_decompose_failed"
|
||||
]
|
||||
},
|
||||
{ "type": "null" }
|
||||
],
|
||||
"description": "p9-fb-41: `multi_hop_decompose_failed` added in PR-2 alongside the multi-hop pipeline skeleton (only emitted when AskOpts.multi_hop = true and the decompose LLM call fails to parse). Other variants are unchanged from earlier phases."
|
||||
},
|
||||
"model": { "type": "object" },
|
||||
"embedding": { "type": ["object", "null"] },
|
||||
"prompt_template_version": { "type": "string" },
|
||||
@@ -35,6 +51,52 @@
|
||||
"type": ["integer", "null"],
|
||||
"minimum": 0,
|
||||
"description": "p9-fb-15: 같은 conversation 안 0-based 순서. null 이면 single-shot."
|
||||
},
|
||||
"hops": {
|
||||
"anyOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": { "$ref": "#/$defs/HopRecord" }
|
||||
},
|
||||
{ "type": "null" }
|
||||
],
|
||||
"description": "p9-fb-41 multi-hop trace. Present (non-null array) only when the ask routed through the multi-hop pipeline (`AskOpts.multi_hop = true`); single-pass answers omit the field entirely (serde `skip_serializing_if = None`). Each entry records one LLM hop — decompose / decide / synthesize — with sub-queries, retrieval count, and per-hop latency. Wire-additive: pre-fb-41 readers tolerate the missing field; new readers branch on its presence to render the per-hop trace."
|
||||
}
|
||||
},
|
||||
"$defs": {
|
||||
"HopRecord": {
|
||||
"type": "object",
|
||||
"required": ["iter", "kind", "context_chunks_added", "forced_stop", "llm_call_ms"],
|
||||
"properties": {
|
||||
"iter": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "0-based hop index. iter=0 is always the initial decompose; subsequent iters are decide calls; the final iter is the synthesize call."
|
||||
},
|
||||
"kind": {
|
||||
"type": "string",
|
||||
"enum": ["decompose", "decide", "synthesize"]
|
||||
},
|
||||
"sub_queries": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Per-kind semantics. Decompose: the initial sub-queries the LLM produced. Decide: the *new* sub-queries to retrieve next iter (empty when the LLM signalled stop or when forced_stop=true). Synthesize: always empty."
|
||||
},
|
||||
"context_chunks_added": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Number of *new* chunks the retrieval round contributed to the pool (deduped by chunk_id). 0 for decompose / synthesize hops."
|
||||
},
|
||||
"forced_stop": {
|
||||
"type": "boolean",
|
||||
"description": "True when the pipeline cut the loop short due to a safety cap (max_depth / max_pool_chunks) rather than the LLM's own stop signal. Tracing signal, not a refusal."
|
||||
},
|
||||
"llm_call_ms": {
|
||||
"type": "integer",
|
||||
"minimum": 0,
|
||||
"description": "Wall-clock latency of the LLM call for this hop. `0` is overloaded — means 'no LLM call happened' when (a) the Decide hop was skipped due to forced_stop or (b) the pool was empty before any decide could run. Treat 0 as absent or instantaneous."
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,14 +16,15 @@
|
||||
"model_not_pulled",
|
||||
"timeout",
|
||||
"io_error",
|
||||
"generic"
|
||||
"generic",
|
||||
"multi_hop_decompose_failed"
|
||||
]
|
||||
},
|
||||
"message": { "type": "string" },
|
||||
"details": {
|
||||
"type": "object",
|
||||
"additionalProperties": true,
|
||||
"description": "Per-code free-form context. config_invalid: { path, cause }. not_indexed: { expected, found }. model_unreachable: { endpoint, source }. model_not_pulled: { model }. timeout: { source }. io_error: { kind }. generic: { chain (when --verbose) }."
|
||||
"description": "Per-code free-form context. config_invalid: { path, cause }. not_indexed: { expected, found }. model_unreachable: { endpoint, source }. model_not_pulled: { model }. timeout: { source }. io_error: { kind }. generic: { chain (when --verbose) }. multi_hop_decompose_failed: {} (reserved — currently emitted as Answer.refusal_reason on stdout, NOT as error.v1 on stderr; the enum member is forward-looking for a future RefusalReason → error_wire promotion)."
|
||||
},
|
||||
"hint": {
|
||||
"anyOf": [
|
||||
|
||||
Reference in New Issue
Block a user