From 17c48a0ee6a4f9d0d106c6e3d8346eedc021ee5e Mon Sep 17 00:00:00 2001 From: altair823 Date: Mon, 25 May 2026 08:45:01 +0000 Subject: [PATCH 1/2] =?UTF-8?q?feat(cli):=20fb-41=20PR-4=20=E2=80=94=20CLI?= =?UTF-8?q?=20--multi-hop=20flag=20+=20answer.v1=20/=20error.v1=20wire=20?= =?UTF-8?q?=ED=99=95=EC=9E=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fb-41 multi-hop RAG 의 **PR-4** (PR-3b-ii 의 ScriptedLm + tests 위에서 user-facing CLI surface + JSON Schema 확장). PR-3b-i / PR-3b-ii 의 multi-hop pipeline 을 `kebab ask --multi-hop` 으로 사용자에게 노출. 설계: docs/superpowers/specs/2026-05-25-p9-fb-41-multi-hop-rag-design.md 계획: docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md (PR-4 단락) ## CLI surface - `kebab ask --multi-hop ` — 새 flag (default false). `AskOpts.multi_hop` 로 전달, stream + non-stream 두 callsite 모두 갱신. - `--show-citations` / `--hide-citations` / `--stream` / `--session` 등 기존 flag 와 orthogonal. - `--json` 모드에서 `Answer.hops` 배열이 multi-hop happy path / refusal-with- partial-trace 양쪽 경로에서 노출됨 (PR-3b-i + PR-3b-ii 의 wiring). ## Wire schema 확장 - `docs/wire-schema/v1/answer.schema.json`: - 신규 `hops: array | null` 필드 (optional, additive). `HopRecord` 의 `$defs` 추가 — `iter` / `kind` (decompose|decide|synthesize) / `sub_queries` / `context_chunks_added` / `forced_stop` / `llm_call_ms` 6 필드 + per-field doc. - `refusal_reason` 필드를 `anyOf [enum, null]` 로 명시 — 6 variant (`score_gate`, `llm_self_judge`, `no_index`, `no_chunks`, `llm_stream_aborted`, `multi_hop_decompose_failed`). 이전 schema 는 `type: string|null` 만 명시 → enum 명시는 agent / consumer 의 strict validate 강화 (additive — 기존 producer 값 모두 enum 안). - `$id` / `schema_version` 변경 없음 — additive minor. - `docs/wire-schema/v1/error.schema.json`: - `code` enum 에 `multi_hop_decompose_failed` 추가. **이는 forward-looking enum extension** — 현재 RefusalReason 은 `Answer.refusal_reason` (stdout) 으로만 노출되고 `error.v1` (stderr) 경로 안 거침. 미래 PR 에서 fatal promotion 정책 결정 시 trigger 가능하도록 enum 만 미리 reserve. 
- details.description 의 per-code 안내에 `multi_hop_decompose_failed: {}` note 추가 — reserved 상태 명시. ## Tests - `crates/kebab-cli/tests/wire_ask_multi_hop.rs` 신규 (4 Ollama-free pins): - `cli_ask_help_advertises_multi_hop_flag`: clap-level smoke, `kebab ask --help` 출력에 `--multi-hop` 등장 확인. - `answer_schema_declares_hops_property_with_hop_record_defs`: `hops` property 존재 + `$defs.HopRecord` 의 `kind` enum 3 variant (decompose/decide/synthesize) 회귀 핀. - `answer_schema_refusal_reason_enum_includes_multi_hop_decompose_failed`: 6 variant 모두 enum 에 존재 — 기존 5 도 함께 핀 (회귀 방지). - `error_schema_code_enum_includes_multi_hop_decompose_failed`: 신규 code enum 확장 + 기존 code (config_invalid / not_indexed / ...) 보존 핀. End-to-end multi-hop ask 의 live Ollama 검증은 후속 `#[ignore]` test 로 (같은 `wire_ask_stale.rs` 패턴). PR-4 의 범위 = clap + schema 정합성 만. ## 변경 없음 - `crates/kebab-app/src/error_wire.rs` — plan 의 "error_wire 매핑" 항목은 현재 RefusalReason 이 `Answer.refusal_reason` 로만 노출 (anyhow chain 안 거침) 라 trigger 가 없음. enum reservation 만으로 충분, 매핑 코드는 dead code 회피. 향후 fatal-promotion 정책 (refusal → error.v1) 결정 시 PR-4b 로 split. - `prompt_template_version` — `rag-multi-hop-v1` 그대로. - TUI / MCP surface — PR-5 / PR-6 에서. ## 검증 - `cargo test -p kebab-cli -j 1` — 모든 test 통과 (신규 wire_ask_multi_hop 4 + 기존 ask / search / schema / ingest / mcp / reset 등 모두). - `cargo clippy -p kebab-cli --all-targets -j 1 -- -D warnings` clean. - 단일 crate 직렬 build (16 GB RAM 제약). ## 다음 PR - PR-5: MCP `ask` tool 의 `multi_hop: bool` argument + `integrations/claude-code/kebab/SKILL.md` 의 ask 절 갱신. - PR-6: TUI Ask 패널 multi-hop toggle (F2 / Ctrl-T) + hop trace render. - v0.18.0 cut (PR-6 머지 후): `Cargo.toml` 0.17.2 → 0.18.0 + HANDOFF / HOTFIXES / INDEX 갱신 + gitea-release. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-cli/src/main.rs | 17 ++- crates/kebab-cli/tests/wire_ask_multi_hop.rs | 146 +++++++++++++++++++ docs/wire-schema/v1/answer.schema.json | 64 +++++++- docs/wire-schema/v1/error.schema.json | 5 +- 4 files changed, 227 insertions(+), 5 deletions(-) create mode 100644 crates/kebab-cli/tests/wire_ask_multi_hop.rs diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 5ed1325..935ff81 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -250,6 +250,18 @@ enum Cmd { /// `answer.v1`. Off by default to preserve final-only behavior. #[arg(long)] stream: bool, + + /// p9-fb-41: route this ask through the multi-hop pipeline + /// — the query is decomposed into sub-questions, each + /// retrieved independently, then synthesized over the + /// merged chunk pool. Cost trade-off: 2–5× LLM calls + /// (decompose + 0..N decide + synthesize) vs. single-pass. + /// Worth it for compound questions (X 와 Y 의 관계, prereq + /// chain, cross-doc reasoning); single-pass is faster for + /// simple fact lookups. The full per-hop trace is exposed + /// on `Answer.hops` in `--json` mode. 
+ #[arg(long)] + multi_hop: bool, }, /// Wipe XDG data dirs (and optionally the Lance vector store) so the @@ -973,6 +985,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { hide_citations, session, stream, + multi_hop, } => { let cfg = kebab_config::Config::load(cli.config.as_deref())?; if *stream { @@ -999,7 +1012,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { history: Vec::new(), conversation_id: None, turn_index: None, - multi_hop: false, + multi_hop: *multi_hop, }; let cfg2 = cfg.clone(); let q = query.clone(); @@ -1075,7 +1088,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { history: Vec::new(), conversation_id: None, turn_index: None, - multi_hop: false, + multi_hop: *multi_hop, }; let ans = match session.as_deref() { Some(sid) => kebab_app::ask_with_session_with_config(cfg, sid, query, opts)?, diff --git a/crates/kebab-cli/tests/wire_ask_multi_hop.rs b/crates/kebab-cli/tests/wire_ask_multi_hop.rs new file mode 100644 index 0000000..ab65443 --- /dev/null +++ b/crates/kebab-cli/tests/wire_ask_multi_hop.rs @@ -0,0 +1,146 @@ +//! p9-fb-41 PR-4: CLI `--multi-hop` flag wiring + answer.v1 / error.v1 +//! schema additivity. +//! +//! Four Ollama-free pins: +//! +//! 1. `--multi-hop` is exposed on `kebab ask --help` so users can +//! discover the flag at the CLI surface (clap-level smoke). +//! 2. `answer.schema.json` parses as valid JSON and declares a +//! `hops` property with a `HopRecord` `$defs` entry — guards +//! against accidental schema deletion / typo in future edits. +//! 3. `answer.schema.json`'s `refusal_reason` enum lists +//! `multi_hop_decompose_failed` — agents validating against +//! the schema accept the new variant on refusal answers. +//! 4. `error.schema.json`'s `code` enum lists +//! `multi_hop_decompose_failed` — forward-looking enum extension +//! documented in PR-4. +//! +//! End-to-end multi-hop ask against a live Ollama lands in a +//! follow-up `#[ignore]` test (same pattern as `wire_ask_stale.rs`). 
+ +use std::path::PathBuf; +use std::process::Command; + +fn schema_path(name: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("docs") + .join("wire-schema") + .join("v1") + .join(name) +} + +fn parse_schema(name: &str) -> serde_json::Value { + let text = std::fs::read_to_string(schema_path(name)) + .unwrap_or_else(|e| panic!("read {name}: {e}")); + serde_json::from_str(&text) + .unwrap_or_else(|e| panic!("{name} must parse as valid JSON: {e}")) +} + +#[test] +fn cli_ask_help_advertises_multi_hop_flag() { + let bin = env!("CARGO_BIN_EXE_kebab"); + let out = Command::new(bin).args(["ask", "--help"]).output().unwrap(); + let stdout = String::from_utf8_lossy(&out.stdout); + assert!( + stdout.contains("--multi-hop"), + "`kebab ask --help` must advertise --multi-hop so users can discover it:\n{stdout}" + ); +} + +#[test] +fn answer_schema_declares_hops_property_with_hop_record_defs() { + let schema = parse_schema("answer.schema.json"); + assert!( + schema["properties"]["hops"].is_object(), + "`hops` property must be declared on answer.v1" + ); + // `hops` allows array-or-null (single-pass omits the field; + // multi-hop emits a non-empty array). + let hops_any_of = schema["properties"]["hops"]["anyOf"] + .as_array() + .expect("hops must declare anyOf (array | null)"); + assert!( + hops_any_of.iter().any(|v| v["type"] == "array"), + "hops anyOf must include array shape" + ); + assert!( + hops_any_of.iter().any(|v| v["type"] == "null"), + "hops anyOf must include null (single-pass omits the field)" + ); + + // HopRecord $defs entry — guards against accidental deletion or + // structural drift in future schema edits. 
+ let hop_record = &schema["$defs"]["HopRecord"]; + assert!( + hop_record.is_object(), + "$defs.HopRecord must be declared so `hops.items` can $ref it" + ); + let kind_enum = hop_record["properties"]["kind"]["enum"] + .as_array() + .expect("HopRecord.kind must be an enum"); + let kinds: Vec<&str> = kind_enum.iter().filter_map(|v| v.as_str()).collect(); + for needed in ["decompose", "decide", "synthesize"] { + assert!( + kinds.contains(&needed), + "HopRecord.kind enum must include {needed:?}, got {kinds:?}" + ); + } +} + +#[test] +fn answer_schema_refusal_reason_enum_includes_multi_hop_decompose_failed() { + let schema = parse_schema("answer.schema.json"); + let refusal_any_of = schema["properties"]["refusal_reason"]["anyOf"] + .as_array() + .expect("refusal_reason must declare anyOf"); + let enum_arr = refusal_any_of + .iter() + .find_map(|v| v["enum"].as_array()) + .expect("one of refusal_reason.anyOf entries must declare an enum"); + let values: Vec<&str> = enum_arr.iter().filter_map(|v| v.as_str()).collect(); + assert!( + values.contains(&"multi_hop_decompose_failed"), + "refusal_reason enum must include `multi_hop_decompose_failed`, got {values:?}" + ); + // All earlier RefusalReason wire values remain on the enum — + // guards against an accidental rewrite dropping old variants. 
+ for needed in [ + "score_gate", + "llm_self_judge", + "no_index", + "no_chunks", + "llm_stream_aborted", + ] { + assert!( + values.contains(&needed), + "refusal_reason enum must keep prior variant {needed:?}, got {values:?}" + ); + } +} + +#[test] +fn error_schema_code_enum_includes_multi_hop_decompose_failed() { + let schema = parse_schema("error.schema.json"); + let code_enum = schema["properties"]["code"]["enum"] + .as_array() + .expect("error.v1 must declare code.enum"); + let values: Vec<&str> = code_enum.iter().filter_map(|v| v.as_str()).collect(); + assert!( + values.contains(&"multi_hop_decompose_failed"), + "error.v1 code enum must include forward-looking `multi_hop_decompose_failed`, got {values:?}" + ); + // Existing codes remain — guards against accidental deletion. + for needed in [ + "config_invalid", + "not_indexed", + "model_unreachable", + "generic", + ] { + assert!( + values.contains(&needed), + "error.v1 code enum must keep prior code {needed:?}, got {values:?}" + ); + } +} diff --git a/docs/wire-schema/v1/answer.schema.json b/docs/wire-schema/v1/answer.schema.json index 4901bb0..ecb2c5f 100644 --- a/docs/wire-schema/v1/answer.schema.json +++ b/docs/wire-schema/v1/answer.schema.json @@ -20,7 +20,23 @@ "answer": { "type": "string" }, "citations": { "type": "array" }, "grounded": { "type": "boolean" }, - "refusal_reason": { "type": ["string", "null"] }, + "refusal_reason": { + "anyOf": [ + { + "type": "string", + "enum": [ + "score_gate", + "llm_self_judge", + "no_index", + "no_chunks", + "llm_stream_aborted", + "multi_hop_decompose_failed" + ] + }, + { "type": "null" } + ], + "description": "p9-fb-41: `multi_hop_decompose_failed` added in PR-3a for the multi-hop pipeline (only emitted when AskOpts.multi_hop = true and the decompose LLM call fails to parse). Other variants are unchanged from earlier phases." 
+ }, "model": { "type": "object" }, "embedding": { "type": ["object", "null"] }, "prompt_template_version": { "type": "string" }, @@ -35,6 +51,52 @@ "type": ["integer", "null"], "minimum": 0, "description": "p9-fb-15: 같은 conversation 안 0-based 순서. null 이면 single-shot." + }, + "hops": { + "anyOf": [ + { + "type": "array", + "items": { "$ref": "#/$defs/HopRecord" } + }, + { "type": "null" } + ], + "description": "p9-fb-41 multi-hop trace. Present (non-null array) only when the ask routed through the multi-hop pipeline (`AskOpts.multi_hop = true`); single-pass answers omit the field entirely (serde `skip_serializing_if = None`). Each entry records one LLM hop — decompose / decide / synthesize — with sub-queries, retrieval count, and per-hop latency. Wire-additive: pre-fb-41 readers tolerate the missing field; new readers branch on its presence to render the per-hop trace." + } + }, + "$defs": { + "HopRecord": { + "type": "object", + "required": ["iter", "kind", "context_chunks_added", "forced_stop", "llm_call_ms"], + "properties": { + "iter": { + "type": "integer", + "minimum": 0, + "description": "0-based hop index. iter=0 is always the initial decompose; subsequent iters are decide calls; the final iter is the synthesize call." + }, + "kind": { + "type": "string", + "enum": ["decompose", "decide", "synthesize"] + }, + "sub_queries": { + "type": "array", + "items": { "type": "string" }, + "description": "Per-kind semantics. Decompose: the initial sub-queries the LLM produced. Decide: the *new* sub-queries to retrieve next iter (empty when the LLM signalled stop or when forced_stop=true). Synthesize: always empty." + }, + "context_chunks_added": { + "type": "integer", + "minimum": 0, + "description": "Number of *new* chunks the retrieval round contributed to the pool (deduped by chunk_id). 0 for decompose / synthesize hops." 
+ }, + "forced_stop": { + "type": "boolean", + "description": "True when the pipeline cut the loop short due to a safety cap (max_depth / max_pool_chunks) rather than the LLM's own stop signal. Tracing signal, not a refusal." + }, + "llm_call_ms": { + "type": "integer", + "minimum": 0, + "description": "Wall-clock latency of the LLM call for this hop. `0` is overloaded — means 'no LLM call happened' when (a) the Decide hop was skipped due to forced_stop or (b) the pool was empty before any decide could run. Treat 0 as absent or instantaneous." + } + } } } } diff --git a/docs/wire-schema/v1/error.schema.json b/docs/wire-schema/v1/error.schema.json index a186e28..d113b65 100644 --- a/docs/wire-schema/v1/error.schema.json +++ b/docs/wire-schema/v1/error.schema.json @@ -16,14 +16,15 @@ "model_not_pulled", "timeout", "io_error", - "generic" + "generic", + "multi_hop_decompose_failed" ] }, "message": { "type": "string" }, "details": { "type": "object", "additionalProperties": true, - "description": "Per-code free-form context. config_invalid: { path, cause }. not_indexed: { expected, found }. model_unreachable: { endpoint, source }. model_not_pulled: { model }. timeout: { source }. io_error: { kind }. generic: { chain (when --verbose) }." + "description": "Per-code free-form context. config_invalid: { path, cause }. not_indexed: { expected, found }. model_unreachable: { endpoint, source }. model_not_pulled: { model }. timeout: { source }. io_error: { kind }. generic: { chain (when --verbose) }. multi_hop_decompose_failed: {} (reserved — currently emitted as Answer.refusal_reason on stdout, NOT as error.v1 on stderr; the enum member is forward-looking for a future RefusalReason → error_wire promotion)." 
}, "hint": { "anyOf": [ From c56242d04f2a276c18852d7df6c10c8006df9540 Mon Sep 17 00:00:00 2001 From: altair823 Date: Mon, 25 May 2026 08:47:35 +0000 Subject: [PATCH 2/2] =?UTF-8?q?chore(cli):=20PR=20#171=20=ED=9A=8C?= =?UTF-8?q?=EC=B0=A8=201=20=EB=A6=AC=EB=B7=B0=20=EB=B0=98=EC=98=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `answer.schema.json` 의 `refusal_reason` description 의 PR 번호 정정: `multi_hop_decompose_failed` 도입 시점 = PR-2 (#167, RefusalReason variant + ask_multi_hop decompose-failure 분기). PR-3a (#168) 는 `Answer.hops` field + RagCfg knob 만 — refusal variant 와 무관. 검증 - `cargo test -p kebab-cli -j 1 --test wire_ask_multi_hop` 4 모두 통과. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/wire-schema/v1/answer.schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/wire-schema/v1/answer.schema.json b/docs/wire-schema/v1/answer.schema.json index ecb2c5f..da8ac27 100644 --- a/docs/wire-schema/v1/answer.schema.json +++ b/docs/wire-schema/v1/answer.schema.json @@ -35,7 +35,7 @@ }, { "type": "null" } ], - "description": "p9-fb-41: `multi_hop_decompose_failed` added in PR-3a for the multi-hop pipeline (only emitted when AskOpts.multi_hop = true and the decompose LLM call fails to parse). Other variants are unchanged from earlier phases." + "description": "p9-fb-41: `multi_hop_decompose_failed` added in PR-2 alongside the multi-hop pipeline skeleton (only emitted when AskOpts.multi_hop = true and the decompose LLM call fails to parse). Other variants are unchanged from earlier phases." }, "model": { "type": "object" }, "embedding": { "type": ["object", "null"] },