From 7150c376bbad2d9ab2eb17503c6dc7d5908faaeb Mon Sep 17 00:00:00 2001 From: altair823 Date: Mon, 25 May 2026 07:15:01 +0000 Subject: [PATCH] =?UTF-8?q?feat(rag):=20fb-41=20PR-3a=20=E2=80=94=20HopRec?= =?UTF-8?q?ord=20wire=20+=20RagCfg=20multi-hop=20knobs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-3 의 분할 첫 PR. wire additive (HopRecord + HopKind + Answer.hops field) + RagCfg 의 multi_hop_* 3 노브. RAG pipeline 동작 미변경 — 모든 Answer literal 의 `hops = None`. PR-3b (후속) 가 ask_multi_hop 의 happy path 에서 dynamic decide loop 구현 + hops trace 채움. 분할 이유: 원래 PR-3 가 wire + cfg + decide loop + ScriptedLm + helper refactor + 5+ tests 단일 PR 였는데 ~1500 줄 단일 patch 가 review 부담 + 회귀 위험 ↑. additive foundation 부터 ship 후 decide loop 별 PR — 사용자 결정 (2026-05-25). - `kebab_core::HopRecord` (iter, kind, sub_queries, context_chunks_added, forced_stop, llm_call_ms) + `HopKind` (Decompose / Decide / Synthesize) — wire-additive shape. - `kebab_core::Answer.hops: Option<Vec<HopRecord>>` — `#[serde(default, skip_serializing_if = "Option::is_none")]`, single-pass / refusal path 는 None, PR-3b 의 multi-hop happy path 가 Some. - `kebab_config::RagCfg` 에 3 신규 노브: - `multi_hop_max_depth: u32` (default 3) - `multi_hop_max_sub_queries_per_iter: u32` (default 5) - `multi_hop_max_pool_chunks: u32` (default 30) 3 모두 `#[serde(default)]` + env override (`KEBAB_RAG_MULTI_HOP_MAX_*`) + legacy parse 핀 (`LEGACY_PRE_TIMEOUT_TOML` 공유). - 9 Answer literal site (pipeline.rs ×6 + kebab-cli + kebab-tui tests + kebab-eval test) 에 `hops: None` 명시 추가. exhaustive field check 가 자동 guard — 빠진 site 시 compile fail. - plan 의 PR-3 단락 → PR-3a / PR-3b 분할 명시 + scope 정정. Tests (163 passing across kebab-config + kebab-core + kebab-rag): - 5 신규 multi-hop knob test (default / env override / legacy parse). - 기존 50+57+31+19+3+3 test 모두 hops:None 추가 후도 통과. Wire 영향: `answer.v1` 의 optional `hops` 필드 — `skip_serializing_if = None` 이라 single-pass response 에 emit 안 됨. 
wire breaking 아님, JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-cli/src/main.rs | 1 + crates/kebab-config/src/lib.rs | 107 ++++++++++++++++++ crates/kebab-core/src/answer.rs | 64 +++++++++++ crates/kebab-core/src/lib.rs | 4 +- crates/kebab-eval/src/metrics.rs | 1 + crates/kebab-rag/src/pipeline.rs | 26 +++++ crates/kebab-tui/tests/ask.rs | 1 + .../2026-05-25-p9-fb-41-multi-hop-rag.md | 32 +++++- 8 files changed, 233 insertions(+), 3 deletions(-) diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 1f951a0..5ed1325 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -1639,6 +1639,7 @@ mod tests { created_at: OffsetDateTime::now_utc(), conversation_id: None, turn_index: None, + hops: None, } } diff --git a/crates/kebab-config/src/lib.rs b/crates/kebab-config/src/lib.rs index 77455fd..8e3c04e 100644 --- a/crates/kebab-config/src/lib.rs +++ b/crates/kebab-config/src/lib.rs @@ -181,6 +181,39 @@ pub struct RagCfg { pub score_gate: f32, pub explain_default: bool, pub max_context_tokens: usize, + /// p9-fb-41: hard ceiling on the number of multi-hop iterations + /// (decompose iter + decide iters). When the LLM keeps returning + /// `continue` past this depth the pipeline cuts to `synthesize` + /// with `HopRecord.forced_stop = true`. Default `3` — enough for + /// most cross-doc reasoning, low enough to bound LLM cost. + #[serde(default = "default_multi_hop_max_depth")] + pub multi_hop_max_depth: u32, + /// p9-fb-41: cap on how many sub-queries the LLM may emit in a + /// single decompose / decide call. Mirrors + /// [`MULTI_HOP_MAX_SUB_QUERIES_DEFAULT`] in kebab-rag — the + /// const is the hard floor while this is the runtime knob. + /// Default `5`. + #[serde(default = "default_multi_hop_max_sub_queries_per_iter")] + pub multi_hop_max_sub_queries_per_iter: u32, + /// p9-fb-41: hard ceiling on the deduped chunk pool. 
When the + /// accumulated pool would exceed this many chunks the pipeline + /// stops accepting new retrieval results and forces synthesize + /// with `forced_stop = true`. Default `30` — generous for + /// 5-hop / 10-hits multi-hop runs while still bounded. + #[serde(default = "default_multi_hop_max_pool_chunks")] + pub multi_hop_max_pool_chunks: u32, +} + +fn default_multi_hop_max_depth() -> u32 { + 3 +} + +fn default_multi_hop_max_sub_queries_per_iter() -> u32 { + 5 +} + +fn default_multi_hop_max_pool_chunks() -> u32 { + 30 } /// Settings for the image ingest pipeline (P6). `ocr` controls OCR @@ -434,6 +467,10 @@ impl Config { score_gate: 0.30, explain_default: false, max_context_tokens: 8000, + multi_hop_max_depth: default_multi_hop_max_depth(), + multi_hop_max_sub_queries_per_iter: + default_multi_hop_max_sub_queries_per_iter(), + multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(), }, image: ImageCfg::defaults(), ui: UiCfg::defaults(), @@ -717,6 +754,21 @@ impl Config { self.rag.max_context_tokens = n; } } + "KEBAB_RAG_MULTI_HOP_MAX_DEPTH" => { + if let Ok(n) = v.parse::<u32>() { + self.rag.multi_hop_max_depth = n; + } + } + "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER" => { + if let Ok(n) = v.parse::<u32>() { + self.rag.multi_hop_max_sub_queries_per_iter = n; + } + } + "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS" => { + if let Ok(n) = v.parse::<u32>() { + self.rag.multi_hop_max_pool_chunks = n; + } + } // image.ocr "KEBAB_IMAGE_OCR_ENABLED" => { @@ -1092,6 +1144,61 @@ theme = "dark" assert_eq!(c.image.ocr.request_timeout_secs, 300); } + // ── p9-fb-41: multi-hop RAG knobs ──────────────────────────────────── + + #[test] + fn default_multi_hop_max_depth_is_3() { + assert_eq!(Config::defaults().rag.multi_hop_max_depth, 3); + } + + #[test] + fn default_multi_hop_max_sub_queries_per_iter_is_5() { + assert_eq!( + Config::defaults().rag.multi_hop_max_sub_queries_per_iter, + 5 + ); + } + + #[test] + fn default_multi_hop_max_pool_chunks_is_30() { + 
assert_eq!(Config::defaults().rag.multi_hop_max_pool_chunks, 30); + } + + #[test] + fn env_overrides_multi_hop_knobs() { + let mut env = HashMap::new(); + env.insert( + "KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(), + "5".to_string(), + ); + env.insert( + "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER".to_string(), + "7".to_string(), + ); + env.insert( + "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS".to_string(), + "50".to_string(), + ); + let c = Config::defaults().apply_env(&env); + assert_eq!(c.rag.multi_hop_max_depth, 5); + assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 7); + assert_eq!(c.rag.multi_hop_max_pool_chunks, 50); + } + + /// post-PR-3 fb-41: a config file written before the multi-hop + /// knobs existed must still parse and fall back to the documented + /// defaults — backwards-compat invariant. Fixture shared with the + /// LLM / OCR timeout invariants via [`LEGACY_PRE_TIMEOUT_TOML`] + /// (that fixture also predates the multi_hop_* fields). + #[test] + fn legacy_config_without_multi_hop_knobs_uses_defaults() { + let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML) + .expect("parse legacy config"); + assert_eq!(c.rag.multi_hop_max_depth, 3); + assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 5); + assert_eq!(c.rag.multi_hop_max_pool_chunks, 30); + } + #[test] fn image_ocr_env_overrides() { let mut env = HashMap::new(); diff --git a/crates/kebab-core/src/answer.rs b/crates/kebab-core/src/answer.rs index 31e299f..cb2c3ce 100644 --- a/crates/kebab-core/src/answer.rs +++ b/crates/kebab-core/src/answer.rs @@ -29,6 +29,14 @@ pub struct Answer { /// 이면 single-shot. #[serde(default, skip_serializing_if = "Option::is_none")] pub turn_index: Option, + /// p9-fb-41: multi-hop hop trace. `None` for single-pass asks. + /// Each entry records one hop (`decompose` / `decide` / `synthesize`) + /// — the LLM call category, the sub-queries emitted, retrieval + /// counts, and a `forced_stop` flag for cap-driven termination. 
+ /// Wire-additive: `answer.v1` schema_version unchanged; consumers + /// reading older single-pass answers see `hops: None` (or absent). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub hops: Option<Vec<HopRecord>>, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] @@ -55,6 +63,62 @@ pub struct Turn { pub created_at: OffsetDateTime, } +/// p9-fb-41: one entry in [`Answer::hops`] — the per-iteration trace +/// of a multi-hop ask. The pipeline appends a `HopRecord` per LLM +/// call (decompose / decide / synthesize) so a `--multi-hop` user +/// can see what sub-queries the LLM emitted, how many chunks each +/// hop contributed, whether the iter stopped on the model's own +/// signal or hit a cap, and the per-hop LLM latency. +/// +/// Wire-additive — every field uses `#[serde(default)]` where it +/// could plausibly be omitted by a future schema reader. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct HopRecord { + /// 0-based hop index within this ask. `iter=0` is always the + /// initial decompose call; subsequent iters are decide calls; + /// the final iter is the synthesize call. + pub iter: u32, + pub kind: HopKind, + /// Sub-queries the LLM emitted at this iter. For the synthesize + /// hop this is empty (no sub-queries — just the final answer). + #[serde(default)] + pub sub_queries: Vec<String>, + /// Number of *new* chunks the retrieval round contributed to the + /// pool (dedup'd by `chunk_id` — repeated hits from a previous + /// iter do not count). `0` for the decompose hop (no retrieval + /// yet) and the synthesize hop. + pub context_chunks_added: u32, + /// `true` when the pipeline cut the iter loop short because a + /// safety cap fired (`max_depth` / `max_sub_queries_per_iter` / + /// `max_pool_chunks`) rather than because the LLM signalled + /// stop. The user-visible answer still reflects all chunks + /// accumulated up to that point — `forced_stop` is a tracing + /// signal, not a refusal. 
+ pub forced_stop: bool, + /// Wall-clock latency of the LLM call for this hop, in + /// milliseconds. Useful for cost / latency analysis when a + /// `kebab eval` run records `Answer.hops`. + pub llm_call_ms: u32, +} + +/// p9-fb-41: which stage of the multi-hop pipeline a [`HopRecord`] +/// describes. The serde tag matches the wire shape so agents / +/// CLIs can branch on the snake_case string without referencing +/// the Rust enum. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum HopKind { + /// First hop — LLM decomposed the user query into sub-queries. + Decompose, + /// Subsequent hop — LLM was asked whether more retrieval is + /// needed and either emitted new sub-queries (`continue`) or + /// returned an empty array (`stop`). + Decide, + /// Terminal hop — LLM produced the final user-visible answer + /// over the accumulated chunk pool. + Synthesize, +} + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum RefusalReason { diff --git a/crates/kebab-core/src/lib.rs b/crates/kebab-core/src/lib.rs index 3b3b285..b534bf0 100644 --- a/crates/kebab-core/src/lib.rs +++ b/crates/kebab-core/src/lib.rs @@ -56,8 +56,8 @@ pub use search::{ TraceCandidate, TraceFusionInput, TraceTiming, }; pub use answer::{ - Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage, - TraceId, Turn, + Answer, AnswerCitation, AnswerRetrievalSummary, HopKind, HopRecord, ModelRef, + RefusalReason, TokenUsage, TraceId, Turn, }; pub use ingest::{IngestItem, IngestItemKind, IngestReport, SkipExamples}; pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus}; diff --git a/crates/kebab-eval/src/metrics.rs b/crates/kebab-eval/src/metrics.rs index 6a80ed0..15d5cd6 100644 --- a/crates/kebab-eval/src/metrics.rs +++ b/crates/kebab-eval/src/metrics.rs @@ -532,6 +532,7 @@ mod tests { created_at: OffsetDateTime::UNIX_EPOCH, 
conversation_id: None, turn_index: None, + hops: None, } } diff --git a/crates/kebab-rag/src/pipeline.rs b/crates/kebab-rag/src/pipeline.rs index ee54889..b168725 100644 --- a/crates/kebab-rag/src/pipeline.rs +++ b/crates/kebab-rag/src/pipeline.rs @@ -531,6 +531,11 @@ impl RagPipeline { created_at: OffsetDateTime::now_utc(), conversation_id: opts.conversation_id.clone(), turn_index: opts.turn_index, + // p9-fb-41 Step 2 of PR-3: every Answer literal carries + // `hops`. Single-pass + refusal paths leave it `None`; + // only the multi-hop happy path will set `Some(...)` in + // Step 5 once the decide loop populates a hop trace. + hops: None, }; // Drop the moved `finish_reason` early into a tracing breadcrumb; the @@ -843,6 +848,11 @@ impl RagPipeline { created_at: OffsetDateTime::now_utc(), conversation_id: opts.conversation_id.clone(), turn_index: opts.turn_index, + // p9-fb-41 Step 2 of PR-3: every Answer literal carries + // `hops`. Single-pass + refusal paths leave it `None`; + // only the multi-hop happy path will set `Some(...)` in + // Step 5 once the decide loop populates a hop trace. + hops: None, }; tracing::debug!( @@ -979,6 +989,11 @@ impl RagPipeline { created_at: OffsetDateTime::now_utc(), conversation_id: opts.conversation_id.clone(), turn_index: opts.turn_index, + // p9-fb-41 Step 2 of PR-3: every Answer literal carries + // `hops`. Single-pass + refusal paths leave it `None`; + // only the multi-hop happy path will set `Some(...)` in + // Step 5 once the decide loop populates a hop trace. + hops: None, }; if let Some(sink) = &opts.stream_sink { let _ = sink.send(StreamEvent::Final { @@ -1100,6 +1115,11 @@ impl RagPipeline { created_at: OffsetDateTime::now_utc(), conversation_id: opts.conversation_id.clone(), turn_index: opts.turn_index, + // p9-fb-41 Step 2 of PR-3: every Answer literal carries + // `hops`. 
Single-pass + refusal paths leave it `None`; + // only the multi-hop happy path will set `Some(...)` in + // Step 5 once the decide loop populates a hop trace. + hops: None, }; if let Err(e) = self.docs.put_answer(&answer, query, None) { tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed"); @@ -1182,6 +1202,11 @@ impl RagPipeline { created_at: OffsetDateTime::now_utc(), conversation_id: opts.conversation_id.clone(), turn_index: opts.turn_index, + // p9-fb-41 Step 2 of PR-3: every Answer literal carries + // `hops`. Single-pass + refusal paths leave it `None`; + // only the multi-hop happy path will set `Some(...)` in + // Step 5 once the decide loop populates a hop trace. + hops: None, }; if let Err(e) = self.docs.put_answer(&answer, query, None) { tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed"); @@ -1789,6 +1814,7 @@ mod stream_event_serde_tests { created_at: datetime!(2026-05-09 12:00:00 UTC), conversation_id: None, turn_index: None, + hops: None, }; let ev = StreamEvent::Final { answer }; let v = serde_json::to_value(&ev).unwrap(); diff --git a/crates/kebab-tui/tests/ask.rs b/crates/kebab-tui/tests/ask.rs index 8d35e20..c841a54 100644 --- a/crates/kebab-tui/tests/ask.rs +++ b/crates/kebab-tui/tests/ask.rs @@ -77,6 +77,7 @@ fn make_answer(grounded: bool, refusal: Option, body: &str) -> An created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), conversation_id: None, turn_index: None, + hops: None, } } diff --git a/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md b/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md index d56b575..96d53ab 100644 --- a/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md +++ b/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md @@ -85,7 +85,37 @@ XL 작업 — 6 PR 분할 (각 머지 후 누적, 마지막 PR 후 v0.18.0 cut). 
--- -## PR-3: Dynamic iteration (decide loop + caps) +## PR-3 분할 (작업 양 측면, 2026-05-25 사용자 결정) + +**원래 plan**: PR-3 가 wire additive (`Answer.hops`) + RagCfg 노브 + decide loop + ScriptedLm + helper refactor + 5+ tests 단일 PR. + +**실제 분할** (~1500+ 줄 단일 PR → review 부담 + 회귀 위험 ↓): +- **PR-3a (본 PR)**: wire additive (HopRecord + HopKind + Answer.hops) + RagCfg 3 노브 + 모든 Answer literal 갱신 (hops:None). **RAG pipeline 동작 미변경** — additive only. +- **PR-3b (후속)**: dynamic decide loop + ScriptedLm helper + 5+ integration tests + format! named arg + 회차 1 carry-over (mirror refactor / history block helper). + +## PR-3a: Wire additive + RagCfg 노브 (HopRecord type + Answer.hops field) + +**Goal**: 후속 PR (PR-3b decide loop) 의 wire / config foundation. RAG pipeline 동작 변경 없음 — `Answer.hops` 가 모든 path 에서 `None`, RagCfg 새 3 노브가 default 만 적용. PR-3b 가 이 위에서 decide loop 구현. + +**Files**: +- `crates/kebab-core/src/answer.rs`: + - `HopRecord` struct (`iter`, `kind`, `sub_queries`, `context_chunks_added`, `forced_stop`, `llm_call_ms`). + - `HopKind` enum (`Decompose` / `Decide` / `Synthesize`). + - `Answer.hops: Option<Vec<HopRecord>>` field — `#[serde(default, skip_serializing_if = "Option::is_none")]`. +- `crates/kebab-core/src/lib.rs`: `pub use answer::{HopKind, HopRecord, ...}`. +- `crates/kebab-config/src/lib.rs`: + - `RagCfg` 에 `multi_hop_max_depth: u32` (default 3), `multi_hop_max_sub_queries_per_iter: u32` (default 5), `multi_hop_max_pool_chunks: u32` (default 30). `#[serde(default)]` + env override + legacy parse. +- 모든 Answer literal site 갱신 (9 sites: kebab-rag/src/pipeline.rs ×6 + kebab-cli/src/main.rs + kebab-tui/tests/ask.rs + kebab-eval/src/metrics.rs): `hops: None` 명시. 향후 PR-3b 의 ask_multi_hop happy path 만 `Some(hops_trace)` 채움. + +**Tests**: +- `default_multi_hop_max_depth_is_3`, `default_multi_hop_max_sub_queries_per_iter_is_5`, `default_multi_hop_max_pool_chunks_is_30`. +- `env_overrides_multi_hop_knobs`. +- `legacy_config_without_multi_hop_knobs_uses_defaults` (LEGACY_PRE_TIMEOUT_TOML 공유). 
+- 모든 기존 RAG / TUI / CLI / eval test 가 hops:None 추가 후도 통과 (회귀 핀). + +**Wire 영향**: `answer.v1` JSON Schema 의 `hops` optional 필드 — `skip_serializing_if` 가 `None` 일 때 emit 안 함이라 옛 single-pass response 에 변동 없음. wire breaking 아님. JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점). + +## PR-3b: Dynamic iteration (decide loop + caps) — 후속 PR **Goal**: depth=2 fixed → dynamic N-hop. LLM 의 decide signal + max_depth / max_sub_queries / max_pool_chunks cap. -- 2.49.1