feat(rag): fb-41 PR-3a — HopRecord wire + RagCfg multi-hop knobs

PR-3 의 분할 첫 PR. wire additive (HopRecord + HopKind + Answer.hops field) + RagCfg 의 multi_hop_* 3 노브. RAG pipeline 동작 미변경 — 모든 Answer literal 의 `hops = None`. PR-3b (후속) 가 ask_multi_hop 의 happy path 에서 dynamic decide loop 구현 + hops trace 채움.

분할 이유: 원래 PR-3 가 wire + cfg + decide loop + ScriptedLm + helper refactor + 5+ tests 단일 PR 였는데 ~1500 줄 단일 patch 가 review 부담 + 회귀 위험 ↑. additive foundation 부터 ship 후 decide loop 별 PR — 사용자 결정 (2026-05-25).

- `kebab_core::HopRecord` (iter, kind, sub_queries, context_chunks_added, forced_stop, llm_call_ms) + `HopKind` (Decompose / Decide / Synthesize) — wire-additive shape.
- `kebab_core::Answer.hops: Option<Vec<HopRecord>>` — `#[serde(default, skip_serializing_if = "Option::is_none")]`, single-pass / refusal path 는 None, PR-3b 의 multi-hop happy path 가 Some.
- `kebab_config::RagCfg` 에 3 신규 노브:
  - `multi_hop_max_depth: u32` (default 3)
  - `multi_hop_max_sub_queries_per_iter: u32` (default 5)
  - `multi_hop_max_pool_chunks: u32` (default 30)
  3 모두 `#[serde(default)]` + env override (`KEBAB_RAG_MULTI_HOP_MAX_*`) + legacy parse 핀 (`LEGACY_PRE_TIMEOUT_TOML` 공유).
- 9 Answer literal site (pipeline.rs ×6 + kebab-cli + kebab-tui tests + kebab-eval test) 에 `hops: None` 명시 추가. exhaustive field check 가 자동 guard — 빠진 site 시 compile fail.
- plan 의 PR-3 단락 → PR-3a / PR-3b 분할 명시 + scope 정정.

Tests (163 passing across kebab-config + kebab-core + kebab-rag):
- 5 신규 multi-hop knob test (default / env override / legacy parse).
- 기존 50+57+31+19+3+3 test 모두 hops:None 추가 후도 통과.

Wire 영향: `answer.v1` 의 optional `hops` 필드 — `skip_serializing_if = None` 이라 single-pass response 에 emit 안 됨. wire breaking 아님, JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 07:15:01 +00:00
parent 6280abf2df
commit 7150c376bb
8 changed files with 233 additions and 3 deletions
--- a/crates/kebab-config/src/lib.rs
+++ b/crates/kebab-config/src/lib.rs
@@ -181,6 +181,39 @@ pub struct RagCfg {
    pub score_gate: f32,
    pub explain_default: bool,
    pub max_context_tokens: usize,
+    /// p9-fb-41: hard ceiling on the number of multi-hop iterations
+    /// (decompose iter + decide iters). When the LLM keeps returning
+    /// `continue` past this depth the pipeline cuts to `synthesize`
+    /// with `HopRecord.forced_stop = true`. Default `3` — enough for
+    /// most cross-doc reasoning, low enough to bound LLM cost.
+    #[serde(default = "default_multi_hop_max_depth")]
+    pub multi_hop_max_depth: u32,
+    /// p9-fb-41: cap on how many sub-queries the LLM may emit in a
+    /// single decompose / decide call. Runtime counterpart of the
+    /// `MULTI_HOP_MAX_SUB_QUERIES_DEFAULT` const in kebab-rag, which
+    /// serves as the hard floor (NOTE(review): confirm interplay).
+    /// Default `5`.
+    #[serde(default = "default_multi_hop_max_sub_queries_per_iter")]
+    pub multi_hop_max_sub_queries_per_iter: u32,
+    /// p9-fb-41: hard ceiling on the deduped chunk pool. When the
+    /// accumulated pool would exceed this many chunks the pipeline
+    /// stops accepting new retrieval results and forces synthesize
+    /// with `forced_stop = true`. Default `30` — generous for
+    /// 5-hop / 10-hits multi-hop runs while still bounded.
+    #[serde(default = "default_multi_hop_max_pool_chunks")]
+    pub multi_hop_max_pool_chunks: u32,
+}
+
+fn default_multi_hop_max_depth() -> u32 {
+    3
+}
+
+fn default_multi_hop_max_sub_queries_per_iter() -> u32 {
+    5
+}
+
+fn default_multi_hop_max_pool_chunks() -> u32 {
+    30
 }

 /// Settings for the image ingest pipeline (P6). `ocr` controls OCR
@@ -434,6 +467,10 @@ impl Config {
                score_gate: 0.30,
                explain_default: false,
                max_context_tokens: 8000,
+                multi_hop_max_depth: default_multi_hop_max_depth(),
+                multi_hop_max_sub_queries_per_iter:
+                    default_multi_hop_max_sub_queries_per_iter(),
+                multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(),
            },
            image: ImageCfg::defaults(),
            ui: UiCfg::defaults(),
@@ -717,6 +754,21 @@ impl Config {
                        self.rag.max_context_tokens = n;
                    }
                }
+                "KEBAB_RAG_MULTI_HOP_MAX_DEPTH" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_depth = n;
+                    }
+                }
+                "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_sub_queries_per_iter = n;
+                    }
+                }
+                "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_pool_chunks = n;
+                    }
+                }

                // image.ocr
                "KEBAB_IMAGE_OCR_ENABLED" => {
@@ -1092,6 +1144,61 @@ theme = "dark"
        assert_eq!(c.image.ocr.request_timeout_secs, 300);
    }

+    // ── p9-fb-41: multi-hop RAG knobs ────────────────────────────────────
+
+    #[test]
+    fn default_multi_hop_max_depth_is_3() {
+        assert_eq!(Config::defaults().rag.multi_hop_max_depth, 3);
+    }
+
+    #[test]
+    fn default_multi_hop_max_sub_queries_per_iter_is_5() {
+        assert_eq!(
+            Config::defaults().rag.multi_hop_max_sub_queries_per_iter,
+            5
+        );
+    }
+
+    #[test]
+    fn default_multi_hop_max_pool_chunks_is_30() {
+        assert_eq!(Config::defaults().rag.multi_hop_max_pool_chunks, 30);
+    }
+
+    #[test]
+    fn env_overrides_multi_hop_knobs() {
+        let mut env = HashMap::new();
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(),
+            "5".to_string(),
+        );
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER".to_string(),
+            "7".to_string(),
+        );
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS".to_string(),
+            "50".to_string(),
+        );
+        let c = Config::defaults().apply_env(&env);
+        assert_eq!(c.rag.multi_hop_max_depth, 5);
+        assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 7);
+        assert_eq!(c.rag.multi_hop_max_pool_chunks, 50);
+    }
+
+    /// p9-fb-41: a config file written before the multi-hop knobs
+    /// existed must still parse and fall back to the documented
+    /// defaults (3 / 5 / 30) — backwards-compat invariant. Reuses the
+    /// [`LEGACY_PRE_TIMEOUT_TOML`] fixture from the LLM / OCR timeout
+    /// invariants, which also predates the `multi_hop_*` fields.
+    #[test]
+    fn legacy_config_without_multi_hop_knobs_uses_defaults() {
+        let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
+            .expect("parse legacy config");
+        assert_eq!(c.rag.multi_hop_max_depth, 3);
+        assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 5);
+        assert_eq!(c.rag.multi_hop_max_pool_chunks, 30);
+    }
+
    #[test]
    fn image_ocr_env_overrides() {
        let mut env = HashMap::new();