Merge pull request 'feat(rag): fb-41 PR-3a HopRecord wire + RagCfg multi-hop knobs' (#168) from feat/fb-41-pr-3-dynamic-decide-loop into main

2026-05-25 07:18:27 +00:00
parent 6280abf2df 7150c376bb
commit cd1d4fb807
8 changed files with 233 additions and 3 deletions
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -1639,6 +1639,7 @@ mod tests {
            created_at: OffsetDateTime::now_utc(),
            conversation_id: None,
            turn_index: None,
+            hops: None,
        }
    }

--- a/crates/kebab-config/src/lib.rs
+++ b/crates/kebab-config/src/lib.rs
@@ -181,6 +181,39 @@ pub struct RagCfg {
    pub score_gate: f32,
    pub explain_default: bool,
    pub max_context_tokens: usize,
+    /// p9-fb-41: hard ceiling on the number of multi-hop iterations
+    /// (decompose iter + decide iters). When the LLM keeps returning
+    /// `continue` past this depth the pipeline cuts to `synthesize`
+    /// with `HopRecord.forced_stop = true`. Default `3` — enough for
+    /// most cross-doc reasoning, low enough to bound LLM cost.
+    #[serde(default = "default_multi_hop_max_depth")]
+    pub multi_hop_max_depth: u32,
+    /// p9-fb-41: cap on how many sub-queries the LLM may emit in a
+    /// single decompose / decide call. Mirrors
+    /// `MULTI_HOP_MAX_SUB_QUERIES_DEFAULT` in kebab-rag — the
+    /// const is the hard floor while this is the runtime knob.
+    /// Default `5`.
+    #[serde(default = "default_multi_hop_max_sub_queries_per_iter")]
+    pub multi_hop_max_sub_queries_per_iter: u32,
+    /// p9-fb-41: hard ceiling on the deduped chunk pool. When the
+    /// accumulated pool would exceed this many chunks the pipeline
+    /// stops accepting new retrieval results and forces synthesize
+    /// with `forced_stop = true`. Default `30` — generous for
+    /// 5-hop / 10-hits multi-hop runs while still bounded.
+    #[serde(default = "default_multi_hop_max_pool_chunks")]
+    pub multi_hop_max_pool_chunks: u32,
+}
+
+fn default_multi_hop_max_depth() -> u32 {
+    3
+}
+
+fn default_multi_hop_max_sub_queries_per_iter() -> u32 {
+    5
+}
+
+fn default_multi_hop_max_pool_chunks() -> u32 {
+    30
 }

 /// Settings for the image ingest pipeline (P6). `ocr` controls OCR
@@ -434,6 +467,10 @@ impl Config {
                score_gate: 0.30,
                explain_default: false,
                max_context_tokens: 8000,
+                multi_hop_max_depth: default_multi_hop_max_depth(),
+                multi_hop_max_sub_queries_per_iter:
+                    default_multi_hop_max_sub_queries_per_iter(),
+                multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(),
            },
            image: ImageCfg::defaults(),
            ui: UiCfg::defaults(),
@@ -717,6 +754,21 @@ impl Config {
                        self.rag.max_context_tokens = n;
                    }
                }
+                "KEBAB_RAG_MULTI_HOP_MAX_DEPTH" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_depth = n;
+                    }
+                }
+                "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_sub_queries_per_iter = n;
+                    }
+                }
+                "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_pool_chunks = n;
+                    }
+                }

                // image.ocr
                "KEBAB_IMAGE_OCR_ENABLED" => {
@@ -1092,6 +1144,61 @@ theme = "dark"
        assert_eq!(c.image.ocr.request_timeout_secs, 300);
    }

+    // ── p9-fb-41: multi-hop RAG knobs ────────────────────────────────────
+
+    #[test]
+    fn default_multi_hop_max_depth_is_3() {
+        assert_eq!(Config::defaults().rag.multi_hop_max_depth, 3);
+    }
+
+    #[test]
+    fn default_multi_hop_max_sub_queries_per_iter_is_5() {
+        assert_eq!(
+            Config::defaults().rag.multi_hop_max_sub_queries_per_iter,
+            5
+        );
+    }
+
+    #[test]
+    fn default_multi_hop_max_pool_chunks_is_30() {
+        assert_eq!(Config::defaults().rag.multi_hop_max_pool_chunks, 30);
+    }
+
+    #[test]
+    fn env_overrides_multi_hop_knobs() {
+        let mut env = HashMap::new();
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(),
+            "5".to_string(),
+        );
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER".to_string(),
+            "7".to_string(),
+        );
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS".to_string(),
+            "50".to_string(),
+        );
+        let c = Config::defaults().apply_env(&env);
+        assert_eq!(c.rag.multi_hop_max_depth, 5);
+        assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 7);
+        assert_eq!(c.rag.multi_hop_max_pool_chunks, 50);
+    }
+
+    /// post-PR-3 fb-41: a config file written before the multi-hop
+    /// knobs existed must still parse and fall back to the documented
+    /// defaults — backwards-compat invariant. Fixture shared with the
+    /// LLM / OCR timeout invariants via [`LEGACY_PRE_TIMEOUT_TOML`]
+    /// (that fixture also predates the multi_hop_* fields).
+    #[test]
+    fn legacy_config_without_multi_hop_knobs_uses_defaults() {
+        let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
+            .expect("parse legacy config");
+        assert_eq!(c.rag.multi_hop_max_depth, 3);
+        assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 5);
+        assert_eq!(c.rag.multi_hop_max_pool_chunks, 30);
+    }
+
    #[test]
    fn image_ocr_env_overrides() {
        let mut env = HashMap::new();
--- a/crates/kebab-core/src/answer.rs
+++ b/crates/kebab-core/src/answer.rs
@@ -29,6 +29,14 @@ pub struct Answer {
    /// 이면 single-shot.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub turn_index: Option<u32>,
+    /// p9-fb-41: multi-hop hop trace. `None` for single-pass asks.
+    /// Each entry records one hop (`decompose` / `decide` / `synthesize`)
+    /// — the LLM call category, the sub-queries emitted, retrieval
+    /// counts, and a `forced_stop` flag for cap-driven termination.
+    /// Wire-additive: `answer.v1` schema_version unchanged; consumers
+    /// reading older single-pass answers see `hops: None` (or absent).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub hops: Option<Vec<HopRecord>>,
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -55,6 +63,62 @@ pub struct Turn {
    pub created_at: OffsetDateTime,
 }

+/// p9-fb-41: one entry in [`Answer::hops`] — the per-iteration trace
+/// of a multi-hop ask. The pipeline appends a `HopRecord` per LLM
+/// call (decompose / decide / synthesize) so a `--multi-hop` user
+/// can see what sub-queries the LLM emitted, how many chunks each
+/// hop contributed, whether the iter stopped on the model's own
+/// signal or hit a cap, and the per-hop LLM latency.
+///
+/// Wire-additive on [`Answer`]. Within a record only `sub_queries`
+/// is `#[serde(default)]`; every other field is required on the wire.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct HopRecord {
+    /// 0-based hop index within this ask. `iter=0` is always the
+    /// initial decompose call; subsequent iters are decide calls;
+    /// the final iter is the synthesize call.
+    pub iter: u32,
+    pub kind: HopKind,
+    /// Sub-queries the LLM emitted at this iter. For the synthesize
+    /// hop this is empty (no sub-queries — just the final answer).
+    #[serde(default)]
+    pub sub_queries: Vec<String>,
+    /// Number of *new* chunks the retrieval round contributed to the
+    /// pool (dedup'd by `chunk_id` — repeated hits from a previous
+    /// iter do not count). `0` for the decompose hop (no retrieval
+    /// yet) and the synthesize hop.
+    pub context_chunks_added: u32,
+    /// `true` when the pipeline cut the iter loop short because a
+    /// safety cap fired (`max_depth` / `max_total_sub_queries` /
+    /// `max_pool_chunks`) rather than because the LLM signalled
+    /// stop. The user-visible answer still reflects all chunks
+    /// accumulated up to that point — `forced_stop` is a tracing
+    /// signal, not a refusal.
+    pub forced_stop: bool,
+    /// Wall-clock latency of the LLM call for this hop, in
+    /// milliseconds. Useful for cost / latency analysis when a
+    /// `kebab eval` run records `Answer.hops`.
+    pub llm_call_ms: u32,
+}
+
+/// p9-fb-41: which stage of the multi-hop pipeline a [`HopRecord`]
+/// describes. The serde tag matches the wire shape so agents /
+/// CLIs can branch on the snake_case string without referencing
+/// the Rust enum.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum HopKind {
+    /// First hop — LLM decomposed the user query into sub-queries.
+    Decompose,
+    /// Subsequent hop — LLM was asked whether more retrieval is
+    /// needed and either emitted new sub-queries (`continue`) or
+    /// returned an empty array (`stop`).
+    Decide,
+    /// Terminal hop — LLM produced the final user-visible answer
+    /// over the accumulated chunk pool.
+    Synthesize,
+}
+
 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
 pub enum RefusalReason {
--- a/crates/kebab-core/src/lib.rs
+++ b/crates/kebab-core/src/lib.rs
@@ -56,8 +56,8 @@ pub use search::{
    TraceCandidate, TraceFusionInput, TraceTiming,
 };
 pub use answer::{
-    Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
-    TraceId, Turn,
+    Answer, AnswerCitation, AnswerRetrievalSummary, HopKind, HopRecord, ModelRef,
+    RefusalReason, TokenUsage, TraceId, Turn,
 };
 pub use ingest::{IngestItem, IngestItemKind, IngestReport, SkipExamples};
 pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus};
--- a/crates/kebab-eval/src/metrics.rs
+++ b/crates/kebab-eval/src/metrics.rs
@@ -532,6 +532,7 @@ mod tests {
            created_at: OffsetDateTime::UNIX_EPOCH,
            conversation_id: None,
            turn_index: None,
+            hops: None,
        }
    }

--- a/crates/kebab-rag/src/pipeline.rs
+++ b/crates/kebab-rag/src/pipeline.rs
@@ -531,6 +531,11 @@ impl RagPipeline {
            created_at: OffsetDateTime::now_utc(),
            conversation_id: opts.conversation_id.clone(),
            turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
        };

        // Drop the moved `finish_reason` early into a tracing breadcrumb; the
@@ -843,6 +848,11 @@ impl RagPipeline {
            created_at: OffsetDateTime::now_utc(),
            conversation_id: opts.conversation_id.clone(),
            turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
        };

        tracing::debug!(
@@ -979,6 +989,11 @@ impl RagPipeline {
            created_at: OffsetDateTime::now_utc(),
            conversation_id: opts.conversation_id.clone(),
            turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
        };
        if let Some(sink) = &opts.stream_sink {
            let _ = sink.send(StreamEvent::Final {
@@ -1100,6 +1115,11 @@ impl RagPipeline {
            created_at: OffsetDateTime::now_utc(),
            conversation_id: opts.conversation_id.clone(),
            turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
        };
        if let Err(e) = self.docs.put_answer(&answer, query, None) {
            tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
@@ -1182,6 +1202,11 @@ impl RagPipeline {
            created_at: OffsetDateTime::now_utc(),
            conversation_id: opts.conversation_id.clone(),
            turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
        };
        if let Err(e) = self.docs.put_answer(&answer, query, None) {
            tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
@@ -1789,6 +1814,7 @@ mod stream_event_serde_tests {
            created_at: datetime!(2026-05-09 12:00:00 UTC),
            conversation_id: None,
            turn_index: None,
+            hops: None,
        };
        let ev = StreamEvent::Final { answer };
        let v = serde_json::to_value(&ev).unwrap();
--- a/crates/kebab-tui/tests/ask.rs
+++ b/crates/kebab-tui/tests/ask.rs
@@ -77,6 +77,7 @@ fn make_answer(grounded: bool, refusal: Option<RefusalReason>, body: &str) -> An
        created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
        conversation_id: None,
        turn_index: None,
+        hops: None,
    }
 }

--- a/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md
+++ b/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md
@@ -85,7 +85,37 @@ XL 작업 — 6 PR 분할 (각 머지 후 누적, 마지막 PR 후 v0.18.0 cut).

 ---

-## PR-3: Dynamic iteration (decide loop + caps)
+## PR-3 분할 (작업 양 측면, 2026-05-25 사용자 결정)
+
+**원래 plan**: PR-3 가 wire additive (`Answer.hops`) + RagCfg 노브 + decide loop + ScriptedLm + helper refactor + 5+ tests 단일 PR.
+
+**실제 분할** (~1500+ 줄 단일 PR → review 부담 + 회귀 위험 ↓):
+- **PR-3a (본 PR)**: wire additive (HopRecord + HopKind + Answer.hops) + RagCfg 3 노브 + 모든 Answer literal 갱신 (hops:None). **RAG pipeline 동작 미변경** — additive only.
+- **PR-3b (후속)**: dynamic decide loop + ScriptedLm helper + 5+ integration tests + format! named arg + 회차 1 carry-over (mirror refactor / history block helper).
+
+## PR-3a: Wire additive + RagCfg 노브 (HopRecord type + Answer.hops field)
+
+**Goal**: 후속 PR (PR-3b decide loop) 의 wire / config foundation. RAG pipeline 동작 변경 없음 — `Answer.hops` 가 모든 path 에서 `None`, RagCfg 새 3 노브가 default 만 적용. PR-3b 가 이 위에서 decide loop 구현.
+
+**Files**:
+- `crates/kebab-core/src/answer.rs`:
+  - `HopRecord` struct (`iter`, `kind`, `sub_queries`, `context_chunks_added`, `forced_stop`, `llm_call_ms`).
+  - `HopKind` enum (`Decompose` / `Decide` / `Synthesize`).
+  - `Answer.hops: Option<Vec<HopRecord>>` field — `#[serde(default, skip_serializing_if = "Option::is_none")]`.
+- `crates/kebab-core/src/lib.rs`: `pub use answer::{HopKind, HopRecord, ...}`.
+- `crates/kebab-config/src/lib.rs`:
+  - `RagCfg` 에 `multi_hop_max_depth: u32` (default 3), `multi_hop_max_sub_queries_per_iter: u32` (default 5), `multi_hop_max_pool_chunks: u32` (default 30). `#[serde(default)]` + env override + legacy parse.
+- 모든 Answer literal site 갱신 (9 sites: kebab-rag/src/pipeline.rs ×6 + kebab-cli/src/main.rs + kebab-tui/tests/ask.rs + kebab-eval/src/metrics.rs): `hops: None` 명시. 향후 PR-3b 의 ask_multi_hop happy path 만 `Some(hops_trace)` 채움.
+
+**Tests**:
+- `default_multi_hop_max_depth_is_3`, `default_multi_hop_max_sub_queries_per_iter_is_5`, `default_multi_hop_max_pool_chunks_is_30`.
+- `env_overrides_multi_hop_knobs`.
+- `legacy_config_without_multi_hop_knobs_uses_defaults` (LEGACY_PRE_TIMEOUT_TOML 공유).
+- 모든 기존 RAG / TUI / CLI / eval test 가 hops:None 추가 후도 통과 (회귀 핀).
+
+**Wire 영향**: `answer.v1` JSON Schema 의 `hops` optional 필드 — `skip_serializing_if` 가 `None` 일 때 emit 안 함이라 옛 single-pass response 에 변동 없음. wire breaking 아님. JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점).
+
+## PR-3b: Dynamic iteration (decide loop + caps) — 후속 PR

 **Goal**: depth=2 fixed → dynamic N-hop. LLM 의 decide signal + max_depth / max_sub_queries / max_pool_chunks cap.