From 7150c376bbad2d9ab2eb17503c6dc7d5908faaeb Mon Sep 17 00:00:00 2001
From: altair823 <dlsrks0734@gmail.com>
Date: Mon, 25 May 2026 07:15:01 +0000
Subject: [PATCH] =?UTF-8?q?feat(rag):=20fb-41=20PR-3a=20=E2=80=94=20HopRec?=
 =?UTF-8?q?ord=20wire=20+=20RagCfg=20multi-hop=20knobs?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-3 의 분할 첫 PR. wire additive (HopRecord + HopKind + Answer.hops
field) + RagCfg 의 multi_hop_* 3 노브. RAG pipeline 동작 미변경 —
모든 Answer literal 의 `hops = None`. PR-3b (후속) 가 ask_multi_hop
의 happy path 에서 dynamic decide loop 구현 + hops trace 채움.

분할 이유: 원래 PR-3 가 wire + cfg + decide loop + ScriptedLm +
helper refactor + 5+ tests 단일 PR 이었는데 ~1500 줄 단일 patch 가
review 부담 + 회귀 위험 ↑. additive foundation 부터 ship 후 decide
loop 별 PR — 사용자 결정 (2026-05-25).

- `kebab_core::HopRecord` (iter, kind, sub_queries,
  context_chunks_added, forced_stop, llm_call_ms) + `HopKind`
  (Decompose / Decide / Synthesize) — wire-additive shape.
- `kebab_core::Answer.hops: Option<Vec<HopRecord>>` —
  `#[serde(default, skip_serializing_if = "Option::is_none")]`,
  single-pass / refusal path 는 None, PR-3b 의 multi-hop happy
  path 가 Some.
- `kebab_config::RagCfg` 에 3 신규 노브:
  - `multi_hop_max_depth: u32` (default 3)
  - `multi_hop_max_sub_queries_per_iter: u32` (default 5)
  - `multi_hop_max_pool_chunks: u32` (default 30)
  3 모두 `#[serde(default)]` + env override
  (`KEBAB_RAG_MULTI_HOP_MAX_*`) + legacy parse 핀
  (`LEGACY_PRE_TIMEOUT_TOML` 공유).
- 9 Answer literal site (pipeline.rs ×6 + kebab-cli + kebab-tui
  tests + kebab-eval test) 에 `hops: None` 명시 추가. exhaustive
  field check 가 자동 guard — 빠진 site 시 compile fail.
- plan 의 PR-3 단락 → PR-3a / PR-3b 분할 명시 + scope 정정.

Tests (163 passing across kebab-config + kebab-core + kebab-rag):
- 5 신규 multi-hop knob test (default / env override / legacy parse).
- 기존 50+57+31+19+3+3 test 모두 hops:None 추가 후도 통과.

Wire 영향: `answer.v1` 의 optional `hops` 필드 —
`skip_serializing_if = "Option::is_none"` 이라 single-pass response 에
emit 안 됨. wire breaking 아님, JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-cli/src/main.rs                  |   1 +
 crates/kebab-config/src/lib.rs                | 107 ++++++++++++++++++
 crates/kebab-core/src/answer.rs               |  64 +++++++++++
 crates/kebab-core/src/lib.rs                  |   4 +-
 crates/kebab-eval/src/metrics.rs              |   1 +
 crates/kebab-rag/src/pipeline.rs              |  26 +++++
 crates/kebab-tui/tests/ask.rs                 |   1 +
 .../2026-05-25-p9-fb-41-multi-hop-rag.md      |  32 +++++-
 8 files changed, 233 insertions(+), 3 deletions(-)
diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index 1f951a0..5ed1325 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -1639,6 +1639,7 @@ mod tests {
             created_at: OffsetDateTime::now_utc(),
             conversation_id: None,
             turn_index: None,
+            hops: None,
         }
     }
 
diff --git a/crates/kebab-config/src/lib.rs b/crates/kebab-config/src/lib.rs
index 77455fd..8e3c04e 100644
--- a/crates/kebab-config/src/lib.rs
+++ b/crates/kebab-config/src/lib.rs
@@ -181,6 +181,39 @@ pub struct RagCfg {
     pub score_gate: f32,
     pub explain_default: bool,
     pub max_context_tokens: usize,
+    /// p9-fb-41: hard ceiling on the number of multi-hop iterations
+    /// (decompose iter + decide iters). When the LLM keeps returning
+    /// `continue` past this depth the pipeline cuts to `synthesize`
+    /// with `HopRecord.forced_stop = true`. Default `3` — enough for
+    /// most cross-doc reasoning, low enough to bound LLM cost.
+    #[serde(default = "default_multi_hop_max_depth")]
+    pub multi_hop_max_depth: u32,
+    /// p9-fb-41: cap on how many sub-queries the LLM may emit in a
+    /// single decompose / decide call. Mirrors the
+    /// `MULTI_HOP_MAX_SUB_QUERIES_DEFAULT` const in kebab-rag — the
+    /// const is the hard floor while this is the runtime knob.
+    /// Default `5`.
+    #[serde(default = "default_multi_hop_max_sub_queries_per_iter")]
+    pub multi_hop_max_sub_queries_per_iter: u32,
+    /// p9-fb-41: hard ceiling on the deduped chunk pool. When the
+    /// accumulated pool would exceed this many chunks the pipeline
+    /// stops accepting new retrieval results and forces synthesize
+    /// with `forced_stop = true`. Default `30` — generous for
+    /// 5-hop / 10-hits multi-hop runs while still bounded.
+    #[serde(default = "default_multi_hop_max_pool_chunks")]
+    pub multi_hop_max_pool_chunks: u32,
+}
+
+fn default_multi_hop_max_depth() -> u32 {
+    3
+}
+
+fn default_multi_hop_max_sub_queries_per_iter() -> u32 {
+    5
+}
+
+fn default_multi_hop_max_pool_chunks() -> u32 {
+    30
 }
 
 /// Settings for the image ingest pipeline (P6). `ocr` controls OCR
@@ -434,6 +467,10 @@ impl Config {
                 score_gate: 0.30,
                 explain_default: false,
                 max_context_tokens: 8000,
+                multi_hop_max_depth: default_multi_hop_max_depth(),
+                multi_hop_max_sub_queries_per_iter:
+                    default_multi_hop_max_sub_queries_per_iter(),
+                multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(),
             },
             image: ImageCfg::defaults(),
             ui: UiCfg::defaults(),
@@ -717,6 +754,21 @@ impl Config {
                         self.rag.max_context_tokens = n;
                     }
                 }
+                "KEBAB_RAG_MULTI_HOP_MAX_DEPTH" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_depth = n;
+                    }
+                }
+                "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_sub_queries_per_iter = n;
+                    }
+                }
+                "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS" => {
+                    if let Ok(n) = v.parse::<u32>() {
+                        self.rag.multi_hop_max_pool_chunks = n;
+                    }
+                }
 
                 // image.ocr
                 "KEBAB_IMAGE_OCR_ENABLED" => {
@@ -1092,6 +1144,61 @@ theme = "dark"
         assert_eq!(c.image.ocr.request_timeout_secs, 300);
     }
 
+    // ── p9-fb-41: multi-hop RAG knobs ────────────────────────────────────
+
+    #[test]
+    fn default_multi_hop_max_depth_is_3() {
+        assert_eq!(Config::defaults().rag.multi_hop_max_depth, 3);
+    }
+
+    #[test]
+    fn default_multi_hop_max_sub_queries_per_iter_is_5() {
+        assert_eq!(
+            Config::defaults().rag.multi_hop_max_sub_queries_per_iter,
+            5
+        );
+    }
+
+    #[test]
+    fn default_multi_hop_max_pool_chunks_is_30() {
+        assert_eq!(Config::defaults().rag.multi_hop_max_pool_chunks, 30);
+    }
+
+    #[test]
+    fn env_overrides_multi_hop_knobs() {
+        let mut env = HashMap::new();
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(),
+            "5".to_string(),
+        );
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER".to_string(),
+            "7".to_string(),
+        );
+        env.insert(
+            "KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS".to_string(),
+            "50".to_string(),
+        );
+        let c = Config::defaults().apply_env(&env);
+        assert_eq!(c.rag.multi_hop_max_depth, 5);
+        assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 7);
+        assert_eq!(c.rag.multi_hop_max_pool_chunks, 50);
+    }
+
+    /// post-PR-3 fb-41: a config file written before the multi-hop
+    /// knobs existed must still parse and fall back to the documented
+    /// defaults — backwards-compat invariant. Fixture shared with the
+    /// LLM / OCR timeout invariants via [`LEGACY_PRE_TIMEOUT_TOML`]
+    /// (that fixture also predates the multi_hop_* fields).
+    #[test]
+    fn legacy_config_without_multi_hop_knobs_uses_defaults() {
+        let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
+            .expect("parse legacy config");
+        assert_eq!(c.rag.multi_hop_max_depth, 3);
+        assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 5);
+        assert_eq!(c.rag.multi_hop_max_pool_chunks, 30);
+    }
+
     #[test]
     fn image_ocr_env_overrides() {
         let mut env = HashMap::new();
diff --git a/crates/kebab-core/src/answer.rs b/crates/kebab-core/src/answer.rs
index 31e299f..cb2c3ce 100644
--- a/crates/kebab-core/src/answer.rs
+++ b/crates/kebab-core/src/answer.rs
@@ -29,6 +29,14 @@ pub struct Answer {
     /// 이면 single-shot.
     #[serde(default, skip_serializing_if = "Option::is_none")]
     pub turn_index: Option<u32>,
+    /// p9-fb-41: multi-hop hop trace. `None` for single-pass asks.
+    /// Each entry records one hop (`decompose` / `decide` / `synthesize`)
+    /// — the LLM call category, the sub-queries emitted, retrieval
+    /// counts, and a `forced_stop` flag for cap-driven termination.
+    /// Wire-additive: `answer.v1` schema_version unchanged; consumers
+    /// reading older single-pass answers see `hops: None` (or absent).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub hops: Option<Vec<HopRecord>>,
 }
 
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -55,6 +63,62 @@ pub struct Turn {
     pub created_at: OffsetDateTime,
 }
 
+/// p9-fb-41: one entry in [`Answer::hops`] — the per-iteration trace
+/// of a multi-hop ask. The pipeline appends a `HopRecord` per LLM
+/// call (decompose / decide / synthesize) so a `--multi-hop` user
+/// can see what sub-queries the LLM emitted, how many chunks each
+/// hop contributed, whether the iter stopped on the model's own
+/// signal or hit a cap, and the per-hop LLM latency.
+///
+/// Wire-additive — only `sub_queries` carries `#[serde(default)]`
+/// (may be omitted on the wire); every other field is required.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct HopRecord {
+    /// 0-based hop index within this ask. `iter=0` is always the
+    /// initial decompose call; subsequent iters are decide calls;
+    /// the final iter is the synthesize call.
+    pub iter: u32,
+    pub kind: HopKind,
+    /// Sub-queries the LLM emitted at this iter. For the synthesize
+    /// hop this is empty (no sub-queries — just the final answer).
+    #[serde(default)]
+    pub sub_queries: Vec<String>,
+    /// Number of *new* chunks the retrieval round contributed to the
+    /// pool (dedup'd by `chunk_id` — repeated hits from a previous
+    /// iter do not count). `0` for the decompose hop (no retrieval
+    /// yet) and the synthesize hop.
+    pub context_chunks_added: u32,
+    /// `true` when the pipeline cut the iter loop short because a
+    /// safety cap fired (`max_depth` / `max_total_sub_queries` /
+    /// `max_pool_chunks`) rather than because the LLM signalled
+    /// stop. The user-visible answer still reflects all chunks
+    /// accumulated up to that point — `forced_stop` is a tracing
+    /// signal, not a refusal.
+    pub forced_stop: bool,
+    /// Wall-clock latency of the LLM call for this hop, in
+    /// milliseconds. Useful for cost / latency analysis when a
+    /// `kebab eval` run records `Answer.hops`.
+    pub llm_call_ms: u32,
+}
+
+/// p9-fb-41: which stage of the multi-hop pipeline a [`HopRecord`]
+/// describes. The serde tag matches the wire shape so agents /
+/// CLIs can branch on the snake_case string without referencing
+/// the Rust enum.
+#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum HopKind {
+    /// First hop — LLM decomposed the user query into sub-queries.
+    Decompose,
+    /// Subsequent hop — LLM was asked whether more retrieval is
+    /// needed and either emitted new sub-queries (`continue`) or
+    /// returned an empty array (`stop`).
+    Decide,
+    /// Terminal hop — LLM produced the final user-visible answer
+    /// over the accumulated chunk pool.
+    Synthesize,
+}
+
 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
 pub enum RefusalReason {
diff --git a/crates/kebab-core/src/lib.rs b/crates/kebab-core/src/lib.rs
index 3b3b285..b534bf0 100644
--- a/crates/kebab-core/src/lib.rs
+++ b/crates/kebab-core/src/lib.rs
@@ -56,8 +56,8 @@ pub use search::{
     TraceCandidate, TraceFusionInput, TraceTiming,
 };
 pub use answer::{
-    Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
-    TraceId, Turn,
+    Answer, AnswerCitation, AnswerRetrievalSummary, HopKind, HopRecord, ModelRef,
+    RefusalReason, TokenUsage, TraceId, Turn,
 };
 pub use ingest::{IngestItem, IngestItemKind, IngestReport, SkipExamples};
 pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus};
diff --git a/crates/kebab-eval/src/metrics.rs b/crates/kebab-eval/src/metrics.rs
index 6a80ed0..15d5cd6 100644
--- a/crates/kebab-eval/src/metrics.rs
+++ b/crates/kebab-eval/src/metrics.rs
@@ -532,6 +532,7 @@ mod tests {
             created_at: OffsetDateTime::UNIX_EPOCH,
             conversation_id: None,
             turn_index: None,
+            hops: None,
         }
     }
 
diff --git a/crates/kebab-rag/src/pipeline.rs b/crates/kebab-rag/src/pipeline.rs
index ee54889..b168725 100644
--- a/crates/kebab-rag/src/pipeline.rs
+++ b/crates/kebab-rag/src/pipeline.rs
@@ -531,6 +531,11 @@ impl RagPipeline {
             created_at: OffsetDateTime::now_utc(),
             conversation_id: opts.conversation_id.clone(),
             turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
         };
 
         // Drop the moved `finish_reason` early into a tracing breadcrumb; the
@@ -843,6 +848,11 @@ impl RagPipeline {
             created_at: OffsetDateTime::now_utc(),
             conversation_id: opts.conversation_id.clone(),
             turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
         };
 
         tracing::debug!(
@@ -979,6 +989,11 @@ impl RagPipeline {
             created_at: OffsetDateTime::now_utc(),
             conversation_id: opts.conversation_id.clone(),
             turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
         };
         if let Some(sink) = &opts.stream_sink {
             let _ = sink.send(StreamEvent::Final {
@@ -1100,6 +1115,11 @@ impl RagPipeline {
             created_at: OffsetDateTime::now_utc(),
             conversation_id: opts.conversation_id.clone(),
             turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
         };
         if let Err(e) = self.docs.put_answer(&answer, query, None) {
             tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
@@ -1182,6 +1202,11 @@ impl RagPipeline {
             created_at: OffsetDateTime::now_utc(),
             conversation_id: opts.conversation_id.clone(),
             turn_index: opts.turn_index,
+            // p9-fb-41 Step 2 of PR-3: every Answer literal carries
+            // `hops`. Single-pass + refusal paths leave it `None`;
+            // only the multi-hop happy path will set `Some(...)` in
+            // Step 5 once the decide loop populates a hop trace.
+            hops: None,
         };
         if let Err(e) = self.docs.put_answer(&answer, query, None) {
             tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
@@ -1789,6 +1814,7 @@ mod stream_event_serde_tests {
             created_at: datetime!(2026-05-09 12:00:00 UTC),
             conversation_id: None,
             turn_index: None,
+            hops: None,
         };
         let ev = StreamEvent::Final { answer };
         let v = serde_json::to_value(&ev).unwrap();
diff --git a/crates/kebab-tui/tests/ask.rs b/crates/kebab-tui/tests/ask.rs
index 8d35e20..c841a54 100644
--- a/crates/kebab-tui/tests/ask.rs
+++ b/crates/kebab-tui/tests/ask.rs
@@ -77,6 +77,7 @@ fn make_answer(grounded: bool, refusal: Option<RefusalReason>, body: &str) -> An
         created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
         conversation_id: None,
         turn_index: None,
+        hops: None,
     }
 }
 
diff --git a/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md b/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md
index d56b575..96d53ab 100644
--- a/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md
+++ b/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md
@@ -85,7 +85,37 @@ XL 작업 — 6 PR 분할 (각 머지 후 누적, 마지막 PR 후 v0.18.0 cut).
 
 ---
 
-## PR-3: Dynamic iteration (decide loop + caps)
+## PR-3 분할 (작업 양 측면, 2026-05-25 사용자 결정)
+
+**원래 plan**: PR-3 가 wire additive (`Answer.hops`) + RagCfg 노브 + decide loop + ScriptedLm + helper refactor + 5+ tests 단일 PR.
+
+**실제 분할** (~1500+ 줄 단일 PR → review 부담 + 회귀 위험 ↓):
+- **PR-3a (본 PR)**: wire additive (HopRecord + HopKind + Answer.hops) + RagCfg 3 노브 + 모든 Answer literal 갱신 (hops:None). **RAG pipeline 동작 미변경** — additive only.
+- **PR-3b (후속)**: dynamic decide loop + ScriptedLm helper + 5+ integration tests + format! named arg + 회차 1 carry-over (mirror refactor / history block helper).
+
+## PR-3a: Wire additive + RagCfg 노브 (HopRecord type + Answer.hops field)
+
+**Goal**: 후속 PR (PR-3b decide loop) 의 wire / config foundation. RAG pipeline 동작 변경 없음 — `Answer.hops` 가 모든 path 에서 `None`, RagCfg 새 3 노브가 default 만 적용. PR-3b 가 이 위에서 decide loop 구현.
+
+**Files**:
+- `crates/kebab-core/src/answer.rs`:
+  - `HopRecord` struct (`iter`, `kind`, `sub_queries`, `context_chunks_added`, `forced_stop`, `llm_call_ms`).
+  - `HopKind` enum (`Decompose` / `Decide` / `Synthesize`).
+  - `Answer.hops: Option<Vec<HopRecord>>` field — `#[serde(default, skip_serializing_if = "Option::is_none")]`.
+- `crates/kebab-core/src/lib.rs`: `pub use answer::{HopKind, HopRecord, ...}`.
+- `crates/kebab-config/src/lib.rs`:
+  - `RagCfg` 에 `multi_hop_max_depth: u32` (default 3), `multi_hop_max_sub_queries_per_iter: u32` (default 5), `multi_hop_max_pool_chunks: u32` (default 30). `#[serde(default)]` + env override + legacy parse.
+- 모든 Answer literal site 갱신 (9 sites: kebab-rag/src/pipeline.rs ×6 + kebab-cli/src/main.rs + kebab-tui/tests/ask.rs + kebab-eval/src/metrics.rs): `hops: None` 명시. 향후 PR-3b 의 ask_multi_hop happy path 만 `Some(hops_trace)` 채움.
+
+**Tests**:
+- `default_multi_hop_max_depth_is_3`, `default_multi_hop_max_sub_queries_per_iter_is_5`, `default_multi_hop_max_pool_chunks_is_30`.
+- `env_overrides_multi_hop_knobs`.
+- `legacy_config_without_multi_hop_knobs_uses_defaults` (LEGACY_PRE_TIMEOUT_TOML 공유).
+- 모든 기존 RAG / TUI / CLI / eval test 가 hops:None 추가 후도 통과 (회귀 핀).
+
+**Wire 영향**: `answer.v1` JSON Schema 의 `hops` optional 필드 — `skip_serializing_if` 가 `None` 일 때 emit 안 함이라 옛 single-pass response 에 변동 없음. wire breaking 아님. JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점).
+
+## PR-3b: Dynamic iteration (decide loop + caps) — 후속 PR
 
 **Goal**: depth=2 fixed → dynamic N-hop. LLM 의 decide signal + max_depth / max_sub_queries / max_pool_chunks cap.
 
-- 
2.49.1