Merge pull request 'feat(rag): fb-41 PR-3a HopRecord wire + RagCfg multi-hop knobs' (#168) from feat/fb-41-pr-3-dynamic-decide-loop into main
This commit was merged in pull request #168.
This commit is contained in:
@@ -1639,6 +1639,7 @@ mod tests {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
hops: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -181,6 +181,39 @@ pub struct RagCfg {
|
||||
pub score_gate: f32,
|
||||
pub explain_default: bool,
|
||||
pub max_context_tokens: usize,
|
||||
/// p9-fb-41: hard ceiling on the number of multi-hop iterations
|
||||
/// (decompose iter + decide iters). When the LLM keeps returning
|
||||
/// `continue` past this depth the pipeline cuts to `synthesize`
|
||||
/// with `HopRecord.forced_stop = true`. Default `3` — enough for
|
||||
/// most cross-doc reasoning, low enough to bound LLM cost.
|
||||
#[serde(default = "default_multi_hop_max_depth")]
|
||||
pub multi_hop_max_depth: u32,
|
||||
/// p9-fb-41: cap on how many sub-queries the LLM may emit in a
|
||||
/// single decompose / decide call. Mirrors
|
||||
/// [`MULTI_HOP_MAX_SUB_QUERIES_DEFAULT`] in kebab-rag — the
|
||||
/// const is the hard floor while this is the runtime knob.
|
||||
/// Default `5`.
|
||||
#[serde(default = "default_multi_hop_max_sub_queries_per_iter")]
|
||||
pub multi_hop_max_sub_queries_per_iter: u32,
|
||||
/// p9-fb-41: hard ceiling on the deduped chunk pool. When the
|
||||
/// accumulated pool would exceed this many chunks the pipeline
|
||||
/// stops accepting new retrieval results and forces synthesize
|
||||
/// with `forced_stop = true`. Default `30` — generous for
|
||||
/// 5-hop / 10-hits multi-hop runs while still bounded.
|
||||
#[serde(default = "default_multi_hop_max_pool_chunks")]
|
||||
pub multi_hop_max_pool_chunks: u32,
|
||||
}
|
||||
|
||||
/// Serde fallback for `RagCfg::multi_hop_max_depth`, also used when
/// building the built-in defaults: the multi-hop iteration ceiling
/// applied when the configuration does not set the key.
fn default_multi_hop_max_depth() -> u32 {
    const FALLBACK_MAX_DEPTH: u32 = 3;
    FALLBACK_MAX_DEPTH
}
|
||||
|
||||
/// Serde fallback for `RagCfg::multi_hop_max_sub_queries_per_iter`,
/// also used when building the built-in defaults: the per-call
/// sub-query cap applied when the configuration does not set the key.
fn default_multi_hop_max_sub_queries_per_iter() -> u32 {
    const FALLBACK_SUB_QUERIES_PER_ITER: u32 = 5;
    FALLBACK_SUB_QUERIES_PER_ITER
}
|
||||
|
||||
/// Serde fallback for `RagCfg::multi_hop_max_pool_chunks`, also used
/// when building the built-in defaults: the deduped chunk-pool ceiling
/// applied when the configuration does not set the key.
fn default_multi_hop_max_pool_chunks() -> u32 {
    const FALLBACK_MAX_POOL_CHUNKS: u32 = 30;
    FALLBACK_MAX_POOL_CHUNKS
}
|
||||
|
||||
/// Settings for the image ingest pipeline (P6). `ocr` controls OCR
|
||||
@@ -434,6 +467,10 @@ impl Config {
|
||||
score_gate: 0.30,
|
||||
explain_default: false,
|
||||
max_context_tokens: 8000,
|
||||
multi_hop_max_depth: default_multi_hop_max_depth(),
|
||||
multi_hop_max_sub_queries_per_iter:
|
||||
default_multi_hop_max_sub_queries_per_iter(),
|
||||
multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(),
|
||||
},
|
||||
image: ImageCfg::defaults(),
|
||||
ui: UiCfg::defaults(),
|
||||
@@ -717,6 +754,21 @@ impl Config {
|
||||
self.rag.max_context_tokens = n;
|
||||
}
|
||||
}
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_DEPTH" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.rag.multi_hop_max_depth = n;
|
||||
}
|
||||
}
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.rag.multi_hop_max_sub_queries_per_iter = n;
|
||||
}
|
||||
}
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.rag.multi_hop_max_pool_chunks = n;
|
||||
}
|
||||
}
|
||||
|
||||
// image.ocr
|
||||
"KEBAB_IMAGE_OCR_ENABLED" => {
|
||||
@@ -1092,6 +1144,61 @@ theme = "dark"
|
||||
assert_eq!(c.image.ocr.request_timeout_secs, 300);
|
||||
}
|
||||
|
||||
// ── p9-fb-41: multi-hop RAG knobs ────────────────────────────────────
|
||||
|
||||
/// The built-in configuration caps multi-hop iteration depth at 3.
#[test]
fn default_multi_hop_max_depth_is_3() {
    let cfg = Config::defaults();
    let depth = cfg.rag.multi_hop_max_depth;
    assert_eq!(depth, 3);
}
|
||||
|
||||
/// The built-in configuration allows at most 5 sub-queries per
/// decompose / decide call.
#[test]
fn default_multi_hop_max_sub_queries_per_iter_is_5() {
    let cfg = Config::defaults();
    let per_iter = cfg.rag.multi_hop_max_sub_queries_per_iter;
    assert_eq!(per_iter, 5);
}
|
||||
|
||||
/// The built-in configuration bounds the deduped chunk pool at 30.
#[test]
fn default_multi_hop_max_pool_chunks_is_30() {
    let cfg = Config::defaults();
    let pool_cap = cfg.rag.multi_hop_max_pool_chunks;
    assert_eq!(pool_cap, 30);
}
|
||||
|
||||
/// Each `KEBAB_RAG_MULTI_HOP_*` environment variable replaces the
/// matching `rag.multi_hop_*` default when applied via `apply_env`.
#[test]
fn env_overrides_multi_hop_knobs() {
    // One (variable, value) row per knob; turned into the owned
    // String -> String map that `apply_env` is given.
    let env: HashMap<String, String> = [
        ("KEBAB_RAG_MULTI_HOP_MAX_DEPTH", "5"),
        ("KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER", "7"),
        ("KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS", "50"),
    ]
    .iter()
    .map(|&(key, value)| (key.to_owned(), value.to_owned()))
    .collect();

    let overridden = Config::defaults().apply_env(&env);
    let rag = &overridden.rag;

    assert_eq!(rag.multi_hop_max_depth, 5);
    assert_eq!(rag.multi_hop_max_sub_queries_per_iter, 7);
    assert_eq!(rag.multi_hop_max_pool_chunks, 50);
}
|
||||
|
||||
/// Backwards-compat invariant (post-PR-3 fb-41): a config file that
/// was written before the multi-hop knobs existed must still parse,
/// with every `multi_hop_*` field falling back to its documented
/// default. Reuses [`LEGACY_PRE_TIMEOUT_TOML`], the fixture shared
/// with the LLM / OCR timeout invariants, because that fixture also
/// predates the `multi_hop_*` fields.
#[test]
fn legacy_config_without_multi_hop_knobs_uses_defaults() {
    let legacy: Config =
        toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
    let rag = &legacy.rag;

    assert_eq!(rag.multi_hop_max_depth, 3);
    assert_eq!(rag.multi_hop_max_sub_queries_per_iter, 5);
    assert_eq!(rag.multi_hop_max_pool_chunks, 30);
}
|
||||
|
||||
#[test]
|
||||
fn image_ocr_env_overrides() {
|
||||
let mut env = HashMap::new();
|
||||
|
||||
@@ -29,6 +29,14 @@ pub struct Answer {
|
||||
/// 이면 single-shot.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub turn_index: Option<u32>,
|
||||
/// p9-fb-41: multi-hop hop trace. `None` for single-pass asks.
|
||||
/// Each entry records one hop (`decompose` / `decide` / `synthesize`)
|
||||
/// — the LLM call category, the sub-queries emitted, retrieval
|
||||
/// counts, and a `forced_stop` flag for cap-driven termination.
|
||||
/// Wire-additive: `answer.v1` schema_version unchanged; consumers
|
||||
/// reading older single-pass answers see `hops: None` (or absent).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub hops: Option<Vec<HopRecord>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
@@ -55,6 +63,62 @@ pub struct Turn {
|
||||
pub created_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
/// p9-fb-41: one entry in [`Answer::hops`] — the per-iteration trace
|
||||
/// of a multi-hop ask. The pipeline appends a `HopRecord` per LLM
|
||||
/// call (decompose / decide / synthesize) so a `--multi-hop` user
|
||||
/// can see what sub-queries the LLM emitted, how many chunks each
|
||||
/// hop contributed, whether the iter stopped on the model's own
|
||||
/// signal or hit a cap, and the per-hop LLM latency.
|
||||
///
|
||||
/// Wire-additive — every field uses `#[serde(default)]` where it
|
||||
/// could plausibly be omitted by a future schema reader.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct HopRecord {
|
||||
/// 0-based hop index within this ask. `iter=0` is always the
|
||||
/// initial decompose call; subsequent iters are decide calls;
|
||||
/// the final iter is the synthesize call.
|
||||
pub iter: u32,
|
||||
pub kind: HopKind,
|
||||
/// Sub-queries the LLM emitted at this iter. For the synthesize
|
||||
/// hop this is empty (no sub-queries — just the final answer).
|
||||
#[serde(default)]
|
||||
pub sub_queries: Vec<String>,
|
||||
/// Number of *new* chunks the retrieval round contributed to the
|
||||
/// pool (dedup'd by `chunk_id` — repeated hits from a previous
|
||||
/// iter do not count). `0` for the decompose hop (no retrieval
|
||||
/// yet) and the synthesize hop.
|
||||
pub context_chunks_added: u32,
|
||||
/// `true` when the pipeline cut the iter loop short because a
|
||||
/// safety cap fired (`max_depth` / `max_total_sub_queries` /
|
||||
/// `max_pool_chunks`) rather than because the LLM signalled
|
||||
/// stop. The user-visible answer still reflects all chunks
|
||||
/// accumulated up to that point — `forced_stop` is a tracing
|
||||
/// signal, not a refusal.
|
||||
pub forced_stop: bool,
|
||||
/// Wall-clock latency of the LLM call for this hop, in
|
||||
/// milliseconds. Useful for cost / latency analysis when a
|
||||
/// `kebab eval` run records `Answer.hops`.
|
||||
pub llm_call_ms: u32,
|
||||
}
|
||||
|
||||
/// p9-fb-41: which stage of the multi-hop pipeline a [`HopRecord`]
|
||||
/// describes. The serde tag matches the wire shape so agents /
|
||||
/// CLIs can branch on the snake_case string without referencing
|
||||
/// the Rust enum.
|
||||
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum HopKind {
|
||||
/// First hop — LLM decomposed the user query into sub-queries.
|
||||
Decompose,
|
||||
/// Subsequent hop — LLM was asked whether more retrieval is
|
||||
/// needed and either emitted new sub-queries (`continue`) or
|
||||
/// returned an empty array (`stop`).
|
||||
Decide,
|
||||
/// Terminal hop — LLM produced the final user-visible answer
|
||||
/// over the accumulated chunk pool.
|
||||
Synthesize,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RefusalReason {
|
||||
|
||||
@@ -56,8 +56,8 @@ pub use search::{
|
||||
TraceCandidate, TraceFusionInput, TraceTiming,
|
||||
};
|
||||
pub use answer::{
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
|
||||
TraceId, Turn,
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, HopKind, HopRecord, ModelRef,
|
||||
RefusalReason, TokenUsage, TraceId, Turn,
|
||||
};
|
||||
pub use ingest::{IngestItem, IngestItemKind, IngestReport, SkipExamples};
|
||||
pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus};
|
||||
|
||||
@@ -532,6 +532,7 @@ mod tests {
|
||||
created_at: OffsetDateTime::UNIX_EPOCH,
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
hops: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -531,6 +531,11 @@ impl RagPipeline {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
// p9-fb-41 Step 2 of PR-3: every Answer literal carries
|
||||
// `hops`. Single-pass + refusal paths leave it `None`;
|
||||
// only the multi-hop happy path will set `Some(...)` in
|
||||
// Step 5 once the decide loop populates a hop trace.
|
||||
hops: None,
|
||||
};
|
||||
|
||||
// Drop the moved `finish_reason` early into a tracing breadcrumb; the
|
||||
@@ -843,6 +848,11 @@ impl RagPipeline {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
// p9-fb-41 Step 2 of PR-3: every Answer literal carries
|
||||
// `hops`. Single-pass + refusal paths leave it `None`;
|
||||
// only the multi-hop happy path will set `Some(...)` in
|
||||
// Step 5 once the decide loop populates a hop trace.
|
||||
hops: None,
|
||||
};
|
||||
|
||||
tracing::debug!(
|
||||
@@ -979,6 +989,11 @@ impl RagPipeline {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
// p9-fb-41 Step 2 of PR-3: every Answer literal carries
|
||||
// `hops`. Single-pass + refusal paths leave it `None`;
|
||||
// only the multi-hop happy path will set `Some(...)` in
|
||||
// Step 5 once the decide loop populates a hop trace.
|
||||
hops: None,
|
||||
};
|
||||
if let Some(sink) = &opts.stream_sink {
|
||||
let _ = sink.send(StreamEvent::Final {
|
||||
@@ -1100,6 +1115,11 @@ impl RagPipeline {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
// p9-fb-41 Step 2 of PR-3: every Answer literal carries
|
||||
// `hops`. Single-pass + refusal paths leave it `None`;
|
||||
// only the multi-hop happy path will set `Some(...)` in
|
||||
// Step 5 once the decide loop populates a hop trace.
|
||||
hops: None,
|
||||
};
|
||||
if let Err(e) = self.docs.put_answer(&answer, query, None) {
|
||||
tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
|
||||
@@ -1182,6 +1202,11 @@ impl RagPipeline {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
// p9-fb-41 Step 2 of PR-3: every Answer literal carries
|
||||
// `hops`. Single-pass + refusal paths leave it `None`;
|
||||
// only the multi-hop happy path will set `Some(...)` in
|
||||
// Step 5 once the decide loop populates a hop trace.
|
||||
hops: None,
|
||||
};
|
||||
if let Err(e) = self.docs.put_answer(&answer, query, None) {
|
||||
tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
|
||||
@@ -1789,6 +1814,7 @@ mod stream_event_serde_tests {
|
||||
created_at: datetime!(2026-05-09 12:00:00 UTC),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
hops: None,
|
||||
};
|
||||
let ev = StreamEvent::Final { answer };
|
||||
let v = serde_json::to_value(&ev).unwrap();
|
||||
|
||||
@@ -77,6 +77,7 @@ fn make_answer(grounded: bool, refusal: Option<RefusalReason>, body: &str) -> An
|
||||
created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
hops: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -85,7 +85,37 @@ XL 작업 — 6 PR 분할 (각 머지 후 누적, 마지막 PR 후 v0.18.0 cut).
|
||||
|
||||
---
|
||||
|
||||
## PR-3: Dynamic iteration (decide loop + caps)
|
||||
## PR-3 분할 (작업 양 측면, 2026-05-25 사용자 결정)
|
||||
|
||||
**원래 plan**: PR-3 가 wire additive (`Answer.hops`) + RagCfg 노브 + decide loop + ScriptedLm + helper refactor + 5+ tests 단일 PR.
|
||||
|
||||
**실제 분할** (~1500+ 줄 단일 PR → review 부담 + 회귀 위험 ↓):
|
||||
- **PR-3a (본 PR)**: wire additive (HopRecord + HopKind + Answer.hops) + RagCfg 3 노브 + 모든 Answer literal 갱신 (hops:None). **RAG pipeline 동작 미변경** — additive only.
|
||||
- **PR-3b (후속)**: dynamic decide loop + ScriptedLm helper + 5+ integration tests + format! named arg + 회차 1 carry-over (mirror refactor / history block helper).
|
||||
|
||||
## PR-3a: Wire additive + RagCfg 노브 (HopRecord type + Answer.hops field)
|
||||
|
||||
**Goal**: 후속 PR (PR-3b decide loop) 의 wire / config foundation. RAG pipeline 동작 변경 없음 — `Answer.hops` 가 모든 path 에서 `None`, RagCfg 새 3 노브가 default 만 적용. PR-3b 가 이 위에서 decide loop 구현.
|
||||
|
||||
**Files**:
|
||||
- `crates/kebab-core/src/answer.rs`:
|
||||
- `HopRecord` struct (`iter`, `kind`, `sub_queries`, `context_chunks_added`, `forced_stop`, `llm_call_ms`).
|
||||
- `HopKind` enum (`Decompose` / `Decide` / `Synthesize`).
|
||||
- `Answer.hops: Option<Vec<HopRecord>>` field — `#[serde(default, skip_serializing_if = "Option::is_none")]`.
|
||||
- `crates/kebab-core/src/lib.rs`: `pub use answer::{HopKind, HopRecord, ...}`.
|
||||
- `crates/kebab-config/src/lib.rs`:
|
||||
- `RagCfg` 에 `multi_hop_max_depth: u32` (default 3), `multi_hop_max_sub_queries_per_iter: u32` (default 5), `multi_hop_max_pool_chunks: u32` (default 30). 각 필드에 `#[serde(default = "default_multi_hop_…")]` (bare `#[serde(default)]` 는 `u32` 를 0 으로 채우므로 named default fn 사용) + env override + legacy parse.
|
||||
- 모든 Answer literal site 갱신 (9 sites: kebab-rag/src/pipeline.rs ×6 + kebab-cli/src/main.rs + kebab-tui/tests/ask.rs + kebab-eval/src/metrics.rs): `hops: None` 명시. 향후 PR-3b 의 ask_multi_hop happy path 만 `Some(hops_trace)` 채움.
|
||||
|
||||
**Tests**:
|
||||
- `default_multi_hop_max_depth_is_3`, `default_multi_hop_max_sub_queries_per_iter_is_5`, `default_multi_hop_max_pool_chunks_is_30`.
|
||||
- `env_overrides_multi_hop_knobs`.
|
||||
- `legacy_config_without_multi_hop_knobs_uses_defaults` (LEGACY_PRE_TIMEOUT_TOML 공유).
|
||||
- 모든 기존 RAG / TUI / CLI / eval test 가 hops:None 추가 후도 통과 (회귀 핀).
|
||||
|
||||
**Wire 영향**: `answer.v1` JSON Schema 의 `hops` optional 필드 — `skip_serializing_if` 가 `None` 일 때 emit 안 함이라 옛 single-pass response 에 변동 없음. wire breaking 아님. JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점).
|
||||
|
||||
## PR-3b: Dynamic iteration (decide loop + caps) — 후속 PR
|
||||
|
||||
**Goal**: depth=2 fixed → dynamic N-hop. LLM 의 decide signal + max_depth / max_sub_queries / max_pool_chunks cap.
|
||||
|
||||
|
||||
Reference in New Issue
Block a user