feat(rag): fb-41 PR-3a — HopRecord wire + RagCfg multi-hop knobs
PR-3 의 분할 첫 PR. wire additive (HopRecord + HopKind + Answer.hops field) + RagCfg 의 multi_hop_* 3 노브. RAG pipeline 동작 미변경 — 모든 Answer literal 의 `hops = None`. PR-3b (후속) 가 ask_multi_hop 의 happy path 에서 dynamic decide loop 구현 + hops trace 채움. 분할 이유: 원래 PR-3 가 wire + cfg + decide loop + ScriptedLm + helper refactor + 5+ tests 단일 PR 였는데 ~1500 줄 단일 patch 가 review 부담 + 회귀 위험 ↑. additive foundation 부터 ship 후 decide loop 별 PR — 사용자 결정 (2026-05-25). - `kebab_core::HopRecord` (iter, kind, sub_queries, context_chunks_added, forced_stop, llm_call_ms) + `HopKind` (Decompose / Decide / Synthesize) — wire-additive shape. - `kebab_core::Answer.hops: Option<Vec<HopRecord>>` — `#[serde(default, skip_serializing_if = "Option::is_none")]`, single-pass / refusal path 는 None, PR-3b 의 multi-hop happy path 가 Some. - `kebab_config::RagCfg` 에 3 신규 노브: - `multi_hop_max_depth: u32` (default 3) - `multi_hop_max_sub_queries_per_iter: u32` (default 5) - `multi_hop_max_pool_chunks: u32` (default 30) 3 모두 `#[serde(default)]` + env override (`KEBAB_RAG_MULTI_HOP_MAX_*`) + legacy parse 핀 (`LEGACY_PRE_TIMEOUT_TOML` 공유). - 9 Answer literal site (pipeline.rs ×6 + kebab-cli + kebab-tui tests + kebab-eval test) 에 `hops: None` 명시 추가. exhaustive field check 가 자동 guard — 빠진 site 시 compile fail. - plan 의 PR-3 단락 → PR-3a / PR-3b 분할 명시 + scope 정정. Tests (163 passing across kebab-config + kebab-core + kebab-rag): - 5 신규 multi-hop knob test (default / env override / legacy parse). - 기존 50+57+31+19+3+3 test 모두 hops:None 추가 후도 통과. Wire 영향: `answer.v1` 의 optional `hops` 필드 — `skip_serializing_if = None` 이라 single-pass response 에 emit 안 됨. wire breaking 아님, JSON Schema 갱신은 PR-3b 또는 PR-4 (실제 emit 시점). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -181,6 +181,39 @@ pub struct RagCfg {
|
||||
pub score_gate: f32,
|
||||
pub explain_default: bool,
|
||||
pub max_context_tokens: usize,
|
||||
/// p9-fb-41: hard ceiling on the number of multi-hop iterations
|
||||
/// (decompose iter + decide iters). When the LLM keeps returning
|
||||
/// `continue` past this depth the pipeline cuts to `synthesize`
|
||||
/// with `HopRecord.forced_stop = true`. Default `3` — enough for
|
||||
/// most cross-doc reasoning, low enough to bound LLM cost.
|
||||
#[serde(default = "default_multi_hop_max_depth")]
|
||||
pub multi_hop_max_depth: u32,
|
||||
/// p9-fb-41: cap on how many sub-queries the LLM may emit in a
|
||||
/// single decompose / decide call. Mirrors
|
||||
/// [`MULTI_HOP_MAX_SUB_QUERIES_DEFAULT`] in kebab-rag — the
|
||||
/// const is the hard floor while this is the runtime knob.
|
||||
/// Default `5`.
|
||||
#[serde(default = "default_multi_hop_max_sub_queries_per_iter")]
|
||||
pub multi_hop_max_sub_queries_per_iter: u32,
|
||||
/// p9-fb-41: hard ceiling on the deduped chunk pool. When the
|
||||
/// accumulated pool would exceed this many chunks the pipeline
|
||||
/// stops accepting new retrieval results and forces synthesize
|
||||
/// with `forced_stop = true`. Default `30` — generous for
|
||||
/// 5-hop / 10-hits multi-hop runs while still bounded.
|
||||
#[serde(default = "default_multi_hop_max_pool_chunks")]
|
||||
pub multi_hop_max_pool_chunks: u32,
|
||||
}
|
||||
|
||||
/// Serde default for `RagCfg::multi_hop_max_depth`.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Config::defaults()`, so a config file that omits the key and the
/// built-in defaults agree on the same value (`3`).
fn default_multi_hop_max_depth() -> u32 {
    3
}
|
||||
|
||||
/// Serde default for `RagCfg::multi_hop_max_sub_queries_per_iter`.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Config::defaults()`, so a config file that omits the key and the
/// built-in defaults agree on the same value (`5`).
fn default_multi_hop_max_sub_queries_per_iter() -> u32 {
    5
}
|
||||
|
||||
/// Serde default for `RagCfg::multi_hop_max_pool_chunks`.
///
/// Referenced by the field's `#[serde(default = "...")]` attribute and by
/// `Config::defaults()`, so a config file that omits the key and the
/// built-in defaults agree on the same value (`30`).
fn default_multi_hop_max_pool_chunks() -> u32 {
    30
}
|
||||
|
||||
/// Settings for the image ingest pipeline (P6). `ocr` controls OCR
|
||||
@@ -434,6 +467,10 @@ impl Config {
|
||||
score_gate: 0.30,
|
||||
explain_default: false,
|
||||
max_context_tokens: 8000,
|
||||
multi_hop_max_depth: default_multi_hop_max_depth(),
|
||||
multi_hop_max_sub_queries_per_iter:
|
||||
default_multi_hop_max_sub_queries_per_iter(),
|
||||
multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(),
|
||||
},
|
||||
image: ImageCfg::defaults(),
|
||||
ui: UiCfg::defaults(),
|
||||
@@ -717,6 +754,21 @@ impl Config {
|
||||
self.rag.max_context_tokens = n;
|
||||
}
|
||||
}
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_DEPTH" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.rag.multi_hop_max_depth = n;
|
||||
}
|
||||
}
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.rag.multi_hop_max_sub_queries_per_iter = n;
|
||||
}
|
||||
}
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.rag.multi_hop_max_pool_chunks = n;
|
||||
}
|
||||
}
|
||||
|
||||
// image.ocr
|
||||
"KEBAB_IMAGE_OCR_ENABLED" => {
|
||||
@@ -1092,6 +1144,61 @@ theme = "dark"
|
||||
assert_eq!(c.image.ocr.request_timeout_secs, 300);
|
||||
}
|
||||
|
||||
// ── p9-fb-41: multi-hop RAG knobs ────────────────────────────────────
|
||||
|
||||
/// Built-in defaults must expose `rag.multi_hop_max_depth == 3`.
#[test]
fn default_multi_hop_max_depth_is_3() {
    let cfg = Config::defaults();
    assert_eq!(cfg.rag.multi_hop_max_depth, 3);
}
|
||||
|
||||
/// Built-in defaults must expose
/// `rag.multi_hop_max_sub_queries_per_iter == 5`.
#[test]
fn default_multi_hop_max_sub_queries_per_iter_is_5() {
    let cfg = Config::defaults();
    assert_eq!(cfg.rag.multi_hop_max_sub_queries_per_iter, 5);
}
|
||||
|
||||
/// Built-in defaults must expose `rag.multi_hop_max_pool_chunks == 30`.
#[test]
fn default_multi_hop_max_pool_chunks_is_30() {
    let cfg = Config::defaults();
    assert_eq!(cfg.rag.multi_hop_max_pool_chunks, 30);
}
|
||||
|
||||
/// Each `KEBAB_RAG_MULTI_HOP_MAX_*` environment variable must override the
/// matching `rag.multi_hop_*` field when applied on top of the defaults.
#[test]
fn env_overrides_multi_hop_knobs() {
    // One (variable, value) row per knob; the values differ from the
    // built-in defaults (3 / 5 / 30) so an ignored override is detectable.
    let env: HashMap<String, String> = [
        ("KEBAB_RAG_MULTI_HOP_MAX_DEPTH", "5"),
        ("KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER", "7"),
        ("KEBAB_RAG_MULTI_HOP_MAX_POOL_CHUNKS", "50"),
    ]
    .into_iter()
    .map(|(key, value)| (key.to_string(), value.to_string()))
    .collect();

    let c = Config::defaults().apply_env(&env);

    assert_eq!(c.rag.multi_hop_max_depth, 5);
    assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 7);
    assert_eq!(c.rag.multi_hop_max_pool_chunks, 50);
}
|
||||
|
||||
/// p9-fb-41 (PR-3a): a config file written before the multi-hop knobs
/// existed must still parse and fall back to the documented defaults —
/// backwards-compat invariant. Fixture shared with the LLM / OCR timeout
/// invariants via [`LEGACY_PRE_TIMEOUT_TOML`] (that fixture also predates
/// the multi_hop_* fields).
#[test]
fn legacy_config_without_multi_hop_knobs_uses_defaults() {
    let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");

    // Missing keys must resolve to the serde defaults: 3 / 5 / 30.
    assert_eq!(c.rag.multi_hop_max_depth, 3);
    assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 5);
    assert_eq!(c.rag.multi_hop_max_pool_chunks, 30);
}
|
||||
|
||||
#[test]
|
||||
fn image_ocr_env_overrides() {
|
||||
let mut env = HashMap::new();
|
||||
|
||||
Reference in New Issue
Block a user