chore(rag): PR #170 회차 1 리뷰 반영

(A) ScriptedLm doc 의 `Arc<Vec<String>>` 표기 → 실제 구현 (`Vec<String>` +
    `AtomicUsize`, 외부에서 `Arc::new(ScriptedLm::new(...))` 로 wrap)
    반영.
(B) ScriptedLm::new doc 의 미존재 `with_*` builder 언급 제거.
(C) refuse path 의 hops 보존 회귀 핀 2 건 추가 (`tests/multi_hop.rs`):
    - `multi_hop_refuse_no_chunks_preserves_hops_trace`: empty pool →
      `refuse_no_chunks(Some(hops))` → Answer.hops = Some([Decompose,
      Decide]).
    - `multi_hop_refuse_score_gate_preserves_hops_trace`: top score 0.10
      < 0.30 gate → `refuse_score_gate(Some(hops))` → 같은 shape.
    refuse_* widening + ask_multi_hop 의 forwarding wiring 이 reverting
    되면 두 test 가 회귀 잡음.
(D) test 5 의 redundant `assert_ne!(.., Some(MultiHopDecomposeFailed))`
    제거 — `assert_eq!(.., None)` 이미 함의. 메시지에 의도 통합.

검증
- `cargo test -p kebab-rag -j 1 --test multi_hop` — 7 (5+2) 모두 통과.
- `cargo clippy -p kebab-rag --all-targets -j 1 -- -D warnings` clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-25 08:22:58 +00:00
parent 6188a50c1c
commit 104363a0db
2 changed files with 122 additions and 12 deletions

View File

@@ -266,10 +266,11 @@ pub fn id32(prefix: &str) -> String {
/// of `RagPipeline::ask_multi_hop` — each can return a different
/// payload (`["q1","q2"]`, `[]`, `"final answer [#1]"`, etc.).
///
/// Internally `Arc<Vec<String>> + AtomicUsize` so the type is
/// `Send + Sync` and can be wrapped in `Arc<dyn LanguageModel>`.
/// Tests can read `calls()` for an assertion on the expected LLM
/// call count.
/// Internally `Vec<String>` (immutable after construction) plus an
/// `AtomicUsize` index counter, so the type is `Send + Sync` and
/// tests wrap it in `Arc::new(ScriptedLm::new(...))` to share with
/// the pipeline. Tests can read `calls()` for an assertion on the
/// expected LLM call count.
///
/// Exhaustion (more calls than scripted responses) panics — tests
/// that need an "infinite" final response can supply a longer
@@ -291,8 +292,10 @@ pub struct ScriptedLm {
impl ScriptedLm {
/// Build a scripted LM with the default model_id/provider used by
/// the rest of the test suite (`mock-lm` / `mock`) and the
/// MockLanguageModel-equivalent canned usage. Call `with_*`
/// builders if a test needs to override the defaults.
/// MockLanguageModel-equivalent canned usage. No knobs are
/// exposed today — every multi-hop test exercises the pipeline
/// flow, not the LM identity. Add builders only when a test
/// genuinely needs to override defaults.
pub fn new(responses: Vec<&str>) -> Self {
Self {
model_id: "mock-lm".to_string(),

View File

@@ -320,12 +320,8 @@ fn multi_hop_decide_parse_failure_falls_through_to_synthesize() {
);
assert_eq!(
answer.refusal_reason, None,
"decide parse failure is graceful degrade, not refusal"
);
assert_ne!(
answer.refusal_reason,
Some(RefusalReason::MultiHopDecomposeFailed),
"MultiHopDecomposeFailed is reserved for the initial decompose hop"
"decide parse failure is graceful degrade, not refusal\
MultiHopDecomposeFailed is reserved for the initial decompose hop"
);
assert_eq!(
lm_handle.calls(),
@@ -346,3 +342,114 @@ fn multi_hop_decide_parse_failure_falls_through_to_synthesize() {
flag stays false even though we synthesize early"
);
}
// ── 6. refuse path: NoChunks preserves partial hop trace ──────────────────
//
// PR-3b-ii widens `refuse_no_chunks` to accept `hops:
// Option<Vec<HopRecord>>` and wires `ask_multi_hop` to forward the
// partial trace. This test pins that contract — a refusal Answer
// still carries the decompose + decide hops the loop accumulated
// before pool came up empty.
#[test]
fn multi_hop_refuse_no_chunks_preserves_hops_trace() {
let env = RagEnv::new();
// Retriever returns 0 hits — pool stays empty → refuse_no_chunks.
let retriever = Arc::new(ScriptedRetriever::new(vec![vec![]]));
let retriever_handle = retriever.clone();
let retriever_dyn: Arc<dyn Retriever> = retriever;
// Only one LM call needed (decompose). Decide is skipped because
// `pool.is_empty()` triggers the (Vec::new(), 0) shortcut. If a
// bug calls the LM beyond decompose, ScriptedLm panics on
// exhaustion and the test fails loudly.
let lm = Arc::new(ScriptedLm::new(vec![r#"["q1"]"#]));
let lm_handle = lm.clone();
let lm_dyn: Arc<dyn LanguageModel> = lm;
let pipeline =
RagPipeline::new(env.config.clone(), retriever_dyn, lm_dyn, env.sqlite.clone());
let answer = pipeline.ask("q", multi_hop_opts()).unwrap();
assert!(!answer.grounded);
assert_eq!(answer.refusal_reason, Some(RefusalReason::NoChunks));
assert_eq!(retriever_handle.calls(), 1, "single sub-query → single retrieve");
assert_eq!(lm_handle.calls(), 1, "decompose only — decide skipped (empty pool), no synthesize");
let hops = answer
.hops
.expect("PR-3b-ii: refuse_no_chunks must preserve the partial hop trace");
assert_eq!(
hops.len(),
2,
"[Decompose, Decide(empty_pool_skip)] — synthesize never ran"
);
assert_eq!(hops[0].kind, HopKind::Decompose);
assert_eq!(hops[0].sub_queries, vec!["q1"]);
assert_eq!(hops[1].kind, HopKind::Decide);
assert!(hops[1].sub_queries.is_empty());
assert_eq!(
hops[1].context_chunks_added, 0,
"retrieve returned 0 hits → 0 added to pool"
);
}
// ── 7. refuse path: ScoreGate preserves partial hop trace ─────────────────
#[test]
fn multi_hop_refuse_score_gate_preserves_hops_trace() {
let env = RagEnv::new();
let cid = i32_below_gate_chunk(&env);
// Top score 0.10 is well below the default gate (0.30) — the
// score-gate refusal fires after the pool has been built, so
// the decide LLM call did run and the hop trace contains both
// Decompose and Decide entries.
let hits = vec![mk_hit(1, &cid, &id32("d_low"), "notes/low.md", 0.10, &["Low"])];
let retriever = Arc::new(ScriptedRetriever::new(vec![hits]));
let retriever_dyn: Arc<dyn Retriever> = retriever;
// decompose + decide (pool not empty so decide fires) — synthesize
// never runs because we refuse before pack_context.
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#,
r#"[]"#,
]));
let lm_handle = lm.clone();
let lm_dyn: Arc<dyn LanguageModel> = lm;
let pipeline =
RagPipeline::new(env.config.clone(), retriever_dyn, lm_dyn, env.sqlite.clone());
let answer = pipeline.ask("q", multi_hop_opts()).unwrap();
assert!(!answer.grounded);
assert_eq!(answer.refusal_reason, Some(RefusalReason::ScoreGate));
assert_eq!(
lm_handle.calls(),
2,
"decompose + decide ran; synthesize skipped by gate"
);
let hops = answer
.hops
.expect("PR-3b-ii: refuse_score_gate must preserve the partial hop trace");
assert_eq!(
hops.len(),
2,
"[Decompose, Decide(stop)] — synthesize never ran"
);
assert_eq!(hops[0].kind, HopKind::Decompose);
assert_eq!(hops[1].kind, HopKind::Decide);
assert_eq!(
hops[1].context_chunks_added, 1,
"the low-score chunk did enter the pool — gate fires after pool build"
);
}
/// Seed a chunk + return its id. Helper for the score-gate test so
/// the test body stays focused on the hop-trace assertions.
fn i32_below_gate_chunk(env: &RagEnv) -> String {
let cid = id32("c_low");
let did = id32("d_low");
env.seed_chunk(&cid, &did, "notes/low.md", "low score text", &["Low"]);
cid
}