chore(rag): PR #170 회차 1 리뷰 반영

(A) ScriptedLm doc 의 `Arc<Vec<String>>` 표기 → 실제 구현 (`Vec<String>` + `AtomicUsize`, 외부에서 `Arc::new(ScriptedLm::new(...))` 로 wrap) 반영. (B) ScriptedLm::new doc 의 미존재 `with_*` builder 언급 제거. (C) refuse path 의 hops 보존 회귀 핀 2 건 추가 (`tests/multi_hop.rs`): - `multi_hop_refuse_no_chunks_preserves_hops_trace`: empty pool → `refuse_no_chunks(Some(hops))` → Answer.hops = Some([Decompose, Decide]). - `multi_hop_refuse_score_gate_preserves_hops_trace`: top score 0.10 < 0.30 gate → `refuse_score_gate(Some(hops))` → 같은 shape. refuse_* widening + ask_multi_hop 의 forwarding wiring 이 reverting 되면 두 test 가 회귀 잡음. (D) test 5 의 redundant `assert_ne!(.., Some(MultiHopDecomposeFailed))` 제거 — `assert_eq!(.., None)` 이미 함의. 메시지에 의도 통합. 검증 - `cargo test -p kebab-rag -j 1 --test multi_hop` — 7 (5+2) 모두 통과. - `cargo clippy -p kebab-rag --all-targets -j 1 -- -D warnings` clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 08:22:58 +00:00
parent 6188a50c1c
commit 104363a0db
2 changed files with 122 additions and 12 deletions
--- a/crates/kebab-rag/tests/common/mod.rs
+++ b/crates/kebab-rag/tests/common/mod.rs
@@ -266,10 +266,11 @@ pub fn id32(prefix: &str) -> String {
 /// of `RagPipeline::ask_multi_hop` — each can return a different
 /// payload (`["q1","q2"]`, `[]`, `"final answer [#1]"`, etc.).
 ///
-/// Internally `Arc<Vec<String>> + AtomicUsize` so the type is
-/// `Send + Sync` and can be wrapped in `Arc<dyn LanguageModel>`.
-/// Tests can read `calls()` for an assertion on the expected LLM
-/// call count.
+/// Internally `Vec<String>` (immutable after construction) plus an
+/// `AtomicUsize` index counter, so the type is `Send + Sync` and
+/// tests wrap it in `Arc::new(ScriptedLm::new(...))` to share with
+/// the pipeline. Tests can read `calls()` for an assertion on the
+/// expected LLM call count.
 ///
 /// Exhaustion (more calls than scripted responses) panics — tests
 /// that need an "infinite" final response can supply a longer
@@ -291,8 +292,10 @@ pub struct ScriptedLm {
 impl ScriptedLm {
    /// Build a scripted LM with the default model_id/provider used by
    /// the rest of the test suite (`mock-lm` / `mock`) and the
-    /// MockLanguageModel-equivalent canned usage. Call `with_*`
-    /// builders if a test needs to override the defaults.
+    /// MockLanguageModel-equivalent canned usage. No knobs are
+    /// exposed today — every multi-hop test exercises the pipeline
+    /// flow, not the LM identity. Add builders only when a test
+    /// genuinely needs to override defaults.
    pub fn new(responses: Vec<&str>) -> Self {
        Self {
            model_id: "mock-lm".to_string(),
--- a/crates/kebab-rag/tests/multi_hop.rs
+++ b/crates/kebab-rag/tests/multi_hop.rs
@@ -320,12 +320,8 @@ fn multi_hop_decide_parse_failure_falls_through_to_synthesize() {
    );
    assert_eq!(
        answer.refusal_reason, None,
-        "decide parse failure is graceful degrade, not refusal"
-    );
-    assert_ne!(
-        answer.refusal_reason,
-        Some(RefusalReason::MultiHopDecomposeFailed),
-        "MultiHopDecomposeFailed is reserved for the initial decompose hop"
+        "decide parse failure is graceful degrade, not refusal — \
+         MultiHopDecomposeFailed is reserved for the initial decompose hop"
    );
    assert_eq!(
        lm_handle.calls(),
@@ -346,3 +342,114 @@ fn multi_hop_decide_parse_failure_falls_through_to_synthesize() {
         flag stays false even though we synthesize early"
    );
 }
+
+// ── 6. refuse path: NoChunks preserves partial hop trace ──────────────────
+//
+// PR-3b-ii widens `refuse_no_chunks` to accept `hops:
+// Option<Vec<HopRecord>>` and wires `ask_multi_hop` to forward the
+// partial trace. This test pins that contract — a refusal Answer
+// still carries the decompose + decide hops the loop accumulated
+// before pool came up empty.
+
+#[test]
+fn multi_hop_refuse_no_chunks_preserves_hops_trace() {
+    let env = RagEnv::new();
+    // Retriever returns 0 hits — pool stays empty → refuse_no_chunks.
+    let retriever = Arc::new(ScriptedRetriever::new(vec![vec![]]));
+    let retriever_handle = retriever.clone();
+    let retriever_dyn: Arc<dyn Retriever> = retriever;
+
+    // Only one LM call needed (decompose). Decide is skipped because
+    // `pool.is_empty()` triggers the (Vec::new(), 0) shortcut. If a
+    // bug calls the LM beyond decompose, ScriptedLm panics on
+    // exhaustion and the test fails loudly.
+    let lm = Arc::new(ScriptedLm::new(vec![r#"["q1"]"#]));
+    let lm_handle = lm.clone();
+    let lm_dyn: Arc<dyn LanguageModel> = lm;
+    let pipeline =
+        RagPipeline::new(env.config.clone(), retriever_dyn, lm_dyn, env.sqlite.clone());
+
+    let answer = pipeline.ask("q", multi_hop_opts()).unwrap();
+
+    assert!(!answer.grounded);
+    assert_eq!(answer.refusal_reason, Some(RefusalReason::NoChunks));
+    assert_eq!(retriever_handle.calls(), 1, "single sub-query → single retrieve");
+    assert_eq!(lm_handle.calls(), 1, "decompose only — decide skipped (empty pool), no synthesize");
+
+    let hops = answer
+        .hops
+        .expect("PR-3b-ii: refuse_no_chunks must preserve the partial hop trace");
+    assert_eq!(
+        hops.len(),
+        2,
+        "[Decompose, Decide(empty_pool_skip)] — synthesize never ran"
+    );
+    assert_eq!(hops[0].kind, HopKind::Decompose);
+    assert_eq!(hops[0].sub_queries, vec!["q1"]);
+    assert_eq!(hops[1].kind, HopKind::Decide);
+    assert!(hops[1].sub_queries.is_empty());
+    assert_eq!(
+        hops[1].context_chunks_added, 0,
+        "retrieve returned 0 hits → 0 added to pool"
+    );
+}
+
+// ── 7. refuse path: ScoreGate preserves partial hop trace ─────────────────
+
+#[test]
+fn multi_hop_refuse_score_gate_preserves_hops_trace() {
+    let env = RagEnv::new();
+    let cid = i32_below_gate_chunk(&env);
+    // Top score 0.10 is well below the default gate (0.30) — the
+    // score-gate refusal fires after the pool has been built, so
+    // the decide LLM call did run and the hop trace contains both
+    // Decompose and Decide entries.
+    let hits = vec![mk_hit(1, &cid, &id32("d_low"), "notes/low.md", 0.10, &["Low"])];
+    let retriever = Arc::new(ScriptedRetriever::new(vec![hits]));
+    let retriever_dyn: Arc<dyn Retriever> = retriever;
+
+    // decompose + decide (pool not empty so decide fires) — synthesize
+    // never runs because we refuse before pack_context.
+    let lm = Arc::new(ScriptedLm::new(vec![
+        r#"["q1"]"#,
+        r#"[]"#,
+    ]));
+    let lm_handle = lm.clone();
+    let lm_dyn: Arc<dyn LanguageModel> = lm;
+    let pipeline =
+        RagPipeline::new(env.config.clone(), retriever_dyn, lm_dyn, env.sqlite.clone());
+
+    let answer = pipeline.ask("q", multi_hop_opts()).unwrap();
+
+    assert!(!answer.grounded);
+    assert_eq!(answer.refusal_reason, Some(RefusalReason::ScoreGate));
+    assert_eq!(
+        lm_handle.calls(),
+        2,
+        "decompose + decide ran; synthesize skipped by gate"
+    );
+
+    let hops = answer
+        .hops
+        .expect("PR-3b-ii: refuse_score_gate must preserve the partial hop trace");
+    assert_eq!(
+        hops.len(),
+        2,
+        "[Decompose, Decide(stop)] — synthesize never ran"
+    );
+    assert_eq!(hops[0].kind, HopKind::Decompose);
+    assert_eq!(hops[1].kind, HopKind::Decide);
+    assert_eq!(
+        hops[1].context_chunks_added, 1,
+        "the low-score chunk did enter the pool — gate fires after pool build"
+    );
+}
+
+/// Seed a chunk + return its id. Helper for the score-gate test so
+/// the test body stays focused on the hop-trace assertions.
+fn i32_below_gate_chunk(env: &RagEnv) -> String {
+    let cid = id32("c_low");
+    let did = id32("d_low");
+    env.seed_chunk(&cid, &did, "notes/low.md", "low score text", &["Low"]);
+    cid
+}