chore(rag): PR #170 회차 1 리뷰 반영
(A) ScriptedLm doc 의 `Arc<Vec<String>>` 표기 → 실제 구현 (`Vec<String>` +
`AtomicUsize`, 외부에서 `Arc::new(ScriptedLm::new(...))` 로 wrap)
반영.
(B) ScriptedLm::new doc 의 미존재 `with_*` builder 언급 제거.
(C) refuse path 의 hops 보존 회귀 핀 2 건 추가 (`tests/multi_hop.rs`):
- `multi_hop_refuse_no_chunks_preserves_hops_trace`: empty pool →
`refuse_no_chunks(Some(hops))` → Answer.hops = Some([Decompose,
Decide]).
- `multi_hop_refuse_score_gate_preserves_hops_trace`: top score 0.10
< 0.30 gate → `refuse_score_gate(Some(hops))` → 같은 shape.
refuse_* widening + ask_multi_hop 의 forwarding wiring 이 reverting
되면 두 test 가 회귀 잡음.
(D) test 5 의 redundant `assert_ne!(.., Some(MultiHopDecomposeFailed))`
제거 — `assert_eq!(.., None)` 이미 함의. 메시지에 의도 통합.
검증
- `cargo test -p kebab-rag -j 1 --test multi_hop` — 7 (5+2) 모두 통과.
- `cargo clippy -p kebab-rag --all-targets -j 1 -- -D warnings` clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -266,10 +266,11 @@ pub fn id32(prefix: &str) -> String {
|
||||
/// of `RagPipeline::ask_multi_hop` — each can return a different
|
||||
/// payload (`["q1","q2"]`, `[]`, `"final answer [#1]"`, etc.).
|
||||
///
|
||||
/// Internally `Arc<Vec<String>> + AtomicUsize` so the type is
|
||||
/// `Send + Sync` and can be wrapped in `Arc<dyn LanguageModel>`.
|
||||
/// Tests can read `calls()` for an assertion on the expected LLM
|
||||
/// call count.
|
||||
/// Internally `Vec<String>` (immutable after construction) plus an
|
||||
/// `AtomicUsize` index counter, so the type is `Send + Sync` and
|
||||
/// tests wrap it in `Arc::new(ScriptedLm::new(...))` to share with
|
||||
/// the pipeline. Tests can read `calls()` for an assertion on the
|
||||
/// expected LLM call count.
|
||||
///
|
||||
/// Exhaustion (more calls than scripted responses) panics — tests
|
||||
/// that need an "infinite" final response can supply a longer
|
||||
@@ -291,8 +292,10 @@ pub struct ScriptedLm {
|
||||
impl ScriptedLm {
|
||||
/// Build a scripted LM with the default model_id/provider used by
|
||||
/// the rest of the test suite (`mock-lm` / `mock`) and the
|
||||
/// MockLanguageModel-equivalent canned usage. Call `with_*`
|
||||
/// builders if a test needs to override the defaults.
|
||||
/// MockLanguageModel-equivalent canned usage. No knobs are
|
||||
/// exposed today — every multi-hop test exercises the pipeline
|
||||
/// flow, not the LM identity. Add builders only when a test
|
||||
/// genuinely needs to override defaults.
|
||||
pub fn new(responses: Vec<&str>) -> Self {
|
||||
Self {
|
||||
model_id: "mock-lm".to_string(),
|
||||
|
||||
@@ -320,12 +320,8 @@ fn multi_hop_decide_parse_failure_falls_through_to_synthesize() {
|
||||
);
|
||||
assert_eq!(
|
||||
answer.refusal_reason, None,
|
||||
"decide parse failure is graceful degrade, not refusal"
|
||||
);
|
||||
assert_ne!(
|
||||
answer.refusal_reason,
|
||||
Some(RefusalReason::MultiHopDecomposeFailed),
|
||||
"MultiHopDecomposeFailed is reserved for the initial decompose hop"
|
||||
"decide parse failure is graceful degrade, not refusal — \
|
||||
MultiHopDecomposeFailed is reserved for the initial decompose hop"
|
||||
);
|
||||
assert_eq!(
|
||||
lm_handle.calls(),
|
||||
@@ -346,3 +342,114 @@ fn multi_hop_decide_parse_failure_falls_through_to_synthesize() {
|
||||
flag stays false even though we synthesize early"
|
||||
);
|
||||
}
|
||||
|
||||
// ── 6. refuse path: NoChunks preserves partial hop trace ──────────────────
|
||||
//
|
||||
// PR-3b-ii widens `refuse_no_chunks` to accept `hops:
|
||||
// Option<Vec<HopRecord>>` and wires `ask_multi_hop` to forward the
|
||||
// partial trace. This test pins that contract — a refusal Answer
|
||||
// still carries the decompose + decide hops the loop accumulated
|
||||
// before pool came up empty.
|
||||
|
||||
#[test]
|
||||
fn multi_hop_refuse_no_chunks_preserves_hops_trace() {
|
||||
let env = RagEnv::new();
|
||||
// Retriever returns 0 hits — pool stays empty → refuse_no_chunks.
|
||||
let retriever = Arc::new(ScriptedRetriever::new(vec![vec![]]));
|
||||
let retriever_handle = retriever.clone();
|
||||
let retriever_dyn: Arc<dyn Retriever> = retriever;
|
||||
|
||||
// Only one LM call needed (decompose). Decide is skipped because
|
||||
// `pool.is_empty()` triggers the (Vec::new(), 0) shortcut. If a
|
||||
// bug calls the LM beyond decompose, ScriptedLm panics on
|
||||
// exhaustion and the test fails loudly.
|
||||
let lm = Arc::new(ScriptedLm::new(vec![r#"["q1"]"#]));
|
||||
let lm_handle = lm.clone();
|
||||
let lm_dyn: Arc<dyn LanguageModel> = lm;
|
||||
let pipeline =
|
||||
RagPipeline::new(env.config.clone(), retriever_dyn, lm_dyn, env.sqlite.clone());
|
||||
|
||||
let answer = pipeline.ask("q", multi_hop_opts()).unwrap();
|
||||
|
||||
assert!(!answer.grounded);
|
||||
assert_eq!(answer.refusal_reason, Some(RefusalReason::NoChunks));
|
||||
assert_eq!(retriever_handle.calls(), 1, "single sub-query → single retrieve");
|
||||
assert_eq!(lm_handle.calls(), 1, "decompose only — decide skipped (empty pool), no synthesize");
|
||||
|
||||
let hops = answer
|
||||
.hops
|
||||
.expect("PR-3b-ii: refuse_no_chunks must preserve the partial hop trace");
|
||||
assert_eq!(
|
||||
hops.len(),
|
||||
2,
|
||||
"[Decompose, Decide(empty_pool_skip)] — synthesize never ran"
|
||||
);
|
||||
assert_eq!(hops[0].kind, HopKind::Decompose);
|
||||
assert_eq!(hops[0].sub_queries, vec!["q1"]);
|
||||
assert_eq!(hops[1].kind, HopKind::Decide);
|
||||
assert!(hops[1].sub_queries.is_empty());
|
||||
assert_eq!(
|
||||
hops[1].context_chunks_added, 0,
|
||||
"retrieve returned 0 hits → 0 added to pool"
|
||||
);
|
||||
}
|
||||
|
||||
// ── 7. refuse path: ScoreGate preserves partial hop trace ─────────────────
|
||||
|
||||
#[test]
|
||||
fn multi_hop_refuse_score_gate_preserves_hops_trace() {
|
||||
let env = RagEnv::new();
|
||||
let cid = i32_below_gate_chunk(&env);
|
||||
// Top score 0.10 is well below the default gate (0.30) — the
|
||||
// score-gate refusal fires after the pool has been built, so
|
||||
// the decide LLM call did run and the hop trace contains both
|
||||
// Decompose and Decide entries.
|
||||
let hits = vec![mk_hit(1, &cid, &id32("d_low"), "notes/low.md", 0.10, &["Low"])];
|
||||
let retriever = Arc::new(ScriptedRetriever::new(vec![hits]));
|
||||
let retriever_dyn: Arc<dyn Retriever> = retriever;
|
||||
|
||||
// decompose + decide (pool not empty so decide fires) — synthesize
|
||||
// never runs because we refuse before pack_context.
|
||||
let lm = Arc::new(ScriptedLm::new(vec![
|
||||
r#"["q1"]"#,
|
||||
r#"[]"#,
|
||||
]));
|
||||
let lm_handle = lm.clone();
|
||||
let lm_dyn: Arc<dyn LanguageModel> = lm;
|
||||
let pipeline =
|
||||
RagPipeline::new(env.config.clone(), retriever_dyn, lm_dyn, env.sqlite.clone());
|
||||
|
||||
let answer = pipeline.ask("q", multi_hop_opts()).unwrap();
|
||||
|
||||
assert!(!answer.grounded);
|
||||
assert_eq!(answer.refusal_reason, Some(RefusalReason::ScoreGate));
|
||||
assert_eq!(
|
||||
lm_handle.calls(),
|
||||
2,
|
||||
"decompose + decide ran; synthesize skipped by gate"
|
||||
);
|
||||
|
||||
let hops = answer
|
||||
.hops
|
||||
.expect("PR-3b-ii: refuse_score_gate must preserve the partial hop trace");
|
||||
assert_eq!(
|
||||
hops.len(),
|
||||
2,
|
||||
"[Decompose, Decide(stop)] — synthesize never ran"
|
||||
);
|
||||
assert_eq!(hops[0].kind, HopKind::Decompose);
|
||||
assert_eq!(hops[1].kind, HopKind::Decide);
|
||||
assert_eq!(
|
||||
hops[1].context_chunks_added, 1,
|
||||
"the low-score chunk did enter the pool — gate fires after pool build"
|
||||
);
|
||||
}
|
||||
|
||||
/// Seed a chunk + return its id. Helper for the score-gate test so
|
||||
/// the test body stays focused on the hop-trace assertions.
|
||||
fn i32_below_gate_chunk(env: &RagEnv) -> String {
|
||||
let cid = id32("c_low");
|
||||
let did = id32("d_low");
|
||||
env.seed_chunk(&cid, &did, "notes/low.md", "low score text", &["Low"]);
|
||||
cid
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user