Files
kebab/crates/kebab-rag/tests/pipeline.rs

687 lines
27 KiB
Rust

//! Integration tests for `RagPipeline` (P4-3 spec test plan).
//!
//! Real adapters (Ollama, fastembed, LanceDB) are NOT used. Every test
//! injects a `MockLanguageModel` and a `MockRetriever` so the pipeline's
//! behavior is exercised in isolation from network / heavy IO.
mod common;
use std::sync::Arc;
use std::sync::atomic::Ordering;
use common::{MockRetriever, RagEnv, id32, mk_hit, mk_hit_with_indexed_at};
use kebab_core::{FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage};
use kebab_llm::MockLanguageModel;
use kebab_rag::{AskOpts, RagPipeline, RefusalReason, StreamEvent};
/// Model id that `CountingLm::new` stamps onto the wrapped mock LM — kept
/// short so snapshots stay stable.
const TEST_LM_ID: &str = "mock-lm";
/// Wraps a `MockLanguageModel` and counts how often generation is requested,
/// so tests can assert "no LLM call happened".
struct CountingLm {
    /// The mock that actually serves the canned response.
    inner: MockLanguageModel,
    /// Number of `generate_stream` invocations observed so far.
    calls: std::sync::atomic::AtomicUsize,
}

impl CountingLm {
    /// Builds a wrapper whose mock always answers with `canned`, finishes
    /// with `FinishReason::Stop`, and reports fixed token usage (10 prompt /
    /// 5 completion / 7 ms). The call counter starts at zero.
    fn new(canned: &str) -> Self {
        let canned_usage = TokenUsage {
            prompt_tokens: 10,
            completion_tokens: 5,
            latency_ms: 7,
        };
        let inner = MockLanguageModel {
            model_id: TEST_LM_ID.to_owned(),
            provider: String::from("mock"),
            context_tokens: 32_768,
            canned_response: canned.to_owned(),
            canned_finish: FinishReason::Stop,
            canned_usage,
        };
        Self {
            inner,
            calls: std::sync::atomic::AtomicUsize::new(0),
        }
    }

    /// Current value of the call counter.
    fn calls(&self) -> usize {
        let observed = self.calls.load(Ordering::SeqCst);
        observed
    }
}
// Every trait method delegates to the wrapped mock; `generate_stream`
// additionally bumps the call counter before delegating.
impl LanguageModel for CountingLm {
    fn generate_stream(
        &self,
        req: kebab_core::GenerateRequest,
    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
        // Count first so the call is recorded even if the mock returns Err.
        let _previous = self.calls.fetch_add(1, Ordering::SeqCst);
        self.inner.generate_stream(req)
    }

    fn context_tokens(&self) -> usize {
        self.inner.context_tokens()
    }

    fn model_ref(&self) -> kebab_core::ModelRef {
        self.inner.model_ref()
    }
}
/// Baseline `AskOpts` shared by the tests: lexical search with `k = 5`,
/// temperature and seed pinned to zero, no streaming sink, no conversation
/// context, and single-pass (non-multi-hop) mode.
fn default_opts() -> AskOpts {
    AskOpts {
        // Retrieval knobs.
        mode: SearchMode::Lexical,
        k: 5,
        explain: false,
        multi_hop: false,
        // Generation knobs, pinned.
        temperature: Some(0.0),
        seed: Some(0),
        // No streaming and no conversation state.
        stream_sink: None,
        history: vec![],
        conversation_id: None,
        turn_index: None,
    }
}
// ── 1. empty hits → NoChunks, no LLM call ────────────────────────────────
#[test]
fn empty_hits_refuses_no_chunks_without_llm_call() {
    let env = RagEnv::new();
    // Keep the concrete handle so the call counter can be read after `ask`.
    let counting_lm = Arc::new(CountingLm::new("(unused)"));
    let empty_retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(vec![]));
    let pipeline = RagPipeline::new(
        env.config.clone(),
        empty_retriever,
        counting_lm.clone() as Arc<dyn LanguageModel>,
        env.sqlite.clone(),
    );

    let answer = pipeline.ask("anything", default_opts()).unwrap();

    // The refusal is reported on the answer itself …
    assert_eq!(answer.refusal_reason, Some(RefusalReason::NoChunks));
    assert!(!answer.grounded);
    assert!(answer.citations.is_empty());
    // … the model was never consulted, and the refusal is still persisted.
    assert_eq!(counting_lm.calls(), 0, "LM must NOT be called on empty hits");
    assert_eq!(env.count_answers(), 1, "answers row written for refusal");
}
// ── 2. score gate refuses without LLM call ────────────────────────────────
#[test]
fn top_below_gate_refuses_score_gate_without_llm_call() {
    let env = RagEnv::new();
    let counting_lm = Arc::new(CountingLm::new("(unused)"));
    // Best score is 0.10, which sits under the default gate of 0.30.
    let near_misses = vec![
        mk_hit(1, &id32("c1"), &id32("d1"), "notes/a.md", 0.10, &["A"]),
        mk_hit(2, &id32("c2"), &id32("d2"), "notes/b.md", 0.05, &["B"]),
    ];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(near_misses));
    let pipeline = RagPipeline::new(
        env.config.clone(),
        retriever,
        counting_lm.clone() as Arc<dyn LanguageModel>,
        env.sqlite.clone(),
    );

    let answer = pipeline.ask("q", default_opts()).unwrap();

    assert_eq!(answer.refusal_reason, Some(RefusalReason::ScoreGate));
    assert!(!answer.grounded);
    assert_eq!(
        answer.citations.len(),
        2,
        "all near-miss candidates surfaced"
    );
    // Near-miss citations are listed for the user but carry no marker.
    for citation in answer.citations.iter() {
        assert!(citation.marker.is_none(), "ScoreGate citations have no marker");
    }
    assert_eq!(counting_lm.calls(), 0, "LM must NOT be called when gate refuses");
    assert_eq!(env.count_answers(), 1);
    // The refusal text names the reason and the top candidate's path.
    assert!(answer.answer.contains("근거 부족"));
    assert!(answer.answer.contains("notes/a.md"));
}
// ── 3. grounded happy path with [#1] ──────────────────────────────────────
#[test]
fn grounded_happy_path_marker_one() {
    let env = RagEnv::new();
    let chunk_id = id32("c1");
    let doc_id = id32("d1");
    // The chunk must exist in the store for the hit below to be packable.
    env.seed_chunk(
        &chunk_id,
        &doc_id,
        "notes/a.md",
        "Rust is a systems language.",
        &["Intro"],
    );
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(vec![mk_hit(
        1,
        &chunk_id,
        &doc_id,
        "notes/a.md",
        0.85,
        &["Intro"],
    )]));
    let lm: Arc<dyn LanguageModel> =
        Arc::new(CountingLm::new("Rust is a systems language. [#1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("what is rust", default_opts()).unwrap();

    assert!(answer.grounded);
    assert_eq!(answer.refusal_reason, None);
    assert_eq!(answer.citations.len(), 1);
    // The citation's marker is expected as `[1]` (no `#`) even though the
    // answer text carries `[#1]`.
    assert_eq!(answer.citations[0].marker.as_deref(), Some("[1]"));
    assert_eq!(answer.retrieval.chunks_used, 1);
    assert_eq!(env.count_answers(), 1);
}
// ── 4. unknown marker [#7] → LlmSelfJudge ─────────────────────────────────
#[test]
fn unknown_marker_refuses_llm_self_judge() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc text", &["Intro"]);
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    // Only #1 is in the packed set, so the model's `[#7]` points at nothing.
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("answer text [#7]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("q", default_opts()).unwrap();

    assert_eq!(answer.refusal_reason, Some(RefusalReason::LlmSelfJudge));
    assert!(!answer.grounded);
    // Markers that do not map to the packed set are never reported as
    // citations.
    assert!(answer.citations.is_empty());
}
// ── 5. [1] (no #) → LlmSelfJudge (regex strictness) ───────────────────────
#[test]
fn marker_without_hash_is_no_marker() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc text", &["Intro"]);
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    // A bare `[1]` is not accepted as a marker; the strict form is `[#1]`.
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("the answer [1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("q", default_opts()).unwrap();

    assert_eq!(answer.refusal_reason, Some(RefusalReason::LlmSelfJudge));
    assert!(!answer.grounded);
}
// ── 6. vec![1] no real citation → LlmSelfJudge (no false positive) ────────
#[test]
fn vec_bracket_one_is_no_false_positive() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc", &["Intro"]);
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    // Code-like text such as `vec![1]` must not be taken for a citation.
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("see vec![1] in code"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("q", default_opts()).unwrap();

    assert_eq!(answer.refusal_reason, Some(RefusalReason::LlmSelfJudge));
    assert!(!answer.grounded);
}
// ── 7. "근거가 부족합니다" → LlmSelfJudge ────────────────────────────────
#[test]
fn explicit_korean_refusal_is_self_judge() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc", &["Intro"]);
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    // The model itself answers with the Korean "insufficient evidence" line.
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("근거가 부족합니다."));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("q", default_opts()).unwrap();

    assert_eq!(answer.refusal_reason, Some(RefusalReason::LlmSelfJudge));
    assert!(!answer.grounded);
}
// ── 8. context packing budget overflow ────────────────────────────────────
#[test]
fn packing_stops_before_budget_overflow() {
    let env = RagEnv::new();
    // Shrink the context budget far below the size of a single chunk.
    let mut tight_cfg = env.config.clone();
    tight_cfg.rag.max_context_tokens = 50;

    // Three oversized chunks (2 000 characters each, roughly 500 tokens),
    // seeded into the store and returned as hits in the same order.
    let oversized_body = "X".repeat(2_000);
    let hits: Vec<_> = (0..3_u32)
        .map(|i| {
            let chunk_id = id32(&format!("c{i}"));
            let doc_id = id32(&format!("d{i}"));
            let path = format!("notes/a{i}.md");
            env.seed_chunk(&chunk_id, &doc_id, &path, &oversized_body, &["Intro"]);
            mk_hit(i + 1, &chunk_id, &doc_id, &path, 0.9, &["Intro"])
        })
        .collect();

    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(hits));
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("ok [#1]"));
    let pipeline = RagPipeline::new(tight_cfg, retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("q", default_opts()).unwrap();

    // Exactly one chunk is expected to be packed under the tiny budget,
    // while all three hits are still counted as returned.
    assert_eq!(
        answer.retrieval.chunks_used, 1,
        "exactly one chunk fits when budget is tiny"
    );
    assert_eq!(answer.retrieval.chunks_returned, 3);
    assert!(answer.grounded);
}
// ── 9. streaming forwards tokens to mpsc ──────────────────────────────────
#[test]
fn streaming_forwards_tokens_to_sink() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc", &["Intro"]);
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(vec![mk_hit(
        1,
        &chunk_id,
        &doc_id,
        "notes/a.md",
        0.85,
        &["Intro"],
    )]));
    let canned = "ok [#1]";
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new(canned));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    // Hand the sending half to the pipeline through the options.
    let (sink, events) = std::sync::mpsc::channel::<StreamEvent>();
    let streaming_opts = AskOpts {
        stream_sink: Some(sink),
        ..default_opts()
    };
    let _ = pipeline.ask("q", streaming_opts).unwrap();

    // p9-fb-33: keep only the Token deltas from the staged event stream and
    // stitch them back together.
    let token_deltas = events.into_iter().filter_map(|event| {
        if let StreamEvent::Token { delta, .. } = event {
            Some(delta)
        } else {
            None
        }
    });
    let collected: String = token_deltas.collect();
    assert_eq!(collected, canned);
}
// ── 10. dropped receiver aborts generation, records LlmStreamAborted ──────
//
// p9-fb-33: cancel semantics changed. Pre-fb-33 the pipeline drove
// the LM loop to completion and silently dropped sends. Now a
// SendError breaks the loop and stamps `RefusalReason::LlmStreamAborted`
// onto the persisted row — the partial answer (whatever was buffered
// before the cancel) still gets written for audit.
#[test]
fn dropped_receiver_aborts_with_llm_stream_aborted() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc", &["Intro"]);
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("ok [#1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    // Close the receiving half before asking, so sends on the sink fail.
    let (sink, events) = std::sync::mpsc::channel::<StreamEvent>();
    drop(events);
    let cancelled_opts = AskOpts {
        stream_sink: Some(sink),
        ..default_opts()
    };

    let answer = pipeline.ask("q", cancelled_opts).unwrap();

    assert!(!answer.grounded, "cancel takes priority over grounded");
    assert_eq!(
        answer.refusal_reason,
        Some(RefusalReason::LlmStreamAborted),
        "cancel records LlmStreamAborted",
    );
    assert_eq!(env.count_answers(), 1, "answers row still persisted");
}
// ── 11. Send + Sync compile check ─────────────────────────────────────────
// Implemented inside `kebab_rag::pipeline::tests::rag_pipeline_is_send_sync`.
// ── 12. usage from final Done chunk ───────────────────────────────────────
#[test]
fn usage_populated_from_done_chunk() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc", &["Intro"]);
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("ok [#1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("q", default_opts()).unwrap();

    // Both counts must match the fixed usage that `CountingLm::new` sets.
    let usage = &answer.usage;
    assert_eq!(usage.prompt_tokens, 10, "from canned_usage");
    assert_eq!(usage.completion_tokens, 5);
}
// ── 13. answers row inserted in all paths (incl. refusals) ────────────────
#[test]
fn answers_row_inserted_for_each_refusal_kind() {
    // Each scenario runs against a fresh `RagEnv`, asks once, and checks two
    // things: the pipeline took the refusal path the scenario is named
    // after, and exactly one `answers` row was written. Without the first
    // check a scenario could drift onto a different path and the row-count
    // assertion would still pass.

    // NoChunks: the retriever returns nothing.
    {
        let env = RagEnv::new();
        let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(Vec::new()));
        let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new(""));
        let p = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());
        let answer = p.ask("q", default_opts()).unwrap();
        assert_eq!(answer.refusal_reason, Some(RefusalReason::NoChunks));
        assert_eq!(env.count_answers(), 1);
    }
    // ScoreGate: the only hit scores 0.05, below the default gate.
    {
        let env = RagEnv::new();
        let cid = id32("c1");
        let did = id32("d1");
        env.seed_chunk(&cid, &did, "notes/a.md", "doc", &["Intro"]);
        let hits = vec![mk_hit(1, &cid, &did, "notes/a.md", 0.05, &["Intro"])];
        let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(hits));
        let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new(""));
        let p = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());
        let answer = p.ask("q", default_opts()).unwrap();
        assert_eq!(answer.refusal_reason, Some(RefusalReason::ScoreGate));
        assert_eq!(env.count_answers(), 1);
    }
    // LlmSelfJudge (silent ungrounded): the model answers without a marker.
    {
        let env = RagEnv::new();
        let cid = id32("c1");
        let did = id32("d1");
        env.seed_chunk(&cid, &did, "notes/a.md", "doc", &["Intro"]);
        let hits = vec![mk_hit(1, &cid, &did, "notes/a.md", 0.85, &["Intro"])];
        let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(hits));
        let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("answer with no marker"));
        let p = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());
        let answer = p.ask("q", default_opts()).unwrap();
        assert_eq!(answer.refusal_reason, Some(RefusalReason::LlmSelfJudge));
        assert_eq!(env.count_answers(), 1);
    }
}
// ── 14. determinism: temp=0 + seed=0 → identical Answer (mock) ────────────
#[test]
fn determinism_temperature_zero_seed_zero() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "doc", &["Intro"]);
    let shared_hits = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];

    // Each call builds a fresh pipeline with its own retriever and LM, all
    // configured identically.
    let fresh_pipeline = || {
        let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(shared_hits.clone()));
        let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("Rust is. [#1]"));
        RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone())
    };
    let first = fresh_pipeline().ask("q", default_opts()).unwrap();
    let second = fresh_pipeline().ask("q", default_opts()).unwrap();

    assert_eq!(first.answer, second.answer);
    assert_eq!(first.grounded, second.grounded);
    assert_eq!(first.citations, second.citations);
    assert_eq!(first.retrieval.chunks_used, second.retrieval.chunks_used);
    assert_eq!(first.retrieval.k, second.retrieval.k);
    // trace_id, created_at and latency_ms depend on the wall clock and are
    // expected to differ between runs, so they are left out of the
    // comparison.
}
// ── 15a. all chunks unfetchable from store → NoChunks fallback ───────────
#[test]
fn unfetchable_chunks_fall_back_to_no_chunks() {
    // The retriever reports a well-scored hit, but its chunk row is never
    // seeded into the store. With nothing to pack, the pipeline is expected
    // to refuse with NoChunks instead of running the LLM against an empty
    // `[근거]` block (which would surface as LlmSelfJudge).
    let env = RagEnv::new();
    let counting_lm = Arc::new(CountingLm::new("(should never run)"));
    let missing_chunk = id32("missing");
    let missing_doc = id32("d_missing");
    // Deliberately no `env.seed_chunk(...)` here.
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(vec![mk_hit(
        1,
        &missing_chunk,
        &missing_doc,
        "notes/missing.md",
        0.85,
        &["X"],
    )]));
    let pipeline = RagPipeline::new(
        env.config.clone(),
        retriever,
        counting_lm.clone() as Arc<dyn LanguageModel>,
        env.sqlite.clone(),
    );

    let answer = pipeline.ask("q", default_opts()).unwrap();

    assert_eq!(answer.refusal_reason, Some(RefusalReason::NoChunks));
    assert!(!answer.grounded);
    assert!(answer.citations.is_empty());
    assert_eq!(
        counting_lm.calls(),
        0,
        "LM must NOT be called when every retrieved chunk is unfetchable"
    );
    assert_eq!(env.count_answers(), 1, "answers row written for refusal");
}
// ── 16. p9-fb-32: AnswerCitation carries indexed_at + stale ──────────────
//
// Previously the LLM-citation construction site stamped `UNIX_EPOCH` +
// `false` as a Task-7 placeholder. Task 7 plumbs real values from the
// upstream `SearchHit` through `pack_context` so the wire-side
// `AnswerCitation` reflects the document's actual age.
#[test]
fn grounded_citations_inherit_indexed_at_and_stale_from_hit() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "Apples are fruit.", &["Intro"]);

    // Indexed 60 days ago, which is past the default 30-day threshold.
    let sixty_days_ago = time::OffsetDateTime::now_utc() - time::Duration::days(60);
    let aged_hit = mk_hit_with_indexed_at(
        1,
        &chunk_id,
        &doc_id,
        "notes/a.md",
        0.85,
        &["Intro"],
        sixty_days_ago,
    );
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(vec![aged_hit]));
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("apples are fruit. [#1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("apples", default_opts()).unwrap();

    assert!(answer.grounded);
    assert_eq!(answer.citations.len(), 1, "one cited marker [#1]");
    let citation = &answer.citations[0];
    // The timestamp must be exactly what the retriever supplied, not a
    // UNIX_EPOCH placeholder.
    assert_eq!(
        citation.indexed_at, sixty_days_ago,
        "AnswerCitation.indexed_at must inherit from SearchHit.indexed_at"
    );
    // 60 days exceeds the default 30-day threshold, so the flag is set.
    assert!(
        citation.stale,
        "60-day-old hit must surface stale=true on the AnswerCitation"
    );
}
#[test]
fn grounded_citations_not_stale_for_fresh_hit() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "Apples are fruit.", &["Intro"]);

    // Indexed one day ago, well inside the default 30-day threshold.
    let one_day_ago = time::OffsetDateTime::now_utc() - time::Duration::days(1);
    let fresh_hit = mk_hit_with_indexed_at(
        1,
        &chunk_id,
        &doc_id,
        "notes/a.md",
        0.85,
        &["Intro"],
        one_day_ago,
    );
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(vec![fresh_hit]));
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("apples are fruit. [#1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("apples", default_opts()).unwrap();

    assert!(answer.grounded);
    assert_eq!(answer.citations.len(), 1);
    let citation = &answer.citations[0];
    assert_eq!(citation.indexed_at, one_day_ago);
    assert!(
        !citation.stale,
        "1-day-old hit must NOT be stale at default 30d threshold"
    );
}
// ── 15. snapshot Answer JSON stable ───────────────────────────────────────
#[test]
fn answer_json_serializes_with_expected_keys() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(
        &chunk_id,
        &doc_id,
        "notes/a.md",
        "Rust is a systems language.",
        &["Intro"],
    );
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("Rust is. [#1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());
    let answer = pipeline.ask("what", default_opts()).unwrap();

    let json: serde_json::Value = serde_json::to_value(&answer).unwrap();
    let top_level = json.as_object().unwrap();

    // Top-level keys that `answer.v1` (§2.3) requires to be present.
    const REQUIRED_KEYS: [&str; 10] = [
        "answer",
        "citations",
        "grounded",
        "refusal_reason",
        "model",
        "embedding",
        "prompt_template_version",
        "retrieval",
        "usage",
        "created_at",
    ];
    for needed in REQUIRED_KEYS {
        assert!(top_level.contains_key(needed), "missing top-level key {needed}");
    }

    // `citations` must serialize as an array.
    assert!(json["citations"].is_array());
    // `retrieval.trace_id` must carry the `ret_` prefix.
    let trace_id = json["retrieval"]["trace_id"].as_str().unwrap();
    assert!(trace_id.starts_with("ret_"), "got trace_id {trace_id:?}");
}
// ── p9-fb-41: multi-hop dispatch + decompose-failure refusal ─────────────
/// With `AskOpts.multi_hop = true` the request goes through
/// `ask_multi_hop`. The mock LLM here returns text that
/// `parse_decompose_response` cannot read as a `Vec<String>`, so the
/// pipeline is expected to refuse with
/// `RefusalReason::MultiHopDecomposeFailed`. This pins both the dispatch
/// (a different code path than single-pass) and the early-exit refusal.
///
/// Happy-path multi-hop coverage (decompose succeeds → retrieve →
/// synthesize) lands in PR-3, once a mock supports per-call response
/// scripting; the current `MockLanguageModel` returns the same canned
/// string on every call.
#[test]
fn ask_multi_hop_dispatches_and_decompose_garbage_refuses() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(&chunk_id, &doc_id, "notes/a.md", "Body text.", &["Intro"]);
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));

    // The decompose step is the only LLM call made in this scenario, and it
    // receives text that is not a JSON array of strings. The pipeline
    // therefore never reaches synthesis and leaves through the
    // decompose-failure refusal path.
    let counting_lm = Arc::new(CountingLm::new("definitely not a JSON array"));
    let pipeline = RagPipeline::new(
        env.config.clone(),
        retriever,
        counting_lm.clone() as Arc<dyn LanguageModel>,
        env.sqlite.clone(),
    );

    let mut multi_hop_opts = default_opts();
    multi_hop_opts.multi_hop = true;
    let answer = pipeline.ask("compound question", multi_hop_opts).unwrap();

    assert!(
        !answer.grounded,
        "decompose-failure refusal must report grounded=false"
    );
    assert_eq!(
        answer.refusal_reason,
        Some(RefusalReason::MultiHopDecomposeFailed),
        "garbage decompose response must surface MultiHopDecomposeFailed"
    );
    assert!(
        answer.citations.is_empty(),
        "refusal Answer carries no citations"
    );
    assert_eq!(
        answer.prompt_template_version.0, "rag-multi-hop-v1",
        "multi-hop path must stamp the rag-multi-hop-v1 template version"
    );
    assert_eq!(
        counting_lm.calls(),
        1,
        "decompose-failure exits before synthesize — exactly 1 LLM call"
    );
}
/// Regression pin: with `AskOpts.multi_hop = false` the request stays on the
/// single-pass path. Uses the same fixture as the JSON snapshot test and
/// checks that the PR-2 dispatcher does not divert legacy callers.
#[test]
fn ask_with_multi_hop_false_keeps_single_pass_path() {
    let env = RagEnv::new();
    let (chunk_id, doc_id) = (id32("c1"), id32("d1"));
    env.seed_chunk(
        &chunk_id,
        &doc_id,
        "notes/a.md",
        "Rust is a systems language.",
        &["Intro"],
    );
    let single_hit = vec![mk_hit(1, &chunk_id, &doc_id, "notes/a.md", 0.85, &["Intro"])];
    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(single_hit));
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new("Rust is. [#1]"));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

    let answer = pipeline.ask("what", default_opts()).unwrap();

    // Single-pass stamps whatever template version the config carries
    // (config default = "rag-v3"), never "rag-multi-hop-v1".
    let stamped = &answer.prompt_template_version.0;
    assert_eq!(
        *stamped, env.config.rag.prompt_template_version,
        "multi_hop=false must keep the config's prompt template (single-pass)"
    );
    assert_ne!(
        *stamped, "rag-multi-hop-v1",
        "multi_hop=false must NOT route through ask_multi_hop"
    );
}