diff --git a/crates/kebab-rag/src/pipeline.rs b/crates/kebab-rag/src/pipeline.rs index 5f93ec3..cd3a0db 100644 --- a/crates/kebab-rag/src/pipeline.rs +++ b/crates/kebab-rag/src/pipeline.rs @@ -44,11 +44,24 @@ use regex::Regex; use std::sync::OnceLock; use time::OffsetDateTime; +/// One entry in the packed context returned by +/// [`RagPipeline::pack_context`]. Carries the marker number, the +/// upstream `Citation`, and the per-hit `indexed_at` + `stale` so the +/// LLM-citation construction site can build a complete +/// [`kebab_core::AnswerCitation`] (p9-fb-32). +#[derive(Clone, Debug)] +struct PackedCitation { + marker: u32, + citation: Citation, + indexed_at: OffsetDateTime, + stale: bool, +} + /// Tuple returned by [`RagPipeline::pack_context`]: the packed -/// `[#n] doc=… heading=… span=…\n` block, the marker→Citation +/// `[#n] doc=… heading=… span=…\n` block, the marker→PackedCitation /// mapping (in packed order), and an estimated token count for the /// prompt section the LLM will see (system + query + packed context). -type PackedContext = (String, Vec<(u32, Citation)>, usize); +type PackedContext = (String, Vec, usize); // ── AskOpts ───────────────────────────────────────────────────────────────── @@ -172,10 +185,20 @@ impl RagPipeline { k: k_effective, filters: SearchFilters::default(), }; - let hits = self + let mut hits = self .retriever .search(&search_query) .context("kb-rag: retriever.search")?; + // p9-fb-32: stamp `stale` on every hit against `now_utc()` and + // the configured threshold. Cheap (per-hit comparison). Both + // the score-gate refusal path and the LLM-citation path read + // `hit.stale` downstream, so stamping once here keeps both + // call sites aligned with the App-level `search` post-process. + let now = OffsetDateTime::now_utc(); + let stale_threshold_days = self.config.search.stale_threshold_days; + for h in &mut hits { + h.stale = compute_stale(h.indexed_at, now, stale_threshold_days); + } let chunks_returned = u32::try_from(hits.len()).unwrap_or(u32::MAX); let top_score = hits.first().map(|h| h.retrieval.fusion_score).unwrap_or(0.0); @@ -302,7 +325,7 @@ impl RagPipeline { // ── 7. Citation validate ─────────────────────────────────────────── let valid_markers: std::collections::BTreeSet = - packed_entries.iter().map(|(n, _)| *n).collect(); + packed_entries.iter().map(|p| p.marker).collect(); let unknown_markers: Vec = extracted .iter() .copied() @@ -335,18 +358,19 @@ impl RagPipeline { let cited_set: std::collections::BTreeSet = extracted.iter().copied().collect(); let citations: Vec = packed_entries .iter() - .filter(|(n, _)| cited_set.contains(n)) - .map(|(n, c)| AnswerCitation { + .filter(|p| cited_set.contains(&p.marker)) + .map(|p| AnswerCitation { // Wire-format marker per design §2.3: bare bracketed form // `[1]`. The `[#1]` form is the *prompt-side* citation // grammar (what the LLM emits in its text); the wire-side // `AnswerCitation.marker` strips the `#`. - marker: Some(format!("[{n}]")), - citation: c.clone(), - // p9-fb-32: placeholder — Task 7 owns wiring real - // indexed_at / stale here from the underlying SearchHit. - indexed_at: time::OffsetDateTime::UNIX_EPOCH, - stale: false, + marker: Some(format!("[{}]", p.marker)), + citation: p.citation.clone(), + // p9-fb-32: real values from the upstream SearchHit + // (post-processed for `stale` against the configured + // threshold at retrieval time — see `ask` body). + indexed_at: p.indexed_at, + stale: p.stale, }) .collect(); @@ -411,10 +435,10 @@ impl RagPipeline { // `kb explain` can reconstruct what was sent to the LLM. let v: Vec<_> = packed_entries .iter() - .map(|(n, c)| { + .map(|p| { serde_json::json!({ - "marker": n, - "citation": c, + "marker": p.marker, + "citation": p.citation, }) }) .collect(); @@ -446,7 +470,7 @@ impl RagPipeline { let budget_tokens = cap.saturating_sub(prompt_overhead_tokens); let mut text = String::new(); - let mut entries: Vec<(u32, Citation)> = Vec::new(); + let mut entries: Vec = Vec::new(); let mut tokens_so_far: usize = 0; let mut n: u32 = 1; @@ -479,7 +503,19 @@ impl RagPipeline { break; } text.push_str(&block); - entries.push((n, hit.citation.clone())); + // p9-fb-32: forward indexed_at + stale from the upstream + // SearchHit so the LLM-citation construction site can build + // a complete AnswerCitation (replaces Task 6's UNIX_EPOCH + // placeholder). `hit.stale` is stamped by the pipeline + // entry (`ask`) right after `retriever.search`, so by the + // time this method runs it already reflects the + // configured threshold. + entries.push(PackedCitation { + marker: n, + citation: hit.citation.clone(), + indexed_at: hit.indexed_at, + stale: hit.stale, + }); tokens_so_far = next_total; n = n.saturating_add(1); } @@ -623,6 +659,25 @@ impl RagPipeline { /// paths attach the configured embedding model so `kb explain` can /// later identify which embedder shaped the retrieval (even on /// refusals — see `refuse_score_gate`). +/// p9-fb-32: pipeline-local mirror of `kebab_app::staleness::compute_stale`. +/// Duplicated here (rather than imported) because `kebab-rag` cannot +/// depend on `kebab-app` — that would invert the crate-stack dependency +/// direction. The `App::search` post-process and this helper share a +/// behavioral contract: `now - indexed_at > threshold_days * 24h`, +/// strict `>` so exactly-threshold hits stay fresh, and +/// `threshold_days = 0` short-circuits to `false` (feature off). +fn compute_stale( + indexed_at: OffsetDateTime, + now: OffsetDateTime, + threshold_days: u32, +) -> bool { + if threshold_days == 0 { + return false; + } + let threshold = time::Duration::days(i64::from(threshold_days)); + (now - indexed_at) > threshold +} + fn embedding_ref_for(mode: SearchMode, cfg: &kebab_config::Config) -> Option { match mode { SearchMode::Lexical => None, diff --git a/crates/kebab-rag/tests/common/mod.rs b/crates/kebab-rag/tests/common/mod.rs index 709fa9b..7e0521d 100644 --- a/crates/kebab-rag/tests/common/mod.rs +++ b/crates/kebab-rag/tests/common/mod.rs @@ -116,6 +116,29 @@ pub fn mk_hit( workspace_path: &str, fusion_score: f32, heading: &[&str], +) -> SearchHit { + mk_hit_with_indexed_at( + rank, + chunk_id, + doc_id, + workspace_path, + fusion_score, + heading, + time::OffsetDateTime::UNIX_EPOCH, + ) +} + +/// Build a `SearchHit` with an explicit `indexed_at` timestamp. Used by +/// p9-fb-32 staleness tests so the pipeline sees realistic per-hit +/// indexed_at values flowing through to `AnswerCitation`. +pub fn mk_hit_with_indexed_at( + rank: u32, + chunk_id: &str, + doc_id: &str, + workspace_path: &str, + fusion_score: f32, + heading: &[&str], + indexed_at: time::OffsetDateTime, ) -> SearchHit { let p = WorkspacePath::new(workspace_path.to_string()).expect("workspace path valid"); SearchHit { @@ -143,8 +166,9 @@ pub fn mk_hit( index_version: IndexVersion("test-iv".to_string()), embedding_model: None, chunker_version: ChunkerVersion("v1".to_string()), - // p9-fb-32: synthetic test mock — staleness not exercised here. - indexed_at: time::OffsetDateTime::UNIX_EPOCH, + // p9-fb-32: pipeline post-processes `stale` from `indexed_at` + // + cfg threshold; tests configure both via this helper. + indexed_at, stale: false, } } diff --git a/crates/kebab-rag/tests/pipeline.rs b/crates/kebab-rag/tests/pipeline.rs index 18fe1a2..9dc9bd4 100644 --- a/crates/kebab-rag/tests/pipeline.rs +++ b/crates/kebab-rag/tests/pipeline.rs @@ -9,7 +9,7 @@ mod common; use std::sync::Arc; use std::sync::atomic::Ordering; -use common::{MockRetriever, RagEnv, id32, mk_hit}; +use common::{MockRetriever, RagEnv, id32, mk_hit, mk_hit_with_indexed_at}; use kebab_core::{ FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage, }; @@ -421,6 +421,73 @@ fn unfetchable_chunks_fall_back_to_no_chunks() { assert_eq!(env.count_answers(), 1, "answers row written for refusal"); } +// ── 16. p9-fb-32: AnswerCitation carries indexed_at + stale ────────────── +// +// Previously the LLM-citation construction site stamped `UNIX_EPOCH` + +// `false` as a Task-7 placeholder. Task 7 plumbs real values from the +// upstream `SearchHit` through `pack_context` so the wire-side +// `AnswerCitation` reflects the document's actual age. + +#[test] +fn grounded_citations_inherit_indexed_at_and_stale_from_hit() { + let env = RagEnv::new(); + let cid = id32("c1"); + let did = id32("d1"); + env.seed_chunk(&cid, &did, "notes/a.md", "Apples are fruit.", &["Intro"]); + // 60 days old vs. the default 30-day threshold → stale. + let now = time::OffsetDateTime::now_utc(); + let sixty_days_ago = now - time::Duration::days(60); + let hits = vec![mk_hit_with_indexed_at( + 1, &cid, &did, "notes/a.md", 0.85, &["Intro"], sixty_days_ago, + )]; + let retriever: Arc = Arc::new(MockRetriever::new(hits)); + let lm: Arc = Arc::new(CountingLm::new("apples are fruit. [#1]")); + let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone()); + + let answer = pipeline.ask("apples", default_opts()).unwrap(); + assert!(answer.grounded); + assert_eq!(answer.citations.len(), 1, "one cited marker [#1]"); + let c = &answer.citations[0]; + // indexed_at must be the value the retriever produced — NOT the + // UNIX_EPOCH placeholder the Task 6 cross-task patch left behind. + assert_eq!( + c.indexed_at, sixty_days_ago, + "AnswerCitation.indexed_at must inherit from SearchHit.indexed_at" + ); + // 60d > default 30d threshold → stale. + assert!( + c.stale, + "60-day-old hit must surface stale=true on the AnswerCitation" + ); +} + +#[test] +fn grounded_citations_not_stale_for_fresh_hit() { + let env = RagEnv::new(); + let cid = id32("c1"); + let did = id32("d1"); + env.seed_chunk(&cid, &did, "notes/a.md", "Apples are fruit.", &["Intro"]); + // 1 day old vs. the default 30-day threshold → fresh. + let now = time::OffsetDateTime::now_utc(); + let one_day_ago = now - time::Duration::days(1); + let hits = vec![mk_hit_with_indexed_at( + 1, &cid, &did, "notes/a.md", 0.85, &["Intro"], one_day_ago, + )]; + let retriever: Arc = Arc::new(MockRetriever::new(hits)); + let lm: Arc = Arc::new(CountingLm::new("apples are fruit. [#1]")); + let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone()); + + let answer = pipeline.ask("apples", default_opts()).unwrap(); + assert!(answer.grounded); + assert_eq!(answer.citations.len(), 1); + let c = &answer.citations[0]; + assert_eq!(c.indexed_at, one_day_ago); + assert!( + !c.stale, + "1-day-old hit must NOT be stale at default 30d threshold" + ); +} + // ── 15. snapshot Answer JSON stable ─────────────────────────────────────── #[test]