From b51cdb9e8ff0e899f065231d3097f5e6419dd173 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 17:56:56 +0900
Subject: [PATCH] feat(search/hybrid): fuse hits override score_kind to Rrf (fb-38)

---
 crates/kebab-rag/src/pipeline.rs     |  1 +
 crates/kebab-rag/tests/common/mod.rs |  1 +
 crates/kebab-search/src/hybrid.rs    | 83 ++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+)

diff --git a/crates/kebab-rag/src/pipeline.rs b/crates/kebab-rag/src/pipeline.rs
index bf70900..47bfee1 100644
--- a/crates/kebab-rag/src/pipeline.rs
+++ b/crates/kebab-rag/src/pipeline.rs
@@ -1117,6 +1117,7 @@ mod stream_event_serde_tests {
             chunker_version: ChunkerVersion("c@1".into()),
             indexed_at: datetime!(2026-05-09 12:00:00 UTC),
             stale: false,
+            score_kind: kebab_core::ScoreKind::Rrf,
         }
     }
 
diff --git a/crates/kebab-rag/tests/common/mod.rs b/crates/kebab-rag/tests/common/mod.rs
index 7e0521d..022176c 100644
--- a/crates/kebab-rag/tests/common/mod.rs
+++ b/crates/kebab-rag/tests/common/mod.rs
@@ -170,6 +170,7 @@ pub fn mk_hit_with_indexed_at(
         // + cfg threshold; tests configure both via this helper.
         indexed_at,
         stale: false,
+        score_kind: kebab_core::ScoreKind::Rrf,
     }
 }
 
diff --git a/crates/kebab-search/src/hybrid.rs b/crates/kebab-search/src/hybrid.rs
index e285915..6d9286b 100644
--- a/crates/kebab-search/src/hybrid.rs
+++ b/crates/kebab-search/src/hybrid.rs
@@ -313,6 +313,9 @@ impl HybridRetriever {
                 lexical_rank: s.lex_rank,
                 vector_rank: s.vec_rank,
             };
+            // p9-fb-38: base was cloned from a lex/vec hit (Bm25/Cosine);
+            // fuse output is RRF-scored so override.
+            base.score_kind = kebab_core::ScoreKind::Rrf;
             hits.push(base);
         }
 
@@ -824,4 +827,84 @@ mod tests {
         assert!(trace.vector.is_empty());
         assert_eq!(trace.timing.vector_ms, 0);
     }
+
+    #[test]
+    fn hybrid_fuse_labels_hits_as_rrf() {
+        use kebab_core::{ScoreKind, SearchMode, SearchQuery};
+        use std::sync::Arc;
+
+        struct Stub {
+            hits: Vec,
+        }
+        impl Retriever for Stub {
+            fn search(&self, _q: &SearchQuery) -> anyhow::Result> {
+                Ok(self.hits.clone())
+            }
+            fn index_version(&self) -> kebab_core::IndexVersion {
+                kebab_core::IndexVersion("v1".into())
+            }
+        }
+
+        let lex = Arc::new(Stub {
+            hits: vec![mk_hit("c1", 1, SearchMode::Lexical, 0.9)],
+        });
+        let vec_r = Arc::new(Stub {
+            hits: vec![mk_hit("c1", 1, SearchMode::Vector, 0.8)],
+        });
+        let hybrid = HybridRetriever::with_policy(
+            lex,
+            vec_r,
+            FusionPolicy::Rrf { k_rrf: 60 },
+            2,
+        );
+        let q = SearchQuery {
+            text: "x".into(),
+            mode: SearchMode::Hybrid,
+            k: 1,
+            filters: Default::default(),
+        };
+        let hits = hybrid.search(&q).unwrap();
+        assert!(!hits.is_empty());
+        assert_eq!(hits[0].score_kind, ScoreKind::Rrf);
+    }
+
+    #[test]
+    fn hybrid_search_with_trace_lexical_mode_passes_through_bm25() {
+        use kebab_core::{ScoreKind, SearchMode, SearchQuery};
+        use std::sync::Arc;
+
+        struct Stub {
+            hits: Vec,
+        }
+        impl Retriever for Stub {
+            fn search(&self, _q: &SearchQuery) -> anyhow::Result> {
+                Ok(self.hits.clone())
+            }
+            fn index_version(&self) -> kebab_core::IndexVersion {
+                kebab_core::IndexVersion("v1".into())
+            }
+        }
+
+        // mk_hit defaults to Rrf; override per spec for this test.
+        let mut lex_hit = mk_hit("c1", 1, SearchMode::Lexical, 0.5);
+        lex_hit.score_kind = ScoreKind::Bm25;
+        let lex = Arc::new(Stub { hits: vec![lex_hit] });
+        let vec_r = Arc::new(Stub { hits: vec![] });
+        let hybrid = HybridRetriever::with_policy(
+            lex,
+            vec_r,
+            FusionPolicy::Rrf { k_rrf: 60 },
+            2,
+        );
+        let q = SearchQuery {
+            text: "x".into(),
+            mode: SearchMode::Lexical,
+            k: 1,
+            filters: Default::default(),
+        };
+        let (hits, _trace) = hybrid.search_with_trace(&q).unwrap();
+        assert!(!hits.is_empty());
+        // search_with_trace mode=Lexical passes through underlying hits.
+        assert_eq!(hits[0].score_kind, ScoreKind::Bm25);
+    }
 }