feat(search/lexical): label hits with ScoreKind::Bm25 (fb-38 task 2)
- Add ScoreKind::Bm25 to LexicalRetriever::build_hit SearchHit construction
- Import ScoreKind from kebab_core in lexical.rs
- Add integration test lexical_retriever_hits_carry_bm25_score_kind to verify
  that all hits from LexicalRetriever carry score_kind == ScoreKind::Bm25
- Update lexical snapshot test baseline to include the new score_kind field

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,7 @@ use anyhow::{Context, Result};
|
||||
use globset::GlobMatcher;
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, DocumentId, IndexVersion, RetrievalDetail, Retriever,
|
||||
SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
|
||||
ScoreKind, SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
|
||||
WorkspacePath,
|
||||
};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
@@ -469,6 +469,7 @@ fn build_hit(
|
||||
// (called from `App::search` / `App::search_uncached`) and the equivalent
|
||||
// in `RagPipeline::ask` against the configured threshold.
|
||||
stale: false,
|
||||
score_kind: ScoreKind::Bm25,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -9,8 +9,8 @@ use std::sync::Arc;
|
||||
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{
|
||||
DocumentId, IndexVersion, Lang, MediaType, Retriever, SearchFilters, SearchHit, SearchMode,
|
||||
SearchQuery, TrustLevel,
|
||||
DocumentId, IndexVersion, Lang, MediaType, Retriever, ScoreKind, SearchFilters, SearchHit,
|
||||
SearchMode, SearchQuery, TrustLevel,
|
||||
};
|
||||
use kebab_search::LexicalRetriever;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
@@ -683,6 +683,53 @@ fn search_hit_carries_indexed_at_from_documents_updated_at() {
|
||||
assert!(!hit.stale, "lexical retriever must default stale=false");
|
||||
}
|
||||
|
||||
/// p9-fb-38: every hit produced by the lexical retriever must be labelled
/// with `ScoreKind::Bm25`.
///
/// The fixture is one document with three chunks, two of which contain the
/// term `alpha`. A lexical-mode search for `alpha` must return at least one
/// hit, and each returned hit must carry `score_kind == ScoreKind::Bm25`.
#[test]
fn lexical_retriever_hits_carry_bm25_score_kind() {
    let env = Env::new();

    // Scope the raw connection so it is dropped before the retriever is built.
    {
        let conn = env.raw_conn();
        insert_document(&conn, &id32("d"), "notes/bm25.md", "Bm25", "en", "primary", &[]);

        let chunk_fixtures = [
            ("c1", "alpha bravo charlie"),
            ("c2", "alpha delta"),
            ("c3", "bravo echo"),
        ];
        for (chunk_key, chunk_text) in chunk_fixtures {
            insert_chunk(
                &conn,
                &id32(chunk_key),
                &id32("d"),
                chunk_text,
                &["Bm25"],
                None,
                r#"[{"kind":"line","start":1,"end":1}]"#,
                "v1",
            );
        }
    }

    let retriever = env.retriever();
    let query = SearchQuery {
        text: "alpha".to_string(),
        mode: SearchMode::Lexical,
        k: 10,
        filters: SearchFilters::default(),
    };
    let hits = retriever.search(&query).expect("search");

    // Guard against a vacuous pass: the per-hit check below is meaningless
    // when the result set is empty.
    assert!(
        !hits.is_empty(),
        "fixture should produce at least one hit for 'alpha'"
    );

    hits.iter().for_each(|hit| {
        assert_eq!(
            hit.score_kind,
            ScoreKind::Bm25,
            "lexical retriever must label all hits with ScoreKind::Bm25"
        );
    });
}
|
||||
|
||||
// ── TestEnv helper for fb-36 filter tests ───────────────────────────────
|
||||
|
||||
/// Convenience wrapper over `Env` that exposes higher-level fixture helpers
|
||||
|
||||
Reference in New Issue
Block a user