feat(search/lexical): label hits with ScoreKind::Bm25 (fb-38 task 2)

- Add ScoreKind::Bm25 to LexicalRetriever::build_hit SearchHit construction
- Import ScoreKind from kebab_core in lexical.rs
- Add integration test lexical_retriever_hits_carry_bm25_score_kind to verify all
  hits from LexicalRetriever carry score_kind == ScoreKind::Bm25
- Update lexical snapshot test baseline to include new score_kind field

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
th-kim0823
2026-05-10 17:54:11 +09:00
parent 3c605b1a5d
commit 3a621bba0d
2 changed files with 51 additions and 3 deletions

View File

@@ -11,7 +11,7 @@ use anyhow::{Context, Result};
use globset::GlobMatcher;
use kebab_core::{
ChunkId, ChunkerVersion, DocumentId, IndexVersion, RetrievalDetail, Retriever,
SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
ScoreKind, SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
WorkspacePath,
};
use kebab_store_sqlite::SqliteStore;
@@ -469,6 +469,7 @@ fn build_hit(
// (called from `App::search` / `App::search_uncached`) and the equivalent
// in `RagPipeline::ask` against the configured threshold.
stale: false,
score_kind: ScoreKind::Bm25,
})
}

View File

@@ -9,8 +9,8 @@ use std::sync::Arc;
use kebab_config::Config;
use kebab_core::{
DocumentId, IndexVersion, Lang, MediaType, Retriever, SearchFilters, SearchHit, SearchMode,
SearchQuery, TrustLevel,
DocumentId, IndexVersion, Lang, MediaType, Retriever, ScoreKind, SearchFilters, SearchHit,
SearchMode, SearchQuery, TrustLevel,
};
use kebab_search::LexicalRetriever;
use kebab_store_sqlite::SqliteStore;
@@ -683,6 +683,53 @@ fn search_hit_carries_indexed_at_from_documents_updated_at() {
assert!(!hit.stale, "lexical retriever must default stale=false");
}
#[test]
fn lexical_retriever_hits_carry_bm25_score_kind() {
// p9-fb-38: verify that every hit returned by LexicalRetriever
// has score_kind == ScoreKind::Bm25. This establishes the
// relationship: Lexical-only search → Bm25 score semantics.
let env = Env::new();
let conn = env.raw_conn();
insert_document(&conn, &id32("d"), "notes/bm25.md", "Bm25", "en", "primary", &[]);
for (cid, body) in [
("c1", "alpha bravo charlie"),
("c2", "alpha delta"),
("c3", "bravo echo"),
] {
insert_chunk(
&conn,
&id32(cid),
&id32("d"),
body,
&["Bm25"],
None,
r#"[{"kind":"line","start":1,"end":1}]"#,
"v1",
);
}
drop(conn);
let r = env.retriever();
let hits = r
.search(&SearchQuery {
text: "alpha".to_string(),
mode: SearchMode::Lexical,
k: 10,
filters: SearchFilters::default(),
})
.expect("search");
assert!(
!hits.is_empty(),
"fixture should produce at least one hit for 'alpha'"
);
for h in &hits {
assert_eq!(
h.score_kind, ScoreKind::Bm25,
"lexical retriever must label all hits with ScoreKind::Bm25"
);
}
}
// ── TestEnv helper for fb-36 filter tests ───────────────────────────────
/// Convenience wrapper over `Env` that exposes higher-level fixture helpers