review(p9-fb-10-task5): promote lexical_query to common + tighten Korean hit assertion
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -81,6 +81,19 @@ impl TestEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Test helper: build a `SearchQuery` for lexical mode at k=10. Used
|
||||
/// by every kebab-app integration test that calls
|
||||
/// `kebab_app::search_with_config`. Centralized here so a future
|
||||
/// `SearchQuery` field bump only edits one site.
|
||||
pub fn lexical_query(text: &str) -> kebab_core::SearchQuery {
|
||||
kebab_core::SearchQuery {
|
||||
text: text.to_string(),
|
||||
mode: kebab_core::SearchMode::Lexical,
|
||||
k: 10,
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn copy_fixture_workspace(dest: &Path) {
|
||||
let src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("tests")
|
||||
|
||||
@@ -7,15 +7,6 @@ mod common;
|
||||
|
||||
use common::TestEnv;
|
||||
|
||||
/// Builds a lexical-mode `SearchQuery` for `text`, requesting `k = 10`
/// results with the default `SearchFilters`.
fn lexical_query(text: &str) -> kebab_core::SearchQuery {
    let mode = kebab_core::SearchMode::Lexical;
    let filters = kebab_core::SearchFilters::default();
    kebab_core::SearchQuery {
        text: String::from(text),
        mode,
        k: 10,
        filters,
    }
}
|
||||
|
||||
/// p9-fb-10 — A Korean token present in a Hangul document must survive
|
||||
/// the ingest → FTS5 → search round-trip. NFC normalization is wired
|
||||
/// upstream in `kebab-normalize`; this test just verifies the facade
|
||||
@@ -37,7 +28,7 @@ fn korean_lexical_query_returns_korean_document() {
|
||||
.expect("ingest must succeed");
|
||||
|
||||
// Lexical search for "러스트" — must return the Korean document.
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("러스트"))
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), common::lexical_query("러스트"))
|
||||
.expect("search must succeed");
|
||||
|
||||
assert!(
|
||||
@@ -46,12 +37,12 @@ fn korean_lexical_query_returns_korean_document() {
|
||||
);
|
||||
|
||||
// At least one hit must reference our Korean document.
|
||||
let any_korean = hits.iter().any(|h| {
|
||||
let p = &h.doc_path.0;
|
||||
p.contains("러스트") || p.contains("비동기")
|
||||
});
|
||||
// "러스트-비동기" is the exact filename stem — a single combined
|
||||
// check is unambiguous and avoids false positives from other docs.
|
||||
let any_korean = hits.iter().any(|h| h.doc_path.0.contains("러스트-비동기"));
|
||||
assert!(
|
||||
any_korean,
|
||||
"expected a hit referencing the Korean document; got: {hits:#?}"
|
||||
"expected at least one hit on the Korean fixture doc, got: {:?}",
|
||||
hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -5,15 +5,6 @@ mod common;
|
||||
|
||||
use common::TestEnv;
|
||||
|
||||
/// Builds a lexical-mode `SearchQuery` for `text`, requesting `k = 10`
/// results with the default `SearchFilters`.
fn lexical_query(text: &str) -> kebab_core::SearchQuery {
    let owned_text = text.to_owned();
    let default_filters = kebab_core::SearchFilters::default();
    kebab_core::SearchQuery {
        text: owned_text,
        mode: kebab_core::SearchMode::Lexical,
        k: 10,
        filters: default_filters,
    }
}
|
||||
|
||||
#[test]
|
||||
fn lexical_search_returns_hits_after_ingest() {
|
||||
let env = TestEnv::lexical_only();
|
||||
@@ -22,7 +13,7 @@ fn lexical_search_returns_hits_after_ingest() {
|
||||
// "Ownership" appears as a heading + paragraph in intro.md and
|
||||
// matches FTS5 default tokenizer easily.
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
|
||||
kebab_app::search_with_config(env.config.clone(), common::lexical_query("ownership"))
|
||||
.unwrap();
|
||||
assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'");
|
||||
|
||||
@@ -44,7 +35,7 @@ fn lexical_search_returns_hits_after_ingest() {
|
||||
fn lexical_search_empty_query_returns_empty() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query(" "))
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), common::lexical_query(" "))
|
||||
.unwrap();
|
||||
assert!(hits.is_empty(), "blank query must short-circuit empty");
|
||||
}
|
||||
@@ -57,9 +48,9 @@ fn cached_search_returns_same_hits_on_repeat() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let app = kebab_app::App::open_with_config(env.config.clone()).unwrap();
|
||||
let first = app.search(lexical_query("ownership")).unwrap();
|
||||
let first = app.search(common::lexical_query("ownership")).unwrap();
|
||||
assert!(!first.is_empty(), "first call must return ≥1 hit");
|
||||
let second = app.search(lexical_query("ownership")).unwrap();
|
||||
let second = app.search(common::lexical_query("ownership")).unwrap();
|
||||
assert_eq!(
|
||||
first.len(),
|
||||
second.len(),
|
||||
@@ -79,9 +70,9 @@ fn cache_key_normalization_treats_case_and_whitespace_as_equivalent() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let app = kebab_app::App::open_with_config(env.config.clone()).unwrap();
|
||||
let plain = app.search(lexical_query("ownership")).unwrap();
|
||||
let upper = app.search(lexical_query("OWNERSHIP")).unwrap();
|
||||
let padded = app.search(lexical_query(" Ownership ")).unwrap();
|
||||
let plain = app.search(common::lexical_query("ownership")).unwrap();
|
||||
let upper = app.search(common::lexical_query("OWNERSHIP")).unwrap();
|
||||
let padded = app.search(common::lexical_query(" Ownership ")).unwrap();
|
||||
assert_eq!(plain.len(), upper.len());
|
||||
assert_eq!(plain.len(), padded.len());
|
||||
// chunk_ids are deterministic — same query class, same set.
|
||||
@@ -97,11 +88,11 @@ fn search_uncached_returns_same_hits_as_cached() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let cached =
|
||||
kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
|
||||
kebab_app::search_with_config(env.config.clone(), common::lexical_query("ownership"))
|
||||
.unwrap();
|
||||
let uncached = kebab_app::search_uncached_with_config(
|
||||
env.config.clone(),
|
||||
lexical_query("ownership"),
|
||||
common::lexical_query("ownership"),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(cached.len(), uncached.len());
|
||||
|
||||
Reference in New Issue
Block a user