review(p9-fb-10-task5): promote lexical_query to common + tighten Korean hit assertion

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-03 10:14:17 +00:00
parent 60e583252e
commit 3f0b00439a
3 changed files with 28 additions and 33 deletions

View File

@@ -81,6 +81,19 @@ impl TestEnv {
}
}
/// Test helper: build a `SearchQuery` for lexical mode at k=10. Used
/// by every kebab-app integration test that calls
/// `kebab_app::search_with_config`. Centralized here so a future
/// `SearchQuery` field bump only edits one site.
pub fn lexical_query(text: &str) -> kebab_core::SearchQuery {
kebab_core::SearchQuery {
text: text.to_string(),
mode: kebab_core::SearchMode::Lexical,
k: 10,
filters: kebab_core::SearchFilters::default(),
}
}
fn copy_fixture_workspace(dest: &Path) {
let src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("tests")

View File

@@ -7,15 +7,6 @@ mod common;
use common::TestEnv;
/// Builds a lexical-mode `SearchQuery` for `text` with `k = 10` and
/// default filters.
fn lexical_query(text: &str) -> kebab_core::SearchQuery {
    let text = text.to_owned();
    let mode = kebab_core::SearchMode::Lexical;
    let filters = kebab_core::SearchFilters::default();
    kebab_core::SearchQuery {
        text,
        mode,
        k: 10,
        filters,
    }
}
/// p9-fb-10 — A Korean token present in a Hangul document must survive
/// the ingest → FTS5 → search round-trip. NFC normalization is wired
/// upstream in `kebab-normalize`; this test just verifies the facade
@@ -37,7 +28,7 @@ fn korean_lexical_query_returns_korean_document() {
.expect("ingest must succeed");
// Lexical search for "러스트" — must return the Korean document.
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("러스트"))
let hits = kebab_app::search_with_config(env.config.clone(), common::lexical_query("러스트"))
.expect("search must succeed");
assert!(
@@ -46,12 +37,12 @@ fn korean_lexical_query_returns_korean_document() {
);
// At least one hit must reference our Korean document.
let any_korean = hits.iter().any(|h| {
let p = &h.doc_path.0;
p.contains("러스트") || p.contains("비동기")
});
// "러스트-비동기" is the exact filename stem — a single combined
// check is unambiguous and avoids false positives from other docs.
let any_korean = hits.iter().any(|h| h.doc_path.0.contains("러스트-비동기"));
assert!(
any_korean,
"expected a hit referencing the Korean document; got: {hits:#?}"
"expected at least one hit on the Korean fixture doc, got: {:?}",
hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>()
);
}

View File

@@ -5,15 +5,6 @@ mod common;
use common::TestEnv;
fn lexical_query(text: &str) -> kebab_core::SearchQuery {
kebab_core::SearchQuery {
text: text.to_string(),
mode: kebab_core::SearchMode::Lexical,
k: 10,
filters: kebab_core::SearchFilters::default(),
}
}
#[test]
fn lexical_search_returns_hits_after_ingest() {
let env = TestEnv::lexical_only();
@@ -22,7 +13,7 @@ fn lexical_search_returns_hits_after_ingest() {
// "Ownership" appears as a heading + paragraph in intro.md and
// matches FTS5 default tokenizer easily.
let hits =
kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
kebab_app::search_with_config(env.config.clone(), common::lexical_query("ownership"))
.unwrap();
assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'");
@@ -44,7 +35,7 @@ fn lexical_search_returns_hits_after_ingest() {
fn lexical_search_empty_query_returns_empty() {
let env = TestEnv::lexical_only();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query(" "))
let hits = kebab_app::search_with_config(env.config.clone(), common::lexical_query(" "))
.unwrap();
assert!(hits.is_empty(), "blank query must short-circuit empty");
}
@@ -57,9 +48,9 @@ fn cached_search_returns_same_hits_on_repeat() {
let env = TestEnv::lexical_only();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let app = kebab_app::App::open_with_config(env.config.clone()).unwrap();
let first = app.search(lexical_query("ownership")).unwrap();
let first = app.search(common::lexical_query("ownership")).unwrap();
assert!(!first.is_empty(), "first call must return ≥1 hit");
let second = app.search(lexical_query("ownership")).unwrap();
let second = app.search(common::lexical_query("ownership")).unwrap();
assert_eq!(
first.len(),
second.len(),
@@ -79,9 +70,9 @@ fn cache_key_normalization_treats_case_and_whitespace_as_equivalent() {
let env = TestEnv::lexical_only();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let app = kebab_app::App::open_with_config(env.config.clone()).unwrap();
let plain = app.search(lexical_query("ownership")).unwrap();
let upper = app.search(lexical_query("OWNERSHIP")).unwrap();
let padded = app.search(lexical_query(" Ownership ")).unwrap();
let plain = app.search(common::lexical_query("ownership")).unwrap();
let upper = app.search(common::lexical_query("OWNERSHIP")).unwrap();
let padded = app.search(common::lexical_query(" Ownership ")).unwrap();
assert_eq!(plain.len(), upper.len());
assert_eq!(plain.len(), padded.len());
// chunk_ids are deterministic — same query class, same set.
@@ -97,11 +88,11 @@ fn search_uncached_returns_same_hits_as_cached() {
let env = TestEnv::lexical_only();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let cached =
kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
kebab_app::search_with_config(env.config.clone(), common::lexical_query("ownership"))
.unwrap();
let uncached = kebab_app::search_uncached_with_config(
env.config.clone(),
lexical_query("ownership"),
common::lexical_query("ownership"),
)
.unwrap();
assert_eq!(cached.len(), uncached.len());