Files
kebab/crates/kebab-app/tests/search_lexical.rs
altair823 e03d03cb26 test: 별칭 전용 테스트 삭제 + 영향 테스트/fixture 갱신
kebab-search/tests/lexical.rs 의 alias 채널 테스트 + insert_chunk_with_aliases
헬퍼 제거(body 회수 회귀 테스트로 대체). Chunk 리터럴 aliases: None 제거
(embedding_records_fk/idempotency/inspect). chunk 스냅샷 fixture 의 aliases
키 제거. config_migrate 는 ingest.code 앵커로, corpus_revision/search_lexical
주석은 V013 비-bump 명시로 갱신.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 21:37:58 +00:00

151 lines
6.0 KiB
Rust

//! Lexical search integration tests. The vector / hybrid lanes are
//! AVX-gated and live in `search_vector.rs` (`#[ignore]`).
mod common;
use common::TestEnv;
/// After an ingest in a lexical-only environment, a search for
/// "ownership" must return at least one hit, and every hit must carry
/// no embedding model and be labelled with the `Lexical` method.
#[test]
fn lexical_search_returns_hits_after_ingest() {
    let env = TestEnv::lexical_only();
    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();

    // "Ownership" shows up in intro.md both as a heading and in a
    // paragraph, so the default FTS5 tokenizer matches it easily.
    let config = env.config.clone();
    let query = common::lexical_query("ownership");
    let hits = kebab_app::search_with_config(config, query).unwrap();
    assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'");

    hits.iter().for_each(|h| {
        // Per spec, the lexical retriever leaves embedding_model unset.
        assert!(
            h.embedding_model.is_none(),
            "lexical-mode hit must have None embedding_model: {h:?}"
        );
        assert_eq!(
            h.retrieval.method,
            kebab_core::SearchMode::Lexical,
            "method label should be Lexical"
        );
    });
}
/// A whitespace-only query must produce an empty hit list rather than
/// an error, even when the index has been populated by an ingest.
#[test]
fn lexical_search_empty_query_returns_empty() {
    let env = TestEnv::lexical_only();
    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();

    let config = env.config.clone();
    let blank_query = common::lexical_query(" ");
    let hits = kebab_app::search_with_config(config, blank_query).unwrap();

    assert!(hits.is_empty(), "blank query must short-circuit empty");
}
/// p9-fb-19 — issuing the same query twice through one `App` instance
/// must yield the same hit list, i.e. serving the repeat from the cache
/// does not corrupt the result. A single `App` is reused for both calls
/// so that its cache is in scope for the second one.
#[test]
fn cached_search_returns_same_hits_on_repeat() {
    let env = TestEnv::lexical_only();
    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
    let app = kebab_app::App::open_with_config(env.config.clone()).unwrap();

    let first = app.search(common::lexical_query("ownership")).unwrap();
    assert!(!first.is_empty(), "first call must return ≥1 hit");

    let second = app.search(common::lexical_query("ownership")).unwrap();
    // Lengths are checked first because `zip` below stops at the shorter
    // list and would otherwise hide a missing or extra hit.
    assert_eq!(
        first.len(),
        second.len(),
        "cached call must yield identical hit count"
    );

    first.iter().zip(second.iter()).for_each(|(a, b)| {
        assert_eq!(a.chunk_id, b.chunk_id, "chunk_ids must align");
        assert_eq!(a.rank, b.rank, "ranks must align");
    });
}
/// p9-fb-19 — query normalization (NFKC + trim + lowercase) collapses
/// `"ownership"` / `"OWNERSHIP"` / `" Ownership "` into one cache
/// entry. Verified by ensuring all three forms return the same hits:
/// the same number of them and the same chunk ids in the same order.
#[test]
fn cache_key_normalization_treats_case_and_whitespace_as_equivalent() {
    let env = TestEnv::lexical_only();
    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
    let app = kebab_app::App::open_with_config(env.config.clone()).unwrap();

    let plain = app.search(common::lexical_query("ownership")).unwrap();
    let upper = app.search(common::lexical_query("OWNERSHIP")).unwrap();
    let padded = app.search(common::lexical_query(" Ownership ")).unwrap();

    assert_eq!(plain.len(), upper.len());
    assert_eq!(plain.len(), padded.len());

    // chunk_ids are deterministic — same query class, same set.
    let plain_ids: Vec<_> = plain.iter().map(|h| h.chunk_id.0.clone()).collect();
    let upper_ids: Vec<_> = upper.iter().map(|h| h.chunk_id.0.clone()).collect();
    let padded_ids: Vec<_> = padded.iter().map(|h| h.chunk_id.0.clone()).collect();
    assert_eq!(plain_ids, upper_ids);
    // The padded / mixed-case form must match by id as well; a length
    // check alone would let a different hit set of equal size slip by.
    assert_eq!(plain_ids, padded_ids);
}
/// p9-fb-19 — the `--no-cache` entry point (`search_uncached_with_config`)
/// skips the cache, yet must return the same hits as `search_with_config`
/// for the same query.
#[test]
fn search_uncached_returns_same_hits_as_cached() {
    let env = TestEnv::lexical_only();
    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();

    let cached = {
        let config = env.config.clone();
        kebab_app::search_with_config(config, common::lexical_query("ownership")).unwrap()
    };
    let uncached = {
        let config = env.config.clone();
        kebab_app::search_uncached_with_config(config, common::lexical_query("ownership"))
            .unwrap()
    };

    // Same count first, then pairwise identity of the chunk ids.
    assert_eq!(cached.len(), uncached.len());
    cached
        .iter()
        .zip(uncached.iter())
        .for_each(|(a, b)| assert_eq!(a.chunk_id, b.chunk_id));
}
/// p9-fb-19 — the first ingest that commits documents must move
/// `corpus_revision` past its post-migration baseline. The value is read
/// through freshly opened `SqliteStore` handles because the field on
/// `App` is `pub(crate)`.
#[test]
fn first_ingest_bumps_corpus_revision() {
    let env = TestEnv::lexical_only();

    let store_before = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
    store_before.run_migrations().unwrap();

    // Baseline arithmetic: V004 seeds the counter at 0, then V009, V010
    // and V011 each add 1 to invalidate stale LRU caches (spec §5.2),
    // giving 3 before any ingest. V012 (derivation_cache) and V013
    // (drop-chunk-aliases) are structural/additive and do not bump it.
    let baseline = store_before.corpus_revision();
    assert_eq!(baseline, 3, "fresh store post-V011 baseline = 3");

    let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
    let committed = report.new + report.updated;
    assert!(committed > 0, "first ingest must commit ≥1 doc");

    // Re-open the store so the revision is read back from what was persisted.
    let store_after = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
    let after = store_after.corpus_revision();
    assert!(
        after > baseline,
        "ingest commit must bump corpus_revision past baseline {baseline} (got {})",
        after,
    );
}
/// Requesting `Vector` mode in a lexical-only environment must fail, and
/// the rendered error chain must say that embeddings are disabled.
#[test]
fn vector_mode_with_provider_none_errors_clearly() {
    let env = TestEnv::lexical_only();
    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();

    let vector_query = kebab_core::SearchQuery {
        text: String::from("ownership"),
        mode: kebab_core::SearchMode::Vector,
        k: 10,
        filters: kebab_core::SearchFilters::default(),
    };

    let err = kebab_app::search_with_config(env.config.clone(), vector_query).unwrap_err();
    // `{:#}` renders the error in its alternate form so the check sees the
    // fuller message, not only the outermost one.
    let msg = format!("{err:#}");

    let mentions_disabled = ["embeddings disabled", "disabled"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(
        mentions_disabled,
        "error must mention embeddings disabled: {msg}"
    );
}