From 60e583252e83132ee45f271ef8c212df73177264 Mon Sep 17 00:00:00 2001 From: altair823 Date: Sun, 3 May 2026 10:08:32 +0000 Subject: [PATCH] =?UTF-8?q?test(kebab-app):=20Korean=20query=20=E2=86=92?= =?UTF-8?q?=20FTS5=20smoke=20pin?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit p9-fb-10: verifies that a Korean (Hangul) token survives the ingest → FTS5 lexical search round-trip via the kebab-app facade. NFC normalization is wired upstream in kebab-normalize; this test only exercises end-to-end correctness — no AVX, no fastembed required. Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-app/tests/search_korean.rs | 57 +++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 crates/kebab-app/tests/search_korean.rs diff --git a/crates/kebab-app/tests/search_korean.rs b/crates/kebab-app/tests/search_korean.rs new file mode 100644 index 0000000..15c41cf --- /dev/null +++ b/crates/kebab-app/tests/search_korean.rs @@ -0,0 +1,57 @@ +//! p9-fb-10: smoke pin that a Korean query reaches FTS5 and returns +//! the matching Hangul document. NFC normalization happens upstream +//! in `kebab-normalize`; this test only exercises the end-to-end +//! facade — ingest a Korean .md → lexical search → at least one hit. + +mod common; + +use common::TestEnv; + +fn lexical_query(text: &str) -> kebab_core::SearchQuery { + kebab_core::SearchQuery { + text: text.to_string(), + mode: kebab_core::SearchMode::Lexical, + k: 10, + filters: kebab_core::SearchFilters::default(), + } +} + +/// p9-fb-10 — A Korean token present in a Hangul document must survive +/// the ingest → FTS5 → search round-trip. NFC normalization is wired +/// upstream in `kebab-normalize`; this test just verifies the facade +/// doesn't drop or corrupt CJK text along the way. +#[test] +fn korean_lexical_query_returns_korean_document() { + let env = TestEnv::lexical_only(); + + // Write a Korean Markdown document into the temp workspace. + let doc_path = env.workspace_root.join("러스트-비동기.md"); + std::fs::write( + &doc_path, + "# 러스트 비동기 프로그래밍\n\n토큰: 러스트, 비동기, async, await\n", + ) + .expect("write Korean fixture doc"); + + // Ingest — lexical_only() disables fastembed so no AVX required. + kebab_app::ingest_with_config(env.config.clone(), env.scope(), true) + .expect("ingest must succeed"); + + // Lexical search for "러스트" — must return the Korean document. + let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("러스트")) + .expect("search must succeed"); + + assert!( + !hits.is_empty(), + "expected at least one hit for Korean lexical query '러스트'" + ); + + // At least one hit must reference our Korean document. + let any_korean = hits.iter().any(|h| { + let p = &h.doc_path.0; + p.contains("러스트") || p.contains("비동기") + }); + assert!( + any_korean, + "expected a hit referencing the Korean document; got: {hits:#?}" + ); +}