From f94e0c4a9b7a6f84b5a80c4bd035f4d2e6f6568f Mon Sep 17 00:00:00 2001 From: altair823 Date: Thu, 28 May 2026 11:23:13 +0000 Subject: [PATCH] feat(app): bump lexical_index_version to V009 (fts5-v009-korean-morphological) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit V009 의 FTS5 tokenizer 가 trigram → unicode61 + 한국어 형태소 분해 column 으로 갱신됨. lexical_index_version 의 format 에 `fts5-v009-korean-morphological` suffix 를 추가하여 V007 baseline 과 구별. eval runner 의 config_snapshot 및 search cache 무효화에 자동으로 반영됨. 기존 format: lex:{chunker_version} 신규 format: lex:{chunker_version}:fts5-v009-korean-morphological Wire schema shape 변경 없음 (SearchHit.index_version 의 string content 만 변화). lexical_index_version_is_returned_unchanged test 는 IndexVersion 의 임의 string 을 사용하므로 수정 없이 유지됨. Spec: docs/superpowers/specs/2026-05-28-v0.20.x-korean-morphological-tokenizer-spec.md §11.1, §11.3 Plan: docs/superpowers/plans/2026-05-28-v0.20.x-korean-morphological-tokenizer-plan.md (S6) --- crates/kebab-app/src/app.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs index 87e51a2..a47a80c 100644 --- a/crates/kebab-app/src/app.rs +++ b/crates/kebab-app/src/app.rs @@ -993,8 +993,16 @@ impl App { /// the active config. This token surfaces in `SearchHit.index_version` /// and on snapshot tests; including the chunker version pins it to /// the chunking policy in effect. +/// +/// V009 (2026-05-28): FTS5 tokenizer 가 trigram → unicode61 + 한국어 +/// 형태소 분해 column 로 갱신됨. `fts5-v009-korean-morphological` +/// suffix 가 V007 baseline 과 구별되어 eval runner 의 config +/// snapshot 및 search cache 무효화에 picks up 된다. 
fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion { - IndexVersion(format!("lex:{}", config.chunking.chunker_version)) + IndexVersion(format!( + "lex:{}:fts5-v009-korean-morphological", + config.chunking.chunker_version + )) } /// p9-fb-37: stand-in for the vector retriever in the trace path when