From 848b75c069149c0fede80ea36e930016eebd0fba Mon Sep 17 00:00:00 2001 From: altair823 Date: Sat, 30 May 2026 02:09:39 +0000 Subject: [PATCH] =?UTF-8?q?feat(core):=20Chunk.aliases=20=ED=95=84?= =?UTF-8?q?=EB=93=9C=20(doc-side=20expansion)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-chunk/src/code_c_ast_v1.rs | 1 + crates/kebab-chunk/src/code_cpp_ast_v1.rs | 1 + crates/kebab-chunk/src/code_go_ast_v1.rs | 1 + crates/kebab-chunk/src/code_java_ast_v1.rs | 1 + crates/kebab-chunk/src/code_js_ast_v1.rs | 1 + crates/kebab-chunk/src/code_kotlin_ast_v1.rs | 1 + crates/kebab-chunk/src/code_python_ast_v1.rs | 1 + crates/kebab-chunk/src/code_rust_ast_v1.rs | 1 + crates/kebab-chunk/src/code_ts_ast_v1.rs | 1 + crates/kebab-chunk/src/md_heading_v1.rs | 1 + crates/kebab-chunk/src/pdf_page_v1.rs | 1 + crates/kebab-chunk/src/tier2_shared.rs | 1 + crates/kebab-core/src/chunk.rs | 31 +++++++++++++++++++ crates/kebab-store-sqlite/src/documents.rs | 1 + .../kebab-store-sqlite/tests/idempotency.rs | 1 + crates/kebab-tui/tests/inspect.rs | 1 + 16 files changed, 46 insertions(+) diff --git a/crates/kebab-chunk/src/code_c_ast_v1.rs b/crates/kebab-chunk/src/code_c_ast_v1.rs index 642f9d3..9bedb5a 100644 --- a/crates/kebab-chunk/src/code_c_ast_v1.rs +++ b/crates/kebab-chunk/src/code_c_ast_v1.rs @@ -152,6 +152,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_cpp_ast_v1.rs b/crates/kebab-chunk/src/code_cpp_ast_v1.rs index f9ca1a1..06804bf 100644 --- a/crates/kebab-chunk/src/code_cpp_ast_v1.rs +++ b/crates/kebab-chunk/src/code_cpp_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_go_ast_v1.rs 
b/crates/kebab-chunk/src/code_go_ast_v1.rs index 22e9310..825a003 100644 --- a/crates/kebab-chunk/src/code_go_ast_v1.rs +++ b/crates/kebab-chunk/src/code_go_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_java_ast_v1.rs b/crates/kebab-chunk/src/code_java_ast_v1.rs index 07e0ab8..0b24a86 100644 --- a/crates/kebab-chunk/src/code_java_ast_v1.rs +++ b/crates/kebab-chunk/src/code_java_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_js_ast_v1.rs b/crates/kebab-chunk/src/code_js_ast_v1.rs index 8ae1fc5..8480075 100644 --- a/crates/kebab-chunk/src/code_js_ast_v1.rs +++ b/crates/kebab-chunk/src/code_js_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_kotlin_ast_v1.rs b/crates/kebab-chunk/src/code_kotlin_ast_v1.rs index 1c1a386..4a3a6cb 100644 --- a/crates/kebab-chunk/src/code_kotlin_ast_v1.rs +++ b/crates/kebab-chunk/src/code_kotlin_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_python_ast_v1.rs b/crates/kebab-chunk/src/code_python_ast_v1.rs index ac62678..17aeb0d 100644 --- a/crates/kebab-chunk/src/code_python_ast_v1.rs +++ b/crates/kebab-chunk/src/code_python_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_rust_ast_v1.rs b/crates/kebab-chunk/src/code_rust_ast_v1.rs index 365ed87..c44bd49 
100644 --- a/crates/kebab-chunk/src/code_rust_ast_v1.rs +++ b/crates/kebab-chunk/src/code_rust_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/code_ts_ast_v1.rs b/crates/kebab-chunk/src/code_ts_ast_v1.rs index 42dd4ac..e66c5d3 100644 --- a/crates/kebab-chunk/src/code_ts_ast_v1.rs +++ b/crates/kebab-chunk/src/code_ts_ast_v1.rs @@ -154,6 +154,7 @@ fn make_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/md_heading_v1.rs b/crates/kebab-chunk/src/md_heading_v1.rs index 0265d1f..d2559cb 100644 --- a/crates/kebab-chunk/src/md_heading_v1.rs +++ b/crates/kebab-chunk/src/md_heading_v1.rs @@ -339,6 +339,7 @@ fn build_chunk( token_estimate, chunker_version: chunker_version.clone(), policy_hash: policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-chunk/src/pdf_page_v1.rs b/crates/kebab-chunk/src/pdf_page_v1.rs index e615163..9f7a5ac 100644 --- a/crates/kebab-chunk/src/pdf_page_v1.rs +++ b/crates/kebab-chunk/src/pdf_page_v1.rs @@ -177,6 +177,7 @@ impl Chunker for PdfPageV1Chunker { token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.clone(), + aliases: None, }); } } diff --git a/crates/kebab-chunk/src/tier2_shared.rs b/crates/kebab-chunk/src/tier2_shared.rs index 8f67d79..a385ce3 100644 --- a/crates/kebab-chunk/src/tier2_shared.rs +++ b/crates/kebab-chunk/src/tier2_shared.rs @@ -196,5 +196,6 @@ fn build_chunk_from_span( token_estimate, chunker_version: chunker_version.clone(), policy_hash: base_policy_hash.to_string(), + aliases: None, } } diff --git a/crates/kebab-core/src/chunk.rs b/crates/kebab-core/src/chunk.rs index 10dce5f..eaa81db 100644 --- a/crates/kebab-core/src/chunk.rs +++ b/crates/kebab-core/src/chunk.rs @@ -28,4 +28,35 @@ pub struct Chunk { 
/// Bug #8 (한국어 2자 query) 해결을 위한 V009 cascade. #[serde(default)] pub tokenized_korean_text: Option<String>, + /// 색인시 doc-side expansion (Phase 2) 으로 생성된 "검색용 별칭" + /// (같은언어 paraphrase + 한↔영 번역, 개행 join). `[ingest.expansion]` + /// flag off 또는 미생성이면 None — 별도 FTS5 테이블 `chunk_aliases_fts` + /// 에만 색인되고 본문 매칭/dense 임베딩에는 영향 없음. 설계 spec + /// `2026-05-30-doc-side-expansion-design.md` §3.3. + #[serde(default)] + pub aliases: Option<String>, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn aliases_defaults_to_none_on_deserialize() { + // aliases 필드가 없는 과거 JSON 도 파싱되어야 한다 (#[serde(default)]). + let json = r#"{ + "chunk_id": "c1", + "doc_id": "d1", + "block_ids": [], + "text": "hello", + "heading_path": [], + "source_spans": [], + "token_estimate": 1, + "chunker_version": "md-heading-v1", + "policy_hash": "abc" + }"#; + let c: Chunk = serde_json::from_str(json).unwrap(); + assert_eq!(c.aliases, None); + assert_eq!(c.tokenized_korean_text, None); + } } diff --git a/crates/kebab-store-sqlite/src/documents.rs b/crates/kebab-store-sqlite/src/documents.rs index e1dcd57..e02db50 100644 --- a/crates/kebab-store-sqlite/src/documents.rs +++ b/crates/kebab-store-sqlite/src/documents.rs @@ -250,6 +250,7 @@ impl kebab_core::DocumentStore for SqliteStore { chunker_version: kebab_core::ChunkerVersion(row.chunker_version), policy_hash: row.policy_hash, tokenized_korean_text: row.tokenized_korean_text, + aliases: None, })) } diff --git a/crates/kebab-store-sqlite/tests/idempotency.rs b/crates/kebab-store-sqlite/tests/idempotency.rs index 1171c0a..080389e 100644 --- a/crates/kebab-store-sqlite/tests/idempotency.rs +++ b/crates/kebab-store-sqlite/tests/idempotency.rs @@ -98,6 +98,7 @@ fn make_chunks(doc_id: &DocumentId) -> Vec<Chunk> { chunker_version: ChunkerVersion("md-heading-v1".into()), policy_hash: "deadbeefdeadbeef".into(), tokenized_korean_text: None, + aliases: None, }] } diff --git a/crates/kebab-tui/tests/inspect.rs b/crates/kebab-tui/tests/inspect.rs index 4e0525f..d3dbe70 100644 --- 
a/crates/kebab-tui/tests/inspect.rs +++ b/crates/kebab-tui/tests/inspect.rs @@ -114,6 +114,7 @@ fn make_chunk() -> Chunk { chunker_version: ChunkerVersion("md-heading-v1".into()), policy_hash: "deadbeefdeadbeef".into(), tokenized_korean_text: None, + aliases: None, } }