feat(core): Chunk.aliases 필드 (doc-side expansion)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-30 02:09:39 +00:00
parent 467a974901
commit 848b75c069
16 changed files with 46 additions and 0 deletions

View File

@@ -152,6 +152,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -154,6 +154,7 @@ fn make_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -339,6 +339,7 @@ fn build_chunk(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -177,6 +177,7 @@ impl Chunker for PdfPageV1Chunker {
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.clone(),
aliases: None,
});
}
}

View File

@@ -196,5 +196,6 @@ fn build_chunk_from_span(
token_estimate,
chunker_version: chunker_version.clone(),
policy_hash: base_policy_hash.to_string(),
aliases: None,
}
}

View File

@@ -28,4 +28,35 @@ pub struct Chunk {
/// Bug #8 (한국어 2자 query) 해결을 위한 V009 cascade.
#[serde(default)]
pub tokenized_korean_text: Option<String>,
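/// "Search aliases" generated at index time by doc-side expansion (Phase 2):
/// same-language paraphrases plus Korean↔English translations, joined by
/// newlines. `None` when the `[ingest.expansion]` flag is off or no aliases
/// were generated. Indexed only in the separate FTS5 table
/// `chunk_aliases_fts`; has no effect on body-text matching or dense
/// embeddings. Design spec `2026-05-30-doc-side-expansion-design.md` §3.3.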
#[serde(default)]
pub aliases: Option<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Backward-compatibility check: a payload that carries neither `aliases`
    /// nor `tokenized_korean_text` must still deserialize into a `Chunk`,
    /// with both fields coming back as `None`.
    #[test]
    fn aliases_defaults_to_none_on_deserialize() {
        // Legacy JSON written before the `aliases` field existed; the field's
        // `#[serde(default)]` attribute is what allows it to parse.
        let legacy_json = r#"{
"chunk_id": "c1",
"doc_id": "d1",
"block_ids": [],
"text": "hello",
"heading_path": [],
"source_spans": [],
"token_estimate": 1,
"chunker_version": "md-heading-v1",
"policy_hash": "abc"
}"#;

        let chunk = serde_json::from_str::<Chunk>(legacy_json).unwrap();

        assert_eq!(chunk.aliases, None);
        assert_eq!(chunk.tokenized_korean_text, None);
    }
}

View File

@@ -250,6 +250,7 @@ impl kebab_core::DocumentStore for SqliteStore {
chunker_version: kebab_core::ChunkerVersion(row.chunker_version),
policy_hash: row.policy_hash,
tokenized_korean_text: row.tokenized_korean_text,
aliases: None,
}))
}

View File

@@ -98,6 +98,7 @@ fn make_chunks(doc_id: &DocumentId) -> Vec<Chunk> {
chunker_version: ChunkerVersion("md-heading-v1".into()),
policy_hash: "deadbeefdeadbeef".into(),
tokenized_korean_text: None,
aliases: None,
}]
}

View File

@@ -114,6 +114,7 @@ fn make_chunk() -> Chunk {
chunker_version: ChunkerVersion("md-heading-v1".into()),
policy_hash: "deadbeefdeadbeef".into(),
tokenized_korean_text: None,
aliases: None,
}
}