feat(core): Chunk.aliases 필드 (doc-side expansion)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -152,6 +152,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -154,6 +154,7 @@ fn make_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -339,6 +339,7 @@ fn build_chunk(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -177,6 +177,7 @@ impl Chunker for PdfPageV1Chunker {
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.clone(),
|
||||
aliases: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -196,5 +196,6 @@ fn build_chunk_from_span(
|
||||
token_estimate,
|
||||
chunker_version: chunker_version.clone(),
|
||||
policy_hash: base_policy_hash.to_string(),
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,4 +28,35 @@ pub struct Chunk {
|
||||
/// Bug #8 (한국어 2자 query) 해결을 위한 V009 cascade.
|
||||
#[serde(default)]
|
||||
pub tokenized_korean_text: Option<String>,
|
||||
/// 색인시 doc-side expansion (Phase 2) 으로 생성된 "검색용 별칭"
|
||||
/// (같은언어 paraphrase + 한↔영 번역, 개행 join). `[ingest.expansion]`
|
||||
/// flag off 또는 미생성이면 None — 별도 FTS5 테이블 `chunk_aliases_fts`
|
||||
/// 에만 색인되고 본문 매칭/dense 임베딩에는 영향 없음. 설계 spec
|
||||
/// `2026-05-30-doc-side-expansion-design.md` §3.3.
|
||||
#[serde(default)]
|
||||
pub aliases: Option<String>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn aliases_defaults_to_none_on_deserialize() {
|
||||
// aliases 필드가 없는 과거 JSON 도 파싱되어야 한다 (#[serde(default)]).
|
||||
let json = r#"{
|
||||
"chunk_id": "c1",
|
||||
"doc_id": "d1",
|
||||
"block_ids": [],
|
||||
"text": "hello",
|
||||
"heading_path": [],
|
||||
"source_spans": [],
|
||||
"token_estimate": 1,
|
||||
"chunker_version": "md-heading-v1",
|
||||
"policy_hash": "abc"
|
||||
}"#;
|
||||
let c: Chunk = serde_json::from_str(json).unwrap();
|
||||
assert_eq!(c.aliases, None);
|
||||
assert_eq!(c.tokenized_korean_text, None);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -250,6 +250,7 @@ impl kebab_core::DocumentStore for SqliteStore {
|
||||
chunker_version: kebab_core::ChunkerVersion(row.chunker_version),
|
||||
policy_hash: row.policy_hash,
|
||||
tokenized_korean_text: row.tokenized_korean_text,
|
||||
aliases: None,
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -98,6 +98,7 @@ fn make_chunks(doc_id: &DocumentId) -> Vec<Chunk> {
|
||||
chunker_version: ChunkerVersion("md-heading-v1".into()),
|
||||
policy_hash: "deadbeefdeadbeef".into(),
|
||||
tokenized_korean_text: None,
|
||||
aliases: None,
|
||||
}]
|
||||
}
|
||||
|
||||
|
||||
@@ -114,6 +114,7 @@ fn make_chunk() -> Chunk {
|
||||
chunker_version: ChunkerVersion("md-heading-v1".into()),
|
||||
policy_hash: "deadbeefdeadbeef".into(),
|
||||
tokenized_korean_text: None,
|
||||
aliases: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user