diff --git a/crates/kebab-chunk/src/code_c_ast_v1.rs b/crates/kebab-chunk/src/code_c_ast_v1.rs index 6a4daad..642f9d3 100644 --- a/crates/kebab-chunk/src/code_c_ast_v1.rs +++ b/crates/kebab-chunk/src/code_c_ast_v1.rs @@ -145,7 +145,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_cpp_ast_v1.rs b/crates/kebab-chunk/src/code_cpp_ast_v1.rs index 6bf458f..f9ca1a1 100644 --- a/crates/kebab-chunk/src/code_cpp_ast_v1.rs +++ b/crates/kebab-chunk/src/code_cpp_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_go_ast_v1.rs b/crates/kebab-chunk/src/code_go_ast_v1.rs index 5ebf0a4..22e9310 100644 --- a/crates/kebab-chunk/src/code_go_ast_v1.rs +++ b/crates/kebab-chunk/src/code_go_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_java_ast_v1.rs b/crates/kebab-chunk/src/code_java_ast_v1.rs index acbc3a3..07e0ab8 100644 --- a/crates/kebab-chunk/src/code_java_ast_v1.rs +++ b/crates/kebab-chunk/src/code_java_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_js_ast_v1.rs b/crates/kebab-chunk/src/code_js_ast_v1.rs index 1707831..8ae1fc5 100644 --- a/crates/kebab-chunk/src/code_js_ast_v1.rs +++ b/crates/kebab-chunk/src/code_js_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_kotlin_ast_v1.rs b/crates/kebab-chunk/src/code_kotlin_ast_v1.rs index 93ca430..1c1a386 100644 --- a/crates/kebab-chunk/src/code_kotlin_ast_v1.rs +++ b/crates/kebab-chunk/src/code_kotlin_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_python_ast_v1.rs b/crates/kebab-chunk/src/code_python_ast_v1.rs index 00e14dc..ac62678 100644 --- a/crates/kebab-chunk/src/code_python_ast_v1.rs +++ b/crates/kebab-chunk/src/code_python_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_rust_ast_v1.rs b/crates/kebab-chunk/src/code_rust_ast_v1.rs index a513f3f..365ed87 100644 --- a/crates/kebab-chunk/src/code_rust_ast_v1.rs +++ b/crates/kebab-chunk/src/code_rust_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-chunk/src/code_ts_ast_v1.rs b/crates/kebab-chunk/src/code_ts_ast_v1.rs index 33e5932..42dd4ac 100644 --- a/crates/kebab-chunk/src/code_ts_ast_v1.rs +++ b/crates/kebab-chunk/src/code_ts_ast_v1.rs @@ -147,7 +147,7 @@ fn make_chunk( chunk_id, doc_id: DocumentId(doc.doc_id.0.clone()), block_ids: block_ids.to_vec(), - tokenized_korean_text: None, + tokenized_korean_text: crate::tokenize_korean_morphological(&text), text, heading_path: Vec::new(), source_spans: vec![span], diff --git a/crates/kebab-store-sqlite/src/documents.rs b/crates/kebab-store-sqlite/src/documents.rs index 70ebcda..e1dcd57 100644 --- a/crates/kebab-store-sqlite/src/documents.rs +++ b/crates/kebab-store-sqlite/src/documents.rs @@ -223,7 +223,7 @@ impl kebab_core::DocumentStore for SqliteStore { "SELECT chunk_id, doc_id, text, heading_path_json, source_spans_json, token_estimate, chunker_version, - policy_hash, block_ids_json + policy_hash, block_ids_json, tokenized_korean_text FROM chunks WHERE chunk_id = ?", params![id.0], chunk_row_from_sql, @@ -249,7 +249,7 @@ impl kebab_core::DocumentStore for SqliteStore { token_estimate: row.token_estimate as usize, chunker_version: kebab_core::ChunkerVersion(row.chunker_version), policy_hash: row.policy_hash, - tokenized_korean_text: None, + tokenized_korean_text: row.tokenized_korean_text, })) } @@ -560,6 +560,7 @@ struct ChunkRow { chunker_version: String, policy_hash: String, block_ids_json: String, + tokenized_korean_text: Option, } fn chunk_row_from_sql(row: &rusqlite::Row<'_>) -> rusqlite::Result { @@ -573,6 +574,7 @@ fn chunk_row_from_sql(row: &rusqlite::Row<'_>) -> rusqlite::Result { chunker_version: row.get(6)?, policy_hash: row.get(7)?, block_ids_json: row.get(8)?, + tokenized_korean_text: row.get(9)?, }) }