diff --git a/crates/kb-core/src/chunk.rs b/crates/kb-core/src/chunk.rs index 1c3b0aa..5c0db0f 100644 --- a/crates/kb-core/src/chunk.rs +++ b/crates/kb-core/src/chunk.rs @@ -6,6 +6,12 @@ use crate::document::SourceSpan; use crate::ids::{BlockId, ChunkId, DocumentId}; use crate::versions::ChunkerVersion; +/// A unit of retrievable text per design §3.5 + §5.5. +/// +/// `policy_hash` is the chunker's hex digest of the active `ChunkPolicy` +/// (e.g. `target_tokens`, `overlap_tokens`). It mirrors the §5.5 SQLite +/// schema column so persistence is a straight copy, and feeds the +/// `chunk_id` recipe (§4.2) so policy edits invalidate downstream IDs. #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct Chunk { pub chunk_id: ChunkId, @@ -16,4 +22,5 @@ pub struct Chunk { pub source_spans: Vec, pub token_estimate: usize, pub chunker_version: ChunkerVersion, + pub policy_hash: String, }