feat(kebab-core): p9-fb-23 task 2 — CanonicalDocument gains last_chunker_version + last_embedding_version

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 17:50:25 +00:00
parent 0684b3ad66
commit f867b36afb
10 changed files with 30 additions and 1 deletion

View File

@@ -477,6 +477,8 @@ mod tests {
parser_version: kebab_core::ParserVersion("test-parser-0".into()),
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
}
}

View File

@@ -352,6 +352,8 @@ mod tests {
parser_version,
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
}
}
@@ -515,6 +517,8 @@ mod tests {
parser_version,
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
};
let err = PdfPageV1Chunker
.chunk(&doc, &default_policy(500, 80))

View File

@@ -7,7 +7,7 @@ use crate::asset::WorkspacePath;
use crate::ids::{AssetId, BlockId, DocumentId};
use crate::media::Lang;
use crate::metadata::{Metadata, Provenance};
use crate::versions::ParserVersion;
use crate::versions::{ChunkerVersion, EmbeddingVersion, ParserVersion};
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CanonicalDocument {
@@ -22,6 +22,15 @@ pub struct CanonicalDocument {
pub parser_version: ParserVersion,
pub schema_version: u32,
pub doc_version: u32,
/// p9-fb-23: chunker version active when this document was last
/// chunked. `None` for rows ingested before V006 migration; the
/// next ingest stamps the current version. Compared against the
/// active chunker version for the incremental-ingest skip path.
pub last_chunker_version: Option<ChunkerVersion>,
/// p9-fb-23: embedding model version active when this document
/// was last embedded. `None` if no embedder is configured (skip
/// path treats `None == None` as a match — see design doc).
pub last_embedding_version: Option<EmbeddingVersion>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]

View File

@@ -169,6 +169,8 @@ pub fn build_canonical_document(
parser_version: parser_version.clone(),
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
})
}

View File

@@ -212,6 +212,8 @@ impl Extractor for ImageExtractor {
parser_version,
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
})
}
}

View File

@@ -216,6 +216,8 @@ impl Extractor for PdfTextExtractor {
parser_version,
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
})
}
}

View File

@@ -221,6 +221,8 @@ impl kebab_core::DocumentStore for SqliteStore {
// under that invariant.
schema_version: row.schema_version as u32,
doc_version: row.doc_version as u32,
last_chunker_version: None,
last_embedding_version: None,
}))
}

View File

@@ -78,6 +78,8 @@ fn make_doc() -> CanonicalDocument {
parser_version: ParserVersion("test-parser".into()),
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
}
}

View File

@@ -67,6 +67,8 @@ fn make_doc(
parser_version: ParserVersion("test".into()),
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
};
(asset, doc)
}

View File

@@ -91,6 +91,8 @@ fn make_doc() -> CanonicalDocument {
parser_version: ParserVersion("test-parser".into()),
schema_version: 1,
doc_version: 1,
last_chunker_version: None,
last_embedding_version: None,
}
}