feat(kebab-core): p9-fb-23 task 2 — CanonicalDocument gains last_chunker_version + last_embedding_version
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -477,6 +477,8 @@ mod tests {
|
||||
parser_version: kebab_core::ParserVersion("test-parser-0".into()),
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -352,6 +352,8 @@ mod tests {
|
||||
parser_version,
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -515,6 +517,8 @@ mod tests {
|
||||
parser_version,
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
};
|
||||
let err = PdfPageV1Chunker
|
||||
.chunk(&doc, &default_policy(500, 80))
|
||||
|
||||
@@ -7,7 +7,7 @@ use crate::asset::WorkspacePath;
|
||||
use crate::ids::{AssetId, BlockId, DocumentId};
|
||||
use crate::media::Lang;
|
||||
use crate::metadata::{Metadata, Provenance};
|
||||
use crate::versions::ParserVersion;
|
||||
use crate::versions::{ChunkerVersion, EmbeddingVersion, ParserVersion};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CanonicalDocument {
|
||||
@@ -22,6 +22,15 @@ pub struct CanonicalDocument {
|
||||
pub parser_version: ParserVersion,
|
||||
pub schema_version: u32,
|
||||
pub doc_version: u32,
|
||||
/// p9-fb-23: chunker version active when this document was last
|
||||
/// chunked. `None` for rows ingested before the V006 migration; the
|
||||
/// next ingest stamps the current version. Compared against the
|
||||
/// active chunker version for the incremental-ingest skip path.
|
||||
pub last_chunker_version: Option<ChunkerVersion>,
|
||||
/// p9-fb-23: embedding model version active when this document
|
||||
/// was last embedded. `None` if no embedder is configured (skip
|
||||
/// path treats `None == None` as a match — see design doc).
|
||||
pub last_embedding_version: Option<EmbeddingVersion>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
|
||||
@@ -169,6 +169,8 @@ pub fn build_canonical_document(
|
||||
parser_version: parser_version.clone(),
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -212,6 +212,8 @@ impl Extractor for ImageExtractor {
|
||||
parser_version,
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -216,6 +216,8 @@ impl Extractor for PdfTextExtractor {
|
||||
parser_version,
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -221,6 +221,8 @@ impl kebab_core::DocumentStore for SqliteStore {
|
||||
// under that invariant.
|
||||
schema_version: row.schema_version as u32,
|
||||
doc_version: row.doc_version as u32,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
}))
|
||||
}
|
||||
|
||||
|
||||
@@ -78,6 +78,8 @@ fn make_doc() -> CanonicalDocument {
|
||||
parser_version: ParserVersion("test-parser".into()),
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -67,6 +67,8 @@ fn make_doc(
|
||||
parser_version: ParserVersion("test".into()),
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
};
|
||||
(asset, doc)
|
||||
}
|
||||
|
||||
@@ -91,6 +91,8 @@ fn make_doc() -> CanonicalDocument {
|
||||
parser_version: ParserVersion("test-parser".into()),
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
last_chunker_version: None,
|
||||
last_embedding_version: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user