Compare commits

..

7 Commits

Author SHA1 Message Date
acf8cf3be2 chore: bump version 0.8.3 → 0.9.0
dogfood-discovered routing additions (PR #147) land:
- .mts / .cts → MediaType::Code(typescript)
- .mdx → MediaType::Markdown

minor bump 사유: 사용자 도그푸딩 surface 확장 — 이전에 skip 되던 28+ 파일이
이제 색인됨. design §10.4 dogfooding-ready surface 확장 = minor trigger.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 06:29:27 +00:00
ea5f7b22c8 Merge pull request 'feat(dogfood): route .mts/.cts → typescript + .mdx → markdown' (#147) from feat/dogfood-routing-cts-mts-mdx into main 2026-05-20 06:28:41 +00:00
5497c6e7b5 feat(dogfood): route .mts/.cts to typescript + .mdx to markdown
Dogfood (PR #142 1B + multi-root: kebab-docs + httpx + zod + lodash)
showed 28 files skipped by extension that are routable to existing
extractors:
- .mts (ESM TypeScript) / .cts (CommonJS TypeScript) — same grammar as
  .ts in tree-sitter-typescript 0.23 (LANGUAGE_TYPESCRIPT covers JSX-
  agnostic variants; LANGUAGE_TSX stays for .tsx only)
- .mdx (Markdown + JSX) — routed as MediaType::Markdown; the md parser
  folds JSX islands through as raw passthrough

Changes:
- crates/kebab-source-fs/src/media.rs: 'mts'|'cts' → Code(typescript),
  'mdx' → Markdown. +2 unit tests.
- crates/kebab-parse-code/src/lang.rs: code_lang_for_path matches mts/cts;
  module_path_for_tsjs strips .mts/.cts as well. Test cases extended.
- crates/kebab-parse-code/src/typescript.rs: doc comment on select_grammar
  refreshed to mention .mts/.cts.
- crates/kebab-parse-code/tests/lang.rs: 2 new assertions.

verify: kebab-source-fs 44 / kebab-parse-code lib 20 + lang 4 all pass; clippy clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 06:24:21 +00:00
5a90940f1c chore: bump version 0.8.2 → 0.8.3
dogfood-discovered fix (PR #146) lands: idempotent re-ingest now correctly
returns Unchanged for twin files (identical content at different paths)
via document-centric try_skip_unchanged lookup.

patch bump 사유: advertised idempotency 의 정상 동작 복원. 새 wire / config / surface 변경 없음.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 06:20:34 +00:00
4389b887f0 Merge pull request 'fix(dogfood): document-centric try_skip_unchanged for twin-file idempotency' (#146) from fix/dogfood-bug4-idempotent-twin-files into main 2026-05-20 06:16:28 +00:00
360f825f3a docs(dogfood): refresh try_skip_unchanged doc-comment to match new flow (PR #146 review)
Round 1 review found the function-level doc-comment still described the
old asset-side algorithm (item 2 asset-row checksum, item 3 id_for_doc
miss). Updated to the document-centric flow.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 05:35:17 +00:00
641b92af7d fix(dogfood): document-centric try_skip_unchanged for twin-file idempotency
Identical-content files at different workspace paths share one assets row
(assets.asset_id = blake3 content hash, PRIMARY KEY). The UPSERT
`ON CONFLICT(asset_id) DO UPDATE SET workspace_path = excluded` made
twin files overwrite each other's workspace_path on every ingest, so
`get_asset_by_workspace_path(path1)` returned the OTHER twin's row (or
None) — break idempotent unchanged-detection for both files.

Fix: switch try_skip_unchanged to document-centric lookup. `documents.
workspace_path` is already UNIQUE (V001) and `id_for_doc(path, ...)`
includes path, so each twin has its own stable document row. Compare
`doc.source_asset_id` with the new asset's checksum instead of going
through the assets table.

Dogfood (multi-root: kebab-docs + httpx + zod + lodash) showed 27 of
726 docs marked Updated on every idempotent re-ingest — all 27 are
twin-file victims (empty `__init__.py` ×3, AGENTS.md ↔ CLAUDE.md
same content, duplicate logo PDFs/JPGs).

After: re-ingest reports 0 new / 0 updated / 726 unchanged.

No schema migration needed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 05:27:21 +00:00
10 changed files with 261 additions and 64 deletions

46
Cargo.lock generated
View File

@@ -4127,7 +4127,7 @@ dependencies = [
[[package]]
name = "kebab-app"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"base64 0.22.1",
@@ -4172,7 +4172,7 @@ dependencies = [
[[package]]
name = "kebab-chunk"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4187,7 +4187,7 @@ dependencies = [
[[package]]
name = "kebab-cli"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"clap",
@@ -4208,7 +4208,7 @@ dependencies = [
[[package]]
name = "kebab-config"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"dirs 5.0.1",
@@ -4223,7 +4223,7 @@ dependencies = [
[[package]]
name = "kebab-core"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4237,7 +4237,7 @@ dependencies = [
[[package]]
name = "kebab-embed"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4251,7 +4251,7 @@ dependencies = [
[[package]]
name = "kebab-embed-local"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"fastembed",
@@ -4264,7 +4264,7 @@ dependencies = [
[[package]]
name = "kebab-eval"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-app",
@@ -4283,7 +4283,7 @@ dependencies = [
[[package]]
name = "kebab-llm"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-core",
@@ -4292,7 +4292,7 @@ dependencies = [
[[package]]
name = "kebab-llm-local"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-config",
@@ -4309,7 +4309,7 @@ dependencies = [
[[package]]
name = "kebab-mcp"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-app",
@@ -4327,7 +4327,7 @@ dependencies = [
[[package]]
name = "kebab-normalize"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-core",
@@ -4342,7 +4342,7 @@ dependencies = [
[[package]]
name = "kebab-parse-code"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"gix",
@@ -4360,7 +4360,7 @@ dependencies = [
[[package]]
name = "kebab-parse-image"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"ab_glyph",
"anyhow",
@@ -4384,7 +4384,7 @@ dependencies = [
[[package]]
name = "kebab-parse-md"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-core",
@@ -4401,7 +4401,7 @@ dependencies = [
[[package]]
name = "kebab-parse-pdf"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4414,7 +4414,7 @@ dependencies = [
[[package]]
name = "kebab-parse-types"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"kebab-core",
"serde",
@@ -4422,7 +4422,7 @@ dependencies = [
[[package]]
name = "kebab-rag"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4443,7 +4443,7 @@ dependencies = [
[[package]]
name = "kebab-search"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"globset",
@@ -4462,7 +4462,7 @@ dependencies = [
[[package]]
name = "kebab-source-fs"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4481,7 +4481,7 @@ dependencies = [
[[package]]
name = "kebab-store-sqlite"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4502,7 +4502,7 @@ dependencies = [
[[package]]
name = "kebab-store-vector"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"arrow",
@@ -4526,7 +4526,7 @@ dependencies = [
[[package]]
name = "kebab-tui"
version = "0.8.2"
version = "0.9.0"
dependencies = [
"anyhow",
"crossterm",

View File

@@ -31,7 +31,7 @@ edition = "2024"
rust-version = "1.85"
license = "MIT OR Apache-2.0"
repository = "https://github.com/altair823/kebab"
version = "0.8.2"
version = "0.9.0"
[workspace.dependencies]
anyhow = "1"

View File

@@ -748,15 +748,18 @@ struct ImagePipeline<'a> {
/// hold (per design §9 cascade rule):
///
/// 1. `force_reingest == false` — caller hasn't asked to bypass skip.
/// 2. The freshly-scanned asset's blake3 checksum equals what the
/// existing `assets` row stores at the same `workspace_path`.
/// 3. The doc keyed on `(workspace_path, asset_id, current_parser_version)`
/// exists. If the parser_version changed, `id_for_doc` produces a
/// different `doc_id` so the lookup misses → no skip → re-process.
/// 4. The existing doc's stamped `last_chunker_version` AND
/// `last_embedding_version` match the values the caller is about
/// to use (`Some(v) == Some(v)` and `None == None` — see design
/// doc for the `None == None` rule when no embedder is configured).
/// 2. A document already exists at this `workspace_path`
/// (`get_document_by_workspace_path`). The lookup is document-side, not
/// asset-side, so twin files (identical content at different paths) each
/// hit their own stable doc row — `documents.workspace_path` is UNIQUE
/// while `assets` may dedupe content into a single row with a flip-flop
/// `workspace_path` column (dogfood bug #4, see `tasks/HOTFIXES.md`).
/// 3. The existing doc's `source_asset_id` equals the freshly-scanned
/// asset's blake3 checksum (content unchanged).
/// 4. The existing doc's `parser_version` matches the current extractor's
/// `parser_version` (extractor not upgraded). Combined with `chunker_version`
/// and `last_embedding_version` checks immediately below — full cascade
/// per design §9.
///
/// Returns `Ok(None)` (proceed with full re-process) when any check
/// fails or any DB read errors out — the skip path is opportunistic;
@@ -773,31 +776,19 @@ fn try_skip_unchanged(
if force_reingest {
return Ok(None);
}
let existing_asset = match app
// Document-centric skip: look up the existing document row by
// workspace_path directly. This avoids the twin-file flip-flop
// that the old asset-side lookup suffers from — multiple files
// with identical content share one `assets` row whose
// `workspace_path` is overwritten on every UPSERT, so
// `get_asset_by_workspace_path(path1)` could return the OTHER
// twin's path (or None) after any ingest of the twin. The
// `documents` table has a UNIQUE index on `workspace_path` (V001),
// so each twin has its own stable row regardless of asset de-dup.
let existing_doc = match app
.sqlite
.get_asset_by_workspace_path(&asset.workspace_path)
.get_document_by_workspace_path(&asset.workspace_path)
{
Ok(Some(a)) => a,
Ok(None) => return Ok(None),
Err(e) => {
tracing::debug!(
target: "kebab-app",
path = %asset.workspace_path.0,
error = %e,
"skip-check: get_asset_by_workspace_path failed; falling through to re-process"
);
return Ok(None);
}
};
if existing_asset.checksum != asset.checksum {
return Ok(None);
}
let candidate_doc_id = kebab_core::id_for_doc(
&asset.workspace_path,
&asset.asset_id,
current_parser_version,
);
let existing_doc = match app.sqlite.get_document(&candidate_doc_id) {
Ok(Some(d)) => d,
Ok(None) => return Ok(None),
Err(e) => {
@@ -805,21 +796,37 @@ fn try_skip_unchanged(
target: "kebab-app",
path = %asset.workspace_path.0,
error = %e,
"skip-check: get_document failed; falling through to re-process"
"skip-check: get_document_by_workspace_path failed; falling through to re-process"
);
return Ok(None);
}
};
// 1. Content unchanged: the freshly-computed asset_id (blake3
// content hash) must match what this document was ingested from.
if existing_doc.source_asset_id != asset.asset_id {
return Ok(None);
}
// 2. Parser unchanged: parser_version is baked into id_for_doc so
// a version bump yields a different doc_id and the row above
// would have been missing. Checking here explicitly keeps the
// logic self-documenting and guards against future id_for_doc
// changes.
if existing_doc.parser_version != *current_parser_version {
return Ok(None);
}
// 3. Chunker unchanged.
let chunker_match = existing_doc.last_chunker_version.as_ref()
== Some(current_chunker_version);
if !chunker_match {
return Ok(None);
}
// 4. Embedder unchanged.
let embedder_match = existing_doc.last_embedding_version.as_ref()
== current_embedding_version;
if !embedder_match {
return Ok(None);
}
let candidate_doc_id = existing_doc.doc_id.clone();
tracing::debug!(
target: "kebab-app::ingest",
path = %asset.workspace_path.0,

View File

@@ -0,0 +1,90 @@
//! Regression test for the twin-file idempotency bug.
//!
//! Identical-content files at different workspace paths share one
//! `assets` row (`asset_id` = blake3 content hash, PRIMARY KEY). The
//! old UPSERT `ON CONFLICT(asset_id) DO UPDATE SET workspace_path =
//! excluded.workspace_path` made each twin overwrite the other's path
//! on every ingest, so `get_asset_by_workspace_path(path1)` returned
//! None (or the wrong twin) → re-process every time.
//!
//! Fix: `try_skip_unchanged` now uses `get_document_by_workspace_path`
//! instead. `documents.workspace_path` is UNIQUE (V001) so each twin
//! has its own stable document row.
//!
//! Assertion contract:
//! 1st ingest → 2 New (one per twin)
//! 2nd ingest → 0 New, 0 Updated, 2 Unchanged
mod common;
use common::TestEnv;
use kebab_app::ingest_with_config;
use kebab_core::IngestItemKind;
#[test]
fn twin_files_second_ingest_is_unchanged() {
let env = TestEnv::lexical_only();
// Write two files with identical content at different paths.
let pkg_a = env.workspace_root.join("pkg_a");
let pkg_b = env.workspace_root.join("pkg_b");
std::fs::create_dir_all(&pkg_a).unwrap();
std::fs::create_dir_all(&pkg_b).unwrap();
let content = b"# shared\nThis content is identical in both files.\n";
std::fs::write(pkg_a.join("__init__.py"), content).unwrap();
std::fs::write(pkg_b.join("__init__.py"), content).unwrap();
// First ingest — both files must be New.
let first = ingest_with_config(env.config.clone(), env.scope(), false)
.expect("first ingest must succeed");
assert_eq!(first.errors, 0, "first ingest: no errors; report={first:?}");
let items = first.items.as_ref().expect("items must be present");
let twin_items: Vec<_> = items
.iter()
.filter(|i| {
i.doc_path.0.ends_with("__init__.py")
})
.collect();
assert_eq!(
twin_items.len(),
2,
"first ingest: expected exactly 2 __init__.py items; items={items:?}"
);
for item in &twin_items {
assert_eq!(
item.kind,
IngestItemKind::New,
"first ingest: each twin must be New; item={item:?}"
);
}
// Second ingest — same files, same content → both must be Unchanged.
let second = ingest_with_config(env.config.clone(), env.scope(), false)
.expect("second ingest must succeed");
assert_eq!(second.errors, 0, "second ingest: no errors; report={second:?}");
assert_eq!(second.new, 0, "second ingest: no new docs; report={second:?}");
assert_eq!(
second.updated, 0,
"second ingest: no updated docs (twin-file bug would set this to 2); report={second:?}"
);
let second_items = second.items.as_ref().expect("items must be present");
let twin_items2: Vec<_> = second_items
.iter()
.filter(|i| i.doc_path.0.ends_with("__init__.py"))
.collect();
assert_eq!(
twin_items2.len(),
2,
"second ingest: expected exactly 2 __init__.py items; items={second_items:?}"
);
for item in &twin_items2 {
assert_eq!(
item.kind,
IngestItemKind::Unchanged,
"second ingest: each twin must be Unchanged; item={item:?}"
);
}
}

View File

@@ -169,6 +169,20 @@ pub trait DocumentStore {
&self,
path: &WorkspacePath,
) -> anyhow::Result<Option<RawAsset>>;
/// Look up a document row by its workspace path. Used by the
/// document-centric skip path in `try_skip_unchanged` to avoid the
/// twin-file flip-flop that the asset-side lookup suffers from
/// (multiple files with identical content share one `assets` row
/// whose `workspace_path` is overwritten on every UPSERT, so
/// `get_asset_by_workspace_path` returns the wrong twin's path).
///
/// `documents.workspace_path` is UNIQUE (V001), so each twin has
/// its own stable document row regardless of the asset de-dup.
fn get_document_by_workspace_path(
&self,
path: &WorkspacePath,
) -> anyhow::Result<Option<CanonicalDocument>>;
}
pub trait VectorStore {

View File

@@ -24,7 +24,7 @@ pub fn code_lang_for_path(path: &Path) -> Option<&'static str> {
match ext.as_str() {
"rs" => Some("rust"),
"py" | "pyi" => Some("python"),
"ts" | "tsx" => Some("typescript"),
"ts" | "tsx" | "mts" | "cts" => Some("typescript"),
"js" | "mjs" | "cjs" | "jsx" => Some("javascript"),
"go" => Some("go"),
"java" => Some("java"),
@@ -82,7 +82,7 @@ pub fn module_path_for_python(workspace_path: &str) -> String {
/// (no slash replacement, no source-root strip). See plan §Task C.
pub fn module_path_for_tsjs(workspace_path: &str) -> String {
let p = workspace_path;
for ext in [".tsx", ".ts", ".jsx", ".mjs", ".cjs", ".js"] {
for ext in [".tsx", ".mts", ".cts", ".ts", ".jsx", ".mjs", ".cjs", ".js"] {
if let Some(stripped) = p.strip_suffix(ext) {
return stripped.to_string();
}
@@ -110,7 +110,7 @@ mod tests {
#[test]
fn module_path_for_tsjs_keeps_slashes_and_strips_ext() {
for ext in ["ts", "tsx", "js", "jsx", "mjs", "cjs"] {
for ext in ["ts", "tsx", "mts", "cts", "js", "jsx", "mjs", "cjs"] {
let p = format!("src/search/retriever/Retriever.{ext}");
assert_eq!(module_path_for_tsjs(&p), "src/search/retriever/Retriever");
}

View File

@@ -173,8 +173,9 @@ impl Extractor for TypescriptAstExtractor {
}
/// Select the tree-sitter grammar based on the workspace path's
/// extension. `.tsx` → TSX grammar; everything else (`.ts`, `.d.ts`,
/// missing extension) → TypeScript grammar.
/// extension. `.tsx` → TSX grammar; everything else (`.ts`, `.mts`,
/// `.cts`, `.d.ts`, missing extension) → TypeScript grammar (the JSX-
/// agnostic variants all share one grammar in tree-sitter-typescript 0.23).
fn select_grammar(workspace_path: &str) -> tree_sitter::Language {
if workspace_path.ends_with(".tsx") {
tree_sitter_typescript::LANGUAGE_TSX.into()

View File

@@ -9,6 +9,8 @@ fn known_extensions_map_to_canonical_identifiers() {
("foo.pyi", Some("python")),
("foo.ts", Some("typescript")),
("foo.tsx", Some("typescript")),
("foo.mts", Some("typescript")), // ESM TS — same grammar
("foo.cts", Some("typescript")), // CommonJS TS — same grammar
("foo.js", Some("javascript")),
("foo.mjs", Some("javascript")),
("foo.cjs", Some("javascript")),

View File

@@ -19,7 +19,9 @@ pub(crate) fn media_type_for(path: &Path) -> MediaType {
.unwrap_or_default();
match ext.as_str() {
"md" => MediaType::Markdown,
// Markdown + MDX (markdown + JSX, treated as plain markdown — the
// JSX islands are folded into raw passthrough by the md parser).
"md" | "mdx" => MediaType::Markdown,
"pdf" => MediaType::Pdf,
"png" => MediaType::Image(ImageType::Png),
@@ -40,7 +42,8 @@ pub(crate) fn media_type_for(path: &Path) -> MediaType {
// p10-1B: Python / TS / JS AST chunkers active.
"py" | "pyi" => MediaType::Code("python".into()),
"ts" | "tsx" => MediaType::Code("typescript".into()),
// .mts / .cts are TypeScript ESM / CommonJS variants — same grammar.
"ts" | "tsx" | "mts" | "cts" => MediaType::Code("typescript".into()),
"js" | "mjs" | "cjs" | "jsx" => MediaType::Code("javascript".into()),
// Empty string (no extension) and any other extension: bucket as
@@ -102,6 +105,20 @@ mod tests {
assert_eq!(media_type_for(Path::new("a/b.rs")), MediaType::Code("rust".into()));
}
#[test]
fn ts_variants_mts_cts() {
// .mts / .cts are TypeScript ESM / CommonJS — same grammar as .ts.
assert_eq!(media_type_for(Path::new("a/b.mts")), MediaType::Code("typescript".into()));
assert_eq!(media_type_for(Path::new("a/b.cts")), MediaType::Code("typescript".into()));
}
#[test]
fn mdx_routes_to_markdown() {
// MDX is markdown with JSX islands; the md parser folds the JSX
// through as raw passthrough.
assert_eq!(media_type_for(Path::new("docs/page.mdx")), MediaType::Markdown);
}
#[test]
fn unknown_and_missing_extension() {
assert_eq!(

View File

@@ -286,6 +286,72 @@ impl kebab_core::DocumentStore for SqliteStore {
}
}
fn get_document_by_workspace_path(
&self,
path: &kebab_core::WorkspacePath,
) -> Result<Option<kebab_core::CanonicalDocument>> {
let conn = self.lock_conn();
let row: Option<DocumentRow> = conn
.query_row(
"SELECT
doc_id, asset_id, workspace_path, title, lang,
source_type, trust_level, parser_version,
doc_version, schema_version, metadata_json,
provenance_json, created_at, updated_at,
last_chunker_version, last_embedding_version
FROM documents WHERE workspace_path = ?",
params![path.0],
document_row_from_sql,
)
.map(Some)
.or_else(rows_optional)
.map_err(StoreError::from)?;
let Some(row) = row else { return Ok(None) };
let doc_id = kebab_core::DocumentId(row.doc_id.clone());
let mut blocks_stmt = conn
.prepare(
"SELECT payload_json FROM blocks
WHERE doc_id = ? ORDER BY ordinal ASC",
)
.map_err(StoreError::from)?;
let block_rows = blocks_stmt
.query_map(params![row.doc_id], |r| {
let payload_json: String = r.get(0)?;
Ok(payload_json)
})
.map_err(StoreError::from)?;
let mut blocks: Vec<kebab_core::Block> = Vec::new();
for block_row in block_rows {
let payload_json = block_row.map_err(StoreError::from)?;
let block: kebab_core::Block = serde_json::from_str(&payload_json)
.context("deserialize block payload_json")?;
blocks.push(block);
}
let metadata: kebab_core::Metadata = serde_json::from_str(&row.metadata_json)
.context("deserialize metadata_json")?;
let provenance: kebab_core::Provenance =
serde_json::from_str(&row.provenance_json)
.context("deserialize provenance_json")?;
Ok(Some(kebab_core::CanonicalDocument {
doc_id,
source_asset_id: kebab_core::AssetId(row.asset_id),
workspace_path: kebab_core::WorkspacePath(row.workspace_path),
title: row.title.unwrap_or_default(),
lang: kebab_core::Lang(row.lang.unwrap_or_default()),
blocks,
metadata,
provenance,
parser_version: kebab_core::ParserVersion(row.parser_version),
schema_version: row.schema_version as u32,
doc_version: row.doc_version as u32,
last_chunker_version: row.last_chunker_version.map(kebab_core::ChunkerVersion),
last_embedding_version: row.last_embedding_version.map(kebab_core::EmbeddingVersion),
}))
}
fn list_documents(
&self,
filter: &kebab_core::DocFilter,