From 366e89e5e2ed93f20d3aae9d1b8993227bad4bba Mon Sep 17 00:00:00 2001
From: altair823
Date: Mon, 4 May 2026 17:58:23 +0000
Subject: [PATCH] =?UTF-8?q?feat(kebab-store-sqlite):=20p9-fb-23=20task=204?=
 =?UTF-8?q?=20=E2=80=94=20get=5Fasset=5Fby=5Fworkspace=5Fpath?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `DocumentStore::get_asset_by_workspace_path` trait method to
`kebab-core` and implement it on `SqliteStore` via a private
`asset_from_row` helper. Used by the incremental-ingest skip path to
compare a freshly-computed blake3 checksum against the persisted row
without a full round-trip through `put_asset_with_bytes`.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/kebab-core/src/traits.rs               | 10 ++-
 crates/kebab-store-sqlite/src/documents.rs    | 93 +++++++++++++++++++
 .../tests/incremental_ingest.rs               | 29 +++++++
 3 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/crates/kebab-core/src/traits.rs b/crates/kebab-core/src/traits.rs
index 59dedfb..2c48411 100644
--- a/crates/kebab-core/src/traits.rs
+++ b/crates/kebab-core/src/traits.rs
@@ -5,7 +5,7 @@ use std::path::{Path, PathBuf};
 use serde::{Deserialize, Serialize};
 use serde_json::Value;
 
-use crate::asset::RawAsset;
+use crate::asset::{RawAsset, WorkspacePath};
 use crate::chunk::Chunk;
 use crate::document::{Block, CanonicalDocument};
 use crate::ids::{ChunkId, DocumentId};
@@ -156,6 +156,14 @@ pub trait DocumentStore {
     fn get_document(&self, id: &DocumentId) -> anyhow::Result>;
     fn get_chunk(&self, id: &ChunkId) -> anyhow::Result>;
     fn list_documents(&self, filter: &DocFilter) -> anyhow::Result>;
+    /// p9-fb-23: look up an asset row by its workspace path. Used by
+    /// the incremental-ingest skip path to compare the freshly
+    /// computed blake3 checksum against what's already in SQLite. The
+    /// schema enforces a unique workspace_path per asset.
+    fn get_asset_by_workspace_path(
+        &self,
+        path: &WorkspacePath,
+    ) -> anyhow::Result<Option<RawAsset>>;
 }
 
 pub trait VectorStore {
diff --git a/crates/kebab-store-sqlite/src/documents.rs b/crates/kebab-store-sqlite/src/documents.rs
index d565a81..a2568a1 100644
--- a/crates/kebab-store-sqlite/src/documents.rs
+++ b/crates/kebab-store-sqlite/src/documents.rs
@@ -264,6 +264,28 @@ impl kebab_core::DocumentStore for SqliteStore {
         }))
     }
 
+    fn get_asset_by_workspace_path(
+        &self,
+        path: &kebab_core::WorkspacePath,
+    ) -> Result<Option<kebab_core::RawAsset>> {
+        let conn = self.lock_conn();
+        let result = conn.query_row(
+            r#"SELECT
+                 asset_id, source_uri, workspace_path, media_type,
+                 byte_len, checksum, storage_kind, storage_path,
+                 discovered_at
+               FROM assets
+               WHERE workspace_path = ?"#,
+            rusqlite::params![path.0.as_str()],
+            asset_from_row,
+        );
+        match result {
+            Ok(asset) => Ok(Some(asset)),
+            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
+            Err(e) => Err(e.into()),
+        }
+    }
+
     fn list_documents(
         &self,
         filter: &kebab_core::DocFilter,
@@ -484,6 +506,77 @@ fn rows_optional(err: rusqlite::Error) -> rusqlite::Result> {
     }
 }
 
+/// Reconstruct a [`kebab_core::RawAsset`] from one `assets` row.
+///
+/// Column order must match the SELECT in
+/// [`DocumentStore::get_asset_by_workspace_path`]:
+/// `asset_id(0), source_uri(1), workspace_path(2), media_type(3),
+/// byte_len(4), checksum(5), storage_kind(6), storage_path(7),
+/// discovered_at(8)`.
+///
+/// # Errors
+///
+/// Returns a `rusqlite::Error` when a column has an unexpected SQL type,
+/// when `media_type` is not valid JSON for [`kebab_core::MediaType`],
+/// when `discovered_at` is not RFC 3339, or when `byte_len` is negative.
+fn asset_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<kebab_core::RawAsset> {
+    use std::path::PathBuf;
+
+    let asset_id: String = row.get(0)?;
+    let source_uri_raw: String = row.get(1)?;
+    let workspace_path_raw: String = row.get(2)?;
+    let media_type_json: String = row.get(3)?;
+    let byte_len_raw: i64 = row.get(4)?;
+    let checksum_raw: String = row.get(5)?;
+    let storage_kind: String = row.get(6)?;
+    let storage_path_raw: String = row.get(7)?;
+    let discovered_at_raw: String = row.get(8)?;
+
+    // A `file://` prefix marks a filesystem source; every other value is
+    // kept verbatim as a `Kb` URI.
+    let source_uri = match source_uri_raw.strip_prefix("file://") {
+        Some(path_str) => kebab_core::SourceUri::File(PathBuf::from(path_str)),
+        None => kebab_core::SourceUri::Kb(source_uri_raw),
+    };
+
+    let workspace_path = kebab_core::WorkspacePath(workspace_path_raw);
+    let media_type: kebab_core::MediaType = serde_json::from_str(&media_type_json)
+        .map_err(|e| rusqlite::Error::FromSqlConversionFailure(3, rusqlite::types::Type::Text, Box::new(e)))?;
+    // SQLite integers are signed; reject a negative length instead of
+    // letting `as u64` wrap it into a huge value.
+    let byte_len = u64::try_from(byte_len_raw)
+        .map_err(|_| rusqlite::Error::IntegralValueOutOfRange(4, byte_len_raw))?;
+    let checksum = kebab_core::Checksum(checksum_raw);
+    let discovered_at = time::OffsetDateTime::parse(
+        &discovered_at_raw,
+        &time::format_description::well_known::Rfc3339,
+    )
+    .map_err(|e| rusqlite::Error::FromSqlConversionFailure(8, rusqlite::types::Type::Text, Box::new(e)))?;
+
+    let storage_path = PathBuf::from(storage_path_raw);
+    // Only the literal "copied" selects `Copied`; any other kind is read
+    // back as a `Reference` carrying the row's checksum.
+    let stored = if storage_kind == "copied" {
+        kebab_core::AssetStorage::Copied { path: storage_path }
+    } else {
+        kebab_core::AssetStorage::Reference {
+            path: storage_path,
+            sha: checksum.clone(),
+        }
+    };
+
+    Ok(kebab_core::RawAsset {
+        asset_id: kebab_core::AssetId(asset_id),
+        source_uri,
+        workspace_path,
+        media_type,
+        byte_len,
+        checksum,
+        discovered_at,
+        stored,
+    })
+}
+
 /// UPSERT the documents row and bump `doc_version` on conflict.
 fn upsert_document(
     tx: &rusqlite::Transaction<'_>,
diff --git a/crates/kebab-store-sqlite/tests/incremental_ingest.rs b/crates/kebab-store-sqlite/tests/incremental_ingest.rs
index 03ced6a..3c544a2 100644
--- a/crates/kebab-store-sqlite/tests/incremental_ingest.rs
+++ b/crates/kebab-store-sqlite/tests/incremental_ingest.rs
@@ -117,3 +117,32 @@ fn put_then_get_document_roundtrips_none_stamps() {
         "last_embedding_version must be None when not set"
     );
 }
+
+#[test]
+fn get_asset_by_workspace_path_roundtrips() {
+    let env = common::TestEnv::new();
+    let store = SqliteStore::open(&env.config()).unwrap();
+    store.run_migrations().unwrap();
+
+    let asset = make_asset();
+    store.put_asset(&asset).unwrap();
+
+    let loaded = store
+        .get_asset_by_workspace_path(&asset.workspace_path)
+        .unwrap()
+        .expect("asset must round-trip");
+
+    assert_eq!(loaded.asset_id, asset.asset_id);
+    assert_eq!(loaded.checksum, asset.checksum);
+    assert_eq!(loaded.byte_len, asset.byte_len);
+}
+
+#[test]
+fn get_asset_by_workspace_path_returns_none_for_unknown() {
+    let env = common::TestEnv::new();
+    let store = SqliteStore::open(&env.config()).unwrap();
+    store.run_migrations().unwrap();
+
+    let path = WorkspacePath::new("notes/missing.md".into()).unwrap();
+    assert!(store.get_asset_by_workspace_path(&path).unwrap().is_none());
+}