feat(kebab-store-sqlite): p9-fb-23 task 4 — get_asset_by_workspace_path

Add `DocumentStore::get_asset_by_workspace_path` trait method to
`kebab-core` and implement it on `SqliteStore` via a private
`asset_from_row` helper. Used by the incremental-ingest skip path to
compare a freshly-computed blake3 checksum against the persisted row
without a full round-trip through `put_asset_with_bytes`.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-04 17:58:23 +00:00
parent 4261c8953c
commit 366e89e5e2
3 changed files with 119 additions and 1 deletions

View File

@@ -5,7 +5,7 @@ use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::asset::RawAsset;
use crate::asset::{RawAsset, WorkspacePath};
use crate::chunk::Chunk;
use crate::document::{Block, CanonicalDocument};
use crate::ids::{ChunkId, DocumentId};
@@ -156,6 +156,14 @@ pub trait DocumentStore {
fn get_document(&self, id: &DocumentId) -> anyhow::Result<Option<CanonicalDocument>>;
fn get_chunk(&self, id: &ChunkId) -> anyhow::Result<Option<Chunk>>;
fn list_documents(&self, filter: &DocFilter) -> anyhow::Result<Vec<DocSummary>>;
/// p9-fb-23: look up an asset row by its workspace path.
///
/// Used by the incremental-ingest skip path to compare the freshly
/// computed blake3 checksum against what's already in SQLite. The
/// schema enforces a unique workspace_path per asset.
///
/// Returns `Ok(Some(asset))` when a row exists for `path` and
/// `Ok(None)` when no asset is stored under that path; failures of
/// the underlying store are reported as `Err`.
fn get_asset_by_workspace_path(
&self,
path: &WorkspacePath,
) -> anyhow::Result<Option<RawAsset>>;
}
pub trait VectorStore {

View File

@@ -264,6 +264,28 @@ impl kebab_core::DocumentStore for SqliteStore {
}))
}
/// Fetch the `assets` row whose `workspace_path` equals `path`.
///
/// Yields `Ok(None)` when no row matches; every other SQLite error is
/// converted and handed back to the caller.
fn get_asset_by_workspace_path(
    &self,
    path: &kebab_core::WorkspacePath,
) -> Result<Option<kebab_core::RawAsset>> {
    // The column order below is the order `asset_from_row` indexes into.
    const LOOKUP_SQL: &str = r#"SELECT
asset_id, source_uri, workspace_path, media_type,
byte_len, checksum, storage_kind, storage_path,
discovered_at
FROM assets
WHERE workspace_path = ?"#;

    let conn = self.lock_conn();
    let lookup = conn.query_row(
        LOOKUP_SQL,
        rusqlite::params![path.0.as_str()],
        asset_from_row,
    );

    match lookup {
        // "No rows" is the expected miss, not a failure.
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        Err(other) => Err(other.into()),
        Ok(found) => Ok(Some(found)),
    }
}
fn list_documents(
&self,
filter: &kebab_core::DocFilter,
@@ -484,6 +506,65 @@ fn rows_optional<T>(err: rusqlite::Error) -> rusqlite::Result<Option<T>> {
}
}
/// Reconstruct a [`kebab_core::RawAsset`] from one `assets` row.
///
/// Column order must match the SELECT in
/// [`DocumentStore::get_asset_by_workspace_path`]:
/// `asset_id(0), source_uri(1), workspace_path(2), media_type(3),
/// byte_len(4), checksum(5), storage_kind(6), storage_path(7),
/// discovered_at(8)`.
fn asset_from_row(row: &rusqlite::Row<'_>) -> rusqlite::Result<kebab_core::RawAsset> {
use std::path::PathBuf;
let asset_id: String = row.get(0)?;
let source_uri_raw: String = row.get(1)?;
let workspace_path_raw: String = row.get(2)?;
let media_type_json: String = row.get(3)?;
let byte_len: i64 = row.get(4)?;
let checksum_raw: String = row.get(5)?;
let storage_kind: String = row.get(6)?;
let storage_path_raw: String = row.get(7)?;
let discovered_at_raw: String = row.get(8)?;
// Parse source_uri: stored as "file://<path>" or "kb://<uri>".
let source_uri = if let Some(path_str) = source_uri_raw.strip_prefix("file://") {
kebab_core::SourceUri::File(PathBuf::from(path_str))
} else {
kebab_core::SourceUri::Kb(source_uri_raw.clone())
};
let workspace_path = kebab_core::WorkspacePath(workspace_path_raw);
let media_type: kebab_core::MediaType = serde_json::from_str(&media_type_json)
.map_err(|e| rusqlite::Error::FromSqlConversionFailure(3, rusqlite::types::Type::Text, Box::new(e)))?;
let checksum = kebab_core::Checksum(checksum_raw.clone());
let discovered_at = time::OffsetDateTime::parse(
&discovered_at_raw,
&time::format_description::well_known::Rfc3339,
)
.map_err(|e| rusqlite::Error::FromSqlConversionFailure(8, rusqlite::types::Type::Text, Box::new(e)))?;
let storage_path = PathBuf::from(&storage_path_raw);
let stored = if storage_kind == "copied" {
kebab_core::AssetStorage::Copied { path: storage_path }
} else {
kebab_core::AssetStorage::Reference {
path: storage_path,
sha: checksum.clone(),
}
};
Ok(kebab_core::RawAsset {
asset_id: kebab_core::AssetId(asset_id),
source_uri,
workspace_path,
media_type,
byte_len: byte_len as u64,
checksum,
discovered_at,
stored,
})
}
/// UPSERT the documents row and bump `doc_version` on conflict.
fn upsert_document(
tx: &rusqlite::Transaction<'_>,

View File

@@ -117,3 +117,32 @@ fn put_then_get_document_roundtrips_none_stamps() {
"last_embedding_version must be None when not set"
);
}
/// An asset written with `put_asset` can be fetched back through its
/// workspace path with the same id, checksum and byte length.
#[test]
fn get_asset_by_workspace_path_roundtrips() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let original = make_asset();
    store.put_asset(&original).unwrap();

    let lookup = store
        .get_asset_by_workspace_path(&original.workspace_path)
        .unwrap();
    let fetched = lookup.expect("asset must round-trip");

    assert_eq!(fetched.asset_id, original.asset_id);
    assert_eq!(fetched.checksum, original.checksum);
    assert_eq!(fetched.byte_len, original.byte_len);
}
/// Looking up a workspace path that was never stored yields `None`
/// rather than an error.
#[test]
fn get_asset_by_workspace_path_returns_none_for_unknown() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let missing = WorkspacePath::new("notes/missing.md".into()).unwrap();
    let lookup = store.get_asset_by_workspace_path(&missing).unwrap();

    assert!(lookup.is_none());
}