feat(store-sqlite): add truncate_embedding_records helper
Wipes every row from embedding_records and returns the deleted row count. Used by the upcoming `kebab reset --vector-only` to keep SQLite consistent after the on-disk Lance store is removed. Plan deviation from the original spec (task 1): - Original test plan opened SqliteStore with a raw path; the actual signature is `SqliteStore::open(&Config)`, so the integration test builds a Config with `storage.data_dir` pointed at a tempdir. - Original return type was Result<()>; bumped to Result<u64> so the caller (kebab-app::reset) can surface the truncated count in the reset_report.v1 wire payload without a separate COUNT query. p9-fb-06 task 1. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -107,6 +107,22 @@ impl SqliteStore {
|
||||
/// WHERE embedding_id IN (?, ?, …)`) inside one transaction —
|
||||
/// avoids the per-row `execute()` round-trip the previous
|
||||
/// implementation paid.
|
||||
/// Wipe every row from `embedding_records`, returning the count of
|
||||
/// rows that were removed. Called by `kebab reset --vector-only` so
|
||||
/// SQLite cannot point at a Lance row that the reset just removed
|
||||
/// off-disk.
|
||||
///
|
||||
/// The function does NOT cascade to `chunks` or `documents` — those
|
||||
/// are kept so the next `kebab ingest` re-embeds the existing chunk
|
||||
/// set without re-parsing.
|
||||
pub fn truncate_embedding_records(&self) -> Result<u64> {
|
||||
let conn = self.lock_conn();
|
||||
let n = conn
|
||||
.execute("DELETE FROM embedding_records", [])
|
||||
.context("DELETE FROM embedding_records")?;
|
||||
Ok(n as u64)
|
||||
}
|
||||
|
||||
pub fn mark_embedding_records_committed(
|
||||
&self,
|
||||
embedding_ids: &[String],
|
||||
|
||||
119
crates/kebab-store-sqlite/tests/truncate_embeddings.rs
Normal file
119
crates/kebab-store-sqlite/tests/truncate_embeddings.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
//! `truncate_embedding_records` wipes every row regardless of status.
|
||||
//!
|
||||
//! Used by `kebab reset --vector-only` to keep SQLite in sync after the
|
||||
//! Lance vector store is deleted off-disk. The helper is exposed at the
|
||||
//! integration-test boundary so consumers (kebab-app's reset module) can
|
||||
//! verify its semantics without reaching into private store internals.
|
||||
|
||||
use kebab_config::Config;
|
||||
use kebab_store_sqlite::{EmbeddingRecordRow, SqliteStore};
|
||||
use rusqlite::params;
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
fn config_for(tmp: &TempDir) -> Config {
|
||||
let mut c = Config::defaults();
|
||||
c.storage.data_dir = tmp.path().to_string_lossy().into_owned();
|
||||
c
|
||||
}
|
||||
|
||||
fn open_store(tmp: &TempDir) -> SqliteStore {
|
||||
let cfg = config_for(tmp);
|
||||
let store = SqliteStore::open(&cfg).unwrap();
|
||||
store.run_migrations().unwrap();
|
||||
store
|
||||
}
|
||||
|
||||
/// Seed an asset + document + chunk so an `embedding_records` row inserted
|
||||
/// against `chunk_id` does not violate the chunks FK. Mirrors the helper
|
||||
/// used by the in-crate `embeddings::tests` module — copied here because
|
||||
/// integration tests cannot reach the private `seed_chunk` from outside
|
||||
/// the crate.
|
||||
fn seed_chunk(store: &SqliteStore, chunk_id: &str) {
|
||||
let conn = store.read_conn();
|
||||
conn.execute(
|
||||
"INSERT INTO assets (
|
||||
asset_id, source_uri, workspace_path, media_type, byte_len,
|
||||
checksum, storage_kind, storage_path, discovered_at
|
||||
) VALUES (?, ?, ?, ?, ?, ?, 'reference', '/tmp/x', ?)",
|
||||
params![
|
||||
"0123456789abcdef0123456789abcdef",
|
||||
"file:///tmp/x",
|
||||
"x.md",
|
||||
"{}",
|
||||
0_i64,
|
||||
"deadbeef",
|
||||
"1970-01-01T00:00:00Z",
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO documents (
|
||||
doc_id, asset_id, workspace_path, title, lang, source_type,
|
||||
trust_level, parser_version, doc_version, schema_version,
|
||||
metadata_json, provenance_json, created_at, updated_at
|
||||
) VALUES (?, ?, ?, NULL, NULL, 'fs', 'unverified', 'v1', 1, 1, '{}', '{}', ?, ?)",
|
||||
params![
|
||||
"fedcba9876543210fedcba9876543210",
|
||||
"0123456789abcdef0123456789abcdef",
|
||||
"x.md",
|
||||
"1970-01-01T00:00:00Z",
|
||||
"1970-01-01T00:00:00Z",
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
conn.execute(
|
||||
"INSERT INTO chunks (
|
||||
chunk_id, doc_id, text, heading_path_json, section_label,
|
||||
source_spans_json, token_estimate, chunker_version,
|
||||
policy_hash, block_ids_json, created_at
|
||||
) VALUES (?, ?, 'hi', '[]', NULL, '[]', 1, 'v1', 'hash', '[]', ?)",
|
||||
params![
|
||||
chunk_id,
|
||||
"fedcba9876543210fedcba9876543210",
|
||||
"1970-01-01T00:00:00Z"
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
fn count_rows(store: &SqliteStore) -> i64 {
|
||||
let conn = store.read_conn();
|
||||
conn.query_row("SELECT COUNT(*) FROM embedding_records", [], |r| r.get(0))
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_removes_all_rows_and_returns_count() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let store = open_store(&tmp);
|
||||
let chunk = "11112222333344445555666677778888";
|
||||
seed_chunk(&store, chunk);
|
||||
|
||||
let row = EmbeddingRecordRow {
|
||||
embedding_id: "aaaa1111bbbb2222cccc3333dddd4444".to_string(),
|
||||
chunk_id: chunk.to_string(),
|
||||
model_id: "test-model".to_string(),
|
||||
model_version: "v1".to_string(),
|
||||
dimensions: 4,
|
||||
lance_table: "chunk_embeddings_test_model_4".to_string(),
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
};
|
||||
store
|
||||
.put_embedding_records_pending(std::slice::from_ref(&row))
|
||||
.unwrap();
|
||||
assert_eq!(count_rows(&store), 1);
|
||||
|
||||
let removed = store.truncate_embedding_records().unwrap();
|
||||
assert_eq!(removed, 1);
|
||||
assert_eq!(count_rows(&store), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_on_empty_table_is_noop() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let store = open_store(&tmp);
|
||||
let removed = store.truncate_embedding_records().unwrap();
|
||||
assert_eq!(removed, 0);
|
||||
assert_eq!(count_rows(&store), 0);
|
||||
}
|
||||
Reference in New Issue
Block a user