Wipes every row from embedding_records and returns the deleted row count. Used by the upcoming `kebab reset --vector-only` to keep SQLite consistent after the on-disk Lance store is removed. Plan deviation from the original spec (task 1): - Original test plan opened SqliteStore with a raw path; the actual signature is `SqliteStore::open(&Config)`, so the integration test builds a Config with `storage.data_dir` pointed at a tempdir. - Original return type was Result<()>; bumped to Result<u64> so the caller (kebab-app::reset) can surface the truncated count in the reset_report.v1 wire payload without a separate COUNT query. p9-fb-06 task 1. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
120 lines
4.0 KiB
Rust
120 lines
4.0 KiB
Rust
//! `truncate_embedding_records` wipes every row regardless of status.
|
|
//!
|
|
//! Used by `kebab reset --vector-only` to keep SQLite in sync after the
|
|
//! Lance vector store is deleted off-disk. The helper is exposed at the
|
|
//! integration-test boundary so consumers (kebab-app's reset module) can
|
|
//! verify its semantics without reaching into private store internals.
|
|
|
|
use kebab_config::Config;
|
|
use kebab_store_sqlite::{EmbeddingRecordRow, SqliteStore};
|
|
use rusqlite::params;
|
|
use tempfile::TempDir;
|
|
use time::OffsetDateTime;
|
|
|
|
fn config_for(tmp: &TempDir) -> Config {
|
|
let mut c = Config::defaults();
|
|
c.storage.data_dir = tmp.path().to_string_lossy().into_owned();
|
|
c
|
|
}
|
|
|
|
fn open_store(tmp: &TempDir) -> SqliteStore {
|
|
let cfg = config_for(tmp);
|
|
let store = SqliteStore::open(&cfg).unwrap();
|
|
store.run_migrations().unwrap();
|
|
store
|
|
}
|
|
|
|
/// Seed an asset + document + chunk so an `embedding_records` row inserted
|
|
/// against `chunk_id` does not violate the chunks FK. Mirrors the helper
|
|
/// used by the in-crate `embeddings::tests` module — copied here because
|
|
/// integration tests cannot reach the private `seed_chunk` from outside
|
|
/// the crate.
|
|
fn seed_chunk(store: &SqliteStore, chunk_id: &str) {
|
|
let conn = store.read_conn();
|
|
conn.execute(
|
|
"INSERT INTO assets (
|
|
asset_id, source_uri, workspace_path, media_type, byte_len,
|
|
checksum, storage_kind, storage_path, discovered_at
|
|
) VALUES (?, ?, ?, ?, ?, ?, 'reference', '/tmp/x', ?)",
|
|
params![
|
|
"0123456789abcdef0123456789abcdef",
|
|
"file:///tmp/x",
|
|
"x.md",
|
|
"{}",
|
|
0_i64,
|
|
"deadbeef",
|
|
"1970-01-01T00:00:00Z",
|
|
],
|
|
)
|
|
.unwrap();
|
|
conn.execute(
|
|
"INSERT INTO documents (
|
|
doc_id, asset_id, workspace_path, title, lang, source_type,
|
|
trust_level, parser_version, doc_version, schema_version,
|
|
metadata_json, provenance_json, created_at, updated_at
|
|
) VALUES (?, ?, ?, NULL, NULL, 'fs', 'unverified', 'v1', 1, 1, '{}', '{}', ?, ?)",
|
|
params![
|
|
"fedcba9876543210fedcba9876543210",
|
|
"0123456789abcdef0123456789abcdef",
|
|
"x.md",
|
|
"1970-01-01T00:00:00Z",
|
|
"1970-01-01T00:00:00Z",
|
|
],
|
|
)
|
|
.unwrap();
|
|
conn.execute(
|
|
"INSERT INTO chunks (
|
|
chunk_id, doc_id, text, heading_path_json, section_label,
|
|
source_spans_json, token_estimate, chunker_version,
|
|
policy_hash, block_ids_json, created_at
|
|
) VALUES (?, ?, 'hi', '[]', NULL, '[]', 1, 'v1', 'hash', '[]', ?)",
|
|
params![
|
|
chunk_id,
|
|
"fedcba9876543210fedcba9876543210",
|
|
"1970-01-01T00:00:00Z"
|
|
],
|
|
)
|
|
.unwrap();
|
|
}
|
|
|
|
fn count_rows(store: &SqliteStore) -> i64 {
|
|
let conn = store.read_conn();
|
|
conn.query_row("SELECT COUNT(*) FROM embedding_records", [], |r| r.get(0))
|
|
.unwrap()
|
|
}
|
|
|
|
#[test]
|
|
fn truncate_removes_all_rows_and_returns_count() {
|
|
let tmp = TempDir::new().unwrap();
|
|
let store = open_store(&tmp);
|
|
let chunk = "11112222333344445555666677778888";
|
|
seed_chunk(&store, chunk);
|
|
|
|
let row = EmbeddingRecordRow {
|
|
embedding_id: "aaaa1111bbbb2222cccc3333dddd4444".to_string(),
|
|
chunk_id: chunk.to_string(),
|
|
model_id: "test-model".to_string(),
|
|
model_version: "v1".to_string(),
|
|
dimensions: 4,
|
|
lance_table: "chunk_embeddings_test_model_4".to_string(),
|
|
created_at: OffsetDateTime::now_utc(),
|
|
};
|
|
store
|
|
.put_embedding_records_pending(std::slice::from_ref(&row))
|
|
.unwrap();
|
|
assert_eq!(count_rows(&store), 1);
|
|
|
|
let removed = store.truncate_embedding_records().unwrap();
|
|
assert_eq!(removed, 1);
|
|
assert_eq!(count_rows(&store), 0);
|
|
}
|
|
|
|
#[test]
|
|
fn truncate_on_empty_table_is_noop() {
|
|
let tmp = TempDir::new().unwrap();
|
|
let store = open_store(&tmp);
|
|
let removed = store.truncate_embedding_records().unwrap();
|
|
assert_eq!(removed, 0);
|
|
assert_eq!(count_rows(&store), 0);
|
|
}
|