review(회차1): 회차 1 critical + nit 반영

- (critical) embeddings.rs: truncate_embedding_records 위치 이동.
  mark_embedding_records_committed 함수 위에 끼워 넣었더니 위쪽
  mark_committed 의 14 줄짜리 doc comment (`WHERE status='pending'`
  의 design rationale 등) 가 truncate 의 doc 으로 흡수되고
  mark_committed 자체는 doc 없이 남는 버그. impl block 끝 (mark_committed
  의 닫는 } 다음) 으로 옮겨 plan 의 원래 의도와도 일치.
- (nit) tests/reset_cli.rs: removed_paths 의 idempotency 검증 보강.
  data dir 은 reported, cache dir 은 omit (생성 안 했으니)
  되어야 함을 strict 하게 assert. state dir 은 logging init 의
  side-effect 로 자동 생성되어 둘 다 가능하므로 허용.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-02 18:43:25 +00:00
parent 9d96d603b0
commit 565caebec6
2 changed files with 38 additions and 17 deletions

View File

@@ -109,7 +109,28 @@ fn reset_data_only_yes_json_emits_reset_report_v1() {
Some("reset_report.v1")
);
assert_eq!(v.get("scope").and_then(|s| s.as_str()), Some("data_only"));
assert!(v.get("removed_paths").and_then(|a| a.as_array()).is_some());
// The data dir was created beforehand and must show up in the
// report. The cache dir was NOT created, so it must be omitted —
// proving idempotency ("missing path is treated as already
// removed"). The state dir may or may not appear: kebab-app's
// logging init creates the state dir as a side-effect, which is
// tolerated. We assert the strict invariant (data in, cache out)
// and let the state dir be either way.
let paths: Vec<String> = v
.get("removed_paths")
.and_then(|a| a.as_array())
.expect("removed_paths must be a JSON array")
.iter()
.filter_map(|s| s.as_str().map(str::to_owned))
.collect();
assert!(
paths.iter().any(|p| p.contains("/data/kebab")),
"data dir must be reported as removed, got: {paths:?}"
);
assert!(
!paths.iter().any(|p| p.contains("/cache/kebab")),
"cache dir was never created and must be omitted, got: {paths:?}"
);
}
#[test]

View File

@@ -107,22 +107,6 @@ impl SqliteStore {
/// WHERE embedding_id IN (?, ?, …)`) inside one transaction —
/// avoids the per-row `execute()` round-trip the previous
/// implementation paid.
/// Wipe every row from `embedding_records`, returning the count of
/// rows that were removed. Called by `kebab reset --vector-only` so
/// SQLite cannot point at a Lance row that the reset just removed
/// off-disk.
///
/// The function does NOT cascade to `chunks` or `documents` — those
/// are kept so the next `kebab ingest` re-embeds the existing chunk
/// set without re-parsing.
pub fn truncate_embedding_records(&self) -> Result<u64> {
let conn = self.lock_conn();
let n = conn
.execute("DELETE FROM embedding_records", [])
.context("DELETE FROM embedding_records")?;
Ok(n as u64)
}
pub fn mark_embedding_records_committed(
&self,
embedding_ids: &[String],
@@ -148,6 +132,22 @@ impl SqliteStore {
tx.commit().map_err(StoreError::from)?;
Ok(())
}
/// Wipe every row from `embedding_records`, returning the count of
/// rows that were removed. Called by `kebab reset --vector-only` so
/// SQLite cannot point at a Lance row that the reset just removed
/// off-disk.
///
/// The function does NOT cascade to `chunks` or `documents` — those
/// are kept so the next `kebab ingest` re-embeds the existing chunk
/// set without re-parsing.
pub fn truncate_embedding_records(&self) -> Result<u64> {
let conn = self.lock_conn();
let n = conn
.execute("DELETE FROM embedding_records", [])
.context("DELETE FROM embedding_records")?;
Ok(n as u64)
}
}
#[cfg(test)]