diff --git a/crates/kebab-store-sqlite/src/store.rs b/crates/kebab-store-sqlite/src/store.rs
index 1837557..ebebf42 100644
--- a/crates/kebab-store-sqlite/src/store.rs
+++ b/crates/kebab-store-sqlite/src/store.rs
@@ -570,6 +570,20 @@ impl SqliteStore {
         keep_doc_id: &str,
     ) -> Result<()> {
         let conn = self.lock_conn();
+        // Replaces the CASCADE dropped in V011: before the documents→chunks CASCADE
+        // deletes the chunks, explicitly delete the original + sentinel ({id}#alias)
+        // embedding_records. Alias dense vectors have no chunks FK, so delete them
+        // here while the chunks still exist (else tombstone-trigger rows pile up).
+        // Spec 2026-05-30-dense-alias-vectors-design.md §3.5-2. (Task 4.5 review MAJOR.)
+        conn.execute(
+            "DELETE FROM embedding_records WHERE chunk_id IN \
+             (SELECT chunk_id FROM chunks WHERE doc_id IN \
+             (SELECT doc_id FROM documents WHERE workspace_path = ?1 AND doc_id != ?2) \
+             UNION SELECT chunk_id || '#alias' FROM chunks WHERE doc_id IN \
+             (SELECT doc_id FROM documents WHERE workspace_path = ?1 AND doc_id != ?2))",
+            params![workspace_path, keep_doc_id],
+        )
+        .map_err(StoreError::from)?;
         conn.execute(
             "DELETE FROM documents WHERE workspace_path = ?1 AND doc_id != ?2",
             params![workspace_path, keep_doc_id],
diff --git a/crates/kebab-store-sqlite/tests/embedding_records_fk.rs b/crates/kebab-store-sqlite/tests/embedding_records_fk.rs
index d247a60..a739551 100644
--- a/crates/kebab-store-sqlite/tests/embedding_records_fk.rs
+++ b/crates/kebab-store-sqlite/tests/embedding_records_fk.rs
@@ -162,3 +162,48 @@ fn put_chunks_cleans_original_and_sentinel_embeddings() {
         "sentinel embedding_records must be cleaned on re-ingest (no chunks FK → explicit DELETE)"
     );
 }
+
+/// Task 4.5 review MAJOR: `purge_document_at_workspace_path_except_doc_id`
+/// (the parser-bump re-ingest path) must also explicitly DELETE the original +
+/// sentinel embedding_records, leaving zero orphans (else tombstones pile up).
+#[test]
+fn purge_except_doc_id_cleans_original_and_sentinel_embeddings() {
+    let tmp = TempDir::new().unwrap();
+    let store = open_store(&tmp);
+    let c1 = "11111111111111111111111111111111";
+    seed_chunk(&store, c1); // doc DOC_ID @ workspace 'x.md'
+    let sentinel = format!("{c1}{}", kebab_core::ALIAS_SUFFIX);
+
+    store
+        .put_embedding_records_pending(&[
+            embed_row("e_orig_000000000000000000000000000", c1),
+            embed_row("e_sentinel_0000000000000000000000", &sentinel),
+        ])
+        .unwrap();
+    store
+        .mark_embedding_records_committed(&[
+            "e_orig_000000000000000000000000000".to_string(),
+            "e_sentinel_0000000000000000000000".to_string(),
+        ])
+        .unwrap();
+    assert_eq!(embed_count(&store, c1), 1);
+    assert_eq!(embed_count(&store, &sentinel), 1);
+
+    // Keep only a doc other than DOC_ID (= current doc) at workspace 'x.md', so
+    // DOC_ID is purged (parser-bump: drop the old doc_id at the same path). With
+    // keep_doc_id != DOC_ID, the DOC_ID doc and its chunk embeddings must go.
+    store
+        .purge_document_at_workspace_path_except_doc_id("x.md", "0000000000000000000000000000ffff")
+        .unwrap();
+
+    assert_eq!(
+        embed_count(&store, c1),
+        0,
+        "purge_except_doc_id: 원본 embedding_records 정리 (CASCADE 대체)"
+    );
+    assert_eq!(
+        embed_count(&store, &sentinel),
+        0,
+        "purge_except_doc_id: sentinel embedding_records 정리 (chunks FK 없음 → 명시 DELETE)"
+    );
+}
diff --git a/migrations/V011__drop_embedding_records_fk.sql b/migrations/V011__drop_embedding_records_fk.sql
index 9f156f2..823f222 100644
--- a/migrations/V011__drop_embedding_records_fk.sql
+++ b/migrations/V011__drop_embedding_records_fk.sql
@@ -3,6 +3,10 @@
 -- (설계 spec 2026-05-30-dense-alias-vectors-design.md §3.5-1). SQLite 는 ALTER
 -- 로 FK 제거 불가 → 테이블 재생성. status/vector_committed(V003) + 인덱스 보존.
 -- CASCADE 제거분은 put_chunks/purge 의 명시 DELETE 로 대체(§3.5-2).
+-- NOTE: refinery wraps the migration in a transaction, so PRAGMA foreign_keys
+-- is a no-op here (SQLite: "FK enforcement may only be changed when no
+-- transaction is pending"). The real safeguard is legacy_alter_table below:
+-- avoiding trigger re-parsing is this migration's key protection. (Task 4.5 review NIT.)
 PRAGMA foreign_keys=OFF;
 -- legacy_alter_table=ON: DROP embedding_records 직후 V003 의
 -- chunks_bd_tombstone_embeddings trigger 가 (아직 존재하는 chunks 위에서)
@@ -21,7 +25,8 @@ CREATE TABLE embedding_records_new (
     dimensions INTEGER NOT NULL,
     lance_table TEXT NOT NULL,
     created_at TEXT NOT NULL,
-    status TEXT NOT NULL DEFAULT 'pending',
+    status TEXT NOT NULL DEFAULT 'pending'
+        CHECK (status IN ('pending','committed','tombstone')), -- keeps the same integrity guard as V003
     vector_committed INTEGER NOT NULL DEFAULT 0,
     UNIQUE(chunk_id, model_id, model_version, dimensions)
 );