From 360f825f3a9ead841b57611fb9a5d6e2497e6e56 Mon Sep 17 00:00:00 2001 From: altair823 Date: Wed, 20 May 2026 05:35:17 +0000 Subject: [PATCH] docs(dogfood): refresh try_skip_unchanged doc-comment to match new flow (PR #146 review) Round 1 review found the function-level doc-comment still described the old asset-side algorithm (item 2 asset-row checksum, item 3 id_for_doc miss). Updated to the document-centric flow. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/lib.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 2e4b3c2..e627b06 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -748,15 +748,18 @@ struct ImagePipeline<'a> { /// hold (per design §9 cascade rule): /// /// 1. `force_reingest == false` — caller hasn't asked to bypass skip. -/// 2. The freshly-scanned asset's blake3 checksum equals what the -/// existing `assets` row stores at the same `workspace_path`. -/// 3. The doc keyed on `(workspace_path, asset_id, current_parser_version)` -/// exists. If the parser_version changed, `id_for_doc` produces a -/// different `doc_id` so the lookup misses → no skip → re-process. -/// 4. The existing doc's stamped `last_chunker_version` AND -/// `last_embedding_version` match the values the caller is about -/// to use (`Some(v) == Some(v)` and `None == None` — see design -/// doc for the `None == None` rule when no embedder is configured). +/// 2. A document already exists at this `workspace_path` +/// (`get_document_by_workspace_path`). The lookup is document-side, not +/// asset-side, so twin files (identical content at different paths) each +/// hit their own stable doc row — `documents.workspace_path` is UNIQUE +/// while `assets` may dedupe content into a single row with a flip-flop +/// `workspace_path` column (dogfood bug #4, see `tasks/HOTFIXES.md`). +/// 3. 
The existing doc's `source_asset_id` equals the freshly-scanned +/// asset's blake3 checksum (content unchanged). +/// 4. The existing doc's `parser_version` matches the current extractor's +/// `parser_version` (extractor not upgraded). This is combined with the +/// `last_chunker_version` and `last_embedding_version` checks immediately +/// below — the full cascade per design §9. /// /// Returns `Ok(None)` (proceed with full re-process) when any check /// fails or any DB read errors out — the skip path is opportunistic;