diff --git a/crates/kebab-app/src/ingest_progress.rs b/crates/kebab-app/src/ingest_progress.rs index 1ff6e45..3f41caf 100644 --- a/crates/kebab-app/src/ingest_progress.rs +++ b/crates/kebab-app/src/ingest_progress.rs @@ -22,7 +22,7 @@ use kebab_core::IngestItemKind; /// `p9-fb-04`, `Aborted`) events. Mirrors the fields persisted into /// `ingest_runs.progress_json` so external tooling can reconstruct the /// run's outcome from either side. -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] pub struct AggregateCounts { pub scanned: u32, pub new: u32, @@ -35,6 +35,8 @@ pub struct AggregateCounts { pub errors: u32, pub chunks_indexed: u32, pub embeddings_indexed: u32, + /// p9-fb-25: per-extension skip count. See [`IngestReport::skipped_by_extension`]. + pub skipped_by_extension: std::collections::BTreeMap, } /// One streaming progress event. The CLI's `--json` mode serializes this diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index ab44fdf..232da3f 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -383,6 +383,9 @@ pub fn ingest_with_config_opts( // without re-walking the DB. let mut chunks_indexed: u32 = 0; let mut embeddings_indexed: u32 = 0; + // p9-fb-25: per-extension skip count, populated in the Skipped arm below. + let skipped_by_extension: std::collections::BTreeMap = + std::collections::BTreeMap::new(); let scanned_count: u32 = u32::try_from(assets.len()).unwrap_or(u32::MAX); let embed_active = embedder.is_some() && vector_store.is_some(); @@ -621,6 +624,7 @@ pub fn ingest_with_config_opts( errors: error_count, chunks_indexed, embeddings_indexed, + skipped_by_extension: skipped_by_extension.clone(), }; let terminal_event = if was_cancelled { crate::ingest_progress::IngestEvent::Aborted { @@ -662,6 +666,7 @@ pub fn ingest_with_config_opts( unchanged: unchanged_count, errors: error_count, duration_ms, + skipped_by_extension, items: if summary_only { None } else { Some(items) }, }) } diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs index ed503fe..c377e63 100644 --- a/crates/kebab-cli/src/wire.rs +++ b/crates/kebab-cli/src/wire.rs @@ -171,6 +171,7 @@ mod tests { unchanged: 0, errors: 0, duration_ms: 0, + skipped_by_extension: std::collections::BTreeMap::new(), items: None, }; let v = wire_ingest(&r); diff --git a/crates/kebab-core/src/ingest.rs b/crates/kebab-core/src/ingest.rs index 02e1e69..8ada477 100644 --- a/crates/kebab-core/src/ingest.rs +++ b/crates/kebab-core/src/ingest.rs @@ -20,6 +20,11 @@ pub struct IngestReport { pub unchanged: u32, pub errors: u32, pub duration_ms: u32, + /// p9-fb-25: per-extension skip count. Key = lowercase extension + /// without leading dot (e.g. "docx", "txt"); files without an + /// extension key under "". `BTreeMap` so the wire JSON + /// has stable key order across runs. + pub skipped_by_extension: std::collections::BTreeMap, /// `None` ↔ wire `items: null` (`--summary-only`). pub items: Option>, } diff --git a/crates/kebab-store-sqlite/snapshots/ingest_report.snapshot.json b/crates/kebab-store-sqlite/snapshots/ingest_report.snapshot.json index 6cb042d..133aad3 100644 --- a/crates/kebab-store-sqlite/snapshots/ingest_report.snapshot.json +++ b/crates/kebab-store-sqlite/snapshots/ingest_report.snapshot.json @@ -43,6 +43,7 @@ "root": "/home/u/KB" }, "skipped": 0, + "skipped_by_extension": {}, "unchanged": 0, "updated": 1 } diff --git a/crates/kebab-store-sqlite/tests/ingest_report_snapshot.rs b/crates/kebab-store-sqlite/tests/ingest_report_snapshot.rs index f7313c0..458c6c6 100644 --- a/crates/kebab-store-sqlite/tests/ingest_report_snapshot.rs +++ b/crates/kebab-store-sqlite/tests/ingest_report_snapshot.rs @@ -34,6 +34,7 @@ fn fixture_report() -> IngestReport { unchanged: 0, errors: 0, duration_ms: 187, + skipped_by_extension: std::collections::BTreeMap::new(), items: Some(vec![ IngestItem { kind: IngestItemKind::New, diff --git a/crates/kebab-tui/src/ingest_progress.rs b/crates/kebab-tui/src/ingest_progress.rs index edf50c0..d613ac2 100644 --- a/crates/kebab-tui/src/ingest_progress.rs +++ b/crates/kebab-tui/src/ingest_progress.rs @@ -288,7 +288,7 @@ mod tests { chunks_indexed: 50, ..Default::default() }; - apply_event(&mut s, IngestEvent::Completed { counts: final_counts }); + apply_event(&mut s, IngestEvent::Completed { counts: final_counts.clone() }); assert_eq!(s.counts, final_counts); assert!(s.terminal_at.is_some()); assert!(!s.aborted); diff --git a/docs/wire-schema/v1/ingest_report.schema.json b/docs/wire-schema/v1/ingest_report.schema.json index 4cfb394..aeb2e67 100644 --- a/docs/wire-schema/v1/ingest_report.schema.json +++ b/docs/wire-schema/v1/ingest_report.schema.json @@ -13,7 +13,8 @@ "skipped", "unchanged", "errors", - "duration_ms" + "duration_ms", + "skipped_by_extension" ], "properties": { "schema_version": { "const": "ingest_report.v1" }, @@ -29,6 +30,14 @@ }, "errors": { "type": "integer", "minimum": 0 }, "duration_ms": { "type": "integer", "minimum": 0 }, + "skipped_by_extension": { + "type": "object", + "additionalProperties": { + "type": "integer", + "minimum": 0 + }, + "description": "p9-fb-25: per-extension skip count. Key = lowercase extension without leading dot (e.g. 'docx'). Files without extension key under ''." + }, "items": { "type": ["array", "null"] } } }