feat(kebab-core, kebab-app): p9-fb-25 task 4 — IngestReport.skipped_by_extension + wire schema additive

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-05 12:06:34 +00:00
parent d64282433c
commit 693f5582f0
8 changed files with 27 additions and 3 deletions

View File

@@ -22,7 +22,7 @@ use kebab_core::IngestItemKind;
/// `p9-fb-04`, `Aborted`) events. Mirrors the fields persisted into
/// `ingest_runs.progress_json` so external tooling can reconstruct the
/// run's outcome from either side.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
pub struct AggregateCounts {
pub scanned: u32,
pub new: u32,
@@ -35,6 +35,8 @@ pub struct AggregateCounts {
pub errors: u32,
pub chunks_indexed: u32,
pub embeddings_indexed: u32,
/// p9-fb-25: per-extension skip count. See [`IngestReport::skipped_by_extension`].
pub skipped_by_extension: std::collections::BTreeMap<String, u32>,
}
/// One streaming progress event. The CLI's `--json` mode serializes this

View File

@@ -383,6 +383,9 @@ pub fn ingest_with_config_opts(
// without re-walking the DB.
let mut chunks_indexed: u32 = 0;
let mut embeddings_indexed: u32 = 0;
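// p9-fb-25: per-extension skip count, intended to be filled by the Skipped arm.
// NOTE(review): this binding is immutable and nothing visible here inserts into
// it, so the map is reported empty for now — confirm a follow-up populates it.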
let skipped_by_extension: std::collections::BTreeMap<String, u32> =
std::collections::BTreeMap::new();
let scanned_count: u32 = u32::try_from(assets.len()).unwrap_or(u32::MAX);
let embed_active = embedder.is_some() && vector_store.is_some();
@@ -621,6 +624,7 @@ pub fn ingest_with_config_opts(
errors: error_count,
chunks_indexed,
embeddings_indexed,
skipped_by_extension: skipped_by_extension.clone(),
};
let terminal_event = if was_cancelled {
crate::ingest_progress::IngestEvent::Aborted {
@@ -662,6 +666,7 @@ pub fn ingest_with_config_opts(
unchanged: unchanged_count,
errors: error_count,
duration_ms,
skipped_by_extension,
items: if summary_only { None } else { Some(items) },
})
}

View File

@@ -171,6 +171,7 @@ mod tests {
unchanged: 0,
errors: 0,
duration_ms: 0,
skipped_by_extension: std::collections::BTreeMap::new(),
items: None,
};
let v = wire_ingest(&r);

View File

@@ -20,6 +20,11 @@ pub struct IngestReport {
pub unchanged: u32,
pub errors: u32,
pub duration_ms: u32,
/// p9-fb-25: per-extension skip count. Key = lowercase extension
/// without the leading dot (e.g. "docx", "txt"); files without an
/// extension are keyed under "<no-ext>". A `BTreeMap` is used so the
/// wire JSON has a stable key order across runs.
pub skipped_by_extension: std::collections::BTreeMap<String, u32>,
/// `None` ↔ wire `items: null` (`--summary-only`).
pub items: Option<Vec<IngestItem>>,
}

View File

@@ -43,6 +43,7 @@
"root": "/home/u/KB"
},
"skipped": 0,
"skipped_by_extension": {},
"unchanged": 0,
"updated": 1
}

View File

@@ -34,6 +34,7 @@ fn fixture_report() -> IngestReport {
unchanged: 0,
errors: 0,
duration_ms: 187,
skipped_by_extension: std::collections::BTreeMap::new(),
items: Some(vec![
IngestItem {
kind: IngestItemKind::New,

View File

@@ -288,7 +288,7 @@ mod tests {
chunks_indexed: 50,
..Default::default()
};
apply_event(&mut s, IngestEvent::Completed { counts: final_counts });
apply_event(&mut s, IngestEvent::Completed { counts: final_counts.clone() });
assert_eq!(s.counts, final_counts);
assert!(s.terminal_at.is_some());
assert!(!s.aborted);

View File

@@ -13,7 +13,8 @@
"skipped",
"unchanged",
"errors",
"duration_ms"
"duration_ms",
"skipped_by_extension"
],
"properties": {
"schema_version": { "const": "ingest_report.v1" },
@@ -29,6 +30,14 @@
},
"errors": { "type": "integer", "minimum": 0 },
"duration_ms": { "type": "integer", "minimum": 0 },
"skipped_by_extension": {
"type": "object",
"additionalProperties": {
"type": "integer",
"minimum": 0
},
"description": "p9-fb-25: per-extension skip count. Key = lowercase extension without leading dot (e.g. 'docx'). Files without extension key under '<no-ext>'."
},
"items": { "type": ["array", "null"] }
}
}