feat(kebab-core, kebab-app): p9-fb-25 task 4 — IngestReport.skipped_by_extension + wire schema additive
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -22,7 +22,7 @@ use kebab_core::IngestItemKind;
|
||||
/// `p9-fb-04`, `Aborted`) events. Mirrors the fields persisted into
|
||||
/// `ingest_runs.progress_json` so external tooling can reconstruct the
|
||||
/// run's outcome from either side.
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct AggregateCounts {
|
||||
pub scanned: u32,
|
||||
pub new: u32,
|
||||
@@ -35,6 +35,8 @@ pub struct AggregateCounts {
|
||||
pub errors: u32,
|
||||
pub chunks_indexed: u32,
|
||||
pub embeddings_indexed: u32,
|
||||
/// p9-fb-25: per-extension skip count. See [`IngestReport::skipped_by_extension`].
|
||||
pub skipped_by_extension: std::collections::BTreeMap<String, u32>,
|
||||
}
|
||||
|
||||
/// One streaming progress event. The CLI's `--json` mode serializes this
|
||||
|
||||
@@ -383,6 +383,9 @@ pub fn ingest_with_config_opts(
|
||||
// without re-walking the DB.
|
||||
let mut chunks_indexed: u32 = 0;
|
||||
let mut embeddings_indexed: u32 = 0;
|
||||
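// p9-fb-25: per-extension skip count, intended to be filled by the Skipped arm below.
// NOTE(review): this binding is not `mut`, so as written the map stays empty and is
// reported as `{}` — confirm the population logic lands in a follow-up task.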
|
||||
let skipped_by_extension: std::collections::BTreeMap<String, u32> =
|
||||
std::collections::BTreeMap::new();
|
||||
let scanned_count: u32 = u32::try_from(assets.len()).unwrap_or(u32::MAX);
|
||||
|
||||
let embed_active = embedder.is_some() && vector_store.is_some();
|
||||
@@ -621,6 +624,7 @@ pub fn ingest_with_config_opts(
|
||||
errors: error_count,
|
||||
chunks_indexed,
|
||||
embeddings_indexed,
|
||||
skipped_by_extension: skipped_by_extension.clone(),
|
||||
};
|
||||
let terminal_event = if was_cancelled {
|
||||
crate::ingest_progress::IngestEvent::Aborted {
|
||||
@@ -662,6 +666,7 @@ pub fn ingest_with_config_opts(
|
||||
unchanged: unchanged_count,
|
||||
errors: error_count,
|
||||
duration_ms,
|
||||
skipped_by_extension,
|
||||
items: if summary_only { None } else { Some(items) },
|
||||
})
|
||||
}
|
||||
|
||||
@@ -171,6 +171,7 @@ mod tests {
|
||||
unchanged: 0,
|
||||
errors: 0,
|
||||
duration_ms: 0,
|
||||
skipped_by_extension: std::collections::BTreeMap::new(),
|
||||
items: None,
|
||||
};
|
||||
let v = wire_ingest(&r);
|
||||
|
||||
@@ -20,6 +20,11 @@ pub struct IngestReport {
|
||||
pub unchanged: u32,
|
||||
pub errors: u32,
|
||||
pub duration_ms: u32,
|
||||
/// p9-fb-25: per-extension skip count. Key = lowercase extension
|
||||
/// without leading dot (e.g. "docx", "txt"); files without an
|
||||
/// extension are keyed under "<no-ext>". `BTreeMap` so the wire JSON
|
||||
/// has stable key order across runs.
|
||||
pub skipped_by_extension: std::collections::BTreeMap<String, u32>,
|
||||
/// `None` ↔ wire `items: null` (`--summary-only`).
|
||||
pub items: Option<Vec<IngestItem>>,
|
||||
}
|
||||
|
||||
@@ -43,6 +43,7 @@
|
||||
"root": "/home/u/KB"
|
||||
},
|
||||
"skipped": 0,
|
||||
"skipped_by_extension": {},
|
||||
"unchanged": 0,
|
||||
"updated": 1
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ fn fixture_report() -> IngestReport {
|
||||
unchanged: 0,
|
||||
errors: 0,
|
||||
duration_ms: 187,
|
||||
skipped_by_extension: std::collections::BTreeMap::new(),
|
||||
items: Some(vec![
|
||||
IngestItem {
|
||||
kind: IngestItemKind::New,
|
||||
|
||||
@@ -288,7 +288,7 @@ mod tests {
|
||||
chunks_indexed: 50,
|
||||
..Default::default()
|
||||
};
|
||||
apply_event(&mut s, IngestEvent::Completed { counts: final_counts });
|
||||
apply_event(&mut s, IngestEvent::Completed { counts: final_counts.clone() });
|
||||
assert_eq!(s.counts, final_counts);
|
||||
assert!(s.terminal_at.is_some());
|
||||
assert!(!s.aborted);
|
||||
|
||||
@@ -13,7 +13,8 @@
|
||||
"skipped",
|
||||
"unchanged",
|
||||
"errors",
|
||||
"duration_ms"
|
||||
"duration_ms",
|
||||
"skipped_by_extension"
|
||||
],
|
||||
"properties": {
|
||||
"schema_version": { "const": "ingest_report.v1" },
|
||||
@@ -29,6 +30,14 @@
|
||||
},
|
||||
"errors": { "type": "integer", "minimum": 0 },
|
||||
"duration_ms": { "type": "integer", "minimum": 0 },
|
||||
"skipped_by_extension": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "integer",
|
||||
"minimum": 0
|
||||
},
|
||||
"description": "p9-fb-25: per-extension skip count. Key = lowercase extension without leading dot (e.g. 'docx'). Files without an extension are keyed under '<no-ext>'."
|
||||
},
|
||||
"items": { "type": ["array", "null"] }
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user