From 9545367904b821c2b3c8568f3ab4c286345871cf Mon Sep 17 00:00:00 2001 From: altair823 Date: Tue, 5 May 2026 12:13:13 +0000 Subject: [PATCH] =?UTF-8?q?feat(kebab-app):=20p9-fb-25=20task=205=20?= =?UTF-8?q?=E2=80=94=20Skipped=20warnings=20+=20skipped=5Fby=5Fextension?= =?UTF-8?q?=20aggregation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/lib.rs | 41 ++++++++++++++++++++----- crates/kebab-app/tests/skip_reason.rs | 43 +++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 8 deletions(-) create mode 100644 crates/kebab-app/tests/skip_reason.rs diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 232da3f..ca812f9 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -384,7 +384,7 @@ pub fn ingest_with_config_opts( let mut chunks_indexed: u32 = 0; let mut embeddings_indexed: u32 = 0; // p9-fb-25: per-extension skip count, populated in the Skipped arm below. - let skipped_by_extension: std::collections::BTreeMap = + let mut skipped_by_extension: std::collections::BTreeMap = std::collections::BTreeMap::new(); let scanned_count: u32 = u32::try_from(assets.len()).unwrap_or(u32::MAX); @@ -475,7 +475,9 @@ pub fn ingest_with_config_opts( } } kebab_core::IngestItemKind::Skipped => { - skipped_count = skipped_count.saturating_add(1) + skipped_count = skipped_count.saturating_add(1); + let ext = ext_for_skip_warning(&item.doc_path.0); + *skipped_by_extension.entry(ext).or_insert(0) += 1; } kebab_core::IngestItemKind::Unchanged => { unchanged_count = unchanged_count.saturating_add(1) @@ -826,6 +828,31 @@ fn try_skip_unchanged( })) } +/// p9-fb-25: extract the lowercase extension (no leading dot) from a +/// workspace path for use in the `unsupported media type: .X` warning +/// and `IngestReport.skipped_by_extension` key. Returns `""` +/// for paths with no extension. 
/// p9-fb-25: extract the lowercase extension (no leading dot) from a
/// workspace path.
///
/// The result is used both for the `unsupported media type: .X` warning
/// and as the key of `IngestReport.skipped_by_extension`.
///
/// * The extension is whatever `std::path::Path::extension` reports, so
///   only the last component counts (`archive.tar.gz` -> `gz`) and
///   dot-files such as `.gitignore` have no extension.
/// * The value is ASCII-lowercased so `Foo.DOCX` and `bar.docx`
///   aggregate under the same key.
/// * Paths with no extension yield the empty string.
///
/// NOTE(review): the surrounding docs talk about a "sentinel" for the
/// no-extension case, but the value visible here is the empty string.
/// If a non-empty marker was intended, confirm it and update this
/// function together with `unsupported_media_warning`.
fn ext_for_skip_warning(path: &str) -> String {
    std::path::Path::new(path)
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase)
        .unwrap_or_default()
}

/// p9-fb-25: render the `IngestItem.warnings` line for a Skipped asset.
///
/// Paths with an extension produce `unsupported media type: .ext`
/// (lowercased). Paths without one produce the bare prefix with no
/// leading dot, so the message never ends in a dangling `.`.
fn unsupported_media_warning(path: &str) -> String {
    let ext = ext_for_skip_warning(path);
    if ext.is_empty() {
        // No extension: emit the prefix only (byte-identical to the
        // string the integration test asserts on).
        "unsupported media type: ".to_string()
    } else {
        format!("unsupported media type: .{ext}")
    }
}
b/crates/kebab-app/tests/skip_reason.rs @@ -0,0 +1,43 @@ +//! p9-fb-25 task 5: skipped per-asset items must carry a human-readable +//! reason in `warnings`, and the report's `skipped_by_extension` must +//! aggregate by lowercase extension. + +mod common; + +use common::TestEnv; + +#[test] +fn unsupported_extension_skip_carries_warning_and_is_aggregated() { + let env = TestEnv::lexical_only(); + let workspace_root = std::path::PathBuf::from(&env.config.workspace.root); + std::fs::write(workspace_root.join("legacy.docx"), b"unsupported").unwrap(); + std::fs::write(workspace_root.join("Makefile"), b"unsupported").unwrap(); + + let report = kebab_app::ingest_with_config( + env.config.clone(), + env.scope(), + false, + ).unwrap(); + + let items = report.items.as_ref().expect("items array populated"); + let docx_item = items + .iter() + .find(|i| i.doc_path.0.ends_with("legacy.docx")) + .expect("docx in items"); + assert_eq!(docx_item.kind, kebab_core::IngestItemKind::Skipped); + assert_eq!( + docx_item.warnings, + vec!["unsupported media type: .docx".to_string()], + ); + let makefile_item = items + .iter() + .find(|i| i.doc_path.0.ends_with("Makefile")) + .expect("Makefile in items"); + assert_eq!(makefile_item.kind, kebab_core::IngestItemKind::Skipped); + assert_eq!( + makefile_item.warnings, + vec!["unsupported media type: ".to_string()], + ); + assert_eq!(report.skipped_by_extension.get("docx").copied(), Some(1)); + assert_eq!(report.skipped_by_extension.get("").copied(), Some(1)); +}