review(p9-fb-25): 회차 1 nit 반영 — render_skipped_breakdown 단일 source + NO_EXT_SENTINEL + 카운트 + deprecation 문구

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-05 12:35:10 +00:00
parent 51feff5f16
commit e4432a2388
6 changed files with 47 additions and 37 deletions

View File

@@ -100,6 +100,20 @@ pub fn media_label(media: &kebab_core::MediaType) -> &'static str {
}
}
/// p9-fb-25: format the per-extension breakdown that follows the
/// `N skipped` count, e.g. `": 2 docx, 1 txt"`.
///
/// An empty map yields an empty string, so no stray punctuation is
/// emitted. Entries are ordered by count, highest first; entries with
/// equal counts are ordered alphabetically by key.
pub fn render_skipped_breakdown(map: &std::collections::BTreeMap<String, u32>) -> String {
    if map.is_empty() {
        return String::new();
    }

    // Rank by (count descending, key ascending).
    let mut ranked: Vec<(&String, &u32)> = map.iter().collect();
    ranked.sort_by_key(|&(ext, count)| (std::cmp::Reverse(*count), ext));

    let mut rendered = String::from(": ");
    for (position, (ext, count)) in ranked.into_iter().enumerate() {
        if position > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(&count.to_string());
        rendered.push(' ');
        rendered.push_str(ext);
    }
    rendered
}
/// Best-effort send into an optional `mpsc::Sender`. A dropped receiver
/// is silently absorbed — the ingest hot path must not stall on a slow
/// consumer. Logged at `trace` for diagnostics.
@@ -194,4 +208,19 @@ mod tests {
other => panic!("unexpected event: {other:?}"),
}
}
#[test]
fn render_skipped_breakdown_desc_sort_with_tiebreak() {
    use std::collections::BTreeMap;

    // An empty map must render as an empty string.
    let mut counts = BTreeMap::new();
    assert_eq!(render_skipped_breakdown(&counts), "");

    for (ext, skipped) in [("txt", 1), ("docx", 2), ("epub", 1)] {
        counts.insert(ext.to_string(), skipped);
    }

    // `2 docx` comes first (count descending); `1 epub` and `1 txt`
    // tie on count and are therefore ordered alphabetically.
    let rendered = render_skipped_breakdown(&counts);
    assert_eq!(rendered, ": 2 docx, 1 epub, 1 txt".to_string());
}
}

View File

@@ -61,9 +61,16 @@ pub mod logging;
pub mod reset;
pub use app::App;
pub use ingest_progress::{AggregateCounts, IngestEvent};
pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
pub use reset::{ResetReport, ResetScope};
/// p9-fb-25: sentinel used in place of an extension for files that
/// have none. It appears as a key in
/// `IngestReport.skipped_by_extension` and in the
/// `unsupported media type: ...` line of `IngestItem.warnings`.
///
/// The wire schema description references this literal, so changing
/// the sentinel is a wire-compatibility break.
pub const NO_EXT_SENTINEL: &str = "<no-ext>";
/// Parser-version label persisted in `documents.parser_version` for
/// every Markdown file ingested through the `kb-parse-md` pipeline.
/// Kept in lock-step with the literal used in the `kb-store-sqlite`
@@ -830,7 +837,7 @@ fn try_skip_unchanged(
/// p9-fb-25: extract the lowercase extension (no leading dot) from a
/// workspace path for use in the `unsupported media type: .X` warning
/// and `IngestReport.skipped_by_extension` key. Returns `"<no-ext>"`
/// and `IngestReport.skipped_by_extension` key. Returns [`NO_EXT_SENTINEL`]
/// for paths with no extension. Always lowercase so `Foo.DOCX` and
/// `bar.docx` aggregate under the same key.
fn ext_for_skip_warning(path: &str) -> String {
@@ -838,16 +845,16 @@ fn ext_for_skip_warning(path: &str) -> String {
.extension()
.and_then(|s| s.to_str())
.map(|s| s.to_ascii_lowercase())
.unwrap_or_else(|| "<no-ext>".to_string())
.unwrap_or_else(|| NO_EXT_SENTINEL.to_string())
}
/// p9-fb-25: render the `IngestItem.warnings` line for a Skipped
/// asset. `<no-ext>` sentinel renders without a leading dot;
/// asset. [`NO_EXT_SENTINEL`] renders without a leading dot;
/// everything else gets `.ext` form.
fn unsupported_media_warning(path: &str) -> String {
let ext = ext_for_skip_warning(path);
if ext == "<no-ext>" {
"unsupported media type: <no-ext>".to_string()
if ext == NO_EXT_SENTINEL {
format!("unsupported media type: {NO_EXT_SENTINEL}")
} else {
format!("unsupported media type: .{ext}")
}