review(p9-fb-25): 회차 1 nit 반영 — render_skipped_breakdown 단일 source + NO_EXT_SENTINEL + 카운트 + deprecation 문구

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-05 12:35:10 +00:00
parent 51feff5f16
commit e4432a2388
6 changed files with 47 additions and 37 deletions

View File

@@ -100,6 +100,20 @@ pub fn media_label(media: &kebab_core::MediaType) -> &'static str {
}
}
/// p9-fb-25: build the `": A docx, B txt"` suffix that follows the
/// `N skipped` count. An empty map yields an empty string, so the
/// caller prints no stray punctuation. Entries are ordered by count,
/// largest first; equal counts fall back to ascending key order.
pub fn render_skipped_breakdown(map: &std::collections::BTreeMap<String, u32>) -> String {
    if map.is_empty() {
        return String::new();
    }
    let mut ranked: Vec<(&String, &u32)> = map.iter().collect();
    ranked.sort_by(|lhs, rhs| match rhs.1.cmp(lhs.1) {
        // Same count: alphabetical by extension key.
        std::cmp::Ordering::Equal => lhs.0.cmp(rhs.0),
        by_count => by_count,
    });
    let mut rendered = String::from(": ");
    for (idx, (ext, count)) in ranked.iter().enumerate() {
        if idx > 0 {
            rendered.push_str(", ");
        }
        rendered.push_str(&format!("{count} {ext}"));
    }
    rendered
}
/// Best-effort send into an optional `mpsc::Sender`. A dropped receiver
/// is silently absorbed — the ingest hot path must not stall on a slow
/// consumer. Logged at `trace` for diagnostics.
@@ -194,4 +208,19 @@ mod tests {
other => panic!("unexpected event: {other:?}"),
}
}
/// Pins the rendering contract: empty map → empty string; otherwise
/// entries come out by count descending, with ties in key order.
#[test]
fn render_skipped_breakdown_desc_sort_with_tiebreak() {
    use std::collections::BTreeMap;

    // An empty map must render as an empty string.
    let mut counts: BTreeMap<String, u32> = BTreeMap::new();
    assert_eq!(render_skipped_breakdown(&counts), "");

    for (ext, n) in [("txt", 1), ("docx", 2), ("epub", 1)] {
        counts.insert(ext.to_string(), n);
    }

    // `2 docx` leads (count descending); `1 epub` / `1 txt` then
    // follow in alphabetical order.
    assert_eq!(
        render_skipped_breakdown(&counts),
        ": 2 docx, 1 epub, 1 txt"
    );
}
}

View File

@@ -61,9 +61,16 @@ pub mod logging;
pub mod reset;
pub use app::App;
pub use ingest_progress::{AggregateCounts, IngestEvent};
pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
pub use reset::{ResetReport, ResetScope};
/// p9-fb-25: sentinel used for files that have no extension.
///
/// It appears both as a key in `IngestReport.skipped_by_extension`
/// and in the `unsupported media type: ...` line of
/// `IngestItem.warnings`. The wire schema description references
/// this literal, so changing the sentinel is a wire-compatibility
/// break.
pub const NO_EXT_SENTINEL: &str = "<no-ext>";
/// Parser-version label persisted in `documents.parser_version` for
/// every Markdown file ingested through the `kb-parse-md` pipeline.
/// Kept in lock-step with the literal used in the `kb-store-sqlite`
@@ -830,7 +837,7 @@ fn try_skip_unchanged(
/// p9-fb-25: extract the lowercase extension (no leading dot) from a
/// workspace path for use in the `unsupported media type: .X` warning
/// and `IngestReport.skipped_by_extension` key. Returns `"<no-ext>"`
/// and `IngestReport.skipped_by_extension` key. Returns [`NO_EXT_SENTINEL`]
/// for paths with no extension. Always lowercase so `Foo.DOCX` and
/// `bar.docx` aggregate under the same key.
fn ext_for_skip_warning(path: &str) -> String {
@@ -838,16 +845,16 @@ fn ext_for_skip_warning(path: &str) -> String {
.extension()
.and_then(|s| s.to_str())
.map(|s| s.to_ascii_lowercase())
.unwrap_or_else(|| "<no-ext>".to_string())
.unwrap_or_else(|| NO_EXT_SENTINEL.to_string())
}
/// p9-fb-25: render the `IngestItem.warnings` line for a Skipped
/// asset. `<no-ext>` sentinel renders without a leading dot;
/// asset. [`NO_EXT_SENTINEL`] renders without a leading dot;
/// everything else gets `.ext` form.
fn unsupported_media_warning(path: &str) -> String {
let ext = ext_for_skip_warning(path);
if ext == "<no-ext>" {
"unsupported media type: <no-ext>".to_string()
if ext == NO_EXT_SENTINEL {
format!("unsupported media type: {NO_EXT_SENTINEL}")
} else {
format!("unsupported media type: .{ext}")
}

View File

@@ -4,20 +4,6 @@
use std::path::PathBuf;
use std::process::ExitCode;
/// p9-fb-25: produce the `": A docx, B txt"` text appended after the
/// `N skipped` count. Returns an empty string for an empty map so no
/// dangling punctuation is printed. Ordering is by count (highest
/// first), then by key in ascending order for equal counts.
fn render_skipped_breakdown(map: &std::collections::BTreeMap<String, u32>) -> String {
    if map.is_empty() {
        return String::new();
    }
    let mut by_count: Vec<_> = map.iter().collect();
    // `Reverse` flips the count so larger values sort first, while the
    // key keeps its natural ascending order as the tiebreak.
    by_count.sort_by_key(|entry| (std::cmp::Reverse(*entry.1), entry.0));
    let listing = by_count
        .iter()
        .map(|(ext, count)| format!("{count} {ext}"))
        .collect::<Vec<String>>()
        .join(", ");
    format!(": {listing}")
}
use clap::{Parser, Subcommand};
use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
@@ -385,7 +371,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
} else {
let skipped_breakdown = render_skipped_breakdown(&report.skipped_by_extension);
let skipped_breakdown = kebab_app::render_skipped_breakdown(&report.skipped_by_extension);
println!(
"scanned {} new {} updated {} skipped {}{} errors {} ({} ms)",
report.scanned,

View File

@@ -411,7 +411,7 @@ impl Config {
tracing::warn!(
target: "kebab-config",
config = %path.display(),
"deprecated config: `workspace.include` 필드는 더 이상 사용되지 않습니다 (p9-fb-25). 처리 가능한 형식 (md / png / jpg / pdf) 은 extractor 가 자동 결정. 다음 버전부터 config 갱신 권장."
"deprecated config: `workspace.include` 필드는 더 이상 사용되지 않습니다 (p9-fb-25, v0.2.1+). 처리 가능한 형식 (md / png / jpg / pdf) 은 extractor 가 자동 결정. config 에서 이 필드를 제거해도 안전 — 더 이상 enforce 안 됨."
);
});
}

View File

@@ -25,18 +25,6 @@ use kebab_core::SourceScope;
use crate::app::{App, IngestState, TERMINAL_LINE_HOLD_SECS};
/// p9-fb-25: format the `": A docx, B txt"` breakdown shown right
/// after the `N skipped` count. An empty map gives an empty string.
/// Sorted by count descending; equal counts are ordered by key.
fn render_skipped_breakdown(map: &std::collections::BTreeMap<String, u32>) -> String {
    let mut pairs: Vec<(&String, &u32)> = map.iter().collect();
    pairs.sort_by(|x, y| y.1.cmp(x.1).then(x.0.cmp(y.0)));
    match pairs.as_slice() {
        // Nothing skipped: emit no suffix at all.
        [] => String::new(),
        sorted => {
            let listed = sorted
                .iter()
                .map(|(ext, n)| format!("{n} {ext}"))
                .collect::<Vec<_>>()
                .join(", ");
            format!(": {listed}")
        }
    }
}
/// Already-running guard. Returns `Err` if `app.ingest_state` is
/// already populated — pressing `r` twice in a row should not spawn
/// two parallel workers (SQLite is mutexed but Lance writes can race
@@ -187,7 +175,7 @@ pub fn status_line(state: &IngestState) -> String {
let elapsed = state.started_at.elapsed();
let secs = elapsed.as_secs();
if state.aborted {
let skipped_breakdown = render_skipped_breakdown(&state.counts.skipped_by_extension);
let skipped_breakdown = kebab_app::ingest_progress::render_skipped_breakdown(&state.counts.skipped_by_extension);
return format!(
"✗ ingest aborted at {}/{} after {}s (new={} updated={} unchanged={} skipped={}{} errors={})",
state.counts.scanned.saturating_sub(state.counts.errors),
@@ -201,7 +189,7 @@ pub fn status_line(state: &IngestState) -> String {
state.counts.errors,
);
}
let skipped_breakdown = render_skipped_breakdown(&state.counts.skipped_by_extension);
let skipped_breakdown = kebab_app::ingest_progress::render_skipped_breakdown(&state.counts.skipped_by_extension);
return format!(
"✓ ingest: {} docs ({} new, {} updated, {} unchanged, {} skipped{}), {} chunks indexed in {}s",
state.counts.scanned,

View File

@@ -29,7 +29,7 @@ git history.
**Spec contract impact**: design §6.2 의 `workspace.include` 항목 invalidate (frozen 그대로 두고 본 항목 + spec `tasks/p9/p9-fb-25-config-include-removal.md` 가 source of truth). design §3.x `IngestReport` + §2.4a `IngestEvent` 에 새 필드 / 새 warning 의미 추가 (additive).
**Tests added**: 5 신규 (kebab-config 단위 2: legacy include 무시 + WorkspaceCfg 필드 destructure / kebab-app 통합 1: skip_reason / kebab-app 통합 1: init_template 헤더 / kebab-tui 단위 2: status_line breakdown 완료/abort). 기존 워크스페이스 테스트 무수정 통과.
**Tests added**: 5 신규 (kebab-config 단위 2: legacy include 무시 + WorkspaceCfg 필드 destructure / kebab-app 통합 1: skip_reason / kebab-app 통합 1: init_template 헤더 / kebab-tui 단위 2: status_line breakdown 완료/abort) + 1 unit (kebab-app 의 render_skipped_breakdown). 기존 fixture 6 개 mechanical adapter 수정 (`tests/common/mod.rs` SourceScope, `tests/image_pipeline.rs` × 2 + `tests/pdf_pipeline.rs` 의 dead `include.push` 제거, `tests/ingest_report_snapshot.rs` + `kebab-cli/src/wire.rs` literal 에 `BTreeMap::new()` 추가, snapshot JSON 의 `skipped_by_extension` 필드). assertion 의미 변경 없음.
**Known limitation (deferred)**: