From e4432a238872eb421d8dc390d52217328a1fae11 Mon Sep 17 00:00:00 2001 From: altair823 Date: Tue, 5 May 2026 12:35:10 +0000 Subject: [PATCH] =?UTF-8?q?review(p9-fb-25):=20=ED=9A=8C=EC=B0=A8=201=20ni?= =?UTF-8?q?t=20=EB=B0=98=EC=98=81=20=E2=80=94=20render=5Fskipped=5Fbreakdo?= =?UTF-8?q?wn=20=EB=8B=A8=EC=9D=BC=20source=20+=20NO=5FEXT=5FSENTINEL=20+?= =?UTF-8?q?=20=EC=B9=B4=EC=9A=B4=ED=8A=B8=20+=20deprecation=20=EB=AC=B8?= =?UTF-8?q?=EA=B5=AC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/ingest_progress.rs | 29 +++++++++++++++++++++++++ crates/kebab-app/src/lib.rs | 19 +++++++++++----- crates/kebab-cli/src/main.rs | 16 +------------- crates/kebab-config/src/lib.rs | 2 +- crates/kebab-tui/src/ingest_progress.rs | 16 ++------------ tasks/HOTFIXES.md | 2 +- 6 files changed, 47 insertions(+), 37 deletions(-) diff --git a/crates/kebab-app/src/ingest_progress.rs b/crates/kebab-app/src/ingest_progress.rs index 3f41caf..ef1df15 100644 --- a/crates/kebab-app/src/ingest_progress.rs +++ b/crates/kebab-app/src/ingest_progress.rs @@ -100,6 +100,20 @@ pub fn media_label(media: &kebab_core::MediaType) -> &'static str { } } +/// p9-fb-25: render `": A docx, B txt"` breakdown after the +/// `N skipped` count when the map is non-empty. Empty → empty +/// string (no extra punctuation). desc sort by count, ties broken +/// by key alphabetic. +pub fn render_skipped_breakdown(map: &std::collections::BTreeMap) -> String { + if map.is_empty() { + return String::new(); + } + let mut entries: Vec<_> = map.iter().collect(); + entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); + let parts: Vec = entries.iter().map(|(k, v)| format!("{v} {k}")).collect(); + format!(": {}", parts.join(", ")) +} + /// Best-effort send into an optional `mpsc::Sender`. A dropped receiver /// is silently absorbed — the ingest hot path must not stall on a slow /// consumer. Logged at `trace` for diagnostics. @@ -194,4 +208,19 @@ mod tests { other => panic!("unexpected event: {other:?}"), } } + + #[test] + fn render_skipped_breakdown_desc_sort_with_tiebreak() { + use std::collections::BTreeMap; + let mut m = BTreeMap::new(); + assert_eq!(render_skipped_breakdown(&m), ""); + m.insert("txt".to_string(), 1); + m.insert("docx".to_string(), 2); + m.insert("epub".to_string(), 1); + // 2 docx 먼저 (count desc), 그 다음 1 epub / 1 txt 는 alphabetic. + assert_eq!( + render_skipped_breakdown(&m), + ": 2 docx, 1 epub, 1 txt".to_string() + ); + } } diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index ca812f9..4a19a05 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -61,9 +61,16 @@ pub mod logging; pub mod reset; pub use app::App; -pub use ingest_progress::{AggregateCounts, IngestEvent}; +pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown}; pub use reset::{ResetReport, ResetScope}; +/// p9-fb-25: sentinel for files without an extension in +/// `IngestReport.skipped_by_extension` keys + `IngestItem.warnings` +/// `unsupported media type: ...` line. Wire schema description +/// references this literal — changing the sentinel is a wire- +/// compatibility break. +pub const NO_EXT_SENTINEL: &str = ""; + /// Parser-version label persisted in `documents.parser_version` for /// every Markdown file ingested through the `kb-parse-md` pipeline. /// Kept in lock-step with the literal used in the `kb-store-sqlite` @@ -830,7 +837,7 @@ fn try_skip_unchanged( /// p9-fb-25: extract the lowercase extension (no leading dot) from a /// workspace path for use in the `unsupported media type: .X` warning -/// and `IngestReport.skipped_by_extension` key. Returns `""` +/// and `IngestReport.skipped_by_extension` key. Returns [`NO_EXT_SENTINEL`] /// for paths with no extension. Always lowercase so `Foo.DOCX` and /// `bar.docx` aggregate under the same key. fn ext_for_skip_warning(path: &str) -> String { @@ -838,16 +845,16 @@ fn ext_for_skip_warning(path: &str) -> String { .extension() .and_then(|s| s.to_str()) .map(|s| s.to_ascii_lowercase()) - .unwrap_or_else(|| "".to_string()) + .unwrap_or_else(|| NO_EXT_SENTINEL.to_string()) } /// p9-fb-25: render the `IngestItem.warnings` line for a Skipped -/// asset. `` sentinel renders without a leading dot; +/// asset. [`NO_EXT_SENTINEL`] renders without a leading dot; /// everything else gets `.ext` form. fn unsupported_media_warning(path: &str) -> String { let ext = ext_for_skip_warning(path); - if ext == "" { - "unsupported media type: ".to_string() + if ext == NO_EXT_SENTINEL { + format!("unsupported media type: {NO_EXT_SENTINEL}") } else { format!("unsupported media type: .{ext}") } diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 30a2c4a..b46b2e3 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -4,20 +4,6 @@ use std::path::PathBuf; use std::process::ExitCode; -/// p9-fb-25: render `": A docx, B txt"` breakdown after the -/// `N skipped` count when the map is non-empty. Empty → empty -/// string (no extra punctuation). desc sort by count, ties broken -/// by key alphabetic. -fn render_skipped_breakdown(map: &std::collections::BTreeMap) -> String { - if map.is_empty() { - return String::new(); - } - let mut entries: Vec<_> = map.iter().collect(); - entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); - let parts: Vec = entries.iter().map(|(k, v)| format!("{v} {k}")).collect(); - format!(": {}", parts.join(", ")) -} - use clap::{Parser, Subcommand}; use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal}; @@ -385,7 +371,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { if cli.json { println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?); } else { - let skipped_breakdown = render_skipped_breakdown(&report.skipped_by_extension); + let skipped_breakdown = kebab_app::render_skipped_breakdown(&report.skipped_by_extension); println!( "scanned {} new {} updated {} skipped {}{} errors {} ({} ms)", report.scanned, diff --git a/crates/kebab-config/src/lib.rs b/crates/kebab-config/src/lib.rs index 65e76fb..0f2e823 100644 --- a/crates/kebab-config/src/lib.rs +++ b/crates/kebab-config/src/lib.rs @@ -411,7 +411,7 @@ impl Config { tracing::warn!( target: "kebab-config", config = %path.display(), - "deprecated config: `workspace.include` 필드는 더 이상 사용되지 않습니다 (p9-fb-25). 처리 가능한 형식 (md / png / jpg / pdf) 은 extractor 가 자동 결정. 다음 버전부터 config 갱신 권장." + "deprecated config: `workspace.include` 필드는 더 이상 사용되지 않습니다 (p9-fb-25, v0.2.1+). 처리 가능한 형식 (md / png / jpg / pdf) 은 extractor 가 자동 결정. config 에서 이 필드를 제거해도 안전 — 더 이상 enforce 안 됨." ); }); } diff --git a/crates/kebab-tui/src/ingest_progress.rs b/crates/kebab-tui/src/ingest_progress.rs index e26a396..d6c6392 100644 --- a/crates/kebab-tui/src/ingest_progress.rs +++ b/crates/kebab-tui/src/ingest_progress.rs @@ -25,18 +25,6 @@ use kebab_core::SourceScope; use crate::app::{App, IngestState, TERMINAL_LINE_HOLD_SECS}; -/// p9-fb-25: render `": A docx, B txt"` breakdown after the `N skipped` -/// count when the map is non-empty. desc sort by count, ties by key. -fn render_skipped_breakdown(map: &std::collections::BTreeMap) -> String { - if map.is_empty() { - return String::new(); - } - let mut entries: Vec<_> = map.iter().collect(); - entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0))); - let parts: Vec = entries.iter().map(|(k, v)| format!("{v} {k}")).collect(); - format!(": {}", parts.join(", ")) -} - /// Already-running guard. Returns `Err` if `app.ingest_state` is /// already populated — pressing `r` twice in a row should not spawn /// two parallel workers (SQLite is mutexed but Lance writes can race @@ -187,7 +175,7 @@ pub fn status_line(state: &IngestState) -> String { let elapsed = state.started_at.elapsed(); let secs = elapsed.as_secs(); if state.aborted { - let skipped_breakdown = render_skipped_breakdown(&state.counts.skipped_by_extension); + let skipped_breakdown = kebab_app::ingest_progress::render_skipped_breakdown(&state.counts.skipped_by_extension); return format!( "✗ ingest aborted at {}/{} after {}s (new={} updated={} unchanged={} skipped={}{} errors={})", state.counts.scanned.saturating_sub(state.counts.errors), @@ -201,7 +189,7 @@ pub fn status_line(state: &IngestState) -> String { state.counts.errors, ); } - let skipped_breakdown = render_skipped_breakdown(&state.counts.skipped_by_extension); + let skipped_breakdown = kebab_app::ingest_progress::render_skipped_breakdown(&state.counts.skipped_by_extension); return format!( "✓ ingest: {} docs ({} new, {} updated, {} unchanged, {} skipped{}), {} chunks indexed in {}s", state.counts.scanned, diff --git a/tasks/HOTFIXES.md b/tasks/HOTFIXES.md index ef6092e..a2727d7 100644 --- a/tasks/HOTFIXES.md +++ b/tasks/HOTFIXES.md @@ -29,7 +29,7 @@ git history. **Spec contract impact**: design §6.2 의 `workspace.include` 항목 invalidate (frozen 그대로 두고 본 항목 + spec `tasks/p9/p9-fb-25-config-include-removal.md` 가 source of truth). design §3.x `IngestReport` + §2.4a `IngestEvent` 에 새 필드 / 새 warning 의미 추가 (additive). -**Tests added**: 약 5 신규 (kebab-config 단위 2: legacy include 무시 + WorkspaceCfg 필드 destructure / kebab-app 통합 1: skip_reason / kebab-app 통합 1: init_template 헤더 / kebab-tui 단위 2: status_line breakdown 완료/abort). 기존 워크스페이스 테스트 무수정 통과. +**Tests added**: 5 신규 (kebab-config 단위 2: legacy include 무시 + WorkspaceCfg 필드 destructure / kebab-app 통합 1: skip_reason / kebab-app 통합 1: init_template 헤더 / kebab-tui 단위 2: status_line breakdown 완료/abort) + 1 unit (kebab-app 의 render_skipped_breakdown). 기존 fixture 6 개 mechanical adapter 수정 (`tests/common/mod.rs` SourceScope, `tests/image_pipeline.rs` × 2 + `tests/pdf_pipeline.rs` 의 dead `include.push` 제거, `tests/ingest_report_snapshot.rs` + `kebab-cli/src/wire.rs` literal 에 `BTreeMap::new()` 추가, snapshot JSON 의 `skipped_by_extension` 필드). assertion 의미 변경 없음. **Known limitation (deferred)**: