review(p9-fb-25): 회차 1 nit 반영 — render_skipped_breakdown 단일 source + NO_EXT_SENTINEL + 카운트 + deprecation 문구
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -100,6 +100,20 @@ pub fn media_label(media: &kebab_core::MediaType) -> &'static str {
|
||||
}
|
||||
}
|
||||
|
||||
/// p9-fb-25: render `": A docx, B txt"` breakdown after the
|
||||
/// `N skipped` count when the map is non-empty. Empty → empty
|
||||
/// string (no extra punctuation). desc sort by count, ties broken
|
||||
/// by key alphabetic.
|
||||
pub fn render_skipped_breakdown(map: &std::collections::BTreeMap<String, u32>) -> String {
|
||||
if map.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
let mut entries: Vec<_> = map.iter().collect();
|
||||
entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
|
||||
let parts: Vec<String> = entries.iter().map(|(k, v)| format!("{v} {k}")).collect();
|
||||
format!(": {}", parts.join(", "))
|
||||
}
|
||||
|
||||
/// Best-effort send into an optional `mpsc::Sender`. A dropped receiver
|
||||
/// is silently absorbed — the ingest hot path must not stall on a slow
|
||||
/// consumer. Logged at `trace` for diagnostics.
|
||||
@@ -194,4 +208,19 @@ mod tests {
|
||||
other => panic!("unexpected event: {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn render_skipped_breakdown_desc_sort_with_tiebreak() {
|
||||
use std::collections::BTreeMap;
|
||||
let mut m = BTreeMap::new();
|
||||
assert_eq!(render_skipped_breakdown(&m), "");
|
||||
m.insert("txt".to_string(), 1);
|
||||
m.insert("docx".to_string(), 2);
|
||||
m.insert("epub".to_string(), 1);
|
||||
// 2 docx 먼저 (count desc), 그 다음 1 epub / 1 txt 는 alphabetic.
|
||||
assert_eq!(
|
||||
render_skipped_breakdown(&m),
|
||||
": 2 docx, 1 epub, 1 txt".to_string()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,9 +61,16 @@ pub mod logging;
|
||||
pub mod reset;
|
||||
|
||||
pub use app::App;
|
||||
pub use ingest_progress::{AggregateCounts, IngestEvent};
|
||||
pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
|
||||
pub use reset::{ResetReport, ResetScope};
|
||||
|
||||
/// p9-fb-25: sentinel for files without an extension in
|
||||
/// `IngestReport.skipped_by_extension` keys + `IngestItem.warnings`
|
||||
/// `unsupported media type: ...` line. Wire schema description
|
||||
/// references this literal — changing the sentinel is a wire-
|
||||
/// compatibility break.
|
||||
pub const NO_EXT_SENTINEL: &str = "<no-ext>";
|
||||
|
||||
/// Parser-version label persisted in `documents.parser_version` for
|
||||
/// every Markdown file ingested through the `kb-parse-md` pipeline.
|
||||
/// Kept in lock-step with the literal used in the `kb-store-sqlite`
|
||||
@@ -830,7 +837,7 @@ fn try_skip_unchanged(
|
||||
|
||||
/// p9-fb-25: extract the lowercase extension (no leading dot) from a
|
||||
/// workspace path for use in the `unsupported media type: .X` warning
|
||||
/// and `IngestReport.skipped_by_extension` key. Returns `"<no-ext>"`
|
||||
/// and `IngestReport.skipped_by_extension` key. Returns [`NO_EXT_SENTINEL`]
|
||||
/// for paths with no extension. Always lowercase so `Foo.DOCX` and
|
||||
/// `bar.docx` aggregate under the same key.
|
||||
fn ext_for_skip_warning(path: &str) -> String {
|
||||
@@ -838,16 +845,16 @@ fn ext_for_skip_warning(path: &str) -> String {
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
.map(|s| s.to_ascii_lowercase())
|
||||
.unwrap_or_else(|| "<no-ext>".to_string())
|
||||
.unwrap_or_else(|| NO_EXT_SENTINEL.to_string())
|
||||
}
|
||||
|
||||
/// p9-fb-25: render the `IngestItem.warnings` line for a Skipped
|
||||
/// asset. `<no-ext>` sentinel renders without a leading dot;
|
||||
/// asset. [`NO_EXT_SENTINEL`] renders without a leading dot;
|
||||
/// everything else gets `.ext` form.
|
||||
fn unsupported_media_warning(path: &str) -> String {
|
||||
let ext = ext_for_skip_warning(path);
|
||||
if ext == "<no-ext>" {
|
||||
"unsupported media type: <no-ext>".to_string()
|
||||
if ext == NO_EXT_SENTINEL {
|
||||
format!("unsupported media type: {NO_EXT_SENTINEL}")
|
||||
} else {
|
||||
format!("unsupported media type: .{ext}")
|
||||
}
|
||||
|
||||
@@ -4,20 +4,6 @@
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitCode;
|
||||
|
||||
/// p9-fb-25: render `": A docx, B txt"` breakdown after the
|
||||
/// `N skipped` count when the map is non-empty. Empty → empty
|
||||
/// string (no extra punctuation). desc sort by count, ties broken
|
||||
/// by key alphabetic.
|
||||
fn render_skipped_breakdown(map: &std::collections::BTreeMap<String, u32>) -> String {
|
||||
if map.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
let mut entries: Vec<_> = map.iter().collect();
|
||||
entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
|
||||
let parts: Vec<String> = entries.iter().map(|(k, v)| format!("{v} {k}")).collect();
|
||||
format!(": {}", parts.join(", "))
|
||||
}
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
|
||||
@@ -385,7 +371,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
|
||||
} else {
|
||||
let skipped_breakdown = render_skipped_breakdown(&report.skipped_by_extension);
|
||||
let skipped_breakdown = kebab_app::render_skipped_breakdown(&report.skipped_by_extension);
|
||||
println!(
|
||||
"scanned {} new {} updated {} skipped {}{} errors {} ({} ms)",
|
||||
report.scanned,
|
||||
|
||||
@@ -411,7 +411,7 @@ impl Config {
|
||||
tracing::warn!(
|
||||
target: "kebab-config",
|
||||
config = %path.display(),
|
||||
"deprecated config: `workspace.include` 필드는 더 이상 사용되지 않습니다 (p9-fb-25). 처리 가능한 형식 (md / png / jpg / pdf) 은 extractor 가 자동 결정. 다음 버전부터 config 갱신 권장."
|
||||
"deprecated config: `workspace.include` 필드는 더 이상 사용되지 않습니다 (p9-fb-25, v0.2.1+). 처리 가능한 형식 (md / png / jpg / pdf) 은 extractor 가 자동 결정. config 에서 이 필드를 제거해도 안전 — 더 이상 enforce 안 됨."
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -25,18 +25,6 @@ use kebab_core::SourceScope;
|
||||
|
||||
use crate::app::{App, IngestState, TERMINAL_LINE_HOLD_SECS};
|
||||
|
||||
/// p9-fb-25: render `": A docx, B txt"` breakdown after the `N skipped`
|
||||
/// count when the map is non-empty. desc sort by count, ties by key.
|
||||
fn render_skipped_breakdown(map: &std::collections::BTreeMap<String, u32>) -> String {
|
||||
if map.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
let mut entries: Vec<_> = map.iter().collect();
|
||||
entries.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
|
||||
let parts: Vec<String> = entries.iter().map(|(k, v)| format!("{v} {k}")).collect();
|
||||
format!(": {}", parts.join(", "))
|
||||
}
|
||||
|
||||
/// Already-running guard. Returns `Err` if `app.ingest_state` is
|
||||
/// already populated — pressing `r` twice in a row should not spawn
|
||||
/// two parallel workers (SQLite is mutexed but Lance writes can race
|
||||
@@ -187,7 +175,7 @@ pub fn status_line(state: &IngestState) -> String {
|
||||
let elapsed = state.started_at.elapsed();
|
||||
let secs = elapsed.as_secs();
|
||||
if state.aborted {
|
||||
let skipped_breakdown = render_skipped_breakdown(&state.counts.skipped_by_extension);
|
||||
let skipped_breakdown = kebab_app::ingest_progress::render_skipped_breakdown(&state.counts.skipped_by_extension);
|
||||
return format!(
|
||||
"✗ ingest aborted at {}/{} after {}s (new={} updated={} unchanged={} skipped={}{} errors={})",
|
||||
state.counts.scanned.saturating_sub(state.counts.errors),
|
||||
@@ -201,7 +189,7 @@ pub fn status_line(state: &IngestState) -> String {
|
||||
state.counts.errors,
|
||||
);
|
||||
}
|
||||
let skipped_breakdown = render_skipped_breakdown(&state.counts.skipped_by_extension);
|
||||
let skipped_breakdown = kebab_app::ingest_progress::render_skipped_breakdown(&state.counts.skipped_by_extension);
|
||||
return format!(
|
||||
"✓ ingest: {} docs ({} new, {} updated, {} unchanged, {} skipped{}), {} chunks indexed in {}s",
|
||||
state.counts.scanned,
|
||||
|
||||
@@ -29,7 +29,7 @@ git history.
|
||||
|
||||
**Spec contract impact**: design §6.2 의 `workspace.include` 항목 invalidate (frozen 그대로 두고 본 항목 + spec `tasks/p9/p9-fb-25-config-include-removal.md` 가 source of truth). design §3.x `IngestReport` + §2.4a `IngestEvent` 에 새 필드 / 새 warning 의미 추가 (additive).
|
||||
|
||||
**Tests added**: 약 5 신규 (kebab-config 단위 2: legacy include 무시 + WorkspaceCfg 필드 destructure / kebab-app 통합 1: skip_reason / kebab-app 통합 1: init_template 헤더 / kebab-tui 단위 2: status_line breakdown 완료/abort). 기존 워크스페이스 테스트 무수정 통과.
|
||||
**Tests added**: 5 신규 (kebab-config 단위 2: legacy include 무시 + WorkspaceCfg 필드 destructure / kebab-app 통합 1: skip_reason / kebab-app 통합 1: init_template 헤더 / kebab-tui 단위 2: status_line breakdown 완료/abort) + 1 unit (kebab-app 의 render_skipped_breakdown). 기존 fixture 6 개 mechanical adapter 수정 (`tests/common/mod.rs` SourceScope, `tests/image_pipeline.rs` × 2 + `tests/pdf_pipeline.rs` 의 dead `include.push` 제거, `tests/ingest_report_snapshot.rs` + `kebab-cli/src/wire.rs` literal 에 `BTreeMap::new()` 추가, snapshot JSON 의 `skipped_by_extension` 필드). assertion 의미 변경 없음.
|
||||
|
||||
**Known limitation (deferred)**:
|
||||
|
||||
|
||||
Reference in New Issue
Block a user