diff --git a/crates/kebab-app/src/ingest_progress.rs b/crates/kebab-app/src/ingest_progress.rs index a797547..46ad000 100644 --- a/crates/kebab-app/src/ingest_progress.rs +++ b/crates/kebab-app/src/ingest_progress.rs @@ -50,18 +50,16 @@ pub struct AggregateCounts { /// < ( AssetStarted /// [< (PdfOcrStarted < PdfOcrFinished)*] /// [< AssetChunked] -/// [< ExpansionProgress*] /// [< AssetTimings] /// < AssetFinished )* /// < (Completed | Aborted) /// ``` /// /// `[]` = optional. `PdfOcr*` is per-PDF asset only (v0.20.0 sub-item 1). -/// `AssetChunked` / `ExpansionProgress` / `AssetTimings` are the v0.24.0 -/// asset-internal phase events: `AssetChunked` fires once right after -/// chunking (markdown / image / PDF); `ExpansionProgress` is a throttled -/// counter through the alias-expansion loop (markdown, expansion enabled -/// only); `AssetTimings` reports per-phase wall-clock once (markdown only). +/// `AssetChunked` / `AssetTimings` are the v0.24.0 asset-internal phase +/// events: `AssetChunked` fires once right after chunking (markdown / +/// image / PDF); `AssetTimings` reports per-phase wall-clock once +/// (markdown only). /// /// Embed-batch events (`embed_batch_started` / `embed_batch_finished` /// in §2.4a) are reserved for a future iteration and are not emitted @@ -98,26 +96,14 @@ pub enum IngestEvent { /// `idx/total` while its per-chunk phases churn. `chunks` is the chunk /// count for asset `idx`. AssetChunked { idx: u32, total: u32, chunks: u32 }, - /// v0.24.0 (additive): throttled progress through the per-chunk - /// expansion (alias-LLM) loop — the slowest inner phase for large - /// documents (~1–4s per chunk against a remote GPU Ollama). `done` is - /// the number of chunks processed so far (cache hits included, so the - /// counter still advances on a warm re-run); `chunks` is the asset's - /// total chunk count. Emitted at most every 25 chunks or once per - /// second (see the loop in `ingest_one_asset`), plus a final - /// `done == chunks` frame. - ExpansionProgress { - idx: u32, - total: u32, - done: u32, - chunks: u32, - }, /// v0.24.0 (additive): per-phase wall-clock (milliseconds) for asset /// `idx`, emitted once the asset's markdown pipeline finishes. Lets a - /// user see *where* the time went (parse / chunk / expansion / embed / - /// store) without parsing logs. Only the markdown path emits this; the + /// user see *where* the time went (parse / chunk / embed / store) + /// without parsing logs. Only the markdown path emits this; the /// image / PDF paths surface `AssetChunked` but skip phase timing (their - /// phase shapes differ — OCR / caption rather than expansion). + /// phase shapes differ — OCR / caption). `expansion_ms` is retained for + /// wire compatibility but is always 0 since doc-side expansion was + /// removed (HOTFIXES 2026-06-03). AssetTimings { idx: u32, total: u32, @@ -265,26 +251,6 @@ mod tests { ); } - #[test] - fn expansion_progress_serializes_with_discriminator() { - let ev = IngestEvent::ExpansionProgress { - idx: 1, - total: 5, - done: 25, - chunks: 200, - }; - let v = serde_json::to_value(&ev).unwrap(); - assert_eq!( - v.get("kind").and_then(|s| s.as_str()), - Some("expansion_progress") - ); - assert_eq!(v.get("done").and_then(serde_json::Value::as_u64), Some(25)); - assert_eq!( - v.get("chunks").and_then(serde_json::Value::as_u64), - Some(200) - ); - } - #[test] fn asset_timings_serializes_all_phase_fields() { let ev = IngestEvent::AssetTimings { diff --git a/crates/kebab-cli/src/progress.rs b/crates/kebab-cli/src/progress.rs index 9c1babd..0e2783e 100644 --- a/crates/kebab-cli/src/progress.rs +++ b/crates/kebab-cli/src/progress.rs @@ -157,11 +157,11 @@ impl ProgressDisplay { // in Completed handles the final state. No per-asset bar update // here avoids the duplicate-frame artifact in TTY scrollback. } - // v0.24.0: asset-internal phase visibility. AssetChunked / - // ExpansionProgress use the bar *message* (live sub-progress for - // the current asset) — distinct from the per-file position draw, - // so a single large document no longer looks frozen. AssetTimings - // prints a one-line breakdown when the asset finishes. + // v0.24.0: asset-internal phase visibility. AssetChunked uses the + // bar *message* (live sub-progress for the current asset) — + // distinct from the per-file position draw, so a single large + // document no longer looks frozen. AssetTimings prints a one-line + // breakdown when the asset finishes. IngestEvent::AssetChunked { idx, total, chunks } => { if let Some(bar) = self.bar.as_ref() { bar.set_message(format!("→ {chunks} chunks")); @@ -171,20 +171,9 @@ impl ProgressDisplay { let _ = writeln!(err, "ingest: {idx}/{total} → {chunks} chunks"); } } - IngestEvent::ExpansionProgress { - done, chunks, .. - } => { - if let Some(bar) = self.bar.as_ref() { - bar.set_message(format!("별칭 확장 {done}/{chunks}")); - } - // Non-TTY: suppressed by default — throttled though it is, one - // line per emit would still spam CI logs. The bar message - // covers the interactive case; --json carries every frame. - } IngestEvent::AssetTimings { parse_ms, chunk_ms, - expansion_ms, embed_ms, store_ms, .. @@ -196,10 +185,9 @@ impl ProgressDisplay { let mut err = std::io::stderr().lock(); let _ = writeln!( err, - " ⏱ parse {} · chunk {} · expand {} · embed {} · store {}", + " ⏱ parse {} · chunk {} · embed {} · store {}", fmt_ms(*parse_ms), fmt_ms(*chunk_ms), - fmt_ms(*expansion_ms), fmt_ms(*embed_ms), fmt_ms(*store_ms), ); @@ -289,7 +277,7 @@ fn emit_json(event: &IngestEvent) -> anyhow::Result<()> { /// Render a phase duration (milliseconds) compactly for the human-mode /// `AssetTimings` line: `< 1000ms` stays in `ms`, larger spans collapse to -/// one-decimal seconds so a 45-second expansion reads `45.0s`, not `45000ms`. +/// one-decimal seconds so a 45-second embed reads `45.0s`, not `45000ms`. fn fmt_ms(ms: u64) -> String { if ms >= 1000 { format!("{:.1}s", ms as f64 / 1000.0) diff --git a/crates/kebab-tui/src/ingest_progress.rs b/crates/kebab-tui/src/ingest_progress.rs index b94510c..76f56b2 100644 --- a/crates/kebab-tui/src/ingest_progress.rs +++ b/crates/kebab-tui/src/ingest_progress.rs @@ -160,7 +160,6 @@ fn apply_event(state: &mut IngestState, event: IngestEvent) { // per-asset counters, not sub-asset phase progress, so these are // no-ops here (the CLI / --json surfaces render them). | IngestEvent::AssetChunked { .. } - | IngestEvent::ExpansionProgress { .. } | IngestEvent::AssetTimings { .. } => {} } } diff --git a/docs/wire-schema/v1/ingest_progress.schema.json b/docs/wire-schema/v1/ingest_progress.schema.json index ef2889a..4919754 100644 --- a/docs/wire-schema/v1/ingest_progress.schema.json +++ b/docs/wire-schema/v1/ingest_progress.schema.json @@ -15,7 +15,6 @@ "asset_started", "asset_finished", "asset_chunked", - "expansion_progress", "asset_timings", "embed_batch_started", "embed_batch_finished", @@ -36,11 +35,10 @@ "enum": ["new", "updated", "skipped", "error"], "description": "asset_finished: per-asset outcome (mirrors `ingest_report.v1.items[].kind`)." }, - "chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked / expansion_progress (v0.24.0): chunk count produced for this asset." }, - "done": { "type": "integer", "minimum": 0, "description": "expansion_progress (v0.24.0, additive): chunks processed so far in the per-chunk alias-expansion loop (cache hits included). Throttled: emitted at most every 25 chunks or once per second, plus a final frame where done == chunks." }, + "chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked (v0.24.0): chunk count produced for this asset." }, "parse_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): parse phase wall-clock (ms). Markdown path only." }, "chunk_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): chunk phase wall-clock (ms). Markdown path only." }, - "expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): alias-expansion phase wall-clock (ms). Markdown path only; 0 when expansion is disabled." }, + "expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): retained for wire compatibility but always 0 — doc-side expansion was removed (HOTFIXES 2026-06-03)." }, "embed_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): embed + vector phase wall-clock (ms) — embedding, vector upsert, and stale-vector purge. Markdown path only." }, "store_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): SQLite persist phase wall-clock (ms) — put_asset/document/blocks/chunks only. Markdown path only." }, "n_chunks": { "type": "integer", "minimum": 0, "description": "embed_batch_started / embed_batch_finished: chunks in this embedding batch." },