refactor(wire): ExpansionProgress 이벤트 + 렌더 제거
IngestEvent::ExpansionProgress variant + 직렬화 테스트 제거(AssetChunked/AssetTimings 유지). CLI/TUI 의 expansion 렌더 제거, AssetTimings 한 줄에서 expand 세그먼트 제거. ingest_progress.v1 schema 의 expansion_progress kind 제거, expansion_ms 설명을 "값 0 유지"로 갱신. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -50,18 +50,16 @@ pub struct AggregateCounts {
|
||||
/// < ( AssetStarted
|
||||
/// [< (PdfOcrStarted < PdfOcrFinished)*]
|
||||
/// [< AssetChunked]
|
||||
/// [< ExpansionProgress*]
|
||||
/// [< AssetTimings]
|
||||
/// < AssetFinished )*
|
||||
/// < (Completed | Aborted)
|
||||
/// ```
|
||||
///
|
||||
/// `[]` = optional. `PdfOcr*` is per-PDF asset only (v0.20.0 sub-item 1).
|
||||
/// `AssetChunked` / `ExpansionProgress` / `AssetTimings` are the v0.24.0
|
||||
/// asset-internal phase events: `AssetChunked` fires once right after
|
||||
/// chunking (markdown / image / PDF); `ExpansionProgress` is a throttled
|
||||
/// counter through the alias-expansion loop (markdown, expansion enabled
|
||||
/// only); `AssetTimings` reports per-phase wall-clock once (markdown only).
|
||||
/// `AssetChunked` / `AssetTimings` are the v0.24.0 asset-internal phase
|
||||
/// events: `AssetChunked` fires once right after chunking (markdown /
|
||||
/// image / PDF); `AssetTimings` reports per-phase wall-clock once
|
||||
/// (markdown only).
|
||||
///
|
||||
/// Embed-batch events (`embed_batch_started` / `embed_batch_finished`
|
||||
/// in §2.4a) are reserved for a future iteration and are not emitted
|
||||
@@ -98,26 +96,14 @@ pub enum IngestEvent {
|
||||
/// `idx/total` while its per-chunk phases churn. `chunks` is the chunk
|
||||
/// count for asset `idx`.
|
||||
AssetChunked { idx: u32, total: u32, chunks: u32 },
|
||||
/// v0.24.0 (additive): throttled progress through the per-chunk
|
||||
/// expansion (alias-LLM) loop — the slowest inner phase for large
|
||||
/// documents (~1–4s per chunk against a remote GPU Ollama). `done` is
|
||||
/// the number of chunks processed so far (cache hits included, so the
|
||||
/// counter still advances on a warm re-run); `chunks` is the asset's
|
||||
/// total chunk count. Emitted at most every 25 chunks or once per
|
||||
/// second (see the loop in `ingest_one_asset`), plus a final
|
||||
/// `done == chunks` frame.
|
||||
ExpansionProgress {
|
||||
idx: u32,
|
||||
total: u32,
|
||||
done: u32,
|
||||
chunks: u32,
|
||||
},
|
||||
/// v0.24.0 (additive): per-phase wall-clock (milliseconds) for asset
|
||||
/// `idx`, emitted once the asset's markdown pipeline finishes. Lets a
|
||||
/// user see *where* the time went (parse / chunk / expansion / embed /
|
||||
/// store) without parsing logs. Only the markdown path emits this; the
|
||||
/// user see *where* the time went (parse / chunk / embed / store)
|
||||
/// without parsing logs. Only the markdown path emits this; the
|
||||
/// image / PDF paths surface `AssetChunked` but skip phase timing (their
|
||||
/// phase shapes differ — OCR / caption rather than expansion).
|
||||
/// phase shapes differ — OCR / caption). `expansion_ms` is retained for
|
||||
/// wire compatibility but is always 0 since doc-side expansion was
|
||||
/// removed (HOTFIXES 2026-06-03).
|
||||
AssetTimings {
|
||||
idx: u32,
|
||||
total: u32,
|
||||
@@ -265,26 +251,6 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expansion_progress_serializes_with_discriminator() {
|
||||
let ev = IngestEvent::ExpansionProgress {
|
||||
idx: 1,
|
||||
total: 5,
|
||||
done: 25,
|
||||
chunks: 200,
|
||||
};
|
||||
let v = serde_json::to_value(&ev).unwrap();
|
||||
assert_eq!(
|
||||
v.get("kind").and_then(|s| s.as_str()),
|
||||
Some("expansion_progress")
|
||||
);
|
||||
assert_eq!(v.get("done").and_then(serde_json::Value::as_u64), Some(25));
|
||||
assert_eq!(
|
||||
v.get("chunks").and_then(serde_json::Value::as_u64),
|
||||
Some(200)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn asset_timings_serializes_all_phase_fields() {
|
||||
let ev = IngestEvent::AssetTimings {
|
||||
|
||||
@@ -157,11 +157,11 @@ impl ProgressDisplay {
|
||||
// in Completed handles the final state. No per-asset bar update
|
||||
// here avoids the duplicate-frame artifact in TTY scrollback.
|
||||
}
|
||||
// v0.24.0: asset-internal phase visibility. AssetChunked /
|
||||
// ExpansionProgress use the bar *message* (live sub-progress for
|
||||
// the current asset) — distinct from the per-file position draw,
|
||||
// so a single large document no longer looks frozen. AssetTimings
|
||||
// prints a one-line breakdown when the asset finishes.
|
||||
// v0.24.0: asset-internal phase visibility. AssetChunked uses the
|
||||
// bar *message* (live sub-progress for the current asset) —
|
||||
// distinct from the per-file position draw, so a single large
|
||||
// document no longer looks frozen. AssetTimings prints a one-line
|
||||
// breakdown when the asset finishes.
|
||||
IngestEvent::AssetChunked { idx, total, chunks } => {
|
||||
if let Some(bar) = self.bar.as_ref() {
|
||||
bar.set_message(format!("→ {chunks} chunks"));
|
||||
@@ -171,20 +171,9 @@ impl ProgressDisplay {
|
||||
let _ = writeln!(err, "ingest: {idx}/{total} → {chunks} chunks");
|
||||
}
|
||||
}
|
||||
IngestEvent::ExpansionProgress {
|
||||
done, chunks, ..
|
||||
} => {
|
||||
if let Some(bar) = self.bar.as_ref() {
|
||||
bar.set_message(format!("별칭 확장 {done}/{chunks}"));
|
||||
}
|
||||
// Non-TTY: suppressed by default — throttled though it is, one
|
||||
// line per emit would still spam CI logs. The bar message
|
||||
// covers the interactive case; --json carries every frame.
|
||||
}
|
||||
IngestEvent::AssetTimings {
|
||||
parse_ms,
|
||||
chunk_ms,
|
||||
expansion_ms,
|
||||
embed_ms,
|
||||
store_ms,
|
||||
..
|
||||
@@ -196,10 +185,9 @@ impl ProgressDisplay {
|
||||
let mut err = std::io::stderr().lock();
|
||||
let _ = writeln!(
|
||||
err,
|
||||
" ⏱ parse {} · chunk {} · expand {} · embed {} · store {}",
|
||||
" ⏱ parse {} · chunk {} · embed {} · store {}",
|
||||
fmt_ms(*parse_ms),
|
||||
fmt_ms(*chunk_ms),
|
||||
fmt_ms(*expansion_ms),
|
||||
fmt_ms(*embed_ms),
|
||||
fmt_ms(*store_ms),
|
||||
);
|
||||
@@ -289,7 +277,7 @@ fn emit_json(event: &IngestEvent) -> anyhow::Result<()> {
|
||||
|
||||
/// Render a phase duration (milliseconds) compactly for the human-mode
|
||||
/// `AssetTimings` line: `< 1000ms` stays in `ms`, larger spans collapse to
|
||||
/// one-decimal seconds so a 45-second expansion reads `45.0s`, not `45000ms`.
|
||||
/// one-decimal seconds so a 45-second embed reads `45.0s`, not `45000ms`.
|
||||
fn fmt_ms(ms: u64) -> String {
|
||||
if ms >= 1000 {
|
||||
format!("{:.1}s", ms as f64 / 1000.0)
|
||||
|
||||
@@ -160,7 +160,6 @@ fn apply_event(state: &mut IngestState, event: IngestEvent) {
|
||||
// per-asset counters, not sub-asset phase progress, so these are
|
||||
// no-ops here (the CLI / --json surfaces render them).
|
||||
| IngestEvent::AssetChunked { .. }
|
||||
| IngestEvent::ExpansionProgress { .. }
|
||||
| IngestEvent::AssetTimings { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
"asset_started",
|
||||
"asset_finished",
|
||||
"asset_chunked",
|
||||
"expansion_progress",
|
||||
"asset_timings",
|
||||
"embed_batch_started",
|
||||
"embed_batch_finished",
|
||||
@@ -36,11 +35,10 @@
|
||||
"enum": ["new", "updated", "skipped", "error"],
|
||||
"description": "asset_finished: per-asset outcome (mirrors `ingest_report.v1.items[].kind`)."
|
||||
},
|
||||
"chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked / expansion_progress (v0.24.0): chunk count produced for this asset." },
|
||||
"done": { "type": "integer", "minimum": 0, "description": "expansion_progress (v0.24.0, additive): chunks processed so far in the per-chunk alias-expansion loop (cache hits included). Throttled: emitted at most every 25 chunks or once per second, plus a final frame where done == chunks." },
|
||||
"chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked (v0.24.0): chunk count produced for this asset." },
|
||||
"parse_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): parse phase wall-clock (ms). Markdown path only." },
|
||||
"chunk_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): chunk phase wall-clock (ms). Markdown path only." },
|
||||
"expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): alias-expansion phase wall-clock (ms). Markdown path only; 0 when expansion is disabled." },
|
||||
"expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): retained for wire compatibility but always 0 — doc-side expansion was removed (HOTFIXES 2026-06-03)." },
|
||||
"embed_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): embed + vector phase wall-clock (ms) — embedding, vector upsert, and stale-vector purge. Markdown path only." },
|
||||
"store_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): SQLite persist phase wall-clock (ms) — put_asset/document/blocks/chunks only. Markdown path only." },
|
||||
"n_chunks": { "type": "integer", "minimum": 0, "description": "embed_batch_started / embed_batch_finished: chunks in this embedding batch." },
|
||||
|
||||
Reference in New Issue
Block a user