refactor(wire): ExpansionProgress 이벤트 + 렌더 제거
IngestEvent::ExpansionProgress variant + 직렬화 테스트 제거(AssetChunked / AssetTimings 유지). CLI/TUI 의 expansion 렌더 제거, AssetTimings 한 줄에서 expand 세그먼트 제거. ingest_progress.v1 schema 의 expansion_progress kind 제거, expansion_ms 설명을 "값 0 유지"로 갱신.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -50,18 +50,16 @@ pub struct AggregateCounts {
|
|||||||
/// < ( AssetStarted
|
/// < ( AssetStarted
|
||||||
/// [< (PdfOcrStarted < PdfOcrFinished)*]
|
/// [< (PdfOcrStarted < PdfOcrFinished)*]
|
||||||
/// [< AssetChunked]
|
/// [< AssetChunked]
|
||||||
/// [< ExpansionProgress*]
|
|
||||||
/// [< AssetTimings]
|
/// [< AssetTimings]
|
||||||
/// < AssetFinished )*
|
/// < AssetFinished )*
|
||||||
/// < (Completed | Aborted)
|
/// < (Completed | Aborted)
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// `[]` = optional. `PdfOcr*` is per-PDF asset only (v0.20.0 sub-item 1).
|
/// `[]` = optional. `PdfOcr*` is per-PDF asset only (v0.20.0 sub-item 1).
|
||||||
/// `AssetChunked` / `ExpansionProgress` / `AssetTimings` are the v0.24.0
|
/// `AssetChunked` / `AssetTimings` are the v0.24.0 asset-internal phase
|
||||||
/// asset-internal phase events: `AssetChunked` fires once right after
|
/// events: `AssetChunked` fires once right after chunking (markdown /
|
||||||
/// chunking (markdown / image / PDF); `ExpansionProgress` is a throttled
|
/// image / PDF); `AssetTimings` reports per-phase wall-clock once
|
||||||
/// counter through the alias-expansion loop (markdown, expansion enabled
|
/// (markdown only).
|
||||||
/// only); `AssetTimings` reports per-phase wall-clock once (markdown only).
|
|
||||||
///
|
///
|
||||||
/// Embed-batch events (`embed_batch_started` / `embed_batch_finished`
|
/// Embed-batch events (`embed_batch_started` / `embed_batch_finished`
|
||||||
/// in §2.4a) are reserved for a future iteration and are not emitted
|
/// in §2.4a) are reserved for a future iteration and are not emitted
|
||||||
@@ -98,26 +96,14 @@ pub enum IngestEvent {
|
|||||||
/// `idx/total` while its per-chunk phases churn. `chunks` is the chunk
|
/// `idx/total` while its per-chunk phases churn. `chunks` is the chunk
|
||||||
/// count for asset `idx`.
|
/// count for asset `idx`.
|
||||||
AssetChunked { idx: u32, total: u32, chunks: u32 },
|
AssetChunked { idx: u32, total: u32, chunks: u32 },
|
||||||
/// v0.24.0 (additive): throttled progress through the per-chunk
|
|
||||||
/// expansion (alias-LLM) loop — the slowest inner phase for large
|
|
||||||
/// documents (~1–4s per chunk against a remote GPU Ollama). `done` is
|
|
||||||
/// the number of chunks processed so far (cache hits included, so the
|
|
||||||
/// counter still advances on a warm re-run); `chunks` is the asset's
|
|
||||||
/// total chunk count. Emitted at most every 25 chunks or once per
|
|
||||||
/// second (see the loop in `ingest_one_asset`), plus a final
|
|
||||||
/// `done == chunks` frame.
|
|
||||||
ExpansionProgress {
|
|
||||||
idx: u32,
|
|
||||||
total: u32,
|
|
||||||
done: u32,
|
|
||||||
chunks: u32,
|
|
||||||
},
|
|
||||||
/// v0.24.0 (additive): per-phase wall-clock (milliseconds) for asset
|
/// v0.24.0 (additive): per-phase wall-clock (milliseconds) for asset
|
||||||
/// `idx`, emitted once the asset's markdown pipeline finishes. Lets a
|
/// `idx`, emitted once the asset's markdown pipeline finishes. Lets a
|
||||||
/// user see *where* the time went (parse / chunk / expansion / embed /
|
/// user see *where* the time went (parse / chunk / embed / store)
|
||||||
/// store) without parsing logs. Only the markdown path emits this; the
|
/// without parsing logs. Only the markdown path emits this; the
|
||||||
/// image / PDF paths surface `AssetChunked` but skip phase timing (their
|
/// image / PDF paths surface `AssetChunked` but skip phase timing (their
|
||||||
/// phase shapes differ — OCR / caption rather than expansion).
|
/// phase shapes differ — OCR / caption). `expansion_ms` is retained for
|
||||||
|
/// wire compatibility but is always 0 since doc-side expansion was
|
||||||
|
/// removed (HOTFIXES 2026-06-03).
|
||||||
AssetTimings {
|
AssetTimings {
|
||||||
idx: u32,
|
idx: u32,
|
||||||
total: u32,
|
total: u32,
|
||||||
@@ -265,26 +251,6 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn expansion_progress_serializes_with_discriminator() {
|
|
||||||
let ev = IngestEvent::ExpansionProgress {
|
|
||||||
idx: 1,
|
|
||||||
total: 5,
|
|
||||||
done: 25,
|
|
||||||
chunks: 200,
|
|
||||||
};
|
|
||||||
let v = serde_json::to_value(&ev).unwrap();
|
|
||||||
assert_eq!(
|
|
||||||
v.get("kind").and_then(|s| s.as_str()),
|
|
||||||
Some("expansion_progress")
|
|
||||||
);
|
|
||||||
assert_eq!(v.get("done").and_then(serde_json::Value::as_u64), Some(25));
|
|
||||||
assert_eq!(
|
|
||||||
v.get("chunks").and_then(serde_json::Value::as_u64),
|
|
||||||
Some(200)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn asset_timings_serializes_all_phase_fields() {
|
fn asset_timings_serializes_all_phase_fields() {
|
||||||
let ev = IngestEvent::AssetTimings {
|
let ev = IngestEvent::AssetTimings {
|
||||||
|
|||||||
@@ -157,11 +157,11 @@ impl ProgressDisplay {
|
|||||||
// in Completed handles the final state. No per-asset bar update
|
// in Completed handles the final state. No per-asset bar update
|
||||||
// here avoids the duplicate-frame artifact in TTY scrollback.
|
// here avoids the duplicate-frame artifact in TTY scrollback.
|
||||||
}
|
}
|
||||||
// v0.24.0: asset-internal phase visibility. AssetChunked /
|
// v0.24.0: asset-internal phase visibility. AssetChunked uses the
|
||||||
// ExpansionProgress use the bar *message* (live sub-progress for
|
// bar *message* (live sub-progress for the current asset) —
|
||||||
// the current asset) — distinct from the per-file position draw,
|
// distinct from the per-file position draw, so a single large
|
||||||
// so a single large document no longer looks frozen. AssetTimings
|
// document no longer looks frozen. AssetTimings prints a one-line
|
||||||
// prints a one-line breakdown when the asset finishes.
|
// breakdown when the asset finishes.
|
||||||
IngestEvent::AssetChunked { idx, total, chunks } => {
|
IngestEvent::AssetChunked { idx, total, chunks } => {
|
||||||
if let Some(bar) = self.bar.as_ref() {
|
if let Some(bar) = self.bar.as_ref() {
|
||||||
bar.set_message(format!("→ {chunks} chunks"));
|
bar.set_message(format!("→ {chunks} chunks"));
|
||||||
@@ -171,20 +171,9 @@ impl ProgressDisplay {
|
|||||||
let _ = writeln!(err, "ingest: {idx}/{total} → {chunks} chunks");
|
let _ = writeln!(err, "ingest: {idx}/{total} → {chunks} chunks");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
IngestEvent::ExpansionProgress {
|
|
||||||
done, chunks, ..
|
|
||||||
} => {
|
|
||||||
if let Some(bar) = self.bar.as_ref() {
|
|
||||||
bar.set_message(format!("별칭 확장 {done}/{chunks}"));
|
|
||||||
}
|
|
||||||
// Non-TTY: suppressed by default — throttled though it is, one
|
|
||||||
// line per emit would still spam CI logs. The bar message
|
|
||||||
// covers the interactive case; --json carries every frame.
|
|
||||||
}
|
|
||||||
IngestEvent::AssetTimings {
|
IngestEvent::AssetTimings {
|
||||||
parse_ms,
|
parse_ms,
|
||||||
chunk_ms,
|
chunk_ms,
|
||||||
expansion_ms,
|
|
||||||
embed_ms,
|
embed_ms,
|
||||||
store_ms,
|
store_ms,
|
||||||
..
|
..
|
||||||
@@ -196,10 +185,9 @@ impl ProgressDisplay {
|
|||||||
let mut err = std::io::stderr().lock();
|
let mut err = std::io::stderr().lock();
|
||||||
let _ = writeln!(
|
let _ = writeln!(
|
||||||
err,
|
err,
|
||||||
" ⏱ parse {} · chunk {} · expand {} · embed {} · store {}",
|
" ⏱ parse {} · chunk {} · embed {} · store {}",
|
||||||
fmt_ms(*parse_ms),
|
fmt_ms(*parse_ms),
|
||||||
fmt_ms(*chunk_ms),
|
fmt_ms(*chunk_ms),
|
||||||
fmt_ms(*expansion_ms),
|
|
||||||
fmt_ms(*embed_ms),
|
fmt_ms(*embed_ms),
|
||||||
fmt_ms(*store_ms),
|
fmt_ms(*store_ms),
|
||||||
);
|
);
|
||||||
@@ -289,7 +277,7 @@ fn emit_json(event: &IngestEvent) -> anyhow::Result<()> {
|
|||||||
|
|
||||||
/// Render a phase duration (milliseconds) compactly for the human-mode
|
/// Render a phase duration (milliseconds) compactly for the human-mode
|
||||||
/// `AssetTimings` line: `< 1000ms` stays in `ms`, larger spans collapse to
|
/// `AssetTimings` line: `< 1000ms` stays in `ms`, larger spans collapse to
|
||||||
/// one-decimal seconds so a 45-second expansion reads `45.0s`, not `45000ms`.
|
/// one-decimal seconds so a 45-second embed reads `45.0s`, not `45000ms`.
|
||||||
fn fmt_ms(ms: u64) -> String {
|
fn fmt_ms(ms: u64) -> String {
|
||||||
if ms >= 1000 {
|
if ms >= 1000 {
|
||||||
format!("{:.1}s", ms as f64 / 1000.0)
|
format!("{:.1}s", ms as f64 / 1000.0)
|
||||||
|
|||||||
@@ -160,7 +160,6 @@ fn apply_event(state: &mut IngestState, event: IngestEvent) {
|
|||||||
// per-asset counters, not sub-asset phase progress, so these are
|
// per-asset counters, not sub-asset phase progress, so these are
|
||||||
// no-ops here (the CLI / --json surfaces render them).
|
// no-ops here (the CLI / --json surfaces render them).
|
||||||
| IngestEvent::AssetChunked { .. }
|
| IngestEvent::AssetChunked { .. }
|
||||||
| IngestEvent::ExpansionProgress { .. }
|
|
||||||
| IngestEvent::AssetTimings { .. } => {}
|
| IngestEvent::AssetTimings { .. } => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,7 +15,6 @@
|
|||||||
"asset_started",
|
"asset_started",
|
||||||
"asset_finished",
|
"asset_finished",
|
||||||
"asset_chunked",
|
"asset_chunked",
|
||||||
"expansion_progress",
|
|
||||||
"asset_timings",
|
"asset_timings",
|
||||||
"embed_batch_started",
|
"embed_batch_started",
|
||||||
"embed_batch_finished",
|
"embed_batch_finished",
|
||||||
@@ -36,11 +35,10 @@
|
|||||||
"enum": ["new", "updated", "skipped", "error"],
|
"enum": ["new", "updated", "skipped", "error"],
|
||||||
"description": "asset_finished: per-asset outcome (mirrors `ingest_report.v1.items[].kind`)."
|
"description": "asset_finished: per-asset outcome (mirrors `ingest_report.v1.items[].kind`)."
|
||||||
},
|
},
|
||||||
"chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked / expansion_progress (v0.24.0): chunk count produced for this asset." },
|
"chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked (v0.24.0): chunk count produced for this asset." },
|
||||||
"done": { "type": "integer", "minimum": 0, "description": "expansion_progress (v0.24.0, additive): chunks processed so far in the per-chunk alias-expansion loop (cache hits included). Throttled: emitted at most every 25 chunks or once per second, plus a final frame where done == chunks." },
|
|
||||||
"parse_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): parse phase wall-clock (ms). Markdown path only." },
|
"parse_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): parse phase wall-clock (ms). Markdown path only." },
|
||||||
"chunk_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): chunk phase wall-clock (ms). Markdown path only." },
|
"chunk_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): chunk phase wall-clock (ms). Markdown path only." },
|
||||||
"expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): alias-expansion phase wall-clock (ms). Markdown path only; 0 when expansion is disabled." },
|
"expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): retained for wire compatibility but always 0 — doc-side expansion was removed (HOTFIXES 2026-06-03)." },
|
||||||
"embed_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): embed + vector phase wall-clock (ms) — embedding, vector upsert, and stale-vector purge. Markdown path only." },
|
"embed_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): embed + vector phase wall-clock (ms) — embedding, vector upsert, and stale-vector purge. Markdown path only." },
|
||||||
"store_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): SQLite persist phase wall-clock (ms) — put_asset/document/blocks/chunks only. Markdown path only." },
|
"store_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): SQLite persist phase wall-clock (ms) — put_asset/document/blocks/chunks only. Markdown path only." },
|
||||||
"n_chunks": { "type": "integer", "minimum": 0, "description": "embed_batch_started / embed_batch_finished: chunks in this embedding batch." },
|
"n_chunks": { "type": "integer", "minimum": 0, "description": "embed_batch_started / embed_batch_finished: chunks in this embedding batch." },
|
||||||
|
|||||||
Reference in New Issue
Block a user