chore(ingest): PR #204 회차1 리뷰 반영 — 버전 라벨 v0.26.0 → v0.27.0
신규 진행 로깅 표면(asset_phase / ocr_ms / caption_ms + progress.rs heartbeat·slowest 주석)이 v0.26.0 으로 잘못 표기돼 있던 것을 실제 추가 버전인 v0.27.0 으로 정정. wire schema 의 "추가 버전" 표기는 외부 통합이 참조하므로 정확해야 함. 로직 변경 없음(주석/doc 만 수정). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -96,7 +96,7 @@ pub enum IngestEvent {
|
||||
/// `idx/total` while its per-chunk phases churn. `chunks` is the chunk
|
||||
/// count for asset `idx`.
|
||||
AssetChunked { idx: u32, total: u32, chunks: u32 },
|
||||
/// v0.26.0 (additive): emitted when an asset enters a *slow* internal
|
||||
/// v0.27.0 (additive): emitted when an asset enters a *slow* internal
|
||||
/// phase, so the interactive progress bar can show **which** phase
|
||||
/// (and which model) is currently running instead of looking frozen.
|
||||
/// `phase` ∈ {`"ocr"`, `"caption"`, `"embed"`}; short phases
|
||||
@@ -121,8 +121,8 @@ pub enum IngestEvent {
|
||||
/// them so the slowest-asset summary attributes vision-model time
|
||||
/// correctly. `expansion_ms` is retained for wire compatibility but is
|
||||
/// always 0 since doc-side expansion was removed (HOTFIXES 2026-06-03).
|
||||
/// `ocr_ms` / `caption_ms` (v0.26.0) are additive with serde default 0
|
||||
/// so pre-v0.26.0 consumers deserialize cleanly.
|
||||
/// `ocr_ms` / `caption_ms` (v0.27.0) are additive with serde default 0
|
||||
/// so payloads from pre-v0.27.0 producers still deserialize cleanly.
|
||||
AssetTimings {
|
||||
idx: u32,
|
||||
total: u32,
|
||||
@@ -312,7 +312,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn asset_timings_ocr_caption_default_to_zero_for_legacy_wire() {
|
||||
// v0.26.0 additive: a pre-v0.26.0 wire payload omits ocr_ms /
|
||||
// v0.27.0 additive: a pre-v0.27.0 wire payload omits ocr_ms /
|
||||
// caption_ms; serde `default` must fill 0 so old producers stay
|
||||
// compatible.
|
||||
let legacy = serde_json::json!({
|
||||
@@ -339,7 +339,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn asset_phase_serializes_with_discriminator() {
|
||||
// v0.26.0 additive variant — `kind` must be snake_case
|
||||
// v0.27.0 additive variant — `kind` must be snake_case
|
||||
// `asset_phase`, `phase` is the slow-phase label, `model` the
|
||||
// model id (nullable).
|
||||
let ev = IngestEvent::AssetPhase {
|
||||
|
||||
@@ -1350,7 +1350,7 @@ fn ingest_one_asset(
|
||||
let store_ms = u64::try_from(t_store.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
// Embed + vector upsert (only when both sides are configured).
|
||||
// v0.26.0: surface the embed phase + model so a long embed run reads as
|
||||
// v0.27.0: surface the embed phase + model so a long embed run reads as
|
||||
// "embedding(<model>)…" rather than a frozen bar (markdown path too).
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
@@ -1578,7 +1578,7 @@ fn ingest_one_image_asset(
|
||||
let lang_hint = lang_hint_from_doc(&canonical);
|
||||
let now = time::OffsetDateTime::now_utc();
|
||||
let mut warning_notes: Vec<String> = Vec::new();
|
||||
// v0.26.0: vision phases (OCR / caption) are the usual bottleneck on an
|
||||
// v0.27.0: vision phases (OCR / caption) are the usual bottleneck on an
|
||||
// image-heavy vault and emitted no progress before — so the bar looked
|
||||
// frozen. Surface each as an `AssetPhase` and measure its wall-clock for
|
||||
// the slowest-asset summary.
|
||||
@@ -1771,7 +1771,7 @@ fn ingest_one_image_asset(
|
||||
}
|
||||
let embed_ms = u64::try_from(t_embed.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
// v0.26.0: per-phase timing for the image path — ocr_ms / caption_ms
|
||||
// v0.27.0: per-phase timing for the image path — ocr_ms / caption_ms
|
||||
// carry the vision-model cost so the slowest-asset summary attributes
|
||||
// an image-heavy run's bottleneck correctly.
|
||||
crate::ingest_progress::emit(
|
||||
@@ -2360,7 +2360,7 @@ fn ingest_one_pdf_asset(
|
||||
}
|
||||
let embed_ms = u64::try_from(t_embed.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
// v0.26.0: per-phase timing for the PDF path. `ocr_ms` reuses the
|
||||
// v0.27.0: per-phase timing for the PDF path. `ocr_ms` reuses the
|
||||
// page-OCR total already computed above so a scanned-PDF run's OCR cost
|
||||
// shows up in the slowest-asset summary; caption is markdown/image-only.
|
||||
crate::ingest_progress::emit(
|
||||
|
||||
Reference in New Issue
Block a user