From 6c9c8df43ed3bbb3c0e5060ca76c138d95d01c83 Mon Sep 17 00:00:00 2001 From: altair823 Date: Wed, 3 Jun 2026 11:02:16 +0000 Subject: [PATCH] =?UTF-8?q?chore(version):=200.27.0=20=E2=86=92=200.26.1?= =?UTF-8?q?=20=E2=80=94=20=EC=83=88=20bump=20=EA=B7=9C=EC=B9=99=EC=83=81?= =?UTF-8?q?=20patch?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 진행 로그 개선은 검색·색인 결과 불변 + 새 명령/플래그/config 없음 + additive-only wire(asset_phase)라 CLAUDE.md 신규 규칙(기능/인터페이스 변경=minor, 없으면 patch)상 patch 가 맞음. version·라벨·HOTFIXES 헤더를 0.26.1 로 정정. Co-Authored-By: Claude Opus 4.8 (1M context) --- Cargo.lock | 48 +++++++++---------- Cargo.toml | 2 +- HANDOFF.md | 2 +- crates/kebab-app/src/ingest_progress.rs | 10 ++-- crates/kebab-app/src/lib.rs | 8 ++-- crates/kebab-cli/src/progress.rs | 32 ++++++------- crates/kebab-tui/src/ingest_progress.rs | 2 +- .../v1/ingest_progress.schema.json | 8 ++-- tasks/HOTFIXES.md | 2 +- 9 files changed, 57 insertions(+), 57 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91a8535..bb18b21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4724,7 +4724,7 @@ dependencies = [ [[package]] name = "kebab-app" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "base64 0.22.1", @@ -4772,7 +4772,7 @@ dependencies = [ [[package]] name = "kebab-chunk" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "blake3", @@ -4790,7 +4790,7 @@ dependencies = [ [[package]] name = "kebab-cli" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "clap", @@ -4811,7 +4811,7 @@ dependencies = [ [[package]] name = "kebab-config" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "dirs 5.0.1", @@ -4827,7 +4827,7 @@ dependencies = [ [[package]] name = "kebab-core" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "blake3", @@ -4841,7 +4841,7 @@ dependencies = [ [[package]] name = "kebab-embed" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "blake3", @@ -4855,7 
+4855,7 @@ dependencies = [ [[package]] name = "kebab-embed-candle" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "candle-core", @@ -4875,7 +4875,7 @@ dependencies = [ [[package]] name = "kebab-embed-local" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "fastembed", @@ -4888,7 +4888,7 @@ dependencies = [ [[package]] name = "kebab-embed-ollama" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "kebab-config", @@ -4903,7 +4903,7 @@ dependencies = [ [[package]] name = "kebab-eval" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "kebab-app", @@ -4922,7 +4922,7 @@ dependencies = [ [[package]] name = "kebab-llm" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "kebab-core", @@ -4931,7 +4931,7 @@ dependencies = [ [[package]] name = "kebab-llm-local" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "kebab-config", @@ -4948,7 +4948,7 @@ dependencies = [ [[package]] name = "kebab-mcp" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "kebab-app", @@ -4966,7 +4966,7 @@ dependencies = [ [[package]] name = "kebab-nli" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "hf-hub", @@ -4981,7 +4981,7 @@ dependencies = [ [[package]] name = "kebab-parse-code" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "gix", @@ -5004,7 +5004,7 @@ dependencies = [ [[package]] name = "kebab-parse-image" -version = "0.27.0" +version = "0.26.1" dependencies = [ "ab_glyph", "anyhow", @@ -5028,7 +5028,7 @@ dependencies = [ [[package]] name = "kebab-parse-md" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "kebab-core", @@ -5045,7 +5045,7 @@ dependencies = [ [[package]] name = "kebab-parse-pdf" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "blake3", @@ -5060,7 +5060,7 @@ dependencies = [ [[package]] name = "kebab-rag" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "blake3", @@ 
-5082,7 +5082,7 @@ dependencies = [ [[package]] name = "kebab-search" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "globset", @@ -5101,7 +5101,7 @@ dependencies = [ [[package]] name = "kebab-source-fs" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "blake3", @@ -5119,7 +5119,7 @@ dependencies = [ [[package]] name = "kebab-store-sqlite" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "blake3", @@ -5139,7 +5139,7 @@ dependencies = [ [[package]] name = "kebab-store-vector" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "arrow", @@ -5163,7 +5163,7 @@ dependencies = [ [[package]] name = "kebab-tui" -version = "0.27.0" +version = "0.26.1" dependencies = [ "anyhow", "crossterm", diff --git a/Cargo.toml b/Cargo.toml index ac443f9..f56aae6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -32,7 +32,7 @@ edition = "2024" rust-version = "1.85" license = "MIT OR Apache-2.0" repository = "https://github.com/altair823/kebab" -version = "0.27.0" # v0.27.0 — ingest 진행 로그 개선: TTY 진행바에 현재 파일명 + 느린 phase(ocr/caption/embed)+모델명 실시간 + 경과초 heartbeat `(Ns)`, 종료 시 최장 소요 파일 top-5 요약. 신규 wire 이벤트 `asset_phase{idx,total,phase,model}` + `asset_timings.ocr_ms`/`caption_ms` 추가(additive, ingest_progress.v1 유지, serde default 0). 기본 동작 불변. — CLAUDE.md §Release +version = "0.26.1" # v0.26.1 — ingest 진행 로그 개선: TTY 진행바에 현재 파일명 + 느린 phase(ocr/caption/embed)+모델명 실시간 + 경과초 heartbeat `(Ns)`, 종료 시 최장 소요 파일 top-5 요약. 신규 wire 이벤트 `asset_phase{idx,total,phase,model}` + `asset_timings.ocr_ms`/`caption_ms` 추가(additive, ingest_progress.v1 유지, serde default 0). 기본 동작 불변. — CLAUDE.md §Release # pre-v0.18 workspace-wide cleanup: enable clippy::pedantic group with # intentional allow-list. The allowed lints are either cosmetic (doc style), diff --git a/HANDOFF.md b/HANDOFF.md index c20f7d0..d4fc47b 100644 --- a/HANDOFF.md +++ b/HANDOFF.md @@ -35,7 +35,7 @@ P0~P5 직렬. P6~P9 P5 이후 병렬 가능. 
머지 후 발견된 모든 deviation / hotfix 의 dated 로그는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md). 본 요약은 \"누군가가 인수받을 때 알아두면 시간을 많이 절약하는\" 항목만: -- **2026-06-03 ingest 진행 로그 개선** — v0.27.0. 이미지/PDF + OCR/caption on 볼트 ingest 가 "멈춘 듯" 보이던 문제 해소: TTY 진행바에 현재 파일명 + 느린 phase(ocr/caption/embed)+모델명 + 경과초 `(Ns)` heartbeat, 종료 시 최장 소요 파일 top-5 요약. 신규 wire `asset_phase{idx,total,phase,model}` + `asset_timings.ocr_ms`/`caption_ms`(additive, `ingest_progress.v1` 유지, serde default 0). 이미지·PDF 경로도 `asset_timings` emit(이전 markdown 만). 기본 동작 불변. 자세한 내용: `tasks/HOTFIXES.md` (2026-06-03 ingest 진행 로그), spec/plan `docs/superpowers/{specs,plans}/2026-06-03-ingest-log-improve-*.md`. +- **2026-06-03 ingest 진행 로그 개선** — v0.26.1. 이미지/PDF + OCR/caption on 볼트 ingest 가 "멈춘 듯" 보이던 문제 해소: TTY 진행바에 현재 파일명 + 느린 phase(ocr/caption/embed)+모델명 + 경과초 `(Ns)` heartbeat, 종료 시 최장 소요 파일 top-5 요약. 신규 wire `asset_phase{idx,total,phase,model}` + `asset_timings.ocr_ms`/`caption_ms`(additive, `ingest_progress.v1` 유지, serde default 0). 이미지·PDF 경로도 `asset_timings` emit(이전 markdown 만). 기본 동작 불변. 자세한 내용: `tasks/HOTFIXES.md` (2026-06-03 ingest 진행 로그), spec/plan `docs/superpowers/{specs,plans}/2026-06-03-ingest-log-improve-*.md`. - **2026-06-03 arctic-embed-l-v2.0 임베더 통합** — v0.26.0. 별칭 제거 후 설명형 query recall 보강(측정 recall@10 130/132, e5 +7). `kebab-embed-candle` 모델 레지스트리화(e5 mean + `snowflake-arctic-embed-l-v2.0` CLS, 모델별 pooling/prefix) + 신규 `kebab-embed-ollama`(`provider="ollama"`, `/api/embed`). config `endpoint: Option` 추가. 기본 e5 유지(opt-in), arctic 전환은 embedding_version cascade → 재색인. candle↔Ollama cosine>0.99 게이트로 pooling/prefix 정확성 고정(`#[ignore]`). 자세한 내용: `tasks/HOTFIXES.md` (2026-06-03 arctic), spec `docs/superpowers/specs/2026-06-03-arctic-embedder-spec.md`. - **2026-06-03 doc-side expansion(별칭) 기능 완전 제거** — v0.25.0. 아래 2026-05-31 항목의 색인-시 청크당 LLM 별칭 생성 + 별칭 검색 채널을 **전부 제거**(ROI 음수: cross-lingual 은 e5-large 단독으로 충분, 기여는 설명형 +2 그룹뿐인데 대가가 청크당 색인-시 LLM). 
`Chunk.aliases`/`expansion.rs`/`IngestExpansionCfg`/alias lexical arm/`expansion_progress` wire kind 제거, 신규 마이그레이션 **V013** 이 `chunk_aliases_fts`+`chunks.aliases` DROP. 별칭 default-off 였어 사용자 체감 0, 기존 KB 도 재색인 불요(잔존 별칭 벡터는 `strip_alias_suffix` graceful 매핑/`reset` 정리). `AssetTimings.expansion_ms` 는 wire 호환 위해 값 0 으로 유지. 자세한 내용: `tasks/HOTFIXES.md` (2026-06-03), spec `docs/superpowers/specs/2026-06-03-remove-doc-expansion-spec.md`. - **2026-05-31 Phase 2 doc-side expansion 별칭(개별 dense 벡터) + 파생물 캐시(V012)** — v0.21.0 cut. 색인 시 LLM 이 청크별 별칭("같은 의미 다른 표현")을 생성, 줄별 **개별 dense 벡터**(sentinel `{chunk}#alias#N`)로 색인 (묶음 1벡터는 평균화 희석으로 회귀 → 폐기) + boilerplate 청크 skip. `[ingest.expansion]` default off. 측정(나무위키 ~1000 문서 CS corpus): 변형 일관성 14/18 → **16/18**, spread 0.222→0.111, 대조군 false-positive 별칭 무죄. 비용 병목(별칭 18문서 2.5h)은 **파생물 캐시(V012, 청크 내용 해시 키)**로 해소 — 정답 3개 cold 1879s → warm 13s **≈ 145배**, embedding+별칭 LLM 캐싱, version_key cascade 정합. search/ask 가 `kebab.sqlite`+`lancedb` 만으로 동작 → 외부 서버 색인 후 DB 만 복사하는 이식 워크플로 가능. **결정/known limitation**: grounded/refusal 판정이 부분 인용을 grounded 로 오분류(정직한 거부가 false-positive 로 집계) — 별도 개선 후보. stack·svm 설명형 2개 잔존. 자세한 내용: `tasks/HOTFIXES.md` (2026-05-31), 측정: `docs/superpowers/handoffs/2026-05-31-namu-wiki-alias-cache-study.md`. diff --git a/crates/kebab-app/src/ingest_progress.rs b/crates/kebab-app/src/ingest_progress.rs index 7b99201..476eabc 100644 --- a/crates/kebab-app/src/ingest_progress.rs +++ b/crates/kebab-app/src/ingest_progress.rs @@ -96,7 +96,7 @@ pub enum IngestEvent { /// `idx/total` while its per-chunk phases churn. `chunks` is the chunk /// count for asset `idx`. AssetChunked { idx: u32, total: u32, chunks: u32 }, - /// v0.27.0 (additive): emitted when an asset enters a *slow* internal + /// v0.26.1 (additive): emitted when an asset enters a *slow* internal /// phase, so the interactive progress bar can show **which** phase /// (and which model) is currently running instead of looking frozen. 
/// `phase` ∈ {`"ocr"`, `"caption"`, `"embed"`}; short phases @@ -121,8 +121,8 @@ pub enum IngestEvent { /// them so the slowest-asset summary attributes vision-model time /// correctly. `expansion_ms` is retained for wire compatibility but is /// always 0 since doc-side expansion was removed (HOTFIXES 2026-06-03). - /// `ocr_ms` / `caption_ms` (v0.27.0) are additive with serde default 0 - /// so pre-v0.27.0 consumers deserialize cleanly. + /// `ocr_ms` / `caption_ms` (v0.26.1) are additive with serde default 0 + /// so pre-v0.26.1 consumers deserialize cleanly. AssetTimings { idx: u32, total: u32, @@ -312,7 +312,7 @@ mod tests { #[test] fn asset_timings_ocr_caption_default_to_zero_for_legacy_wire() { - // v0.27.0 additive: a pre-v0.27.0 wire payload omits ocr_ms / + // v0.26.1 additive: a pre-v0.26.1 wire payload omits ocr_ms / // caption_ms; serde `default` must fill 0 so old producers stay // compatible. let legacy = serde_json::json!({ @@ -339,7 +339,7 @@ mod tests { #[test] fn asset_phase_serializes_with_discriminator() { - // v0.27.0 additive variant — `kind` must be snake_case + // v0.26.1 additive variant — `kind` must be snake_case // `asset_phase`, `phase` is the slow-phase label, `model` the // model id (nullable). let ev = IngestEvent::AssetPhase { diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index ff24e91..94ede3a 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -1350,7 +1350,7 @@ fn ingest_one_asset( let store_ms = u64::try_from(t_store.elapsed().as_millis()).unwrap_or(u64::MAX); // Embed + vector upsert (only when both sides are configured). - // v0.27.0: surface the embed phase + model so a long embed run reads as + // v0.26.1: surface the embed phase + model so a long embed run reads as // "embedding()…" rather than a frozen bar (markdown path too). 
crate::ingest_progress::emit( progress, @@ -1578,7 +1578,7 @@ fn ingest_one_image_asset( let lang_hint = lang_hint_from_doc(&canonical); let now = time::OffsetDateTime::now_utc(); let mut warning_notes: Vec = Vec::new(); - // v0.27.0: vision phases (OCR / caption) are the usual bottleneck on an + // v0.26.1: vision phases (OCR / caption) are the usual bottleneck on an // image-heavy vault and emitted no progress before — so the bar looked // frozen. Surface each as an `AssetPhase` and measure its wall-clock for // the slowest-asset summary. @@ -1771,7 +1771,7 @@ fn ingest_one_image_asset( } let embed_ms = u64::try_from(t_embed.elapsed().as_millis()).unwrap_or(u64::MAX); - // v0.27.0: per-phase timing for the image path — ocr_ms / caption_ms + // v0.26.1: per-phase timing for the image path — ocr_ms / caption_ms // carry the vision-model cost so the slowest-asset summary attributes // an image-heavy run's bottleneck correctly. crate::ingest_progress::emit( @@ -2360,7 +2360,7 @@ fn ingest_one_pdf_asset( } let embed_ms = u64::try_from(t_embed.elapsed().as_millis()).unwrap_or(u64::MAX); - // v0.27.0: per-phase timing for the PDF path. `ocr_ms` reuses the + // v0.26.1: per-phase timing for the PDF path. `ocr_ms` reuses the // page-OCR total already computed above so a scanned-PDF run's OCR cost // shows up in the slowest-asset summary; caption is markdown/image-only. crate::ingest_progress::emit( diff --git a/crates/kebab-cli/src/progress.rs b/crates/kebab-cli/src/progress.rs index c238ab6..60d5636 100644 --- a/crates/kebab-cli/src/progress.rs +++ b/crates/kebab-cli/src/progress.rs @@ -32,7 +32,7 @@ use time::format_description::well_known::Rfc3339; use crate::wire; -/// v0.27.0: number of slowest assets surfaced in the end-of-run summary. +/// v0.26.1: number of slowest assets surfaced in the end-of-run summary. /// Constant for now (spec defers the config knob). 
const SLOWEST_TOP_N: usize = 5; @@ -72,19 +72,19 @@ impl ProgressMode { pub struct ProgressDisplay { mode: ProgressMode, bar: Option<ProgressBar>, - /// v0.27.0 heartbeat: start `Instant` of the asset currently in + /// v0.26.1 heartbeat: start `Instant` of the asset currently in /// flight, shared with the bar's steady-tick custom template key so /// the `(Ns)` elapsed counter advances *between* events (the drain /// loop blocks on `recv()`, so without the ticker the counter would /// freeze). `None` while scanning / between assets / after completion. asset_start: Arc<Mutex<Option<Instant>>>, - /// v0.27.0: workspace path of the asset currently in flight — set on + /// v0.26.1: workspace path of the asset currently in flight — set on /// `AssetStarted`, reused by `AssetPhase` to render `{path} · {phase}…`. current_path: Option<String>, - /// v0.27.0 slowest summary: idx → path, captured from `AssetStarted` + /// v0.26.1 slowest summary: idx → path, captured from `AssetStarted` /// so `AssetTimings` (which only carries `idx`) can name the asset. asset_paths: HashMap<u32, String>, - /// v0.27.0 slowest summary: (path, total_ms) per asset that reported + /// v0.26.1 slowest summary: (path, total_ms) per asset that reported /// `AssetTimings`. Sorted + truncated to top-N on `Completed`. timings: Vec<(String, u64)>, } @@ -151,7 +151,7 @@ impl ProgressDisplay { if let Some(bar) = self.bar.as_mut() { bar.set_length(u64::from(*total)); bar.set_position(0); - // v0.27.0: a custom `{asset_elapsed}` key reads the shared + // v0.26.1: a custom `{asset_elapsed}` key reads the shared // per-asset start `Instant` and appends ` (Ns)`. Combined // with the steady tick below, the elapsed counter advances // even while the drain loop is blocked on `recv()` waiting @@ -198,18 +198,18 @@ impl ProgressDisplay { path, media, } => { - // v0.27.0: remember the path so AssetPhase can render it and + // v0.26.1: remember the path so AssetPhase can render it and // the slowest summary (keyed by idx in AssetTimings) can name // the asset. 
self.current_path = Some(path.clone()); self.asset_paths.insert(*idx, path.clone()); - // v0.27.0: (re)start the per-asset heartbeat clock. + // v0.26.1: (re)start the per-asset heartbeat clock. if let Ok(mut guard) = self.asset_start.lock() { *guard = Some(Instant::now()); } if let Some(bar) = self.bar.as_ref() { bar.set_position(u64::from(idx.saturating_sub(1))); - // v0.27.0: show the current filename on the bar (TTY). + // v0.26.1: show the current filename on the bar (TTY). // Previously position-only — the interactive user couldn't // tell which file was in flight. The steady tick redraws // in place, so this no longer pollutes scrollback. @@ -222,7 +222,7 @@ impl ProgressDisplay { } IngestEvent::AssetFinished { .. } => { // Position is advanced in AssetStarted; bar.finish_and_clear() - // in Completed handles the final state. v0.27.0: stop the + // in Completed handles the final state. v0.26.1: stop the // heartbeat clock so the bar doesn't show a stale `(Ns)` in the // gap before the next AssetStarted. if let Ok(mut guard) = self.asset_start.lock() { @@ -230,7 +230,7 @@ impl ProgressDisplay { } self.current_path = None; } - // v0.27.0: an asset entered a slow internal phase (ocr / caption / + // v0.26.1: an asset entered a slow internal phase (ocr / caption / // embed). Surface which phase + model is running so a multi-second // vision-model call no longer looks frozen. IngestEvent::AssetPhase { @@ -276,7 +276,7 @@ impl ProgressDisplay { caption_ms, .. } => { - // v0.27.0: accumulate (path, total_ms) for the slowest summary. + // v0.26.1: accumulate (path, total_ms) for the slowest summary. // total = every measured phase (expansion_ms is always 0). 
let total_ms = parse_ms + chunk_ms + embed_ms + store_ms + ocr_ms + caption_ms; if let Some(path) = self.asset_paths.get(idx) { @@ -287,7 +287,7 @@ impl ProgressDisplay { } if !quiet { let mut err = std::io::stderr().lock(); - // v0.27.0: only print ocr / caption when they actually ran + // v0.26.1: only print ocr / caption when they actually ran // (markdown leaves them 0) so the text path stays uncluttered. let mut parts = vec![ format!("parse {}", fmt_ms(*parse_ms)), @@ -320,7 +320,7 @@ impl ProgressDisplay { "ingest: complete (scanned={} new={} updated={} skipped={} errors={})", counts.scanned, counts.new, counts.updated, counts.skipped, counts.errors, ); - // v0.27.0: slowest-asset summary. Useful in both TTY and + // v0.26.1: slowest-asset summary. Useful in both TTY and // non-TTY (it pinpoints the bottleneck file), so it prints // unless --quiet. --json mode never reaches here (emit_json). let _ = write_slowest_summary(&mut err, &self.timings, SLOWEST_TOP_N); @@ -404,7 +404,7 @@ fn fmt_ms(ms: u64) -> String { } } -/// v0.27.0: shorten an over-long workspace path for the progress-bar +/// v0.26.1: shorten an over-long workspace path for the progress-bar /// message so the live `(Ns)` heartbeat suffix stays visible on a narrow /// terminal. Keeps the tail (filename + a couple of parents) — that's the /// distinguishing part — and prefixes `…` when truncated. Paths up to the @@ -423,7 +423,7 @@ fn abbreviate_path(path: &str) -> String { format!("…{tail}") } -/// v0.27.0: render the end-of-run "slowest assets" summary. Sorts +/// v0.26.1: render the end-of-run "slowest assets" summary. Sorts /// `(path, total_ms)` descending by time, takes the top `n`, and writes a /// compact table to `w`. No-op (writes nothing) when `timings` is empty so /// a run with no per-asset timing (e.g. all-skipped) prints no stray header. 
diff --git a/crates/kebab-tui/src/ingest_progress.rs b/crates/kebab-tui/src/ingest_progress.rs index f2d86d8..f3ae475 100644 --- a/crates/kebab-tui/src/ingest_progress.rs +++ b/crates/kebab-tui/src/ingest_progress.rs @@ -161,7 +161,7 @@ fn apply_event(state: &mut IngestState, event: IngestEvent) { // no-ops here (the CLI / --json surfaces render them). | IngestEvent::AssetChunked { .. } | IngestEvent::AssetTimings { .. } - // v0.27.0 slow-phase hint (ocr / caption / embed): the CLI bar uses + // v0.26.1 slow-phase hint (ocr / caption / embed): the CLI bar uses // it for a live phase message; the TUI status-bar reducer tracks only // per-asset counters, so it's a no-op here. | IngestEvent::AssetPhase { .. } => {} diff --git a/docs/wire-schema/v1/ingest_progress.schema.json b/docs/wire-schema/v1/ingest_progress.schema.json index 390ad31..e1bf2c0 100644 --- a/docs/wire-schema/v1/ingest_progress.schema.json +++ b/docs/wire-schema/v1/ingest_progress.schema.json @@ -37,15 +37,15 @@ "description": "asset_finished: per-asset outcome (mirrors `ingest_report.v1.items[].kind`)." }, "chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked (v0.24.0): chunk count produced for this asset." }, - "phase": { "type": "string", "enum": ["ocr", "caption", "embed"], "description": "asset_phase (v0.27.0): the slow internal phase the asset just entered. Short phases (parse/chunk/store) are not emitted." }, - "model": { "type": ["string", "null"], "description": "asset_phase (v0.27.0): model performing the phase — vision LLM id for ocr/caption, embedder model_id for embed. null when the phase runs without a configured model." }, + "phase": { "type": "string", "enum": ["ocr", "caption", "embed"], "description": "asset_phase (v0.26.1): the slow internal phase the asset just entered. Short phases (parse/chunk/store) are not emitted." 
}, + "model": { "type": ["string", "null"], "description": "asset_phase (v0.26.1): model performing the phase — vision LLM id for ocr/caption, embedder model_id for embed. null when the phase runs without a configured model." }, "parse_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): parse phase wall-clock (ms). Emitted by markdown / image / PDF paths." }, "chunk_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): chunk phase wall-clock (ms). Emitted by markdown / image / PDF paths." }, "expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): retained for wire compatibility but always 0 — doc-side expansion was removed (HOTFIXES 2026-06-03)." }, "embed_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): embed + vector phase wall-clock (ms) — embedding, vector upsert, and stale-vector purge." }, "store_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): SQLite persist phase wall-clock (ms) — put_asset/document/blocks/chunks only." }, - "ocr_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.27.0, additive, default 0): image/PDF OCR phase wall-clock (ms). 0 on the markdown path (no OCR)." }, - "caption_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.27.0, additive, default 0): image caption phase wall-clock (ms). 0 on markdown / PDF paths." }, + "ocr_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.26.1, additive, default 0): image/PDF OCR phase wall-clock (ms). 0 on the markdown path (no OCR)." }, + "caption_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.26.1, additive, default 0): image caption phase wall-clock (ms). 0 on markdown / PDF paths." 
}, "n_chunks": { "type": "integer", "minimum": 0, "description": "embed_batch_started / embed_batch_finished: chunks in this embedding batch." }, "ms": { "type": "integer", "minimum": 0, "description": "embed_batch_finished / pdf_ocr_finished: wall-clock duration (ms). pdf_ocr_finished skip path 의 의미는 mixed (DCTDecode 부재 시 0, engine 실패 시 latency-before-bail)." }, "chars": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished: char count of OCR result. Skip 시 0." }, diff --git a/tasks/HOTFIXES.md b/tasks/HOTFIXES.md index 2f8313a..90e00d2 100644 --- a/tasks/HOTFIXES.md +++ b/tasks/HOTFIXES.md @@ -14,7 +14,7 @@ historical contract that was implemented; this file accumulates the deltas so phase 5+ readers can find the live behavior without diffing git history. -## 2026-06-03 — ingest 진행 로그 개선: 파일명·phase·heartbeat·slowest 요약 (v0.27.0) +## 2026-06-03 — ingest 진행 로그 개선: 파일명·phase·heartbeat·slowest 요약 (v0.26.1) **무엇을 왜 추가했나.** arctic 도그푸딩 중 이미지/PDF 혼재 + OCR/caption on 볼트에서 ingest 가 중간부터 느려졌는데, TTY 진행바가 **파일명·현재 phase·모델·경과시간**을 안 보여