diff --git a/crates/kebab-app/tests/ingest_progress.rs b/crates/kebab-app/tests/ingest_progress.rs
index 2c55e7e..89eb4b9 100644
--- a/crates/kebab-app/tests/ingest_progress.rs
+++ b/crates/kebab-app/tests/ingest_progress.rs
@@ -143,3 +143,68 @@ fn dropped_receiver_does_not_panic_or_fail_ingest() {
         .unwrap();
     assert_eq!(report.scanned, 3);
 }
+
+/// v0.20.0 sub-item 1: verifies that `pdf_ocr_started` + `pdf_ocr_finished` events are emitted
+/// when a PDF asset is ingested with OCR enabled. Needs a real Ollama — `#[ignore]` by default.
+///
+/// Manual invoke (`--ignored` is a test-harness flag, so it must come after `--`):
+/// ```sh
+/// KEBAB_PDF_OCR_ENABLED=true \
+/// KEBAB_PDF_OCR_ENDPOINT=http://192.168.0.47:11434 \
+///   cargo test -p kebab-app --test ingest_progress \
+///   -- --ignored pdf_ocr_progress_emits_started_finished_events
+/// ```
+#[test]
+#[ignore = "real Ollama dependency — manual invoke via KEBAB_PDF_OCR_ENABLED=true"]
+fn pdf_ocr_progress_emits_started_finished_events() {
+    // Copy the F1 fixture (DCTDecode JPEG passthrough) into a workspace under the tmpdir.
+    let tmpdir = tempfile::tempdir().expect("create tmpdir");
+    let workspace = tmpdir.path().join("workspace");
+    std::fs::create_dir_all(&workspace).expect("create workspace dir");
+    let f1_src = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("../kebab-parse-pdf/tests/fixtures/scanned_page1.pdf");
+    let f1 = std::fs::read(&f1_src).expect("F1 fixture present");
+    std::fs::write(workspace.join("page1.pdf"), &f1).expect("copy F1");
+
+    let data_dir = tmpdir.path().join("data");
+    std::fs::create_dir_all(&data_dir).expect("create data dir");
+
+    let mut config = kebab_config::Config::defaults();
+    config.workspace.root = workspace.to_string_lossy().into_owned();
+    config.storage.data_dir = data_dir.to_string_lossy().into_owned();
+    config.models.embedding.provider = "none".to_string();
+    config.models.embedding.dimensions = 0;
+    config.pdf.ocr.enabled = true;
+    if let Ok(endpoint) = std::env::var("KEBAB_PDF_OCR_ENDPOINT") {
+        config.pdf.ocr.endpoint = Some(endpoint);
+    }
+
+    let scope = kebab_core::SourceScope {
+        root: workspace.clone(),
+        ..Default::default()
+    };
+
+    let (tx, rx) = mpsc::channel::<IngestEvent>();
+    let _report = kebab_app::ingest_with_config_progress(
+        config,
+        scope,
+        false,
+        Some(tx),
+    )
+    .expect("ingest_with_config_progress");
+    // `tx` was moved into the call above; `rx.iter()` ends once every sender has been dropped.
+    let events: Vec<_> = rx.iter().collect();
+
+    let started_count = events
+        .iter()
+        .filter(|e| matches!(e, IngestEvent::PdfOcrStarted { .. }))
+        .count();
+    let finished_count = events
+        .iter()
+        .filter(|e| matches!(e, IngestEvent::PdfOcrFinished { .. }))
+        .count();
+
+    assert!(started_count >= 1, "PdfOcrStarted 가 ≥ 1 emit 됨 (got {started_count})");
+    assert!(finished_count >= 1, "PdfOcrFinished 가 ≥ 1 emit 됨 (got {finished_count})");
+    assert_eq!(started_count, finished_count, "Started 와 Finished 의 count 일치");
+}
diff --git a/crates/kebab-cli/src/progress.rs b/crates/kebab-cli/src/progress.rs
index 6e79df3..a8495a1 100644
--- a/crates/kebab-cli/src/progress.rs
+++ b/crates/kebab-cli/src/progress.rs
@@ -201,9 +201,25 @@ impl ProgressDisplay {
                     );
                 }
             }
-            // v0.20.0 sub-item 1: per-page PDF OCR events — not surfaced in
-            // human-readable progress output (no TTY bar update needed).
-            IngestEvent::PdfOcrStarted { .. } | IngestEvent::PdfOcrFinished { .. } => {}
+            // v0.20.0 sub-item 1: per-page PDF OCR events — sub-progress lines
+            // under AssetStarted for scanned PDF. spec §4.6.1 line 1085-1086.
+            // When skipped=true (no DCTDecode or engine failure), a skip line is printed instead.
+ IngestEvent::PdfOcrStarted { page } => { + if !quiet { + let mut err = std::io::stderr().lock(); + let _ = writeln!(err, " 📷 OCR page {page}..."); + } + } + IngestEvent::PdfOcrFinished { page, ms, chars, ocr_engine, skipped } => { + if !quiet { + let mut err = std::io::stderr().lock(); + if *skipped { + let _ = writeln!(err, " ⊘ OCR page {page} skipped (no DCTDecode or engine fail, {ms}ms)"); + } else { + let _ = writeln!(err, " ✓ OCR page {page} ({chars} chars, {ms}ms via {ocr_engine})"); + } + } + } } Ok(()) } diff --git a/docs/superpowers/specs/2026-05-27-pdf-scanned-ocr-spec.md b/docs/superpowers/specs/2026-05-27-pdf-scanned-ocr-spec.md index de3f0ca..924e745 100644 --- a/docs/superpowers/specs/2026-05-27-pdf-scanned-ocr-spec.md +++ b/docs/superpowers/specs/2026-05-27-pdf-scanned-ocr-spec.md @@ -1073,8 +1073,9 @@ lang_hint = "kor" ```diff + "page": { "type": "integer", "minimum": 1, "description": "pdf_ocr_started / pdf_ocr_finished: 1-based PDF page number under OCR." }, + "ocr_engine": { "type": "string", "description": "pdf_ocr_finished: engine_name (e.g. 'ollama-vision')." }, -+ "ocr_ms": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished: per-page OCR wall-clock duration." }, -+ "ocr_chars": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished: char count of OCR result." }, ++ "ms": { "type": "integer", "minimum": 0, "description": "embed_batch_finished / pdf_ocr_finished: wall-clock duration (ms). polymorphic field — option_A (Rust serde 정합, Step 7 commit 4c5ccd5)." }, ++ "chars": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished: char count of OCR result." }, ++ "skipped": { "type": "boolean", "description": "pdf_ocr_finished: true 일 시 OCR 미수행 (DCTDecode 부재 또는 engine fail). Step 6 M-4 resolution." }, ``` **in-tree consumer enumerate** (M-8 resolution): @@ -1083,7 +1084,7 @@ lang_hint = "kor" - `crates/kebab-cli/src/main.rs` 의 ingest stdout printer (kind → 사람-친화 라인 mapping). 
두 새 kind 의 라인 추가 deliverable: - `pdf_ocr_started` → `" 📷 OCR page {page}..."` - - `pdf_ocr_finished` → `" ✓ OCR page {page} ({ocr_chars} chars, {ocr_ms}ms via {ocr_engine})"` + - `pdf_ocr_finished` → `" ✓ OCR page {page} ({chars} chars, {ms}ms via {ocr_engine})"` - `crates/kebab-app/tests/ingest_progress*.rs` (snapshot test) — 새 kind 등장 시 baseline snapshot diff. plan executor 가 PDF OCR fixture 사용 시 snapshot 갱신 또는 `--accept` deliverable. - `crates/kebab-app/tests/integration_pdf*.rs` (PDF ingest path test) — 새 kind 가 emit 됨을 검증하는 새 test 추가. - 향후 `kebab-tui` 의 progress pane — 본 spec 의 scope 외 (P9 sub-item). @@ -1552,7 +1553,7 @@ vision LLM 의 inherent characteristic — "大韓民國" → "대한민국", PoC 의 latency 측정 = remote (192.168.0.47) CPU 환경. GPU 가속 시 3-5x 향상 예상 — qwen2.5vl:3b page 당 9-30s 가능. 사용자 환경 (remote Ollama 의 GPU 보유 여부) 의존. **mitigation**: -1. dogfood 시점 latency 재측정 — `pdf_ocr_finished.ocr_ms` event 의 distribution 분석. +1. dogfood 시점 latency 재측정 — `pdf_ocr_finished.ms` event 의 distribution 분석. 2. `request_timeout_secs` default 600 의 5x headroom — GPU 환경에서는 과보호, CPU 환경 worst-case 105s 의 5.7x 보호. ### §7.4 async indexing UX — 책 1권 ingest 의 hours-long stall