chore(ocr): PR #164 회차 1 리뷰 반영

- HOTFIXES 헤더 `v0.17.2` (vaporware) → `post-v0.17.1 dogfood` 로 변경, release tag 결정과 무관하게 정확한 anchor.
- HOTFIXES caller 수 `6 (5+3)` → `9 call site (6+3)` 으로 정정.
- OcrCfg.request_timeout_secs doc 의 edge case 가 LlmCfg sister doc 과 동일한 구체 예제 (`u64::MAX`, `86400`) + reqwest 0.12.x 명시 주석으로 강화.
- LLM + OCR 양쪽의 legacy TOML fixture (78 줄 거의 동일) 를 module-level `LEGACY_PRE_TIMEOUT_TOML` const 로 추출. 두 test 가 동일 source 공유 → 옛 schema 가 또 변하면 한 곳만 수정.

reqwest 의 `Duration::ZERO` 동작 fact-check (회차 1 점 5) 는 회차 2 reply 에서 검증 결과 보고.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 05:13:09 +00:00
parent 41c5edc517
commit e118844256
2 changed files with 95 additions and 152 deletions
--- a/crates/kebab-config/src/lib.rs
+++ b/crates/kebab-config/src/lib.rs
@@ -236,9 +236,12 @@ pub struct OcrCfg {
    /// for the rationale.
    ///
    /// **Edge case — `0` is NOT a disable sentinel.** Same semantics as
-    /// `LlmCfg::request_timeout_secs`: `Duration::from_secs(0)` means
-    /// "every request fails immediately", not "no timeout". Use a
-    /// large finite value for an effectively-uncapped budget.
+    /// [`LlmCfg::request_timeout_secs`]: `Duration::from_secs(0)` means
+    /// "every request fails immediately" (reqwest 0.12.x applies it as
+    /// a 0-second deadline), not "no timeout". To approximate "no cap",
+    /// prefer a generous finite value such as `86400`; `u64::MAX`
+    /// (≈ 5.8 × 10¹¹ years) also parses, but NOTE(review): confirm the
+    /// client's deadline arithmetic cannot overflow on such a value.
    #[serde(default = "default_ocr_request_timeout_secs")]
    pub request_timeout_secs: u64,
 }
@@ -860,6 +863,83 @@ fn parse_bool(s: &str) -> bool {
 mod tests {
    use super::*;

+    /// Legacy TOML fixture written before the `request_timeout_secs`
+    /// knobs (LLM in v0.17.1, OCR follow-up) existed. Shared by
+    /// `legacy_config_without_request_timeout_secs_uses_default`
+    /// (LLM-side) and `legacy_config_without_ocr_request_timeout_secs_uses_default`
+    /// (OCR-side) so both invariants pin against the same on-disk
+    /// shape — schema drift in the legacy form only needs one edit.
+    const LEGACY_PRE_TIMEOUT_TOML: &str = r#"
+schema_version = 1
+
+[workspace]
+root = "/tmp/x"
+exclude = []
+
+[storage]
+data_dir = "/tmp/x"
+sqlite = "/tmp/x/kebab.sqlite"
+vector_dir = "/tmp/x/lancedb"
+asset_dir = "/tmp/x/assets"
+artifact_dir = "/tmp/x/artifacts"
+model_dir = "/tmp/x/models"
+runs_dir = "/tmp/x/runs"
+copy_threshold_mb = 100
+
+[indexing]
+max_parallel_extractors = 2
+max_parallel_embeddings = 1
+watch_filesystem = false
+
+[chunking]
+target_tokens = 500
+overlap_tokens = 80
+respect_markdown_headings = true
+chunker_version = "md-heading-v1"
+
+[models.embedding]
+provider = "fastembed"
+model = "multilingual-e5-large"
+version = "v1"
+dimensions = 1024
+batch_size = 64
+
+[models.llm]
+provider = "ollama"
+model = "gemma3:4b"
+context_tokens = 4096
+endpoint = "http://127.0.0.1:11434"
+temperature = 0.0
+seed = 0
+
+[search]
+default_k = 10
+hybrid_fusion = "rrf"
+rrf_k = 60
+snippet_chars = 220
+
+[rag]
+prompt_template_version = "rag-v2"
+score_gate = 0.3
+explain_default = false
+max_context_tokens = 8000
+
+[image.ocr]
+enabled = false
+engine = "ollama-vision"
+model = "gemma3:4b"
+languages = ["eng"]
+max_pixels = 1600
+
+[image.caption]
+enabled = false
+max_pixels = 768
+prompt_template_version = "caption-v1"
+
+[ui]
+theme = "dark"
+"#;
+
    #[test]
    fn defaults_are_serde_roundtrip_stable() {
        let c = Config::defaults();
@@ -950,80 +1030,12 @@ mod tests {

    /// v0.17.0 post-dogfood: a config file written before the field
    /// existed (no `request_timeout_secs` key) must still parse and fall
-    /// back to the 300s default — backwards-compat invariant.
+    /// back to the 300s default — backwards-compat invariant. Fixture
+    /// shared with the OCR-side invariant via [`LEGACY_PRE_TIMEOUT_TOML`].
    #[test]
    fn legacy_config_without_request_timeout_secs_uses_default() {
-        let toml_src = r#"
-schema_version = 1
-
-[workspace]
-root = "/tmp/x"
-exclude = []
-
-[storage]
-data_dir = "/tmp/x"
-sqlite = "/tmp/x/kebab.sqlite"
-vector_dir = "/tmp/x/lancedb"
-asset_dir = "/tmp/x/assets"
-artifact_dir = "/tmp/x/artifacts"
-model_dir = "/tmp/x/models"
-runs_dir = "/tmp/x/runs"
-copy_threshold_mb = 100
-
-[indexing]
-max_parallel_extractors = 2
-max_parallel_embeddings = 1
-watch_filesystem = false
-
-[chunking]
-target_tokens = 500
-overlap_tokens = 80
-respect_markdown_headings = true
-chunker_version = "md-heading-v1"
-
-[models.embedding]
-provider = "fastembed"
-model = "multilingual-e5-large"
-version = "v1"
-dimensions = 1024
-batch_size = 64
-
-[models.llm]
-provider = "ollama"
-model = "gemma3:4b"
-context_tokens = 4096
-endpoint = "http://127.0.0.1:11434"
-temperature = 0.0
-seed = 0
-
-[search]
-default_k = 10
-hybrid_fusion = "rrf"
-rrf_k = 60
-snippet_chars = 220
-
-[rag]
-prompt_template_version = "rag-v2"
-score_gate = 0.3
-explain_default = false
-max_context_tokens = 8000
-
-[image.ocr]
-enabled = false
-engine = "ollama-vision"
-model = "gemma3:4b"
-languages = ["eng"]
-max_pixels = 1600
-
-[image.caption]
-enabled = false
-max_pixels = 768
-prompt_template_version = "caption-v1"
-
-[ui]
-theme = "dark"
-"#;
-        let c: Config = toml::from_str(toml_src).expect("parse legacy config");
+        let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
+            .expect("parse legacy config");
        assert_eq!(c.models.llm.request_timeout_secs, 300);
    }

@@ -1069,83 +1081,14 @@ theme = "dark"
        assert_eq!(c.image.ocr.request_timeout_secs, 900);
    }

-    /// v0.17.2 post-dogfood: a config file written before the OCR
+    /// post-v0.17.1 dogfood: a config file written before the OCR
    /// timeout field existed must still parse and fall back to the
-    /// 300s default — backwards-compat invariant. Reuses the same
-    /// minimal legacy TOML fixture as the LLM-side test.
+    /// 300s default — backwards-compat invariant. Fixture shared
+    /// with the LLM-side invariant via [`LEGACY_PRE_TIMEOUT_TOML`].
    #[test]
    fn legacy_config_without_ocr_request_timeout_secs_uses_default() {
-        let toml_src = r#"
-schema_version = 1
-
-[workspace]
-root = "/tmp/x"
-exclude = []
-
-[storage]
-data_dir = "/tmp/x"
-sqlite = "/tmp/x/kebab.sqlite"
-vector_dir = "/tmp/x/lancedb"
-asset_dir = "/tmp/x/assets"
-artifact_dir = "/tmp/x/artifacts"
-model_dir = "/tmp/x/models"
-runs_dir = "/tmp/x/runs"
-copy_threshold_mb = 100
-
-[indexing]
-max_parallel_extractors = 2
-max_parallel_embeddings = 1
-watch_filesystem = false
-
-[chunking]
-target_tokens = 500
-overlap_tokens = 80
-respect_markdown_headings = true
-chunker_version = "md-heading-v1"
-
-[models.embedding]
-provider = "fastembed"
-model = "multilingual-e5-large"
-version = "v1"
-dimensions = 1024
-batch_size = 64
-
-[models.llm]
-provider = "ollama"
-model = "gemma3:4b"
-context_tokens = 4096
-endpoint = "http://127.0.0.1:11434"
-temperature = 0.0
-seed = 0
-
-[search]
-default_k = 10
-hybrid_fusion = "rrf"
-rrf_k = 60
-snippet_chars = 220
-
-[rag]
-prompt_template_version = "rag-v2"
-score_gate = 0.3
-explain_default = false
-max_context_tokens = 8000
-
-[image.ocr]
-enabled = false
-engine = "ollama-vision"
-model = "gemma3:4b"
-languages = ["eng"]
-max_pixels = 1600
-
-[image.caption]
-enabled = false
-max_pixels = 768
-prompt_template_version = "caption-v1"
-
-[ui]
-theme = "dark"
-"#;
-        let c: Config = toml::from_str(toml_src).expect("parse legacy config");
+        let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
+            .expect("parse legacy config");
        assert_eq!(c.image.ocr.request_timeout_secs, 300);
    }

--- a/tasks/HOTFIXES.md
+++ b/tasks/HOTFIXES.md
@@ -33,14 +33,14 @@ v0.17.0 후속 도그푸딩에서 발견: 사용자가 default `gemma4:e4b` (8B

 Cross-link: `crates/kebab-config/src/lib.rs::LlmCfg::request_timeout_secs`, `crates/kebab-llm-local/src/ollama.rs::OllamaLanguageModel::new`.

-## 2026-05-25 — v0.17.2: `[image.ocr] request_timeout_secs` 노브 (closure of 2026-05-25 v0.17.1 미진행)
+## 2026-05-25 — post-v0.17.1 dogfood: `[image.ocr] request_timeout_secs` 노브 (closure of v0.17.1 미진행)

-v0.17.1 entry 의 첫 번째 미진행 항목 closure. LLM 쪽이 v0.17.1 에서 `[models.llm] request_timeout_secs` 로 풀려난 패턴을 OCR 어댑터에 동일 적용. 별 노브로 분리한 이유 (사용자 결정): OCR 은 통상 LLM 대비 짧고 cold start 패턴도 다름 — 두 노브를 독립 조절할 수 있어야 16 GB / CPU only 환경에서 vision 모델만 다른 timeout 을 쓰기 편함.
+v0.17.1 entry 의 첫 번째 미진행 항목 closure. LLM 쪽이 v0.17.1 에서 `[models.llm] request_timeout_secs` 로 풀려난 패턴을 OCR 어댑터에 동일 적용. 별 노브로 분리한 이유 (사용자 결정): OCR 은 통상 LLM 대비 짧고 cold start 패턴도 다름 — 두 노브를 독립 조절할 수 있어야 16 GB / CPU only 환경에서 vision 모델만 다른 timeout 을 쓰기 편함. release tag 는 본 entry 시점 미결정 — cut 합의 시점에 동일 entry 가 v0.17.2 / v0.18.0 등으로 anchor 갱신.

 **변경**:
 - `crates/kebab-config/src/lib.rs::OcrCfg` 에 `request_timeout_secs: u64` additive 필드 (`#[serde(default = "default_ocr_request_timeout_secs")]`, default `300`). 옛 config 가 필드 누락해도 그대로 파싱 + 동일 동작 (3 신규 unit test 가 default / env override / legacy parse 핀).
 - env override `KEBAB_IMAGE_OCR_REQUEST_TIMEOUT_SECS`.
- `crates/kebab-parse-image/src/ocr.rs` 의 `REQUEST_TIMEOUT` 상수 제거. `OllamaVisionOcr::build` 시그니처가 `request_timeout_secs: u64` 추가, `new(&Config)` 는 `config.image.ocr.request_timeout_secs` 전달. `from_parts` (테스트 전용 surface) 도 동일하게 시그니처 확장 — caller 6 곳 (`crates/kebab-parse-image/src/ocr.rs::tests` 5, `crates/kebab-parse-image/tests/ocr.rs::from_parts_clamps_max_pixels_into_legal_range` 3 site) 모두 `300` 명시 갱신.
+- `crates/kebab-parse-image/src/ocr.rs` 의 `REQUEST_TIMEOUT` 상수 제거. `OllamaVisionOcr::build` 시그니처가 `request_timeout_secs: u64` 추가, `new(&Config)` 는 `config.image.ocr.request_timeout_secs` 전달. `from_parts` (테스트 전용 surface) 도 동일하게 시그니처 확장 — caller 9 call site (`crates/kebab-parse-image/src/ocr.rs::tests` 5 test / 6 call site, `crates/kebab-parse-image/tests/ocr.rs::from_parts_clamps_max_pixels_into_legal_range` 1 test / 3 call site) 모두 `300` 명시 갱신.
 - `OcrCfg::defaults()` 에 `request_timeout_secs: default_ocr_request_timeout_secs()` 추가. `Config::defaults()` 는 `ImageCfg::defaults()` 경유라 cascade.

 **Edge case 동일**: `0` 은 disable 아닌 "즉시 timeout" (`Duration::from_secs(0)` 의 reqwest 의미). LlmCfg 의 doc comment 와 같은 안내가 OcrCfg field doc 에 명시.