feat(image.ocr): request_timeout_secs config knob + closure of v0.17.1 미진행 항목

v0.17.1 (PR #162) 가 LLM 쪽 hard-coded 300s 를 [models.llm] request_timeout_secs 로 풀어준 것과 같은 패턴을 OCR 어댑터에 적용. 사용자 결정으로 별도 노브로 분리 ([image.ocr] request_timeout_secs) — OCR 은 LLM 대비 cold start 패턴이 달라 독립 조절이 편함.

- OcrCfg.request_timeout_secs: u64 (serde default 300)
- KEBAB_IMAGE_OCR_REQUEST_TIMEOUT_SECS env override
- OllamaVisionOcr::build / from_parts 시그니처에 timeout 인자 추가
- REQUEST_TIMEOUT 상수 제거
- 3 신규 unit test (default / env / legacy parse) — LlmCfg 패턴 그대로
- HOTFIXES 2026-05-25 v0.17.1 entry 의 두 미진행 항목 모두 closure (OCR timeout = 본 PR, --stream docs = PR #163 에서 이미 완료)

기존 config / 옛 KB 영향 없음 — 새 필드는 default 로 채워지고 동작도 동일 (300s). vision 모델 cold start 가 길면 env 또는 config 로 늘릴 수 있음.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-25 05:06:53 +00:00
parent d02149c010
commit 41c5edc517
4 changed files with 174 additions and 17 deletions
--- a/crates/kebab-config/src/lib.rs
+++ b/crates/kebab-config/src/lib.rs
@@ -228,6 +228,19 @@ pub struct OcrCfg {
    /// Cap the long edge of the image (in pixels) before sending. Larger
    /// images bloat prompt cost. Default `1600`.
    pub max_pixels: u32,
+    /// v0.17.2 post-dogfood: Hard ceiling on a single HTTP exchange to
+    /// the OCR endpoint. Sister knob to [`LlmCfg::request_timeout_secs`]
+    /// — kept separate because OCR latency is typically shorter than
+    /// chat-LLM cold start, and large vision models on CPU-only hosts
+    /// occasionally need a different budget. See HOTFIXES 2026-05-25
+    /// for the rationale.
+    ///
+    /// **Edge case — `0` is NOT a disable sentinel.** Same semantics as
+    /// `LlmCfg::request_timeout_secs`: `Duration::from_secs(0)` means
+    /// "every request fails immediately", not "no timeout". Use a
+    /// large finite value for an effectively-uncapped budget.
+    #[serde(default = "default_ocr_request_timeout_secs")]
+    pub request_timeout_secs: u64,
 }

 impl OcrCfg {
@@ -239,10 +252,18 @@ impl OcrCfg {
            endpoint: None,
            languages: vec!["eng".to_string(), "kor".to_string()],
            max_pixels: 1600,
+            request_timeout_secs: default_ocr_request_timeout_secs(),
        }
    }
 }

+/// Serde default for `OcrCfg::request_timeout_secs`, in seconds (v0.17.2
+/// post-dogfood). Equals the legacy hard-coded 300s ceiling, so existing
+/// configs that omit the field keep behaving identically. Overridable per config / `KEBAB_IMAGE_OCR_REQUEST_TIMEOUT_SECS`.
+fn default_ocr_request_timeout_secs() -> u64 {
+    300
+}
+
 /// Caption settings (P6-3). Caption uses the same Ollama-vision /
 /// `LanguageModel` pipeline as the rest of the workspace; the trait
 /// abstraction is the part the spec demands. `enabled` defaults to
@@ -722,6 +743,11 @@ impl Config {
                        self.image.ocr.max_pixels = n;
                    }
                }
+                "KEBAB_IMAGE_OCR_REQUEST_TIMEOUT_SECS" => {
+                    if let Ok(n) = v.parse::<u64>() {
+                        self.image.ocr.request_timeout_secs = n;
+                    }
+                }

                // image.caption (P6-3)
                "KEBAB_IMAGE_CAPTION_ENABLED" => {
@@ -1022,6 +1048,107 @@ theme = "dark"
        assert_eq!(c.image.ocr.max_pixels, 1600);
    }

+    /// v0.17.2 post-dogfood: pins the built-in OCR timeout at the legacy hard-coded 300s cap,
+    /// so existing configs that omit the new field keep behaving identically.
+    #[test]
+    fn default_ocr_request_timeout_secs_is_300() {
+        assert_eq!(
+            Config::defaults().image.ocr.request_timeout_secs,
+            300
+        );
+    }
+
+    #[test] // `KEBAB_IMAGE_OCR_REQUEST_TIMEOUT_SECS` in the env map must replace the default.
+    fn env_overrides_image_ocr_request_timeout_secs() {
+        let mut env = HashMap::new();
+        env.insert(
+            "KEBAB_IMAGE_OCR_REQUEST_TIMEOUT_SECS".to_string(),
+            "900".to_string(), // differs from the 300 default so the override is observable
+        );
+        let c = Config::defaults().apply_env(&env);
+        assert_eq!(c.image.ocr.request_timeout_secs, 900);
+    }
+
+    /// v0.17.2 post-dogfood: a config file written before the OCR
+    /// timeout field existed must still parse and fall back to the
+    /// 300s default — backwards-compat invariant. The fixture's `[image.ocr]` table
+    /// omits `request_timeout_secs`. Reuses the same minimal legacy TOML fixture as the LLM-side test.
+    #[test]
+    fn legacy_config_without_ocr_request_timeout_secs_uses_default() {
+        let toml_src = r#"
+schema_version = 1
+
+[workspace]
+root = "/tmp/x"
+exclude = []
+
+[storage]
+data_dir = "/tmp/x"
+sqlite = "/tmp/x/kebab.sqlite"
+vector_dir = "/tmp/x/lancedb"
+asset_dir = "/tmp/x/assets"
+artifact_dir = "/tmp/x/artifacts"
+model_dir = "/tmp/x/models"
+runs_dir = "/tmp/x/runs"
+copy_threshold_mb = 100
+
+[indexing]
+max_parallel_extractors = 2
+max_parallel_embeddings = 1
+watch_filesystem = false
+
+[chunking]
+target_tokens = 500
+overlap_tokens = 80
+respect_markdown_headings = true
+chunker_version = "md-heading-v1"
+
+[models.embedding]
+provider = "fastembed"
+model = "multilingual-e5-large"
+version = "v1"
+dimensions = 1024
+batch_size = 64
+
+[models.llm]
+provider = "ollama"
+model = "gemma3:4b"
+context_tokens = 4096
+endpoint = "http://127.0.0.1:11434"
+temperature = 0.0
+seed = 0
+
+[search]
+default_k = 10
+hybrid_fusion = "rrf"
+rrf_k = 60
+snippet_chars = 220
+
+[rag]
+prompt_template_version = "rag-v2"
+score_gate = 0.3
+explain_default = false
+max_context_tokens = 8000
+
+[image.ocr]
+enabled = false
+engine = "ollama-vision"
+model = "gemma3:4b"
+languages = ["eng"]
+max_pixels = 1600
+
+[image.caption]
+enabled = false
+max_pixels = 768
+prompt_template_version = "caption-v1"
+
+[ui]
+theme = "dark"
+"#;
+        let c: Config = toml::from_str(toml_src).expect("parse legacy config"); // panics if the field-less config no longer deserializes
+        assert_eq!(c.image.ocr.request_timeout_secs, 300); // missing field must resolve to the serde default
+    }
+
    #[test]
    fn image_ocr_env_overrides() {
        let mut env = HashMap::new();