refactor(config): v3 경로 call-site sweep (kebab-app/kebab-eval/kebab-parse-image)

부모 경로에 .ingest 삽입(leaf 구조체 불변). src + 테스트 call-site 전부. kebab-cli 테스트의 v2 TOML fixture 는 from_file 자동변환(T6) 경로 검증용으로 유지.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 12:40:06 +00:00
parent 148c8b7040
commit d5c69f6715
19 changed files with 90 additions and 89 deletions
--- a/crates/kebab-app/src/app.rs
+++ b/crates/kebab-app/src/app.rs
@@ -924,7 +924,7 @@ impl App {
            k: u32::try_from(query.k).unwrap_or(u32::MAX),
            snippet_chars: u32::try_from(self.config.search.snippet_chars).unwrap_or(u32::MAX),
            embedding_version,
-            chunker_version: self.config.chunking.chunker_version.clone(),
+            chunker_version: self.config.ingest.chunking.chunker_version.clone(),
            corpus_revision: self.sqlite.corpus_revision(),
        })
    }
@@ -1025,7 +1025,7 @@ impl App {
 fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion {
    IndexVersion(format!(
        "lex:{}:fts5-v009-korean-morphological",
-        config.chunking.chunker_version
+        config.ingest.chunking.chunker_version
    ))
 }

--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -360,12 +360,12 @@ pub fn ingest_with_config_opts(
    // loop is correct and cheap. Construction failure (e.g. invalid
    // endpoint) aborts ingest fail-fast — better than silently disabling
    // OCR/caption mid-run.
-    let ocr_engine: Option<Box<dyn OcrEngine>> = if app.config.image.ocr.enabled {
+    let ocr_engine: Option<Box<dyn OcrEngine>> = if app.config.ingest.image.ocr.enabled {
        Some(build_image_ocr_engine(&app.config).context("kb-app::ingest: build image OCR engine")?)
    } else {
        None
    };
-    let caption_llm: Option<Box<dyn LanguageModel>> = if app.config.image.caption.enabled {
+    let caption_llm: Option<Box<dyn LanguageModel>> = if app.config.ingest.image.caption.enabled {
        Some(Box::new(OllamaLanguageModel::new(&app.config).context(
            "kb-app::ingest: build OllamaLanguageModel for caption",
        )?))
@@ -380,7 +380,7 @@ pub fn ingest_with_config_opts(
    // p10 / v0.20 sub-item 1: PDF OCR engine eager init (H-5 resolution).
    // image OCR pattern mirror — per-ingest 1회 build, fallible → fail-fast.
    let pdf_ocr_engine: Option<Box<dyn OcrEngine>> =
-        if app.config.pdf.ocr.enabled || app.config.pdf.ocr.always_on {
+        if app.config.ingest.pdf.ocr.enabled || app.config.ingest.pdf.ocr.always_on {
            Some(
                build_pdf_ocr_engine(&app.config)
                    .context("kb-app::ingest: build pdf OCR engine")?,
@@ -825,7 +825,7 @@ fn mint_ingest_run_id(scope_json: &str, at: time::OffsetDateTime) -> String {
 type SqliteStoreAlias = kebab_store_sqlite::SqliteStore;

 /// v0.27.0 (T8): build the image OCR engine selected by
-/// `config.image.ocr.engine`. Returns a boxed trait object so the ingest
+/// `config.ingest.image.ocr.engine`. Returns a boxed trait object so the ingest
 /// pipeline is engine-agnostic. Construction is fail-fast (model load /
 /// hash / endpoint validation) — mirrors the prior concrete-type behaviour.
 ///
@@ -835,7 +835,7 @@ type SqliteStoreAlias = kebab_store_sqlite::SqliteStore;
 fn build_image_ocr_engine(
    config: &kebab_config::Config,
 ) -> anyhow::Result<Box<dyn OcrEngine>> {
-    match config.image.ocr.engine.as_str() {
+    match config.ingest.image.ocr.engine.as_str() {
        OLLAMA_VISION_ENGINE => Ok(Box::new(
            OllamaVisionOcr::new(config).context("build OllamaVisionOcr")?,
        )),
@@ -850,7 +850,7 @@ fn build_image_ocr_engine(
 }

 /// v0.27.0 (T8): build the PDF OCR engine selected by
-/// `config.pdf.ocr.engine`. The ollama-vision arm uses the PDF-specific
+/// `config.ingest.pdf.ocr.engine`. The ollama-vision arm uses the PDF-specific
 /// `model` / `languages` / `max_pixels` / `request_timeout_secs` knobs (and
 /// endpoint fallback to `models.llm.endpoint`). The paddle-onnx arm shares
 /// the same bundled ONNX models as image OCR (resolved from `image.ocr`
@@ -869,9 +869,9 @@ fn build_image_ocr_engine(
 fn build_pdf_ocr_engine(
    config: &kebab_config::Config,
 ) -> anyhow::Result<Box<dyn OcrEngine>> {
-    match config.pdf.ocr.engine.as_str() {
+    match config.ingest.pdf.ocr.engine.as_str() {
        OLLAMA_VISION_ENGINE => {
-            let cfg = &config.pdf.ocr;
+            let cfg = &config.ingest.pdf.ocr;
            let endpoint = match cfg.endpoint.as_deref() {
                Some(s) if !s.is_empty() => s.to_string(),
                _ => config.models.llm.endpoint.clone(),
@@ -2144,7 +2144,7 @@ fn sweep_deleted_files(
 ///   asset rollback on embed-fail is a P+ task).
 ///
 /// `chunker_version` is hard-coded to `pdf-page-v1` (HOTFIXES entry —
-/// `config.chunking.chunker_version` is single-valued today and serves
+/// `config.ingest.chunking.chunker_version` is single-valued today and serves
 /// the markdown path; per-medium config split is a P+ chunker registry
 /// task).
 #[allow(clippy::too_many_arguments)]
@@ -2229,15 +2229,15 @@ fn ingest_one_pdf_asset(
    // v0.20 sub-item 1: post-extract OCR enrichment (PR #187 registry
    // dispatch invariant 보존 — extract_for 가 normal entry).
    let (pdf_ocr_pages, pdf_ocr_ms_total): (Option<u32>, Option<u64>) =
-        if app.config.pdf.ocr.enabled || app.config.pdf.ocr.always_on {
+        if app.config.ingest.pdf.ocr.enabled || app.config.ingest.pdf.ocr.always_on {
            match pdf_ocr_engine {
                Some(engine) => {
                    let ocr_opts = crate::pdf_ocr_apply::PdfOcrOpts {
-                        enabled: app.config.pdf.ocr.enabled || app.config.pdf.ocr.always_on,
-                        always_on: app.config.pdf.ocr.always_on,
-                        valid_ratio_threshold: app.config.pdf.ocr.valid_ratio_threshold,
-                        min_char_count: app.config.pdf.ocr.min_char_count,
-                        lang_hint: app.config.pdf.ocr.lang_hint.clone().map(kebab_core::Lang),
+                        enabled: app.config.ingest.pdf.ocr.enabled || app.config.ingest.pdf.ocr.always_on,
+                        always_on: app.config.ingest.pdf.ocr.always_on,
+                        valid_ratio_threshold: app.config.ingest.pdf.ocr.valid_ratio_threshold,
+                        min_char_count: app.config.ingest.pdf.ocr.min_char_count,
+                        lang_hint: app.config.ingest.pdf.ocr.lang_hint.clone().map(kebab_core::Lang),
                        cancel: cancel.cloned(),
                    };
                    // v0.20.x Hook 2: pre-clone Arcs for capture by OCR closure.
@@ -2356,7 +2356,7 @@ fn ingest_one_pdf_asset(
        };

    // Per-medium chunker selection: PDF docs always use pdf-page-v1
-    // regardless of `config.chunking.chunker_version`. The chunker
+    // regardless of `config.ingest.chunking.chunker_version`. The chunker
    // validates every block carries `SourceSpan::Page`; failure here
    // means the parser drifted from its contract.
    let chunker = PdfPageV1Chunker;
@@ -3056,10 +3056,10 @@ fn build_body_hints(asset: &RawAsset) -> BodyHints {
 /// Build a `ChunkPolicy` from the active config.
 fn chunk_policy_from_config(config: &kebab_config::Config) -> ChunkPolicy {
    ChunkPolicy {
-        target_tokens: config.chunking.target_tokens,
-        overlap_tokens: config.chunking.overlap_tokens,
-        respect_markdown_headings: config.chunking.respect_markdown_headings,
-        chunker_version: ChunkerVersion(config.chunking.chunker_version.clone()),
+        target_tokens: config.ingest.chunking.target_tokens,
+        overlap_tokens: config.ingest.chunking.overlap_tokens,
+        respect_markdown_headings: config.ingest.chunking.respect_markdown_headings,
+        chunker_version: ChunkerVersion(config.ingest.chunking.chunker_version.clone()),
    }
 }

@@ -3099,7 +3099,7 @@ fn ocr_engine_version_for_sig(config: &kebab_config::Config, engine: &str, model
        // stable per-model revision, so engine/model is the identity.
        return format!("ollama/{model}");
    }
-    let ocr = &config.image.ocr;
+    let ocr = &config.ingest.image.ocr;
    let key = format!(
        "{}|{}|{}",
        ocr.det_model.as_deref().unwrap_or("<bundled>"),
@@ -3130,7 +3130,7 @@ fn ingest_config_signature(config: &kebab_config::Config, media: &MediaType) ->
    // Common (every media type): chunking parameters that move chunk
    // boundaries. `target_tokens` / `overlap_tokens` change re-chunking for
    // markdown / image / pdf / code alike, so a change re-indexes all types.
-    let c = &config.chunking;
+    let c = &config.ingest.chunking;
    let mut sig = format!(
        "chunk:{}:{}:{}:{}",
        c.target_tokens, c.overlap_tokens, c.respect_markdown_headings, c.chunker_version
@@ -3140,7 +3140,7 @@ fn ingest_config_signature(config: &kebab_config::Config, media: &MediaType) ->
            // OCR / caption only affect output when their `enabled` flag is
            // on; the model / prompt version matters only then. Off ↔ off is
            // a stable empty token so re-running the same config skips.
-            let ocr = &config.image.ocr;
+            let ocr = &config.ingest.image.ocr;
            if ocr.enabled {
                // v0.27.0 (T9): engine + engine_version so switching engine
                // (ollama-vision ↔ paddle-onnx) OR changing the model/assets
@@ -3153,7 +3153,7 @@ fn ingest_config_signature(config: &kebab_config::Config, media: &MediaType) ->
            } else {
                sig.push_str("|ocr:0");
            }
-            let cap = &config.image.caption;
+            let cap = &config.ingest.image.caption;
            if cap.enabled {
                sig.push_str(&format!("|cap:1:{}", cap.prompt_template_version));
            } else {
@@ -3163,7 +3163,7 @@ fn ingest_config_signature(config: &kebab_config::Config, media: &MediaType) ->
        MediaType::Pdf => {
            // PDF OCR is active when EITHER `enabled` or `always_on` is set
            // (mirrors the ingest gate). `model` only matters when active.
-            let ocr = &config.pdf.ocr;
+            let ocr = &config.ingest.pdf.ocr;
            if ocr.enabled || ocr.always_on {
                // v0.27.0 (T9): engine + engine_version (same cascade rule as
                // image OCR above) alongside the enabled/always_on gate.
--- a/crates/kebab-app/src/schema.rs
+++ b/crates/kebab-app/src/schema.rs
@@ -205,7 +205,7 @@ fn collect_models(cfg: &Config, store: &kebab_store_sqlite::SqliteStore) -> Mode
        // maintain their own versions; surface those when SchemaV1.models
        // becomes a multi-medium map (P+).
        parser_version: kebab_parse_md::PARSER_VERSION.to_string(),
-        chunker_version: cfg.chunking.chunker_version.clone(),
+        chunker_version: cfg.ingest.chunking.chunker_version.clone(),
        active_parsers,
        active_chunkers,
        // EmbeddingModelCfg uses `.model` (not `.id`) — adapt from plan.
--- a/crates/kebab-app/tests/common/mod.rs
+++ b/crates/kebab-app/tests/common/mod.rs
@@ -62,8 +62,8 @@ impl TestEnv {
        // Drop in a small chunk policy so the fixture's small files
        // emit at least a couple of chunks even with overlap_tokens
        // honored.
-        config.chunking.target_tokens = 80;
-        config.chunking.overlap_tokens = 20;
+        config.ingest.chunking.target_tokens = 80;
+        config.ingest.chunking.overlap_tokens = 20;

        Self {
            temp,
--- a/crates/kebab-app/tests/config_invalidation.rs
+++ b/crates/kebab-app/tests/config_invalidation.rs
@@ -63,7 +63,7 @@ fn chunking_change_reindexes_all_types() {
    let scanned = first.scanned;

    // Bump target_tokens — folds into every type's signature.
-    env.config.chunking.target_tokens += 100;
+    env.config.ingest.chunking.target_tokens += 100;

    let second = reingest(&env);
    assert_eq!(second.scanned, scanned);
--- a/crates/kebab-app/tests/image_pipeline.rs
+++ b/crates/kebab-app/tests/image_pipeline.rs
@@ -34,11 +34,11 @@ fn cfg_with_image_pipeline(env: &TestEnv, mock_endpoint: &str) -> Config {
    let mut cfg = env.config.clone();
    // p9-fb-25: workspace.include removed; extension routing is now
    // handled by extractor matching alone (no config knob).
-    cfg.image.ocr.enabled = true;
-    cfg.image.ocr.endpoint = Some(mock_endpoint.to_string());
-    cfg.image.ocr.model = "vision-mock:1b".to_string();
-    cfg.image.ocr.max_pixels = 512;
-    cfg.image.caption.enabled = false; // tested separately below
+    cfg.ingest.image.ocr.enabled = true;
+    cfg.ingest.image.ocr.endpoint = Some(mock_endpoint.to_string());
+    cfg.ingest.image.ocr.model = "vision-mock:1b".to_string();
+    cfg.ingest.image.ocr.max_pixels = 512;
+    cfg.ingest.image.caption.enabled = false; // tested separately below
    cfg.models.llm.endpoint = mock_endpoint.to_string();
    cfg.models.llm.model = "vision-mock:1b".to_string();
    cfg
@@ -161,8 +161,8 @@ async fn ingest_image_with_ocr_and_caption_populates_both_fields() {
    let env = TestEnv::lexical_only();
    write_red_png(&env.workspace_root, "diagram.png");
    let mut cfg = cfg_with_image_pipeline(&env, &server.uri());
-    cfg.image.caption.enabled = true;
-    cfg.image.caption.max_pixels = 384;
+    cfg.ingest.image.caption.enabled = true;
+    cfg.ingest.image.caption.max_pixels = 384;

    let cfg_clone = cfg.clone();
    let scope = env.scope();
@@ -270,8 +270,8 @@ async fn image_indexed_with_filename_when_ocr_and_caption_disabled() {
    let mut cfg = env.config.clone();
    // p9-fb-25: workspace.include removed; extension routing is now
    // handled by extractor matching alone (no config knob).
-    cfg.image.ocr.enabled = false;
-    cfg.image.caption.enabled = false;
+    cfg.ingest.image.ocr.enabled = false;
+    cfg.ingest.image.caption.enabled = false;

    let cfg_clone = cfg.clone();
    let scope = env.scope();
@@ -334,8 +334,8 @@ async fn garbage_png_increments_errors_counter_exactly_once() {
    let mut cfg = env.config.clone();
    // p9-fb-25: workspace.include removed; extension routing is now
    // handled by extractor matching alone (no config knob).
-    cfg.image.ocr.enabled = false;
-    cfg.image.caption.enabled = false;
+    cfg.ingest.image.ocr.enabled = false;
+    cfg.ingest.image.caption.enabled = false;

    let cfg_clone = cfg.clone();
    let scope = env.scope();
--- a/crates/kebab-app/tests/ingest_log_smoke.rs
+++ b/crates/kebab-app/tests/ingest_log_smoke.rs
@@ -23,8 +23,8 @@ fn minimal_config(workspace: &std::path::Path, log_dir: &std::path::Path) -> Con
    cfg.storage.model_dir = model_dir.to_string_lossy().into_owned();
    cfg.models.embedding.provider = "none".to_string();
    cfg.models.embedding.dimensions = 0;
-    cfg.chunking.target_tokens = 80;
-    cfg.chunking.overlap_tokens = 20;
+    cfg.ingest.chunking.target_tokens = 80;
+    cfg.ingest.chunking.overlap_tokens = 20;
    cfg.logging = LoggingCfg {
        ingest_log_enabled: true,
        ingest_log_dir: log_dir.to_path_buf(),
--- a/crates/kebab-app/tests/ingest_pdf_ocr_smoke.rs
+++ b/crates/kebab-app/tests/ingest_pdf_ocr_smoke.rs
@@ -22,8 +22,8 @@ fn ollama_endpoint() -> String {

 fn make_ocr_env_real() -> TestEnv {
    let mut env = TestEnv::lexical_only();
-    env.config.pdf.ocr.enabled = true;
-    env.config.pdf.ocr.endpoint = Some(ollama_endpoint());
+    env.config.ingest.pdf.ocr.enabled = true;
+    env.config.ingest.pdf.ocr.endpoint = Some(ollama_endpoint());
    env.config.models.embedding.provider = "none".to_string();

    let src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
@@ -92,8 +92,8 @@ fn ocr_text_indexed_and_searchable() {
 #[test]
 fn ingest_with_cancel_aborts_mid_pdf() {
    let mut env = TestEnv::lexical_only();
-    env.config.pdf.ocr.enabled = true;
-    env.config.pdf.ocr.endpoint = Some("http://127.0.0.1:1".to_string());
+    env.config.ingest.pdf.ocr.enabled = true;
+    env.config.ingest.pdf.ocr.endpoint = Some("http://127.0.0.1:1".to_string());

    let src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
        .parent()
--- a/crates/kebab-app/tests/ingest_progress.rs
+++ b/crates/kebab-app/tests/ingest_progress.rs
@@ -196,9 +196,9 @@ fn pdf_ocr_progress_emits_started_finished_events() {
    config.storage.data_dir = data_dir.to_string_lossy().into_owned();
    config.models.embedding.provider = "none".to_string();
    config.models.embedding.dimensions = 0;
-    config.pdf.ocr.enabled = true;
+    config.ingest.pdf.ocr.enabled = true;
    if let Ok(endpoint) = std::env::var("KEBAB_PDF_OCR_ENDPOINT") {
-        config.pdf.ocr.endpoint = Some(endpoint);
+        config.ingest.pdf.ocr.endpoint = Some(endpoint);
    }

    let scope = kebab_core::SourceScope {
--- a/crates/kebab-app/tests/pdf_ocr_events_insert_smoke.rs
+++ b/crates/kebab-app/tests/pdf_ocr_events_insert_smoke.rs
@@ -49,9 +49,9 @@ async fn ingest_dual_write_doc_id_matches_ndjson() {
    let result = spawn_blocking(move || {
        let mut env = TestEnv::lexical_only();
        // Enable PDF OCR + set up mock endpoint
-        env.config.pdf.ocr.enabled = true;
-        env.config.pdf.ocr.endpoint = Some(mock_url.clone());
-        env.config.pdf.ocr.model = "qwen2.5vl:3b".to_string();
+        env.config.ingest.pdf.ocr.enabled = true;
+        env.config.ingest.pdf.ocr.endpoint = Some(mock_url.clone());
+        env.config.ingest.pdf.ocr.model = "qwen2.5vl:3b".to_string();
        // Enable ingest log
        let log_dir = env.temp.path().join("logs");
        std::fs::create_dir_all(&log_dir).unwrap();
--- a/crates/kebab-app/tests/pdf_pipeline.rs
+++ b/crates/kebab-app/tests/pdf_pipeline.rs
@@ -121,8 +121,8 @@ fn cfg_with_pdf(env: &TestEnv) -> Config {
    // PDF ingest does not need OCR / caption / LM — leave defaults
    // (ocr.enabled=false, caption.enabled=false). The image pipeline
    // construction step skips both adapters.
-    cfg.image.ocr.enabled = false;
-    cfg.image.caption.enabled = false;
+    cfg.ingest.image.ocr.enabled = false;
+    cfg.ingest.image.caption.enabled = false;
    cfg
 }

--- a/crates/kebab-app/tests/schema_active_versions.rs
+++ b/crates/kebab-app/tests/schema_active_versions.rs
@@ -12,8 +12,8 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path)
    cfg.storage.model_dir = data_dir.join("models").to_string_lossy().into_owned();
    cfg.models.embedding.provider = "none".to_string();
    cfg.models.embedding.dimensions = 0;
-    cfg.chunking.target_tokens = 80;
-    cfg.chunking.overlap_tokens = 20;
+    cfg.ingest.chunking.target_tokens = 80;
+    cfg.ingest.chunking.overlap_tokens = 20;
    cfg
 }

--- a/crates/kebab-app/tests/schema_report.rs
+++ b/crates/kebab-app/tests/schema_report.rs
@@ -14,8 +14,8 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path)
    config.storage.model_dir = data_dir.join("models").to_string_lossy().into_owned();
    config.models.embedding.provider = "none".to_string();
    config.models.embedding.dimensions = 0;
-    config.chunking.target_tokens = 80;
-    config.chunking.overlap_tokens = 20;
+    config.ingest.chunking.target_tokens = 80;
+    config.ingest.chunking.overlap_tokens = 20;
    config
 }

--- a/crates/kebab-eval/src/runner.rs
+++ b/crates/kebab-eval/src/runner.rs
@@ -220,7 +220,7 @@ fn build_config_snapshot(cfg: &kebab_config::Config, eval_k: usize) -> Result<se
    Ok(serde_json::json!({
        "config": cfg_value,
        "eval_k": eval_k,
-        "chunker_version": cfg.chunking.chunker_version,
+        "chunker_version": cfg.ingest.chunking.chunker_version,
        "embedding": {
            "model": cfg.models.embedding.model,
            "version": cfg.models.embedding.version,
--- a/crates/kebab-parse-image/src/caption.rs
+++ b/crates/kebab-parse-image/src/caption.rs
@@ -18,7 +18,7 @@
 //!
 //! The original P6-3 spec asked for a cargo feature `caption` (default
 //! OFF at compile time). We collapse this into a single runtime gate
-//! (`config.image.caption.enabled = false`, default OFF). Reasoning:
+//! (`config.ingest.image.caption.enabled = false`, default OFF). Reasoning:
 //! the captioning module's only extra deps are `base64` + `image` +
 //! `kebab-llm` trait — all already pulled in by the rest of the
 //! crate. A cargo feature would only complicate the build matrix
@@ -50,13 +50,13 @@ const CAPTION_MAX_TOKENS: usize = 96;

 /// Run a caption pass and return the resulting `ModelCaption`.
 ///
-/// Pure raw operation — does **not** consult `config.image.caption.enabled`.
+/// Pure raw operation — does **not** consult `config.ingest.image.caption.enabled`.
 /// The runtime feature gate lives in [`apply_caption`]; this entry
 /// always invokes the LM. Tests pinning the produced `ModelCaption`
 /// shape can call this directly without flipping the config flag.
 ///
 /// Honours the `[MIN_CAPTION_LONG_EDGE, MAX_CAPTION_LONG_EDGE]` clamp
-/// on `config.image.caption.max_pixels` so a hostile config cannot
+/// on `config.ingest.image.caption.max_pixels` so a hostile config cannot
 /// blow up prompt cost.
 pub fn caption_image(
    llm: &dyn LanguageModel,
@@ -65,15 +65,16 @@ pub fn caption_image(
    cfg: &kebab_config::Config,
 ) -> Result<ModelCaption> {
    let max_pixels = cfg
+        .ingest
        .image
        .caption
        .max_pixels
        .clamp(MIN_CAPTION_LONG_EDGE, MAX_CAPTION_LONG_EDGE);
-    if max_pixels != cfg.image.caption.max_pixels {
+    if max_pixels != cfg.ingest.image.caption.max_pixels {
        tracing::warn!(
            target: "kebab-parse-image",
            "image.caption.max_pixels = {} clamped to {} (legal range [{}, {}])",
-            cfg.image.caption.max_pixels,
+            cfg.ingest.image.caption.max_pixels,
            max_pixels,
            MIN_CAPTION_LONG_EDGE,
            MAX_CAPTION_LONG_EDGE
@@ -129,7 +130,7 @@ pub fn caption_image(
    let caption_text = text.trim().to_string();

    let model_ref = llm.model_ref();
-    let prompt_v = &cfg.image.caption.prompt_template_version;
+    let prompt_v = &cfg.ingest.image.caption.prompt_template_version;
    let model_version = format!(
        "{provider}/{prompt}",
        provider = model_ref.provider,
@@ -151,7 +152,7 @@ pub fn caption_image(
    })
 }

-/// Pipeline entry point — gate-checks `config.image.caption.enabled`
+/// Pipeline entry point — gate-checks `config.ingest.image.caption.enabled`
 /// then mutates `block.caption` in place via [`caption_image`].
 ///
 /// When `enabled = false` the function is a clean no-op (returns
@@ -167,7 +168,7 @@ pub fn apply_caption(
    cfg: &kebab_config::Config,
    events: &mut Vec<ProvenanceEvent>,
 ) -> Result<()> {
-    if !cfg.image.caption.enabled {
+    if !cfg.ingest.image.caption.enabled {
        tracing::debug!(
            target: "kebab-parse-image",
            "captioning skipped — image.caption.enabled = false"
--- a/crates/kebab-parse-image/src/ocr.rs
+++ b/crates/kebab-parse-image/src/ocr.rs
@@ -39,7 +39,7 @@ use crate::image_prep;
 /// Engine name written into `OcrText.engine` for the Ollama-vision adapter.
 pub const OLLAMA_VISION_ENGINE: &str = "ollama-vision";

-/// Lower bound on `config.image.ocr.max_pixels`. Anything below this is
+/// Lower bound on `config.ingest.image.ocr.max_pixels`. Anything below this is
 /// silently bumped to keep the model from receiving an unreadable thumbnail.
 const MIN_LONG_EDGE: u32 = 256;

@@ -126,14 +126,14 @@ pub struct OllamaVisionOcr {

 impl OllamaVisionOcr {
    /// Build an adapter from a workspace [`kebab_config::Config`].
-    /// Reads `config.image.ocr.{model, endpoint, languages, max_pixels}`;
+    /// Reads `config.ingest.image.ocr.{model, endpoint, languages, max_pixels}`;
    /// when `endpoint` is empty falls back to `config.models.llm.endpoint`
    /// so the same Ollama host serves both LLM and OCR by default.
    ///
    /// Construction does NOT touch the network — the first HTTP call
    /// happens inside [`OcrEngine::recognize`].
    pub fn new(config: &kebab_config::Config) -> Result<Self> {
-        let ocr = &config.image.ocr;
+        let ocr = &config.ingest.image.ocr;
        let endpoint = match ocr.endpoint.as_deref() {
            Some(s) if !s.is_empty() => s.to_string(),
            _ => config.models.llm.endpoint.clone(),
--- a/crates/kebab-parse-image/src/paddle_onnx.rs
+++ b/crates/kebab-parse-image/src/paddle_onnx.rs
@@ -122,7 +122,7 @@ impl ModelPaths {
    /// [`from_default_dir`]: ModelPaths::from_default_dir
    pub fn from_config(config: &kebab_config::Config) -> Self {
        let defaults = Self::from_default_dir();
-        let ocr = &config.image.ocr;
+        let ocr = &config.ingest.image.ocr;
        Self {
            det: ocr.det_model.as_ref().map(PathBuf::from).unwrap_or(defaults.det),
            rec: ocr.rec_model.as_ref().map(PathBuf::from).unwrap_or(defaults.rec),
@@ -138,7 +138,7 @@ impl OnnxPaddleOcr {
    /// here are fail-fast (matches the Ollama adapter's construction contract).
    pub fn new(config: &kebab_config::Config) -> Result<Self> {
        let paths = ModelPaths::from_config(config);
-        let ocr = &config.image.ocr;
+        let ocr = &config.ingest.image.ocr;
        Self::from_paths(
            &paths,
            ocr.score_thresh,
@@ -882,8 +882,8 @@ mod tests {
        assert!(def.dict.ends_with("korean_dict.txt"), "{:?}", def.dict);

        // Override det + dict; rec stays bundled (partial override allowed).
-        cfg.image.ocr.det_model = Some("/custom/det.onnx".to_string());
-        cfg.image.ocr.dict = Some("/custom/dict.txt".to_string());
+        cfg.ingest.image.ocr.det_model = Some("/custom/det.onnx".to_string());
+        cfg.ingest.image.ocr.dict = Some("/custom/dict.txt".to_string());
        let ov = ModelPaths::from_config(&cfg);
        assert_eq!(ov.det, PathBuf::from("/custom/det.onnx"));
        assert_eq!(ov.dict, PathBuf::from("/custom/dict.txt"));
--- a/crates/kebab-parse-image/tests/caption.rs
+++ b/crates/kebab-parse-image/tests/caption.rs
@@ -22,8 +22,8 @@ use crate::common::red_100x50_png;

 fn cfg_with_caption_enabled() -> Config {
    let mut cfg = Config::defaults();
-    cfg.image.caption.enabled = true;
-    cfg.image.caption.max_pixels = 512;
+    cfg.ingest.image.caption.enabled = true;
+    cfg.ingest.image.caption.max_pixels = 512;
    cfg
 }

@@ -67,7 +67,7 @@ fn mk_mock(canned: &str) -> MockLanguageModel {
 #[test]
 fn apply_caption_no_op_when_feature_disabled() {
    let mut cfg = Config::defaults();
-    cfg.image.caption.enabled = false;
+    cfg.ingest.image.caption.enabled = false;
    let mock = mk_mock("ignored");
    let mut block = empty_image_block();
    let mut events: Vec<ProvenanceEvent> = Vec::new();
@@ -292,8 +292,8 @@ fn caption_image_deterministic_with_identical_inputs() {
 #[test]
 fn caption_image_clamps_oversized_max_pixels() {
    let mut cfg = Config::defaults();
-    cfg.image.caption.enabled = true;
-    cfg.image.caption.max_pixels = 99_999; // way over MAX_CAPTION_LONG_EDGE
+    cfg.ingest.image.caption.enabled = true;
+    cfg.ingest.image.caption.max_pixels = 99_999; // way over MAX_CAPTION_LONG_EDGE
    let captured_images: Arc<Mutex<Vec<String>>> = Arc::new(Mutex::new(Vec::new()));
    let mock = CapturingMock {
        captured_system: Arc::new(Mutex::new(None)),
@@ -339,8 +339,8 @@ fn caption_integration_real_ollama_describes_image() {
    use kebab_llm_local::OllamaLanguageModel;

    let mut cfg = Config::defaults();
-    cfg.image.caption.enabled = true;
-    cfg.image.caption.max_pixels = 768;
+    cfg.ingest.image.caption.enabled = true;
+    cfg.ingest.image.caption.max_pixels = 768;
    if let Ok(ep) = std::env::var("KEBAB_MODELS_LLM_ENDPOINT") {
        cfg.models.llm.endpoint = ep;
    } else {
--- a/crates/kebab-parse-image/tests/ocr.rs
+++ b/crates/kebab-parse-image/tests/ocr.rs
@@ -19,10 +19,10 @@ use crate::common::red_100x50_png;

 fn cfg_for_endpoint(endpoint: &str) -> Config {
    let mut cfg = Config::defaults();
-    cfg.image.ocr.endpoint = Some(endpoint.to_string());
-    cfg.image.ocr.model = "gemma4:e4b".to_string();
-    cfg.image.ocr.languages = vec!["eng".to_string(), "kor".to_string()];
-    cfg.image.ocr.max_pixels = 1024;
+    cfg.ingest.image.ocr.endpoint = Some(endpoint.to_string());
+    cfg.ingest.image.ocr.model = "gemma4:e4b".to_string();
+    cfg.ingest.image.ocr.languages = vec!["eng".to_string(), "kor".to_string()];
+    cfg.ingest.image.ocr.max_pixels = 1024;
    cfg
 }

@@ -375,9 +375,9 @@ async fn ocr_integration_real_ollama_transcribes_text() {
    };
    let cfg = {
        let mut c = Config::defaults();
-        c.image.ocr.endpoint = Some(endpoint);
-        c.image.ocr.model = model;
-        c.image.ocr.max_pixels = 1024;
+        c.ingest.image.ocr.endpoint = Some(endpoint);
+        c.ingest.image.ocr.model = model;
+        c.ingest.image.ocr.max_pixels = 1024;
        c
    };
    let text = tokio::task::spawn_blocking(move || run_recognize(cfg, bytes, None))