feat(config): env 이름 보존 RHS 갱신 + pdf paddle 신규 env 6키

apply_env whitelist 의 키 문자열(LHS) 전부 불변, 대입 대상만 self.ingest.*
(불변식 #2). KEBAB_PDF_OCR_{DET_MODEL,REC_MODEL,DICT,SCORE_THRESH,
UNCLIP_RATIO,MAX_BOXES} 신규(image.ocr paddle 패턴 대칭).
게이트: clippy --workspace --all-targets 0, kebab-config/app/eval 테스트 green.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-04 12:56:25 +00:00
parent a8ec354188
commit 15e6918cef

View File

@@ -1321,6 +1321,34 @@ impl Config {
"KEBAB_PDF_OCR_LANG_HINT" => { "KEBAB_PDF_OCR_LANG_HINT" => {
self.ingest.pdf.ocr.lang_hint = if v.is_empty() { None } else { Some(v.clone()) }; self.ingest.pdf.ocr.lang_hint = if v.is_empty() { None } else { Some(v.clone()) };
} }
// pdf paddle-onnx engine overrides (v3). image.ocr paddle 패턴 복제.
// Empty string → None (fall back to bundled / KEBAB_IMAGE_OCR_MODEL_DIR).
"KEBAB_PDF_OCR_DET_MODEL" => {
self.ingest.pdf.ocr.det_model =
if v.is_empty() { None } else { Some(v.clone()) };
}
"KEBAB_PDF_OCR_REC_MODEL" => {
self.ingest.pdf.ocr.rec_model =
if v.is_empty() { None } else { Some(v.clone()) };
}
"KEBAB_PDF_OCR_DICT" => {
self.ingest.pdf.ocr.dict = if v.is_empty() { None } else { Some(v.clone()) };
}
"KEBAB_PDF_OCR_SCORE_THRESH" => {
if let Ok(f) = v.parse::<f32>() {
self.ingest.pdf.ocr.score_thresh = f;
}
}
"KEBAB_PDF_OCR_UNCLIP_RATIO" => {
if let Ok(f) = v.parse::<f32>() {
self.ingest.pdf.ocr.unclip_ratio = f;
}
}
"KEBAB_PDF_OCR_MAX_BOXES" => {
if let Ok(n) = v.parse::<usize>() {
self.ingest.pdf.ocr.max_boxes = n;
}
}
// Unknown KEBAB_* keys are silently ignored — see // Unknown KEBAB_* keys are silently ignored — see
// `env_unknown_key_is_ignored` test. // `env_unknown_key_is_ignored` test.
@@ -1594,6 +1622,34 @@ max_pixels = 1600
assert_eq!(c.search.default_k, 25); assert_eq!(c.search.default_k, 25);
} }
/// 불변식 #2: env override 이름(LHS) 100% 보존 — struct 경로가 바뀌어도
/// 기존 `KEBAB_*` 스크립트가 새 경로로 대입되어 무파손.
#[test]
fn env_names_preserved_target_new_paths() {
let mut env = HashMap::new();
env.insert("KEBAB_CHUNKING_TARGET_TOKENS".into(), "640".into());
env.insert("KEBAB_INDEXING_MAX_PARALLEL_EXTRACTORS".into(), "6".into());
env.insert("KEBAB_IMAGE_OCR_ENABLED".into(), "true".into());
env.insert("KEBAB_PDF_OCR_ENGINE".into(), "paddle-onnx".into());
let c = Config::defaults().apply_env(&env);
assert_eq!(c.ingest.chunking.target_tokens, 640);
assert_eq!(c.ingest.max_parallel_extractors, 6);
assert!(c.ingest.image.ocr.enabled);
assert_eq!(c.ingest.pdf.ocr.engine, "paddle-onnx");
}
#[test]
fn env_pdf_paddle_symmetric_overrides() {
let mut env = HashMap::new();
env.insert("KEBAB_PDF_OCR_DET_MODEL".into(), "/d.onnx".into());
env.insert("KEBAB_PDF_OCR_SCORE_THRESH".into(), "0.4".into());
env.insert("KEBAB_PDF_OCR_MAX_BOXES".into(), "500".into());
let c = Config::defaults().apply_env(&env);
assert_eq!(c.ingest.pdf.ocr.det_model.as_deref(), Some("/d.onnx"));
assert!((c.ingest.pdf.ocr.score_thresh - 0.4).abs() < 1e-6);
assert_eq!(c.ingest.pdf.ocr.max_boxes, 500);
}
#[test] #[test]
fn env_unknown_key_is_ignored() { fn env_unknown_key_is_ignored() {
let baseline = Config::defaults(); let baseline = Config::defaults();