feat(ocr): T7-T9 — config overrides + engine factory + signature cascade
T7: OcrCfg gains det_model/rec_model/dict overrides + score_thresh/
unclip_ratio/max_boxes (serde default, KEBAB_IMAGE_OCR_* env). OnnxPaddleOcr::new
threads them via ModelPaths::from_config.
T8: build_image_ocr_engine / build_pdf_ocr_engine factories return
Box<dyn OcrEngine>; match on engine string (ollama-vision|paddle-onnx|err).
ImagePipeline.ocr_engine + pdf_ocr_engine signatures switched to &dyn OcrEngine.
OcrEngine gains model() for the progress label.
T9: ingest_config_signature image/pdf branches emit |ocr:1:{engine}:{engine_version}
(memoized blake3 per asset-triple, m3-safe). Unit tests (a)(b)(c) added.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -377,6 +377,36 @@ pub struct OcrCfg {
|
||||
/// `86400`).
|
||||
#[serde(default = "default_ocr_request_timeout_secs")]
|
||||
pub request_timeout_secs: u64,
|
||||
|
||||
// ── paddle-onnx engine overrides (v0.27.0) ──────────────────────────
|
||||
// Only consulted when `engine == "paddle-onnx"`; the ollama-vision
|
||||
// engine ignores them. Each field has a serde default, so pre-v0.27 config
|
||||
// files load unchanged.
|
||||
/// Override path to the detection ONNX model. `None` → bundled
|
||||
/// `assets/paddleocr-onnx/ppocrv5_mobile_det.onnx` (or the directory
|
||||
/// named by `KEBAB_IMAGE_OCR_MODEL_DIR`).
|
||||
#[serde(default)]
|
||||
pub det_model: Option<String>,
|
||||
/// Override path to the recognition ONNX model. `None` → bundled
|
||||
/// `assets/paddleocr-onnx/korean_ppocrv5_mobile_rec.onnx` (or under `KEBAB_IMAGE_OCR_MODEL_DIR`).
|
||||
#[serde(default)]
|
||||
pub rec_model: Option<String>,
|
||||
/// Override path to the character dictionary. `None` → bundled
|
||||
/// `assets/paddleocr-onnx/korean_dict.txt` (or under `KEBAB_IMAGE_OCR_MODEL_DIR`).
|
||||
#[serde(default)]
|
||||
pub dict: Option<String>,
|
||||
/// DBNet detection box score threshold (0.0..=1.0). Boxes whose mean
|
||||
/// probability is below this are dropped. Default `0.3`.
|
||||
#[serde(default = "default_ocr_score_thresh")]
|
||||
pub score_thresh: f32,
|
||||
/// Polygon unclip ratio applied to each detected box before crop.
|
||||
/// Larger = more padding around the text. Default `1.5`.
|
||||
#[serde(default = "default_ocr_unclip_ratio")]
|
||||
pub unclip_ratio: f32,
|
||||
/// Hard cap on detected boxes per image (runaway guard). Extra boxes
|
||||
/// past this count are truncated with a warning. Default `1000`.
|
||||
#[serde(default = "default_ocr_max_boxes")]
|
||||
pub max_boxes: usize,
|
||||
}
|
||||
|
||||
impl OcrCfg {
|
||||
@@ -389,10 +419,29 @@ impl OcrCfg {
|
||||
languages: vec!["eng".to_string(), "kor".to_string()],
|
||||
max_pixels: 1600,
|
||||
request_timeout_secs: default_ocr_request_timeout_secs(),
|
||||
det_model: None,
|
||||
rec_model: None,
|
||||
dict: None,
|
||||
score_thresh: default_ocr_score_thresh(),
|
||||
unclip_ratio: default_ocr_unclip_ratio(),
|
||||
max_boxes: default_ocr_max_boxes(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Default for the paddle-onnx DBNet box score threshold, `0.3`.
///
/// Referenced by the `#[serde(default = "default_ocr_score_thresh")]`
/// attribute on [`OcrCfg::score_thresh`], and also used to seed that field
/// in the `OcrCfg` initializer, so both paths yield the same value.
|
||||
fn default_ocr_score_thresh() -> f32 {
|
||||
0.3
|
||||
}
|
||||
/// Default for the paddle-onnx polygon unclip ratio, `1.5`.
///
/// Referenced by the `#[serde(default = "default_ocr_unclip_ratio")]`
/// attribute on [`OcrCfg::unclip_ratio`], and also used to seed that field
/// in the `OcrCfg` initializer, so both paths yield the same value.
|
||||
fn default_ocr_unclip_ratio() -> f32 {
|
||||
1.5
|
||||
}
|
||||
/// Default for the paddle-onnx per-image detected-box cap, `1000`.
///
/// Referenced by the `#[serde(default = "default_ocr_max_boxes")]`
/// attribute on [`OcrCfg::max_boxes`], and also used to seed that field
/// in the `OcrCfg` initializer, so both paths yield the same value.
|
||||
fn default_ocr_max_boxes() -> usize {
|
||||
1000
|
||||
}
|
||||
|
||||
/// v0.17.2 post-dogfood: matches the legacy hard-coded ceiling so
|
||||
/// existing configs that omit the field keep behaving identically.
|
||||
/// Overridable per config / `KEBAB_IMAGE_OCR_REQUEST_TIMEOUT_SECS`.
|
||||
@@ -1098,6 +1147,34 @@ impl Config {
|
||||
self.image.ocr.request_timeout_secs = n;
|
||||
}
|
||||
}
|
||||
// paddle-onnx engine overrides (v0.27.0). Empty string → None
|
||||
// (fall back to bundled / KEBAB_IMAGE_OCR_MODEL_DIR).
|
||||
"KEBAB_IMAGE_OCR_DET_MODEL" => {
|
||||
self.image.ocr.det_model =
|
||||
if v.is_empty() { None } else { Some(v.clone()) };
|
||||
}
|
||||
"KEBAB_IMAGE_OCR_REC_MODEL" => {
|
||||
self.image.ocr.rec_model =
|
||||
if v.is_empty() { None } else { Some(v.clone()) };
|
||||
}
|
||||
"KEBAB_IMAGE_OCR_DICT" => {
|
||||
self.image.ocr.dict = if v.is_empty() { None } else { Some(v.clone()) };
|
||||
}
|
||||
"KEBAB_IMAGE_OCR_SCORE_THRESH" => {
|
||||
if let Ok(f) = v.parse::<f32>() {
|
||||
self.image.ocr.score_thresh = f;
|
||||
}
|
||||
}
|
||||
"KEBAB_IMAGE_OCR_UNCLIP_RATIO" => {
|
||||
if let Ok(f) = v.parse::<f32>() {
|
||||
self.image.ocr.unclip_ratio = f;
|
||||
}
|
||||
}
|
||||
"KEBAB_IMAGE_OCR_MAX_BOXES" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.image.ocr.max_boxes = n;
|
||||
}
|
||||
}
|
||||
|
||||
// image.caption (P6-3)
|
||||
"KEBAB_IMAGE_CAPTION_ENABLED" => {
|
||||
|
||||
Reference in New Issue
Block a user