chore(ocr): T11/T12 — clippy clean + docs + v0.27.0 bump
T11: fix 12 clippy lints in paddle_onnx.rs/paddle_e2e.rs (doc overindent, finish_non_exhaustive, map_or_else, RangeInclusive::contains, cast_lossless, is_some_and, usize::from). Full-workspace clippy -D warnings = 0. Smoke (paddle-onnx, real binary): clean_paragraph OCR verbatim-correct, real per-region confidence (0.99/0.96/0.95), FTS5 lexical hit on Korean (검색) + English (embedding), parser_version folds |ocr:1:paddle-onnx:<ver>. Big page <4s inference (5.6s ingest incl. one-time session load). T12: README [image.ocr].engine + ARCHITECTURE OCR row + SMOKE paddle-onnx config + HANDOFF + HOTFIXES dated entry. Workspace version 0.26.2 → 0.27.0 (minor: new engine value + config keys). .gitattributes: onnx as plain blobs (no git-lfs). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -39,6 +39,7 @@ impl OcrEngine for MockOcrEngine {
|
||||
"mock-v1".to_string()
|
||||
}
|
||||
|
||||
#[allow(clippy::unnecessary_literal_bound)]
|
||||
fn model(&self) -> &str {
|
||||
"mock-model"
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ pub mod paddle_onnx;
|
||||
|
||||
pub use caption::{apply_caption, caption_image};
|
||||
pub use ocr::{OLLAMA_VISION_ENGINE, OcrEngine, OllamaVisionOcr, apply_ocr};
|
||||
pub use paddle_onnx::{OnnxPaddleOcr, PADDLE_ONNX_ENGINE, engine_version_for_config};
|
||||
pub use paddle_onnx::{ModelPaths, OnnxPaddleOcr, PADDLE_ONNX_ENGINE, engine_version_for_config};
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use kebab_core::{
|
||||
|
||||
@@ -3,13 +3,13 @@
|
||||
//! production dependency (see crate-level rationale + `assets/paddleocr-onnx/NOTICE`).
|
||||
//!
|
||||
//! Pipeline (`recognize`):
|
||||
//! 1. decode (RGB) + downscale long edge to `max_pixels`
|
||||
//! 2. det: ImageNet-normalized NCHW → DBNet prob map `[1,1,H,W]`
|
||||
//! → threshold 0.3 → contours → min-area rect (rotating calipers,
|
||||
//! pure Rust) → unclip(ratio 1.5, pure Rust) → boxes
|
||||
//! 3. crop+rectify: perspective warp each rotated box to a horizontal strip
|
||||
//! 4. rec: 48×W normalized `(x-0.5)/0.5` → `[1,T,11947]` → CTC greedy decode
|
||||
//! 5. assemble reading-order `OcrText`
|
||||
//! 1. decode (RGB) + downscale long edge to `max_pixels`
|
||||
//! 2. det: ImageNet-normalized NCHW → DBNet prob map `[1,1,H,W]` → threshold
|
||||
//! 0.3 → contours → min-area rect (rotating calipers, pure Rust) →
|
||||
//! unclip(ratio 1.5, pure Rust) → boxes
|
||||
//! 3. crop+rectify: perspective warp each rotated box to a horizontal strip
|
||||
//! 4. rec: 48×W normalized `(x-0.5)/0.5` → `[1,T,11947]` → CTC greedy decode
|
||||
//! 5. assemble reading-order `OcrText`
|
||||
//!
|
||||
//! ## Confirmed CTC facts (empirically derived in T0a, see
|
||||
//! `tests/golden/ctc_rec_golden.json` — do NOT re-derive):
|
||||
@@ -82,7 +82,7 @@ impl std::fmt::Debug for OnnxPaddleOcr {
|
||||
.field("unclip_ratio", &self.unclip_ratio)
|
||||
.field("max_boxes", &self.max_boxes)
|
||||
.field("max_pixels", &self.max_pixels)
|
||||
.finish()
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,11 +100,10 @@ impl ModelPaths {
|
||||
/// Default bundled-asset directory: `KEBAB_IMAGE_OCR_MODEL_DIR` if set,
|
||||
/// else the crate's `assets/paddleocr-onnx/`.
|
||||
pub fn from_default_dir() -> Self {
|
||||
let dir = std::env::var("KEBAB_IMAGE_OCR_MODEL_DIR")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
Path::new(env!("CARGO_MANIFEST_DIR")).join("assets/paddleocr-onnx")
|
||||
});
|
||||
let dir = std::env::var("KEBAB_IMAGE_OCR_MODEL_DIR").map_or_else(
|
||||
|_| Path::new(env!("CARGO_MANIFEST_DIR")).join("assets/paddleocr-onnx"),
|
||||
PathBuf::from,
|
||||
);
|
||||
Self {
|
||||
det: dir.join("ppocrv5_mobile_det.onnx"),
|
||||
rec: dir.join("korean_ppocrv5_mobile_rec.onnx"),
|
||||
@@ -211,7 +210,7 @@ impl OnnxPaddleOcr {
|
||||
match idx {
|
||||
CTC_BLANK => None,
|
||||
CTC_SPACE => Some(" "),
|
||||
i if i >= 1 && i <= DICT_LINES => Some(self.dict[i - 1].as_str()),
|
||||
i if (1..=DICT_LINES).contains(&i) => Some(self.dict[i - 1].as_str()),
|
||||
_ => None, // out-of-range guard (should not happen for 11947 classes)
|
||||
}
|
||||
}
|
||||
@@ -226,6 +225,10 @@ impl OcrEngine for OnnxPaddleOcr {
|
||||
self.engine_version.clone()
|
||||
}
|
||||
|
||||
// The trait method's elided lifetime ties the return to `&self`; the body
|
||||
// returns a literal, but the signature must match the trait, so allow the
|
||||
// `'static`-narrowing lint here.
|
||||
#[allow(clippy::unnecessary_literal_bound)]
|
||||
fn model(&self) -> &str {
|
||||
// Static label for the progress display; the per-asset hash lives
|
||||
// in `engine_version`.
|
||||
@@ -335,7 +338,7 @@ impl OnnxPaddleOcr {
|
||||
for (x, y, px) in det_img.enumerate_pixels() {
|
||||
let (xi, yi) = (x as usize, y as usize);
|
||||
for c in 0..3 {
|
||||
let v = px[c] as f32 / 255.0;
|
||||
let v = f32::from(px[c]) / 255.0;
|
||||
arr[[0, c, yi, xi]] = (v - IMAGENET_MEAN[c]) / IMAGENET_STD[c];
|
||||
}
|
||||
}
|
||||
@@ -372,7 +375,7 @@ impl OnnxPaddleOcr {
|
||||
for (x, y, px) in resized.enumerate_pixels() {
|
||||
let (xi, yi) = (x as usize, y as usize);
|
||||
for c in 0..3 {
|
||||
let v = px[c] as f32 / 255.0;
|
||||
let v = f32::from(px[c]) / 255.0;
|
||||
arr[[0, c, yi, xi]] = (v - 0.5) / 0.5; // [-1, 1]
|
||||
}
|
||||
}
|
||||
@@ -447,7 +450,7 @@ fn load_dict(path: &Path) -> Result<Vec<String>> {
|
||||
let raw = std::fs::read_to_string(path)?;
|
||||
// split on '\n'; drop a single trailing empty element from the final newline
|
||||
let mut lines: Vec<String> = raw.split('\n').map(|s| s.trim_end_matches('\r').to_string()).collect();
|
||||
if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
|
||||
if lines.last().is_some_and(String::is_empty) {
|
||||
lines.pop();
|
||||
}
|
||||
Ok(lines)
|
||||
|
||||
@@ -33,7 +33,7 @@ fn cer(gt: &str, pred: &str) -> f64 {
|
||||
for i in 1..=m {
|
||||
let mut cur = vec![i; n + 1];
|
||||
for j in 1..=n {
|
||||
let cost = if g[i - 1] == p[j - 1] { 0 } else { 1 };
|
||||
let cost = usize::from(g[i - 1] != p[j - 1]);
|
||||
cur[j] = (prev[j] + 1).min(cur[j - 1] + 1).min(prev[j - 1] + cost);
|
||||
}
|
||||
prev = cur;
|
||||
@@ -42,11 +42,10 @@ fn cer(gt: &str, pred: &str) -> f64 {
|
||||
}
|
||||
|
||||
fn fixture_dir() -> PathBuf {
|
||||
std::env::var("KEBAB_TEST_OCR_FIXTURE_DIR")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
PathBuf::from("/build/dogfood/corpus/images/synthetic-ocr-bench")
|
||||
})
|
||||
std::env::var("KEBAB_TEST_OCR_FIXTURE_DIR").map_or_else(
|
||||
|_| PathBuf::from("/build/dogfood/corpus/images/synthetic-ocr-bench"),
|
||||
PathBuf::from,
|
||||
)
|
||||
}
|
||||
|
||||
/// T10: undecodable image bytes must surface as an error (the kebab-app caller
|
||||
|
||||
Reference in New Issue
Block a user