T11: fix 12 clippy lints in paddle_onnx.rs/paddle_e2e.rs (doc overindent, finish_non_exhaustive, map_or_else, RangeInclusive::contains, cast_lossless, is_some_and, usize::from). Full-workspace clippy -D warnings = 0. Smoke (paddle-onnx, real binary): clean_paragraph OCR verbatim-correct, real per-region confidence (0.99/0.96/0.95), FTS5 lexical hit on Korean(검색)+ English(embedding), parser_version folds |ocr:1:paddle-onnx:<ver>. Big page <4s inference (5.6s ingest incl. one-time session load). T12: README [image.ocr].engine + ARCHITECTURE OCR row + SMOKE paddle-onnx config + HANDOFF + HOTFIXES dated entry. Workspace version 0.26.2 → 0.27.0 (minor: new engine value + config keys). .gitattributes: onnx as plain blobs (no git-lfs). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
66 lines
1.6 KiB
Rust
66 lines
1.6 KiB
Rust
use std::sync::Mutex;
|
|
|
|
use anyhow::Result;
|
|
use kebab_core::{Lang, OcrText};
|
|
use kebab_parse_image::OcrEngine;
|
|
|
|
/// Test double for [`OcrEngine`] that replays canned text instead of running OCR.
///
/// Instances are built with [`MockOcrEngine::single`] or
/// [`MockOcrEngine::per_page`]. `Debug` is derived so the mock can appear in
/// assertion messages and diagnostic output like any other public type.
#[derive(Debug)]
pub struct MockOcrEngine {
    /// Texts handed back by successive `recognize` calls, in order.
    expected_texts: Vec<String>,
    /// Count of `recognize` calls that returned text so far. Wrapped in a
    /// `Mutex` because `recognize` only receives `&self`.
    call_index: Mutex<usize>,
    /// When `true`, `recognize` returns an error instead of text.
    fail: bool,
}
|
|
|
|
impl MockOcrEngine {
|
|
/// Single text (backward-compat ctor for pdf_ocr_apply.rs 10 sites).
|
|
pub fn single(text: impl Into<String>, fail: bool) -> Self {
|
|
Self {
|
|
expected_texts: vec![text.into()],
|
|
call_index: Mutex::new(0),
|
|
fail,
|
|
}
|
|
}
|
|
|
|
/// Per-page texts (cursor advances per recognize call).
|
|
pub fn per_page(texts: Vec<String>, fail: bool) -> Self {
|
|
Self {
|
|
expected_texts: texts,
|
|
call_index: Mutex::new(0),
|
|
fail,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl OcrEngine for MockOcrEngine {
|
|
fn engine_name(&self) -> &'static str {
|
|
"mock-ocr"
|
|
}
|
|
|
|
fn engine_version(&self) -> String {
|
|
"mock-v1".to_string()
|
|
}
|
|
|
|
#[allow(clippy::unnecessary_literal_bound)]
|
|
fn model(&self) -> &str {
|
|
"mock-model"
|
|
}
|
|
|
|
fn recognize(&self, _img: &[u8], _hint: Option<&Lang>) -> Result<OcrText> {
|
|
if self.fail {
|
|
anyhow::bail!("mock failure");
|
|
}
|
|
let mut idx = self.call_index.lock().unwrap();
|
|
let text = self
|
|
.expected_texts
|
|
.get(*idx)
|
|
.cloned()
|
|
.unwrap_or_else(|| self.expected_texts.last().cloned().unwrap_or_default());
|
|
*idx += 1;
|
|
Ok(OcrText {
|
|
joined: text,
|
|
regions: vec![],
|
|
engine: "mock-ocr".to_string(),
|
|
engine_version: "mock-v1".to_string(),
|
|
})
|
|
}
|
|
}
|