Files
kebab/crates/kebab-parse-image/Cargo.toml
altair823 8f8d3a4100 feat(ocr): T0a/T0/T1 — golden harness(CTC blank=0 도출) + deps(ort rc.9) + dict/NOTICE
T0a: onnxruntime 직접 골든 하네스 → CTC blank/dict 매핑 경험 확정(gt CER 0.000).
T0: 모델 번들 dict+NOTICE(.onnx 는 T12 LFS 결정까지 워크트리 보관).
T1: ort(download-binaries)+imageproc 추가, cargo tree ort rc.9 단일 확인.
2026-06-04 07:43:53 +00:00

75 lines
3.6 KiB
TOML

# Manifest for the kebab image-parsing crate. Everything except `name` and
# `description` is inherited from the workspace root manifest.
[package]
name = "kebab-parse-image"
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
rust-version.workspace = true
description = "Image extractor + EXIF + OCR (Ollama-vision) for the kebab pipeline (P6-1, P6-2)"
# Runtime dependencies. Entries are grouped by purpose (internal crates,
# workspace-pinned utilities, image decoding, HTTP OCR, in-process OCR), and
# each group carries a note on why its feature set looks the way it does.
[dependencies]
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
# `kebab-llm` re-exports the trait crate (`kebab-core::LanguageModel`)
# under a stable surface; the caption adapter consumes any
# `dyn LanguageModel`. We do NOT depend on `kebab-llm-local` (forbidden
# by p6-3 design §8) — the trait abstraction is exactly what the spec
# requires.
kebab-llm = { path = "../kebab-llm" }
anyhow = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
time = { workspace = true }
tracing = { workspace = true }
# `image` ships a wide format menagerie under default features (BMP, DDS,
# Farbfeld, …). We only need PNG / JPEG / WebP / GIF / TIFF for v1 (per
# task spec out-of-scope HEIC/RAW). Trim defaults to keep the dep
# closure small.
image = { version = "0.25", default-features = false, features = ["png", "jpeg", "webp", "gif", "tiff"] }
# kamadak-exif: pure-Rust EXIF reader. Used for the whitelisted tag
# extraction (DateTimeOriginal, GPS, Make, Model, Orientation, Software).
kamadak-exif = "0.6"
# Ollama-vision OCR adapter (P6-2) talks HTTP directly. We keep the
# feature surface identical to `kebab-llm-local` (blocking + json +
# rustls-tls) so both crates share the same TLS backend and the
# transitive tokio runtime is brought in once.
reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
base64 = { workspace = true }
thiserror = { workspace = true }
# paddle-onnx OCR engine (PP-OCRv5, in-process). We reuse the workspace ort
# pin (=2.0.0-rc.9) so the ONNX Runtime native lib stays single-versioned with
# fastembed / kebab-nli (oar-ocr is intentionally NOT a dep — it would pull
# ort rc.12 + ndarray 0.17, splitting the native `links` and threatening the
# embedding stack). `download-binaries` extends the pin the same way
# `kebab-nli/Cargo.toml:23` does: this crate isn't in fastembed's build graph,
# so a standalone `cargo test -p kebab-parse-image` needs it to link onnxruntime.
ort = { workspace = true, features = ["ndarray", "download-binaries"] }
ndarray = { workspace = true }
# imageproc: connected-components / contours for DBNet det post-processing.
# min-area rotated-rect (rotating calipers) and polygon unclip are implemented
# in pure Rust (clipper2 is C++ FFI — would break the single-binary guarantee).
# Default features are switched off because imageproc's default set (as of
# 0.25) includes `image/default`; Cargo unifies features per package, so
# leaving defaults on would re-enable every `image` codec and undo the trim
# above. `rayon` is re-enabled explicitly so the imageproc API surface is the
# same as with defaults.
imageproc = { version = "0.25", default-features = false, features = ["rayon"] }
# Test-only dependencies: temp files and hashing helpers, HTTP fixtures,
# fixture rendering, and the mock / local language-model crates.
[dev-dependencies]
tempfile = { workspace = true }
blake3 = { workspace = true }
# Shared test infrastructure with `kebab-llm-local`: wiremock under
# tokio for HTTP fixtures.
wiremock = { workspace = true }
tokio = { workspace = true, features = ["rt-multi-thread"] }
# Used by `tests/common/mod.rs` to render the opt-in OCR integration
# fixture. Only loaded for tests; the production crate doesn't need
# font rendering.
ab_glyph = "0.2"
# NOTE(review): `base64` is also listed under `[dependencies]`, and Cargo
# already exposes regular dependencies to tests — this entry looks
# redundant; confirm before removing.
base64 = { workspace = true }
# `kebab-llm/mock` exposes `MockLanguageModel` for hermetic caption
# tests. Real adapters (Ollama) live in `kebab-llm-local`, which is
# only allowed at the dev-dep level here — the runtime crate stays
# trait-only, so the §8 forbidden-deps rule (no `kebab-llm-local`
# at runtime) is preserved.
# `kebab-llm` is repeated from `[dependencies]` solely to add the `mock`
# feature for tests.
kebab-llm = { path = "../kebab-llm", features = ["mock"] }
kebab-llm-local = { path = "../kebab-llm-local" }
# Inherit the lint configuration declared in the workspace root manifest
# (`[workspace.lints]`) instead of configuring lints per crate.
[lints]
workspace = true