review(p6-3): 회차 2 — image_prep 회귀 테스트 + doc 일반화
- src/image_prep.rs:
• 신규 unit 테스트 6건 — PNG passthrough (zero-decode + 바이트
동일성), JPEG → PNG 재인코딩, 1px 후행 클램프 (max=1601 / long=4001
irrational scale), aspect ratio (4:3 보존, 2% 이내), 손상 PNG
Err, 인식 불가 바이트 Err.
• 모듈 doc-comment 의 "send to vision models" 표현을 "image-to-LM
pipeline / channel" 으로 일반화. 미래 PDF / video keyframe 등
호출자가 doc 만 보고 호출 의도 파악 가능.
cargo test -p kebab-parse-image — 48 pass + 2 ignored
(19 unit (+6 image_prep) + 12 P6-1 + 8 P6-2 + 9 P6-3).
cargo clippy -p kebab-parse-image --all-targets -- -D warnings — pass.
This commit is contained in:
@@ -1,12 +1,15 @@
|
||||
//! Shared image preparation for OCR / caption / future vision pipelines.
|
||||
//! Shared image preparation for any image-to-LM pipeline.
|
||||
//!
|
||||
//! Both P6-2 OCR and P6-3 caption need the same pre-LM step: clamp the
|
||||
//! long edge to a configured max, re-encode as PNG (Ollama's vision
|
||||
//! channel format), pass through the source bytes when they already
|
||||
//! satisfy both constraints. Centralising this here keeps the
|
||||
//! 1px-rounding fix, the PNG passthrough hot path, and the error
|
||||
//! messages in one place — future modules (PDF page thumbnails,
|
||||
//! video keyframes, …) plug in without re-deriving the algorithm.
|
||||
//! P6-2 OCR and P6-3 caption both need the same pre-LM step: clamp
|
||||
//! the long edge to a configured max, re-encode as PNG (the wire
|
||||
//! format vision channels expect — Ollama's `images: [base64, ...]`
|
||||
//! takes PNG/JPEG, but PNG keeps the alpha + lossless invariant we
|
||||
//! prefer for hand-drawn / screenshot inputs), or pass through the
|
||||
//! source bytes when they already satisfy both constraints.
|
||||
//! Centralising this here keeps the 1px-rounding fix, the PNG
|
||||
//! passthrough hot path, and the error messages in one place —
|
||||
//! future image-to-LM channels (PDF page thumbnails, video
|
||||
//! keyframes, …) plug in without re-deriving the algorithm.
|
||||
|
||||
use std::io::Cursor;
|
||||
|
||||
@@ -81,3 +84,106 @@ pub(crate) fn downscale_to_png(
|
||||
.context("encoding image as PNG")?;
|
||||
Ok((out.into_inner(), final_w, final_h))
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Regression tests for `downscale_to_png`: PNG passthrough, JPEG → PNG
    // re-encoding, long-edge clamping, aspect-ratio preservation, and the
    // two error paths (corrupt PNG, unrecognised bytes).

    use super::*;

    use std::io::Cursor;

    use image::{ImageBuffer, Rgb};

    /// The fixed 8-byte signature that opens every PNG stream. Shared by the
    /// "output is PNG" check and the truncated-PNG fixture.
    const PNG_SIGNATURE: [u8; 8] = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];

    /// Encodes a solid blue RGB image of `w`×`h` pixels as PNG bytes.
    ///
    /// A single flat colour compresses aggressively, so even the 4001×3001
    /// fixture is expected to stay within a few kilobytes.
    fn solid_png(w: u32, h: u32) -> Vec<u8> {
        let mut sink = Cursor::new(Vec::new());
        ImageBuffer::from_pixel(w, h, Rgb([0u8, 0, 255]))
            .write_to(&mut sink, ImageFormat::Png)
            .expect("encoding solid PNG must not fail");
        sink.into_inner()
    }

    /// Encodes a solid white RGB image of `w`×`h` pixels as JPEG bytes.
    fn solid_jpeg(w: u32, h: u32) -> Vec<u8> {
        let mut sink = Cursor::new(Vec::new());
        ImageBuffer::from_pixel(w, h, Rgb([255u8, 255, 255]))
            .write_to(&mut sink, ImageFormat::Jpeg)
            .expect("encoding solid JPEG must not fail");
        sink.into_inner()
    }

    /// A PNG that already fits the budget is meant to bypass the decode +
    /// re-encode round-trip; this pins the observable part of that
    /// contract: dimensions are reported and the bytes come back unchanged.
    #[test]
    fn png_within_cap_passes_through_zero_decode() {
        let source = solid_png(100, 50);
        let (encoded, width, height) =
            downscale_to_png(&source, 1024).expect("PNG passthrough must succeed");

        assert_eq!((width, height), (100, 50));
        assert_eq!(
            encoded, source,
            "PNG passthrough must return source bytes verbatim"
        );
    }

    /// A JPEG that already fits the budget keeps its dimensions but must
    /// come back re-encoded as PNG (the wire format).
    #[test]
    fn jpeg_within_cap_reencodes_as_png() {
        let source = solid_jpeg(100, 50);
        let (encoded, width, height) =
            downscale_to_png(&source, 1024).expect("JPEG re-encode must succeed");

        assert_eq!((width, height), (100, 50));
        // The output stream has to open with the PNG signature.
        assert_eq!(
            &encoded[..8],
            &PNG_SIGNATURE,
            "output must be PNG-encoded after JPEG input"
        );
    }

    /// Pathological irrational scale: with `max=1601` and `long=4001`,
    /// rounding each axis independently in f32 could land the long axis on
    /// 1602. The post-resize clamp is expected to bring it back within
    /// `max_long_edge`.
    #[test]
    fn long_edge_clamped_strictly_to_max_for_irrational_scale() {
        let source = solid_png(4001, 3001);
        let (_encoded, width, height) =
            downscale_to_png(&source, 1601).expect("downscale must succeed");

        let long = width.max(height);
        assert!(long <= 1601, "long edge must be ≤ max, got {long}");
    }

    /// Downscaling a 4:3 source must keep the width/height ratio within
    /// 0.02 of 4/3.
    #[test]
    fn aspect_ratio_preserved_within_rounding() {
        let source = solid_png(4000, 3000);
        let (_encoded, width, height) =
            downscale_to_png(&source, 1024).expect("downscale must succeed");

        let ratio = width as f32 / height as f32;
        let drift = (ratio - 4.0 / 3.0).abs();
        assert!(
            drift < 0.02,
            "aspect drift: in=4/3 out={}/{}={ratio}",
            width,
            height
        );
    }

    /// Truncated PNG: only the 8-byte signature is present, so the format
    /// guess succeeds but reading the dimensions cannot. This must surface
    /// as `Err` so callers can route to "skip + warning" instead of feeding
    /// a zero-size image to the downstream pipeline.
    #[test]
    fn corrupt_bytes_return_err() {
        let outcome = downscale_to_png(&PNG_SIGNATURE, 1024);
        assert!(outcome.is_err(), "corrupt PNG must surface as Err");
    }

    /// Bytes that belong to no image format at all: the header sniff is
    /// expected to fail before any dimension read is attempted.
    #[test]
    fn unrecognised_bytes_return_err() {
        let outcome = downscale_to_png(b"definitely not an image", 1024);
        assert!(outcome.is_err(), "non-image bytes must surface as Err");
    }
}
|
||||
|
||||
Reference in New Issue
Block a user