review(p6-2): 회차 2 지적 반영

- src/ocr.rs:
  • `OllamaVisionOcr::new` 와 `from_parts` 의 입력 검증을 공통
    `fn build` 으로 통합. 두 생성자가 빈 endpoint / 빈 model /
    `max_pixels` 클램프 동일 invariant 를 공유 — \"테스트는 통과하지만
    프로덕션은 panic\" 분기 차단.
  • `max_pixels` clamp 가 실제로 발동 시 `tracing::warn!` 로 사유
    기록 (사용자가 \"왜 항상 4096?\" 디버깅 가능).
  • `downscale_to_long_edge` 의 long-axis 가 `f32` 라운딩으로 1px
    초과하는 코너 케이스 (예: max=1601, long=4001) 후행 클램프로
    엄격히 묶음. doc-comment 의 \"long edge is at most max_long_edge\"
    가 실제 동작과 정확히 일치.
- tests/ocr.rs:
  • 통합 테스트의 이중 게이트 (`#[ignore]` + `KEBAB_OCR_INTEGRATION=1`)
    제거. `--ignored` 만으로 실행 의도 단일 신호화 — `kebab-llm-local`
    의 통합 테스트 컨벤션과 일관됨. endpoint / model 의 env 오버라이드는
    유지.

cargo test -p kebab-parse-image — 28 pass + 1 ignored.
cargo test -p kebab-config — 21 pass.
cargo clippy --workspace --all-targets -- -D warnings — pass.
This commit is contained in:
2026-05-02 05:48:23 +00:00
parent e869710d82
commit 2bede0030f
2 changed files with 55 additions and 36 deletions

View File

@@ -138,16 +138,49 @@ impl OllamaVisionOcr {
Some(s) if !s.is_empty() => s.to_string(),
_ => config.models.llm.endpoint.clone(),
};
Self::build(endpoint, ocr.model.clone(), ocr.languages.clone(), ocr.max_pixels)
}
/// Build directly from explicit fields. Useful for tests that need
/// to point at a wiremock host without going through `Config`.
/// Shares the same input validation as [`Self::new`] so the two
/// constructors agree on what counts as a legal `OllamaVisionOcr` —
/// callers cannot smuggle an empty endpoint or empty model id past
/// `from_parts`.
pub fn from_parts(
endpoint: impl Into<String>,
model: impl Into<String>,
languages: Vec<String>,
max_pixels: u32,
) -> Result<Self> {
Self::build(endpoint.into(), model.into(), languages, max_pixels)
}
/// Shared validation + construction. Centralised so `new` and
/// `from_parts` cannot drift on what they accept.
fn build(
endpoint: String,
model: String,
languages: Vec<String>,
requested_max_pixels: u32,
) -> Result<Self> {
if endpoint.is_empty() {
anyhow::bail!(
"OllamaVisionOcr: endpoint is empty (set image.ocr.endpoint or models.llm.endpoint)"
);
}
let model = ocr.model.trim().to_string();
let model = model.trim().to_string();
if model.is_empty() {
anyhow::bail!("OllamaVisionOcr: image.ocr.model is empty");
anyhow::bail!("OllamaVisionOcr: model is empty");
}
let max_pixels = requested_max_pixels.clamp(MIN_LONG_EDGE, MAX_LONG_EDGE);
if max_pixels != requested_max_pixels {
tracing::warn!(
target: "kebab-parse-image",
"image.ocr.max_pixels = {requested_max_pixels} clamped to {max_pixels} \
(legal range [{MIN_LONG_EDGE}, {MAX_LONG_EDGE}])"
);
}
let max_pixels = ocr.max_pixels.clamp(MIN_LONG_EDGE, MAX_LONG_EDGE);
let client = reqwest::blocking::Client::builder()
.timeout(REQUEST_TIMEOUT)
.build()
@@ -156,29 +189,8 @@ impl OllamaVisionOcr {
client,
endpoint,
model,
languages: ocr.languages.clone(),
max_pixels,
})
}
/// Build directly from explicit fields. Useful for tests that need
/// to point at a wiremock host without going through `Config`.
pub fn from_parts(
endpoint: impl Into<String>,
model: impl Into<String>,
languages: Vec<String>,
max_pixels: u32,
) -> Result<Self> {
let client = reqwest::blocking::Client::builder()
.timeout(REQUEST_TIMEOUT)
.build()
.context("building OCR HTTP client")?;
Ok(Self {
client,
endpoint: endpoint.into(),
model: model.into(),
languages,
max_pixels: max_pixels.clamp(MIN_LONG_EDGE, MAX_LONG_EDGE),
max_pixels,
})
}
@@ -339,8 +351,18 @@ fn downscale_to_long_edge(bytes: &[u8], max_long_edge: u32) -> Result<(Vec<u8>,
(w, h, img)
} else {
let scale = max_long_edge as f32 / long as f32;
let new_w = ((w as f32) * scale).round().max(1.0) as u32;
let new_h = ((h as f32) * scale).round().max(1.0) as u32;
let mut new_w = ((w as f32) * scale).round().max(1.0) as u32;
let mut new_h = ((h as f32) * scale).round().max(1.0) as u32;
// Independent rounding of the two axes can let `f32`'s nearest
// round push the long axis one pixel past `max_long_edge` for
// irrational scales (e.g. `max=1601, long=4001`). Pin the long
// axis to exactly `max_long_edge` so the doc-comment's
// "long edge is at most max_long_edge" stays a strict bound.
if w >= h {
new_w = new_w.min(max_long_edge);
} else {
new_h = new_h.min(max_long_edge);
}
let resized =
img.resize_exact(new_w, new_h, image::imageops::FilterType::Triangle);
(new_w, new_h, resized)

View File

@@ -346,23 +346,20 @@ fn from_parts_clamps_max_pixels_into_legal_range() {
// ── Integration test against real Ollama (opt-in) ────────────────────────
/// End-to-end OCR against the workspace's real Ollama daemon. Skipped
/// unless `KEBAB_OCR_INTEGRATION=1` so the regular `cargo test` run
/// stays hermetic.
/// by default via `#[ignore]` (matching the `kebab-llm-local`
/// convention); a developer who explicitly opts in via `--ignored` is
/// signalling they want the network call. Endpoint / model can still
/// be overridden via env to point at a non-default Ollama host.
///
/// Run with:
///
/// ```sh
/// KEBAB_OCR_INTEGRATION=1 \
/// KEBAB_IMAGE_OCR_ENDPOINT=http://192.168.0.47:11434 \
/// cargo test -p kebab-parse-image ocr_integration -- --ignored
/// cargo test -p kebab-parse-image --test ocr ocr_integration -- --ignored
/// ```
#[tokio::test]
#[ignore = "hits a real Ollama daemon; gated behind KEBAB_OCR_INTEGRATION=1"]
#[ignore = "hits a real Ollama daemon; opt in via `cargo test -- --ignored`"]
async fn ocr_integration_real_ollama_transcribes_text() {
if std::env::var("KEBAB_OCR_INTEGRATION").ok().as_deref() != Some("1") {
eprintln!("skipping ocr_integration: KEBAB_OCR_INTEGRATION != 1");
return;
}
let endpoint = std::env::var("KEBAB_IMAGE_OCR_ENDPOINT")
.unwrap_or_else(|_| "http://192.168.0.47:11434".to_string());
let model =