review(p6-1): 회차 2 지적 반영 — GPS 안전성 + 디버깅

- src/exif_extract.rs:
  • `gps_decimal` 에 ±90 / ±180 범위 검증 추가. 비정상 EXIF (예: 위도
    300°) 가 들어와도 wire 에 흘러나가지 않고 silent drop.
  • GPSLatitudeRef / GPSLongitudeRef 가 빠진 좌표는 양수 가정으로
    내보내지 않고 None 반환 — 모호한 부호를 그대로 두는 대신 손상된
    메타데이터로 처리.
  • `read_from_container` 실패 시 `tracing::debug!` 한 줄로 사유 기록
    (운영시 \"EXIF 없음\" vs \"EXIF 손상\" 구분 단서).
- src/dims.rs: `match Some/None` 을 `anyhow::Context::context()?` 로
  압축. import 한 줄 추가.
- src/lib.rs: `Vec::with_capacity` 를 dim_warning 분기에 따라
  `2` / `3` 으로 정확히 맞추고 의미 주석 한 줄 추가.
- tests/common/mod.rs: `build_exif_blob_gps` 를 `GpsFlavor`
  파라미터로 일반화 (`Valid` / `NoRef` / `OutOfRange`). JPEG 스플라이스
  로직은 `splice_exif_into_jpeg` 헬퍼로 추출.
- tests/extractor.rs: 회귀 테스트 2건 추가 — `*Ref` 누락 좌표 드롭,
  out-of-range 위도 드롭 (경도는 정상 통과 검증).

cargo test -p kebab-parse-image — 16건 (4 unit + 12 integration) pass.
cargo clippy -p kebab-parse-image --all-targets -- -D warnings — pass.
This commit is contained in:
2026-05-02 05:16:37 +00:00
parent 194dd34668
commit 58d56467e5
5 changed files with 130 additions and 29 deletions

View File

@@ -14,7 +14,7 @@
use std::io::Cursor;
use anyhow::Result;
use anyhow::{Context, Result};
use image::{ImageFormat, ImageReader};
use crate::MAX_DECODE_DIM;
@@ -37,12 +37,9 @@ pub(crate) fn probe(bytes: &[u8]) -> Result<DimOutcome> {
.with_guessed_format()
.map_err(|e| anyhow::anyhow!("io error guessing format: {e}"))?;
let format = match reader.format() {
Some(f) => f,
None => {
anyhow::bail!("unsupported or unrecognised image format");
}
};
let format = reader
.format()
.context("unsupported or unrecognised image format")?;
let format_str = format_label(format);
match reader.into_dimensions() {

View File

@@ -35,7 +35,13 @@ pub(crate) fn extract_whitelisted(bytes: &[u8]) -> Map<String, JsonValue> {
let mut out = Map::new();
let exif = match Reader::new().read_from_container(&mut Cursor::new(bytes)) {
Ok(e) => e,
Err(_) => return out,
Err(e) => {
tracing::debug!(
target: "kebab-parse-image",
"no readable EXIF block: {e}"
);
return out;
}
};
if let Some(s) = ascii_field(&exif, Tag::DateTimeOriginal)
@@ -130,9 +136,21 @@ fn exif_datetime_to_iso(raw: &str) -> Option<String> {
/// Convert a GPS DMS triple (degrees / minutes / seconds, each
/// `Rational`) into a signed decimal degree using the matching N/S/E/W
/// reference tag. Returns `None` if either tag is missing or shaped
/// unexpectedly.
/// reference tag. Returns `None` if any of:
///
/// * `value_tag` is missing or not a 3-element rational triple
/// * `ref_tag` (GPSLatitudeRef / GPSLongitudeRef) is missing — the EXIF
/// spec requires it alongside the value, so absence is treated as
/// corrupted metadata rather than \"assume positive\"
/// * the resulting decimal is non-finite or out of physical range
/// (`±90` for latitude, `±180` for longitude)
fn gps_decimal(exif: &exif::Exif, value_tag: Tag, ref_tag: Tag) -> Option<f64> {
let limit = match value_tag {
Tag::GPSLatitude => 90.0_f64,
Tag::GPSLongitude => 180.0_f64,
_ => return None,
};
let f = exif.get_field(value_tag, In::PRIMARY)?;
let dms = match &f.value {
Value::Rational(r) if r.len() == 3 => r,
@@ -142,17 +160,17 @@ fn gps_decimal(exif: &exif::Exif, value_tag: Tag, ref_tag: Tag) -> Option<f64> {
let min = rational_to_f64(&dms[1])?;
let sec = rational_to_f64(&dms[2])?;
let mut decimal = deg + min / 60.0 + sec / 3600.0;
if let Some(reference) = ascii_field(exif, ref_tag) {
let r = reference.to_ascii_uppercase();
if r.starts_with('S') || r.starts_with('W') {
decimal = -decimal;
}
let reference = ascii_field(exif, ref_tag)?;
let r = reference.to_ascii_uppercase();
if r.starts_with('S') || r.starts_with('W') {
decimal = -decimal;
}
if decimal.is_finite() {
Some(decimal)
} else {
None
if !decimal.is_finite() || decimal.abs() > limit {
return None;
}
Some(decimal)
}
fn rational_to_f64(r: &exif::Rational) -> Option<f64> {

View File

@@ -129,7 +129,10 @@ impl Extractor for ImageExtractor {
});
let now = OffsetDateTime::now_utc();
let mut events: Vec<ProvenanceEvent> = Vec::with_capacity(3);
// Discovered + Parsed (always) + optional Warning when the
// dim probe failed.
let mut events: Vec<ProvenanceEvent> =
Vec::with_capacity(if dim_warning.is_some() { 3 } else { 2 });
events.push(ProvenanceEvent {
at: asset.discovered_at,
agent: "kb-source-fs".to_string(),

View File

@@ -52,8 +52,25 @@ pub fn no_exif_png() -> Vec<u8> {
/// APP1 segment immediately after SOI (FF D8). The EXIF blob is built
/// with `exif::experimental::Writer`.
pub fn exif_with_gps_jpg() -> Vec<u8> {
splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::Valid))
}
/// JPEG carrying GPSLatitude / GPSLongitude triples but missing the
/// matching `*Ref` tags. Used to verify the extractor drops the GPS
/// coordinates entirely (rather than silently assuming positive sign).
pub fn exif_gps_no_ref_jpg() -> Vec<u8> {
splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::NoRef))
}
/// JPEG carrying a GPSLatitude triple whose decimal value lands outside
/// the legal `[-90, 90]` range (300° here). Used to verify the extractor
/// drops the coordinate as corrupted.
pub fn exif_gps_out_of_range_jpg() -> Vec<u8> {
splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::OutOfRange))
}
fn splice_exif_into_jpeg(exif_blob: Vec<u8>) -> Vec<u8> {
let base = encode_tiny_jpeg();
let exif_blob = build_exif_blob_gps();
let mut out = Vec::with_capacity(base.len() + exif_blob.len() + 16);
// SOI: FF D8.
@@ -85,7 +102,21 @@ fn encode_tiny_jpeg() -> Vec<u8> {
buf.into_inner()
}
fn build_exif_blob_gps() -> Vec<u8> {
/// Selector for which GPS shape the test fixture should embed.
#[derive(Clone, Copy)]
enum GpsFlavor {
/// 37°30'0" N, 127°0'0" E with both `*Ref` tags (= 37.5, 127.0).
Valid,
/// Same DMS triples but `GPSLatitudeRef` / `GPSLongitudeRef` omitted.
/// Extractor must treat this as corrupted metadata and drop the
/// coordinates.
NoRef,
/// Latitude DMS encodes 300° (out of the legal `[-90, 90]` range).
/// Extractor must drop the coordinate.
OutOfRange,
}
fn build_exif_blob_gps(flavor: GpsFlavor) -> Vec<u8> {
let make = Field {
tag: Tag::Make,
ifd_num: In::PRIMARY,
@@ -111,13 +142,17 @@ fn build_exif_blob_gps() -> Vec<u8> {
ifd_num: In::PRIMARY,
value: Value::Short(vec![1]),
};
// GPS — 37.5 N, 127.0 E (Seoul-ish). DMS triple: 37°30'0" N,
// 127°0'0" E. Each component is num/denom rationals.
let (lat_deg, lon_deg) = match flavor {
GpsFlavor::OutOfRange => (300_u32, 127_u32),
_ => (37_u32, 127_u32),
};
// GPS DMS triples — `OutOfRange` puts 300° in the latitude degrees
// slot so the resulting decimal escapes ±90.
let lat = Field {
tag: Tag::GPSLatitude,
ifd_num: In::PRIMARY,
value: Value::Rational(vec![
Rational { num: 37, denom: 1 },
Rational { num: lat_deg, denom: 1 },
Rational { num: 30, denom: 1 },
Rational { num: 0, denom: 1 },
]),
@@ -131,7 +166,7 @@ fn build_exif_blob_gps() -> Vec<u8> {
tag: Tag::GPSLongitude,
ifd_num: In::PRIMARY,
value: Value::Rational(vec![
Rational { num: 127, denom: 1 },
Rational { num: lon_deg, denom: 1 },
Rational { num: 0, denom: 1 },
Rational { num: 0, denom: 1 },
]),
@@ -149,9 +184,11 @@ fn build_exif_blob_gps() -> Vec<u8> {
writer.push_field(&datetime);
writer.push_field(&orientation);
writer.push_field(&lat);
writer.push_field(&lat_ref);
writer.push_field(&lon);
writer.push_field(&lon_ref);
if !matches!(flavor, GpsFlavor::NoRef) {
writer.push_field(&lat_ref);
writer.push_field(&lon_ref);
}
let mut blob = Cursor::new(Vec::new());
writer

View File

@@ -7,7 +7,8 @@ use kebab_parse_image::ImageExtractor;
use serde_json::Value;
use crate::common::{
corrupt_png, exif_with_gps_jpg, fixture_for, no_exif_png, red_100x50_png, strip_dynamic_at,
corrupt_png, exif_gps_no_ref_jpg, exif_gps_out_of_range_jpg, exif_with_gps_jpg, fixture_for,
no_exif_png, red_100x50_png, strip_dynamic_at,
};
fn extract_block(doc: &kebab_core::CanonicalDocument) -> &kebab_core::ImageRefBlock {
@@ -239,6 +240,51 @@ fn supports_only_image_media_type() {
assert!(!e.supports(&kebab_core::MediaType::Pdf));
}
#[test]
fn jpeg_with_gps_missing_ref_drops_coordinates() {
let bytes = exif_gps_no_ref_jpg();
let fx = fixture_for("img/no-ref.jpg", ImageType::Jpeg, &bytes);
let doc = ImageExtractor::new().extract(&fx.ctx(), &bytes).unwrap();
let exif = doc
.metadata
.user
.get("exif")
.and_then(|v| v.as_object())
.expect("exif object present");
// Other whitelisted tags still load (Make / Model / …); GPS is
// dropped because the *Ref tags are missing.
assert!(exif.contains_key("make"));
assert!(
!exif.contains_key("gps_lat"),
"missing GPSLatitudeRef must drop gps_lat"
);
assert!(
!exif.contains_key("gps_lon"),
"missing GPSLongitudeRef must drop gps_lon"
);
}
#[test]
fn jpeg_with_gps_out_of_range_drops_latitude() {
let bytes = exif_gps_out_of_range_jpg();
let fx = fixture_for("img/oor.jpg", ImageType::Jpeg, &bytes);
let doc = ImageExtractor::new().extract(&fx.ctx(), &bytes).unwrap();
let exif = doc
.metadata
.user
.get("exif")
.and_then(|v| v.as_object())
.expect("exif object present");
// Latitude (300° + 30' = ~300.5) is outside ±90, so it must be
// dropped. Longitude (127°) stays in range and survives.
assert!(
!exif.contains_key("gps_lat"),
"out-of-range latitude must be dropped"
);
let lon = exif.get("gps_lon").and_then(|v| v.as_f64()).expect("gps_lon");
assert!((lon - 127.0).abs() < 1e-6);
}
#[test]
fn rejects_extract_when_media_type_mismatches() {
let bytes = red_100x50_png();