From 58d56467e5f9211d8ef7b1dcbbec24c5ad355142 Mon Sep 17 00:00:00 2001 From: altair823 Date: Sat, 2 May 2026 05:16:37 +0000 Subject: [PATCH] =?UTF-8?q?review(p6-1):=20=ED=9A=8C=EC=B0=A8=202=20?= =?UTF-8?q?=EC=A7=80=EC=A0=81=20=EB=B0=98=EC=98=81=20=E2=80=94=20GPS=20?= =?UTF-8?q?=EC=95=88=EC=A0=84=EC=84=B1=20+=20=EB=94=94=EB=B2=84=EA=B9=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - src/exif_extract.rs: • `gps_decimal` 에 ±90 / ±180 범위 검증 추가. 비정상 EXIF (예: 위도 300°) 가 들어와도 wire 에 흘러나가지 않고 silent drop. • GPSLatitudeRef / GPSLongitudeRef 가 빠진 좌표는 양수 가정으로 내보내지 않고 None 반환 — 모호한 부호를 그대로 두는 대신 손상된 메타데이터로 처리. • `read_from_container` 실패 시 `tracing::debug!` 한 줄로 사유 기록 (운영시 \"EXIF 없음\" vs \"EXIF 손상\" 구분 단서). - src/dims.rs: `match Some/None` 을 `anyhow::Context::context()?` 로 압축. import 한 줄 추가. - src/lib.rs: `Vec::with_capacity` 를 dim_warning 분기에 따라 `2` / `3` 으로 정확히 맞추고 의미 주석 한 줄 추가. - tests/common/mod.rs: `build_exif_blob_gps` 를 `GpsFlavor` 파라미터로 일반화 (`Valid` / `NoRef` / `OutOfRange`). JPEG 스플라이스 로직은 `splice_exif_into_jpeg` 헬퍼로 추출. - tests/extractor.rs: 회귀 테스트 2건 추가 — `*Ref` 누락 좌표 드롭, out-of-range 위도 드롭 (경도는 정상 통과 검증). cargo test -p kebab-parse-image — 16건 (4 unit + 12 integration) pass. cargo clippy -p kebab-parse-image --all-targets -- -D warnings — pass. --- crates/kebab-parse-image/src/dims.rs | 11 ++-- crates/kebab-parse-image/src/exif_extract.rs | 42 +++++++++++----- crates/kebab-parse-image/src/lib.rs | 5 +- crates/kebab-parse-image/tests/common/mod.rs | 53 +++++++++++++++++--- crates/kebab-parse-image/tests/extractor.rs | 48 +++++++++++++++++- 5 files changed, 130 insertions(+), 29 deletions(-) diff --git a/crates/kebab-parse-image/src/dims.rs b/crates/kebab-parse-image/src/dims.rs index 55dbcb7..0e9e8f2 100644 --- a/crates/kebab-parse-image/src/dims.rs +++ b/crates/kebab-parse-image/src/dims.rs @@ -14,7 +14,7 @@ use std::io::Cursor; -use anyhow::Result; +use anyhow::{Context, Result}; use image::{ImageFormat, ImageReader}; use crate::MAX_DECODE_DIM; @@ -37,12 +37,9 @@ pub(crate) fn probe(bytes: &[u8]) -> Result { .with_guessed_format() .map_err(|e| anyhow::anyhow!("io error guessing format: {e}"))?; - let format = match reader.format() { - Some(f) => f, - None => { - anyhow::bail!("unsupported or unrecognised image format"); - } - }; + let format = reader + .format() + .context("unsupported or unrecognised image format")?; let format_str = format_label(format); match reader.into_dimensions() { diff --git a/crates/kebab-parse-image/src/exif_extract.rs b/crates/kebab-parse-image/src/exif_extract.rs index 77a8eaf..3560b1f 100644 --- a/crates/kebab-parse-image/src/exif_extract.rs +++ b/crates/kebab-parse-image/src/exif_extract.rs @@ -35,7 +35,13 @@ pub(crate) fn extract_whitelisted(bytes: &[u8]) -> Map { let mut out = Map::new(); let exif = match Reader::new().read_from_container(&mut Cursor::new(bytes)) { Ok(e) => e, - Err(_) => return out, + Err(e) => { + tracing::debug!( + target: "kebab-parse-image", + "no readable EXIF block: {e}" + ); + return out; + } }; if let Some(s) = ascii_field(&exif, Tag::DateTimeOriginal) @@ -130,9 +136,21 @@ fn exif_datetime_to_iso(raw: &str) -> Option { /// Convert a GPS DMS triple (degrees / minutes / seconds, each /// `Rational`) into a signed decimal degree using the matching N/S/E/W -/// reference tag. Returns `None` if either tag is missing or shaped -/// unexpectedly. +/// reference tag. Returns `None` if any of: +/// +/// * `value_tag` is missing or not a 3-element rational triple +/// * `ref_tag` (GPSLatitudeRef / GPSLongitudeRef) is missing — the EXIF +/// spec requires it alongside the value, so absence is treated as +/// corrupted metadata rather than \"assume positive\" +/// * the resulting decimal is non-finite or out of physical range +/// (`±90` for latitude, `±180` for longitude) fn gps_decimal(exif: &exif::Exif, value_tag: Tag, ref_tag: Tag) -> Option { + let limit = match value_tag { + Tag::GPSLatitude => 90.0_f64, + Tag::GPSLongitude => 180.0_f64, + _ => return None, + }; + let f = exif.get_field(value_tag, In::PRIMARY)?; let dms = match &f.value { Value::Rational(r) if r.len() == 3 => r, @@ -142,17 +160,17 @@ fn gps_decimal(exif: &exif::Exif, value_tag: Tag, ref_tag: Tag) -> Option { let min = rational_to_f64(&dms[1])?; let sec = rational_to_f64(&dms[2])?; let mut decimal = deg + min / 60.0 + sec / 3600.0; - if let Some(reference) = ascii_field(exif, ref_tag) { - let r = reference.to_ascii_uppercase(); - if r.starts_with('S') || r.starts_with('W') { - decimal = -decimal; - } + + let reference = ascii_field(exif, ref_tag)?; + let r = reference.to_ascii_uppercase(); + if r.starts_with('S') || r.starts_with('W') { + decimal = -decimal; } - if decimal.is_finite() { - Some(decimal) - } else { - None + + if !decimal.is_finite() || decimal.abs() > limit { + return None; } + Some(decimal) } fn rational_to_f64(r: &exif::Rational) -> Option { diff --git a/crates/kebab-parse-image/src/lib.rs b/crates/kebab-parse-image/src/lib.rs index 35780fc..a5dc5cf 100644 --- a/crates/kebab-parse-image/src/lib.rs +++ b/crates/kebab-parse-image/src/lib.rs @@ -129,7 +129,10 @@ impl Extractor for ImageExtractor { }); let now = OffsetDateTime::now_utc(); - let mut events: Vec = Vec::with_capacity(3); + // Discovered + Parsed (always) + optional Warning when the + // dim probe failed. + let mut events: Vec = + Vec::with_capacity(if dim_warning.is_some() { 3 } else { 2 }); events.push(ProvenanceEvent { at: asset.discovered_at, agent: "kb-source-fs".to_string(), diff --git a/crates/kebab-parse-image/tests/common/mod.rs b/crates/kebab-parse-image/tests/common/mod.rs index 9202338..5541e09 100644 --- a/crates/kebab-parse-image/tests/common/mod.rs +++ b/crates/kebab-parse-image/tests/common/mod.rs @@ -52,8 +52,25 @@ pub fn no_exif_png() -> Vec { /// APP1 segment immediately after SOI (FF D8). The EXIF blob is built /// with `exif::experimental::Writer`. pub fn exif_with_gps_jpg() -> Vec { + splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::Valid)) +} + +/// JPEG carrying GPSLatitude / GPSLongitude triples but missing the +/// matching `*Ref` tags. Used to verify the extractor drops the GPS +/// coordinates entirely (rather than silently assuming positive sign). +pub fn exif_gps_no_ref_jpg() -> Vec { + splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::NoRef)) +} + +/// JPEG carrying a GPSLatitude triple whose decimal value lands outside +/// the legal `[-90, 90]` range (300° here). Used to verify the extractor +/// drops the coordinate as corrupted. +pub fn exif_gps_out_of_range_jpg() -> Vec { + splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::OutOfRange)) +} + +fn splice_exif_into_jpeg(exif_blob: Vec) -> Vec { let base = encode_tiny_jpeg(); - let exif_blob = build_exif_blob_gps(); let mut out = Vec::with_capacity(base.len() + exif_blob.len() + 16); // SOI: FF D8. @@ -85,7 +102,21 @@ fn encode_tiny_jpeg() -> Vec { buf.into_inner() } -fn build_exif_blob_gps() -> Vec { +/// Selector for which GPS shape the test fixture should embed. +#[derive(Clone, Copy)] +enum GpsFlavor { + /// 37°30'0" N, 127°0'0" E with both `*Ref` tags (= 37.5, 127.0). + Valid, + /// Same DMS triples but `GPSLatitudeRef` / `GPSLongitudeRef` omitted. + /// Extractor must treat this as corrupted metadata and drop the + /// coordinates. + NoRef, + /// Latitude DMS encodes 300° (out of the legal `[-90, 90]` range). + /// Extractor must drop the coordinate. + OutOfRange, +} + +fn build_exif_blob_gps(flavor: GpsFlavor) -> Vec { let make = Field { tag: Tag::Make, ifd_num: In::PRIMARY, @@ -111,13 +142,17 @@ fn build_exif_blob_gps() -> Vec { ifd_num: In::PRIMARY, value: Value::Short(vec![1]), }; - // GPS — 37.5 N, 127.0 E (Seoul-ish). DMS triple: 37°30'0" N, - // 127°0'0" E. Each component is num/denom rationals. + let (lat_deg, lon_deg) = match flavor { + GpsFlavor::OutOfRange => (300_u32, 127_u32), + _ => (37_u32, 127_u32), + }; + // GPS DMS triples — `OutOfRange` puts 300° in the latitude degrees + // slot so the resulting decimal escapes ±90. let lat = Field { tag: Tag::GPSLatitude, ifd_num: In::PRIMARY, value: Value::Rational(vec![ - Rational { num: 37, denom: 1 }, + Rational { num: lat_deg, denom: 1 }, Rational { num: 30, denom: 1 }, Rational { num: 0, denom: 1 }, ]), @@ -131,7 +166,7 @@ fn build_exif_blob_gps() -> Vec { tag: Tag::GPSLongitude, ifd_num: In::PRIMARY, value: Value::Rational(vec![ - Rational { num: 127, denom: 1 }, + Rational { num: lon_deg, denom: 1 }, Rational { num: 0, denom: 1 }, Rational { num: 0, denom: 1 }, ]), @@ -149,9 +184,11 @@ fn build_exif_blob_gps() -> Vec { writer.push_field(&datetime); writer.push_field(&orientation); writer.push_field(&lat); - writer.push_field(&lat_ref); writer.push_field(&lon); - writer.push_field(&lon_ref); + if !matches!(flavor, GpsFlavor::NoRef) { + writer.push_field(&lat_ref); + writer.push_field(&lon_ref); + } let mut blob = Cursor::new(Vec::new()); writer diff --git a/crates/kebab-parse-image/tests/extractor.rs b/crates/kebab-parse-image/tests/extractor.rs index 451d08d..19889b0 100644 --- a/crates/kebab-parse-image/tests/extractor.rs +++ b/crates/kebab-parse-image/tests/extractor.rs @@ -7,7 +7,8 @@ use kebab_parse_image::ImageExtractor; use serde_json::Value; use crate::common::{ - corrupt_png, exif_with_gps_jpg, fixture_for, no_exif_png, red_100x50_png, strip_dynamic_at, + corrupt_png, exif_gps_no_ref_jpg, exif_gps_out_of_range_jpg, exif_with_gps_jpg, fixture_for, + no_exif_png, red_100x50_png, strip_dynamic_at, }; fn extract_block(doc: &kebab_core::CanonicalDocument) -> &kebab_core::ImageRefBlock { @@ -239,6 +240,51 @@ fn supports_only_image_media_type() { assert!(!e.supports(&kebab_core::MediaType::Pdf)); } +#[test] +fn jpeg_with_gps_missing_ref_drops_coordinates() { + let bytes = exif_gps_no_ref_jpg(); + let fx = fixture_for("img/no-ref.jpg", ImageType::Jpeg, &bytes); + let doc = ImageExtractor::new().extract(&fx.ctx(), &bytes).unwrap(); + let exif = doc + .metadata + .user + .get("exif") + .and_then(|v| v.as_object()) + .expect("exif object present"); + // Other whitelisted tags still load (Make / Model / …); GPS is + // dropped because the *Ref tags are missing. + assert!(exif.contains_key("make")); + assert!( + !exif.contains_key("gps_lat"), + "missing GPSLatitudeRef must drop gps_lat" + ); + assert!( + !exif.contains_key("gps_lon"), + "missing GPSLongitudeRef must drop gps_lon" + ); +} + +#[test] +fn jpeg_with_gps_out_of_range_drops_latitude() { + let bytes = exif_gps_out_of_range_jpg(); + let fx = fixture_for("img/oor.jpg", ImageType::Jpeg, &bytes); + let doc = ImageExtractor::new().extract(&fx.ctx(), &bytes).unwrap(); + let exif = doc + .metadata + .user + .get("exif") + .and_then(|v| v.as_object()) + .expect("exif object present"); + // Latitude (300° + 30' = ~300.5) is outside ±90, so it must be + // dropped. Longitude (127°) stays in range and survives. + assert!( + !exif.contains_key("gps_lat"), + "out-of-range latitude must be dropped" + ); + let lon = exif.get("gps_lon").and_then(|v| v.as_f64()).expect("gps_lon"); + assert!((lon - 127.0).abs() < 1e-6); +} + #[test] fn rejects_extract_when_media_type_mismatches() { let bytes = red_100x50_png();