review(p6-1): 회차 2 지적 반영 — GPS 안전성 + 디버깅
- src/exif_extract.rs:
• `gps_decimal` 에 ±90 / ±180 범위 검증 추가. 비정상 EXIF (예: 위도
300°) 가 들어와도 wire 에 흘러나가지 않고 silent drop.
• GPSLatitudeRef / GPSLongitudeRef 가 빠진 좌표는 양수 가정으로
내보내지 않고 None 반환 — 모호한 부호를 그대로 두는 대신 손상된
메타데이터로 처리.
• `read_from_container` 실패 시 `tracing::debug!` 한 줄로 사유 기록
(운영시 \"EXIF 없음\" vs \"EXIF 손상\" 구분 단서).
- src/dims.rs: `match Some/None` 을 `anyhow::Context::context()?` 로
압축. import 한 줄 추가.
- src/lib.rs: `Vec::with_capacity` 를 dim_warning 분기에 따라
`2` / `3` 으로 정확히 맞추고 의미 주석 한 줄 추가.
- tests/common/mod.rs: `build_exif_blob_gps` 를 `GpsFlavor`
파라미터로 일반화 (`Valid` / `NoRef` / `OutOfRange`). JPEG 스플라이스
로직은 `splice_exif_into_jpeg` 헬퍼로 추출.
- tests/extractor.rs: 회귀 테스트 2건 추가 — `*Ref` 누락 좌표 드롭,
out-of-range 위도 드롭 (경도는 정상 통과 검증).
cargo test -p kebab-parse-image — 16건 (4 unit + 12 integration) pass.
cargo clippy -p kebab-parse-image --all-targets -- -D warnings — pass.
This commit is contained in:
@@ -14,7 +14,7 @@
|
||||
|
||||
use std::io::Cursor;
|
||||
|
||||
use anyhow::Result;
|
||||
use anyhow::{Context, Result};
|
||||
use image::{ImageFormat, ImageReader};
|
||||
|
||||
use crate::MAX_DECODE_DIM;
|
||||
@@ -37,12 +37,9 @@ pub(crate) fn probe(bytes: &[u8]) -> Result<DimOutcome> {
|
||||
.with_guessed_format()
|
||||
.map_err(|e| anyhow::anyhow!("io error guessing format: {e}"))?;
|
||||
|
||||
let format = match reader.format() {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
anyhow::bail!("unsupported or unrecognised image format");
|
||||
}
|
||||
};
|
||||
let format = reader
|
||||
.format()
|
||||
.context("unsupported or unrecognised image format")?;
|
||||
let format_str = format_label(format);
|
||||
|
||||
match reader.into_dimensions() {
|
||||
|
||||
@@ -35,7 +35,13 @@ pub(crate) fn extract_whitelisted(bytes: &[u8]) -> Map<String, JsonValue> {
|
||||
let mut out = Map::new();
|
||||
let exif = match Reader::new().read_from_container(&mut Cursor::new(bytes)) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return out,
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
target: "kebab-parse-image",
|
||||
"no readable EXIF block: {e}"
|
||||
);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(s) = ascii_field(&exif, Tag::DateTimeOriginal)
|
||||
@@ -130,9 +136,21 @@ fn exif_datetime_to_iso(raw: &str) -> Option<String> {
|
||||
|
||||
/// Convert a GPS DMS triple (degrees / minutes / seconds, each
|
||||
/// `Rational`) into a signed decimal degree using the matching N/S/E/W
|
||||
/// reference tag. Returns `None` if either tag is missing or shaped
|
||||
/// unexpectedly.
|
||||
/// reference tag. Returns `None` if any of:
|
||||
///
|
||||
/// * `value_tag` is missing or not a 3-element rational triple
|
||||
/// * `ref_tag` (GPSLatitudeRef / GPSLongitudeRef) is missing — the EXIF
|
||||
/// spec requires it alongside the value, so absence is treated as
|
||||
/// corrupted metadata rather than \"assume positive\"
|
||||
/// * the resulting decimal is non-finite or out of physical range
|
||||
/// (`±90` for latitude, `±180` for longitude)
|
||||
fn gps_decimal(exif: &exif::Exif, value_tag: Tag, ref_tag: Tag) -> Option<f64> {
|
||||
let limit = match value_tag {
|
||||
Tag::GPSLatitude => 90.0_f64,
|
||||
Tag::GPSLongitude => 180.0_f64,
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
let f = exif.get_field(value_tag, In::PRIMARY)?;
|
||||
let dms = match &f.value {
|
||||
Value::Rational(r) if r.len() == 3 => r,
|
||||
@@ -142,17 +160,17 @@ fn gps_decimal(exif: &exif::Exif, value_tag: Tag, ref_tag: Tag) -> Option<f64> {
|
||||
let min = rational_to_f64(&dms[1])?;
|
||||
let sec = rational_to_f64(&dms[2])?;
|
||||
let mut decimal = deg + min / 60.0 + sec / 3600.0;
|
||||
if let Some(reference) = ascii_field(exif, ref_tag) {
|
||||
let r = reference.to_ascii_uppercase();
|
||||
if r.starts_with('S') || r.starts_with('W') {
|
||||
decimal = -decimal;
|
||||
}
|
||||
|
||||
let reference = ascii_field(exif, ref_tag)?;
|
||||
let r = reference.to_ascii_uppercase();
|
||||
if r.starts_with('S') || r.starts_with('W') {
|
||||
decimal = -decimal;
|
||||
}
|
||||
if decimal.is_finite() {
|
||||
Some(decimal)
|
||||
} else {
|
||||
None
|
||||
|
||||
if !decimal.is_finite() || decimal.abs() > limit {
|
||||
return None;
|
||||
}
|
||||
Some(decimal)
|
||||
}
|
||||
|
||||
fn rational_to_f64(r: &exif::Rational) -> Option<f64> {
|
||||
|
||||
@@ -129,7 +129,10 @@ impl Extractor for ImageExtractor {
|
||||
});
|
||||
|
||||
let now = OffsetDateTime::now_utc();
|
||||
let mut events: Vec<ProvenanceEvent> = Vec::with_capacity(3);
|
||||
// Discovered + Parsed (always) + optional Warning when the
|
||||
// dim probe failed.
|
||||
let mut events: Vec<ProvenanceEvent> =
|
||||
Vec::with_capacity(if dim_warning.is_some() { 3 } else { 2 });
|
||||
events.push(ProvenanceEvent {
|
||||
at: asset.discovered_at,
|
||||
agent: "kb-source-fs".to_string(),
|
||||
|
||||
@@ -52,8 +52,25 @@ pub fn no_exif_png() -> Vec<u8> {
|
||||
/// APP1 segment immediately after SOI (FF D8). The EXIF blob is built
|
||||
/// with `exif::experimental::Writer`.
|
||||
pub fn exif_with_gps_jpg() -> Vec<u8> {
|
||||
splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::Valid))
|
||||
}
|
||||
|
||||
/// JPEG carrying GPSLatitude / GPSLongitude triples but missing the
|
||||
/// matching `*Ref` tags. Used to verify the extractor drops the GPS
|
||||
/// coordinates entirely (rather than silently assuming positive sign).
|
||||
pub fn exif_gps_no_ref_jpg() -> Vec<u8> {
|
||||
splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::NoRef))
|
||||
}
|
||||
|
||||
/// JPEG carrying a GPSLatitude triple whose decimal value lands outside
|
||||
/// the legal `[-90, 90]` range (300° here). Used to verify the extractor
|
||||
/// drops the coordinate as corrupted.
|
||||
pub fn exif_gps_out_of_range_jpg() -> Vec<u8> {
|
||||
splice_exif_into_jpeg(build_exif_blob_gps(GpsFlavor::OutOfRange))
|
||||
}
|
||||
|
||||
fn splice_exif_into_jpeg(exif_blob: Vec<u8>) -> Vec<u8> {
|
||||
let base = encode_tiny_jpeg();
|
||||
let exif_blob = build_exif_blob_gps();
|
||||
|
||||
let mut out = Vec::with_capacity(base.len() + exif_blob.len() + 16);
|
||||
// SOI: FF D8.
|
||||
@@ -85,7 +102,21 @@ fn encode_tiny_jpeg() -> Vec<u8> {
|
||||
buf.into_inner()
|
||||
}
|
||||
|
||||
fn build_exif_blob_gps() -> Vec<u8> {
|
||||
/// Selector for which GPS shape the test fixture should embed.
|
||||
#[derive(Clone, Copy)]
|
||||
enum GpsFlavor {
|
||||
/// 37°30'0" N, 127°0'0" E with both `*Ref` tags (= 37.5, 127.0).
|
||||
Valid,
|
||||
/// Same DMS triples but `GPSLatitudeRef` / `GPSLongitudeRef` omitted.
|
||||
/// Extractor must treat this as corrupted metadata and drop the
|
||||
/// coordinates.
|
||||
NoRef,
|
||||
/// Latitude DMS encodes 300° (out of the legal `[-90, 90]` range).
|
||||
/// Extractor must drop the coordinate.
|
||||
OutOfRange,
|
||||
}
|
||||
|
||||
fn build_exif_blob_gps(flavor: GpsFlavor) -> Vec<u8> {
|
||||
let make = Field {
|
||||
tag: Tag::Make,
|
||||
ifd_num: In::PRIMARY,
|
||||
@@ -111,13 +142,17 @@ fn build_exif_blob_gps() -> Vec<u8> {
|
||||
ifd_num: In::PRIMARY,
|
||||
value: Value::Short(vec![1]),
|
||||
};
|
||||
// GPS — 37.5 N, 127.0 E (Seoul-ish). DMS triple: 37°30'0" N,
|
||||
// 127°0'0" E. Each component is num/denom rationals.
|
||||
let (lat_deg, lon_deg) = match flavor {
|
||||
GpsFlavor::OutOfRange => (300_u32, 127_u32),
|
||||
_ => (37_u32, 127_u32),
|
||||
};
|
||||
// GPS DMS triples — `OutOfRange` puts 300° in the latitude degrees
|
||||
// slot so the resulting decimal escapes ±90.
|
||||
let lat = Field {
|
||||
tag: Tag::GPSLatitude,
|
||||
ifd_num: In::PRIMARY,
|
||||
value: Value::Rational(vec![
|
||||
Rational { num: 37, denom: 1 },
|
||||
Rational { num: lat_deg, denom: 1 },
|
||||
Rational { num: 30, denom: 1 },
|
||||
Rational { num: 0, denom: 1 },
|
||||
]),
|
||||
@@ -131,7 +166,7 @@ fn build_exif_blob_gps() -> Vec<u8> {
|
||||
tag: Tag::GPSLongitude,
|
||||
ifd_num: In::PRIMARY,
|
||||
value: Value::Rational(vec![
|
||||
Rational { num: 127, denom: 1 },
|
||||
Rational { num: lon_deg, denom: 1 },
|
||||
Rational { num: 0, denom: 1 },
|
||||
Rational { num: 0, denom: 1 },
|
||||
]),
|
||||
@@ -149,9 +184,11 @@ fn build_exif_blob_gps() -> Vec<u8> {
|
||||
writer.push_field(&datetime);
|
||||
writer.push_field(&orientation);
|
||||
writer.push_field(&lat);
|
||||
writer.push_field(&lat_ref);
|
||||
writer.push_field(&lon);
|
||||
writer.push_field(&lon_ref);
|
||||
if !matches!(flavor, GpsFlavor::NoRef) {
|
||||
writer.push_field(&lat_ref);
|
||||
writer.push_field(&lon_ref);
|
||||
}
|
||||
|
||||
let mut blob = Cursor::new(Vec::new());
|
||||
writer
|
||||
|
||||
@@ -7,7 +7,8 @@ use kebab_parse_image::ImageExtractor;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::common::{
|
||||
corrupt_png, exif_with_gps_jpg, fixture_for, no_exif_png, red_100x50_png, strip_dynamic_at,
|
||||
corrupt_png, exif_gps_no_ref_jpg, exif_gps_out_of_range_jpg, exif_with_gps_jpg, fixture_for,
|
||||
no_exif_png, red_100x50_png, strip_dynamic_at,
|
||||
};
|
||||
|
||||
fn extract_block(doc: &kebab_core::CanonicalDocument) -> &kebab_core::ImageRefBlock {
|
||||
@@ -239,6 +240,51 @@ fn supports_only_image_media_type() {
|
||||
assert!(!e.supports(&kebab_core::MediaType::Pdf));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jpeg_with_gps_missing_ref_drops_coordinates() {
|
||||
let bytes = exif_gps_no_ref_jpg();
|
||||
let fx = fixture_for("img/no-ref.jpg", ImageType::Jpeg, &bytes);
|
||||
let doc = ImageExtractor::new().extract(&fx.ctx(), &bytes).unwrap();
|
||||
let exif = doc
|
||||
.metadata
|
||||
.user
|
||||
.get("exif")
|
||||
.and_then(|v| v.as_object())
|
||||
.expect("exif object present");
|
||||
// Other whitelisted tags still load (Make / Model / …); GPS is
|
||||
// dropped because the *Ref tags are missing.
|
||||
assert!(exif.contains_key("make"));
|
||||
assert!(
|
||||
!exif.contains_key("gps_lat"),
|
||||
"missing GPSLatitudeRef must drop gps_lat"
|
||||
);
|
||||
assert!(
|
||||
!exif.contains_key("gps_lon"),
|
||||
"missing GPSLongitudeRef must drop gps_lon"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn jpeg_with_gps_out_of_range_drops_latitude() {
|
||||
let bytes = exif_gps_out_of_range_jpg();
|
||||
let fx = fixture_for("img/oor.jpg", ImageType::Jpeg, &bytes);
|
||||
let doc = ImageExtractor::new().extract(&fx.ctx(), &bytes).unwrap();
|
||||
let exif = doc
|
||||
.metadata
|
||||
.user
|
||||
.get("exif")
|
||||
.and_then(|v| v.as_object())
|
||||
.expect("exif object present");
|
||||
// Latitude (300° + 30' = ~300.5) is outside ±90, so it must be
|
||||
// dropped. Longitude (127°) stays in range and survives.
|
||||
assert!(
|
||||
!exif.contains_key("gps_lat"),
|
||||
"out-of-range latitude must be dropped"
|
||||
);
|
||||
let lon = exif.get("gps_lon").and_then(|v| v.as_f64()).expect("gps_lon");
|
||||
assert!((lon - 127.0).abs() < 1e-6);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_extract_when_media_type_mismatches() {
|
||||
let bytes = red_100x50_png();
|
||||
|
||||
Reference in New Issue
Block a user