From ab20202241211d0c195b13ec25531ffba0ff0977 Mon Sep 17 00:00:00 2001 From: altair823 Date: Fri, 29 May 2026 17:13:21 +0000 Subject: [PATCH] =?UTF-8?q?test(eval):=20Task1=20=EB=A6=AC=EB=B7=B0=20nit?= =?UTF-8?q?=20=E2=80=94=203+=EB=A9=A4=EB=B2=84=20=EA=B7=B8=EB=A3=B9/group?= =?UTF-8?q?=3DNone=20=ED=85=8C=EC=8A=A4=ED=8A=B8=20+=20=EC=97=90=EB=9F=AC?= =?UTF-8?q?=20=EB=A9=94=EC=8B=9C=EC=A7=80=EC=97=90=20divergent=20query=20i?= =?UTF-8?q?d?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- crates/kebab-eval/src/loader.rs | 52 +++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/crates/kebab-eval/src/loader.rs b/crates/kebab-eval/src/loader.rs index a628659..42e7836 100644 --- a/crates/kebab-eval/src/loader.rs +++ b/crates/kebab-eval/src/loader.rs @@ -60,8 +60,11 @@ pub(crate) fn load_golden_set_validated( /// 전제하므로, 그룹 내 정답이 갈리면 측정이 무의미해진다 → bail. fn check_group_integrity(queries: &[GoldenQuery]) -> Result<()> { use std::collections::BTreeMap; - // group -> (대표 정답 집합, 대표 query id) + // group -> (대표 정답 집합, 대표 query id). 첫 멤버를 canonical 로 삼고 + // 이후 멤버가 다른 expected 를 가지면 offender 로 기록한다. let mut canonical: BTreeMap<&str, (BTreeSet, &str)> = BTreeMap::new(); + // 그룹별 위반 메시지(정렬·dedup 위해 BTreeSet). canonical query id 와 + // divergent query id 를 함께 담아 yaml 수정 시 바로 찾을 수 있게 한다. 
 let mut offenders: BTreeSet<String> = BTreeSet::new(); for q in queries { let Some(group) = q.group.as_deref() else { @@ -72,8 +75,11 @@ fn check_group_integrity(queries: &[GoldenQuery]) -> Result<()> { None => { canonical.insert(group, (docs, q.id.as_str())); } - Some((expected, _first)) if *expected != docs => { - offenders.insert(group.to_string()); + Some((expected, first)) if *expected != docs => { + offenders.insert(format!( + "group '{group}' (query '{}' differs from canonical '{first}')", + q.id + )); } Some(_) => {} } @@ -83,8 +89,8 @@ fn check_group_integrity(queries: &[GoldenQuery]) -> Result<()> { } else { let list: Vec<String> = offenders.into_iter().collect(); Err(anyhow!( - "group(s) with divergent expected_doc_ids (same group must share one expected doc set): {}", - list.join(", ") + "same group must share one expected_doc_ids set, but found divergence — {}", + list.join("; ") )) } } @@ -184,6 +190,42 @@ mod tests { use std::fs; use tempfile::tempdir; + #[test] + fn group_integrity_flags_only_divergent_member_in_3plus_group() { + // g1(docA) canonical, g2(docB) divergent, g3(docA) matches canonical. + // Only g2 is an offender; g3 must pass. Error names g2, not g3. + let tmp = tempdir().unwrap(); + let yaml_path = tmp.path().join("golden.yaml"); + fs::write( + &yaml_path, + "- id: g1\n query: a\n group: gr\n expected_doc_ids: [\"docA\"]\n\ + - id: g2\n query: b\n group: gr\n expected_doc_ids: [\"docB\"]\n\ + - id: g3\n query: c\n group: gr\n expected_doc_ids: [\"docA\"]\n", + ) + .unwrap(); + let err = load_golden_set(&yaml_path).unwrap_err(); + let msg = format!("{err:#}"); + assert!(msg.contains("'g2'"), "should name the divergent query g2: {msg}"); + assert!(!msg.contains("'g3'"), "g3 matches canonical, must not be flagged: {msg}"); + } + + #[test] + fn ungrouped_queries_skip_group_integrity() { + // group=None entries mixed with a valid group must not interfere. 
+ let tmp = tempdir().unwrap(); + let yaml_path = tmp.path().join("golden.yaml"); + fs::write( + &yaml_path, + "- id: solo1\n query: x\n expected_doc_ids: [\"docA\"]\n\ + - id: g1\n query: a\n group: gr\n expected_doc_ids: [\"docB\"]\n\ + - id: solo2\n query: y\n expected_doc_ids: [\"docC\"]\n", + ) + .unwrap(); + let qs = load_golden_set(&yaml_path).unwrap(); + assert_eq!(qs.len(), 3); + assert!(qs[0].group.is_none()); + } + #[test] fn rejects_unknown_expected_chunk_id() { let tmp = tempdir().unwrap();