feat(eval): GoldenQuery.group + 그룹 정합성 검증 (변형 일관성 기반)

This commit is contained in:
2026-05-29 17:06:41 +00:00
parent fe4c854673
commit a51e6395c0
4 changed files with 73 additions and 0 deletions

View File

@@ -503,6 +503,7 @@ mod tests {
must_contain: vec![],
forbidden: vec![],
difficulty: None,
group: None,
};
let g = Some(&g);
// a miss, b hit → Win

View File

@@ -30,6 +30,7 @@ pub fn load_golden_set(path: &Path) -> Result<Vec<GoldenQuery>> {
let queries: Vec<GoldenQuery> = serde_yaml::from_slice(&bytes)
.with_context(|| format!("parse golden YAML at {}", path.display()))?;
check_unique_ids(&queries)?;
check_group_integrity(&queries)?;
Ok(queries)
}
@@ -54,6 +55,40 @@ pub(crate) fn load_golden_set_validated(
Ok(queries)
}
/// Verifies that every query sharing a `group` also shares the same
/// `expected_doc_ids`, compared as a set (order and duplicates are ignored).
///
/// The variant-consistency metric presupposes "different phrasings of the same
/// answer"; if the expected documents diverge inside a group the measurement
/// is meaningless, so loading is rejected.
///
/// Queries whose `group` is `None` are skipped. Within a group, the first
/// query encountered defines the reference set that later queries are
/// compared against.
///
/// # Errors
///
/// Returns an error naming every offending group (sorted, de-duplicated) when
/// at least one group contains queries with differing expected doc sets.
fn check_group_integrity(queries: &[GoldenQuery]) -> Result<()> {
    use std::collections::btree_map::{BTreeMap, Entry};

    // group -> reference expected-doc set, taken from the first query seen in
    // that group. Everything is borrowed from `queries`, so nothing is cloned.
    let mut canonical: BTreeMap<&str, BTreeSet<&str>> = BTreeMap::new();
    // Ordered set: each offending group is reported once, in sorted order.
    let mut offenders: BTreeSet<&str> = BTreeSet::new();

    for q in queries {
        let Some(group) = q.group.as_deref() else {
            continue;
        };
        let docs: BTreeSet<&str> = q.expected_doc_ids.iter().map(|d| d.0.as_str()).collect();
        // Single map lookup: either record the reference set or compare with it.
        match canonical.entry(group) {
            Entry::Vacant(slot) => {
                slot.insert(docs);
            }
            Entry::Occupied(slot) => {
                if *slot.get() != docs {
                    offenders.insert(group);
                }
            }
        }
    }

    if offenders.is_empty() {
        return Ok(());
    }
    let list: Vec<&str> = offenders.into_iter().collect();
    Err(anyhow!(
        "group(s) with divergent expected_doc_ids (same group must share one expected doc set): {}",
        list.join(", ")
    ))
}
fn check_unique_ids(queries: &[GoldenQuery]) -> Result<()> {
let mut seen: HashSet<&str> = HashSet::new();
let mut dups: BTreeSet<String> = BTreeSet::new();
@@ -194,6 +229,37 @@ mod tests {
assert_eq!(qs.len(), 1);
}
#[test]
fn rejects_group_with_divergent_expected_docs() {
    // Two variants claim the same group but point at different documents,
    // so loading the golden set must fail and name the group.
    let yaml = "- id: g1\n query: \"러스트 소유권\"\n group: ownership\n expected_doc_ids: [\"docA\"]\n\
- id: g2\n query: \"rust ownership\"\n group: ownership\n expected_doc_ids: [\"docB\"]\n";
    let dir = tempdir().unwrap();
    let golden = dir.path().join("golden.yaml");
    fs::write(&golden, yaml).unwrap();

    // Alternate formatting renders the whole error chain on one line.
    let msg = format!("{:#}", load_golden_set(&golden).unwrap_err());
    for needle in ["group", "ownership"] {
        assert!(msg.contains(needle), "msg: {msg}");
    }
}
#[test]
fn accepts_group_with_matching_expected_docs() {
    // Both variants of the group agree on the expected document, so the
    // golden set loads and the group id is preserved on the parsed query.
    let yaml = "- id: g1\n query: \"러스트 소유권\"\n group: ownership\n expected_doc_ids: [\"docA\"]\n\
- id: g2\n query: \"rust ownership\"\n group: ownership\n expected_doc_ids: [\"docA\"]\n";
    let dir = tempdir().unwrap();
    let golden = dir.path().join("golden.yaml");
    fs::write(&golden, yaml).unwrap();

    let loaded = load_golden_set(&golden).unwrap();
    assert_eq!(loaded.len(), 2);
    assert_eq!(loaded[0].group.as_deref(), Some("ownership"));
}
fn seed_one_chunk(store: &SqliteStore, doc_id: &str, chunk_id: &str) {
let conn = store.read_conn();
let asset_id = format!("a_{doc_id}");

View File

@@ -456,6 +456,7 @@ mod tests {
must_contain: vec![],
forbidden: vec![],
difficulty: None,
group: None,
}
}

View File

@@ -26,6 +26,11 @@ pub struct GoldenQuery {
pub forbidden: Vec<String>,
#[serde(default)]
pub difficulty: Option<String>,
/// 같은 의미의 여러 표현(동의어·다른 어휘·풀어쓴 문장·한/영)을 묶는
/// 의도 그룹 id. 같은 그룹의 모든 변형은 동일한 `expected_doc_ids`(집합)를
/// 공유해야 한다(loader가 강제). `None`이면 단독 쿼리(기존 동작 불변).
#[serde(default)]
pub group: Option<String>,
}
fn default_lang() -> Lang {