From 5870a1de15831ec4a880f2e6fb5e580759786211 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 22:39:11 +0900 Subject: [PATCH] fix(fb-39): address PR #136 round 1 review kebab eval compare now surfaces precision_at_k_chunk delta in both human-readable table + deltas JSON. Snapshot fixture regenerated additively. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-eval/src/compare.rs | 21 +++++++++++++++++++ .../tests/fixtures/eval/compare-1.json | 6 ++++++ 2 files changed, 27 insertions(+) diff --git a/crates/kebab-eval/src/compare.rs b/crates/kebab-eval/src/compare.rs index 4ba9bd4..5033ca7 100644 --- a/crates/kebab-eval/src/compare.rs +++ b/crates/kebab-eval/src/compare.rs @@ -184,6 +184,18 @@ pub fn render_report_md(report: &CompareReport) -> String { ), ); } + for k in crate::metrics::TOP_K_VARIANTS { + let _ = writeln!( + out, + "| precision@{k}_chunk | {} | {} | {} |", + fmt(a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)), + fmt(b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)), + fmt_delta( + a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + ), + ); + } let _ = writeln!( out, "| citation_coverage | {} | {} | {} |", @@ -419,6 +431,7 @@ fn build_deltas( } let mut hit = serde_json::Map::new(); let mut recall = serde_json::Map::new(); + let mut precision = serde_json::Map::new(); for k in crate::metrics::TOP_K_VARIANTS { hit.insert( k.to_string(), @@ -434,11 +447,19 @@ fn build_deltas( b.recall_at_k_doc.get(k).copied().unwrap_or(f32::NAN), ), ); + precision.insert( + k.to_string(), + d( + a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + ), + ); } serde_json::json!({ "hit_at_k": hit, "mrr": d(a.mrr, b.mrr), "recall_at_k_doc": recall, + "precision_at_k_chunk": precision, "citation_coverage": d(a.citation_coverage, b.citation_coverage), "groundedness": d(a.groundedness, b.groundedness), "empty_result_rate": d(a.empty_result_rate, b.empty_result_rate), diff --git a/crates/kebab-eval/tests/fixtures/eval/compare-1.json b/crates/kebab-eval/tests/fixtures/eval/compare-1.json index de408d2..da3f300 100644 --- a/crates/kebab-eval/tests/fixtures/eval/compare-1.json +++ b/crates/kebab-eval/tests/fixtures/eval/compare-1.json @@ -65,6 +65,12 @@ "5": 0.33329999446868896 }, "mrr": 0.41659998893737793, + "precision_at_k_chunk": { + "1": 0.33340001106262207, + "10": 0.0333000048995018, + "3": 0.22219999134540558, + "5": 0.06669999659061432 + }, "recall_at_k_doc": { "1": 0.33340001106262207, "10": 0.33329999446868896,