From 5870a1de15831ec4a880f2e6fb5e580759786211 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 22:39:11 +0900
Subject: [PATCH] fix(fb-39): address PR #136 round 1 review
`kebab eval compare` now surfaces the precision_at_k_chunk delta in both
the human-readable table and the deltas JSON. The snapshot fixture was
regenerated additively.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-eval/src/compare.rs | 21 +++++++++++++++++++
.../tests/fixtures/eval/compare-1.json | 6 ++++++
2 files changed, 27 insertions(+)
diff --git a/crates/kebab-eval/src/compare.rs b/crates/kebab-eval/src/compare.rs
index 4ba9bd4..5033ca7 100644
--- a/crates/kebab-eval/src/compare.rs
+++ b/crates/kebab-eval/src/compare.rs
@@ -184,6 +184,18 @@ pub fn render_report_md(report: &CompareReport) -> String {
),
);
}
+ for k in crate::metrics::TOP_K_VARIANTS {
+ let _ = writeln!(
+ out,
+ "| precision@{k}_chunk | {} | {} | {} |",
+ fmt(a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)),
+ fmt(b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)),
+ fmt_delta(
+ a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
+ b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
+ ),
+ );
+ }
let _ = writeln!(
out,
"| citation_coverage | {} | {} | {} |",
@@ -419,6 +431,7 @@ fn build_deltas(
}
let mut hit = serde_json::Map::new();
let mut recall = serde_json::Map::new();
+ let mut precision = serde_json::Map::new();
for k in crate::metrics::TOP_K_VARIANTS {
hit.insert(
k.to_string(),
@@ -434,11 +447,19 @@ fn build_deltas(
b.recall_at_k_doc.get(k).copied().unwrap_or(f32::NAN),
),
);
+ precision.insert(
+ k.to_string(),
+ d(
+ a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
+ b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
+ ),
+ );
}
serde_json::json!({
"hit_at_k": hit,
"mrr": d(a.mrr, b.mrr),
"recall_at_k_doc": recall,
+ "precision_at_k_chunk": precision,
"citation_coverage": d(a.citation_coverage, b.citation_coverage),
"groundedness": d(a.groundedness, b.groundedness),
"empty_result_rate": d(a.empty_result_rate, b.empty_result_rate),
diff --git a/crates/kebab-eval/tests/fixtures/eval/compare-1.json b/crates/kebab-eval/tests/fixtures/eval/compare-1.json
index de408d2..da3f300 100644
--- a/crates/kebab-eval/tests/fixtures/eval/compare-1.json
+++ b/crates/kebab-eval/tests/fixtures/eval/compare-1.json
@@ -65,6 +65,12 @@
"5": 0.33329999446868896
},
"mrr": 0.41659998893737793,
+ "precision_at_k_chunk": {
+ "1": 0.33340001106262207,
+ "10": 0.0333000048995018,
+ "3": 0.22219999134540558,
+ "5": 0.06669999659061432
+ },
"recall_at_k_doc": {
"1": 0.33340001106262207,
"10": 0.33329999446868896,