fix(fb-39): address PR #136 round 1 review
`kebab eval compare` now surfaces the `precision_at_k_chunk` delta in both the human-readable table and the deltas JSON. The snapshot fixture was regenerated additively (only new keys were added).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -184,6 +184,18 @@ pub fn render_report_md(report: &CompareReport) -> String {
|
||||
),
|
||||
);
|
||||
}
|
||||
for k in crate::metrics::TOP_K_VARIANTS {
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"| precision@{k}_chunk | {} | {} | {} |",
|
||||
fmt(a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)),
|
||||
fmt(b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)),
|
||||
fmt_delta(
|
||||
a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
|
||||
b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
|
||||
),
|
||||
);
|
||||
}
|
||||
let _ = writeln!(
|
||||
out,
|
||||
"| citation_coverage | {} | {} | {} |",
|
||||
@@ -419,6 +431,7 @@ fn build_deltas(
|
||||
}
|
||||
let mut hit = serde_json::Map::new();
|
||||
let mut recall = serde_json::Map::new();
|
||||
let mut precision = serde_json::Map::new();
|
||||
for k in crate::metrics::TOP_K_VARIANTS {
|
||||
hit.insert(
|
||||
k.to_string(),
|
||||
@@ -434,11 +447,19 @@ fn build_deltas(
|
||||
b.recall_at_k_doc.get(k).copied().unwrap_or(f32::NAN),
|
||||
),
|
||||
);
|
||||
precision.insert(
|
||||
k.to_string(),
|
||||
d(
|
||||
a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
|
||||
b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN),
|
||||
),
|
||||
);
|
||||
}
|
||||
serde_json::json!({
|
||||
"hit_at_k": hit,
|
||||
"mrr": d(a.mrr, b.mrr),
|
||||
"recall_at_k_doc": recall,
|
||||
"precision_at_k_chunk": precision,
|
||||
"citation_coverage": d(a.citation_coverage, b.citation_coverage),
|
||||
"groundedness": d(a.groundedness, b.groundedness),
|
||||
"empty_result_rate": d(a.empty_result_rate, b.empty_result_rate),
|
||||
|
||||
@@ -65,6 +65,12 @@
|
||||
"5": 0.33329999446868896
|
||||
},
|
||||
"mrr": 0.41659998893737793,
|
||||
"precision_at_k_chunk": {
|
||||
"1": 0.33340001106262207,
|
||||
"10": 0.0333000048995018,
|
||||
"3": 0.22219999134540558,
|
||||
"5": 0.06669999659061432
|
||||
},
|
||||
"recall_at_k_doc": {
|
||||
"1": 0.33340001106262207,
|
||||
"10": 0.33329999446868896,
|
||||
|
||||
Reference in New Issue
Block a user