diff --git a/crates/kebab-eval/src/compare.rs b/crates/kebab-eval/src/compare.rs index 4ba9bd4..5033ca7 100644 --- a/crates/kebab-eval/src/compare.rs +++ b/crates/kebab-eval/src/compare.rs @@ -184,6 +184,18 @@ pub fn render_report_md(report: &CompareReport) -> String { ), ); } + for k in crate::metrics::TOP_K_VARIANTS { + let _ = writeln!( + out, + "| precision@{k}_chunk | {} | {} | {} |", + fmt(a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)), + fmt(b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN)), + fmt_delta( + a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + ), + ); + } let _ = writeln!( out, "| citation_coverage | {} | {} | {} |", @@ -419,6 +431,7 @@ fn build_deltas( } let mut hit = serde_json::Map::new(); let mut recall = serde_json::Map::new(); + let mut precision = serde_json::Map::new(); for k in crate::metrics::TOP_K_VARIANTS { hit.insert( k.to_string(), @@ -434,11 +447,19 @@ fn build_deltas( b.recall_at_k_doc.get(k).copied().unwrap_or(f32::NAN), ), ); + precision.insert( + k.to_string(), + d( + a.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + b.precision_at_k_chunk.get(k).copied().unwrap_or(f32::NAN), + ), + ); } serde_json::json!({ "hit_at_k": hit, "mrr": d(a.mrr, b.mrr), "recall_at_k_doc": recall, + "precision_at_k_chunk": precision, "citation_coverage": d(a.citation_coverage, b.citation_coverage), "groundedness": d(a.groundedness, b.groundedness), "empty_result_rate": d(a.empty_result_rate, b.empty_result_rate), diff --git a/crates/kebab-eval/tests/fixtures/eval/compare-1.json b/crates/kebab-eval/tests/fixtures/eval/compare-1.json index de408d2..da3f300 100644 --- a/crates/kebab-eval/tests/fixtures/eval/compare-1.json +++ b/crates/kebab-eval/tests/fixtures/eval/compare-1.json @@ -65,6 +65,12 @@ "5": 0.33329999446868896 }, "mrr": 0.41659998893737793, + "precision_at_k_chunk": { + "1": 0.33340001106262207, + "10": 0.0333000048995018, + "3": 0.22219999134540558, + "5": 0.06669999659061432 + }, "recall_at_k_doc": { "1": 0.33340001106262207, "10": 0.33329999446868896,