Files
kebab/crates/kebab-eval/tests/fixtures/eval/compare-1.json
th-kim0823 5870a1de15 fix(fb-39): address PR #136 round 1 review
kebab eval compare now surfaces precision_at_k_chunk delta in both
human-readable table + deltas JSON. Snapshot fixture regenerated
additively.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 22:39:11 +09:00

108 lines
2.4 KiB
JSON

{
"aggregate_a": {
"citation_coverage": null,
"empty_result_rate": 0.0,
"failed_queries": 0,
"groundedness": 0.0,
"hit_at_k": {
"1": 0.33329999446868896,
"10": 0.666700005531311,
"3": 0.33329999446868896,
"5": 0.666700005531311
},
"mrr": 0.41670000553131104,
"precision_at_k_chunk": {
"1": 0.33329999446868896,
"10": 0.06669999659061432,
"3": 0.11110000312328339,
"5": 0.13330000638961792
},
"recall_at_k_doc": {
"1": 0.33329999446868896,
"10": 0.666700005531311,
"3": 0.33329999446868896,
"5": 0.666700005531311
},
"refusal_correctness": null,
"total_queries": 3
},
"aggregate_b": {
"citation_coverage": null,
"empty_result_rate": 0.0,
"failed_queries": 0,
"groundedness": 0.0,
"hit_at_k": {
"1": 0.666700005531311,
"10": 1.0,
"3": 1.0,
"5": 1.0
},
"mrr": 0.833299994468689,
"precision_at_k_chunk": {
"1": 0.666700005531311,
"10": 0.10000000149011612,
"3": 0.33329999446868896,
"5": 0.20000000298023224
},
"recall_at_k_doc": {
"1": 0.666700005531311,
"10": 1.0,
"3": 1.0,
"5": 1.0
},
"refusal_correctness": null,
"total_queries": 3
},
"deltas": {
"chunker_version_match": "exact",
"citation_coverage": null,
"empty_result_rate": 0.0,
"groundedness": 0.0,
"hit_at_k": {
"1": 0.33340001106262207,
"10": 0.33329999446868896,
"3": 0.666700005531311,
"5": 0.33329999446868896
},
"mrr": 0.41659998893737793,
"precision_at_k_chunk": {
"1": 0.33340001106262207,
"10": 0.0333000048995018,
"3": 0.22219999134540558,
"5": 0.06669999659061432
},
"recall_at_k_doc": {
"1": 0.33340001106262207,
"10": 0.33329999446868896,
"3": 0.666700005531311,
"5": 0.33329999446868896
},
"refusal_correctness": null
},
"per_query": [
{
"a_hit_rank": 1,
"b_hit_rank": 2,
"kind": "loss",
"note": "rank 1→2",
"query_id": "q-001"
},
{
"a_hit_rank": 4,
"b_hit_rank": 1,
"kind": "win",
"note": "rank 4→1",
"query_id": "q-002"
},
{
"a_hit_rank": null,
"b_hit_rank": 1,
"kind": "win",
"note": null,
"query_id": "q-003"
}
],
"run_a": "run_a",
"run_b": "run_b"
}