{ "aggregate_a": { "citation_coverage": null, "empty_result_rate": 0.0, "failed_queries": 0, "groundedness": 0.0, "hit_at_k": { "1": 0.33329999446868896, "10": 0.666700005531311, "3": 0.33329999446868896, "5": 0.666700005531311 }, "mrr": 0.41670000553131104, "precision_at_k_chunk": { "1": 0.33329999446868896, "10": 0.06669999659061432, "3": 0.11110000312328339, "5": 0.13330000638961792 }, "recall_at_k_doc": { "1": 0.33329999446868896, "10": 0.666700005531311, "3": 0.33329999446868896, "5": 0.666700005531311 }, "refusal_correctness": null, "total_queries": 3 }, "aggregate_b": { "citation_coverage": null, "empty_result_rate": 0.0, "failed_queries": 0, "groundedness": 0.0, "hit_at_k": { "1": 0.666700005531311, "10": 1.0, "3": 1.0, "5": 1.0 }, "mrr": 0.833299994468689, "precision_at_k_chunk": { "1": 0.666700005531311, "10": 0.10000000149011612, "3": 0.33329999446868896, "5": 0.20000000298023224 }, "recall_at_k_doc": { "1": 0.666700005531311, "10": 1.0, "3": 1.0, "5": 1.0 }, "refusal_correctness": null, "total_queries": 3 }, "deltas": { "chunker_version_match": "exact", "citation_coverage": null, "empty_result_rate": 0.0, "groundedness": 0.0, "hit_at_k": { "1": 0.33340001106262207, "10": 0.33329999446868896, "3": 0.666700005531311, "5": 0.33329999446868896 }, "mrr": 0.41659998893737793, "precision_at_k_chunk": { "1": 0.33340001106262207, "10": 0.0333000048995018, "3": 0.22219999134540558, "5": 0.06669999659061432 }, "recall_at_k_doc": { "1": 0.33340001106262207, "10": 0.33329999446868896, "3": 0.666700005531311, "5": 0.33329999446868896 }, "refusal_correctness": null }, "per_query": [ { "a_hit_rank": 1, "b_hit_rank": 2, "kind": "loss", "note": "rank 1→2", "query_id": "q-001" }, { "a_hit_rank": 4, "b_hit_rank": 1, "kind": "win", "note": "rank 4→1", "query_id": "q-002" }, { "a_hit_rank": null, "b_hit_rank": 1, "kind": "win", "note": null, "query_id": "q-003" } ], "run_a": "run_a", "run_b": "run_b" }