Mirrors code_rust_ast_snapshot pattern. In-memory CanonicalDocument build so no kebab-parse-code dep (boundary §6.3 respected). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
171 lines
15 KiB
JSON
171 lines
15 KiB
JSON
[
|
|
{
|
|
"block_ids": [
|
|
"bd1be1fd8b8f77e2874755010b36e617"
|
|
],
|
|
"chunk_id": "20e05d99069f939104cdc69c7ef22889",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 5,
|
|
"line_start": 1,
|
|
"symbol": "imports"
|
|
}
|
|
],
|
|
"text": "import os\nimport sys\nfrom typing import List\nfrom pathlib import Path\nfrom collections import defaultdict",
|
|
"token_estimate": 35
|
|
},
|
|
{
|
|
"block_ids": [
|
|
"2fe948bb529221e94c5139951cc65acf"
|
|
],
|
|
"chunk_id": "99cef84788f2cbad3de6fb7c27b81c48",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 12,
|
|
"line_start": 7,
|
|
"symbol": "compute_mrr"
|
|
}
|
|
],
|
|
"text": "def compute_mrr(scores):\n if not scores:\n return 0.0\n return sum(\n 1.0 / r for r in scores\n ) / len(scores)",
|
|
"token_estimate": 44
|
|
},
|
|
{
|
|
"block_ids": [
|
|
"ff944bad66bea107fd2500c35d7ddf68"
|
|
],
|
|
"chunk_id": "28a3abdd51390c9c9bb89aa8b3ff3f46",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 20,
|
|
"line_start": 14,
|
|
"symbol": "MetricsCollector"
|
|
}
|
|
],
|
|
"text": "class MetricsCollector:\n def __init__(self):\n self.scores = []\n self.labels = []\n self.counts = defaultdict(int)\n self.totals = defaultdict(float)\n self.tags = []",
|
|
"token_estimate": 67
|
|
},
|
|
{
|
|
"block_ids": [
|
|
"1e75f40c64ba21ad0bada0f5d35dc232"
|
|
],
|
|
"chunk_id": "031086ad8c4b880d02cb52527382425c",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 30,
|
|
"line_start": 22,
|
|
"symbol": "BaseEvaluator"
|
|
}
|
|
],
|
|
"text": "class BaseEvaluator:\n def evaluate(self, data):\n raise NotImplementedError\n def batch_evaluate(self, items):\n results = []\n for item in items:\n results.append(self.evaluate(item))\n return results\n def name(self):\n return type(self).__name__",
|
|
"token_estimate": 99
|
|
},
|
|
{
|
|
"block_ids": [
|
|
"33d08d6405adb459e90b8d67bab5cc80"
|
|
],
|
|
"chunk_id": "a431bd5ab64b2f12634c0d4f4b3e0841",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 38,
|
|
"line_start": 32,
|
|
"symbol": "MetricsCollector.run"
|
|
}
|
|
],
|
|
"text": "class MetricsCollector:\n def run(self, inputs):\n for inp in inputs:\n score = self._score(inp)\n self.scores.append(\n score\n )",
|
|
"token_estimate": 61
|
|
},
|
|
{
|
|
"block_ids": [
|
|
"af3d89eb1be6e11dfd14af3c86a8ba9c"
|
|
],
|
|
"chunk_id": "00b756d5bcc43858bb98aa609f22ab6c",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 46,
|
|
"line_start": 40,
|
|
"symbol": "MetricsCollector.report"
|
|
}
|
|
],
|
|
"text": "class MetricsCollector:\n def report(self):\n return {\n 'mean': sum(self.scores) / max(len(self.scores), 1),\n 'count': len(self.scores),\n 'tags': self.tags,\n }",
|
|
"token_estimate": 69
|
|
},
|
|
{
|
|
"block_ids": [
|
|
"c86acf6ae110d7f5681093c93ee0e5e5"
|
|
],
|
|
"chunk_id": "90071017de40b5dd57e9d6001657cf14",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 247,
|
|
"line_start": 48,
|
|
"symbol": "big_compute [part 1/2]"
|
|
}
|
|
],
|
|
"text": "def big_compute(data):\n v0 = data[0] if 0 < len(data) else 0\n v1 = data[1] if 1 < len(data) else 0\n v2 = data[2] if 2 < len(data) else 0\n v3 = data[3] if 3 < len(data) else 0\n v4 = data[4] if 4 < len(data) else 0\n v5 = data[5] if 5 < len(data) else 0\n v6 = data[6] if 6 < len(data) else 0\n v7 = data[7] if 7 < len(data) else 0\n v8 = data[8] if 8 < len(data) else 0\n v9 = data[9] if 9 < len(data) else 0\n v10 = data[10] if 10 < len(data) else 0\n v11 = data[11] if 11 < len(data) else 0\n v12 = data[12] if 12 < len(data) else 0\n v13 = data[13] if 13 < len(data) else 0\n v14 = data[14] if 14 < len(data) else 0\n v15 = data[15] if 15 < len(data) else 0\n v16 = data[16] if 16 < len(data) else 0\n v17 = data[17] if 17 < len(data) else 0\n v18 = data[18] if 18 < len(data) else 0\n v19 = data[19] if 19 < len(data) else 0\n v20 = data[20] if 20 < len(data) else 0\n v21 = data[21] if 21 < len(data) else 0\n v22 = data[22] if 22 < len(data) else 0\n v23 = data[23] if 23 < len(data) else 0\n v24 = data[24] if 24 < len(data) else 0\n v25 = data[25] if 25 < len(data) else 0\n v26 = data[26] if 26 < len(data) else 0\n v27 = data[27] if 27 < len(data) else 0\n v28 = data[28] if 28 < len(data) else 0\n v29 = data[29] if 29 < len(data) else 0\n v30 = data[30] if 30 < len(data) else 0\n v31 = data[31] if 31 < len(data) else 0\n v32 = data[32] if 32 < len(data) else 0\n v33 = data[33] if 33 < len(data) else 0\n v34 = data[34] if 34 < len(data) else 0\n v35 = data[35] if 35 < len(data) else 0\n v36 = data[36] if 36 < len(data) else 0\n v37 = data[37] if 37 < len(data) else 0\n v38 = data[38] if 38 < len(data) else 0\n v39 = data[39] if 39 < len(data) else 0\n v40 = data[40] if 40 < len(data) else 0\n v41 = data[41] if 41 < len(data) else 0\n v42 = data[42] if 42 < len(data) else 0\n v43 = data[43] if 43 < len(data) else 0\n v44 = data[44] if 44 < len(data) else 0\n v45 = data[45] if 45 < len(data) else 0\n v46 = data[46] if 46 < len(data) else 0\n v47 = data[47] if 47 < len(data) else 0\n v48 = data[48] if 48 < len(data) else 0\n v49 = data[49] if 49 < len(data) else 0\n v50 = data[50] if 50 < len(data) else 0\n v51 = data[51] if 51 < len(data) else 0\n v52 = data[52] if 52 < len(data) else 0\n v53 = data[53] if 53 < len(data) else 0\n v54 = data[54] if 54 < len(data) else 0\n v55 = data[55] if 55 < len(data) else 0\n v56 = data[56] if 56 < len(data) else 0\n v57 = data[57] if 57 < len(data) else 0\n v58 = data[58] if 58 < len(data) else 0\n v59 = data[59] if 59 < len(data) else 0\n v60 = data[60] if 60 < len(data) else 0\n v61 = data[61] if 61 < len(data) else 0\n v62 = data[62] if 62 < len(data) else 0\n v63 = data[63] if 63 < len(data) else 0\n v64 = data[64] if 64 < len(data) else 0\n v65 = data[65] if 65 < len(data) else 0\n v66 = data[66] if 66 < len(data) else 0\n v67 = data[67] if 67 < len(data) else 0\n v68 = data[68] if 68 < len(data) else 0\n v69 = data[69] if 69 < len(data) else 0\n v70 = data[70] if 70 < len(data) else 0\n v71 = data[71] if 71 < len(data) else 0\n v72 = data[72] if 72 < len(data) else 0\n v73 = data[73] if 73 < len(data) else 0\n v74 = data[74] if 74 < len(data) else 0\n v75 = data[75] if 75 < len(data) else 0\n v76 = data[76] if 76 < len(data) else 0\n v77 = data[77] if 77 < len(data) else 0\n v78 = data[78] if 78 < len(data) else 0\n v79 = data[79] if 79 < len(data) else 0\n v80 = data[80] if 80 < len(data) else 0\n v81 = data[81] if 81 < len(data) else 0\n v82 = data[82] if 82 < len(data) else 0\n v83 = data[83] if 83 < len(data) else 0\n v84 = data[84] if 84 < len(data) else 0\n v85 = data[85] if 85 < len(data) else 0\n v86 = data[86] if 86 < len(data) else 0\n v87 = data[87] if 87 < len(data) else 0\n v88 = data[88] if 88 < len(data) else 0\n v89 = data[89] if 89 < len(data) else 0\n v90 = data[90] if 90 < len(data) else 0\n v91 = data[91] if 91 < len(data) else 0\n v92 = data[92] if 92 < len(data) else 0\n v93 = data[93] if 93 < len(data) else 0\n v94 = data[94] if 94 < len(data) else 0\n v95 = data[95] if 95 < len(data) else 0\n v96 = data[96] if 96 < len(data) else 0\n v97 = data[97] if 97 < len(data) else 0\n v98 = data[98] if 98 < len(data) else 0\n v99 = data[99] if 99 < len(data) else 0\n v100 = data[100] if 100 < len(data) else 0\n v101 = data[101] if 101 < len(data) else 0\n v102 = data[102] if 102 < len(data) else 0\n v103 = data[103] if 103 < len(data) else 0\n v104 = data[104] if 104 < len(data) else 0\n v105 = data[105] if 105 < len(data) else 0\n v106 = data[106] if 106 < len(data) else 0\n v107 = data[107] if 107 < len(data) else 0\n v108 = data[108] if 108 < len(data) else 0\n v109 = data[109] if 109 < len(data) else 0\n v110 = data[110] if 110 < len(data) else 0\n v111 = data[111] if 111 < len(data) else 0\n v112 = data[112] if 112 < len(data) else 0\n v113 = data[113] if 113 < len(data) else 0\n v114 = data[114] if 114 < len(data) else 0\n v115 = data[115] if 115 < len(data) else 0\n v116 = data[116] if 116 < len(data) else 0\n v117 = data[117] if 117 < len(data) else 0\n v118 = data[118] if 118 < len(data) else 0\n v119 = data[119] if 119 < len(data) else 0\n v120 = data[120] if 120 < len(data) else 0\n v121 = data[121] if 121 < len(data) else 0\n v122 = data[122] if 122 < len(data) else 0\n v123 = data[123] if 123 < len(data) else 0\n v124 = data[124] if 124 < len(data) else 0\n v125 = data[125] if 125 < len(data) else 0\n v126 = data[126] if 126 < len(data) else 0\n v127 = data[127] if 127 < len(data) else 0\n v128 = data[128] if 128 < len(data) else 0\n v129 = data[129] if 129 < len(data) else 0\n v130 = data[130] if 130 < len(data) else 0\n v131 = data[131] if 131 < len(data) else 0\n v132 = data[132] if 132 < len(data) else 0\n v133 = data[133] if 133 < len(data) else 0\n v134 = data[134] if 134 < len(data) else 0\n v135 = data[135] if 135 < len(data) else 0\n v136 = data[136] if 136 < len(data) else 0\n v137 = data[137] if 137 < len(data) else 0\n v138 = data[138] if 138 < len(data) else 0\n v139 = data[139] if 139 < len(data) else 0\n v140 = data[140] if 140 < len(data) else 0\n v141 = data[141] if 141 < len(data) else 0\n v142 = data[142] if 142 < len(data) else 0\n v143 = data[143] if 143 < len(data) else 0\n v144 = data[144] if 144 < len(data) else 0\n v145 = data[145] if 145 < len(data) else 0\n v146 = data[146] if 146 < len(data) else 0\n v147 = data[147] if 147 < len(data) else 0\n v148 = data[148] if 148 < len(data) else 0\n v149 = data[149] if 149 < len(data) else 0\n v150 = data[150] if 150 < len(data) else 0\n v151 = data[151] if 151 < len(data) else 0\n v152 = data[152] if 152 < len(data) else 0\n v153 = data[153] if 153 < len(data) else 0\n v154 = data[154] if 154 < len(data) else 0\n v155 = data[155] if 155 < len(data) else 0\n v156 = data[156] if 156 < len(data) else 0\n v157 = data[157] if 157 < len(data) else 0\n v158 = data[158] if 158 < len(data) else 0\n v159 = data[159] if 159 < len(data) else 0\n v160 = data[160] if 160 < len(data) else 0\n v161 = data[161] if 161 < len(data) else 0\n v162 = data[162] if 162 < len(data) else 0\n v163 = data[163] if 163 < len(data) else 0\n v164 = data[164] if 164 < len(data) else 0\n v165 = data[165] if 165 < len(data) else 0\n v166 = data[166] if 166 < len(data) else 0\n v167 = data[167] if 167 < len(data) else 0\n v168 = data[168] if 168 < len(data) else 0\n v169 = data[169] if 169 < len(data) else 0\n v170 = data[170] if 170 < len(data) else 0\n v171 = data[171] if 171 < len(data) else 0\n v172 = data[172] if 172 < len(data) else 0\n v173 = data[173] if 173 < len(data) else 0\n v174 = data[174] if 174 < len(data) else 0\n v175 = data[175] if 175 < len(data) else 0\n v176 = data[176] if 176 < len(data) else 0\n v177 = data[177] if 177 < len(data) else 0\n v178 = data[178] if 178 < len(data) else 0\n v179 = data[179] if 179 < len(data) else 0\n v180 = data[180] if 180 < len(data) else 0\n v181 = data[181] if 181 < len(data) else 0\n v182 = data[182] if 182 < len(data) else 0\n v183 = data[183] if 183 < len(data) else 0\n v184 = data[184] if 184 < len(data) else 0\n v185 = data[185] if 185 < len(data) else 0\n v186 = data[186] if 186 < len(data) else 0\n v187 = data[187] if 187 < len(data) else 0\n v188 = data[188] if 188 < len(data) else 0\n v189 = data[189] if 189 < len(data) else 0\n v190 = data[190] if 190 < len(data) else 0\n v191 = data[191] if 191 < len(data) else 0\n v192 = data[192] if 192 < len(data) else 0\n v193 = data[193] if 193 < len(data) else 0\n v194 = data[194] if 194 < len(data) else 0\n v195 = data[195] if 195 < len(data) else 0\n v196 = data[196] if 196 < len(data) else 0\n v197 = data[197] if 197 < len(data) else 0\n v198 = data[198] if 198 < len(data) else 0",
|
|
"token_estimate": 3015
|
|
},
|
|
{
|
|
"block_ids": [
|
|
"c86acf6ae110d7f5681093c93ee0e5e5"
|
|
],
|
|
"chunk_id": "efc6599ac90e8de5fe8f63896a85d747",
|
|
"chunker_version": "code-python-ast-v1",
|
|
"doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03",
|
|
"heading_path": [],
|
|
"policy_hash": "383e9a070f636294",
|
|
"source_spans": [
|
|
{
|
|
"kind": "code",
|
|
"lang": "python",
|
|
"line_end": 259,
|
|
"line_start": 248,
|
|
"symbol": "big_compute [part 2/2]"
|
|
}
|
|
],
|
|
"text": " v199 = data[199] if 199 < len(data) else 0\n v200 = data[200] if 200 < len(data) else 0\n v201 = data[201] if 201 < len(data) else 0\n v202 = data[202] if 202 < len(data) else 0\n v203 = data[203] if 203 < len(data) else 0\n v204 = data[204] if 204 < len(data) else 0\n v205 = data[205] if 205 < len(data) else 0\n v206 = data[206] if 206 < len(data) else 0\n v207 = data[207] if 207 < len(data) else 0\n v208 = data[208] if 208 < len(data) else 0\n v209 = data[209] if 209 < len(data) else 0\n return sum(data)",
|
|
"token_estimate": 179
|
|
}
|
|
]
|