diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.c.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.c.chunks.snapshot.json index 832c474..ddd7223 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.c.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.c.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "#include \n#include \n\n#define MAX_BUF 4096\n\ntypedef enum {\n OK = 0,\n ERR_PARSE,\n ERR_IO,\n} status_t;\n\ntypedef struct {\n int id;\n char name[64];\n status_t status;\n} record_t;\n\nstatic int counter = 0;", - "token_estimate": 78 + "token_estimate": 78, + "tokenized_korean_text": "# include < stdio . h > # include < stdlib . h > # define MAX _ BUF 4096 typedef enum { OK = 0 , ERR _ PARSE , ERR _ IO , } status _ t ; typedef struct { int id ; char name [ 64 ]; status _ t status ; } record _ t ; static int counter = 0 ;" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "int parse_record(const char *line, record_t *out) {\n if (line == NULL || out == NULL) return ERR_PARSE;\n return OK;\n}", - "token_estimate": 41 + "token_estimate": 41, + "tokenized_korean_text": "int parse _ record ( const char * line , record _ t * out ) { if ( line == NULL || out == NULL ) return ERR _ PARSE ; return OK ; }" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "void print_record(const record_t *r) {\n printf(\"[%d] %s (status=%d)\\n\", r->id, r->name, r->status);\n}", - "token_estimate": 35 + "token_estimate": 35, + "tokenized_korean_text": "void print _ record ( const record _ t * r ) { printf (\"[% d ] % s ( status =% d )\\ n \", r -> id , r -> name , r -> status ); }" }, { "block_ids": [ @@ -81,6 +84,7 @@ } ], "text": "int main(void) {\n record_t r = { .id = 1, .name = \"foo\", .status = OK };\n print_record(&r);\n return 0;\n}", - "token_estimate": 38 + "token_estimate": 38, + "tokenized_korean_text": "int main ( void ) { record _ t r = { . id = 1 , . name = \" foo \", . status = OK }; print _ record (& r ); return 0 ; }" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.chunks.snapshot.json index f1c69be..8d2f54d 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "use std::collections::HashMap;\nuse std::fmt;\n\nconst MAX: usize = 1024;\nconst MIN: usize = 0;", - "token_estimate": 31 + "token_estimate": 31, + "tokenized_korean_text": "use std : : collections : : HashMap ; use std : : fmt ; const MAX : usize = 1024 ; const MIN : usize = 0 ;" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "pub fn parse(input: &str) -> Option {\n input\n .trim()\n .parse()\n .ok()\n}", - "token_estimate": 34 + "token_estimate": 34, + "tokenized_korean_text": "pub fn parse ( input : & str ) -> Option < u 32 > { input . trim ( ) . parse ( ) . ok ( ) }" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "pub struct Foo {\n pub name: String,\n pub value: u32,\n pub tags: Vec,\n pub meta: Option,\n pub count: usize,\n}", - "token_estimate": 47 + "token_estimate": 47, + "tokenized_korean_text": "pub struct Foo { pub name : String , pub value : u 32 , pub tags : Vec < String >, pub meta : Option < String >, pub count : usize , }" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "pub trait Frobable {\n fn frob(&self) -> String;\n fn frob_twice(&self) -> String {\n let a = self.frob();\n let b = self.frob();\n format!(\"{a}{b}\")\n }\n fn name(&self) -> &str;\n}", - "token_estimate": 69 + "token_estimate": 69, + "tokenized_korean_text": "pub trait Frobable { fn frob (& self ) -> String ; fn frob _ twice (& self ) -> String { let a = self . frob (); let b = self . frob (); format !(\"{ a }{ b }\") } fn name (& self ) -> & str ; }" }, { "block_ids": [ @@ -102,7 +106,8 @@ } ], "text": "impl Foo {\n pub fn double(&self) -> u32 {\n self.value\n .checked_mul(2)\n .unwrap_or(u32::MAX)\n }\n}", - "token_estimate": 44 + "token_estimate": 44, + "tokenized_korean_text": "impl Foo { pub fn double (& self ) -> u 32 { self . value . checked _ mul ( 2 ) . unwrap _ or ( u 32 : : MAX ) } }" }, { "block_ids": [ @@ -123,7 +128,8 @@ } ], "text": "impl Foo {\n pub fn triple(&self) -> u32 {\n self.value\n .checked_mul(3)\n .unwrap_or(u32::MAX)\n }\n}", - "token_estimate": 44 + "token_estimate": 44, + "tokenized_korean_text": "impl Foo { pub fn triple (& self ) -> u 32 { self . value . checked _ mul ( 3 ) . unwrap _ or ( u 32 : : MAX ) } }" }, { "block_ids": [ @@ -144,7 +150,8 @@ } ], "text": "pub fn big_fn(input: &[u8]) -> Vec {\n let v0 = input.get(0 as usize).copied().unwrap_or(0);\n let v1 = input.get(1 as usize).copied().unwrap_or(0);\n let v2 = input.get(2 as usize).copied().unwrap_or(0);\n let v3 = input.get(3 as usize).copied().unwrap_or(0);\n let v4 = input.get(4 as usize).copied().unwrap_or(0);\n let v5 = input.get(5 as usize).copied().unwrap_or(0);\n let v6 = input.get(6 as usize).copied().unwrap_or(0);\n let v7 = input.get(7 as usize).copied().unwrap_or(0);\n let v8 = input.get(8 as usize).copied().unwrap_or(0);\n let v9 = input.get(9 as usize).copied().unwrap_or(0);\n let v10 = input.get(10 as usize).copied().unwrap_or(0);\n let v11 = input.get(11 as usize).copied().unwrap_or(0);\n let v12 = input.get(12 as usize).copied().unwrap_or(0);\n let v13 = input.get(13 as usize).copied().unwrap_or(0);\n let v14 = input.get(14 as usize).copied().unwrap_or(0);\n let v15 = input.get(15 as usize).copied().unwrap_or(0);\n let v16 = input.get(16 as usize).copied().unwrap_or(0);\n let v17 = input.get(17 as usize).copied().unwrap_or(0);\n let v18 = input.get(18 as usize).copied().unwrap_or(0);\n let v19 = input.get(19 as usize).copied().unwrap_or(0);\n let v20 = input.get(20 as usize).copied().unwrap_or(0);\n let v21 = input.get(21 as usize).copied().unwrap_or(0);\n let v22 = input.get(22 as usize).copied().unwrap_or(0);\n let v23 = input.get(23 as usize).copied().unwrap_or(0);\n let v24 = input.get(24 as usize).copied().unwrap_or(0);\n let v25 = input.get(25 as usize).copied().unwrap_or(0);\n let v26 = input.get(26 as usize).copied().unwrap_or(0);\n let v27 = input.get(27 as usize).copied().unwrap_or(0);\n let v28 = input.get(28 as usize).copied().unwrap_or(0);\n let v29 = input.get(29 as usize).copied().unwrap_or(0);\n let v30 = input.get(30 as usize).copied().unwrap_or(0);\n let v31 = input.get(31 as usize).copied().unwrap_or(0);\n let v32 = input.get(32 as usize).copied().unwrap_or(0);\n let v33 = input.get(33 as usize).copied().unwrap_or(0);\n let v34 = input.get(34 as usize).copied().unwrap_or(0);\n let v35 = input.get(35 as usize).copied().unwrap_or(0);\n let v36 = input.get(36 as usize).copied().unwrap_or(0);\n let v37 = input.get(37 as usize).copied().unwrap_or(0);\n let v38 = input.get(38 as usize).copied().unwrap_or(0);\n let v39 = input.get(39 as usize).copied().unwrap_or(0);\n let v40 = input.get(40 as usize).copied().unwrap_or(0);\n let v41 = input.get(41 as usize).copied().unwrap_or(0);\n let v42 = input.get(42 as usize).copied().unwrap_or(0);\n let v43 = input.get(43 as usize).copied().unwrap_or(0);\n let v44 = input.get(44 as usize).copied().unwrap_or(0);\n let v45 = input.get(45 as usize).copied().unwrap_or(0);\n let v46 = input.get(46 as usize).copied().unwrap_or(0);\n let v47 = input.get(47 as usize).copied().unwrap_or(0);\n let v48 = input.get(48 as usize).copied().unwrap_or(0);\n let v49 = input.get(49 as usize).copied().unwrap_or(0);\n let v50 = input.get(50 as usize).copied().unwrap_or(0);\n let v51 = input.get(51 as usize).copied().unwrap_or(0);\n let v52 = input.get(52 as usize).copied().unwrap_or(0);\n let v53 = input.get(53 as usize).copied().unwrap_or(0);\n let v54 = input.get(54 as usize).copied().unwrap_or(0);\n let v55 = input.get(55 as usize).copied().unwrap_or(0);\n let v56 = input.get(56 as usize).copied().unwrap_or(0);\n let v57 = input.get(57 as usize).copied().unwrap_or(0);\n let v58 = input.get(58 as usize).copied().unwrap_or(0);\n let v59 = input.get(59 as usize).copied().unwrap_or(0);\n let v60 = input.get(60 as usize).copied().unwrap_or(0);\n let v61 = input.get(61 as usize).copied().unwrap_or(0);\n let v62 = input.get(62 as usize).copied().unwrap_or(0);\n let v63 = input.get(63 as usize).copied().unwrap_or(0);\n let v64 = input.get(64 as usize).copied().unwrap_or(0);\n let v65 = input.get(65 as usize).copied().unwrap_or(0);\n let v66 = input.get(66 as usize).copied().unwrap_or(0);\n let v67 = input.get(67 as usize).copied().unwrap_or(0);\n let v68 = input.get(68 as usize).copied().unwrap_or(0);\n let v69 = input.get(69 as usize).copied().unwrap_or(0);\n let v70 = input.get(70 as usize).copied().unwrap_or(0);\n let v71 = input.get(71 as usize).copied().unwrap_or(0);\n let v72 = input.get(72 as usize).copied().unwrap_or(0);\n let v73 = input.get(73 as usize).copied().unwrap_or(0);\n let v74 = input.get(74 as usize).copied().unwrap_or(0);\n let v75 = input.get(75 as usize).copied().unwrap_or(0);\n let v76 = input.get(76 as usize).copied().unwrap_or(0);\n let v77 = input.get(77 as usize).copied().unwrap_or(0);\n let v78 = input.get(78 as usize).copied().unwrap_or(0);\n let v79 = input.get(79 as usize).copied().unwrap_or(0);\n let v80 = input.get(80 as usize).copied().unwrap_or(0);\n let v81 = input.get(81 as usize).copied().unwrap_or(0);\n let v82 = input.get(82 as usize).copied().unwrap_or(0);\n let v83 = input.get(83 as usize).copied().unwrap_or(0);\n let v84 = input.get(84 as usize).copied().unwrap_or(0);\n let v85 = input.get(85 as usize).copied().unwrap_or(0);\n let v86 = input.get(86 as usize).copied().unwrap_or(0);\n let v87 = input.get(87 as usize).copied().unwrap_or(0);\n let v88 = input.get(88 as usize).copied().unwrap_or(0);\n let v89 = input.get(89 as usize).copied().unwrap_or(0);\n let v90 = input.get(90 as usize).copied().unwrap_or(0);\n let v91 = input.get(91 as usize).copied().unwrap_or(0);\n let v92 = input.get(92 as usize).copied().unwrap_or(0);\n let v93 = input.get(93 as usize).copied().unwrap_or(0);\n let v94 = input.get(94 as usize).copied().unwrap_or(0);\n let v95 = input.get(95 as usize).copied().unwrap_or(0);\n let v96 = input.get(96 as usize).copied().unwrap_or(0);\n let v97 = input.get(97 as usize).copied().unwrap_or(0);\n let v98 = input.get(98 as usize).copied().unwrap_or(0);\n let v99 = input.get(99 as usize).copied().unwrap_or(0);\n let v100 = input.get(100 as usize).copied().unwrap_or(0);\n let v101 = input.get(101 as usize).copied().unwrap_or(0);\n let v102 = input.get(102 as usize).copied().unwrap_or(0);\n let v103 = input.get(103 as usize).copied().unwrap_or(0);\n let v104 = input.get(104 as usize).copied().unwrap_or(0);\n let v105 = input.get(105 as usize).copied().unwrap_or(0);\n let v106 = input.get(106 as usize).copied().unwrap_or(0);\n let v107 = input.get(107 as usize).copied().unwrap_or(0);\n let v108 = input.get(108 as usize).copied().unwrap_or(0);\n let v109 = input.get(109 as usize).copied().unwrap_or(0);\n let v110 = input.get(110 as usize).copied().unwrap_or(0);\n let v111 = input.get(111 as usize).copied().unwrap_or(0);\n let v112 = input.get(112 as usize).copied().unwrap_or(0);\n let v113 = input.get(113 as usize).copied().unwrap_or(0);\n let v114 = input.get(114 as usize).copied().unwrap_or(0);\n let v115 = input.get(115 as usize).copied().unwrap_or(0);\n let v116 = input.get(116 as usize).copied().unwrap_or(0);\n let v117 = input.get(117 as usize).copied().unwrap_or(0);\n let v118 = input.get(118 as usize).copied().unwrap_or(0);\n let v119 = input.get(119 as usize).copied().unwrap_or(0);\n let v120 = input.get(120 as usize).copied().unwrap_or(0);\n let v121 = input.get(121 as usize).copied().unwrap_or(0);\n let v122 = input.get(122 as usize).copied().unwrap_or(0);\n let v123 = input.get(123 as usize).copied().unwrap_or(0);\n let v124 = input.get(124 as usize).copied().unwrap_or(0);\n let v125 = input.get(125 as usize).copied().unwrap_or(0);\n let v126 = input.get(126 as usize).copied().unwrap_or(0);\n let v127 = input.get(127 as usize).copied().unwrap_or(0);\n let v128 = input.get(128 as usize).copied().unwrap_or(0);\n let v129 = input.get(129 as usize).copied().unwrap_or(0);\n let v130 = input.get(130 as usize).copied().unwrap_or(0);\n let v131 = input.get(131 as usize).copied().unwrap_or(0);\n let v132 = input.get(132 as usize).copied().unwrap_or(0);\n let v133 = input.get(133 as usize).copied().unwrap_or(0);\n let v134 = input.get(134 as usize).copied().unwrap_or(0);\n let v135 = input.get(135 as usize).copied().unwrap_or(0);\n let v136 = input.get(136 as usize).copied().unwrap_or(0);\n let v137 = input.get(137 as usize).copied().unwrap_or(0);\n let v138 = input.get(138 as usize).copied().unwrap_or(0);\n let v139 = input.get(139 as usize).copied().unwrap_or(0);\n let v140 = input.get(140 as usize).copied().unwrap_or(0);\n let v141 = input.get(141 as usize).copied().unwrap_or(0);\n let v142 = input.get(142 as usize).copied().unwrap_or(0);\n let v143 = input.get(143 as usize).copied().unwrap_or(0);\n let v144 = input.get(144 as usize).copied().unwrap_or(0);\n let v145 = input.get(145 as usize).copied().unwrap_or(0);\n let v146 = input.get(146 as usize).copied().unwrap_or(0);\n let v147 = input.get(147 as usize).copied().unwrap_or(0);\n let v148 = input.get(148 as usize).copied().unwrap_or(0);\n let v149 = input.get(149 as usize).copied().unwrap_or(0);\n let v150 = input.get(150 as usize).copied().unwrap_or(0);\n let v151 = input.get(151 as usize).copied().unwrap_or(0);\n let v152 = input.get(152 as usize).copied().unwrap_or(0);\n let v153 = input.get(153 as usize).copied().unwrap_or(0);\n let v154 = input.get(154 as usize).copied().unwrap_or(0);\n let v155 = input.get(155 as usize).copied().unwrap_or(0);\n let v156 = input.get(156 as usize).copied().unwrap_or(0);\n let v157 = input.get(157 as usize).copied().unwrap_or(0);\n let v158 = input.get(158 as usize).copied().unwrap_or(0);\n let v159 = input.get(159 as usize).copied().unwrap_or(0);\n let v160 = input.get(160 as usize).copied().unwrap_or(0);\n let v161 = input.get(161 as usize).copied().unwrap_or(0);\n let v162 = input.get(162 as usize).copied().unwrap_or(0);\n let v163 = input.get(163 as usize).copied().unwrap_or(0);\n let v164 = input.get(164 as usize).copied().unwrap_or(0);\n let v165 = input.get(165 as usize).copied().unwrap_or(0);\n let v166 = input.get(166 as usize).copied().unwrap_or(0);\n let v167 = input.get(167 as usize).copied().unwrap_or(0);\n let v168 = input.get(168 as usize).copied().unwrap_or(0);\n let v169 = input.get(169 as usize).copied().unwrap_or(0);\n let v170 = input.get(170 as usize).copied().unwrap_or(0);\n let v171 = input.get(171 as usize).copied().unwrap_or(0);\n let v172 = input.get(172 as usize).copied().unwrap_or(0);\n let v173 = input.get(173 as usize).copied().unwrap_or(0);\n let v174 = input.get(174 as usize).copied().unwrap_or(0);\n let v175 = input.get(175 as usize).copied().unwrap_or(0);\n let v176 = input.get(176 as usize).copied().unwrap_or(0);\n let v177 = input.get(177 as usize).copied().unwrap_or(0);\n let v178 = input.get(178 as usize).copied().unwrap_or(0);\n let v179 = input.get(179 as usize).copied().unwrap_or(0);\n let v180 = input.get(180 as usize).copied().unwrap_or(0);\n let v181 = input.get(181 as usize).copied().unwrap_or(0);\n let v182 = input.get(182 as usize).copied().unwrap_or(0);\n let v183 = input.get(183 as usize).copied().unwrap_or(0);\n let v184 = input.get(184 as usize).copied().unwrap_or(0);\n let v185 = input.get(185 as usize).copied().unwrap_or(0);\n let v186 = input.get(186 as usize).copied().unwrap_or(0);\n let v187 = input.get(187 as usize).copied().unwrap_or(0);\n let v188 = input.get(188 as usize).copied().unwrap_or(0);\n let v189 = input.get(189 as usize).copied().unwrap_or(0);\n let v190 = input.get(190 as usize).copied().unwrap_or(0);\n let v191 = input.get(191 as usize).copied().unwrap_or(0);\n let v192 = input.get(192 as usize).copied().unwrap_or(0);\n let v193 = input.get(193 as usize).copied().unwrap_or(0);\n let v194 = input.get(194 as usize).copied().unwrap_or(0);\n let v195 = input.get(195 as usize).copied().unwrap_or(0);\n let v196 = input.get(196 as usize).copied().unwrap_or(0);\n let v197 = input.get(197 as usize).copied().unwrap_or(0);\n let v198 = input.get(198 as usize).copied().unwrap_or(0);", - "token_estimate": 4053 + "token_estimate": 4053, + "tokenized_korean_text": "pub fn big _ fn ( input : &[ u 8 ] ) -> Vec < u 8 > { let v 0 = input . get ( 0 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 1 = input . get ( 1 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 2 = input . get ( 2 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 3 = input . get ( 3 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 4 = input . get ( 4 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 5 = input . get ( 5 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 6 = input . get ( 6 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 7 = input . get ( 7 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 8 = input . get ( 8 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 9 = input . get ( 9 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 10 = input . get ( 10 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 11 = input . get ( 11 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 12 = input . get ( 12 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 13 = input . get ( 13 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 14 = input . get ( 14 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 15 = input . get ( 15 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 16 = input . get ( 16 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 17 = input . get ( 17 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 18 = input . get ( 18 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 19 = input . get ( 19 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 20 = input . get ( 20 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 21 = input . get ( 21 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 22 = input . get ( 22 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 23 = input . get ( 23 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 24 = input . get ( 24 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 25 = input . get ( 25 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 26 = input . get ( 26 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 27 = input . get ( 27 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 28 = input . get ( 28 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 29 = input . get ( 29 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 30 = input . get ( 30 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 31 = input . get ( 31 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 32 = input . get ( 32 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 33 = input . get ( 33 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 34 = input . get ( 34 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 35 = input . get ( 35 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 36 = input . get ( 36 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 37 = input . get ( 37 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 38 = input . get ( 38 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 39 = input . get ( 39 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 40 = input . get ( 40 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 41 = input . get ( 41 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 42 = input . get ( 42 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 43 = input . get ( 43 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 44 = input . get ( 44 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 45 = input . get ( 45 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 46 = input . get ( 46 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 47 = input . get ( 47 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 48 = input . get ( 48 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 49 = input . get ( 49 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 50 = input . get ( 50 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 51 = input . get ( 51 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 52 = input . get ( 52 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 53 = input . get ( 53 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 54 = input . get ( 54 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 55 = input . get ( 55 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 56 = input . get ( 56 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 57 = input . get ( 57 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 58 = input . get ( 58 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 59 = input . get ( 59 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 60 = input . get ( 60 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 61 = input . get ( 61 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 62 = input . get ( 62 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 63 = input . get ( 63 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 64 = input . get ( 64 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 65 = input . get ( 65 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 66 = input . get ( 66 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 67 = input . get ( 67 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 68 = input . get ( 68 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 69 = input . get ( 69 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 70 = input . get ( 70 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 71 = input . get ( 71 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 72 = input . get ( 72 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 73 = input . get ( 73 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 74 = input . get ( 74 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 75 = input . get ( 75 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 76 = input . get ( 76 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 77 = input . get ( 77 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 78 = input . get ( 78 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 79 = input . get ( 79 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 80 = input . get ( 80 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 81 = input . get ( 81 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 82 = input . get ( 82 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 83 = input . get ( 83 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 84 = input . get ( 84 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 85 = input . get ( 85 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 86 = input . get ( 86 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 87 = input . get ( 87 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 88 = input . get ( 88 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 89 = input . get ( 89 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 90 = input . get ( 90 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 91 = input . get ( 91 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 92 = input . get ( 92 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 93 = input . get ( 93 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 94 = input . get ( 94 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 95 = input . get ( 95 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 96 = input . get ( 96 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 97 = input . get ( 97 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 98 = input . get ( 98 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 99 = input . get ( 99 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 100 = input . get ( 100 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 101 = input . get ( 101 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 102 = input . get ( 102 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 103 = input . get ( 103 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 104 = input . get ( 104 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 105 = input . get ( 105 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 106 = input . get ( 106 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 107 = input . get ( 107 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 108 = input . get ( 108 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 109 = input . get ( 109 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 110 = input . get ( 110 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 111 = input . get ( 111 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 112 = input . get ( 112 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 113 = input . get ( 113 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 114 = input . get ( 114 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 115 = input . get ( 115 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 116 = input . get ( 116 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 117 = input . get ( 117 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 118 = input . get ( 118 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 119 = input . get ( 119 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 120 = input . get ( 120 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 121 = input . get ( 121 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 122 = input . get ( 122 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 123 = input . get ( 123 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 124 = input . get ( 124 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 125 = input . get ( 125 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 126 = input . get ( 126 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 127 = input . get ( 127 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 128 = input . get ( 128 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 129 = input . get ( 129 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 130 = input . get ( 130 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 131 = input . get ( 131 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 132 = input . get ( 132 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 133 = input . get ( 133 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 134 = input . get ( 134 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 135 = input . get ( 135 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 136 = input . get ( 136 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 137 = input . get ( 137 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 138 = input . get ( 138 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 139 = input . get ( 139 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 140 = input . get ( 140 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 141 = input . get ( 141 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 142 = input . get ( 142 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 143 = input . get ( 143 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 144 = input . get ( 144 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 145 = input . get ( 145 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 146 = input . get ( 146 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 147 = input . get ( 147 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 148 = input . get ( 148 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 149 = input . get ( 149 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 150 = input . get ( 150 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 151 = input . get ( 151 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 152 = input . get ( 152 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 153 = input . get ( 153 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 154 = input . get ( 154 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 155 = input . get ( 155 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 156 = input . get ( 156 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 157 = input . get ( 157 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 158 = input . get ( 158 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 159 = input . get ( 159 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 160 = input . get ( 160 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 161 = input . get ( 161 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 162 = input . get ( 162 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 163 = input . get ( 163 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 164 = input . get ( 164 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 165 = input . get ( 165 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 166 = input . get ( 166 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 167 = input . get ( 167 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 168 = input . get ( 168 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 169 = input . get ( 169 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 170 = input . get ( 170 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 171 = input . get ( 171 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 172 = input . get ( 172 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 173 = input . get ( 173 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 174 = input . get ( 174 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 175 = input . get ( 175 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 176 = input . get ( 176 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 177 = input . get ( 177 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 178 = input . get ( 178 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 179 = input . get ( 179 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 180 = input . get ( 180 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 181 = input . get ( 181 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 182 = input . get ( 182 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 183 = input . get ( 183 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 184 = input . get ( 184 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 185 = input . get ( 185 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 186 = input . get ( 186 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 187 = input . get ( 187 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 188 = input . get ( 188 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 189 = input . get ( 189 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 190 = input . get ( 190 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 191 = input . get ( 191 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 192 = input . get ( 192 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 193 = input . get ( 193 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 194 = input . get ( 194 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 195 = input . get ( 195 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 196 = input . get ( 196 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 197 = input . get ( 197 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 198 = input . get ( 198 as usize ) . copied ( ) . unwrap _ or ( 0 );" }, { "block_ids": [ @@ -165,6 +172,7 @@ } ], "text": " let v199 = input.get(199 as usize).copied().unwrap_or(0);\n let v200 = input.get(200 as usize).copied().unwrap_or(0);\n let v201 = input.get(201 as usize).copied().unwrap_or(0);\n let v202 = input.get(202 as usize).copied().unwrap_or(0);\n let v203 = input.get(203 as usize).copied().unwrap_or(0);\n let v204 = input.get(204 as usize).copied().unwrap_or(0);\n let v205 = input.get(205 as usize).copied().unwrap_or(0);\n let v206 = input.get(206 as usize).copied().unwrap_or(0);\n let v207 = input.get(207 as usize).copied().unwrap_or(0);\n let v208 = input.get(208 as usize).copied().unwrap_or(0);\n let v209 = input.get(209 as usize).copied().unwrap_or(0);\n vec![0u8]\n}", - "token_estimate": 233 + "token_estimate": 233, + "tokenized_korean_text": "let v 199 = input . get ( 199 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 200 = input . get ( 200 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 201 = input . get ( 201 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 202 = input . get ( 202 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 203 = input . get ( 203 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 204 = input . get ( 204 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 205 = input . get ( 205 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 206 = input . get ( 206 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 207 = input . get ( 207 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 208 = input . get ( 208 as usize ) . copied ( ) . unwrap _ or ( 0 ); let v 209 = input . get ( 209 as usize ) . copied ( ) . unwrap _ or ( 0 ); vec ! [ 0 u 8 ] }" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.cpp.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.cpp.chunks.snapshot.json index 257d6e9..f6afec8 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.cpp.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.cpp.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "#include \n#include \n\nnamespace kebab {", - "token_estimate": 18 + "token_estimate": 18, + "tokenized_korean_text": "# include < string > # include < vector > namespace kebab {" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "class MdHeadingV1Chunker {\npublic:\n MdHeadingV1Chunker() = default;\n ~MdHeadingV1Chunker() = default;\n\n std::string chunk_doc(const std::string& doc) {\n return doc;\n }\n\n int operator()(int x) const {\n return x * 2;\n }\n\nprivate:\n int counter_ = 0;\n};", - "token_estimate": 95 + "token_estimate": 95, + "tokenized_korean_text": "class MdHeadingV 1 Chunker { public : MdHeadingV 1 Chunker ( ) = default ; ~ MdHeadingV 1 Chunker ( ) = default ; std : : string chunk _ doc ( const std : : string & doc ) { return doc ; } int operator ( ) ( int x ) const { return x * 2 ; } private : int counter _ = 0 ; };" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "template \nT identity(T value) {\n return value;\n}", - "token_estimate": 21 + "token_estimate": 21, + "tokenized_korean_text": "template < typename T > T identity ( T value ) { return value ; }" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "void global_helper() {\n // free function in kebab namespace\n}", - "token_estimate": 22 + "token_estimate": 22, + "tokenized_korean_text": "void global _ helper ( ) { / / free function in kebab namespace }" }, { "block_ids": [ @@ -102,6 +106,7 @@ } ], "text": "int main() {\n kebab::chunk::MdHeadingV1Chunker c;\n return 0;\n}", - "token_estimate": 23 + "token_estimate": 23, + "tokenized_korean_text": "int main ( ) { kebab : : chunk : : MdHeadingV 1 Chunker c ; return 0 ; }" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.go.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.go.chunks.snapshot.json index 26f76c1..d5add54 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.go.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.go.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "import (\n\t\"fmt\"\n\t\"os\"\n\t\"strings\"\n)", - "token_estimate": 12 + "token_estimate": 12, + "tokenized_korean_text": "import ( \" fmt \" \" os \" \" strings \" )" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "func ComputeMRR(scores []float64) float64 {\n\tif len(scores) == 0 {\n\t\treturn 0.0\n\t}\n\t_ = fmt.Sprintf(\"%v\", scores)\n\treturn 1.0 / float64(len(scores))\n}", - "token_estimate": 50 + "token_estimate": 50, + "tokenized_korean_text": "func ComputeMRR ( scores [ ] float 64 ) float 64 { if len ( scores ) == 0 { return 0 . 0 } _ = fmt . Sprintf (\"% v \", scores ) return 1 . 0 / float 64 ( len ( scores ) ) }" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "type MetricsCollector struct {\n\tScores []float64\n\tLabels []string\n\tCounts map[string]int\n\tTotals map[string]float64\n\tTags []string\n}", - "token_estimate": 45 + "token_estimate": 45, + "tokenized_korean_text": "type MetricsCollector struct { Scores [ ] float 64 Labels [ ] string Counts map [ string ] int Totals map [ string ] float 64 Tags [ ] string }" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "type BaseEvaluator struct {\n\tName string\n}\n\nfunc (e *BaseEvaluator) Evaluate(data []string) error {\n\t_ = os.Stderr\n\t_ = strings.Join(data, \",\")\n\treturn nil\n}", - "token_estimate": 53 + "token_estimate": 53, + "tokenized_korean_text": "type BaseEvaluator struct { Name string } func ( e * BaseEvaluator ) Evaluate ( data [ ] string ) error { _ = os . Stderr _ = strings . Join ( data , \",\") return nil }" }, { "block_ids": [ @@ -102,7 +106,8 @@ } ], "text": "func (m *MetricsCollector) Run(inputs []float64) {\n\tfor _, inp := range inputs {\n\t\tm.Scores = append(\n\t\t\tm.Scores,\n\t\t\tinp,\n\t\t)\n\t}\n}", - "token_estimate": 44 + "token_estimate": 44, + "tokenized_korean_text": "func ( m * MetricsCollector ) Run ( inputs [ ] float 64 ) { for _, inp := range inputs { m . Scores = append ( m . Scores , inp , ) } }" }, { "block_ids": [ @@ -123,7 +128,8 @@ } ], "text": "func (m *MetricsCollector) Report() map[string]interface{} {\n\treturn map[string]interface{}{\n\t\t\"mean\": 0.0,\n\t\t\"count\": len(m.Scores),\n\t\t\"tags\": m.Tags,\n\t}\n}", - "token_estimate": 53 + "token_estimate": 53, + "tokenized_korean_text": "func ( m * MetricsCollector ) Report ( ) map [ string ] interface {} { return map [ string ] interface {}{ \" mean \": 0 . 0 , \" count \": len ( m . Scores ) , \" tags \": m . Tags , } }" }, { "block_ids": [ @@ -144,7 +150,8 @@ } ], "text": "func BigCompute(data []int) int {\n\tv0 := 0\n\tif 0 < len(data) {\n\t\tv0 = data[0]\n\t}\n\tv1 := 0\n\tif 1 < len(data) {\n\t\tv1 = data[1]\n\t}\n\tv2 := 0\n\tif 2 < len(data) {\n\t\tv2 = data[2]\n\t}\n\tv3 := 0\n\tif 3 < len(data) {\n\t\tv3 = data[3]\n\t}\n\tv4 := 0\n\tif 4 < len(data) {\n\t\tv4 = data[4]\n\t}\n\tv5 := 0\n\tif 5 < len(data) {\n\t\tv5 = data[5]\n\t}\n\tv6 := 0\n\tif 6 < len(data) {\n\t\tv6 = data[6]\n\t}\n\tv7 := 0\n\tif 7 < len(data) {\n\t\tv7 = data[7]\n\t}\n\tv8 := 0\n\tif 8 < len(data) {\n\t\tv8 = data[8]\n\t}\n\tv9 := 0\n\tif 9 < len(data) {\n\t\tv9 = data[9]\n\t}\n\tv10 := 0\n\tif 10 < len(data) {\n\t\tv10 = data[10]\n\t}\n\tv11 := 0\n\tif 11 < len(data) {\n\t\tv11 = data[11]\n\t}\n\tv12 := 0\n\tif 12 < len(data) {\n\t\tv12 = data[12]\n\t}\n\tv13 := 0\n\tif 13 < len(data) {\n\t\tv13 = data[13]\n\t}\n\tv14 := 0\n\tif 14 < len(data) {\n\t\tv14 = data[14]\n\t}\n\tv15 := 0\n\tif 15 < len(data) {\n\t\tv15 = data[15]\n\t}\n\tv16 := 0\n\tif 16 < len(data) {\n\t\tv16 = data[16]\n\t}\n\tv17 := 0\n\tif 17 < len(data) {\n\t\tv17 = data[17]\n\t}\n\tv18 := 0\n\tif 18 < len(data) {\n\t\tv18 = data[18]\n\t}\n\tv19 := 0\n\tif 19 < len(data) {\n\t\tv19 = data[19]\n\t}\n\tv20 := 0\n\tif 20 < len(data) {\n\t\tv20 = data[20]\n\t}\n\tv21 := 0\n\tif 21 < len(data) {\n\t\tv21 = data[21]\n\t}\n\tv22 := 0\n\tif 22 < len(data) {\n\t\tv22 = data[22]\n\t}\n\tv23 := 0\n\tif 23 < len(data) {\n\t\tv23 = data[23]\n\t}\n\tv24 := 0\n\tif 24 < len(data) {\n\t\tv24 = data[24]\n\t}\n\tv25 := 0\n\tif 25 < len(data) {\n\t\tv25 = data[25]\n\t}\n\tv26 := 0\n\tif 26 < len(data) {\n\t\tv26 = data[26]\n\t}\n\tv27 := 0\n\tif 27 < len(data) {\n\t\tv27 = data[27]\n\t}\n\tv28 := 0\n\tif 28 < len(data) {\n\t\tv28 = data[28]\n\t}\n\tv29 := 0\n\tif 29 < len(data) {\n\t\tv29 = data[29]\n\t}\n\tv30 := 0\n\tif 30 < len(data) {\n\t\tv30 = data[30]\n\t}\n\tv31 := 0\n\tif 31 < len(data) {\n\t\tv31 = data[31]\n\t}\n\tv32 := 0\n\tif 32 < len(data) {\n\t\tv32 = data[32]\n\t}\n\tv33 := 0\n\tif 33 < len(data) {\n\t\tv33 = data[33]\n\t}\n\tv34 := 0\n\tif 34 < len(data) {\n\t\tv34 = data[34]\n\t}\n\tv35 := 0\n\tif 35 < len(data) {\n\t\tv35 = data[35]\n\t}\n\tv36 := 0\n\tif 36 < len(data) {\n\t\tv36 = data[36]\n\t}\n\tv37 := 0\n\tif 37 < len(data) {\n\t\tv37 = data[37]\n\t}\n\tv38 := 0\n\tif 38 < len(data) {\n\t\tv38 = data[38]\n\t}\n\tv39 := 0\n\tif 39 < len(data) {\n\t\tv39 = data[39]\n\t}\n\tv40 := 0\n\tif 40 < len(data) {\n\t\tv40 = data[40]\n\t}\n\tv41 := 0\n\tif 41 < len(data) {\n\t\tv41 = data[41]\n\t}\n\tv42 := 0\n\tif 42 < len(data) {\n\t\tv42 = data[42]\n\t}\n\tv43 := 0\n\tif 43 < len(data) {\n\t\tv43 = data[43]\n\t}\n\tv44 := 0\n\tif 44 < len(data) {\n\t\tv44 = data[44]\n\t}\n\tv45 := 0\n\tif 45 < len(data) {\n\t\tv45 = data[45]\n\t}\n\tv46 := 0\n\tif 46 < len(data) {\n\t\tv46 = data[46]\n\t}\n\tv47 := 0\n\tif 47 < len(data) {\n\t\tv47 = data[47]\n\t}\n\tv48 := 0\n\tif 48 < len(data) {\n\t\tv48 = data[48]\n\t}\n\tv49 := 0\n\tif 49 < len(data) {\n\t\tv49 = data[49]", - "token_estimate": 847 + "token_estimate": 847, + "tokenized_korean_text": "func BigCompute ( data [ ] int ) int { v 0 := 0 if 0 < len ( data ) { v 0 = data [ 0 ] } v 1 := 0 if 1 < len ( data ) { v 1 = data [ 1 ] } v 2 := 0 if 2 < len ( data ) { v 2 = data [ 2 ] } v 3 := 0 if 3 < len ( data ) { v 3 = data [ 3 ] } v 4 := 0 if 4 < len ( data ) { v 4 = data [ 4 ] } v 5 := 0 if 5 < len ( data ) { v 5 = data [ 5 ] } v 6 := 0 if 6 < len ( data ) { v 6 = data [ 6 ] } v 7 := 0 if 7 < len ( data ) { v 7 = data [ 7 ] } v 8 := 0 if 8 < len ( data ) { v 8 = data [ 8 ] } v 9 := 0 if 9 < len ( data ) { v 9 = data [ 9 ] } v 10 := 0 if 10 < len ( data ) { v 10 = data [ 10 ] } v 11 := 0 if 11 < len ( data ) { v 11 = data [ 11 ] } v 12 := 0 if 12 < len ( data ) { v 12 = data [ 12 ] } v 13 := 0 if 13 < len ( data ) { v 13 = data [ 13 ] } v 14 := 0 if 14 < len ( data ) { v 14 = data [ 14 ] } v 15 := 0 if 15 < len ( data ) { v 15 = data [ 15 ] } v 16 := 0 if 16 < len ( data ) { v 16 = data [ 16 ] } v 17 := 0 if 17 < len ( data ) { v 17 = data [ 17 ] } v 18 := 0 if 18 < len ( data ) { v 18 = data [ 18 ] } v 19 := 0 if 19 < len ( data ) { v 19 = data [ 19 ] } v 20 := 0 if 20 < len ( data ) { v 20 = data [ 20 ] } v 21 := 0 if 21 < len ( data ) { v 21 = data [ 21 ] } v 22 := 0 if 22 < len ( data ) { v 22 = data [ 22 ] } v 23 := 0 if 23 < len ( data ) { v 23 = data [ 23 ] } v 24 := 0 if 24 < len ( data ) { v 24 = data [ 24 ] } v 25 := 0 if 25 < len ( data ) { v 25 = data [ 25 ] } v 26 := 0 if 26 < len ( data ) { v 26 = data [ 26 ] } v 27 := 0 if 27 < len ( data ) { v 27 = data [ 27 ] } v 28 := 0 if 28 < len ( data ) { v 28 = data [ 28 ] } v 29 := 0 if 29 < len ( data ) { v 29 = data [ 29 ] } v 30 := 0 if 30 < len ( data ) { v 30 = data [ 30 ] } v 31 := 0 if 31 < len ( data ) { v 31 = data [ 31 ] } v 32 := 0 if 32 < len ( data ) { v 32 = data [ 32 ] } v 33 := 0 if 33 < len ( data ) { v 33 = data [ 33 ] } v 34 := 0 if 34 < len ( data ) { v 34 = data [ 34 ] } v 35 := 0 if 35 < len ( data ) { v 35 = data [ 35 ] } v 36 := 0 if 36 < len ( data ) { v 36 = data [ 36 ] } v 37 := 0 if 37 < len ( data ) { v 37 = data [ 37 ] } v 38 := 0 if 38 < len ( data ) { v 38 = data [ 38 ] } v 39 := 0 if 39 < len ( data ) { v 39 = data [ 39 ] } v 40 := 0 if 40 < len ( data ) { v 40 = data [ 40 ] } v 41 := 0 if 41 < len ( data ) { v 41 = data [ 41 ] } v 42 := 0 if 42 < len ( data ) { v 42 = data [ 42 ] } v 43 := 0 if 43 < len ( data ) { v 43 = data [ 43 ] } v 44 := 0 if 44 < len ( data ) { v 44 = data [ 44 ] } v 45 := 0 if 45 < len ( data ) { v 45 = data [ 45 ] } v 46 := 0 if 46 < len ( data ) { v 46 = data [ 46 ] } v 47 := 0 if 47 < len ( data ) { v 47 = data [ 47 ] } v 48 := 0 if 48 < len ( data ) { v 48 = data [ 48 ] } v 49 := 0 if 49 < len ( data ) { v 49 = data [ 49 ]" }, { "block_ids": [ @@ -165,7 +172,8 @@ } ], "text": "\t}\n\tv50 := 0\n\tif 50 < len(data) {\n\t\tv50 = data[50]\n\t}\n\tv51 := 0\n\tif 51 < len(data) {\n\t\tv51 = data[51]\n\t}\n\tv52 := 0\n\tif 52 < len(data) {\n\t\tv52 = data[52]\n\t}\n\tv53 := 0\n\tif 53 < len(data) {\n\t\tv53 = data[53]\n\t}\n\tv54 := 0\n\tif 54 < len(data) {\n\t\tv54 = data[54]\n\t}\n\tv55 := 0\n\tif 55 < len(data) {\n\t\tv55 = data[55]\n\t}\n\tv56 := 0\n\tif 56 < len(data) {\n\t\tv56 = data[56]\n\t}\n\tv57 := 0\n\tif 57 < len(data) {\n\t\tv57 = data[57]\n\t}\n\tv58 := 0\n\tif 58 < len(data) {\n\t\tv58 = data[58]\n\t}\n\tv59 := 0\n\tif 59 < len(data) {\n\t\tv59 = data[59]\n\t}\n\tv60 := 0\n\tif 60 < len(data) {\n\t\tv60 = data[60]\n\t}\n\tv61 := 0\n\tif 61 < len(data) {\n\t\tv61 = data[61]\n\t}\n\tv62 := 0\n\tif 62 < len(data) {\n\t\tv62 = data[62]\n\t}\n\tv63 := 0\n\tif 63 < len(data) {\n\t\tv63 = data[63]\n\t}\n\tv64 := 0\n\tif 64 < len(data) {\n\t\tv64 = data[64]\n\t}\n\tv65 := 0\n\tif 65 < len(data) {\n\t\tv65 = data[65]\n\t}\n\tv66 := 0\n\tif 66 < len(data) {\n\t\tv66 = data[66]\n\t}\n\tv67 := 0\n\tif 67 < len(data) {\n\t\tv67 = data[67]\n\t}\n\tv68 := 0\n\tif 68 < len(data) {\n\t\tv68 = data[68]\n\t}\n\tv69 := 0\n\tif 69 < len(data) {\n\t\tv69 = data[69]\n\t}\n\tv70 := 0\n\tif 70 < len(data) {\n\t\tv70 = data[70]\n\t}\n\tv71 := 0\n\tif 71 < len(data) {\n\t\tv71 = data[71]\n\t}\n\tv72 := 0\n\tif 72 < len(data) {\n\t\tv72 = data[72]\n\t}\n\tv73 := 0\n\tif 73 < len(data) {\n\t\tv73 = data[73]\n\t}\n\tv74 := 0\n\tif 74 < len(data) {\n\t\tv74 = data[74]\n\t}\n\tv75 := 0\n\tif 75 < len(data) {\n\t\tv75 = data[75]\n\t}\n\tv76 := 0\n\tif 76 < len(data) {\n\t\tv76 = data[76]\n\t}\n\tv77 := 0\n\tif 77 < len(data) {\n\t\tv77 = data[77]\n\t}\n\tv78 := 0\n\tif 78 < len(data) {\n\t\tv78 = data[78]\n\t}\n\tv79 := 0\n\tif 79 < len(data) {\n\t\tv79 = data[79]\n\t}\n\tv80 := 0\n\tif 80 < len(data) {\n\t\tv80 = data[80]\n\t}\n\tv81 := 0\n\tif 81 < len(data) {\n\t\tv81 = data[81]\n\t}\n\tv82 := 0\n\tif 82 < len(data) {\n\t\tv82 = data[82]\n\t}\n\tv83 := 0\n\tif 83 < len(data) {\n\t\tv83 = data[83]\n\t}\n\tv84 := 0\n\tif 84 < len(data) {\n\t\tv84 = data[84]\n\t}\n\tv85 := 0\n\tif 85 < len(data) {\n\t\tv85 = data[85]\n\t}\n\tv86 := 0\n\tif 86 < len(data) {\n\t\tv86 = data[86]\n\t}\n\tv87 := 0\n\tif 87 < len(data) {\n\t\tv87 = data[87]\n\t}\n\tv88 := 0\n\tif 88 < len(data) {\n\t\tv88 = data[88]\n\t}\n\tv89 := 0\n\tif 89 < len(data) {\n\t\tv89 = data[89]\n\t}\n\tv90 := 0\n\tif 90 < len(data) {\n\t\tv90 = data[90]\n\t}\n\tv91 := 0\n\tif 91 < len(data) {\n\t\tv91 = data[91]\n\t}\n\tv92 := 0\n\tif 92 < len(data) {\n\t\tv92 = data[92]\n\t}\n\tv93 := 0\n\tif 93 < len(data) {\n\t\tv93 = data[93]\n\t}\n\tv94 := 0\n\tif 94 < len(data) {\n\t\tv94 = data[94]\n\t}\n\tv95 := 0\n\tif 95 < len(data) {\n\t\tv95 = data[95]\n\t}\n\tv96 := 0\n\tif 96 < len(data) {\n\t\tv96 = data[96]\n\t}\n\tv97 := 0\n\tif 97 < len(data) {\n\t\tv97 = data[97]\n\t}\n\tv98 := 0\n\tif 98 < len(data) {\n\t\tv98 = data[98]\n\t}\n\tv99 := 0\n\tif 99 < len(data) {\n\t\tv99 = data[99]", - "token_estimate": 850 + "token_estimate": 850, + "tokenized_korean_text": "} v 50 := 0 if 50 < len ( data ) { v 50 = data [ 50 ] } v 51 := 0 if 51 < len ( data ) { v 51 = data [ 51 ] } v 52 := 0 if 52 < len ( data ) { v 52 = data [ 52 ] } v 53 := 0 if 53 < len ( data ) { v 53 = data [ 53 ] } v 54 := 0 if 54 < len ( data ) { v 54 = data [ 54 ] } v 55 := 0 if 55 < len ( data ) { v 55 = data [ 55 ] } v 56 := 0 if 56 < len ( data ) { v 56 = data [ 56 ] } v 57 := 0 if 57 < len ( data ) { v 57 = data [ 57 ] } v 58 := 0 if 58 < len ( data ) { v 58 = data [ 58 ] } v 59 := 0 if 59 < len ( data ) { v 59 = data [ 59 ] } v 60 := 0 if 60 < len ( data ) { v 60 = data [ 60 ] } v 61 := 0 if 61 < len ( data ) { v 61 = data [ 61 ] } v 62 := 0 if 62 < len ( data ) { v 62 = data [ 62 ] } v 63 := 0 if 63 < len ( data ) { v 63 = data [ 63 ] } v 64 := 0 if 64 < len ( data ) { v 64 = data [ 64 ] } v 65 := 0 if 65 < len ( data ) { v 65 = data [ 65 ] } v 66 := 0 if 66 < len ( data ) { v 66 = data [ 66 ] } v 67 := 0 if 67 < len ( data ) { v 67 = data [ 67 ] } v 68 := 0 if 68 < len ( data ) { v 68 = data [ 68 ] } v 69 := 0 if 69 < len ( data ) { v 69 = data [ 69 ] } v 70 := 0 if 70 < len ( data ) { v 70 = data [ 70 ] } v 71 := 0 if 71 < len ( data ) { v 71 = data [ 71 ] } v 72 := 0 if 72 < len ( data ) { v 72 = data [ 72 ] } v 73 := 0 if 73 < len ( data ) { v 73 = data [ 73 ] } v 74 := 0 if 74 < len ( data ) { v 74 = data [ 74 ] } v 75 := 0 if 75 < len ( data ) { v 75 = data [ 75 ] } v 76 := 0 if 76 < len ( data ) { v 76 = data [ 76 ] } v 77 := 0 if 77 < len ( data ) { v 77 = data [ 77 ] } v 78 := 0 if 78 < len ( data ) { v 78 = data [ 78 ] } v 79 := 0 if 79 < len ( data ) { v 79 = data [ 79 ] } v 80 := 0 if 80 < len ( data ) { v 80 = data [ 80 ] } v 81 := 0 if 81 < len ( data ) { v 81 = data [ 81 ] } v 82 := 0 if 82 < len ( data ) { v 82 = data [ 82 ] } v 83 := 0 if 83 < len ( data ) { v 83 = data [ 83 ] } v 84 := 0 if 84 < len ( data ) { v 84 = data [ 84 ] } v 85 := 0 if 85 < len ( data ) { v 85 = data [ 85 ] } v 86 := 0 if 86 < len ( data ) { v 86 = data [ 86 ] } v 87 := 0 if 87 < len ( data ) { v 87 = data [ 87 ] } v 88 := 0 if 88 < len ( data ) { v 88 = data [ 88 ] } v 89 := 0 if 89 < len ( data ) { v 89 = data [ 89 ] } v 90 := 0 if 90 < len ( data ) { v 90 = data [ 90 ] } v 91 := 0 if 91 < len ( data ) { v 91 = data [ 91 ] } v 92 := 0 if 92 < len ( data ) { v 92 = data [ 92 ] } v 93 := 0 if 93 < len ( data ) { v 93 = data [ 93 ] } v 94 := 0 if 94 < len ( data ) { v 94 = data [ 94 ] } v 95 := 0 if 95 < len ( data ) { v 95 = data [ 95 ] } v 96 := 0 if 96 < len ( data ) { v 96 = data [ 96 ] } v 97 := 0 if 97 < len ( data ) { v 97 = data [ 97 ] } v 98 := 0 if 98 < len ( data ) { v 98 = data [ 98 ] } v 99 := 0 if 99 < len ( data ) { v 99 = data [ 99 ]" }, { "block_ids": [ @@ -186,7 +194,8 @@ } ], "text": "\t}\n\tv100 := 0\n\tif 100 < len(data) {\n\t\tv100 = data[100]\n\t}\n\tv101 := 0\n\tif 101 < len(data) {\n\t\tv101 = data[101]\n\t}\n\tv102 := 0\n\tif 102 < len(data) {\n\t\tv102 = data[102]\n\t}\n\tv103 := 0\n\tif 103 < len(data) {\n\t\tv103 = data[103]\n\t}\n\tv104 := 0\n\tif 104 < len(data) {\n\t\tv104 = data[104]\n\t}\n\tv105 := 0\n\tif 105 < len(data) {\n\t\tv105 = data[105]\n\t}\n\tv106 := 0\n\tif 106 < len(data) {\n\t\tv106 = data[106]\n\t}\n\tv107 := 0\n\tif 107 < len(data) {\n\t\tv107 = data[107]\n\t}\n\tv108 := 0\n\tif 108 < len(data) {\n\t\tv108 = data[108]\n\t}\n\tv109 := 0\n\tif 109 < len(data) {\n\t\tv109 = data[109]\n\t}\n\tv110 := 0\n\tif 110 < len(data) {\n\t\tv110 = data[110]\n\t}\n\tv111 := 0\n\tif 111 < len(data) {\n\t\tv111 = data[111]\n\t}\n\tv112 := 0\n\tif 112 < len(data) {\n\t\tv112 = data[112]\n\t}\n\tv113 := 0\n\tif 113 < len(data) {\n\t\tv113 = data[113]\n\t}\n\tv114 := 0\n\tif 114 < len(data) {\n\t\tv114 = data[114]\n\t}\n\tv115 := 0\n\tif 115 < len(data) {\n\t\tv115 = data[115]\n\t}\n\tv116 := 0\n\tif 116 < len(data) {\n\t\tv116 = data[116]\n\t}\n\tv117 := 0\n\tif 117 < len(data) {\n\t\tv117 = data[117]\n\t}\n\tv118 := 0\n\tif 118 < len(data) {\n\t\tv118 = data[118]\n\t}\n\tv119 := 0\n\tif 119 < len(data) {\n\t\tv119 = data[119]\n\t}\n\tv120 := 0\n\tif 120 < len(data) {\n\t\tv120 = data[120]\n\t}\n\tv121 := 0\n\tif 121 < len(data) {\n\t\tv121 = data[121]\n\t}\n\tv122 := 0\n\tif 122 < len(data) {\n\t\tv122 = data[122]\n\t}\n\tv123 := 0\n\tif 123 < len(data) {\n\t\tv123 = data[123]\n\t}\n\tv124 := 0\n\tif 124 < len(data) {\n\t\tv124 = data[124]\n\t}\n\tv125 := 0\n\tif 125 < len(data) {\n\t\tv125 = data[125]\n\t}\n\tv126 := 0\n\tif 126 < len(data) {\n\t\tv126 = data[126]\n\t}\n\tv127 := 0\n\tif 127 < len(data) {\n\t\tv127 = data[127]\n\t}\n\tv128 := 0\n\tif 128 < len(data) {\n\t\tv128 = data[128]\n\t}\n\tv129 := 0\n\tif 129 < len(data) {\n\t\tv129 = data[129]\n\t}\n\tv130 := 0\n\tif 130 < len(data) {\n\t\tv130 = data[130]\n\t}\n\tv131 := 0\n\tif 131 < len(data) {\n\t\tv131 = data[131]\n\t}\n\tv132 := 0\n\tif 132 < len(data) {\n\t\tv132 = data[132]\n\t}\n\tv133 := 0\n\tif 133 < len(data) {\n\t\tv133 = data[133]\n\t}\n\tv134 := 0\n\tif 134 < len(data) {\n\t\tv134 = data[134]\n\t}\n\tv135 := 0\n\tif 135 < len(data) {\n\t\tv135 = data[135]\n\t}\n\tv136 := 0\n\tif 136 < len(data) {\n\t\tv136 = data[136]\n\t}\n\tv137 := 0\n\tif 137 < len(data) {\n\t\tv137 = data[137]\n\t}\n\tv138 := 0\n\tif 138 < len(data) {\n\t\tv138 = data[138]\n\t}\n\tv139 := 0\n\tif 139 < len(data) {\n\t\tv139 = data[139]\n\t}\n\tv140 := 0\n\tif 140 < len(data) {\n\t\tv140 = data[140]\n\t}\n\tv141 := 0\n\tif 141 < len(data) {\n\t\tv141 = data[141]\n\t}\n\tv142 := 0\n\tif 142 < len(data) {\n\t\tv142 = data[142]\n\t}\n\tv143 := 0\n\tif 143 < len(data) {\n\t\tv143 = data[143]\n\t}\n\tv144 := 0\n\tif 144 < len(data) {\n\t\tv144 = data[144]\n\t}\n\tv145 := 0\n\tif 145 < len(data) {\n\t\tv145 = data[145]\n\t}\n\tv146 := 0\n\tif 146 < len(data) {\n\t\tv146 = data[146]\n\t}\n\tv147 := 0\n\tif 147 < len(data) {\n\t\tv147 = data[147]\n\t}\n\tv148 := 0\n\tif 148 < len(data) {\n\t\tv148 = data[148]\n\t}\n\tv149 := 0\n\tif 149 < len(data) {\n\t\tv149 = data[149]", - "token_estimate": 917 + "token_estimate": 917, + "tokenized_korean_text": "} v 100 := 0 if 100 < len ( data ) { v 100 = data [ 100 ] } v 101 := 0 if 101 < len ( data ) { v 101 = data [ 101 ] } v 102 := 0 if 102 < len ( data ) { v 102 = data [ 102 ] } v 103 := 0 if 103 < len ( data ) { v 103 = data [ 103 ] } v 104 := 0 if 104 < len ( data ) { v 104 = data [ 104 ] } v 105 := 0 if 105 < len ( data ) { v 105 = data [ 105 ] } v 106 := 0 if 106 < len ( data ) { v 106 = data [ 106 ] } v 107 := 0 if 107 < len ( data ) { v 107 = data [ 107 ] } v 108 := 0 if 108 < len ( data ) { v 108 = data [ 108 ] } v 109 := 0 if 109 < len ( data ) { v 109 = data [ 109 ] } v 110 := 0 if 110 < len ( data ) { v 110 = data [ 110 ] } v 111 := 0 if 111 < len ( data ) { v 111 = data [ 111 ] } v 112 := 0 if 112 < len ( data ) { v 112 = data [ 112 ] } v 113 := 0 if 113 < len ( data ) { v 113 = data [ 113 ] } v 114 := 0 if 114 < len ( data ) { v 114 = data [ 114 ] } v 115 := 0 if 115 < len ( data ) { v 115 = data [ 115 ] } v 116 := 0 if 116 < len ( data ) { v 116 = data [ 116 ] } v 117 := 0 if 117 < len ( data ) { v 117 = data [ 117 ] } v 118 := 0 if 118 < len ( data ) { v 118 = data [ 118 ] } v 119 := 0 if 119 < len ( data ) { v 119 = data [ 119 ] } v 120 := 0 if 120 < len ( data ) { v 120 = data [ 120 ] } v 121 := 0 if 121 < len ( data ) { v 121 = data [ 121 ] } v 122 := 0 if 122 < len ( data ) { v 122 = data [ 122 ] } v 123 := 0 if 123 < len ( data ) { v 123 = data [ 123 ] } v 124 := 0 if 124 < len ( data ) { v 124 = data [ 124 ] } v 125 := 0 if 125 < len ( data ) { v 125 = data [ 125 ] } v 126 := 0 if 126 < len ( data ) { v 126 = data [ 126 ] } v 127 := 0 if 127 < len ( data ) { v 127 = data [ 127 ] } v 128 := 0 if 128 < len ( data ) { v 128 = data [ 128 ] } v 129 := 0 if 129 < len ( data ) { v 129 = data [ 129 ] } v 130 := 0 if 130 < len ( data ) { v 130 = data [ 130 ] } v 131 := 0 if 131 < len ( data ) { v 131 = data [ 131 ] } v 132 := 0 if 132 < len ( data ) { v 132 = data [ 132 ] } v 133 := 0 if 133 < len ( data ) { v 133 = data [ 133 ] } v 134 := 0 if 134 < len ( data ) { v 134 = data [ 134 ] } v 135 := 0 if 135 < len ( data ) { v 135 = data [ 135 ] } v 136 := 0 if 136 < len ( data ) { v 136 = data [ 136 ] } v 137 := 0 if 137 < len ( data ) { v 137 = data [ 137 ] } v 138 := 0 if 138 < len ( data ) { v 138 = data [ 138 ] } v 139 := 0 if 139 < len ( data ) { v 139 = data [ 139 ] } v 140 := 0 if 140 < len ( data ) { v 140 = data [ 140 ] } v 141 := 0 if 141 < len ( data ) { v 141 = data [ 141 ] } v 142 := 0 if 142 < len ( data ) { v 142 = data [ 142 ] } v 143 := 0 if 143 < len ( data ) { v 143 = data [ 143 ] } v 144 := 0 if 144 < len ( data ) { v 144 = data [ 144 ] } v 145 := 0 if 145 < len ( data ) { v 145 = data [ 145 ] } v 146 := 0 if 146 < len ( data ) { v 146 = data [ 146 ] } v 147 := 0 if 147 < len ( data ) { v 147 = data [ 147 ] } v 148 := 0 if 148 < len ( data ) { v 148 = data [ 148 ] } v 149 := 0 if 149 < len ( data ) { v 149 = data [ 149 ]" }, { "block_ids": [ @@ -207,7 +216,8 @@ } ], "text": "\t}\n\tv150 := 0\n\tif 150 < len(data) {\n\t\tv150 = data[150]\n\t}\n\tv151 := 0\n\tif 151 < len(data) {\n\t\tv151 = data[151]\n\t}\n\tv152 := 0\n\tif 152 < len(data) {\n\t\tv152 = data[152]\n\t}\n\tv153 := 0\n\tif 153 < len(data) {\n\t\tv153 = data[153]\n\t}\n\tv154 := 0\n\tif 154 < len(data) {\n\t\tv154 = data[154]\n\t}\n\tv155 := 0\n\tif 155 < len(data) {\n\t\tv155 = data[155]\n\t}\n\tv156 := 0\n\tif 156 < len(data) {\n\t\tv156 = data[156]\n\t}\n\tv157 := 0\n\tif 157 < len(data) {\n\t\tv157 = data[157]\n\t}\n\tv158 := 0\n\tif 158 < len(data) {\n\t\tv158 = data[158]\n\t}\n\tv159 := 0\n\tif 159 < len(data) {\n\t\tv159 = data[159]\n\t}\n\tv160 := 0\n\tif 160 < len(data) {\n\t\tv160 = data[160]\n\t}\n\tv161 := 0\n\tif 161 < len(data) {\n\t\tv161 = data[161]\n\t}\n\tv162 := 0\n\tif 162 < len(data) {\n\t\tv162 = data[162]\n\t}\n\tv163 := 0\n\tif 163 < len(data) {\n\t\tv163 = data[163]\n\t}\n\tv164 := 0\n\tif 164 < len(data) {\n\t\tv164 = data[164]\n\t}\n\tv165 := 0\n\tif 165 < len(data) {\n\t\tv165 = data[165]\n\t}\n\tv166 := 0\n\tif 166 < len(data) {\n\t\tv166 = data[166]\n\t}\n\tv167 := 0\n\tif 167 < len(data) {\n\t\tv167 = data[167]\n\t}\n\tv168 := 0\n\tif 168 < len(data) {\n\t\tv168 = data[168]\n\t}\n\tv169 := 0\n\tif 169 < len(data) {\n\t\tv169 = data[169]\n\t}\n\tv170 := 0\n\tif 170 < len(data) {\n\t\tv170 = data[170]\n\t}\n\tv171 := 0\n\tif 171 < len(data) {\n\t\tv171 = data[171]\n\t}\n\tv172 := 0\n\tif 172 < len(data) {\n\t\tv172 = data[172]\n\t}\n\tv173 := 0\n\tif 173 < len(data) {\n\t\tv173 = data[173]\n\t}\n\tv174 := 0\n\tif 174 < len(data) {\n\t\tv174 = data[174]\n\t}\n\tv175 := 0\n\tif 175 < len(data) {\n\t\tv175 = data[175]\n\t}\n\tv176 := 0\n\tif 176 < len(data) {\n\t\tv176 = data[176]\n\t}\n\tv177 := 0\n\tif 177 < len(data) {\n\t\tv177 = data[177]\n\t}\n\tv178 := 0\n\tif 178 < len(data) {\n\t\tv178 = data[178]\n\t}\n\tv179 := 0\n\tif 179 < len(data) {\n\t\tv179 = data[179]\n\t}\n\tv180 := 0\n\tif 180 < len(data) {\n\t\tv180 = data[180]\n\t}\n\tv181 := 0\n\tif 181 < len(data) {\n\t\tv181 = data[181]\n\t}\n\tv182 := 0\n\tif 182 < len(data) {\n\t\tv182 = data[182]\n\t}\n\tv183 := 0\n\tif 183 < len(data) {\n\t\tv183 = data[183]\n\t}\n\tv184 := 0\n\tif 184 < len(data) {\n\t\tv184 = data[184]\n\t}\n\tv185 := 0\n\tif 185 < len(data) {\n\t\tv185 = data[185]\n\t}\n\tv186 := 0\n\tif 186 < len(data) {\n\t\tv186 = data[186]\n\t}\n\tv187 := 0\n\tif 187 < len(data) {\n\t\tv187 = data[187]\n\t}\n\tv188 := 0\n\tif 188 < len(data) {\n\t\tv188 = data[188]\n\t}\n\tv189 := 0\n\tif 189 < len(data) {\n\t\tv189 = data[189]\n\t}\n\tv190 := 0\n\tif 190 < len(data) {\n\t\tv190 = data[190]\n\t}\n\tv191 := 0\n\tif 191 < len(data) {\n\t\tv191 = data[191]\n\t}\n\tv192 := 0\n\tif 192 < len(data) {\n\t\tv192 = data[192]\n\t}\n\tv193 := 0\n\tif 193 < len(data) {\n\t\tv193 = data[193]\n\t}\n\tv194 := 0\n\tif 194 < len(data) {\n\t\tv194 = data[194]\n\t}\n\tv195 := 0\n\tif 195 < len(data) {\n\t\tv195 = data[195]\n\t}\n\tv196 := 0\n\tif 196 < len(data) {\n\t\tv196 = data[196]\n\t}\n\tv197 := 0\n\tif 197 < len(data) {\n\t\tv197 = data[197]\n\t}\n\tv198 := 0\n\tif 198 < len(data) {\n\t\tv198 = data[198]\n\t}\n\tv199 := 0\n\tif 199 < len(data) {\n\t\tv199 = data[199]", - "token_estimate": 917 + "token_estimate": 917, + "tokenized_korean_text": "} v 150 := 0 if 150 < len ( data ) { v 150 = data [ 150 ] } v 151 := 0 if 151 < len ( data ) { v 151 = data [ 151 ] } v 152 := 0 if 152 < len ( data ) { v 152 = data [ 152 ] } v 153 := 0 if 153 < len ( data ) { v 153 = data [ 153 ] } v 154 := 0 if 154 < len ( data ) { v 154 = data [ 154 ] } v 155 := 0 if 155 < len ( data ) { v 155 = data [ 155 ] } v 156 := 0 if 156 < len ( data ) { v 156 = data [ 156 ] } v 157 := 0 if 157 < len ( data ) { v 157 = data [ 157 ] } v 158 := 0 if 158 < len ( data ) { v 158 = data [ 158 ] } v 159 := 0 if 159 < len ( data ) { v 159 = data [ 159 ] } v 160 := 0 if 160 < len ( data ) { v 160 = data [ 160 ] } v 161 := 0 if 161 < len ( data ) { v 161 = data [ 161 ] } v 162 := 0 if 162 < len ( data ) { v 162 = data [ 162 ] } v 163 := 0 if 163 < len ( data ) { v 163 = data [ 163 ] } v 164 := 0 if 164 < len ( data ) { v 164 = data [ 164 ] } v 165 := 0 if 165 < len ( data ) { v 165 = data [ 165 ] } v 166 := 0 if 166 < len ( data ) { v 166 = data [ 166 ] } v 167 := 0 if 167 < len ( data ) { v 167 = data [ 167 ] } v 168 := 0 if 168 < len ( data ) { v 168 = data [ 168 ] } v 169 := 0 if 169 < len ( data ) { v 169 = data [ 169 ] } v 170 := 0 if 170 < len ( data ) { v 170 = data [ 170 ] } v 171 := 0 if 171 < len ( data ) { v 171 = data [ 171 ] } v 172 := 0 if 172 < len ( data ) { v 172 = data [ 172 ] } v 173 := 0 if 173 < len ( data ) { v 173 = data [ 173 ] } v 174 := 0 if 174 < len ( data ) { v 174 = data [ 174 ] } v 175 := 0 if 175 < len ( data ) { v 175 = data [ 175 ] } v 176 := 0 if 176 < len ( data ) { v 176 = data [ 176 ] } v 177 := 0 if 177 < len ( data ) { v 177 = data [ 177 ] } v 178 := 0 if 178 < len ( data ) { v 178 = data [ 178 ] } v 179 := 0 if 179 < len ( data ) { v 179 = data [ 179 ] } v 180 := 0 if 180 < len ( data ) { v 180 = data [ 180 ] } v 181 := 0 if 181 < len ( data ) { v 181 = data [ 181 ] } v 182 := 0 if 182 < len ( data ) { v 182 = data [ 182 ] } v 183 := 0 if 183 < len ( data ) { v 183 = data [ 183 ] } v 184 := 0 if 184 < len ( data ) { v 184 = data [ 184 ] } v 185 := 0 if 185 < len ( data ) { v 185 = data [ 185 ] } v 186 := 0 if 186 < len ( data ) { v 186 = data [ 186 ] } v 187 := 0 if 187 < len ( data ) { v 187 = data [ 187 ] } v 188 := 0 if 188 < len ( data ) { v 188 = data [ 188 ] } v 189 := 0 if 189 < len ( data ) { v 189 = data [ 189 ] } v 190 := 0 if 190 < len ( data ) { v 190 = data [ 190 ] } v 191 := 0 if 191 < len ( data ) { v 191 = data [ 191 ] } v 192 := 0 if 192 < len ( data ) { v 192 = data [ 192 ] } v 193 := 0 if 193 < len ( data ) { v 193 = data [ 193 ] } v 194 := 0 if 194 < len ( data ) { v 194 = data [ 194 ] } v 195 := 0 if 195 < len ( data ) { v 195 = data [ 195 ] } v 196 := 0 if 196 < len ( data ) { v 196 = data [ 196 ] } v 197 := 0 if 197 < len ( data ) { v 197 = data [ 197 ] } v 198 := 0 if 198 < len ( data ) { v 198 = data [ 198 ] } v 199 := 0 if 199 < len ( data ) { v 199 = data [ 199 ]" }, { "block_ids": [ @@ -228,6 +238,7 @@ } ], "text": "\t}\n\tv200 := 0\n\tif 200 < len(data) {\n\t\tv200 = data[200]\n\t}\n\tv201 := 0\n\tif 201 < len(data) {\n\t\tv201 = data[201]\n\t}\n\tv202 := 0\n\tif 202 < len(data) {\n\t\tv202 = data[202]\n\t}\n\tv203 := 0\n\tif 203 < len(data) {\n\t\tv203 = data[203]\n\t}\n\tv204 := 0\n\tif 204 < len(data) {\n\t\tv204 = data[204]\n\t}\n\tv205 := 0\n\tif 205 < len(data) {\n\t\tv205 = data[205]\n\t}\n\tv206 := 0\n\tif 206 < len(data) {\n\t\tv206 = data[206]\n\t}\n\tv207 := 0\n\tif 207 < len(data) {\n\t\tv207 = data[207]\n\t}\n\tv208 := 0\n\tif 208 < len(data) {\n\t\tv208 = data[208]\n\t}\n\tv209 := 0\n\tif 209 < len(data) {\n\t\tv209 = data[209]\n\t}\n\treturn len(data)\n}", - "token_estimate": 191 + "token_estimate": 191, + "tokenized_korean_text": "} v 200 := 0 if 200 < len ( data ) { v 200 = data [ 200 ] } v 201 := 0 if 201 < len ( data ) { v 201 = data [ 201 ] } v 202 := 0 if 202 < len ( data ) { v 202 = data [ 202 ] } v 203 := 0 if 203 < len ( data ) { v 203 = data [ 203 ] } v 204 := 0 if 204 < len ( data ) { v 204 = data [ 204 ] } v 205 := 0 if 205 < len ( data ) { v 205 = data [ 205 ] } v 206 := 0 if 206 < len ( data ) { v 206 = data [ 206 ] } v 207 := 0 if 207 < len ( data ) { v 207 = data [ 207 ] } v 208 := 0 if 208 < len ( data ) { v 208 = data [ 208 ] } v 209 := 0 if 209 < len ( data ) { v 209 = data [ 209 ] } return len ( data ) }" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.java.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.java.chunks.snapshot.json index e42d8d0..b5205c1 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.java.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.java.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "import java.util.List;\nimport java.util.Map;\nimport java.util.ArrayList;\nimport java.util.HashMap;\nimport java.util.stream.Collectors;", - "token_estimate": 45 + "token_estimate": 45, + "tokenized_korean_text": "import java . util . List ; import java . util . Map ; import java . util . ArrayList ; import java . util . HashMap ; import java . util . stream . Collectors ;" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "public static double computeMRR(List scores) {\n if (scores.isEmpty()) {\n return 0.0;\n }\n return 1.0 / scores.size();\n}", - "token_estimate": 48 + "token_estimate": 48, + "tokenized_korean_text": "public static double computeMRR ( List < Double > scores ) { if ( scores . isEmpty ( ) ) { return 0 . 0 ; } return 1 . 0 / scores . size (); }" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "public class MetricsCollector {\n private List scores;\n private List labels;\n private Map counts;\n private Map totals;\n private List tags;\n}", - "token_estimate": 71 + "token_estimate": 71, + "tokenized_korean_text": "public class MetricsCollector { private List < Double > scores ; private List < String > labels ; private Map < String , Integer > counts ; private Map < String , Double > totals ; private List < String > tags ; }" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "public class BaseEvaluator {\n private String name;\n\n public BaseEvaluator(String name) {\n this.name = name;\n }\n\n public void evaluate(List data) throws Exception {\n String joined = String.join(\",\", data);\n }\n}", - "token_estimate": 82 + "token_estimate": 82, + "tokenized_korean_text": "public class BaseEvaluator { private String name ; public BaseEvaluator ( String name ) { this . name = name ; } public void evaluate ( List < String > data ) throws Exception { String joined = String . join (\",\", data ); } }" }, { "block_ids": [ @@ -102,7 +106,8 @@ } ], "text": "public void run(List inputs) {\n for (Double inp : inputs) {\n scores.add(\n inp\n );\n }\n}", - "token_estimate": 42 + "token_estimate": 42, + "tokenized_korean_text": "public void run ( List < Double > inputs ) { for ( Double inp : inputs ) { scores . add ( inp ); } }" }, { "block_ids": [ @@ -123,7 +128,8 @@ } ], "text": "public Map report() {\n Map result = new HashMap<>();\n result.put(\"mean\", 0.0);\n result.put(\"count\", scores.size());\n result.put(\"tags\", tags);\n return result;\n}", - "token_estimate": 69 + "token_estimate": 69, + "tokenized_korean_text": "public Map < String , Object > report ( ) { Map < String , Object > result = new HashMap <>(); result . put (\" mean \", 0 . 0 ); result . put (\" count \", scores . size ()); result . put (\" tags \", tags ); return result ; }" }, { "block_ids": [ @@ -144,7 +150,8 @@ } ], "text": "public class BigCompute {\n public int compute(int[] data) {\n int v0 = 0 < data.length ? data[0] : 0;\n int v1 = 1 < data.length ? data[1] : 0;\n int v2 = 2 < data.length ? data[2] : 0;\n int v3 = 3 < data.length ? data[3] : 0;\n int v4 = 4 < data.length ? data[4] : 0;\n int v5 = 5 < data.length ? data[5] : 0;\n int v6 = 6 < data.length ? data[6] : 0;\n int v7 = 7 < data.length ? data[7] : 0;\n int v8 = 8 < data.length ? data[8] : 0;\n int v9 = 9 < data.length ? data[9] : 0;\n int v10 = 10 < data.length ? data[10] : 0;\n int v11 = 11 < data.length ? data[11] : 0;\n int v12 = 12 < data.length ? data[12] : 0;\n int v13 = 13 < data.length ? data[13] : 0;\n int v14 = 14 < data.length ? data[14] : 0;\n int v15 = 15 < data.length ? data[15] : 0;\n int v16 = 16 < data.length ? data[16] : 0;\n int v17 = 17 < data.length ? data[17] : 0;\n int v18 = 18 < data.length ? data[18] : 0;\n int v19 = 19 < data.length ? data[19] : 0;\n int v20 = 20 < data.length ? data[20] : 0;\n int v21 = 21 < data.length ? data[21] : 0;\n int v22 = 22 < data.length ? data[22] : 0;\n int v23 = 23 < data.length ? data[23] : 0;\n int v24 = 24 < data.length ? data[24] : 0;\n int v25 = 25 < data.length ? data[25] : 0;\n int v26 = 26 < data.length ? data[26] : 0;\n int v27 = 27 < data.length ? data[27] : 0;\n int v28 = 28 < data.length ? data[28] : 0;\n int v29 = 29 < data.length ? data[29] : 0;\n int v30 = 30 < data.length ? data[30] : 0;\n int v31 = 31 < data.length ? data[31] : 0;\n int v32 = 32 < data.length ? data[32] : 0;\n int v33 = 33 < data.length ? data[33] : 0;\n int v34 = 34 < data.length ? data[34] : 0;\n int v35 = 35 < data.length ? data[35] : 0;\n int v36 = 36 < data.length ? data[36] : 0;\n int v37 = 37 < data.length ? data[37] : 0;\n int v38 = 38 < data.length ? data[38] : 0;\n int v39 = 39 < data.length ? data[39] : 0;\n int v40 = 40 < data.length ? data[40] : 0;\n int v41 = 41 < data.length ? data[41] : 0;\n int v42 = 42 < data.length ? data[42] : 0;\n int v43 = 43 < data.length ? data[43] : 0;\n int v44 = 44 < data.length ? data[44] : 0;\n int v45 = 45 < data.length ? data[45] : 0;\n int v46 = 46 < data.length ? data[46] : 0;\n int v47 = 47 < data.length ? data[47] : 0;\n int v48 = 48 < data.length ? data[48] : 0;\n int v49 = 49 < data.length ? data[49] : 0;\n int v50 = 50 < data.length ? data[50] : 0;\n int v51 = 51 < data.length ? data[51] : 0;\n int v52 = 52 < data.length ? data[52] : 0;\n int v53 = 53 < data.length ? data[53] : 0;\n int v54 = 54 < data.length ? data[54] : 0;\n int v55 = 55 < data.length ? data[55] : 0;\n int v56 = 56 < data.length ? data[56] : 0;\n int v57 = 57 < data.length ? data[57] : 0;\n int v58 = 58 < data.length ? data[58] : 0;\n int v59 = 59 < data.length ? data[59] : 0;\n int v60 = 60 < data.length ? data[60] : 0;\n int v61 = 61 < data.length ? data[61] : 0;\n int v62 = 62 < data.length ? data[62] : 0;\n int v63 = 63 < data.length ? data[63] : 0;\n int v64 = 64 < data.length ? data[64] : 0;\n int v65 = 65 < data.length ? data[65] : 0;\n int v66 = 66 < data.length ? data[66] : 0;\n int v67 = 67 < data.length ? data[67] : 0;\n int v68 = 68 < data.length ? data[68] : 0;\n int v69 = 69 < data.length ? data[69] : 0;\n int v70 = 70 < data.length ? data[70] : 0;\n int v71 = 71 < data.length ? data[71] : 0;\n int v72 = 72 < data.length ? data[72] : 0;\n int v73 = 73 < data.length ? data[73] : 0;\n int v74 = 74 < data.length ? data[74] : 0;\n int v75 = 75 < data.length ? data[75] : 0;\n int v76 = 76 < data.length ? data[76] : 0;\n int v77 = 77 < data.length ? data[77] : 0;\n int v78 = 78 < data.length ? data[78] : 0;\n int v79 = 79 < data.length ? data[79] : 0;\n int v80 = 80 < data.length ? data[80] : 0;\n int v81 = 81 < data.length ? data[81] : 0;\n int v82 = 82 < data.length ? data[82] : 0;\n int v83 = 83 < data.length ? data[83] : 0;\n int v84 = 84 < data.length ? data[84] : 0;\n int v85 = 85 < data.length ? data[85] : 0;\n int v86 = 86 < data.length ? data[86] : 0;\n int v87 = 87 < data.length ? data[87] : 0;\n int v88 = 88 < data.length ? data[88] : 0;\n int v89 = 89 < data.length ? data[89] : 0;\n int v90 = 90 < data.length ? data[90] : 0;\n int v91 = 91 < data.length ? data[91] : 0;\n int v92 = 92 < data.length ? data[92] : 0;\n int v93 = 93 < data.length ? data[93] : 0;\n int v94 = 94 < data.length ? data[94] : 0;\n int v95 = 95 < data.length ? data[95] : 0;\n int v96 = 96 < data.length ? data[96] : 0;\n int v97 = 97 < data.length ? data[97] : 0;\n int v98 = 98 < data.length ? data[98] : 0;\n int v99 = 99 < data.length ? data[99] : 0;\n int v100 = 100 < data.length ? data[100] : 0;\n int v101 = 101 < data.length ? data[101] : 0;\n int v102 = 102 < data.length ? data[102] : 0;\n int v103 = 103 < data.length ? data[103] : 0;\n int v104 = 104 < data.length ? data[104] : 0;\n int v105 = 105 < data.length ? data[105] : 0;\n int v106 = 106 < data.length ? data[106] : 0;\n int v107 = 107 < data.length ? data[107] : 0;\n int v108 = 108 < data.length ? data[108] : 0;\n int v109 = 109 < data.length ? data[109] : 0;\n int v110 = 110 < data.length ? data[110] : 0;\n int v111 = 111 < data.length ? data[111] : 0;\n int v112 = 112 < data.length ? data[112] : 0;\n int v113 = 113 < data.length ? data[113] : 0;\n int v114 = 114 < data.length ? data[114] : 0;\n int v115 = 115 < data.length ? data[115] : 0;\n int v116 = 116 < data.length ? data[116] : 0;\n int v117 = 117 < data.length ? data[117] : 0;\n int v118 = 118 < data.length ? data[118] : 0;\n int v119 = 119 < data.length ? data[119] : 0;\n int v120 = 120 < data.length ? data[120] : 0;\n int v121 = 121 < data.length ? data[121] : 0;\n int v122 = 122 < data.length ? data[122] : 0;\n int v123 = 123 < data.length ? data[123] : 0;\n int v124 = 124 < data.length ? data[124] : 0;\n int v125 = 125 < data.length ? data[125] : 0;\n int v126 = 126 < data.length ? data[126] : 0;\n int v127 = 127 < data.length ? data[127] : 0;\n int v128 = 128 < data.length ? data[128] : 0;\n int v129 = 129 < data.length ? data[129] : 0;\n int v130 = 130 < data.length ? data[130] : 0;\n int v131 = 131 < data.length ? data[131] : 0;\n int v132 = 132 < data.length ? data[132] : 0;\n int v133 = 133 < data.length ? data[133] : 0;\n int v134 = 134 < data.length ? data[134] : 0;\n int v135 = 135 < data.length ? data[135] : 0;\n int v136 = 136 < data.length ? data[136] : 0;\n int v137 = 137 < data.length ? data[137] : 0;\n int v138 = 138 < data.length ? data[138] : 0;\n int v139 = 139 < data.length ? data[139] : 0;\n int v140 = 140 < data.length ? data[140] : 0;\n int v141 = 141 < data.length ? data[141] : 0;\n int v142 = 142 < data.length ? data[142] : 0;\n int v143 = 143 < data.length ? data[143] : 0;\n int v144 = 144 < data.length ? data[144] : 0;\n int v145 = 145 < data.length ? data[145] : 0;\n int v146 = 146 < data.length ? data[146] : 0;\n int v147 = 147 < data.length ? data[147] : 0;\n int v148 = 148 < data.length ? data[148] : 0;\n int v149 = 149 < data.length ? data[149] : 0;\n int v150 = 150 < data.length ? data[150] : 0;\n int v151 = 151 < data.length ? data[151] : 0;\n int v152 = 152 < data.length ? data[152] : 0;\n int v153 = 153 < data.length ? data[153] : 0;\n int v154 = 154 < data.length ? data[154] : 0;\n int v155 = 155 < data.length ? data[155] : 0;\n int v156 = 156 < data.length ? data[156] : 0;\n int v157 = 157 < data.length ? data[157] : 0;\n int v158 = 158 < data.length ? data[158] : 0;\n int v159 = 159 < data.length ? data[159] : 0;\n int v160 = 160 < data.length ? data[160] : 0;\n int v161 = 161 < data.length ? data[161] : 0;\n int v162 = 162 < data.length ? data[162] : 0;\n int v163 = 163 < data.length ? data[163] : 0;\n int v164 = 164 < data.length ? data[164] : 0;\n int v165 = 165 < data.length ? data[165] : 0;\n int v166 = 166 < data.length ? data[166] : 0;\n int v167 = 167 < data.length ? data[167] : 0;\n int v168 = 168 < data.length ? data[168] : 0;\n int v169 = 169 < data.length ? data[169] : 0;\n int v170 = 170 < data.length ? data[170] : 0;\n int v171 = 171 < data.length ? data[171] : 0;\n int v172 = 172 < data.length ? data[172] : 0;\n int v173 = 173 < data.length ? data[173] : 0;\n int v174 = 174 < data.length ? data[174] : 0;\n int v175 = 175 < data.length ? data[175] : 0;\n int v176 = 176 < data.length ? data[176] : 0;\n int v177 = 177 < data.length ? data[177] : 0;\n int v178 = 178 < data.length ? data[178] : 0;\n int v179 = 179 < data.length ? data[179] : 0;\n int v180 = 180 < data.length ? data[180] : 0;\n int v181 = 181 < data.length ? data[181] : 0;\n int v182 = 182 < data.length ? data[182] : 0;\n int v183 = 183 < data.length ? data[183] : 0;\n int v184 = 184 < data.length ? data[184] : 0;\n int v185 = 185 < data.length ? data[185] : 0;\n int v186 = 186 < data.length ? data[186] : 0;\n int v187 = 187 < data.length ? data[187] : 0;\n int v188 = 188 < data.length ? data[188] : 0;\n int v189 = 189 < data.length ? data[189] : 0;\n int v190 = 190 < data.length ? data[190] : 0;\n int v191 = 191 < data.length ? data[191] : 0;\n int v192 = 192 < data.length ? data[192] : 0;\n int v193 = 193 < data.length ? data[193] : 0;\n int v194 = 194 < data.length ? data[194] : 0;\n int v195 = 195 < data.length ? data[195] : 0;\n int v196 = 196 < data.length ? data[196] : 0;\n int v197 = 197 < data.length ? data[197] : 0;", - "token_estimate": 3475 + "token_estimate": 3475, + "tokenized_korean_text": "public class BigCompute { public int compute ( int [ ] data ) { int v 0 = 0 < data . length ? data [ 0 ] : 0 ; int v 1 = 1 < data . length ? data [ 1 ] : 0 ; int v 2 = 2 < data . length ? data [ 2 ] : 0 ; int v 3 = 3 < data . length ? data [ 3 ] : 0 ; int v 4 = 4 < data . length ? data [ 4 ] : 0 ; int v 5 = 5 < data . length ? data [ 5 ] : 0 ; int v 6 = 6 < data . length ? data [ 6 ] : 0 ; int v 7 = 7 < data . length ? data [ 7 ] : 0 ; int v 8 = 8 < data . length ? data [ 8 ] : 0 ; int v 9 = 9 < data . length ? data [ 9 ] : 0 ; int v 10 = 10 < data . length ? data [ 10 ] : 0 ; int v 11 = 11 < data . length ? data [ 11 ] : 0 ; int v 12 = 12 < data . length ? data [ 12 ] : 0 ; int v 13 = 13 < data . length ? data [ 13 ] : 0 ; int v 14 = 14 < data . length ? data [ 14 ] : 0 ; int v 15 = 15 < data . length ? data [ 15 ] : 0 ; int v 16 = 16 < data . length ? data [ 16 ] : 0 ; int v 17 = 17 < data . length ? data [ 17 ] : 0 ; int v 18 = 18 < data . length ? data [ 18 ] : 0 ; int v 19 = 19 < data . length ? data [ 19 ] : 0 ; int v 20 = 20 < data . length ? data [ 20 ] : 0 ; int v 21 = 21 < data . length ? data [ 21 ] : 0 ; int v 22 = 22 < data . length ? data [ 22 ] : 0 ; int v 23 = 23 < data . length ? data [ 23 ] : 0 ; int v 24 = 24 < data . length ? data [ 24 ] : 0 ; int v 25 = 25 < data . length ? data [ 25 ] : 0 ; int v 26 = 26 < data . length ? data [ 26 ] : 0 ; int v 27 = 27 < data . length ? data [ 27 ] : 0 ; int v 28 = 28 < data . length ? data [ 28 ] : 0 ; int v 29 = 29 < data . length ? data [ 29 ] : 0 ; int v 30 = 30 < data . length ? data [ 30 ] : 0 ; int v 31 = 31 < data . length ? data [ 31 ] : 0 ; int v 32 = 32 < data . length ? data [ 32 ] : 0 ; int v 33 = 33 < data . length ? data [ 33 ] : 0 ; int v 34 = 34 < data . length ? data [ 34 ] : 0 ; int v 35 = 35 < data . length ? data [ 35 ] : 0 ; int v 36 = 36 < data . length ? data [ 36 ] : 0 ; int v 37 = 37 < data . length ? data [ 37 ] : 0 ; int v 38 = 38 < data . length ? data [ 38 ] : 0 ; int v 39 = 39 < data . length ? data [ 39 ] : 0 ; int v 40 = 40 < data . length ? data [ 40 ] : 0 ; int v 41 = 41 < data . length ? data [ 41 ] : 0 ; int v 42 = 42 < data . length ? data [ 42 ] : 0 ; int v 43 = 43 < data . length ? data [ 43 ] : 0 ; int v 44 = 44 < data . length ? data [ 44 ] : 0 ; int v 45 = 45 < data . length ? data [ 45 ] : 0 ; int v 46 = 46 < data . length ? data [ 46 ] : 0 ; int v 47 = 47 < data . length ? data [ 47 ] : 0 ; int v 48 = 48 < data . length ? data [ 48 ] : 0 ; int v 49 = 49 < data . length ? data [ 49 ] : 0 ; int v 50 = 50 < data . length ? data [ 50 ] : 0 ; int v 51 = 51 < data . length ? data [ 51 ] : 0 ; int v 52 = 52 < data . length ? data [ 52 ] : 0 ; int v 53 = 53 < data . length ? data [ 53 ] : 0 ; int v 54 = 54 < data . length ? data [ 54 ] : 0 ; int v 55 = 55 < data . length ? data [ 55 ] : 0 ; int v 56 = 56 < data . length ? data [ 56 ] : 0 ; int v 57 = 57 < data . length ? data [ 57 ] : 0 ; int v 58 = 58 < data . length ? data [ 58 ] : 0 ; int v 59 = 59 < data . length ? data [ 59 ] : 0 ; int v 60 = 60 < data . length ? data [ 60 ] : 0 ; int v 61 = 61 < data . length ? data [ 61 ] : 0 ; int v 62 = 62 < data . length ? data [ 62 ] : 0 ; int v 63 = 63 < data . length ? data [ 63 ] : 0 ; int v 64 = 64 < data . length ? data [ 64 ] : 0 ; int v 65 = 65 < data . length ? data [ 65 ] : 0 ; int v 66 = 66 < data . length ? data [ 66 ] : 0 ; int v 67 = 67 < data . length ? data [ 67 ] : 0 ; int v 68 = 68 < data . length ? data [ 68 ] : 0 ; int v 69 = 69 < data . length ? data [ 69 ] : 0 ; int v 70 = 70 < data . length ? data [ 70 ] : 0 ; int v 71 = 71 < data . length ? data [ 71 ] : 0 ; int v 72 = 72 < data . length ? data [ 72 ] : 0 ; int v 73 = 73 < data . length ? data [ 73 ] : 0 ; int v 74 = 74 < data . length ? data [ 74 ] : 0 ; int v 75 = 75 < data . length ? data [ 75 ] : 0 ; int v 76 = 76 < data . length ? data [ 76 ] : 0 ; int v 77 = 77 < data . length ? data [ 77 ] : 0 ; int v 78 = 78 < data . length ? data [ 78 ] : 0 ; int v 79 = 79 < data . length ? data [ 79 ] : 0 ; int v 80 = 80 < data . length ? data [ 80 ] : 0 ; int v 81 = 81 < data . length ? data [ 81 ] : 0 ; int v 82 = 82 < data . length ? data [ 82 ] : 0 ; int v 83 = 83 < data . length ? data [ 83 ] : 0 ; int v 84 = 84 < data . length ? data [ 84 ] : 0 ; int v 85 = 85 < data . length ? data [ 85 ] : 0 ; int v 86 = 86 < data . length ? data [ 86 ] : 0 ; int v 87 = 87 < data . length ? data [ 87 ] : 0 ; int v 88 = 88 < data . length ? data [ 88 ] : 0 ; int v 89 = 89 < data . length ? data [ 89 ] : 0 ; int v 90 = 90 < data . length ? data [ 90 ] : 0 ; int v 91 = 91 < data . length ? data [ 91 ] : 0 ; int v 92 = 92 < data . length ? data [ 92 ] : 0 ; int v 93 = 93 < data . length ? data [ 93 ] : 0 ; int v 94 = 94 < data . length ? data [ 94 ] : 0 ; int v 95 = 95 < data . length ? data [ 95 ] : 0 ; int v 96 = 96 < data . length ? data [ 96 ] : 0 ; int v 97 = 97 < data . length ? data [ 97 ] : 0 ; int v 98 = 98 < data . length ? data [ 98 ] : 0 ; int v 99 = 99 < data . length ? data [ 99 ] : 0 ; int v 100 = 100 < data . length ? data [ 100 ] : 0 ; int v 101 = 101 < data . length ? data [ 101 ] : 0 ; int v 102 = 102 < data . length ? data [ 102 ] : 0 ; int v 103 = 103 < data . length ? data [ 103 ] : 0 ; int v 104 = 104 < data . length ? data [ 104 ] : 0 ; int v 105 = 105 < data . length ? data [ 105 ] : 0 ; int v 106 = 106 < data . length ? data [ 106 ] : 0 ; int v 107 = 107 < data . length ? data [ 107 ] : 0 ; int v 108 = 108 < data . length ? data [ 108 ] : 0 ; int v 109 = 109 < data . length ? data [ 109 ] : 0 ; int v 110 = 110 < data . length ? data [ 110 ] : 0 ; int v 111 = 111 < data . length ? data [ 111 ] : 0 ; int v 112 = 112 < data . length ? data [ 112 ] : 0 ; int v 113 = 113 < data . length ? data [ 113 ] : 0 ; int v 114 = 114 < data . length ? data [ 114 ] : 0 ; int v 115 = 115 < data . length ? data [ 115 ] : 0 ; int v 116 = 116 < data . length ? data [ 116 ] : 0 ; int v 117 = 117 < data . length ? data [ 117 ] : 0 ; int v 118 = 118 < data . length ? data [ 118 ] : 0 ; int v 119 = 119 < data . length ? data [ 119 ] : 0 ; int v 120 = 120 < data . length ? data [ 120 ] : 0 ; int v 121 = 121 < data . length ? data [ 121 ] : 0 ; int v 122 = 122 < data . length ? data [ 122 ] : 0 ; int v 123 = 123 < data . length ? data [ 123 ] : 0 ; int v 124 = 124 < data . length ? data [ 124 ] : 0 ; int v 125 = 125 < data . length ? data [ 125 ] : 0 ; int v 126 = 126 < data . length ? data [ 126 ] : 0 ; int v 127 = 127 < data . length ? data [ 127 ] : 0 ; int v 128 = 128 < data . length ? data [ 128 ] : 0 ; int v 129 = 129 < data . length ? data [ 129 ] : 0 ; int v 130 = 130 < data . length ? data [ 130 ] : 0 ; int v 131 = 131 < data . length ? data [ 131 ] : 0 ; int v 132 = 132 < data . length ? data [ 132 ] : 0 ; int v 133 = 133 < data . length ? data [ 133 ] : 0 ; int v 134 = 134 < data . length ? data [ 134 ] : 0 ; int v 135 = 135 < data . length ? data [ 135 ] : 0 ; int v 136 = 136 < data . length ? data [ 136 ] : 0 ; int v 137 = 137 < data . length ? data [ 137 ] : 0 ; int v 138 = 138 < data . length ? data [ 138 ] : 0 ; int v 139 = 139 < data . length ? data [ 139 ] : 0 ; int v 140 = 140 < data . length ? data [ 140 ] : 0 ; int v 141 = 141 < data . length ? data [ 141 ] : 0 ; int v 142 = 142 < data . length ? data [ 142 ] : 0 ; int v 143 = 143 < data . length ? data [ 143 ] : 0 ; int v 144 = 144 < data . length ? data [ 144 ] : 0 ; int v 145 = 145 < data . length ? data [ 145 ] : 0 ; int v 146 = 146 < data . length ? data [ 146 ] : 0 ; int v 147 = 147 < data . length ? data [ 147 ] : 0 ; int v 148 = 148 < data . length ? data [ 148 ] : 0 ; int v 149 = 149 < data . length ? data [ 149 ] : 0 ; int v 150 = 150 < data . length ? data [ 150 ] : 0 ; int v 151 = 151 < data . length ? data [ 151 ] : 0 ; int v 152 = 152 < data . length ? data [ 152 ] : 0 ; int v 153 = 153 < data . length ? data [ 153 ] : 0 ; int v 154 = 154 < data . length ? data [ 154 ] : 0 ; int v 155 = 155 < data . length ? data [ 155 ] : 0 ; int v 156 = 156 < data . length ? data [ 156 ] : 0 ; int v 157 = 157 < data . length ? data [ 157 ] : 0 ; int v 158 = 158 < data . length ? data [ 158 ] : 0 ; int v 159 = 159 < data . length ? data [ 159 ] : 0 ; int v 160 = 160 < data . length ? data [ 160 ] : 0 ; int v 161 = 161 < data . length ? data [ 161 ] : 0 ; int v 162 = 162 < data . length ? data [ 162 ] : 0 ; int v 163 = 163 < data . length ? data [ 163 ] : 0 ; int v 164 = 164 < data . length ? data [ 164 ] : 0 ; int v 165 = 165 < data . length ? data [ 165 ] : 0 ; int v 166 = 166 < data . length ? data [ 166 ] : 0 ; int v 167 = 167 < data . length ? data [ 167 ] : 0 ; int v 168 = 168 < data . length ? data [ 168 ] : 0 ; int v 169 = 169 < data . length ? data [ 169 ] : 0 ; int v 170 = 170 < data . length ? data [ 170 ] : 0 ; int v 171 = 171 < data . length ? data [ 171 ] : 0 ; int v 172 = 172 < data . length ? data [ 172 ] : 0 ; int v 173 = 173 < data . length ? data [ 173 ] : 0 ; int v 174 = 174 < data . length ? data [ 174 ] : 0 ; int v 175 = 175 < data . length ? data [ 175 ] : 0 ; int v 176 = 176 < data . length ? data [ 176 ] : 0 ; int v 177 = 177 < data . length ? data [ 177 ] : 0 ; int v 178 = 178 < data . length ? data [ 178 ] : 0 ; int v 179 = 179 < data . length ? data [ 179 ] : 0 ; int v 180 = 180 < data . length ? data [ 180 ] : 0 ; int v 181 = 181 < data . length ? data [ 181 ] : 0 ; int v 182 = 182 < data . length ? data [ 182 ] : 0 ; int v 183 = 183 < data . length ? data [ 183 ] : 0 ; int v 184 = 184 < data . length ? data [ 184 ] : 0 ; int v 185 = 185 < data . length ? data [ 185 ] : 0 ; int v 186 = 186 < data . length ? data [ 186 ] : 0 ; int v 187 = 187 < data . length ? data [ 187 ] : 0 ; int v 188 = 188 < data . length ? data [ 188 ] : 0 ; int v 189 = 189 < data . length ? data [ 189 ] : 0 ; int v 190 = 190 < data . length ? data [ 190 ] : 0 ; int v 191 = 191 < data . length ? data [ 191 ] : 0 ; int v 192 = 192 < data . length ? data [ 192 ] : 0 ; int v 193 = 193 < data . length ? data [ 193 ] : 0 ; int v 194 = 194 < data . length ? data [ 194 ] : 0 ; int v 195 = 195 < data . length ? data [ 195 ] : 0 ; int v 196 = 196 < data . length ? data [ 196 ] : 0 ; int v 197 = 197 < data . length ? data [ 197 ] : 0 ;" }, { "block_ids": [ @@ -165,6 +172,7 @@ } ], "text": " int v198 = 198 < data.length ? data[198] : 0;\n int v199 = 199 < data.length ? data[199] : 0;\n int v200 = 200 < data.length ? data[200] : 0;\n int v201 = 201 < data.length ? data[201] : 0;\n int v202 = 202 < data.length ? data[202] : 0;\n int v203 = 203 < data.length ? data[203] : 0;\n int v204 = 204 < data.length ? data[204] : 0;\n int v205 = 205 < data.length ? data[205] : 0;\n int v206 = 206 < data.length ? data[206] : 0;\n int v207 = 207 < data.length ? data[207] : 0;\n int v208 = 208 < data.length ? data[208] : 0;\n int v209 = 209 < data.length ? data[209] : 0;\n return data.length;\n }\n}", - "token_estimate": 228 + "token_estimate": 228, + "tokenized_korean_text": "int v 198 = 198 < data . length ? data [ 198 ] : 0 ; int v 199 = 199 < data . length ? data [ 199 ] : 0 ; int v 200 = 200 < data . length ? data [ 200 ] : 0 ; int v 201 = 201 < data . length ? data [ 201 ] : 0 ; int v 202 = 202 < data . length ? data [ 202 ] : 0 ; int v 203 = 203 < data . length ? data [ 203 ] : 0 ; int v 204 = 204 < data . length ? data [ 204 ] : 0 ; int v 205 = 205 < data . length ? data [ 205 ] : 0 ; int v 206 = 206 < data . length ? data [ 206 ] : 0 ; int v 207 = 207 < data . length ? data [ 207 ] : 0 ; int v 208 = 208 < data . length ? data [ 208 ] : 0 ; int v 209 = 209 < data . length ? data [ 209 ] : 0 ; return data . length ; } }" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json index fb33e50..6af5efe 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "const fs = require('fs');\nconst path = require('path');\nconst { EventEmitter } = require('events');\nconst assert = require('assert');\nconst crypto = require('crypto');", - "token_estimate": 56 + "token_estimate": 56, + "tokenized_korean_text": "const fs = require (' fs '); const path = require (' path '); const { EventEmitter } = require (' events '); const assert = require (' assert '); const crypto = require (' crypto ');" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "export function add(a, b) {\n if (typeof a !== 'number') throw new TypeError('a');\n if (typeof b !== 'number') throw new TypeError('b');\n const result = a + b;\n assert(isFinite(result));\n return result;\n}", - "token_estimate": 70 + "token_estimate": 70, + "tokenized_korean_text": "export function add ( a , b ) { if ( typeof a !== ' number ') throw new TypeError (' a '); if ( typeof b !== ' number ') throw new TypeError (' b '); const result = a + b ; assert ( isFinite ( result )); return result ; }" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "class EventBus {\n constructor() {\n this._handlers = new Map();\n this._history = [];\n this._maxHistory = 100;\n this._seq = 0;\n }\n}", - "token_estimate": 48 + "token_estimate": 48, + "tokenized_korean_text": "class EventBus { constructor ( ) { this ._ handlers = new Map (); this ._ history = []; this ._ maxHistory = 100 ; this ._ seq = 0 ; } }" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "class BaseHandler {\n handle(event) {\n throw new Error('not implemented');\n }\n batchHandle(events) {\n const results = [];\n for (const ev of events) {\n results.push(this.handle(ev));\n }\n return results;\n }\n}", - "token_estimate": 77 + "token_estimate": 77, + "tokenized_korean_text": "class BaseHandler { handle ( event ) { throw new Error (' not implemented '); } batchHandle ( events ) { const results = []; for ( const ev of events ) { results . push ( this . handle ( ev )); } return results ; } }" }, { "block_ids": [ @@ -102,7 +106,8 @@ } ], "text": "class EventBus {\n emit(name, payload) {\n const handlers = this._handlers.get(name) ?? [];\n for (const h of handlers) {\n h(payload);\n }\n return this;\n }\n}", - "token_estimate": 58 + "token_estimate": 58, + "tokenized_korean_text": "class EventBus { emit ( name , payload ) { const handlers = this ._ handlers . get ( name ) ?? []; for ( const h of handlers ) { h ( payload ); } return this ; } }" }, { "block_ids": [ @@ -123,7 +128,8 @@ } ], "text": "class EventBus {\n on(name, handler) {\n if (!this._handlers.has(name)) {\n this._handlers.set(name, []);\n }\n this._handlers.get(name).push(handler);\n return this;\n }\n}", - "token_estimate": 62 + "token_estimate": 62, + "tokenized_korean_text": "class EventBus { on ( name , handler ) { if (! this ._ handlers . has ( name ) ) { this ._ handlers . set ( name , []); } this ._ handlers . get ( name ) . push ( handler ); return this ; } }" }, { "block_ids": [ @@ -144,7 +150,8 @@ } ], "text": "function bigTransform(items) {\n const v0 = items[0] !== undefined ? items[0] : null;\n const v1 = items[1] !== undefined ? items[1] : null;\n const v2 = items[2] !== undefined ? items[2] : null;\n const v3 = items[3] !== undefined ? items[3] : null;\n const v4 = items[4] !== undefined ? items[4] : null;\n const v5 = items[5] !== undefined ? items[5] : null;\n const v6 = items[6] !== undefined ? items[6] : null;\n const v7 = items[7] !== undefined ? items[7] : null;\n const v8 = items[8] !== undefined ? items[8] : null;\n const v9 = items[9] !== undefined ? items[9] : null;\n const v10 = items[10] !== undefined ? items[10] : null;\n const v11 = items[11] !== undefined ? items[11] : null;\n const v12 = items[12] !== undefined ? items[12] : null;\n const v13 = items[13] !== undefined ? items[13] : null;\n const v14 = items[14] !== undefined ? items[14] : null;\n const v15 = items[15] !== undefined ? items[15] : null;\n const v16 = items[16] !== undefined ? items[16] : null;\n const v17 = items[17] !== undefined ? items[17] : null;\n const v18 = items[18] !== undefined ? items[18] : null;\n const v19 = items[19] !== undefined ? items[19] : null;\n const v20 = items[20] !== undefined ? items[20] : null;\n const v21 = items[21] !== undefined ? items[21] : null;\n const v22 = items[22] !== undefined ? items[22] : null;\n const v23 = items[23] !== undefined ? items[23] : null;\n const v24 = items[24] !== undefined ? items[24] : null;\n const v25 = items[25] !== undefined ? items[25] : null;\n const v26 = items[26] !== undefined ? items[26] : null;\n const v27 = items[27] !== undefined ? items[27] : null;\n const v28 = items[28] !== undefined ? items[28] : null;\n const v29 = items[29] !== undefined ? items[29] : null;\n const v30 = items[30] !== undefined ? items[30] : null;\n const v31 = items[31] !== undefined ? items[31] : null;\n const v32 = items[32] !== undefined ? items[32] : null;\n const v33 = items[33] !== undefined ? items[33] : null;\n const v34 = items[34] !== undefined ? items[34] : null;\n const v35 = items[35] !== undefined ? items[35] : null;\n const v36 = items[36] !== undefined ? items[36] : null;\n const v37 = items[37] !== undefined ? items[37] : null;\n const v38 = items[38] !== undefined ? items[38] : null;\n const v39 = items[39] !== undefined ? items[39] : null;\n const v40 = items[40] !== undefined ? items[40] : null;\n const v41 = items[41] !== undefined ? items[41] : null;\n const v42 = items[42] !== undefined ? items[42] : null;\n const v43 = items[43] !== undefined ? items[43] : null;\n const v44 = items[44] !== undefined ? items[44] : null;\n const v45 = items[45] !== undefined ? items[45] : null;\n const v46 = items[46] !== undefined ? items[46] : null;\n const v47 = items[47] !== undefined ? items[47] : null;\n const v48 = items[48] !== undefined ? items[48] : null;\n const v49 = items[49] !== undefined ? items[49] : null;\n const v50 = items[50] !== undefined ? items[50] : null;\n const v51 = items[51] !== undefined ? items[51] : null;\n const v52 = items[52] !== undefined ? items[52] : null;\n const v53 = items[53] !== undefined ? items[53] : null;\n const v54 = items[54] !== undefined ? items[54] : null;\n const v55 = items[55] !== undefined ? items[55] : null;\n const v56 = items[56] !== undefined ? items[56] : null;\n const v57 = items[57] !== undefined ? items[57] : null;\n const v58 = items[58] !== undefined ? items[58] : null;\n const v59 = items[59] !== undefined ? items[59] : null;\n const v60 = items[60] !== undefined ? items[60] : null;\n const v61 = items[61] !== undefined ? items[61] : null;\n const v62 = items[62] !== undefined ? items[62] : null;\n const v63 = items[63] !== undefined ? items[63] : null;\n const v64 = items[64] !== undefined ? items[64] : null;\n const v65 = items[65] !== undefined ? items[65] : null;\n const v66 = items[66] !== undefined ? items[66] : null;\n const v67 = items[67] !== undefined ? items[67] : null;\n const v68 = items[68] !== undefined ? items[68] : null;\n const v69 = items[69] !== undefined ? items[69] : null;\n const v70 = items[70] !== undefined ? items[70] : null;\n const v71 = items[71] !== undefined ? items[71] : null;\n const v72 = items[72] !== undefined ? items[72] : null;\n const v73 = items[73] !== undefined ? items[73] : null;\n const v74 = items[74] !== undefined ? items[74] : null;\n const v75 = items[75] !== undefined ? items[75] : null;\n const v76 = items[76] !== undefined ? items[76] : null;\n const v77 = items[77] !== undefined ? items[77] : null;\n const v78 = items[78] !== undefined ? items[78] : null;\n const v79 = items[79] !== undefined ? items[79] : null;\n const v80 = items[80] !== undefined ? items[80] : null;\n const v81 = items[81] !== undefined ? items[81] : null;\n const v82 = items[82] !== undefined ? items[82] : null;\n const v83 = items[83] !== undefined ? items[83] : null;\n const v84 = items[84] !== undefined ? items[84] : null;\n const v85 = items[85] !== undefined ? items[85] : null;\n const v86 = items[86] !== undefined ? items[86] : null;\n const v87 = items[87] !== undefined ? items[87] : null;\n const v88 = items[88] !== undefined ? items[88] : null;\n const v89 = items[89] !== undefined ? items[89] : null;\n const v90 = items[90] !== undefined ? items[90] : null;\n const v91 = items[91] !== undefined ? items[91] : null;\n const v92 = items[92] !== undefined ? items[92] : null;\n const v93 = items[93] !== undefined ? items[93] : null;\n const v94 = items[94] !== undefined ? items[94] : null;\n const v95 = items[95] !== undefined ? items[95] : null;\n const v96 = items[96] !== undefined ? items[96] : null;\n const v97 = items[97] !== undefined ? items[97] : null;\n const v98 = items[98] !== undefined ? items[98] : null;\n const v99 = items[99] !== undefined ? items[99] : null;\n const v100 = items[100] !== undefined ? items[100] : null;\n const v101 = items[101] !== undefined ? items[101] : null;\n const v102 = items[102] !== undefined ? items[102] : null;\n const v103 = items[103] !== undefined ? items[103] : null;\n const v104 = items[104] !== undefined ? items[104] : null;\n const v105 = items[105] !== undefined ? items[105] : null;\n const v106 = items[106] !== undefined ? items[106] : null;\n const v107 = items[107] !== undefined ? items[107] : null;\n const v108 = items[108] !== undefined ? items[108] : null;\n const v109 = items[109] !== undefined ? items[109] : null;\n const v110 = items[110] !== undefined ? items[110] : null;\n const v111 = items[111] !== undefined ? items[111] : null;\n const v112 = items[112] !== undefined ? items[112] : null;\n const v113 = items[113] !== undefined ? items[113] : null;\n const v114 = items[114] !== undefined ? items[114] : null;\n const v115 = items[115] !== undefined ? items[115] : null;\n const v116 = items[116] !== undefined ? items[116] : null;\n const v117 = items[117] !== undefined ? items[117] : null;\n const v118 = items[118] !== undefined ? items[118] : null;\n const v119 = items[119] !== undefined ? items[119] : null;\n const v120 = items[120] !== undefined ? items[120] : null;\n const v121 = items[121] !== undefined ? items[121] : null;\n const v122 = items[122] !== undefined ? items[122] : null;\n const v123 = items[123] !== undefined ? items[123] : null;\n const v124 = items[124] !== undefined ? items[124] : null;\n const v125 = items[125] !== undefined ? items[125] : null;\n const v126 = items[126] !== undefined ? items[126] : null;\n const v127 = items[127] !== undefined ? items[127] : null;\n const v128 = items[128] !== undefined ? items[128] : null;\n const v129 = items[129] !== undefined ? items[129] : null;\n const v130 = items[130] !== undefined ? items[130] : null;\n const v131 = items[131] !== undefined ? items[131] : null;\n const v132 = items[132] !== undefined ? items[132] : null;\n const v133 = items[133] !== undefined ? items[133] : null;\n const v134 = items[134] !== undefined ? items[134] : null;\n const v135 = items[135] !== undefined ? items[135] : null;\n const v136 = items[136] !== undefined ? items[136] : null;\n const v137 = items[137] !== undefined ? items[137] : null;\n const v138 = items[138] !== undefined ? items[138] : null;\n const v139 = items[139] !== undefined ? items[139] : null;\n const v140 = items[140] !== undefined ? items[140] : null;\n const v141 = items[141] !== undefined ? items[141] : null;\n const v142 = items[142] !== undefined ? items[142] : null;\n const v143 = items[143] !== undefined ? items[143] : null;\n const v144 = items[144] !== undefined ? items[144] : null;\n const v145 = items[145] !== undefined ? items[145] : null;\n const v146 = items[146] !== undefined ? items[146] : null;\n const v147 = items[147] !== undefined ? items[147] : null;\n const v148 = items[148] !== undefined ? items[148] : null;\n const v149 = items[149] !== undefined ? items[149] : null;\n const v150 = items[150] !== undefined ? items[150] : null;\n const v151 = items[151] !== undefined ? items[151] : null;\n const v152 = items[152] !== undefined ? items[152] : null;\n const v153 = items[153] !== undefined ? items[153] : null;\n const v154 = items[154] !== undefined ? items[154] : null;\n const v155 = items[155] !== undefined ? items[155] : null;\n const v156 = items[156] !== undefined ? items[156] : null;\n const v157 = items[157] !== undefined ? items[157] : null;\n const v158 = items[158] !== undefined ? items[158] : null;\n const v159 = items[159] !== undefined ? items[159] : null;\n const v160 = items[160] !== undefined ? items[160] : null;\n const v161 = items[161] !== undefined ? items[161] : null;\n const v162 = items[162] !== undefined ? items[162] : null;\n const v163 = items[163] !== undefined ? items[163] : null;\n const v164 = items[164] !== undefined ? items[164] : null;\n const v165 = items[165] !== undefined ? items[165] : null;\n const v166 = items[166] !== undefined ? items[166] : null;\n const v167 = items[167] !== undefined ? items[167] : null;\n const v168 = items[168] !== undefined ? items[168] : null;\n const v169 = items[169] !== undefined ? items[169] : null;\n const v170 = items[170] !== undefined ? items[170] : null;\n const v171 = items[171] !== undefined ? items[171] : null;\n const v172 = items[172] !== undefined ? items[172] : null;\n const v173 = items[173] !== undefined ? items[173] : null;\n const v174 = items[174] !== undefined ? items[174] : null;\n const v175 = items[175] !== undefined ? items[175] : null;\n const v176 = items[176] !== undefined ? items[176] : null;\n const v177 = items[177] !== undefined ? items[177] : null;\n const v178 = items[178] !== undefined ? items[178] : null;\n const v179 = items[179] !== undefined ? items[179] : null;\n const v180 = items[180] !== undefined ? items[180] : null;\n const v181 = items[181] !== undefined ? items[181] : null;\n const v182 = items[182] !== undefined ? items[182] : null;\n const v183 = items[183] !== undefined ? items[183] : null;\n const v184 = items[184] !== undefined ? items[184] : null;\n const v185 = items[185] !== undefined ? items[185] : null;\n const v186 = items[186] !== undefined ? items[186] : null;\n const v187 = items[187] !== undefined ? items[187] : null;\n const v188 = items[188] !== undefined ? items[188] : null;\n const v189 = items[189] !== undefined ? items[189] : null;\n const v190 = items[190] !== undefined ? items[190] : null;\n const v191 = items[191] !== undefined ? items[191] : null;\n const v192 = items[192] !== undefined ? items[192] : null;\n const v193 = items[193] !== undefined ? items[193] : null;\n const v194 = items[194] !== undefined ? items[194] : null;\n const v195 = items[195] !== undefined ? items[195] : null;\n const v196 = items[196] !== undefined ? items[196] : null;\n const v197 = items[197] !== undefined ? items[197] : null;\n const v198 = items[198] !== undefined ? items[198] : null;", - "token_estimate": 3947 + "token_estimate": 3947, + "tokenized_korean_text": "function bigTransform ( items ) { const v 0 = items [ 0 ] !== undefined ? items [ 0 ] : null ; const v 1 = items [ 1 ] !== undefined ? items [ 1 ] : null ; const v 2 = items [ 2 ] !== undefined ? items [ 2 ] : null ; const v 3 = items [ 3 ] !== undefined ? items [ 3 ] : null ; const v 4 = items [ 4 ] !== undefined ? items [ 4 ] : null ; const v 5 = items [ 5 ] !== undefined ? items [ 5 ] : null ; const v 6 = items [ 6 ] !== undefined ? items [ 6 ] : null ; const v 7 = items [ 7 ] !== undefined ? items [ 7 ] : null ; const v 8 = items [ 8 ] !== undefined ? items [ 8 ] : null ; const v 9 = items [ 9 ] !== undefined ? items [ 9 ] : null ; const v 10 = items [ 10 ] !== undefined ? items [ 10 ] : null ; const v 11 = items [ 11 ] !== undefined ? items [ 11 ] : null ; const v 12 = items [ 12 ] !== undefined ? items [ 12 ] : null ; const v 13 = items [ 13 ] !== undefined ? items [ 13 ] : null ; const v 14 = items [ 14 ] !== undefined ? items [ 14 ] : null ; const v 15 = items [ 15 ] !== undefined ? items [ 15 ] : null ; const v 16 = items [ 16 ] !== undefined ? items [ 16 ] : null ; const v 17 = items [ 17 ] !== undefined ? items [ 17 ] : null ; const v 18 = items [ 18 ] !== undefined ? items [ 18 ] : null ; const v 19 = items [ 19 ] !== undefined ? items [ 19 ] : null ; const v 20 = items [ 20 ] !== undefined ? items [ 20 ] : null ; const v 21 = items [ 21 ] !== undefined ? items [ 21 ] : null ; const v 22 = items [ 22 ] !== undefined ? items [ 22 ] : null ; const v 23 = items [ 23 ] !== undefined ? items [ 23 ] : null ; const v 24 = items [ 24 ] !== undefined ? items [ 24 ] : null ; const v 25 = items [ 25 ] !== undefined ? items [ 25 ] : null ; const v 26 = items [ 26 ] !== undefined ? items [ 26 ] : null ; const v 27 = items [ 27 ] !== undefined ? items [ 27 ] : null ; const v 28 = items [ 28 ] !== undefined ? items [ 28 ] : null ; const v 29 = items [ 29 ] !== undefined ? items [ 29 ] : null ; const v 30 = items [ 30 ] !== undefined ? items [ 30 ] : null ; const v 31 = items [ 31 ] !== undefined ? items [ 31 ] : null ; const v 32 = items [ 32 ] !== undefined ? items [ 32 ] : null ; const v 33 = items [ 33 ] !== undefined ? items [ 33 ] : null ; const v 34 = items [ 34 ] !== undefined ? items [ 34 ] : null ; const v 35 = items [ 35 ] !== undefined ? items [ 35 ] : null ; const v 36 = items [ 36 ] !== undefined ? items [ 36 ] : null ; const v 37 = items [ 37 ] !== undefined ? items [ 37 ] : null ; const v 38 = items [ 38 ] !== undefined ? items [ 38 ] : null ; const v 39 = items [ 39 ] !== undefined ? items [ 39 ] : null ; const v 40 = items [ 40 ] !== undefined ? items [ 40 ] : null ; const v 41 = items [ 41 ] !== undefined ? items [ 41 ] : null ; const v 42 = items [ 42 ] !== undefined ? items [ 42 ] : null ; const v 43 = items [ 43 ] !== undefined ? items [ 43 ] : null ; const v 44 = items [ 44 ] !== undefined ? items [ 44 ] : null ; const v 45 = items [ 45 ] !== undefined ? items [ 45 ] : null ; const v 46 = items [ 46 ] !== undefined ? items [ 46 ] : null ; const v 47 = items [ 47 ] !== undefined ? items [ 47 ] : null ; const v 48 = items [ 48 ] !== undefined ? items [ 48 ] : null ; const v 49 = items [ 49 ] !== undefined ? items [ 49 ] : null ; const v 50 = items [ 50 ] !== undefined ? items [ 50 ] : null ; const v 51 = items [ 51 ] !== undefined ? items [ 51 ] : null ; const v 52 = items [ 52 ] !== undefined ? items [ 52 ] : null ; const v 53 = items [ 53 ] !== undefined ? items [ 53 ] : null ; const v 54 = items [ 54 ] !== undefined ? items [ 54 ] : null ; const v 55 = items [ 55 ] !== undefined ? items [ 55 ] : null ; const v 56 = items [ 56 ] !== undefined ? items [ 56 ] : null ; const v 57 = items [ 57 ] !== undefined ? items [ 57 ] : null ; const v 58 = items [ 58 ] !== undefined ? items [ 58 ] : null ; const v 59 = items [ 59 ] !== undefined ? items [ 59 ] : null ; const v 60 = items [ 60 ] !== undefined ? items [ 60 ] : null ; const v 61 = items [ 61 ] !== undefined ? items [ 61 ] : null ; const v 62 = items [ 62 ] !== undefined ? items [ 62 ] : null ; const v 63 = items [ 63 ] !== undefined ? items [ 63 ] : null ; const v 64 = items [ 64 ] !== undefined ? items [ 64 ] : null ; const v 65 = items [ 65 ] !== undefined ? items [ 65 ] : null ; const v 66 = items [ 66 ] !== undefined ? items [ 66 ] : null ; const v 67 = items [ 67 ] !== undefined ? items [ 67 ] : null ; const v 68 = items [ 68 ] !== undefined ? items [ 68 ] : null ; const v 69 = items [ 69 ] !== undefined ? items [ 69 ] : null ; const v 70 = items [ 70 ] !== undefined ? items [ 70 ] : null ; const v 71 = items [ 71 ] !== undefined ? items [ 71 ] : null ; const v 72 = items [ 72 ] !== undefined ? items [ 72 ] : null ; const v 73 = items [ 73 ] !== undefined ? items [ 73 ] : null ; const v 74 = items [ 74 ] !== undefined ? items [ 74 ] : null ; const v 75 = items [ 75 ] !== undefined ? items [ 75 ] : null ; const v 76 = items [ 76 ] !== undefined ? items [ 76 ] : null ; const v 77 = items [ 77 ] !== undefined ? items [ 77 ] : null ; const v 78 = items [ 78 ] !== undefined ? items [ 78 ] : null ; const v 79 = items [ 79 ] !== undefined ? items [ 79 ] : null ; const v 80 = items [ 80 ] !== undefined ? items [ 80 ] : null ; const v 81 = items [ 81 ] !== undefined ? items [ 81 ] : null ; const v 82 = items [ 82 ] !== undefined ? items [ 82 ] : null ; const v 83 = items [ 83 ] !== undefined ? items [ 83 ] : null ; const v 84 = items [ 84 ] !== undefined ? items [ 84 ] : null ; const v 85 = items [ 85 ] !== undefined ? items [ 85 ] : null ; const v 86 = items [ 86 ] !== undefined ? items [ 86 ] : null ; const v 87 = items [ 87 ] !== undefined ? items [ 87 ] : null ; const v 88 = items [ 88 ] !== undefined ? items [ 88 ] : null ; const v 89 = items [ 89 ] !== undefined ? items [ 89 ] : null ; const v 90 = items [ 90 ] !== undefined ? items [ 90 ] : null ; const v 91 = items [ 91 ] !== undefined ? items [ 91 ] : null ; const v 92 = items [ 92 ] !== undefined ? items [ 92 ] : null ; const v 93 = items [ 93 ] !== undefined ? items [ 93 ] : null ; const v 94 = items [ 94 ] !== undefined ? items [ 94 ] : null ; const v 95 = items [ 95 ] !== undefined ? items [ 95 ] : null ; const v 96 = items [ 96 ] !== undefined ? items [ 96 ] : null ; const v 97 = items [ 97 ] !== undefined ? items [ 97 ] : null ; const v 98 = items [ 98 ] !== undefined ? items [ 98 ] : null ; const v 99 = items [ 99 ] !== undefined ? items [ 99 ] : null ; const v 100 = items [ 100 ] !== undefined ? items [ 100 ] : null ; const v 101 = items [ 101 ] !== undefined ? items [ 101 ] : null ; const v 102 = items [ 102 ] !== undefined ? items [ 102 ] : null ; const v 103 = items [ 103 ] !== undefined ? items [ 103 ] : null ; const v 104 = items [ 104 ] !== undefined ? items [ 104 ] : null ; const v 105 = items [ 105 ] !== undefined ? items [ 105 ] : null ; const v 106 = items [ 106 ] !== undefined ? items [ 106 ] : null ; const v 107 = items [ 107 ] !== undefined ? items [ 107 ] : null ; const v 108 = items [ 108 ] !== undefined ? items [ 108 ] : null ; const v 109 = items [ 109 ] !== undefined ? items [ 109 ] : null ; const v 110 = items [ 110 ] !== undefined ? items [ 110 ] : null ; const v 111 = items [ 111 ] !== undefined ? items [ 111 ] : null ; const v 112 = items [ 112 ] !== undefined ? items [ 112 ] : null ; const v 113 = items [ 113 ] !== undefined ? items [ 113 ] : null ; const v 114 = items [ 114 ] !== undefined ? items [ 114 ] : null ; const v 115 = items [ 115 ] !== undefined ? items [ 115 ] : null ; const v 116 = items [ 116 ] !== undefined ? items [ 116 ] : null ; const v 117 = items [ 117 ] !== undefined ? items [ 117 ] : null ; const v 118 = items [ 118 ] !== undefined ? items [ 118 ] : null ; const v 119 = items [ 119 ] !== undefined ? items [ 119 ] : null ; const v 120 = items [ 120 ] !== undefined ? items [ 120 ] : null ; const v 121 = items [ 121 ] !== undefined ? items [ 121 ] : null ; const v 122 = items [ 122 ] !== undefined ? items [ 122 ] : null ; const v 123 = items [ 123 ] !== undefined ? items [ 123 ] : null ; const v 124 = items [ 124 ] !== undefined ? items [ 124 ] : null ; const v 125 = items [ 125 ] !== undefined ? items [ 125 ] : null ; const v 126 = items [ 126 ] !== undefined ? items [ 126 ] : null ; const v 127 = items [ 127 ] !== undefined ? items [ 127 ] : null ; const v 128 = items [ 128 ] !== undefined ? items [ 128 ] : null ; const v 129 = items [ 129 ] !== undefined ? items [ 129 ] : null ; const v 130 = items [ 130 ] !== undefined ? items [ 130 ] : null ; const v 131 = items [ 131 ] !== undefined ? items [ 131 ] : null ; const v 132 = items [ 132 ] !== undefined ? items [ 132 ] : null ; const v 133 = items [ 133 ] !== undefined ? items [ 133 ] : null ; const v 134 = items [ 134 ] !== undefined ? items [ 134 ] : null ; const v 135 = items [ 135 ] !== undefined ? items [ 135 ] : null ; const v 136 = items [ 136 ] !== undefined ? items [ 136 ] : null ; const v 137 = items [ 137 ] !== undefined ? items [ 137 ] : null ; const v 138 = items [ 138 ] !== undefined ? items [ 138 ] : null ; const v 139 = items [ 139 ] !== undefined ? items [ 139 ] : null ; const v 140 = items [ 140 ] !== undefined ? items [ 140 ] : null ; const v 141 = items [ 141 ] !== undefined ? items [ 141 ] : null ; const v 142 = items [ 142 ] !== undefined ? items [ 142 ] : null ; const v 143 = items [ 143 ] !== undefined ? items [ 143 ] : null ; const v 144 = items [ 144 ] !== undefined ? items [ 144 ] : null ; const v 145 = items [ 145 ] !== undefined ? items [ 145 ] : null ; const v 146 = items [ 146 ] !== undefined ? items [ 146 ] : null ; const v 147 = items [ 147 ] !== undefined ? items [ 147 ] : null ; const v 148 = items [ 148 ] !== undefined ? items [ 148 ] : null ; const v 149 = items [ 149 ] !== undefined ? items [ 149 ] : null ; const v 150 = items [ 150 ] !== undefined ? items [ 150 ] : null ; const v 151 = items [ 151 ] !== undefined ? items [ 151 ] : null ; const v 152 = items [ 152 ] !== undefined ? items [ 152 ] : null ; const v 153 = items [ 153 ] !== undefined ? items [ 153 ] : null ; const v 154 = items [ 154 ] !== undefined ? items [ 154 ] : null ; const v 155 = items [ 155 ] !== undefined ? items [ 155 ] : null ; const v 156 = items [ 156 ] !== undefined ? items [ 156 ] : null ; const v 157 = items [ 157 ] !== undefined ? items [ 157 ] : null ; const v 158 = items [ 158 ] !== undefined ? items [ 158 ] : null ; const v 159 = items [ 159 ] !== undefined ? items [ 159 ] : null ; const v 160 = items [ 160 ] !== undefined ? items [ 160 ] : null ; const v 161 = items [ 161 ] !== undefined ? items [ 161 ] : null ; const v 162 = items [ 162 ] !== undefined ? items [ 162 ] : null ; const v 163 = items [ 163 ] !== undefined ? items [ 163 ] : null ; const v 164 = items [ 164 ] !== undefined ? items [ 164 ] : null ; const v 165 = items [ 165 ] !== undefined ? items [ 165 ] : null ; const v 166 = items [ 166 ] !== undefined ? items [ 166 ] : null ; const v 167 = items [ 167 ] !== undefined ? items [ 167 ] : null ; const v 168 = items [ 168 ] !== undefined ? items [ 168 ] : null ; const v 169 = items [ 169 ] !== undefined ? items [ 169 ] : null ; const v 170 = items [ 170 ] !== undefined ? items [ 170 ] : null ; const v 171 = items [ 171 ] !== undefined ? items [ 171 ] : null ; const v 172 = items [ 172 ] !== undefined ? items [ 172 ] : null ; const v 173 = items [ 173 ] !== undefined ? items [ 173 ] : null ; const v 174 = items [ 174 ] !== undefined ? items [ 174 ] : null ; const v 175 = items [ 175 ] !== undefined ? items [ 175 ] : null ; const v 176 = items [ 176 ] !== undefined ? items [ 176 ] : null ; const v 177 = items [ 177 ] !== undefined ? items [ 177 ] : null ; const v 178 = items [ 178 ] !== undefined ? items [ 178 ] : null ; const v 179 = items [ 179 ] !== undefined ? items [ 179 ] : null ; const v 180 = items [ 180 ] !== undefined ? items [ 180 ] : null ; const v 181 = items [ 181 ] !== undefined ? items [ 181 ] : null ; const v 182 = items [ 182 ] !== undefined ? items [ 182 ] : null ; const v 183 = items [ 183 ] !== undefined ? items [ 183 ] : null ; const v 184 = items [ 184 ] !== undefined ? items [ 184 ] : null ; const v 185 = items [ 185 ] !== undefined ? items [ 185 ] : null ; const v 186 = items [ 186 ] !== undefined ? items [ 186 ] : null ; const v 187 = items [ 187 ] !== undefined ? items [ 187 ] : null ; const v 188 = items [ 188 ] !== undefined ? items [ 188 ] : null ; const v 189 = items [ 189 ] !== undefined ? items [ 189 ] : null ; const v 190 = items [ 190 ] !== undefined ? items [ 190 ] : null ; const v 191 = items [ 191 ] !== undefined ? items [ 191 ] : null ; const v 192 = items [ 192 ] !== undefined ? items [ 192 ] : null ; const v 193 = items [ 193 ] !== undefined ? items [ 193 ] : null ; const v 194 = items [ 194 ] !== undefined ? items [ 194 ] : null ; const v 195 = items [ 195 ] !== undefined ? items [ 195 ] : null ; const v 196 = items [ 196 ] !== undefined ? items [ 196 ] : null ; const v 197 = items [ 197 ] !== undefined ? items [ 197 ] : null ; const v 198 = items [ 198 ] !== undefined ? items [ 198 ] : null ;" }, { "block_ids": [ @@ -165,6 +172,7 @@ } ], "text": " const v199 = items[199] !== undefined ? items[199] : null;\n const v200 = items[200] !== undefined ? items[200] : null;\n const v201 = items[201] !== undefined ? items[201] : null;\n const v202 = items[202] !== undefined ? items[202] : null;\n const v203 = items[203] !== undefined ? items[203] : null;\n const v204 = items[204] !== undefined ? items[204] : null;\n const v205 = items[205] !== undefined ? items[205] : null;\n const v206 = items[206] !== undefined ? items[206] : null;\n const v207 = items[207] !== undefined ? items[207] : null;\n const v208 = items[208] !== undefined ? items[208] : null;\n const v209 = items[209] !== undefined ? items[209] : null;\n return items;\n}", - "token_estimate": 230 + "token_estimate": 230, + "tokenized_korean_text": "const v 199 = items [ 199 ] !== undefined ? items [ 199 ] : null ; const v 200 = items [ 200 ] !== undefined ? items [ 200 ] : null ; const v 201 = items [ 201 ] !== undefined ? items [ 201 ] : null ; const v 202 = items [ 202 ] !== undefined ? items [ 202 ] : null ; const v 203 = items [ 203 ] !== undefined ? items [ 203 ] : null ; const v 204 = items [ 204 ] !== undefined ? items [ 204 ] : null ; const v 205 = items [ 205 ] !== undefined ? items [ 205 ] : null ; const v 206 = items [ 206 ] !== undefined ? items [ 206 ] : null ; const v 207 = items [ 207 ] !== undefined ? items [ 207 ] : null ; const v 208 = items [ 208 ] !== undefined ? items [ 208 ] : null ; const v 209 = items [ 209 ] !== undefined ? items [ 209 ] : null ; return items ; }" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.kt.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.kt.chunks.snapshot.json index 3e046ff..97244e1 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.kt.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.kt.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "import kotlin.collections.List\nimport kotlin.collections.Map\nimport kotlin.collections.MutableList\nimport kotlin.collections.MutableMap\nimport kotlin.collections.mutableListOf", - "token_estimate": 59 + "token_estimate": 59, + "tokenized_korean_text": "import kotlin . collections . List import kotlin . collections . Map import kotlin . collections . MutableList import kotlin . collections . MutableMap import kotlin . collections . mutableListOf" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "fun computeMRR(scores: List): Double {\n if (scores.isEmpty()) {\n return 0.0\n }\n return 1.0 / scores.size\n}", - "token_estimate": 44 + "token_estimate": 44, + "tokenized_korean_text": "fun computeMRR ( scores : List < Double >): Double { if ( scores . isEmpty ( ) ) { return 0 . 0 } return 1 . 0 / scores . size }" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "data class MetricsCollector(\n val scores: MutableList = mutableListOf(),\n val labels: MutableList = mutableListOf(),\n val counts: MutableMap = mutableMapOf(),\n val totals: MutableMap = mutableMapOf(),\n val tags: MutableList = mutableListOf(),\n)", - "token_estimate": 104 + "token_estimate": 104, + "tokenized_korean_text": "data class MetricsCollector ( val scores : MutableList < Double > = mutableListOf ( ) , val labels : MutableList < String > = mutableListOf ( ) , val counts : MutableMap < String , Int > = mutableMapOf ( ) , val totals : MutableMap < String , Double > = mutableMapOf ( ) , val tags : MutableList < String > = mutableListOf ( ) , )" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "open class BaseEvaluator(val name: String) {\n\n fun evaluate(data: List) {\n val joined = data.joinToString(\",\")\n println(joined)\n }\n\n open fun describe(): String = name\n}", - "token_estimate": 67 + "token_estimate": 67, + "tokenized_korean_text": "open class BaseEvaluator ( val name : String ) { fun evaluate ( data : List < String >) { val joined = data . joinToString (\",\") println ( joined ) } open fun describe ( ) : String = name }" }, { "block_ids": [ @@ -102,7 +106,8 @@ } ], "text": "fun MetricsCollector.run(inputs: List) {\n for (inp in inputs) {\n scores.add(\n inp\n )\n }\n}", - "token_estimate": 43 + "token_estimate": 43, + "tokenized_korean_text": "fun MetricsCollector . run ( inputs : List < Double >) { for ( inp in inputs ) { scores . add ( inp ) } }" }, { "block_ids": [ @@ -123,7 +128,8 @@ } ], "text": "fun MetricsCollector.report(): Map {\n return mapOf(\n \"mean\" to 0.0,\n \"count\" to scores.size,\n \"tags\" to tags,\n )\n}", - "token_estimate": 52 + "token_estimate": 52, + "tokenized_korean_text": "fun MetricsCollector . report ( ) : Map < String , Any > { return mapOf ( \" mean \" to 0 . 0 , \" count \" to scores . size , \" tags \" to tags , ) }" }, { "block_ids": [ @@ -144,7 +150,8 @@ } ], "text": "class BigCompute {\n fun compute(data: IntArray): Int {\n val v0 = if (0 < data.size) data[0] else 0\n val v1 = if (1 < data.size) data[1] else 0\n val v2 = if (2 < data.size) data[2] else 0\n val v3 = if (3 < data.size) data[3] else 0\n val v4 = if (4 < data.size) data[4] else 0\n val v5 = if (5 < data.size) data[5] else 0\n val v6 = if (6 < data.size) data[6] else 0\n val v7 = if (7 < data.size) data[7] else 0\n val v8 = if (8 < data.size) data[8] else 0\n val v9 = if (9 < data.size) data[9] else 0\n val v10 = if (10 < data.size) data[10] else 0\n val v11 = if (11 < data.size) data[11] else 0\n val v12 = if (12 < data.size) data[12] else 0\n val v13 = if (13 < data.size) data[13] else 0\n val v14 = if (14 < data.size) data[14] else 0\n val v15 = if (15 < data.size) data[15] else 0\n val v16 = if (16 < data.size) data[16] else 0\n val v17 = if (17 < data.size) data[17] else 0\n val v18 = if (18 < data.size) data[18] else 0\n val v19 = if (19 < data.size) data[19] else 0\n val v20 = if (20 < data.size) data[20] else 0\n val v21 = if (21 < data.size) data[21] else 0\n val v22 = if (22 < data.size) data[22] else 0\n val v23 = if (23 < data.size) data[23] else 0\n val v24 = if (24 < data.size) data[24] else 0\n val v25 = if (25 < data.size) data[25] else 0\n val v26 = if (26 < data.size) data[26] else 0\n val v27 = if (27 < data.size) data[27] else 0\n val v28 = if (28 < data.size) data[28] else 0\n val v29 = if (29 < data.size) data[29] else 0\n val v30 = if (30 < data.size) data[30] else 0\n val v31 = if (31 < data.size) data[31] else 0\n val v32 = if (32 < data.size) data[32] else 0\n val v33 = if (33 < data.size) data[33] else 0\n val v34 = if (34 < data.size) data[34] else 0\n val v35 = if (35 < data.size) data[35] else 0\n val v36 = if (36 < data.size) data[36] else 0\n val v37 = if (37 < data.size) data[37] else 0\n val v38 = if (38 < data.size) data[38] else 0\n val v39 = if (39 < data.size) data[39] else 0\n val v40 = if (40 < data.size) data[40] else 0\n val v41 = if (41 < data.size) data[41] else 0\n val v42 = if (42 < data.size) data[42] else 0\n val v43 = if (43 < data.size) data[43] else 0\n val v44 = if (44 < data.size) data[44] else 0\n val v45 = if (45 < data.size) data[45] else 0\n val v46 = if (46 < data.size) data[46] else 0\n val v47 = if (47 < data.size) data[47] else 0\n val v48 = if (48 < data.size) data[48] else 0\n val v49 = if (49 < data.size) data[49] else 0\n val v50 = if (50 < data.size) data[50] else 0\n val v51 = if (51 < data.size) data[51] else 0\n val v52 = if (52 < data.size) data[52] else 0\n val v53 = if (53 < data.size) data[53] else 0\n val v54 = if (54 < data.size) data[54] else 0\n val v55 = if (55 < data.size) data[55] else 0\n val v56 = if (56 < data.size) data[56] else 0\n val v57 = if (57 < data.size) data[57] else 0\n val v58 = if (58 < data.size) data[58] else 0\n val v59 = if (59 < data.size) data[59] else 0\n val v60 = if (60 < data.size) data[60] else 0\n val v61 = if (61 < data.size) data[61] else 0\n val v62 = if (62 < data.size) data[62] else 0\n val v63 = if (63 < data.size) data[63] else 0\n val v64 = if (64 < data.size) data[64] else 0\n val v65 = if (65 < data.size) data[65] else 0\n val v66 = if (66 < data.size) data[66] else 0\n val v67 = if (67 < data.size) data[67] else 0\n val v68 = if (68 < data.size) data[68] else 0\n val v69 = if (69 < data.size) data[69] else 0\n val v70 = if (70 < data.size) data[70] else 0\n val v71 = if (71 < data.size) data[71] else 0\n val v72 = if (72 < data.size) data[72] else 0\n val v73 = if (73 < data.size) data[73] else 0\n val v74 = if (74 < data.size) data[74] else 0\n val v75 = if (75 < data.size) data[75] else 0\n val v76 = if (76 < data.size) data[76] else 0\n val v77 = if (77 < data.size) data[77] else 0\n val v78 = if (78 < data.size) data[78] else 0\n val v79 = if (79 < data.size) data[79] else 0\n val v80 = if (80 < data.size) data[80] else 0\n val v81 = if (81 < data.size) data[81] else 0\n val v82 = if (82 < data.size) data[82] else 0\n val v83 = if (83 < data.size) data[83] else 0\n val v84 = if (84 < data.size) data[84] else 0\n val v85 = if (85 < data.size) data[85] else 0\n val v86 = if (86 < data.size) data[86] else 0\n val v87 = if (87 < data.size) data[87] else 0\n val v88 = if (88 < data.size) data[88] else 0\n val v89 = if (89 < data.size) data[89] else 0\n val v90 = if (90 < data.size) data[90] else 0\n val v91 = if (91 < data.size) data[91] else 0\n val v92 = if (92 < data.size) data[92] else 0\n val v93 = if (93 < data.size) data[93] else 0\n val v94 = if (94 < data.size) data[94] else 0\n val v95 = if (95 < data.size) data[95] else 0\n val v96 = if (96 < data.size) data[96] else 0\n val v97 = if (97 < data.size) data[97] else 0\n val v98 = if (98 < data.size) data[98] else 0\n val v99 = if (99 < data.size) data[99] else 0\n val v100 = if (100 < data.size) data[100] else 0\n val v101 = if (101 < data.size) data[101] else 0\n val v102 = if (102 < data.size) data[102] else 0\n val v103 = if (103 < data.size) data[103] else 0\n val v104 = if (104 < data.size) data[104] else 0\n val v105 = if (105 < data.size) data[105] else 0\n val v106 = if (106 < data.size) data[106] else 0\n val v107 = if (107 < data.size) data[107] else 0\n val v108 = if (108 < data.size) data[108] else 0\n val v109 = if (109 < data.size) data[109] else 0\n val v110 = if (110 < data.size) data[110] else 0\n val v111 = if (111 < data.size) data[111] else 0\n val v112 = if (112 < data.size) data[112] else 0\n val v113 = if (113 < data.size) data[113] else 0\n val v114 = if (114 < data.size) data[114] else 0\n val v115 = if (115 < data.size) data[115] else 0\n val v116 = if (116 < data.size) data[116] else 0\n val v117 = if (117 < data.size) data[117] else 0\n val v118 = if (118 < data.size) data[118] else 0\n val v119 = if (119 < data.size) data[119] else 0\n val v120 = if (120 < data.size) data[120] else 0\n val v121 = if (121 < data.size) data[121] else 0\n val v122 = if (122 < data.size) data[122] else 0\n val v123 = if (123 < data.size) data[123] else 0\n val v124 = if (124 < data.size) data[124] else 0\n val v125 = if (125 < data.size) data[125] else 0\n val v126 = if (126 < data.size) data[126] else 0\n val v127 = if (127 < data.size) data[127] else 0\n val v128 = if (128 < data.size) data[128] else 0\n val v129 = if (129 < data.size) data[129] else 0\n val v130 = if (130 < data.size) data[130] else 0\n val v131 = if (131 < data.size) data[131] else 0\n val v132 = if (132 < data.size) data[132] else 0\n val v133 = if (133 < data.size) data[133] else 0\n val v134 = if (134 < data.size) data[134] else 0\n val v135 = if (135 < data.size) data[135] else 0\n val v136 = if (136 < data.size) data[136] else 0\n val v137 = if (137 < data.size) data[137] else 0\n val v138 = if (138 < data.size) data[138] else 0\n val v139 = if (139 < data.size) data[139] else 0\n val v140 = if (140 < data.size) data[140] else 0\n val v141 = if (141 < data.size) data[141] else 0\n val v142 = if (142 < data.size) data[142] else 0\n val v143 = if (143 < data.size) data[143] else 0\n val v144 = if (144 < data.size) data[144] else 0\n val v145 = if (145 < data.size) data[145] else 0\n val v146 = if (146 < data.size) data[146] else 0\n val v147 = if (147 < data.size) data[147] else 0\n val v148 = if (148 < data.size) data[148] else 0\n val v149 = if (149 < data.size) data[149] else 0\n val v150 = if (150 < data.size) data[150] else 0\n val v151 = if (151 < data.size) data[151] else 0\n val v152 = if (152 < data.size) data[152] else 0\n val v153 = if (153 < data.size) data[153] else 0\n val v154 = if (154 < data.size) data[154] else 0\n val v155 = if (155 < data.size) data[155] else 0\n val v156 = if (156 < data.size) data[156] else 0\n val v157 = if (157 < data.size) data[157] else 0\n val v158 = if (158 < data.size) data[158] else 0\n val v159 = if (159 < data.size) data[159] else 0\n val v160 = if (160 < data.size) data[160] else 0\n val v161 = if (161 < data.size) data[161] else 0\n val v162 = if (162 < data.size) data[162] else 0\n val v163 = if (163 < data.size) data[163] else 0\n val v164 = if (164 < data.size) data[164] else 0\n val v165 = if (165 < data.size) data[165] else 0\n val v166 = if (166 < data.size) data[166] else 0\n val v167 = if (167 < data.size) data[167] else 0\n val v168 = if (168 < data.size) data[168] else 0\n val v169 = if (169 < data.size) data[169] else 0\n val v170 = if (170 < data.size) data[170] else 0\n val v171 = if (171 < data.size) data[171] else 0\n val v172 = if (172 < data.size) data[172] else 0\n val v173 = if (173 < data.size) data[173] else 0\n val v174 = if (174 < data.size) data[174] else 0\n val v175 = if (175 < data.size) data[175] else 0\n val v176 = if (176 < data.size) data[176] else 0\n val v177 = if (177 < data.size) data[177] else 0\n val v178 = if (178 < data.size) data[178] else 0\n val v179 = if (179 < data.size) data[179] else 0\n val v180 = if (180 < data.size) data[180] else 0\n val v181 = if (181 < data.size) data[181] else 0\n val v182 = if (182 < data.size) data[182] else 0\n val v183 = if (183 < data.size) data[183] else 0\n val v184 = if (184 < data.size) data[184] else 0\n val v185 = if (185 < data.size) data[185] else 0\n val v186 = if (186 < data.size) data[186] else 0\n val v187 = if (187 < data.size) data[187] else 0\n val v188 = if (188 < data.size) data[188] else 0\n val v189 = if (189 < data.size) data[189] else 0\n val v190 = if (190 < data.size) data[190] else 0\n val v191 = if (191 < data.size) data[191] else 0\n val v192 = if (192 < data.size) data[192] else 0\n val v193 = if (193 < data.size) data[193] else 0\n val v194 = if (194 < data.size) data[194] else 0\n val v195 = if (195 < data.size) data[195] else 0\n val v196 = if (196 < data.size) data[196] else 0\n val v197 = if (197 < data.size) data[197] else 0", - "token_estimate": 3671 + "token_estimate": 3671, + "tokenized_korean_text": "class BigCompute { fun compute ( data : IntArray ) : Int { val v 0 = if ( 0 < data . size ) data [ 0 ] else 0 val v 1 = if ( 1 < data . size ) data [ 1 ] else 0 val v 2 = if ( 2 < data . size ) data [ 2 ] else 0 val v 3 = if ( 3 < data . size ) data [ 3 ] else 0 val v 4 = if ( 4 < data . size ) data [ 4 ] else 0 val v 5 = if ( 5 < data . size ) data [ 5 ] else 0 val v 6 = if ( 6 < data . size ) data [ 6 ] else 0 val v 7 = if ( 7 < data . size ) data [ 7 ] else 0 val v 8 = if ( 8 < data . size ) data [ 8 ] else 0 val v 9 = if ( 9 < data . size ) data [ 9 ] else 0 val v 10 = if ( 10 < data . size ) data [ 10 ] else 0 val v 11 = if ( 11 < data . size ) data [ 11 ] else 0 val v 12 = if ( 12 < data . size ) data [ 12 ] else 0 val v 13 = if ( 13 < data . size ) data [ 13 ] else 0 val v 14 = if ( 14 < data . size ) data [ 14 ] else 0 val v 15 = if ( 15 < data . size ) data [ 15 ] else 0 val v 16 = if ( 16 < data . size ) data [ 16 ] else 0 val v 17 = if ( 17 < data . size ) data [ 17 ] else 0 val v 18 = if ( 18 < data . size ) data [ 18 ] else 0 val v 19 = if ( 19 < data . size ) data [ 19 ] else 0 val v 20 = if ( 20 < data . size ) data [ 20 ] else 0 val v 21 = if ( 21 < data . size ) data [ 21 ] else 0 val v 22 = if ( 22 < data . size ) data [ 22 ] else 0 val v 23 = if ( 23 < data . size ) data [ 23 ] else 0 val v 24 = if ( 24 < data . size ) data [ 24 ] else 0 val v 25 = if ( 25 < data . size ) data [ 25 ] else 0 val v 26 = if ( 26 < data . size ) data [ 26 ] else 0 val v 27 = if ( 27 < data . size ) data [ 27 ] else 0 val v 28 = if ( 28 < data . size ) data [ 28 ] else 0 val v 29 = if ( 29 < data . size ) data [ 29 ] else 0 val v 30 = if ( 30 < data . size ) data [ 30 ] else 0 val v 31 = if ( 31 < data . size ) data [ 31 ] else 0 val v 32 = if ( 32 < data . size ) data [ 32 ] else 0 val v 33 = if ( 33 < data . size ) data [ 33 ] else 0 val v 34 = if ( 34 < data . size ) data [ 34 ] else 0 val v 35 = if ( 35 < data . size ) data [ 35 ] else 0 val v 36 = if ( 36 < data . size ) data [ 36 ] else 0 val v 37 = if ( 37 < data . size ) data [ 37 ] else 0 val v 38 = if ( 38 < data . size ) data [ 38 ] else 0 val v 39 = if ( 39 < data . size ) data [ 39 ] else 0 val v 40 = if ( 40 < data . size ) data [ 40 ] else 0 val v 41 = if ( 41 < data . size ) data [ 41 ] else 0 val v 42 = if ( 42 < data . size ) data [ 42 ] else 0 val v 43 = if ( 43 < data . size ) data [ 43 ] else 0 val v 44 = if ( 44 < data . size ) data [ 44 ] else 0 val v 45 = if ( 45 < data . size ) data [ 45 ] else 0 val v 46 = if ( 46 < data . size ) data [ 46 ] else 0 val v 47 = if ( 47 < data . size ) data [ 47 ] else 0 val v 48 = if ( 48 < data . size ) data [ 48 ] else 0 val v 49 = if ( 49 < data . size ) data [ 49 ] else 0 val v 50 = if ( 50 < data . size ) data [ 50 ] else 0 val v 51 = if ( 51 < data . size ) data [ 51 ] else 0 val v 52 = if ( 52 < data . size ) data [ 52 ] else 0 val v 53 = if ( 53 < data . size ) data [ 53 ] else 0 val v 54 = if ( 54 < data . size ) data [ 54 ] else 0 val v 55 = if ( 55 < data . size ) data [ 55 ] else 0 val v 56 = if ( 56 < data . size ) data [ 56 ] else 0 val v 57 = if ( 57 < data . size ) data [ 57 ] else 0 val v 58 = if ( 58 < data . size ) data [ 58 ] else 0 val v 59 = if ( 59 < data . size ) data [ 59 ] else 0 val v 60 = if ( 60 < data . size ) data [ 60 ] else 0 val v 61 = if ( 61 < data . size ) data [ 61 ] else 0 val v 62 = if ( 62 < data . size ) data [ 62 ] else 0 val v 63 = if ( 63 < data . size ) data [ 63 ] else 0 val v 64 = if ( 64 < data . size ) data [ 64 ] else 0 val v 65 = if ( 65 < data . size ) data [ 65 ] else 0 val v 66 = if ( 66 < data . size ) data [ 66 ] else 0 val v 67 = if ( 67 < data . size ) data [ 67 ] else 0 val v 68 = if ( 68 < data . size ) data [ 68 ] else 0 val v 69 = if ( 69 < data . size ) data [ 69 ] else 0 val v 70 = if ( 70 < data . size ) data [ 70 ] else 0 val v 71 = if ( 71 < data . size ) data [ 71 ] else 0 val v 72 = if ( 72 < data . size ) data [ 72 ] else 0 val v 73 = if ( 73 < data . size ) data [ 73 ] else 0 val v 74 = if ( 74 < data . size ) data [ 74 ] else 0 val v 75 = if ( 75 < data . size ) data [ 75 ] else 0 val v 76 = if ( 76 < data . size ) data [ 76 ] else 0 val v 77 = if ( 77 < data . size ) data [ 77 ] else 0 val v 78 = if ( 78 < data . size ) data [ 78 ] else 0 val v 79 = if ( 79 < data . size ) data [ 79 ] else 0 val v 80 = if ( 80 < data . size ) data [ 80 ] else 0 val v 81 = if ( 81 < data . size ) data [ 81 ] else 0 val v 82 = if ( 82 < data . size ) data [ 82 ] else 0 val v 83 = if ( 83 < data . size ) data [ 83 ] else 0 val v 84 = if ( 84 < data . size ) data [ 84 ] else 0 val v 85 = if ( 85 < data . size ) data [ 85 ] else 0 val v 86 = if ( 86 < data . size ) data [ 86 ] else 0 val v 87 = if ( 87 < data . size ) data [ 87 ] else 0 val v 88 = if ( 88 < data . size ) data [ 88 ] else 0 val v 89 = if ( 89 < data . size ) data [ 89 ] else 0 val v 90 = if ( 90 < data . size ) data [ 90 ] else 0 val v 91 = if ( 91 < data . size ) data [ 91 ] else 0 val v 92 = if ( 92 < data . size ) data [ 92 ] else 0 val v 93 = if ( 93 < data . size ) data [ 93 ] else 0 val v 94 = if ( 94 < data . size ) data [ 94 ] else 0 val v 95 = if ( 95 < data . size ) data [ 95 ] else 0 val v 96 = if ( 96 < data . size ) data [ 96 ] else 0 val v 97 = if ( 97 < data . size ) data [ 97 ] else 0 val v 98 = if ( 98 < data . size ) data [ 98 ] else 0 val v 99 = if ( 99 < data . size ) data [ 99 ] else 0 val v 100 = if ( 100 < data . size ) data [ 100 ] else 0 val v 101 = if ( 101 < data . size ) data [ 101 ] else 0 val v 102 = if ( 102 < data . size ) data [ 102 ] else 0 val v 103 = if ( 103 < data . size ) data [ 103 ] else 0 val v 104 = if ( 104 < data . size ) data [ 104 ] else 0 val v 105 = if ( 105 < data . size ) data [ 105 ] else 0 val v 106 = if ( 106 < data . size ) data [ 106 ] else 0 val v 107 = if ( 107 < data . size ) data [ 107 ] else 0 val v 108 = if ( 108 < data . size ) data [ 108 ] else 0 val v 109 = if ( 109 < data . size ) data [ 109 ] else 0 val v 110 = if ( 110 < data . size ) data [ 110 ] else 0 val v 111 = if ( 111 < data . size ) data [ 111 ] else 0 val v 112 = if ( 112 < data . size ) data [ 112 ] else 0 val v 113 = if ( 113 < data . size ) data [ 113 ] else 0 val v 114 = if ( 114 < data . size ) data [ 114 ] else 0 val v 115 = if ( 115 < data . size ) data [ 115 ] else 0 val v 116 = if ( 116 < data . size ) data [ 116 ] else 0 val v 117 = if ( 117 < data . size ) data [ 117 ] else 0 val v 118 = if ( 118 < data . size ) data [ 118 ] else 0 val v 119 = if ( 119 < data . size ) data [ 119 ] else 0 val v 120 = if ( 120 < data . size ) data [ 120 ] else 0 val v 121 = if ( 121 < data . size ) data [ 121 ] else 0 val v 122 = if ( 122 < data . size ) data [ 122 ] else 0 val v 123 = if ( 123 < data . size ) data [ 123 ] else 0 val v 124 = if ( 124 < data . size ) data [ 124 ] else 0 val v 125 = if ( 125 < data . size ) data [ 125 ] else 0 val v 126 = if ( 126 < data . size ) data [ 126 ] else 0 val v 127 = if ( 127 < data . size ) data [ 127 ] else 0 val v 128 = if ( 128 < data . size ) data [ 128 ] else 0 val v 129 = if ( 129 < data . size ) data [ 129 ] else 0 val v 130 = if ( 130 < data . size ) data [ 130 ] else 0 val v 131 = if ( 131 < data . size ) data [ 131 ] else 0 val v 132 = if ( 132 < data . size ) data [ 132 ] else 0 val v 133 = if ( 133 < data . size ) data [ 133 ] else 0 val v 134 = if ( 134 < data . size ) data [ 134 ] else 0 val v 135 = if ( 135 < data . size ) data [ 135 ] else 0 val v 136 = if ( 136 < data . size ) data [ 136 ] else 0 val v 137 = if ( 137 < data . size ) data [ 137 ] else 0 val v 138 = if ( 138 < data . size ) data [ 138 ] else 0 val v 139 = if ( 139 < data . size ) data [ 139 ] else 0 val v 140 = if ( 140 < data . size ) data [ 140 ] else 0 val v 141 = if ( 141 < data . size ) data [ 141 ] else 0 val v 142 = if ( 142 < data . size ) data [ 142 ] else 0 val v 143 = if ( 143 < data . size ) data [ 143 ] else 0 val v 144 = if ( 144 < data . size ) data [ 144 ] else 0 val v 145 = if ( 145 < data . size ) data [ 145 ] else 0 val v 146 = if ( 146 < data . size ) data [ 146 ] else 0 val v 147 = if ( 147 < data . size ) data [ 147 ] else 0 val v 148 = if ( 148 < data . size ) data [ 148 ] else 0 val v 149 = if ( 149 < data . size ) data [ 149 ] else 0 val v 150 = if ( 150 < data . size ) data [ 150 ] else 0 val v 151 = if ( 151 < data . size ) data [ 151 ] else 0 val v 152 = if ( 152 < data . size ) data [ 152 ] else 0 val v 153 = if ( 153 < data . size ) data [ 153 ] else 0 val v 154 = if ( 154 < data . size ) data [ 154 ] else 0 val v 155 = if ( 155 < data . size ) data [ 155 ] else 0 val v 156 = if ( 156 < data . size ) data [ 156 ] else 0 val v 157 = if ( 157 < data . size ) data [ 157 ] else 0 val v 158 = if ( 158 < data . size ) data [ 158 ] else 0 val v 159 = if ( 159 < data . size ) data [ 159 ] else 0 val v 160 = if ( 160 < data . size ) data [ 160 ] else 0 val v 161 = if ( 161 < data . size ) data [ 161 ] else 0 val v 162 = if ( 162 < data . size ) data [ 162 ] else 0 val v 163 = if ( 163 < data . size ) data [ 163 ] else 0 val v 164 = if ( 164 < data . size ) data [ 164 ] else 0 val v 165 = if ( 165 < data . size ) data [ 165 ] else 0 val v 166 = if ( 166 < data . size ) data [ 166 ] else 0 val v 167 = if ( 167 < data . size ) data [ 167 ] else 0 val v 168 = if ( 168 < data . size ) data [ 168 ] else 0 val v 169 = if ( 169 < data . size ) data [ 169 ] else 0 val v 170 = if ( 170 < data . size ) data [ 170 ] else 0 val v 171 = if ( 171 < data . size ) data [ 171 ] else 0 val v 172 = if ( 172 < data . size ) data [ 172 ] else 0 val v 173 = if ( 173 < data . size ) data [ 173 ] else 0 val v 174 = if ( 174 < data . size ) data [ 174 ] else 0 val v 175 = if ( 175 < data . size ) data [ 175 ] else 0 val v 176 = if ( 176 < data . size ) data [ 176 ] else 0 val v 177 = if ( 177 < data . size ) data [ 177 ] else 0 val v 178 = if ( 178 < data . size ) data [ 178 ] else 0 val v 179 = if ( 179 < data . size ) data [ 179 ] else 0 val v 180 = if ( 180 < data . size ) data [ 180 ] else 0 val v 181 = if ( 181 < data . size ) data [ 181 ] else 0 val v 182 = if ( 182 < data . size ) data [ 182 ] else 0 val v 183 = if ( 183 < data . size ) data [ 183 ] else 0 val v 184 = if ( 184 < data . size ) data [ 184 ] else 0 val v 185 = if ( 185 < data . size ) data [ 185 ] else 0 val v 186 = if ( 186 < data . size ) data [ 186 ] else 0 val v 187 = if ( 187 < data . size ) data [ 187 ] else 0 val v 188 = if ( 188 < data . size ) data [ 188 ] else 0 val v 189 = if ( 189 < data . size ) data [ 189 ] else 0 val v 190 = if ( 190 < data . size ) data [ 190 ] else 0 val v 191 = if ( 191 < data . size ) data [ 191 ] else 0 val v 192 = if ( 192 < data . size ) data [ 192 ] else 0 val v 193 = if ( 193 < data . size ) data [ 193 ] else 0 val v 194 = if ( 194 < data . size ) data [ 194 ] else 0 val v 195 = if ( 195 < data . size ) data [ 195 ] else 0 val v 196 = if ( 196 < data . size ) data [ 196 ] else 0 val v 197 = if ( 197 < data . size ) data [ 197 ] else 0" }, { "block_ids": [ @@ -165,6 +172,7 @@ } ], "text": " val v198 = if (198 < data.size) data[198] else 0\n val v199 = if (199 < data.size) data[199] else 0\n val v200 = if (200 < data.size) data[200] else 0\n val v201 = if (201 < data.size) data[201] else 0\n val v202 = if (202 < data.size) data[202] else 0\n val v203 = if (203 < data.size) data[203] else 0\n val v204 = if (204 < data.size) data[204] else 0\n val v205 = if (205 < data.size) data[205] else 0\n val v206 = if (206 < data.size) data[206] else 0\n val v207 = if (207 < data.size) data[207] else 0\n val v208 = if (208 < data.size) data[208] else 0\n val v209 = if (209 < data.size) data[209] else 0\n return data.size\n }\n}", - "token_estimate": 239 + "token_estimate": 239, + "tokenized_korean_text": "val v 198 = if ( 198 < data . size ) data [ 198 ] else 0 val v 199 = if ( 199 < data . size ) data [ 199 ] else 0 val v 200 = if ( 200 < data . size ) data [ 200 ] else 0 val v 201 = if ( 201 < data . size ) data [ 201 ] else 0 val v 202 = if ( 202 < data . size ) data [ 202 ] else 0 val v 203 = if ( 203 < data . size ) data [ 203 ] else 0 val v 204 = if ( 204 < data . size ) data [ 204 ] else 0 val v 205 = if ( 205 < data . size ) data [ 205 ] else 0 val v 206 = if ( 206 < data . size ) data [ 206 ] else 0 val v 207 = if ( 207 < data . size ) data [ 207 ] else 0 val v 208 = if ( 208 < data . size ) data [ 208 ] else 0 val v 209 = if ( 209 < data . size ) data [ 209 ] else 0 return data . size } }" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json index 1b9d86e..48507b0 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "import os\nimport sys\nfrom typing import List\nfrom pathlib import Path\nfrom collections import defaultdict", - "token_estimate": 35 + "token_estimate": 35, + "tokenized_korean_text": "import os import sys from typing import List from pathlib import Path from collections import defaultdict" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "def compute_mrr(scores):\n if not scores:\n return 0.0\n return sum(\n 1.0 / r for r in scores\n ) / len(scores)", - "token_estimate": 44 + "token_estimate": 44, + "tokenized_korean_text": "def compute _ mrr ( scores ) : if not scores : return 0 . 0 return sum ( 1 . 0 / r for r in scores ) / len ( scores )" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "class MetricsCollector:\n def __init__(self):\n self.scores = []\n self.labels = []\n self.counts = defaultdict(int)\n self.totals = defaultdict(float)\n self.tags = []", - "token_estimate": 67 + "token_estimate": 67, + "tokenized_korean_text": "class MetricsCollector : def __ init __( self ) : self . scores = [ ] self . labels = [ ] self . counts = defaultdict ( int ) self . totals = defaultdict ( float ) self . tags = [ ]" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "class BaseEvaluator:\n def evaluate(self, data):\n raise NotImplementedError\n def batch_evaluate(self, items):\n results = []\n for item in items:\n results.append(self.evaluate(item))\n return results\n def name(self):\n return type(self).__name__", - "token_estimate": 99 + "token_estimate": 99, + "tokenized_korean_text": "class BaseEvaluator : def evaluate ( self , data ) : raise NotImplementedError def batch _ evaluate ( self , items ) : results = [ ] for item in items : results . append ( self . evaluate ( item ) ) return results def name ( self ) : return type ( self ).__ name __" }, { "block_ids": [ @@ -102,7 +106,8 @@ } ], "text": "class MetricsCollector:\n def run(self, inputs):\n for inp in inputs:\n score = self._score(inp)\n self.scores.append(\n score\n )", - "token_estimate": 61 + "token_estimate": 61, + "tokenized_korean_text": "class MetricsCollector : def run ( self , inputs ) : for inp in inputs : score = self ._ score ( inp ) self . scores . append ( score )" }, { "block_ids": [ @@ -123,7 +128,8 @@ } ], "text": "class MetricsCollector:\n def report(self):\n return {\n 'mean': sum(self.scores) / max(len(self.scores), 1),\n 'count': len(self.scores),\n 'tags': self.tags,\n }", - "token_estimate": 69 + "token_estimate": 69, + "tokenized_korean_text": "class MetricsCollector : def report ( self ) : return { ' mean ': sum ( self . scores ) / max ( len ( self . scores ) , 1 ) , ' count ': len ( self . scores ) , ' tags ': self . tags , }" }, { "block_ids": [ @@ -144,7 +150,8 @@ } ], "text": "def big_compute(data):\n v0 = data[0] if 0 < len(data) else 0\n v1 = data[1] if 1 < len(data) else 0\n v2 = data[2] if 2 < len(data) else 0\n v3 = data[3] if 3 < len(data) else 0\n v4 = data[4] if 4 < len(data) else 0\n v5 = data[5] if 5 < len(data) else 0\n v6 = data[6] if 6 < len(data) else 0\n v7 = data[7] if 7 < len(data) else 0\n v8 = data[8] if 8 < len(data) else 0\n v9 = data[9] if 9 < len(data) else 0\n v10 = data[10] if 10 < len(data) else 0\n v11 = data[11] if 11 < len(data) else 0\n v12 = data[12] if 12 < len(data) else 0\n v13 = data[13] if 13 < len(data) else 0\n v14 = data[14] if 14 < len(data) else 0\n v15 = data[15] if 15 < len(data) else 0\n v16 = data[16] if 16 < len(data) else 0\n v17 = data[17] if 17 < len(data) else 0\n v18 = data[18] if 18 < len(data) else 0\n v19 = data[19] if 19 < len(data) else 0\n v20 = data[20] if 20 < len(data) else 0\n v21 = data[21] if 21 < len(data) else 0\n v22 = data[22] if 22 < len(data) else 0\n v23 = data[23] if 23 < len(data) else 0\n v24 = data[24] if 24 < len(data) else 0\n v25 = data[25] if 25 < len(data) else 0\n v26 = data[26] if 26 < len(data) else 0\n v27 = data[27] if 27 < len(data) else 0\n v28 = data[28] if 28 < len(data) else 0\n v29 = data[29] if 29 < len(data) else 0\n v30 = data[30] if 30 < len(data) else 0\n v31 = data[31] if 31 < len(data) else 0\n v32 = data[32] if 32 < len(data) else 0\n v33 = data[33] if 33 < len(data) else 0\n v34 = data[34] if 34 < len(data) else 0\n v35 = data[35] if 35 < len(data) else 0\n v36 = data[36] if 36 < len(data) else 0\n v37 = data[37] if 37 < len(data) else 0\n v38 = data[38] if 38 < len(data) else 0\n v39 = data[39] if 39 < len(data) else 0\n v40 = data[40] if 40 < len(data) else 0\n v41 = data[41] if 41 < len(data) else 0\n v42 = data[42] if 42 < len(data) else 0\n v43 = data[43] if 43 < len(data) else 0\n v44 = data[44] if 44 < len(data) else 0\n v45 = data[45] if 45 < len(data) else 0\n v46 = data[46] if 46 < len(data) else 0\n v47 = data[47] if 47 < len(data) else 0\n v48 = data[48] if 48 < len(data) else 0\n v49 = data[49] if 49 < len(data) else 0\n v50 = data[50] if 50 < len(data) else 0\n v51 = data[51] if 51 < len(data) else 0\n v52 = data[52] if 52 < len(data) else 0\n v53 = data[53] if 53 < len(data) else 0\n v54 = data[54] if 54 < len(data) else 0\n v55 = data[55] if 55 < len(data) else 0\n v56 = data[56] if 56 < len(data) else 0\n v57 = data[57] if 57 < len(data) else 0\n v58 = data[58] if 58 < len(data) else 0\n v59 = data[59] if 59 < len(data) else 0\n v60 = data[60] if 60 < len(data) else 0\n v61 = data[61] if 61 < len(data) else 0\n v62 = data[62] if 62 < len(data) else 0\n v63 = data[63] if 63 < len(data) else 0\n v64 = data[64] if 64 < len(data) else 0\n v65 = data[65] if 65 < len(data) else 0\n v66 = data[66] if 66 < len(data) else 0\n v67 = data[67] if 67 < len(data) else 0\n v68 = data[68] if 68 < len(data) else 0\n v69 = data[69] if 69 < len(data) else 0\n v70 = data[70] if 70 < len(data) else 0\n v71 = data[71] if 71 < len(data) else 0\n v72 = data[72] if 72 < len(data) else 0\n v73 = data[73] if 73 < len(data) else 0\n v74 = data[74] if 74 < len(data) else 0\n v75 = data[75] if 75 < len(data) else 0\n v76 = data[76] if 76 < len(data) else 0\n v77 = data[77] if 77 < len(data) else 0\n v78 = data[78] if 78 < len(data) else 0\n v79 = data[79] if 79 < len(data) else 0\n v80 = data[80] if 80 < len(data) else 0\n v81 = data[81] if 81 < len(data) else 0\n v82 = data[82] if 82 < len(data) else 0\n v83 = data[83] if 83 < len(data) else 0\n v84 = data[84] if 84 < len(data) else 0\n v85 = data[85] if 85 < len(data) else 0\n v86 = data[86] if 86 < len(data) else 0\n v87 = data[87] if 87 < len(data) else 0\n v88 = data[88] if 88 < len(data) else 0\n v89 = data[89] if 89 < len(data) else 0\n v90 = data[90] if 90 < len(data) else 0\n v91 = data[91] if 91 < len(data) else 0\n v92 = data[92] if 92 < len(data) else 0\n v93 = data[93] if 93 < len(data) else 0\n v94 = data[94] if 94 < len(data) else 0\n v95 = data[95] if 95 < len(data) else 0\n v96 = data[96] if 96 < len(data) else 0\n v97 = data[97] if 97 < len(data) else 0\n v98 = data[98] if 98 < len(data) else 0\n v99 = data[99] if 99 < len(data) else 0\n v100 = data[100] if 100 < len(data) else 0\n v101 = data[101] if 101 < len(data) else 0\n v102 = data[102] if 102 < len(data) else 0\n v103 = data[103] if 103 < len(data) else 0\n v104 = data[104] if 104 < len(data) else 0\n v105 = data[105] if 105 < len(data) else 0\n v106 = data[106] if 106 < len(data) else 0\n v107 = data[107] if 107 < len(data) else 0\n v108 = data[108] if 108 < len(data) else 0\n v109 = data[109] if 109 < len(data) else 0\n v110 = data[110] if 110 < len(data) else 0\n v111 = data[111] if 111 < len(data) else 0\n v112 = data[112] if 112 < len(data) else 0\n v113 = data[113] if 113 < len(data) else 0\n v114 = data[114] if 114 < len(data) else 0\n v115 = data[115] if 115 < len(data) else 0\n v116 = data[116] if 116 < len(data) else 0\n v117 = data[117] if 117 < len(data) else 0\n v118 = data[118] if 118 < len(data) else 0\n v119 = data[119] if 119 < len(data) else 0\n v120 = data[120] if 120 < len(data) else 0\n v121 = data[121] if 121 < len(data) else 0\n v122 = data[122] if 122 < len(data) else 0\n v123 = data[123] if 123 < len(data) else 0\n v124 = data[124] if 124 < len(data) else 0\n v125 = data[125] if 125 < len(data) else 0\n v126 = data[126] if 126 < len(data) else 0\n v127 = data[127] if 127 < len(data) else 0\n v128 = data[128] if 128 < len(data) else 0\n v129 = data[129] if 129 < len(data) else 0\n v130 = data[130] if 130 < len(data) else 0\n v131 = data[131] if 131 < len(data) else 0\n v132 = data[132] if 132 < len(data) else 0\n v133 = data[133] if 133 < len(data) else 0\n v134 = data[134] if 134 < len(data) else 0\n v135 = data[135] if 135 < len(data) else 0\n v136 = data[136] if 136 < len(data) else 0\n v137 = data[137] if 137 < len(data) else 0\n v138 = data[138] if 138 < len(data) else 0\n v139 = data[139] if 139 < len(data) else 0\n v140 = data[140] if 140 < len(data) else 0\n v141 = data[141] if 141 < len(data) else 0\n v142 = data[142] if 142 < len(data) else 0\n v143 = data[143] if 143 < len(data) else 0\n v144 = data[144] if 144 < len(data) else 0\n v145 = data[145] if 145 < len(data) else 0\n v146 = data[146] if 146 < len(data) else 0\n v147 = data[147] if 147 < len(data) else 0\n v148 = data[148] if 148 < len(data) else 0\n v149 = data[149] if 149 < len(data) else 0\n v150 = data[150] if 150 < len(data) else 0\n v151 = data[151] if 151 < len(data) else 0\n v152 = data[152] if 152 < len(data) else 0\n v153 = data[153] if 153 < len(data) else 0\n v154 = data[154] if 154 < len(data) else 0\n v155 = data[155] if 155 < len(data) else 0\n v156 = data[156] if 156 < len(data) else 0\n v157 = data[157] if 157 < len(data) else 0\n v158 = data[158] if 158 < len(data) else 0\n v159 = data[159] if 159 < len(data) else 0\n v160 = data[160] if 160 < len(data) else 0\n v161 = data[161] if 161 < len(data) else 0\n v162 = data[162] if 162 < len(data) else 0\n v163 = data[163] if 163 < len(data) else 0\n v164 = data[164] if 164 < len(data) else 0\n v165 = data[165] if 165 < len(data) else 0\n v166 = data[166] if 166 < len(data) else 0\n v167 = data[167] if 167 < len(data) else 0\n v168 = data[168] if 168 < len(data) else 0\n v169 = data[169] if 169 < len(data) else 0\n v170 = data[170] if 170 < len(data) else 0\n v171 = data[171] if 171 < len(data) else 0\n v172 = data[172] if 172 < len(data) else 0\n v173 = data[173] if 173 < len(data) else 0\n v174 = data[174] if 174 < len(data) else 0\n v175 = data[175] if 175 < len(data) else 0\n v176 = data[176] if 176 < len(data) else 0\n v177 = data[177] if 177 < len(data) else 0\n v178 = data[178] if 178 < len(data) else 0\n v179 = data[179] if 179 < len(data) else 0\n v180 = data[180] if 180 < len(data) else 0\n v181 = data[181] if 181 < len(data) else 0\n v182 = data[182] if 182 < len(data) else 0\n v183 = data[183] if 183 < len(data) else 0\n v184 = data[184] if 184 < len(data) else 0\n v185 = data[185] if 185 < len(data) else 0\n v186 = data[186] if 186 < len(data) else 0\n v187 = data[187] if 187 < len(data) else 0\n v188 = data[188] if 188 < len(data) else 0\n v189 = data[189] if 189 < len(data) else 0\n v190 = data[190] if 190 < len(data) else 0\n v191 = data[191] if 191 < len(data) else 0\n v192 = data[192] if 192 < len(data) else 0\n v193 = data[193] if 193 < len(data) else 0\n v194 = data[194] if 194 < len(data) else 0\n v195 = data[195] if 195 < len(data) else 0\n v196 = data[196] if 196 < len(data) else 0\n v197 = data[197] if 197 < len(data) else 0\n v198 = data[198] if 198 < len(data) else 0", - "token_estimate": 3015 + "token_estimate": 3015, + "tokenized_korean_text": "def big _ compute ( data ) : v 0 = data [ 0 ] if 0 < len ( data ) else 0 v 1 = data [ 1 ] if 1 < len ( data ) else 0 v 2 = data [ 2 ] if 2 < len ( data ) else 0 v 3 = data [ 3 ] if 3 < len ( data ) else 0 v 4 = data [ 4 ] if 4 < len ( data ) else 0 v 5 = data [ 5 ] if 5 < len ( data ) else 0 v 6 = data [ 6 ] if 6 < len ( data ) else 0 v 7 = data [ 7 ] if 7 < len ( data ) else 0 v 8 = data [ 8 ] if 8 < len ( data ) else 0 v 9 = data [ 9 ] if 9 < len ( data ) else 0 v 10 = data [ 10 ] if 10 < len ( data ) else 0 v 11 = data [ 11 ] if 11 < len ( data ) else 0 v 12 = data [ 12 ] if 12 < len ( data ) else 0 v 13 = data [ 13 ] if 13 < len ( data ) else 0 v 14 = data [ 14 ] if 14 < len ( data ) else 0 v 15 = data [ 15 ] if 15 < len ( data ) else 0 v 16 = data [ 16 ] if 16 < len ( data ) else 0 v 17 = data [ 17 ] if 17 < len ( data ) else 0 v 18 = data [ 18 ] if 18 < len ( data ) else 0 v 19 = data [ 19 ] if 19 < len ( data ) else 0 v 20 = data [ 20 ] if 20 < len ( data ) else 0 v 21 = data [ 21 ] if 21 < len ( data ) else 0 v 22 = data [ 22 ] if 22 < len ( data ) else 0 v 23 = data [ 23 ] if 23 < len ( data ) else 0 v 24 = data [ 24 ] if 24 < len ( data ) else 0 v 25 = data [ 25 ] if 25 < len ( data ) else 0 v 26 = data [ 26 ] if 26 < len ( data ) else 0 v 27 = data [ 27 ] if 27 < len ( data ) else 0 v 28 = data [ 28 ] if 28 < len ( data ) else 0 v 29 = data [ 29 ] if 29 < len ( data ) else 0 v 30 = data [ 30 ] if 30 < len ( data ) else 0 v 31 = data [ 31 ] if 31 < len ( data ) else 0 v 32 = data [ 32 ] if 32 < len ( data ) else 0 v 33 = data [ 33 ] if 33 < len ( data ) else 0 v 34 = data [ 34 ] if 34 < len ( data ) else 0 v 35 = data [ 35 ] if 35 < len ( data ) else 0 v 36 = data [ 36 ] if 36 < len ( data ) else 0 v 37 = data [ 37 ] if 37 < len ( data ) else 0 v 38 = data [ 38 ] if 38 < len ( data ) else 0 v 39 = data [ 39 ] if 39 < len ( data ) else 0 v 40 = data [ 40 ] if 40 < len ( data ) else 0 v 41 = data [ 41 ] if 41 < len ( data ) else 0 v 42 = data [ 42 ] if 42 < len ( data ) else 0 v 43 = data [ 43 ] if 43 < len ( data ) else 0 v 44 = data [ 44 ] if 44 < len ( data ) else 0 v 45 = data [ 45 ] if 45 < len ( data ) else 0 v 46 = data [ 46 ] if 46 < len ( data ) else 0 v 47 = data [ 47 ] if 47 < len ( data ) else 0 v 48 = data [ 48 ] if 48 < len ( data ) else 0 v 49 = data [ 49 ] if 49 < len ( data ) else 0 v 50 = data [ 50 ] if 50 < len ( data ) else 0 v 51 = data [ 51 ] if 51 < len ( data ) else 0 v 52 = data [ 52 ] if 52 < len ( data ) else 0 v 53 = data [ 53 ] if 53 < len ( data ) else 0 v 54 = data [ 54 ] if 54 < len ( data ) else 0 v 55 = data [ 55 ] if 55 < len ( data ) else 0 v 56 = data [ 56 ] if 56 < len ( data ) else 0 v 57 = data [ 57 ] if 57 < len ( data ) else 0 v 58 = data [ 58 ] if 58 < len ( data ) else 0 v 59 = data [ 59 ] if 59 < len ( data ) else 0 v 60 = data [ 60 ] if 60 < len ( data ) else 0 v 61 = data [ 61 ] if 61 < len ( data ) else 0 v 62 = data [ 62 ] if 62 < len ( data ) else 0 v 63 = data [ 63 ] if 63 < len ( data ) else 0 v 64 = data [ 64 ] if 64 < len ( data ) else 0 v 65 = data [ 65 ] if 65 < len ( data ) else 0 v 66 = data [ 66 ] if 66 < len ( data ) else 0 v 67 = data [ 67 ] if 67 < len ( data ) else 0 v 68 = data [ 68 ] if 68 < len ( data ) else 0 v 69 = data [ 69 ] if 69 < len ( data ) else 0 v 70 = data [ 70 ] if 70 < len ( data ) else 0 v 71 = data [ 71 ] if 71 < len ( data ) else 0 v 72 = data [ 72 ] if 72 < len ( data ) else 0 v 73 = data [ 73 ] if 73 < len ( data ) else 0 v 74 = data [ 74 ] if 74 < len ( data ) else 0 v 75 = data [ 75 ] if 75 < len ( data ) else 0 v 76 = data [ 76 ] if 76 < len ( data ) else 0 v 77 = data [ 77 ] if 77 < len ( data ) else 0 v 78 = data [ 78 ] if 78 < len ( data ) else 0 v 79 = data [ 79 ] if 79 < len ( data ) else 0 v 80 = data [ 80 ] if 80 < len ( data ) else 0 v 81 = data [ 81 ] if 81 < len ( data ) else 0 v 82 = data [ 82 ] if 82 < len ( data ) else 0 v 83 = data [ 83 ] if 83 < len ( data ) else 0 v 84 = data [ 84 ] if 84 < len ( data ) else 0 v 85 = data [ 85 ] if 85 < len ( data ) else 0 v 86 = data [ 86 ] if 86 < len ( data ) else 0 v 87 = data [ 87 ] if 87 < len ( data ) else 0 v 88 = data [ 88 ] if 88 < len ( data ) else 0 v 89 = data [ 89 ] if 89 < len ( data ) else 0 v 90 = data [ 90 ] if 90 < len ( data ) else 0 v 91 = data [ 91 ] if 91 < len ( data ) else 0 v 92 = data [ 92 ] if 92 < len ( data ) else 0 v 93 = data [ 93 ] if 93 < len ( data ) else 0 v 94 = data [ 94 ] if 94 < len ( data ) else 0 v 95 = data [ 95 ] if 95 < len ( data ) else 0 v 96 = data [ 96 ] if 96 < len ( data ) else 0 v 97 = data [ 97 ] if 97 < len ( data ) else 0 v 98 = data [ 98 ] if 98 < len ( data ) else 0 v 99 = data [ 99 ] if 99 < len ( data ) else 0 v 100 = data [ 100 ] if 100 < len ( data ) else 0 v 101 = data [ 101 ] if 101 < len ( data ) else 0 v 102 = data [ 102 ] if 102 < len ( data ) else 0 v 103 = data [ 103 ] if 103 < len ( data ) else 0 v 104 = data [ 104 ] if 104 < len ( data ) else 0 v 105 = data [ 105 ] if 105 < len ( data ) else 0 v 106 = data [ 106 ] if 106 < len ( data ) else 0 v 107 = data [ 107 ] if 107 < len ( data ) else 0 v 108 = data [ 108 ] if 108 < len ( data ) else 0 v 109 = data [ 109 ] if 109 < len ( data ) else 0 v 110 = data [ 110 ] if 110 < len ( data ) else 0 v 111 = data [ 111 ] if 111 < len ( data ) else 0 v 112 = data [ 112 ] if 112 < len ( data ) else 0 v 113 = data [ 113 ] if 113 < len ( data ) else 0 v 114 = data [ 114 ] if 114 < len ( data ) else 0 v 115 = data [ 115 ] if 115 < len ( data ) else 0 v 116 = data [ 116 ] if 116 < len ( data ) else 0 v 117 = data [ 117 ] if 117 < len ( data ) else 0 v 118 = data [ 118 ] if 118 < len ( data ) else 0 v 119 = data [ 119 ] if 119 < len ( data ) else 0 v 120 = data [ 120 ] if 120 < len ( data ) else 0 v 121 = data [ 121 ] if 121 < len ( data ) else 0 v 122 = data [ 122 ] if 122 < len ( data ) else 0 v 123 = data [ 123 ] if 123 < len ( data ) else 0 v 124 = data [ 124 ] if 124 < len ( data ) else 0 v 125 = data [ 125 ] if 125 < len ( data ) else 0 v 126 = data [ 126 ] if 126 < len ( data ) else 0 v 127 = data [ 127 ] if 127 < len ( data ) else 0 v 128 = data [ 128 ] if 128 < len ( data ) else 0 v 129 = data [ 129 ] if 129 < len ( data ) else 0 v 130 = data [ 130 ] if 130 < len ( data ) else 0 v 131 = data [ 131 ] if 131 < len ( data ) else 0 v 132 = data [ 132 ] if 132 < len ( data ) else 0 v 133 = data [ 133 ] if 133 < len ( data ) else 0 v 134 = data [ 134 ] if 134 < len ( data ) else 0 v 135 = data [ 135 ] if 135 < len ( data ) else 0 v 136 = data [ 136 ] if 136 < len ( data ) else 0 v 137 = data [ 137 ] if 137 < len ( data ) else 0 v 138 = data [ 138 ] if 138 < len ( data ) else 0 v 139 = data [ 139 ] if 139 < len ( data ) else 0 v 140 = data [ 140 ] if 140 < len ( data ) else 0 v 141 = data [ 141 ] if 141 < len ( data ) else 0 v 142 = data [ 142 ] if 142 < len ( data ) else 0 v 143 = data [ 143 ] if 143 < len ( data ) else 0 v 144 = data [ 144 ] if 144 < len ( data ) else 0 v 145 = data [ 145 ] if 145 < len ( data ) else 0 v 146 = data [ 146 ] if 146 < len ( data ) else 0 v 147 = data [ 147 ] if 147 < len ( data ) else 0 v 148 = data [ 148 ] if 148 < len ( data ) else 0 v 149 = data [ 149 ] if 149 < len ( data ) else 0 v 150 = data [ 150 ] if 150 < len ( data ) else 0 v 151 = data [ 151 ] if 151 < len ( data ) else 0 v 152 = data [ 152 ] if 152 < len ( data ) else 0 v 153 = data [ 153 ] if 153 < len ( data ) else 0 v 154 = data [ 154 ] if 154 < len ( data ) else 0 v 155 = data [ 155 ] if 155 < len ( data ) else 0 v 156 = data [ 156 ] if 156 < len ( data ) else 0 v 157 = data [ 157 ] if 157 < len ( data ) else 0 v 158 = data [ 158 ] if 158 < len ( data ) else 0 v 159 = data [ 159 ] if 159 < len ( data ) else 0 v 160 = data [ 160 ] if 160 < len ( data ) else 0 v 161 = data [ 161 ] if 161 < len ( data ) else 0 v 162 = data [ 162 ] if 162 < len ( data ) else 0 v 163 = data [ 163 ] if 163 < len ( data ) else 0 v 164 = data [ 164 ] if 164 < len ( data ) else 0 v 165 = data [ 165 ] if 165 < len ( data ) else 0 v 166 = data [ 166 ] if 166 < len ( data ) else 0 v 167 = data [ 167 ] if 167 < len ( data ) else 0 v 168 = data [ 168 ] if 168 < len ( data ) else 0 v 169 = data [ 169 ] if 169 < len ( data ) else 0 v 170 = data [ 170 ] if 170 < len ( data ) else 0 v 171 = data [ 171 ] if 171 < len ( data ) else 0 v 172 = data [ 172 ] if 172 < len ( data ) else 0 v 173 = data [ 173 ] if 173 < len ( data ) else 0 v 174 = data [ 174 ] if 174 < len ( data ) else 0 v 175 = data [ 175 ] if 175 < len ( data ) else 0 v 176 = data [ 176 ] if 176 < len ( data ) else 0 v 177 = data [ 177 ] if 177 < len ( data ) else 0 v 178 = data [ 178 ] if 178 < len ( data ) else 0 v 179 = data [ 179 ] if 179 < len ( data ) else 0 v 180 = data [ 180 ] if 180 < len ( data ) else 0 v 181 = data [ 181 ] if 181 < len ( data ) else 0 v 182 = data [ 182 ] if 182 < len ( data ) else 0 v 183 = data [ 183 ] if 183 < len ( data ) else 0 v 184 = data [ 184 ] if 184 < len ( data ) else 0 v 185 = data [ 185 ] if 185 < len ( data ) else 0 v 186 = data [ 186 ] if 186 < len ( data ) else 0 v 187 = data [ 187 ] if 187 < len ( data ) else 0 v 188 = data [ 188 ] if 188 < len ( data ) else 0 v 189 = data [ 189 ] if 189 < len ( data ) else 0 v 190 = data [ 190 ] if 190 < len ( data ) else 0 v 191 = data [ 191 ] if 191 < len ( data ) else 0 v 192 = data [ 192 ] if 192 < len ( data ) else 0 v 193 = data [ 193 ] if 193 < len ( data ) else 0 v 194 = data [ 194 ] if 194 < len ( data ) else 0 v 195 = data [ 195 ] if 195 < len ( data ) else 0 v 196 = data [ 196 ] if 196 < len ( data ) else 0 v 197 = data [ 197 ] if 197 < len ( data ) else 0 v 198 = data [ 198 ] if 198 < len ( data ) else 0" }, { "block_ids": [ @@ -165,6 +172,7 @@ } ], "text": " v199 = data[199] if 199 < len(data) else 0\n v200 = data[200] if 200 < len(data) else 0\n v201 = data[201] if 201 < len(data) else 0\n v202 = data[202] if 202 < len(data) else 0\n v203 = data[203] if 203 < len(data) else 0\n v204 = data[204] if 204 < len(data) else 0\n v205 = data[205] if 205 < len(data) else 0\n v206 = data[206] if 206 < len(data) else 0\n v207 = data[207] if 207 < len(data) else 0\n v208 = data[208] if 208 < len(data) else 0\n v209 = data[209] if 209 < len(data) else 0\n return sum(data)", - "token_estimate": 179 + "token_estimate": 179, + "tokenized_korean_text": "v 199 = data [ 199 ] if 199 < len ( data ) else 0 v 200 = data [ 200 ] if 200 < len ( data ) else 0 v 201 = data [ 201 ] if 201 < len ( data ) else 0 v 202 = data [ 202 ] if 202 < len ( data ) else 0 v 203 = data [ 203 ] if 203 < len ( data ) else 0 v 204 = data [ 204 ] if 204 < len ( data ) else 0 v 205 = data [ 205 ] if 205 < len ( data ) else 0 v 206 = data [ 206 ] if 206 < len ( data ) else 0 v 207 = data [ 207 ] if 207 < len ( data ) else 0 v 208 = data [ 208 ] if 208 < len ( data ) else 0 v 209 = data [ 209 ] if 209 < len ( data ) else 0 return sum ( data )" } ] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json index 446b98d..e9de78f 100644 --- a/crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json +++ b/crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json @@ -18,7 +18,8 @@ } ], "text": "import { readFileSync } from 'fs';\nimport { join } from 'path';\nimport type { Config } from './config';\nimport { Logger } from './logger';\nimport { EventEmitter } from 'events';", - "token_estimate": 59 + "token_estimate": 59, + "tokenized_korean_text": "import { readFileSync } from ' fs '; import { join } from ' path '; import type { Config } from './ config '; import { Logger } from './ logger '; import { EventEmitter } from ' events ';" }, { "block_ids": [ @@ -39,7 +40,8 @@ } ], "text": "export function parseInput(raw: string): number | null {\n const trimmed = raw.trim();\n const n = Number(trimmed);\n if (isNaN(n)) return null;\n return n;\n}", - "token_estimate": 53 + "token_estimate": 53, + "tokenized_korean_text": "export function parseInput ( raw : string ) : number | null { const trimmed = raw . trim (); const n = Number ( trimmed ); if ( isNaN ( n ) ) return null ; return n ; }" }, { "block_ids": [ @@ -60,7 +62,8 @@ } ], "text": "export interface Frobable {\n frob(): string;\n frobTwice(): string;\n readonly name: string;\n readonly tags: string[];\n count: number;\n reset(): void;\n}", - "token_estimate": 52 + "token_estimate": 52, + "tokenized_korean_text": "export interface Frobable { frob ( ) : string ; frobTwice ( ) : string ; readonly name : string ; readonly tags : string []; count : number ; reset ( ) : void ; }" }, { "block_ids": [ @@ -81,7 +84,8 @@ } ], "text": "export class Foo implements Frobable {\n constructor(\n public readonly name: string,\n public value: number,\n public tags: string[] = [],\n ) {}\n frob(): string { return this.name; }\n frobTwice(): string { return this.name.repeat(2); }\n reset(): void { this.value = 0; }\n}", - "token_estimate": 95 + "token_estimate": 95, + "tokenized_korean_text": "export class Foo implements Frobable { constructor ( public readonly name : string , public value : number , public tags : string [ ] = [ ] , ) {} frob ( ) : string { return this . name ; } frobTwice ( ) : string { return this . name . repeat ( 2 ); } reset ( ) : void { this . value = 0 ; } }" }, { "block_ids": [ @@ -102,7 +106,8 @@ } ], "text": "export class Foo {\n double(): number {\n const result = this.value * 2;\n if (result > Number.MAX_SAFE_INTEGER) {\n return Number.MAX_SAFE_INTEGER;\n }\n return result;\n }\n}", - "token_estimate": 63 + "token_estimate": 63, + "tokenized_korean_text": "export class Foo { double ( ) : number { const result = this . value * 2 ; if ( result > Number . MAX _ SAFE _ INTEGER ) { return Number . MAX _ SAFE _ INTEGER ; } return result ; } }" }, { "block_ids": [ @@ -123,7 +128,8 @@ } ], "text": "export class Foo {\n triple(): number {\n const result = this.value * 3;\n if (result > Number.MAX_SAFE_INTEGER) {\n return Number.MAX_SAFE_INTEGER;\n }\n return result;\n }\n}", - "token_estimate": 63 + "token_estimate": 63, + "tokenized_korean_text": "export class Foo { triple ( ) : number { const result = this . value * 3 ; if ( result > Number . MAX _ SAFE _ INTEGER ) { return Number . MAX _ SAFE _ INTEGER ; } return result ; } }" }, { "block_ids": [ @@ -144,7 +150,8 @@ } ], "text": "export class BigProcessor {\n process(items: string[]): string[] {\n const v0 = items[0] ?? '';\n const v1 = items[1] ?? '';\n const v2 = items[2] ?? '';\n const v3 = items[3] ?? '';\n const v4 = items[4] ?? '';\n const v5 = items[5] ?? '';\n const v6 = items[6] ?? '';\n const v7 = items[7] ?? '';\n const v8 = items[8] ?? '';\n const v9 = items[9] ?? '';\n const v10 = items[10] ?? '';\n const v11 = items[11] ?? '';\n const v12 = items[12] ?? '';\n const v13 = items[13] ?? '';\n const v14 = items[14] ?? '';\n const v15 = items[15] ?? '';\n const v16 = items[16] ?? '';\n const v17 = items[17] ?? '';\n const v18 = items[18] ?? '';\n const v19 = items[19] ?? '';\n const v20 = items[20] ?? '';\n const v21 = items[21] ?? '';\n const v22 = items[22] ?? '';\n const v23 = items[23] ?? '';\n const v24 = items[24] ?? '';\n const v25 = items[25] ?? '';\n const v26 = items[26] ?? '';\n const v27 = items[27] ?? '';\n const v28 = items[28] ?? '';\n const v29 = items[29] ?? '';\n const v30 = items[30] ?? '';\n const v31 = items[31] ?? '';\n const v32 = items[32] ?? '';\n const v33 = items[33] ?? '';\n const v34 = items[34] ?? '';\n const v35 = items[35] ?? '';\n const v36 = items[36] ?? '';\n const v37 = items[37] ?? '';\n const v38 = items[38] ?? '';\n const v39 = items[39] ?? '';\n const v40 = items[40] ?? '';\n const v41 = items[41] ?? '';\n const v42 = items[42] ?? '';\n const v43 = items[43] ?? '';\n const v44 = items[44] ?? '';\n const v45 = items[45] ?? '';\n const v46 = items[46] ?? '';\n const v47 = items[47] ?? '';\n const v48 = items[48] ?? '';\n const v49 = items[49] ?? '';\n const v50 = items[50] ?? '';\n const v51 = items[51] ?? '';\n const v52 = items[52] ?? '';\n const v53 = items[53] ?? '';\n const v54 = items[54] ?? '';\n const v55 = items[55] ?? '';\n const v56 = items[56] ?? '';\n const v57 = items[57] ?? '';\n const v58 = items[58] ?? '';\n const v59 = items[59] ?? '';\n const v60 = items[60] ?? '';\n const v61 = items[61] ?? '';\n const v62 = items[62] ?? '';\n const v63 = items[63] ?? '';\n const v64 = items[64] ?? '';\n const v65 = items[65] ?? '';\n const v66 = items[66] ?? '';\n const v67 = items[67] ?? '';\n const v68 = items[68] ?? '';\n const v69 = items[69] ?? '';\n const v70 = items[70] ?? '';\n const v71 = items[71] ?? '';\n const v72 = items[72] ?? '';\n const v73 = items[73] ?? '';\n const v74 = items[74] ?? '';\n const v75 = items[75] ?? '';\n const v76 = items[76] ?? '';\n const v77 = items[77] ?? '';\n const v78 = items[78] ?? '';\n const v79 = items[79] ?? '';\n const v80 = items[80] ?? '';\n const v81 = items[81] ?? '';\n const v82 = items[82] ?? '';\n const v83 = items[83] ?? '';\n const v84 = items[84] ?? '';\n const v85 = items[85] ?? '';\n const v86 = items[86] ?? '';\n const v87 = items[87] ?? '';\n const v88 = items[88] ?? '';\n const v89 = items[89] ?? '';\n const v90 = items[90] ?? '';\n const v91 = items[91] ?? '';\n const v92 = items[92] ?? '';\n const v93 = items[93] ?? '';\n const v94 = items[94] ?? '';\n const v95 = items[95] ?? '';\n const v96 = items[96] ?? '';\n const v97 = items[97] ?? '';\n const v98 = items[98] ?? '';\n const v99 = items[99] ?? '';\n const v100 = items[100] ?? '';\n const v101 = items[101] ?? '';\n const v102 = items[102] ?? '';\n const v103 = items[103] ?? '';\n const v104 = items[104] ?? '';\n const v105 = items[105] ?? '';\n const v106 = items[106] ?? '';\n const v107 = items[107] ?? '';\n const v108 = items[108] ?? '';\n const v109 = items[109] ?? '';\n const v110 = items[110] ?? '';\n const v111 = items[111] ?? '';\n const v112 = items[112] ?? '';\n const v113 = items[113] ?? '';\n const v114 = items[114] ?? '';\n const v115 = items[115] ?? '';\n const v116 = items[116] ?? '';\n const v117 = items[117] ?? '';\n const v118 = items[118] ?? '';\n const v119 = items[119] ?? '';\n const v120 = items[120] ?? '';\n const v121 = items[121] ?? '';\n const v122 = items[122] ?? '';\n const v123 = items[123] ?? '';\n const v124 = items[124] ?? '';\n const v125 = items[125] ?? '';\n const v126 = items[126] ?? '';\n const v127 = items[127] ?? '';\n const v128 = items[128] ?? '';\n const v129 = items[129] ?? '';\n const v130 = items[130] ?? '';\n const v131 = items[131] ?? '';\n const v132 = items[132] ?? '';\n const v133 = items[133] ?? '';\n const v134 = items[134] ?? '';\n const v135 = items[135] ?? '';\n const v136 = items[136] ?? '';\n const v137 = items[137] ?? '';\n const v138 = items[138] ?? '';\n const v139 = items[139] ?? '';\n const v140 = items[140] ?? '';\n const v141 = items[141] ?? '';\n const v142 = items[142] ?? '';\n const v143 = items[143] ?? '';\n const v144 = items[144] ?? '';\n const v145 = items[145] ?? '';\n const v146 = items[146] ?? '';\n const v147 = items[147] ?? '';\n const v148 = items[148] ?? '';\n const v149 = items[149] ?? '';\n const v150 = items[150] ?? '';\n const v151 = items[151] ?? '';\n const v152 = items[152] ?? '';\n const v153 = items[153] ?? '';\n const v154 = items[154] ?? '';\n const v155 = items[155] ?? '';\n const v156 = items[156] ?? '';\n const v157 = items[157] ?? '';\n const v158 = items[158] ?? '';\n const v159 = items[159] ?? '';\n const v160 = items[160] ?? '';\n const v161 = items[161] ?? '';\n const v162 = items[162] ?? '';\n const v163 = items[163] ?? '';\n const v164 = items[164] ?? '';\n const v165 = items[165] ?? '';\n const v166 = items[166] ?? '';\n const v167 = items[167] ?? '';\n const v168 = items[168] ?? '';\n const v169 = items[169] ?? '';\n const v170 = items[170] ?? '';\n const v171 = items[171] ?? '';\n const v172 = items[172] ?? '';\n const v173 = items[173] ?? '';\n const v174 = items[174] ?? '';\n const v175 = items[175] ?? '';\n const v176 = items[176] ?? '';\n const v177 = items[177] ?? '';\n const v178 = items[178] ?? '';\n const v179 = items[179] ?? '';\n const v180 = items[180] ?? '';\n const v181 = items[181] ?? '';\n const v182 = items[182] ?? '';\n const v183 = items[183] ?? '';\n const v184 = items[184] ?? '';\n const v185 = items[185] ?? '';\n const v186 = items[186] ?? '';\n const v187 = items[187] ?? '';\n const v188 = items[188] ?? '';\n const v189 = items[189] ?? '';\n const v190 = items[190] ?? '';\n const v191 = items[191] ?? '';\n const v192 = items[192] ?? '';\n const v193 = items[193] ?? '';\n const v194 = items[194] ?? '';\n const v195 = items[195] ?? '';\n const v196 = items[196] ?? '';\n const v197 = items[197] ?? '';", - "token_estimate": 2259 + "token_estimate": 2259, + "tokenized_korean_text": "export class BigProcessor { process ( items : string [ ] ) : string [ ] { const v 0 = items [ 0 ] ?? ''; const v 1 = items [ 1 ] ?? ''; const v 2 = items [ 2 ] ?? ''; const v 3 = items [ 3 ] ?? ''; const v 4 = items [ 4 ] ?? ''; const v 5 = items [ 5 ] ?? ''; const v 6 = items [ 6 ] ?? ''; const v 7 = items [ 7 ] ?? ''; const v 8 = items [ 8 ] ?? ''; const v 9 = items [ 9 ] ?? ''; const v 10 = items [ 10 ] ?? ''; const v 11 = items [ 11 ] ?? ''; const v 12 = items [ 12 ] ?? ''; const v 13 = items [ 13 ] ?? ''; const v 14 = items [ 14 ] ?? ''; const v 15 = items [ 15 ] ?? ''; const v 16 = items [ 16 ] ?? ''; const v 17 = items [ 17 ] ?? ''; const v 18 = items [ 18 ] ?? ''; const v 19 = items [ 19 ] ?? ''; const v 20 = items [ 20 ] ?? ''; const v 21 = items [ 21 ] ?? ''; const v 22 = items [ 22 ] ?? ''; const v 23 = items [ 23 ] ?? ''; const v 24 = items [ 24 ] ?? ''; const v 25 = items [ 25 ] ?? ''; const v 26 = items [ 26 ] ?? ''; const v 27 = items [ 27 ] ?? ''; const v 28 = items [ 28 ] ?? ''; const v 29 = items [ 29 ] ?? ''; const v 30 = items [ 30 ] ?? ''; const v 31 = items [ 31 ] ?? ''; const v 32 = items [ 32 ] ?? ''; const v 33 = items [ 33 ] ?? ''; const v 34 = items [ 34 ] ?? ''; const v 35 = items [ 35 ] ?? ''; const v 36 = items [ 36 ] ?? ''; const v 37 = items [ 37 ] ?? ''; const v 38 = items [ 38 ] ?? ''; const v 39 = items [ 39 ] ?? ''; const v 40 = items [ 40 ] ?? ''; const v 41 = items [ 41 ] ?? ''; const v 42 = items [ 42 ] ?? ''; const v 43 = items [ 43 ] ?? ''; const v 44 = items [ 44 ] ?? ''; const v 45 = items [ 45 ] ?? ''; const v 46 = items [ 46 ] ?? ''; const v 47 = items [ 47 ] ?? ''; const v 48 = items [ 48 ] ?? ''; const v 49 = items [ 49 ] ?? ''; const v 50 = items [ 50 ] ?? ''; const v 51 = items [ 51 ] ?? ''; const v 52 = items [ 52 ] ?? ''; const v 53 = items [ 53 ] ?? ''; const v 54 = items [ 54 ] ?? ''; const v 55 = items [ 55 ] ?? ''; const v 56 = items [ 56 ] ?? ''; const v 57 = items [ 57 ] ?? ''; const v 58 = items [ 58 ] ?? ''; const v 59 = items [ 59 ] ?? ''; const v 60 = items [ 60 ] ?? ''; const v 61 = items [ 61 ] ?? ''; const v 62 = items [ 62 ] ?? ''; const v 63 = items [ 63 ] ?? ''; const v 64 = items [ 64 ] ?? ''; const v 65 = items [ 65 ] ?? ''; const v 66 = items [ 66 ] ?? ''; const v 67 = items [ 67 ] ?? ''; const v 68 = items [ 68 ] ?? ''; const v 69 = items [ 69 ] ?? ''; const v 70 = items [ 70 ] ?? ''; const v 71 = items [ 71 ] ?? ''; const v 72 = items [ 72 ] ?? ''; const v 73 = items [ 73 ] ?? ''; const v 74 = items [ 74 ] ?? ''; const v 75 = items [ 75 ] ?? ''; const v 76 = items [ 76 ] ?? ''; const v 77 = items [ 77 ] ?? ''; const v 78 = items [ 78 ] ?? ''; const v 79 = items [ 79 ] ?? ''; const v 80 = items [ 80 ] ?? ''; const v 81 = items [ 81 ] ?? ''; const v 82 = items [ 82 ] ?? ''; const v 83 = items [ 83 ] ?? ''; const v 84 = items [ 84 ] ?? ''; const v 85 = items [ 85 ] ?? ''; const v 86 = items [ 86 ] ?? ''; const v 87 = items [ 87 ] ?? ''; const v 88 = items [ 88 ] ?? ''; const v 89 = items [ 89 ] ?? ''; const v 90 = items [ 90 ] ?? ''; const v 91 = items [ 91 ] ?? ''; const v 92 = items [ 92 ] ?? ''; const v 93 = items [ 93 ] ?? ''; const v 94 = items [ 94 ] ?? ''; const v 95 = items [ 95 ] ?? ''; const v 96 = items [ 96 ] ?? ''; const v 97 = items [ 97 ] ?? ''; const v 98 = items [ 98 ] ?? ''; const v 99 = items [ 99 ] ?? ''; const v 100 = items [ 100 ] ?? ''; const v 101 = items [ 101 ] ?? ''; const v 102 = items [ 102 ] ?? ''; const v 103 = items [ 103 ] ?? ''; const v 104 = items [ 104 ] ?? ''; const v 105 = items [ 105 ] ?? ''; const v 106 = items [ 106 ] ?? ''; const v 107 = items [ 107 ] ?? ''; const v 108 = items [ 108 ] ?? ''; const v 109 = items [ 109 ] ?? ''; const v 110 = items [ 110 ] ?? ''; const v 111 = items [ 111 ] ?? ''; const v 112 = items [ 112 ] ?? ''; const v 113 = items [ 113 ] ?? ''; const v 114 = items [ 114 ] ?? ''; const v 115 = items [ 115 ] ?? ''; const v 116 = items [ 116 ] ?? ''; const v 117 = items [ 117 ] ?? ''; const v 118 = items [ 118 ] ?? ''; const v 119 = items [ 119 ] ?? ''; const v 120 = items [ 120 ] ?? ''; const v 121 = items [ 121 ] ?? ''; const v 122 = items [ 122 ] ?? ''; const v 123 = items [ 123 ] ?? ''; const v 124 = items [ 124 ] ?? ''; const v 125 = items [ 125 ] ?? ''; const v 126 = items [ 126 ] ?? ''; const v 127 = items [ 127 ] ?? ''; const v 128 = items [ 128 ] ?? ''; const v 129 = items [ 129 ] ?? ''; const v 130 = items [ 130 ] ?? ''; const v 131 = items [ 131 ] ?? ''; const v 132 = items [ 132 ] ?? ''; const v 133 = items [ 133 ] ?? ''; const v 134 = items [ 134 ] ?? ''; const v 135 = items [ 135 ] ?? ''; const v 136 = items [ 136 ] ?? ''; const v 137 = items [ 137 ] ?? ''; const v 138 = items [ 138 ] ?? ''; const v 139 = items [ 139 ] ?? ''; const v 140 = items [ 140 ] ?? ''; const v 141 = items [ 141 ] ?? ''; const v 142 = items [ 142 ] ?? ''; const v 143 = items [ 143 ] ?? ''; const v 144 = items [ 144 ] ?? ''; const v 145 = items [ 145 ] ?? ''; const v 146 = items [ 146 ] ?? ''; const v 147 = items [ 147 ] ?? ''; const v 148 = items [ 148 ] ?? ''; const v 149 = items [ 149 ] ?? ''; const v 150 = items [ 150 ] ?? ''; const v 151 = items [ 151 ] ?? ''; const v 152 = items [ 152 ] ?? ''; const v 153 = items [ 153 ] ?? ''; const v 154 = items [ 154 ] ?? ''; const v 155 = items [ 155 ] ?? ''; const v 156 = items [ 156 ] ?? ''; const v 157 = items [ 157 ] ?? ''; const v 158 = items [ 158 ] ?? ''; const v 159 = items [ 159 ] ?? ''; const v 160 = items [ 160 ] ?? ''; const v 161 = items [ 161 ] ?? ''; const v 162 = items [ 162 ] ?? ''; const v 163 = items [ 163 ] ?? ''; const v 164 = items [ 164 ] ?? ''; const v 165 = items [ 165 ] ?? ''; const v 166 = items [ 166 ] ?? ''; const v 167 = items [ 167 ] ?? ''; const v 168 = items [ 168 ] ?? ''; const v 169 = items [ 169 ] ?? ''; const v 170 = items [ 170 ] ?? ''; const v 171 = items [ 171 ] ?? ''; const v 172 = items [ 172 ] ?? ''; const v 173 = items [ 173 ] ?? ''; const v 174 = items [ 174 ] ?? ''; const v 175 = items [ 175 ] ?? ''; const v 176 = items [ 176 ] ?? ''; const v 177 = items [ 177 ] ?? ''; const v 178 = items [ 178 ] ?? ''; const v 179 = items [ 179 ] ?? ''; const v 180 = items [ 180 ] ?? ''; const v 181 = items [ 181 ] ?? ''; const v 182 = items [ 182 ] ?? ''; const v 183 = items [ 183 ] ?? ''; const v 184 = items [ 184 ] ?? ''; const v 185 = items [ 185 ] ?? ''; const v 186 = items [ 186 ] ?? ''; const v 187 = items [ 187 ] ?? ''; const v 188 = items [ 188 ] ?? ''; const v 189 = items [ 189 ] ?? ''; const v 190 = items [ 190 ] ?? ''; const v 191 = items [ 191 ] ?? ''; const v 192 = items [ 192 ] ?? ''; const v 193 = items [ 193 ] ?? ''; const v 194 = items [ 194 ] ?? ''; const v 195 = items [ 195 ] ?? ''; const v 196 = items [ 196 ] ?? ''; const v 197 = items [ 197 ] ?? '';" }, { "block_ids": [ @@ -165,6 +172,7 @@ } ], "text": " const v198 = items[198] ?? '';\n const v199 = items[199] ?? '';\n const v200 = items[200] ?? '';\n const v201 = items[201] ?? '';\n const v202 = items[202] ?? '';\n const v203 = items[203] ?? '';\n const v204 = items[204] ?? '';\n const v205 = items[205] ?? '';\n const v206 = items[206] ?? '';\n const v207 = items[207] ?? '';\n const v208 = items[208] ?? '';\n const v209 = items[209] ?? '';\n return items;\n }\n}", - "token_estimate": 148 + "token_estimate": 148, + "tokenized_korean_text": "const v 198 = items [ 198 ] ?? ''; const v 199 = items [ 199 ] ?? ''; const v 200 = items [ 200 ] ?? ''; const v 201 = items [ 201 ] ?? ''; const v 202 = items [ 202 ] ?? ''; const v 203 = items [ 203 ] ?? ''; const v 204 = items [ 204 ] ?? ''; const v 205 = items [ 205 ] ?? ''; const v 206 = items [ 206 ] ?? ''; const v 207 = items [ 207 ] ?? ''; const v 208 = items [ 208 ] ?? ''; const v 209 = items [ 209 ] ?? ''; return items ; } }" } ] diff --git a/fixtures/markdown/long-section.chunks.snapshot.json b/fixtures/markdown/long-section.chunks.snapshot.json index 1ea045b..2340927 100644 --- a/fixtures/markdown/long-section.chunks.snapshot.json +++ b/fixtures/markdown/long-section.chunks.snapshot.json @@ -30,7 +30,8 @@ } ], "text": "Alpha\n\nAlpha intro paragraph one. This first paragraph in the alpha section gives a brief overview of what is to follow and serves as the lead-in for the subsequent material covered under the alpha heading.\n\nAlpha intro paragraph two. The second paragraph extends the discussion with additional sentences, padding out the paragraph so that paragraph-level chunk splitting actually has multiple candidates to consider when deciding where to slice the content stream.", - "token_estimate": 155 + "token_estimate": 155, + "tokenized_korean_text": "Alpha Alpha intro paragraph one . This first paragraph in the alpha section gives a brief overview of what is to follow and serves as the lead - in for the subsequent material covered under the alpha heading . Alpha intro paragraph two . The second paragraph extends the discussion with additional sentences , padding out the paragraph so that paragraph - level chunk splitting actually has multiple candidates to consider when deciding where to slice the content stream ." }, { "block_ids": [ @@ -58,7 +59,8 @@ } ], "text": "Alpha Sub\n\nSome prose under the alpha sub-heading. The nested heading should still be respected as a chunk boundary distinct from the parent alpha heading.", - "token_estimate": 52 + "token_estimate": 52, + "tokenized_korean_text": "Alpha Sub Some prose under the alpha sub - heading . The nested heading should still be respected as a chunk boundary distinct from the parent alpha heading ." }, { "block_ids": [ @@ -80,7 +82,8 @@ } ], "text": "// A code block long enough to easily clear any reasonable target_tokens\n// so the never-split-code-block rule is exercised by this fixture. The\n// rest of the function body is intentional filler: line after line of\n// content that, were the chunker permitted to split it, would exceed\n// the target threshold and force a break in the middle of the snippet.\nfn long_code_example_one() {\n let mut numbers = Vec::new();\n for i in 0..10 {\n numbers.push(i * 2);\n }\n let mut total = 0_i64;\n for n in &numbers {\n total += *n as i64;\n }\n println!(\"total = {total}\");\n}\n\nfn long_code_example_two() {\n let words = [\"alpha\", \"beta\", \"gamma\", \"delta\", \"epsilon\"];\n for w in words.iter() {\n if w.starts_with('a') {\n println!(\"starts with a: {w}\");\n } else if w.starts_with('b') {\n println!(\"starts with b: {w}\");\n } else if w.starts_with('g') {\n println!(\"starts with g: {w}\");\n } else {\n println!(\"other: {w}\");\n }\n }\n}\n\nfn long_code_example_three() {\n let mut buf = String::new();\n for ch in \"lorem ipsum dolor sit amet\".chars() {\n if ch.is_ascii_alphabetic() {\n buf.push(ch.to_ascii_uppercase());\n }\n }\n println!(\"buf = {buf}\");\n}", - "token_estimate": 427 + "token_estimate": 427, + "tokenized_korean_text": "/ / A code block long enough to easily clear any reasonable target _ tokens / / so the never - split - code - block rule is exercised by this fixture . The / / rest of the function body is intentional filler : line after line of / / content that , were the chunker permitted to split it , would exceed / / the target threshold and force a break in the middle of the snippet . fn long _ code _ example _ one ( ) { let mut numbers = Vec : : new (); for i in 0 .. 10 { numbers . push ( i * 2 ); } let mut total = 0 _ i 64 ; for n in & numbers { total += * n as i 64 ; } println !(\" total = { total }\"); } fn long _ code _ example _ two ( ) { let words = [\" alpha \", \" beta \", \" gamma \", \" delta \", \" epsilon \"]; for w in words . iter ( ) { if w . starts _ with (' a ') { println !(\" starts with a : { w }\"); } else if w . starts _ with (' b ') { println !(\" starts with b : { w }\"); } else if w . starts _ with (' g ') { println !(\" starts with g : { w }\"); } else { println !(\" other : { w }\"); } } } fn long _ code _ example _ three ( ) { let mut buf = String : : new (); for ch in \" lorem ipsum dolor sit amet \". chars ( ) { if ch . is _ ascii _ alphabetic ( ) { buf . push ( ch . to _ ascii _ uppercase ()); } } println !(\" buf = { buf }\"); }" }, { "block_ids": [ @@ -107,7 +110,8 @@ } ], "text": "Beta\n\nBeta paragraph one. The beta section opens with an introductory paragraph that sets up the table appearing further down.", - "token_estimate": 42 + "token_estimate": 42, + "tokenized_korean_text": "Beta Beta paragraph one . The beta section opens with an introductory paragraph that sets up the table appearing further down ." }, { "block_ids": [ @@ -128,7 +132,8 @@ } ], "text": "name | kind | note\none | small | first row\ntwo | medium | second row\nthree | large | third row\nfour | huge | fourth row", - "token_estimate": 40 + "token_estimate": 40, + "tokenized_korean_text": "name | kind | note one | small | first row two | medium | second row three | large | third row four | huge | fourth row" }, { "block_ids": [ @@ -149,7 +154,8 @@ } ], "text": "Beta closing paragraph. After the table we have one more paragraph of prose that anchors the end of the beta section before we move on to gamma.", - "token_estimate": 48 + "token_estimate": 48, + "tokenized_korean_text": "Beta closing paragraph . After the table we have one more paragraph of prose that anchors the end of the beta section before we move on to gamma ." }, { "block_ids": [ @@ -182,7 +188,8 @@ } ], "text": "Gamma\n\nGamma paragraph one. The gamma section is intentionally long to exercise the paragraph-level split with overlap rule when chunking under a single heading without any nested sub-headings to break things up further.\n\nGamma paragraph two. We continue accumulating prose so that the running token estimator climbs steadily and eventually trips the target_tokens threshold, forcing the chunker to emit a chunk and seed the next chunk with overlap from the prior tail.", - "token_estimate": 157 + "token_estimate": 157, + "tokenized_korean_text": "Gamma Gamma paragraph one . The gamma section is intentionally long to exercise the paragraph - level split with overlap rule when chunking under a single heading without any nested sub - headings to break things up further . Gamma paragraph two . We continue accumulating prose so that the running token estimator climbs steadily and eventually trips the target _ tokens threshold , forcing the chunker to emit a chunk and seed the next chunk with overlap from the prior tail ." }, { "block_ids": [ @@ -209,6 +216,7 @@ } ], "text": "Gamma paragraph two. We continue accumulating prose so that the running token estimator climbs steadily and eventually trips the target_tokens threshold, forcing the chunker to emit a chunk and seed the next chunk with overlap from the prior tail.\n\nGamma paragraph three. Yet another paragraph under the gamma heading, padded with words to ensure the byte count clears the threshold and the splitting behaviour shows up unambiguously in the snapshot output.", - "token_estimate": 153 + "token_estimate": 153, + "tokenized_korean_text": "Gamma paragraph two . We continue accumulating prose so that the running token estimator climbs steadily and eventually trips the target _ tokens threshold , forcing the chunker to emit a chunk and seed the next chunk with overlap from the prior tail . Gamma paragraph three . Yet another paragraph under the gamma heading , padded with words to ensure the byte count clears the threshold and the splitting behaviour shows up unambiguously in the snapshot output ." } ]