test(chunk): regenerate AST + long-section snapshots for V009 chunk field
S3 의 Chunk struct 갱신 (kebab-core 의 tokenized_korean_text: Option<String> field 추가) 가 모든 chunk snapshot JSON 의 serde serialize 결과를 변경시킴. 10 snapshot fixture (9 AST chunker + markdown long-section) 의 baseline 을 V009 형태로 regenerate. 각 snapshot 의 변경 = chunk JSON 마다 `"tokenized_korean_text": null` field 추가 (대부분의 fixture 가 영어 코드라 lindera 의 None fallback). 동작 변경 없음 — serde representation 의 cascade만. Spec: docs/superpowers/specs/2026-05-28-v0.20.x-korean-morphological-tokenizer-spec.md §6.2 Plan: docs/superpowers/plans/2026-05-28-v0.20.x-korean-morphological-tokenizer-plan.md (S3 follow-up via S11 sanity)
This commit is contained in:
@@ -18,7 +18,8 @@
|
||||
}
|
||||
],
|
||||
"text": "#include <string>\n#include <vector>\n\nnamespace kebab {",
|
||||
"token_estimate": 18
|
||||
"token_estimate": 18,
|
||||
"tokenized_korean_text": "# include < string > # include < vector > namespace kebab {"
|
||||
},
|
||||
{
|
||||
"block_ids": [
|
||||
@@ -39,7 +40,8 @@
|
||||
}
|
||||
],
|
||||
"text": "class MdHeadingV1Chunker {\npublic:\n MdHeadingV1Chunker() = default;\n ~MdHeadingV1Chunker() = default;\n\n std::string chunk_doc(const std::string& doc) {\n return doc;\n }\n\n int operator()(int x) const {\n return x * 2;\n }\n\nprivate:\n int counter_ = 0;\n};",
|
||||
"token_estimate": 95
|
||||
"token_estimate": 95,
|
||||
"tokenized_korean_text": "class MdHeadingV 1 Chunker { public : MdHeadingV 1 Chunker ( ) = default ; ~ MdHeadingV 1 Chunker ( ) = default ; std : : string chunk _ doc ( const std : : string & doc ) { return doc ; } int operator ( ) ( int x ) const { return x * 2 ; } private : int counter _ = 0 ; };"
|
||||
},
|
||||
{
|
||||
"block_ids": [
|
||||
@@ -60,7 +62,8 @@
|
||||
}
|
||||
],
|
||||
"text": "template <typename T>\nT identity(T value) {\n return value;\n}",
|
||||
"token_estimate": 21
|
||||
"token_estimate": 21,
|
||||
"tokenized_korean_text": "template < typename T > T identity ( T value ) { return value ; }"
|
||||
},
|
||||
{
|
||||
"block_ids": [
|
||||
@@ -81,7 +84,8 @@
|
||||
}
|
||||
],
|
||||
"text": "void global_helper() {\n // free function in kebab namespace\n}",
|
||||
"token_estimate": 22
|
||||
"token_estimate": 22,
|
||||
"tokenized_korean_text": "void global _ helper ( ) { / / free function in kebab namespace }"
|
||||
},
|
||||
{
|
||||
"block_ids": [
|
||||
@@ -102,6 +106,7 @@
|
||||
}
|
||||
],
|
||||
"text": "int main() {\n kebab::chunk::MdHeadingV1Chunker c;\n return 0;\n}",
|
||||
"token_estimate": 23
|
||||
"token_estimate": 23,
|
||||
"tokenized_korean_text": "int main ( ) { kebab : : chunk : : MdHeadingV 1 Chunker c ; return 0 ; }"
|
||||
}
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user