From d6bb6cfd3b11c4a2c3ca32c09b1d7859b0a3ba6c Mon Sep 17 00:00:00 2001 From: altair823 Date: Wed, 20 May 2026 01:39:17 +0000 Subject: [PATCH] test(p10-1b): per-language chunker snapshots (python/ts/js) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors code_rust_ast_snapshot pattern. In-memory CanonicalDocument build so no kebab-parse-code dep (boundary §6.3 respected). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../kebab-chunk/tests/code_js_ast_snapshot.rs | 221 ++++++++++++++++++ .../tests/code_python_ast_snapshot.rs | 221 ++++++++++++++++++ .../kebab-chunk/tests/code_ts_ast_snapshot.rs | 221 ++++++++++++++++++ .../code-sample.js.chunks.snapshot.json | 170 ++++++++++++++ .../code-sample.py.chunks.snapshot.json | 170 ++++++++++++++ .../code-sample.ts.chunks.snapshot.json | 170 ++++++++++++++ 6 files changed, 1173 insertions(+) create mode 100644 crates/kebab-chunk/tests/code_js_ast_snapshot.rs create mode 100644 crates/kebab-chunk/tests/code_python_ast_snapshot.rs create mode 100644 crates/kebab-chunk/tests/code_ts_ast_snapshot.rs create mode 100644 crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json create mode 100644 crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json create mode 100644 crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json diff --git a/crates/kebab-chunk/tests/code_js_ast_snapshot.rs b/crates/kebab-chunk/tests/code_js_ast_snapshot.rs new file mode 100644 index 0000000..9cb818d --- /dev/null +++ b/crates/kebab-chunk/tests/code_js_ast_snapshot.rs @@ -0,0 +1,221 @@ +//! Snapshot test pinning the `Vec` JSON for a +//! representative JavaScript code `CanonicalDocument`. +//! +//! This is an integration test. `kebab-parse-code` is intentionally NOT +//! a dev-dep (design §6.3 / §8 boundary: AST extraction is parser-side). +//! The `CanonicalDocument` is built inline from hand-crafted `Block::Code` +//! 
units, which is the same pattern used in `code_rust_ast_v1.rs`'s +//! internal `code_doc` test helper. +//! +//! Set `UPDATE_SNAPSHOTS=1` to re-bake the baseline. + +use std::path::PathBuf; + +use kebab_chunk::CodeJsAstV1Chunker; +use kebab_core::{ + AssetId, Block, CanonicalDocument, ChunkPolicy, Chunker, ChunkerVersion, CodeBlock, CommonBlock, + Lang, Metadata, ParserVersion, Provenance, SourceSpan, SourceType, TrustLevel, WorkspacePath, + id_for_block, id_for_doc, +}; +use serde_json::Value; +use time::OffsetDateTime; + +fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") +} + +fn fixed_doc() -> CanonicalDocument { + let wp = WorkspacePath("src/bar.js".into()); + let aid = AssetId("b".repeat(64)); + // Pin parser_version so doc_id / block_ids are reproducible. + let pv = ParserVersion("code-js-v1".into()); + let doc_id = id_for_doc(&wp, &aid, &pv); + + // Build a >200-line function body to force split_oversize. + let big_body: String = { + let header = "function bigTransform(items) {\n"; + let body: String = (0..210u32) + .map(|i| format!(" const v{i} = items[{i}] !== undefined ? items[{i}] : null;\n")) + .collect(); + let footer = " return items;\n}"; + format!("{header}{body}{footer}") + }; + let big_line_count = big_body.lines().count() as u32; + let big_line_end = 48 + big_line_count - 1; + + // Representative units: + // 0. require/import block (lines 1–5, ≤200) + // 1. free fn `add` (lines 7–12, ≤200) + // 2. class `EventBus` (lines 14–20, ≤200) + // 3. class `BaseHandler` (lines 22–30, ≤200) + // 4. method `EventBus.emit` (lines 32–38, ≤200) + // 5. method `EventBus.on` (lines 40–46, ≤200) + // 6. 
bigTransform (>200 lines) to force split_oversize + let raw_units: Vec<(&str, u32, u32, String)> = vec![ + ( + "requires", + 1, + 5, + "const fs = require('fs');\nconst path = require('path');\nconst { EventEmitter } = require('events');\nconst assert = require('assert');\nconst crypto = require('crypto');".to_string(), + ), + ( + "add", + 7, + 12, + "export function add(a, b) {\n if (typeof a !== 'number') throw new TypeError('a');\n if (typeof b !== 'number') throw new TypeError('b');\n const result = a + b;\n assert(isFinite(result));\n return result;\n}".to_string(), + ), + ( + "EventBus", + 14, + 20, + "class EventBus {\n constructor() {\n this._handlers = new Map();\n this._history = [];\n this._maxHistory = 100;\n this._seq = 0;\n }\n}".to_string(), + ), + ( + "BaseHandler", + 22, + 30, + "class BaseHandler {\n handle(event) {\n throw new Error('not implemented');\n }\n batchHandle(events) {\n const results = [];\n for (const ev of events) {\n results.push(this.handle(ev));\n }\n return results;\n }\n}".to_string(), + ), + ( + "EventBus.emit", + 32, + 38, + "class EventBus {\n emit(name, payload) {\n const handlers = this._handlers.get(name) ?? 
[];\n for (const h of handlers) {\n h(payload);\n }\n return this;\n }\n}".to_string(), + ), + ( + "EventBus.on", + 40, + 46, + "class EventBus {\n on(name, handler) {\n if (!this._handlers.has(name)) {\n this._handlers.set(name, []);\n }\n this._handlers.get(name).push(handler);\n return this;\n }\n}".to_string(), + ), + ("bigTransform", 48, big_line_end, big_body), + ]; + + let blocks: Vec<Block> = raw_units + .iter() + .enumerate() + .map(|(i, (sym, ls, le, code))| { + let span = SourceSpan::Code { + line_start: *ls, + line_end: *le, + symbol: Some((*sym).to_string()), + lang: Some("javascript".into()), + }; + let bid = id_for_block(&doc_id, "code", &[], i as u32, &span); + Block::Code(CodeBlock { + common: CommonBlock { + block_id: bid, + heading_path: vec![], + source_span: span, + }, + lang: Some("javascript".into()), + code: code.clone(), + }) + }) + .collect(); + + CanonicalDocument { + doc_id, + source_asset_id: aid, + workspace_path: wp, + title: "bar.js".into(), + lang: Lang("und".into()), + blocks, + metadata: Metadata { + aliases: vec![], + tags: vec![], + created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), + updated_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), + source_type: SourceType::Note, + trust_level: TrustLevel::Primary, + user_id_alias: None, + user: Default::default(), + repo: Some("kebab".into()), + git_branch: Some("main".into()), + git_commit: Some("0".repeat(40)), + code_lang: Some("javascript".into()), + }, + provenance: Provenance { events: vec![] }, + parser_version: pv, + schema_version: 1, + doc_version: 1, + last_chunker_version: None, + last_embedding_version: None, + } +} + +fn fixed_policy() -> ChunkPolicy { + ChunkPolicy { + target_tokens: 500, + overlap_tokens: 80, + respect_markdown_headings: false, + chunker_version: ChunkerVersion("code-js-ast-v1".into()), + } +} + +#[test] +fn code_js_ast_chunks_snapshot() { + let doc = fixed_doc(); + let policy = fixed_policy(); + + let chunks = 
CodeJsAstV1Chunker.chunk(&doc, &policy).expect("chunk"); + let actual = serde_json::to_value(&chunks).unwrap(); + + let dir = fixtures_dir(); + let baseline_path = dir.join("code-sample.js.chunks.snapshot.json"); + let baseline_text = match std::fs::read_to_string(&baseline_path) { + Ok(s) => s, + Err(_) if std::env::var("UPDATE_SNAPSHOTS").is_ok() => { + std::fs::create_dir_all(&dir).unwrap(); + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap(); + return; + } + Err(e) => panic!( + "missing baseline {}; run with UPDATE_SNAPSHOTS=1 to create: {e}", + baseline_path.display() + ), + }; + let expected: Value = serde_json::from_str(&baseline_text).expect("baseline parses as json"); + + if actual != expected { + if std::env::var("UPDATE_SNAPSHOTS").is_ok() { + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap(); + eprintln!("updated baseline {}", baseline_path.display()); + return; + } + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + panic!( + "code-js-ast-v1 chunks snapshot drift\n\ + --- expected ({}) ---\n{baseline_text}\n\ + --- actual ---\n{pretty}\n\ + If intentional, re-run with UPDATE_SNAPSHOTS=1.", + baseline_path.display() + ); + } +} + +/// Determinism cross-check: re-running the same pipeline yields the same +/// chunk_ids byte-for-byte. 
+#[test] +fn code_js_ast_chunks_are_deterministic() { + let policy = fixed_policy(); + let baseline: Vec<String> = CodeJsAstV1Chunker + .chunk(&fixed_doc(), &policy) + .unwrap() + .into_iter() + .map(|c| c.chunk_id.0) + .collect(); + for _ in 0..5 { + let again: Vec<String> = CodeJsAstV1Chunker + .chunk(&fixed_doc(), &policy) + .unwrap() + .into_iter() + .map(|c| c.chunk_id.0) + .collect(); + assert_eq!(again, baseline); + } +} diff --git a/crates/kebab-chunk/tests/code_python_ast_snapshot.rs b/crates/kebab-chunk/tests/code_python_ast_snapshot.rs new file mode 100644 index 0000000..2a164b1 --- /dev/null +++ b/crates/kebab-chunk/tests/code_python_ast_snapshot.rs @@ -0,0 +1,221 @@ +//! Snapshot test pinning the `Vec` JSON for a +//! representative Python code `CanonicalDocument`. +//! +//! This is an integration test. `kebab-parse-code` is intentionally NOT +//! a dev-dep (design §6.3 / §8 boundary: AST extraction is parser-side). +//! The `CanonicalDocument` is built inline from hand-crafted `Block::Code` +//! units, which is the same pattern used in `code_rust_ast_v1.rs`'s +//! internal `code_doc` test helper. +//! +//! Set `UPDATE_SNAPSHOTS=1` to re-bake the baseline. + +use std::path::PathBuf; + +use kebab_chunk::CodePythonAstV1Chunker; +use kebab_core::{ + AssetId, Block, CanonicalDocument, ChunkPolicy, Chunker, ChunkerVersion, CodeBlock, CommonBlock, + Lang, Metadata, ParserVersion, Provenance, SourceSpan, SourceType, TrustLevel, WorkspacePath, + id_for_block, id_for_doc, +}; +use serde_json::Value; +use time::OffsetDateTime; + +fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") +} + +fn fixed_doc() -> CanonicalDocument { + let wp = WorkspacePath("kebab_eval/metrics.py".into()); + let aid = AssetId("b".repeat(64)); + // Pin parser_version so doc_id / block_ids are reproducible. 
+ let pv = ParserVersion("code-python-v1".into()); + let doc_id = id_for_doc(&wp, &aid, &pv); + + // Build a >200-line function body to force split_oversize. + let big_body: String = { + let header = "def big_compute(data):\n"; + let body: String = (0..210u32) + .map(|i| format!(" v{i} = data[{i}] if {i} < len(data) else 0\n")) + .collect(); + let footer = " return sum(data)"; + format!("{header}{body}{footer}") + }; + let big_line_count = big_body.lines().count() as u32; + let big_line_end = 48 + big_line_count - 1; + + // Representative units: + // 0. import block (lines 1–5, ≤200) + // 1. free fn `compute_mrr` (lines 7–12, ≤200) + // 2. class `MetricsCollector` (lines 14–20, ≤200) + // 3. class `BaseEvaluator` (lines 22–30, ≤200) + // 4. method `run` (lines 32–38, ≤200) + // 5. method `report` (lines 40–46, ≤200) + // 6. big_compute (>200 lines) to force split_oversize + let raw_units: Vec<(&str, u32, u32, String)> = vec![ + ( + "imports", + 1, + 5, + "import os\nimport sys\nfrom typing import List\nfrom pathlib import Path\nfrom collections import defaultdict".to_string(), + ), + ( + "compute_mrr", + 7, + 12, + "def compute_mrr(scores):\n if not scores:\n return 0.0\n return sum(\n 1.0 / r for r in scores\n ) / len(scores)".to_string(), + ), + ( + "MetricsCollector", + 14, + 20, + "class MetricsCollector:\n def __init__(self):\n self.scores = []\n self.labels = []\n self.counts = defaultdict(int)\n self.totals = defaultdict(float)\n self.tags = []".to_string(), + ), + ( + "BaseEvaluator", + 22, + 30, + "class BaseEvaluator:\n def evaluate(self, data):\n raise NotImplementedError\n def batch_evaluate(self, items):\n results = []\n for item in items:\n results.append(self.evaluate(item))\n return results\n def name(self):\n return type(self).__name__".to_string(), + ), + ( + "MetricsCollector.run", + 32, + 38, + "class MetricsCollector:\n def run(self, inputs):\n for inp in inputs:\n score = self._score(inp)\n self.scores.append(\n score\n )".to_string(), + ), + 
( + "MetricsCollector.report", + 40, + 46, + "class MetricsCollector:\n def report(self):\n return {\n 'mean': sum(self.scores) / max(len(self.scores), 1),\n 'count': len(self.scores),\n 'tags': self.tags,\n }".to_string(), + ), + ("big_compute", 48, big_line_end, big_body), + ]; + + let blocks: Vec<Block> = raw_units + .iter() + .enumerate() + .map(|(i, (sym, ls, le, code))| { + let span = SourceSpan::Code { + line_start: *ls, + line_end: *le, + symbol: Some((*sym).to_string()), + lang: Some("python".into()), + }; + let bid = id_for_block(&doc_id, "code", &[], i as u32, &span); + Block::Code(CodeBlock { + common: CommonBlock { + block_id: bid, + heading_path: vec![], + source_span: span, + }, + lang: Some("python".into()), + code: code.clone(), + }) + }) + .collect(); + + CanonicalDocument { + doc_id, + source_asset_id: aid, + workspace_path: wp, + title: "metrics.py".into(), + lang: Lang("und".into()), + blocks, + metadata: Metadata { + aliases: vec![], + tags: vec![], + created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), + updated_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), + source_type: SourceType::Note, + trust_level: TrustLevel::Primary, + user_id_alias: None, + user: Default::default(), + repo: Some("kebab".into()), + git_branch: Some("main".into()), + git_commit: Some("0".repeat(40)), + code_lang: Some("python".into()), + }, + provenance: Provenance { events: vec![] }, + parser_version: pv, + schema_version: 1, + doc_version: 1, + last_chunker_version: None, + last_embedding_version: None, + } +} + +fn fixed_policy() -> ChunkPolicy { + ChunkPolicy { + target_tokens: 500, + overlap_tokens: 80, + respect_markdown_headings: false, + chunker_version: ChunkerVersion("code-python-ast-v1".into()), + } +} + +#[test] +fn code_python_ast_chunks_snapshot() { + let doc = fixed_doc(); + let policy = fixed_policy(); + + let chunks = CodePythonAstV1Chunker.chunk(&doc, &policy).expect("chunk"); + let actual = 
serde_json::to_value(&chunks).unwrap(); + + let dir = fixtures_dir(); + let baseline_path = dir.join("code-sample.py.chunks.snapshot.json"); + let baseline_text = match std::fs::read_to_string(&baseline_path) { + Ok(s) => s, + Err(_) if std::env::var("UPDATE_SNAPSHOTS").is_ok() => { + std::fs::create_dir_all(&dir).unwrap(); + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap(); + return; + } + Err(e) => panic!( + "missing baseline {}; run with UPDATE_SNAPSHOTS=1 to create: {e}", + baseline_path.display() + ), + }; + let expected: Value = serde_json::from_str(&baseline_text).expect("baseline parses as json"); + + if actual != expected { + if std::env::var("UPDATE_SNAPSHOTS").is_ok() { + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap(); + eprintln!("updated baseline {}", baseline_path.display()); + return; + } + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + panic!( + "code-python-ast-v1 chunks snapshot drift\n\ + --- expected ({}) ---\n{baseline_text}\n\ + --- actual ---\n{pretty}\n\ + If intentional, re-run with UPDATE_SNAPSHOTS=1.", + baseline_path.display() + ); + } +} + +/// Determinism cross-check: re-running the same pipeline yields the same +/// chunk_ids byte-for-byte. 
+#[test] +fn code_python_ast_chunks_are_deterministic() { + let policy = fixed_policy(); + let baseline: Vec<String> = CodePythonAstV1Chunker + .chunk(&fixed_doc(), &policy) + .unwrap() + .into_iter() + .map(|c| c.chunk_id.0) + .collect(); + for _ in 0..5 { + let again: Vec<String> = CodePythonAstV1Chunker + .chunk(&fixed_doc(), &policy) + .unwrap() + .into_iter() + .map(|c| c.chunk_id.0) + .collect(); + assert_eq!(again, baseline); + } +} diff --git a/crates/kebab-chunk/tests/code_ts_ast_snapshot.rs b/crates/kebab-chunk/tests/code_ts_ast_snapshot.rs new file mode 100644 index 0000000..bca0301 --- /dev/null +++ b/crates/kebab-chunk/tests/code_ts_ast_snapshot.rs @@ -0,0 +1,221 @@ +//! Snapshot test pinning the `Vec` JSON for a +//! representative TypeScript code `CanonicalDocument`. +//! +//! This is an integration test. `kebab-parse-code` is intentionally NOT +//! a dev-dep (design §6.3 / §8 boundary: AST extraction is parser-side). +//! The `CanonicalDocument` is built inline from hand-crafted `Block::Code` +//! units, which is the same pattern used in `code_rust_ast_v1.rs`'s +//! internal `code_doc` test helper. +//! +//! Set `UPDATE_SNAPSHOTS=1` to re-bake the baseline. + +use std::path::PathBuf; + +use kebab_chunk::CodeTsAstV1Chunker; +use kebab_core::{ + AssetId, Block, CanonicalDocument, ChunkPolicy, Chunker, ChunkerVersion, CodeBlock, CommonBlock, + Lang, Metadata, ParserVersion, Provenance, SourceSpan, SourceType, TrustLevel, WorkspacePath, + id_for_block, id_for_doc, +}; +use serde_json::Value; +use time::OffsetDateTime; + +fn fixtures_dir() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("tests") + .join("fixtures") +} + +fn fixed_doc() -> CanonicalDocument { + let wp = WorkspacePath("src/Foo.ts".into()); + let aid = AssetId("b".repeat(64)); + // Pin parser_version so doc_id / block_ids are reproducible. + let pv = ParserVersion("code-ts-v1".into()); + let doc_id = id_for_doc(&wp, &aid, &pv); + + // Build a >200-line method body to force split_oversize. 
+ let big_body: String = { + let header = "export class BigProcessor {\n process(items: string[]): string[] {\n"; + let body: String = (0..210u32) + .map(|i| format!(" const v{i} = items[{i}] ?? '';\n")) + .collect(); + let footer = " return items;\n }\n}"; + format!("{header}{body}{footer}") + }; + let big_line_count = big_body.lines().count() as u32; + let big_line_end = 48 + big_line_count - 1; + + // Representative units: + // 0. import block (lines 1–5, ≤200) + // 1. free fn `parseInput` (lines 7–12, ≤200) + // 2. interface `Frobable` (lines 14–20, ≤200) + // 3. class `Foo` (lines 22–30, ≤200) + // 4. method `Foo.double` (lines 32–38, ≤200) + // 5. method `Foo.triple` (lines 40–46, ≤200) + // 6. BigProcessor (>200 lines) to force split_oversize + let raw_units: Vec<(&str, u32, u32, String)> = vec![ + ( + "imports", + 1, + 5, + "import { readFileSync } from 'fs';\nimport { join } from 'path';\nimport type { Config } from './config';\nimport { Logger } from './logger';\nimport { EventEmitter } from 'events';".to_string(), + ), + ( + "parseInput", + 7, + 12, + "export function parseInput(raw: string): number | null {\n const trimmed = raw.trim();\n const n = Number(trimmed);\n if (isNaN(n)) return null;\n return n;\n}".to_string(), + ), + ( + "Frobable", + 14, + 20, + "export interface Frobable {\n frob(): string;\n frobTwice(): string;\n readonly name: string;\n readonly tags: string[];\n count: number;\n reset(): void;\n}".to_string(), + ), + ( + "Foo", + 22, + 30, + "export class Foo implements Frobable {\n constructor(\n public readonly name: string,\n public value: number,\n public tags: string[] = [],\n ) {}\n frob(): string { return this.name; }\n frobTwice(): string { return this.name.repeat(2); }\n reset(): void { this.value = 0; }\n}".to_string(), + ), + ( + "Foo.double", + 32, + 38, + "export class Foo {\n double(): number {\n const result = this.value * 2;\n if (result > Number.MAX_SAFE_INTEGER) {\n return Number.MAX_SAFE_INTEGER;\n }\n return 
result;\n }\n}".to_string(), + ), + ( + "Foo.triple", + 40, + 46, + "export class Foo {\n triple(): number {\n const result = this.value * 3;\n if (result > Number.MAX_SAFE_INTEGER) {\n return Number.MAX_SAFE_INTEGER;\n }\n return result;\n }\n}".to_string(), + ), + ("BigProcessor", 48, big_line_end, big_body), + ]; + + let blocks: Vec<Block> = raw_units + .iter() + .enumerate() + .map(|(i, (sym, ls, le, code))| { + let span = SourceSpan::Code { + line_start: *ls, + line_end: *le, + symbol: Some((*sym).to_string()), + lang: Some("typescript".into()), + }; + let bid = id_for_block(&doc_id, "code", &[], i as u32, &span); + Block::Code(CodeBlock { + common: CommonBlock { + block_id: bid, + heading_path: vec![], + source_span: span, + }, + lang: Some("typescript".into()), + code: code.clone(), + }) + }) + .collect(); + + CanonicalDocument { + doc_id, + source_asset_id: aid, + workspace_path: wp, + title: "Foo.ts".into(), + lang: Lang("und".into()), + blocks, + metadata: Metadata { + aliases: vec![], + tags: vec![], + created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), + updated_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), + source_type: SourceType::Note, + trust_level: TrustLevel::Primary, + user_id_alias: None, + user: Default::default(), + repo: Some("kebab".into()), + git_branch: Some("main".into()), + git_commit: Some("0".repeat(40)), + code_lang: Some("typescript".into()), + }, + provenance: Provenance { events: vec![] }, + parser_version: pv, + schema_version: 1, + doc_version: 1, + last_chunker_version: None, + last_embedding_version: None, + } +} + +fn fixed_policy() -> ChunkPolicy { + ChunkPolicy { + target_tokens: 500, + overlap_tokens: 80, + respect_markdown_headings: false, + chunker_version: ChunkerVersion("code-ts-ast-v1".into()), + } +} + +#[test] +fn code_ts_ast_chunks_snapshot() { + let doc = fixed_doc(); + let policy = fixed_policy(); + + let chunks = CodeTsAstV1Chunker.chunk(&doc, &policy).expect("chunk"); + let 
actual = serde_json::to_value(&chunks).unwrap(); + + let dir = fixtures_dir(); + let baseline_path = dir.join("code-sample.ts.chunks.snapshot.json"); + let baseline_text = match std::fs::read_to_string(&baseline_path) { + Ok(s) => s, + Err(_) if std::env::var("UPDATE_SNAPSHOTS").is_ok() => { + std::fs::create_dir_all(&dir).unwrap(); + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap(); + return; + } + Err(e) => panic!( + "missing baseline {}; run with UPDATE_SNAPSHOTS=1 to create: {e}", + baseline_path.display() + ), + }; + let expected: Value = serde_json::from_str(&baseline_text).expect("baseline parses as json"); + + if actual != expected { + if std::env::var("UPDATE_SNAPSHOTS").is_ok() { + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap(); + eprintln!("updated baseline {}", baseline_path.display()); + return; + } + let pretty = serde_json::to_string_pretty(&actual).unwrap(); + panic!( + "code-ts-ast-v1 chunks snapshot drift\n\ + --- expected ({}) ---\n{baseline_text}\n\ + --- actual ---\n{pretty}\n\ + If intentional, re-run with UPDATE_SNAPSHOTS=1.", + baseline_path.display() + ); + } +} + +/// Determinism cross-check: re-running the same pipeline yields the same +/// chunk_ids byte-for-byte. 
+#[test] +fn code_ts_ast_chunks_are_deterministic() { + let policy = fixed_policy(); + let baseline: Vec<String> = CodeTsAstV1Chunker + .chunk(&fixed_doc(), &policy) + .unwrap() + .into_iter() + .map(|c| c.chunk_id.0) + .collect(); + for _ in 0..5 { + let again: Vec<String> = CodeTsAstV1Chunker + .chunk(&fixed_doc(), &policy) + .unwrap() + .into_iter() + .map(|c| c.chunk_id.0) + .collect(); + assert_eq!(again, baseline); + } +} diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json new file mode 100644 index 0000000..fb33e50 --- /dev/null +++ b/crates/kebab-chunk/tests/fixtures/code-sample.js.chunks.snapshot.json @@ -0,0 +1,170 @@ +[ + { + "block_ids": [ + "cc724d960aebe9fb36062d24f4626c66" + ], + "chunk_id": "c9cf256456a47671fd6ceda800db8c05", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 5, + "line_start": 1, + "symbol": "requires" + } + ], + "text": "const fs = require('fs');\nconst path = require('path');\nconst { EventEmitter } = require('events');\nconst assert = require('assert');\nconst crypto = require('crypto');", + "token_estimate": 56 + }, + { + "block_ids": [ + "f0d00af94e8b3e0fe4249f66d27caedd" + ], + "chunk_id": "93756a717b518ce2a94d1390b1b6d4f5", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 12, + "line_start": 7, + "symbol": "add" + } + ], + "text": "export function add(a, b) {\n if (typeof a !== 'number') throw new TypeError('a');\n if (typeof b !== 'number') throw new TypeError('b');\n const result = a + b;\n assert(isFinite(result));\n return result;\n}", + "token_estimate": 70 + }, + { + "block_ids": [ + 
"19e5e07c316d04e18ec0b10598c20ec7" + ], + "chunk_id": "1de13c1d85ba9c05e4f05f2d9c32820d", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 20, + "line_start": 14, + "symbol": "EventBus" + } + ], + "text": "class EventBus {\n constructor() {\n this._handlers = new Map();\n this._history = [];\n this._maxHistory = 100;\n this._seq = 0;\n }\n}", + "token_estimate": 48 + }, + { + "block_ids": [ + "8e016bd376edde2c49320c5094d01b67" + ], + "chunk_id": "db80b34645e2b9148a2ebd8967d44a64", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 30, + "line_start": 22, + "symbol": "BaseHandler" + } + ], + "text": "class BaseHandler {\n handle(event) {\n throw new Error('not implemented');\n }\n batchHandle(events) {\n const results = [];\n for (const ev of events) {\n results.push(this.handle(ev));\n }\n return results;\n }\n}", + "token_estimate": 77 + }, + { + "block_ids": [ + "e06656d11af2c1d7928856766382d168" + ], + "chunk_id": "7b0422e3646997d0cd2e694a6c4ca2e7", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 38, + "line_start": 32, + "symbol": "EventBus.emit" + } + ], + "text": "class EventBus {\n emit(name, payload) {\n const handlers = this._handlers.get(name) ?? 
[];\n for (const h of handlers) {\n h(payload);\n }\n return this;\n }\n}", + "token_estimate": 58 + }, + { + "block_ids": [ + "601a8af776f0634cfb4ccfa97e612afc" + ], + "chunk_id": "640269e1e8c1d052868f1f88558f28a2", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 46, + "line_start": 40, + "symbol": "EventBus.on" + } + ], + "text": "class EventBus {\n on(name, handler) {\n if (!this._handlers.has(name)) {\n this._handlers.set(name, []);\n }\n this._handlers.get(name).push(handler);\n return this;\n }\n}", + "token_estimate": 62 + }, + { + "block_ids": [ + "2bc61a811414be749c17290832857c7f" + ], + "chunk_id": "073802021b95a24f0c905b36dd9905c3", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 247, + "line_start": 48, + "symbol": "bigTransform [part 1/2]" + } + ], + "text": "function bigTransform(items) {\n const v0 = items[0] !== undefined ? items[0] : null;\n const v1 = items[1] !== undefined ? items[1] : null;\n const v2 = items[2] !== undefined ? items[2] : null;\n const v3 = items[3] !== undefined ? items[3] : null;\n const v4 = items[4] !== undefined ? items[4] : null;\n const v5 = items[5] !== undefined ? items[5] : null;\n const v6 = items[6] !== undefined ? items[6] : null;\n const v7 = items[7] !== undefined ? items[7] : null;\n const v8 = items[8] !== undefined ? items[8] : null;\n const v9 = items[9] !== undefined ? items[9] : null;\n const v10 = items[10] !== undefined ? items[10] : null;\n const v11 = items[11] !== undefined ? items[11] : null;\n const v12 = items[12] !== undefined ? items[12] : null;\n const v13 = items[13] !== undefined ? items[13] : null;\n const v14 = items[14] !== undefined ? 
items[14] : null;\n const v15 = items[15] !== undefined ? items[15] : null;\n const v16 = items[16] !== undefined ? items[16] : null;\n const v17 = items[17] !== undefined ? items[17] : null;\n const v18 = items[18] !== undefined ? items[18] : null;\n const v19 = items[19] !== undefined ? items[19] : null;\n const v20 = items[20] !== undefined ? items[20] : null;\n const v21 = items[21] !== undefined ? items[21] : null;\n const v22 = items[22] !== undefined ? items[22] : null;\n const v23 = items[23] !== undefined ? items[23] : null;\n const v24 = items[24] !== undefined ? items[24] : null;\n const v25 = items[25] !== undefined ? items[25] : null;\n const v26 = items[26] !== undefined ? items[26] : null;\n const v27 = items[27] !== undefined ? items[27] : null;\n const v28 = items[28] !== undefined ? items[28] : null;\n const v29 = items[29] !== undefined ? items[29] : null;\n const v30 = items[30] !== undefined ? items[30] : null;\n const v31 = items[31] !== undefined ? items[31] : null;\n const v32 = items[32] !== undefined ? items[32] : null;\n const v33 = items[33] !== undefined ? items[33] : null;\n const v34 = items[34] !== undefined ? items[34] : null;\n const v35 = items[35] !== undefined ? items[35] : null;\n const v36 = items[36] !== undefined ? items[36] : null;\n const v37 = items[37] !== undefined ? items[37] : null;\n const v38 = items[38] !== undefined ? items[38] : null;\n const v39 = items[39] !== undefined ? items[39] : null;\n const v40 = items[40] !== undefined ? items[40] : null;\n const v41 = items[41] !== undefined ? items[41] : null;\n const v42 = items[42] !== undefined ? items[42] : null;\n const v43 = items[43] !== undefined ? items[43] : null;\n const v44 = items[44] !== undefined ? items[44] : null;\n const v45 = items[45] !== undefined ? items[45] : null;\n const v46 = items[46] !== undefined ? items[46] : null;\n const v47 = items[47] !== undefined ? items[47] : null;\n const v48 = items[48] !== undefined ? 
items[48] : null;\n const v49 = items[49] !== undefined ? items[49] : null;\n const v50 = items[50] !== undefined ? items[50] : null;\n const v51 = items[51] !== undefined ? items[51] : null;\n const v52 = items[52] !== undefined ? items[52] : null;\n const v53 = items[53] !== undefined ? items[53] : null;\n const v54 = items[54] !== undefined ? items[54] : null;\n const v55 = items[55] !== undefined ? items[55] : null;\n const v56 = items[56] !== undefined ? items[56] : null;\n const v57 = items[57] !== undefined ? items[57] : null;\n const v58 = items[58] !== undefined ? items[58] : null;\n const v59 = items[59] !== undefined ? items[59] : null;\n const v60 = items[60] !== undefined ? items[60] : null;\n const v61 = items[61] !== undefined ? items[61] : null;\n const v62 = items[62] !== undefined ? items[62] : null;\n const v63 = items[63] !== undefined ? items[63] : null;\n const v64 = items[64] !== undefined ? items[64] : null;\n const v65 = items[65] !== undefined ? items[65] : null;\n const v66 = items[66] !== undefined ? items[66] : null;\n const v67 = items[67] !== undefined ? items[67] : null;\n const v68 = items[68] !== undefined ? items[68] : null;\n const v69 = items[69] !== undefined ? items[69] : null;\n const v70 = items[70] !== undefined ? items[70] : null;\n const v71 = items[71] !== undefined ? items[71] : null;\n const v72 = items[72] !== undefined ? items[72] : null;\n const v73 = items[73] !== undefined ? items[73] : null;\n const v74 = items[74] !== undefined ? items[74] : null;\n const v75 = items[75] !== undefined ? items[75] : null;\n const v76 = items[76] !== undefined ? items[76] : null;\n const v77 = items[77] !== undefined ? items[77] : null;\n const v78 = items[78] !== undefined ? items[78] : null;\n const v79 = items[79] !== undefined ? items[79] : null;\n const v80 = items[80] !== undefined ? items[80] : null;\n const v81 = items[81] !== undefined ? items[81] : null;\n const v82 = items[82] !== undefined ? 
items[82] : null;\n const v83 = items[83] !== undefined ? items[83] : null;\n const v84 = items[84] !== undefined ? items[84] : null;\n const v85 = items[85] !== undefined ? items[85] : null;\n const v86 = items[86] !== undefined ? items[86] : null;\n const v87 = items[87] !== undefined ? items[87] : null;\n const v88 = items[88] !== undefined ? items[88] : null;\n const v89 = items[89] !== undefined ? items[89] : null;\n const v90 = items[90] !== undefined ? items[90] : null;\n const v91 = items[91] !== undefined ? items[91] : null;\n const v92 = items[92] !== undefined ? items[92] : null;\n const v93 = items[93] !== undefined ? items[93] : null;\n const v94 = items[94] !== undefined ? items[94] : null;\n const v95 = items[95] !== undefined ? items[95] : null;\n const v96 = items[96] !== undefined ? items[96] : null;\n const v97 = items[97] !== undefined ? items[97] : null;\n const v98 = items[98] !== undefined ? items[98] : null;\n const v99 = items[99] !== undefined ? items[99] : null;\n const v100 = items[100] !== undefined ? items[100] : null;\n const v101 = items[101] !== undefined ? items[101] : null;\n const v102 = items[102] !== undefined ? items[102] : null;\n const v103 = items[103] !== undefined ? items[103] : null;\n const v104 = items[104] !== undefined ? items[104] : null;\n const v105 = items[105] !== undefined ? items[105] : null;\n const v106 = items[106] !== undefined ? items[106] : null;\n const v107 = items[107] !== undefined ? items[107] : null;\n const v108 = items[108] !== undefined ? items[108] : null;\n const v109 = items[109] !== undefined ? items[109] : null;\n const v110 = items[110] !== undefined ? items[110] : null;\n const v111 = items[111] !== undefined ? items[111] : null;\n const v112 = items[112] !== undefined ? items[112] : null;\n const v113 = items[113] !== undefined ? items[113] : null;\n const v114 = items[114] !== undefined ? items[114] : null;\n const v115 = items[115] !== undefined ? 
items[115] : null;\n const v116 = items[116] !== undefined ? items[116] : null;\n const v117 = items[117] !== undefined ? items[117] : null;\n const v118 = items[118] !== undefined ? items[118] : null;\n const v119 = items[119] !== undefined ? items[119] : null;\n const v120 = items[120] !== undefined ? items[120] : null;\n const v121 = items[121] !== undefined ? items[121] : null;\n const v122 = items[122] !== undefined ? items[122] : null;\n const v123 = items[123] !== undefined ? items[123] : null;\n const v124 = items[124] !== undefined ? items[124] : null;\n const v125 = items[125] !== undefined ? items[125] : null;\n const v126 = items[126] !== undefined ? items[126] : null;\n const v127 = items[127] !== undefined ? items[127] : null;\n const v128 = items[128] !== undefined ? items[128] : null;\n const v129 = items[129] !== undefined ? items[129] : null;\n const v130 = items[130] !== undefined ? items[130] : null;\n const v131 = items[131] !== undefined ? items[131] : null;\n const v132 = items[132] !== undefined ? items[132] : null;\n const v133 = items[133] !== undefined ? items[133] : null;\n const v134 = items[134] !== undefined ? items[134] : null;\n const v135 = items[135] !== undefined ? items[135] : null;\n const v136 = items[136] !== undefined ? items[136] : null;\n const v137 = items[137] !== undefined ? items[137] : null;\n const v138 = items[138] !== undefined ? items[138] : null;\n const v139 = items[139] !== undefined ? items[139] : null;\n const v140 = items[140] !== undefined ? items[140] : null;\n const v141 = items[141] !== undefined ? items[141] : null;\n const v142 = items[142] !== undefined ? items[142] : null;\n const v143 = items[143] !== undefined ? items[143] : null;\n const v144 = items[144] !== undefined ? items[144] : null;\n const v145 = items[145] !== undefined ? items[145] : null;\n const v146 = items[146] !== undefined ? items[146] : null;\n const v147 = items[147] !== undefined ? 
items[147] : null;\n const v148 = items[148] !== undefined ? items[148] : null;\n const v149 = items[149] !== undefined ? items[149] : null;\n const v150 = items[150] !== undefined ? items[150] : null;\n const v151 = items[151] !== undefined ? items[151] : null;\n const v152 = items[152] !== undefined ? items[152] : null;\n const v153 = items[153] !== undefined ? items[153] : null;\n const v154 = items[154] !== undefined ? items[154] : null;\n const v155 = items[155] !== undefined ? items[155] : null;\n const v156 = items[156] !== undefined ? items[156] : null;\n const v157 = items[157] !== undefined ? items[157] : null;\n const v158 = items[158] !== undefined ? items[158] : null;\n const v159 = items[159] !== undefined ? items[159] : null;\n const v160 = items[160] !== undefined ? items[160] : null;\n const v161 = items[161] !== undefined ? items[161] : null;\n const v162 = items[162] !== undefined ? items[162] : null;\n const v163 = items[163] !== undefined ? items[163] : null;\n const v164 = items[164] !== undefined ? items[164] : null;\n const v165 = items[165] !== undefined ? items[165] : null;\n const v166 = items[166] !== undefined ? items[166] : null;\n const v167 = items[167] !== undefined ? items[167] : null;\n const v168 = items[168] !== undefined ? items[168] : null;\n const v169 = items[169] !== undefined ? items[169] : null;\n const v170 = items[170] !== undefined ? items[170] : null;\n const v171 = items[171] !== undefined ? items[171] : null;\n const v172 = items[172] !== undefined ? items[172] : null;\n const v173 = items[173] !== undefined ? items[173] : null;\n const v174 = items[174] !== undefined ? items[174] : null;\n const v175 = items[175] !== undefined ? items[175] : null;\n const v176 = items[176] !== undefined ? items[176] : null;\n const v177 = items[177] !== undefined ? items[177] : null;\n const v178 = items[178] !== undefined ? items[178] : null;\n const v179 = items[179] !== undefined ? 
items[179] : null;\n const v180 = items[180] !== undefined ? items[180] : null;\n const v181 = items[181] !== undefined ? items[181] : null;\n const v182 = items[182] !== undefined ? items[182] : null;\n const v183 = items[183] !== undefined ? items[183] : null;\n const v184 = items[184] !== undefined ? items[184] : null;\n const v185 = items[185] !== undefined ? items[185] : null;\n const v186 = items[186] !== undefined ? items[186] : null;\n const v187 = items[187] !== undefined ? items[187] : null;\n const v188 = items[188] !== undefined ? items[188] : null;\n const v189 = items[189] !== undefined ? items[189] : null;\n const v190 = items[190] !== undefined ? items[190] : null;\n const v191 = items[191] !== undefined ? items[191] : null;\n const v192 = items[192] !== undefined ? items[192] : null;\n const v193 = items[193] !== undefined ? items[193] : null;\n const v194 = items[194] !== undefined ? items[194] : null;\n const v195 = items[195] !== undefined ? items[195] : null;\n const v196 = items[196] !== undefined ? items[196] : null;\n const v197 = items[197] !== undefined ? items[197] : null;\n const v198 = items[198] !== undefined ? items[198] : null;", + "token_estimate": 3947 + }, + { + "block_ids": [ + "2bc61a811414be749c17290832857c7f" + ], + "chunk_id": "62cdc27761f4a7767e6caceae2517977", + "chunker_version": "code-js-ast-v1", + "doc_id": "99d3bdd276489c2d51975eefb16cb64d", + "heading_path": [], + "policy_hash": "0d0cd223ca3431b2", + "source_spans": [ + { + "kind": "code", + "lang": "javascript", + "line_end": 260, + "line_start": 248, + "symbol": "bigTransform [part 2/2]" + } + ], + "text": " const v199 = items[199] !== undefined ? items[199] : null;\n const v200 = items[200] !== undefined ? items[200] : null;\n const v201 = items[201] !== undefined ? items[201] : null;\n const v202 = items[202] !== undefined ? items[202] : null;\n const v203 = items[203] !== undefined ? items[203] : null;\n const v204 = items[204] !== undefined ? 
items[204] : null;\n const v205 = items[205] !== undefined ? items[205] : null;\n const v206 = items[206] !== undefined ? items[206] : null;\n const v207 = items[207] !== undefined ? items[207] : null;\n const v208 = items[208] !== undefined ? items[208] : null;\n const v209 = items[209] !== undefined ? items[209] : null;\n return items;\n}", + "token_estimate": 230 + } +] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json new file mode 100644 index 0000000..1b9d86e --- /dev/null +++ b/crates/kebab-chunk/tests/fixtures/code-sample.py.chunks.snapshot.json @@ -0,0 +1,170 @@ +[ + { + "block_ids": [ + "bd1be1fd8b8f77e2874755010b36e617" + ], + "chunk_id": "20e05d99069f939104cdc69c7ef22889", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + "heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": "python", + "line_end": 5, + "line_start": 1, + "symbol": "imports" + } + ], + "text": "import os\nimport sys\nfrom typing import List\nfrom pathlib import Path\nfrom collections import defaultdict", + "token_estimate": 35 + }, + { + "block_ids": [ + "2fe948bb529221e94c5139951cc65acf" + ], + "chunk_id": "99cef84788f2cbad3de6fb7c27b81c48", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + "heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": "python", + "line_end": 12, + "line_start": 7, + "symbol": "compute_mrr" + } + ], + "text": "def compute_mrr(scores):\n if not scores:\n return 0.0\n return sum(\n 1.0 / r for r in scores\n ) / len(scores)", + "token_estimate": 44 + }, + { + "block_ids": [ + "ff944bad66bea107fd2500c35d7ddf68" + ], + "chunk_id": "28a3abdd51390c9c9bb89aa8b3ff3f46", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + 
"heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": "python", + "line_end": 20, + "line_start": 14, + "symbol": "MetricsCollector" + } + ], + "text": "class MetricsCollector:\n def __init__(self):\n self.scores = []\n self.labels = []\n self.counts = defaultdict(int)\n self.totals = defaultdict(float)\n self.tags = []", + "token_estimate": 67 + }, + { + "block_ids": [ + "1e75f40c64ba21ad0bada0f5d35dc232" + ], + "chunk_id": "031086ad8c4b880d02cb52527382425c", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + "heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": "python", + "line_end": 30, + "line_start": 22, + "symbol": "BaseEvaluator" + } + ], + "text": "class BaseEvaluator:\n def evaluate(self, data):\n raise NotImplementedError\n def batch_evaluate(self, items):\n results = []\n for item in items:\n results.append(self.evaluate(item))\n return results\n def name(self):\n return type(self).__name__", + "token_estimate": 99 + }, + { + "block_ids": [ + "33d08d6405adb459e90b8d67bab5cc80" + ], + "chunk_id": "a431bd5ab64b2f12634c0d4f4b3e0841", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + "heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": "python", + "line_end": 38, + "line_start": 32, + "symbol": "MetricsCollector.run" + } + ], + "text": "class MetricsCollector:\n def run(self, inputs):\n for inp in inputs:\n score = self._score(inp)\n self.scores.append(\n score\n )", + "token_estimate": 61 + }, + { + "block_ids": [ + "af3d89eb1be6e11dfd14af3c86a8ba9c" + ], + "chunk_id": "00b756d5bcc43858bb98aa609f22ab6c", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + "heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": 
"python", + "line_end": 46, + "line_start": 40, + "symbol": "MetricsCollector.report" + } + ], + "text": "class MetricsCollector:\n def report(self):\n return {\n 'mean': sum(self.scores) / max(len(self.scores), 1),\n 'count': len(self.scores),\n 'tags': self.tags,\n }", + "token_estimate": 69 + }, + { + "block_ids": [ + "c86acf6ae110d7f5681093c93ee0e5e5" + ], + "chunk_id": "90071017de40b5dd57e9d6001657cf14", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + "heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": "python", + "line_end": 247, + "line_start": 48, + "symbol": "big_compute [part 1/2]" + } + ], + "text": "def big_compute(data):\n v0 = data[0] if 0 < len(data) else 0\n v1 = data[1] if 1 < len(data) else 0\n v2 = data[2] if 2 < len(data) else 0\n v3 = data[3] if 3 < len(data) else 0\n v4 = data[4] if 4 < len(data) else 0\n v5 = data[5] if 5 < len(data) else 0\n v6 = data[6] if 6 < len(data) else 0\n v7 = data[7] if 7 < len(data) else 0\n v8 = data[8] if 8 < len(data) else 0\n v9 = data[9] if 9 < len(data) else 0\n v10 = data[10] if 10 < len(data) else 0\n v11 = data[11] if 11 < len(data) else 0\n v12 = data[12] if 12 < len(data) else 0\n v13 = data[13] if 13 < len(data) else 0\n v14 = data[14] if 14 < len(data) else 0\n v15 = data[15] if 15 < len(data) else 0\n v16 = data[16] if 16 < len(data) else 0\n v17 = data[17] if 17 < len(data) else 0\n v18 = data[18] if 18 < len(data) else 0\n v19 = data[19] if 19 < len(data) else 0\n v20 = data[20] if 20 < len(data) else 0\n v21 = data[21] if 21 < len(data) else 0\n v22 = data[22] if 22 < len(data) else 0\n v23 = data[23] if 23 < len(data) else 0\n v24 = data[24] if 24 < len(data) else 0\n v25 = data[25] if 25 < len(data) else 0\n v26 = data[26] if 26 < len(data) else 0\n v27 = data[27] if 27 < len(data) else 0\n v28 = data[28] if 28 < len(data) else 0\n v29 = data[29] if 29 < len(data) else 0\n v30 = data[30] if 30 < 
len(data) else 0\n v31 = data[31] if 31 < len(data) else 0\n v32 = data[32] if 32 < len(data) else 0\n v33 = data[33] if 33 < len(data) else 0\n v34 = data[34] if 34 < len(data) else 0\n v35 = data[35] if 35 < len(data) else 0\n v36 = data[36] if 36 < len(data) else 0\n v37 = data[37] if 37 < len(data) else 0\n v38 = data[38] if 38 < len(data) else 0\n v39 = data[39] if 39 < len(data) else 0\n v40 = data[40] if 40 < len(data) else 0\n v41 = data[41] if 41 < len(data) else 0\n v42 = data[42] if 42 < len(data) else 0\n v43 = data[43] if 43 < len(data) else 0\n v44 = data[44] if 44 < len(data) else 0\n v45 = data[45] if 45 < len(data) else 0\n v46 = data[46] if 46 < len(data) else 0\n v47 = data[47] if 47 < len(data) else 0\n v48 = data[48] if 48 < len(data) else 0\n v49 = data[49] if 49 < len(data) else 0\n v50 = data[50] if 50 < len(data) else 0\n v51 = data[51] if 51 < len(data) else 0\n v52 = data[52] if 52 < len(data) else 0\n v53 = data[53] if 53 < len(data) else 0\n v54 = data[54] if 54 < len(data) else 0\n v55 = data[55] if 55 < len(data) else 0\n v56 = data[56] if 56 < len(data) else 0\n v57 = data[57] if 57 < len(data) else 0\n v58 = data[58] if 58 < len(data) else 0\n v59 = data[59] if 59 < len(data) else 0\n v60 = data[60] if 60 < len(data) else 0\n v61 = data[61] if 61 < len(data) else 0\n v62 = data[62] if 62 < len(data) else 0\n v63 = data[63] if 63 < len(data) else 0\n v64 = data[64] if 64 < len(data) else 0\n v65 = data[65] if 65 < len(data) else 0\n v66 = data[66] if 66 < len(data) else 0\n v67 = data[67] if 67 < len(data) else 0\n v68 = data[68] if 68 < len(data) else 0\n v69 = data[69] if 69 < len(data) else 0\n v70 = data[70] if 70 < len(data) else 0\n v71 = data[71] if 71 < len(data) else 0\n v72 = data[72] if 72 < len(data) else 0\n v73 = data[73] if 73 < len(data) else 0\n v74 = data[74] if 74 < len(data) else 0\n v75 = data[75] if 75 < len(data) else 0\n v76 = data[76] if 76 < len(data) else 0\n v77 = data[77] if 77 < len(data) else 0\n v78 = 
data[78] if 78 < len(data) else 0\n v79 = data[79] if 79 < len(data) else 0\n v80 = data[80] if 80 < len(data) else 0\n v81 = data[81] if 81 < len(data) else 0\n v82 = data[82] if 82 < len(data) else 0\n v83 = data[83] if 83 < len(data) else 0\n v84 = data[84] if 84 < len(data) else 0\n v85 = data[85] if 85 < len(data) else 0\n v86 = data[86] if 86 < len(data) else 0\n v87 = data[87] if 87 < len(data) else 0\n v88 = data[88] if 88 < len(data) else 0\n v89 = data[89] if 89 < len(data) else 0\n v90 = data[90] if 90 < len(data) else 0\n v91 = data[91] if 91 < len(data) else 0\n v92 = data[92] if 92 < len(data) else 0\n v93 = data[93] if 93 < len(data) else 0\n v94 = data[94] if 94 < len(data) else 0\n v95 = data[95] if 95 < len(data) else 0\n v96 = data[96] if 96 < len(data) else 0\n v97 = data[97] if 97 < len(data) else 0\n v98 = data[98] if 98 < len(data) else 0\n v99 = data[99] if 99 < len(data) else 0\n v100 = data[100] if 100 < len(data) else 0\n v101 = data[101] if 101 < len(data) else 0\n v102 = data[102] if 102 < len(data) else 0\n v103 = data[103] if 103 < len(data) else 0\n v104 = data[104] if 104 < len(data) else 0\n v105 = data[105] if 105 < len(data) else 0\n v106 = data[106] if 106 < len(data) else 0\n v107 = data[107] if 107 < len(data) else 0\n v108 = data[108] if 108 < len(data) else 0\n v109 = data[109] if 109 < len(data) else 0\n v110 = data[110] if 110 < len(data) else 0\n v111 = data[111] if 111 < len(data) else 0\n v112 = data[112] if 112 < len(data) else 0\n v113 = data[113] if 113 < len(data) else 0\n v114 = data[114] if 114 < len(data) else 0\n v115 = data[115] if 115 < len(data) else 0\n v116 = data[116] if 116 < len(data) else 0\n v117 = data[117] if 117 < len(data) else 0\n v118 = data[118] if 118 < len(data) else 0\n v119 = data[119] if 119 < len(data) else 0\n v120 = data[120] if 120 < len(data) else 0\n v121 = data[121] if 121 < len(data) else 0\n v122 = data[122] if 122 < len(data) else 0\n v123 = data[123] if 123 < len(data) else 0\n 
v124 = data[124] if 124 < len(data) else 0\n v125 = data[125] if 125 < len(data) else 0\n v126 = data[126] if 126 < len(data) else 0\n v127 = data[127] if 127 < len(data) else 0\n v128 = data[128] if 128 < len(data) else 0\n v129 = data[129] if 129 < len(data) else 0\n v130 = data[130] if 130 < len(data) else 0\n v131 = data[131] if 131 < len(data) else 0\n v132 = data[132] if 132 < len(data) else 0\n v133 = data[133] if 133 < len(data) else 0\n v134 = data[134] if 134 < len(data) else 0\n v135 = data[135] if 135 < len(data) else 0\n v136 = data[136] if 136 < len(data) else 0\n v137 = data[137] if 137 < len(data) else 0\n v138 = data[138] if 138 < len(data) else 0\n v139 = data[139] if 139 < len(data) else 0\n v140 = data[140] if 140 < len(data) else 0\n v141 = data[141] if 141 < len(data) else 0\n v142 = data[142] if 142 < len(data) else 0\n v143 = data[143] if 143 < len(data) else 0\n v144 = data[144] if 144 < len(data) else 0\n v145 = data[145] if 145 < len(data) else 0\n v146 = data[146] if 146 < len(data) else 0\n v147 = data[147] if 147 < len(data) else 0\n v148 = data[148] if 148 < len(data) else 0\n v149 = data[149] if 149 < len(data) else 0\n v150 = data[150] if 150 < len(data) else 0\n v151 = data[151] if 151 < len(data) else 0\n v152 = data[152] if 152 < len(data) else 0\n v153 = data[153] if 153 < len(data) else 0\n v154 = data[154] if 154 < len(data) else 0\n v155 = data[155] if 155 < len(data) else 0\n v156 = data[156] if 156 < len(data) else 0\n v157 = data[157] if 157 < len(data) else 0\n v158 = data[158] if 158 < len(data) else 0\n v159 = data[159] if 159 < len(data) else 0\n v160 = data[160] if 160 < len(data) else 0\n v161 = data[161] if 161 < len(data) else 0\n v162 = data[162] if 162 < len(data) else 0\n v163 = data[163] if 163 < len(data) else 0\n v164 = data[164] if 164 < len(data) else 0\n v165 = data[165] if 165 < len(data) else 0\n v166 = data[166] if 166 < len(data) else 0\n v167 = data[167] if 167 < len(data) else 0\n v168 = data[168] if 
168 < len(data) else 0\n v169 = data[169] if 169 < len(data) else 0\n v170 = data[170] if 170 < len(data) else 0\n v171 = data[171] if 171 < len(data) else 0\n v172 = data[172] if 172 < len(data) else 0\n v173 = data[173] if 173 < len(data) else 0\n v174 = data[174] if 174 < len(data) else 0\n v175 = data[175] if 175 < len(data) else 0\n v176 = data[176] if 176 < len(data) else 0\n v177 = data[177] if 177 < len(data) else 0\n v178 = data[178] if 178 < len(data) else 0\n v179 = data[179] if 179 < len(data) else 0\n v180 = data[180] if 180 < len(data) else 0\n v181 = data[181] if 181 < len(data) else 0\n v182 = data[182] if 182 < len(data) else 0\n v183 = data[183] if 183 < len(data) else 0\n v184 = data[184] if 184 < len(data) else 0\n v185 = data[185] if 185 < len(data) else 0\n v186 = data[186] if 186 < len(data) else 0\n v187 = data[187] if 187 < len(data) else 0\n v188 = data[188] if 188 < len(data) else 0\n v189 = data[189] if 189 < len(data) else 0\n v190 = data[190] if 190 < len(data) else 0\n v191 = data[191] if 191 < len(data) else 0\n v192 = data[192] if 192 < len(data) else 0\n v193 = data[193] if 193 < len(data) else 0\n v194 = data[194] if 194 < len(data) else 0\n v195 = data[195] if 195 < len(data) else 0\n v196 = data[196] if 196 < len(data) else 0\n v197 = data[197] if 197 < len(data) else 0\n v198 = data[198] if 198 < len(data) else 0", + "token_estimate": 3015 + }, + { + "block_ids": [ + "c86acf6ae110d7f5681093c93ee0e5e5" + ], + "chunk_id": "efc6599ac90e8de5fe8f63896a85d747", + "chunker_version": "code-python-ast-v1", + "doc_id": "97ddfbda5585eb82ed09b0d7e95c0c03", + "heading_path": [], + "policy_hash": "383e9a070f636294", + "source_spans": [ + { + "kind": "code", + "lang": "python", + "line_end": 259, + "line_start": 248, + "symbol": "big_compute [part 2/2]" + } + ], + "text": " v199 = data[199] if 199 < len(data) else 0\n v200 = data[200] if 200 < len(data) else 0\n v201 = data[201] if 201 < len(data) else 0\n v202 = data[202] if 202 < len(data) 
else 0\n v203 = data[203] if 203 < len(data) else 0\n v204 = data[204] if 204 < len(data) else 0\n v205 = data[205] if 205 < len(data) else 0\n v206 = data[206] if 206 < len(data) else 0\n v207 = data[207] if 207 < len(data) else 0\n v208 = data[208] if 208 < len(data) else 0\n v209 = data[209] if 209 < len(data) else 0\n return sum(data)", + "token_estimate": 179 + } +] diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json new file mode 100644 index 0000000..446b98d --- /dev/null +++ b/crates/kebab-chunk/tests/fixtures/code-sample.ts.chunks.snapshot.json @@ -0,0 +1,170 @@ +[ + { + "block_ids": [ + "29c56554514c80a92a9d12410056e168" + ], + "chunk_id": "fc30b9a92970ee5fb940c2b12db2c005", + "chunker_version": "code-ts-ast-v1", + "doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 5, + "line_start": 1, + "symbol": "imports" + } + ], + "text": "import { readFileSync } from 'fs';\nimport { join } from 'path';\nimport type { Config } from './config';\nimport { Logger } from './logger';\nimport { EventEmitter } from 'events';", + "token_estimate": 59 + }, + { + "block_ids": [ + "e3f542c4928032926a1e21a159686a34" + ], + "chunk_id": "d5988988e20b69da53307b43f2d400ee", + "chunker_version": "code-ts-ast-v1", + "doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 12, + "line_start": 7, + "symbol": "parseInput" + } + ], + "text": "export function parseInput(raw: string): number | null {\n const trimmed = raw.trim();\n const n = Number(trimmed);\n if (isNaN(n)) return null;\n return n;\n}", + "token_estimate": 53 + }, + { + "block_ids": [ + "77d7f5ea7af7be27611adcbcee7c2e8f" + ], + "chunk_id": 
"f1147cabe4dff8bc33b56f8ff0b397e9", + "chunker_version": "code-ts-ast-v1", + "doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 20, + "line_start": 14, + "symbol": "Frobable" + } + ], + "text": "export interface Frobable {\n frob(): string;\n frobTwice(): string;\n readonly name: string;\n readonly tags: string[];\n count: number;\n reset(): void;\n}", + "token_estimate": 52 + }, + { + "block_ids": [ + "ee878891c19c9bacebe2e2d262c2ea77" + ], + "chunk_id": "bc07691ba8e249a360fe0e056eeff9ac", + "chunker_version": "code-ts-ast-v1", + "doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 30, + "line_start": 22, + "symbol": "Foo" + } + ], + "text": "export class Foo implements Frobable {\n constructor(\n public readonly name: string,\n public value: number,\n public tags: string[] = [],\n ) {}\n frob(): string { return this.name; }\n frobTwice(): string { return this.name.repeat(2); }\n reset(): void { this.value = 0; }\n}", + "token_estimate": 95 + }, + { + "block_ids": [ + "df08aa572f5c85d0e5d28d6490acc7bc" + ], + "chunk_id": "42b7bec354bbb69ded1c8da40d30250c", + "chunker_version": "code-ts-ast-v1", + "doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 38, + "line_start": 32, + "symbol": "Foo.double" + } + ], + "text": "export class Foo {\n double(): number {\n const result = this.value * 2;\n if (result > Number.MAX_SAFE_INTEGER) {\n return Number.MAX_SAFE_INTEGER;\n }\n return result;\n }\n}", + "token_estimate": 63 + }, + { + "block_ids": [ + "91aadf18fa97c1d7c94019e0968bc9c8" + ], + "chunk_id": "f218eaf2cb72f10a78c6a2090f72c215", + "chunker_version": "code-ts-ast-v1", + 
"doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 46, + "line_start": 40, + "symbol": "Foo.triple" + } + ], + "text": "export class Foo {\n triple(): number {\n const result = this.value * 3;\n if (result > Number.MAX_SAFE_INTEGER) {\n return Number.MAX_SAFE_INTEGER;\n }\n return result;\n }\n}", + "token_estimate": 63 + }, + { + "block_ids": [ + "d719400f1d79b522d0a1267331966be0" + ], + "chunk_id": "7bd082ae93cc75e683b2a9eb3f911ee9", + "chunker_version": "code-ts-ast-v1", + "doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 247, + "line_start": 48, + "symbol": "BigProcessor [part 1/2]" + } + ], + "text": "export class BigProcessor {\n process(items: string[]): string[] {\n const v0 = items[0] ?? '';\n const v1 = items[1] ?? '';\n const v2 = items[2] ?? '';\n const v3 = items[3] ?? '';\n const v4 = items[4] ?? '';\n const v5 = items[5] ?? '';\n const v6 = items[6] ?? '';\n const v7 = items[7] ?? '';\n const v8 = items[8] ?? '';\n const v9 = items[9] ?? '';\n const v10 = items[10] ?? '';\n const v11 = items[11] ?? '';\n const v12 = items[12] ?? '';\n const v13 = items[13] ?? '';\n const v14 = items[14] ?? '';\n const v15 = items[15] ?? '';\n const v16 = items[16] ?? '';\n const v17 = items[17] ?? '';\n const v18 = items[18] ?? '';\n const v19 = items[19] ?? '';\n const v20 = items[20] ?? '';\n const v21 = items[21] ?? '';\n const v22 = items[22] ?? '';\n const v23 = items[23] ?? '';\n const v24 = items[24] ?? '';\n const v25 = items[25] ?? '';\n const v26 = items[26] ?? '';\n const v27 = items[27] ?? '';\n const v28 = items[28] ?? '';\n const v29 = items[29] ?? '';\n const v30 = items[30] ?? '';\n const v31 = items[31] ?? '';\n const v32 = items[32] ?? '';\n const v33 = items[33] ?? 
'';\n const v34 = items[34] ?? '';\n const v35 = items[35] ?? '';\n const v36 = items[36] ?? '';\n const v37 = items[37] ?? '';\n const v38 = items[38] ?? '';\n const v39 = items[39] ?? '';\n const v40 = items[40] ?? '';\n const v41 = items[41] ?? '';\n const v42 = items[42] ?? '';\n const v43 = items[43] ?? '';\n const v44 = items[44] ?? '';\n const v45 = items[45] ?? '';\n const v46 = items[46] ?? '';\n const v47 = items[47] ?? '';\n const v48 = items[48] ?? '';\n const v49 = items[49] ?? '';\n const v50 = items[50] ?? '';\n const v51 = items[51] ?? '';\n const v52 = items[52] ?? '';\n const v53 = items[53] ?? '';\n const v54 = items[54] ?? '';\n const v55 = items[55] ?? '';\n const v56 = items[56] ?? '';\n const v57 = items[57] ?? '';\n const v58 = items[58] ?? '';\n const v59 = items[59] ?? '';\n const v60 = items[60] ?? '';\n const v61 = items[61] ?? '';\n const v62 = items[62] ?? '';\n const v63 = items[63] ?? '';\n const v64 = items[64] ?? '';\n const v65 = items[65] ?? '';\n const v66 = items[66] ?? '';\n const v67 = items[67] ?? '';\n const v68 = items[68] ?? '';\n const v69 = items[69] ?? '';\n const v70 = items[70] ?? '';\n const v71 = items[71] ?? '';\n const v72 = items[72] ?? '';\n const v73 = items[73] ?? '';\n const v74 = items[74] ?? '';\n const v75 = items[75] ?? '';\n const v76 = items[76] ?? '';\n const v77 = items[77] ?? '';\n const v78 = items[78] ?? '';\n const v79 = items[79] ?? '';\n const v80 = items[80] ?? '';\n const v81 = items[81] ?? '';\n const v82 = items[82] ?? '';\n const v83 = items[83] ?? '';\n const v84 = items[84] ?? '';\n const v85 = items[85] ?? '';\n const v86 = items[86] ?? '';\n const v87 = items[87] ?? '';\n const v88 = items[88] ?? '';\n const v89 = items[89] ?? '';\n const v90 = items[90] ?? '';\n const v91 = items[91] ?? '';\n const v92 = items[92] ?? '';\n const v93 = items[93] ?? '';\n const v94 = items[94] ?? '';\n const v95 = items[95] ?? '';\n const v96 = items[96] ?? '';\n const v97 = items[97] ?? 
'';\n const v98 = items[98] ?? '';\n const v99 = items[99] ?? '';\n const v100 = items[100] ?? '';\n const v101 = items[101] ?? '';\n const v102 = items[102] ?? '';\n const v103 = items[103] ?? '';\n const v104 = items[104] ?? '';\n const v105 = items[105] ?? '';\n const v106 = items[106] ?? '';\n const v107 = items[107] ?? '';\n const v108 = items[108] ?? '';\n const v109 = items[109] ?? '';\n const v110 = items[110] ?? '';\n const v111 = items[111] ?? '';\n const v112 = items[112] ?? '';\n const v113 = items[113] ?? '';\n const v114 = items[114] ?? '';\n const v115 = items[115] ?? '';\n const v116 = items[116] ?? '';\n const v117 = items[117] ?? '';\n const v118 = items[118] ?? '';\n const v119 = items[119] ?? '';\n const v120 = items[120] ?? '';\n const v121 = items[121] ?? '';\n const v122 = items[122] ?? '';\n const v123 = items[123] ?? '';\n const v124 = items[124] ?? '';\n const v125 = items[125] ?? '';\n const v126 = items[126] ?? '';\n const v127 = items[127] ?? '';\n const v128 = items[128] ?? '';\n const v129 = items[129] ?? '';\n const v130 = items[130] ?? '';\n const v131 = items[131] ?? '';\n const v132 = items[132] ?? '';\n const v133 = items[133] ?? '';\n const v134 = items[134] ?? '';\n const v135 = items[135] ?? '';\n const v136 = items[136] ?? '';\n const v137 = items[137] ?? '';\n const v138 = items[138] ?? '';\n const v139 = items[139] ?? '';\n const v140 = items[140] ?? '';\n const v141 = items[141] ?? '';\n const v142 = items[142] ?? '';\n const v143 = items[143] ?? '';\n const v144 = items[144] ?? '';\n const v145 = items[145] ?? '';\n const v146 = items[146] ?? '';\n const v147 = items[147] ?? '';\n const v148 = items[148] ?? '';\n const v149 = items[149] ?? '';\n const v150 = items[150] ?? '';\n const v151 = items[151] ?? '';\n const v152 = items[152] ?? '';\n const v153 = items[153] ?? '';\n const v154 = items[154] ?? '';\n const v155 = items[155] ?? '';\n const v156 = items[156] ?? '';\n const v157 = items[157] ?? 
'';\n const v158 = items[158] ?? '';\n const v159 = items[159] ?? '';\n const v160 = items[160] ?? '';\n const v161 = items[161] ?? '';\n const v162 = items[162] ?? '';\n const v163 = items[163] ?? '';\n const v164 = items[164] ?? '';\n const v165 = items[165] ?? '';\n const v166 = items[166] ?? '';\n const v167 = items[167] ?? '';\n const v168 = items[168] ?? '';\n const v169 = items[169] ?? '';\n const v170 = items[170] ?? '';\n const v171 = items[171] ?? '';\n const v172 = items[172] ?? '';\n const v173 = items[173] ?? '';\n const v174 = items[174] ?? '';\n const v175 = items[175] ?? '';\n const v176 = items[176] ?? '';\n const v177 = items[177] ?? '';\n const v178 = items[178] ?? '';\n const v179 = items[179] ?? '';\n const v180 = items[180] ?? '';\n const v181 = items[181] ?? '';\n const v182 = items[182] ?? '';\n const v183 = items[183] ?? '';\n const v184 = items[184] ?? '';\n const v185 = items[185] ?? '';\n const v186 = items[186] ?? '';\n const v187 = items[187] ?? '';\n const v188 = items[188] ?? '';\n const v189 = items[189] ?? '';\n const v190 = items[190] ?? '';\n const v191 = items[191] ?? '';\n const v192 = items[192] ?? '';\n const v193 = items[193] ?? '';\n const v194 = items[194] ?? '';\n const v195 = items[195] ?? '';\n const v196 = items[196] ?? '';\n const v197 = items[197] ?? '';", + "token_estimate": 2259 + }, + { + "block_ids": [ + "d719400f1d79b522d0a1267331966be0" + ], + "chunk_id": "fd63728143d8936de0faab714fbd4165", + "chunker_version": "code-ts-ast-v1", + "doc_id": "ff6591709852ab9c57be6e50145b9800", + "heading_path": [], + "policy_hash": "dd45402f76b4e339", + "source_spans": [ + { + "kind": "code", + "lang": "typescript", + "line_end": 262, + "line_start": 248, + "symbol": "BigProcessor [part 2/2]" + } + ], + "text": " const v198 = items[198] ?? '';\n const v199 = items[199] ?? '';\n const v200 = items[200] ?? '';\n const v201 = items[201] ?? '';\n const v202 = items[202] ?? '';\n const v203 = items[203] ?? 
'';\n const v204 = items[204] ?? '';\n const v205 = items[205] ?? '';\n const v206 = items[206] ?? '';\n const v207 = items[207] ?? '';\n const v208 = items[208] ?? '';\n const v209 = items[209] ?? '';\n return items;\n }\n}", + "token_estimate": 148 + } +]