diff --git a/crates/kebab-chunk/src/code_cpp_ast_v1.rs b/crates/kebab-chunk/src/code_cpp_ast_v1.rs
new file mode 100644
index 0000000..f9272d3
--- /dev/null
+++ b/crates/kebab-chunk/src/code_cpp_ast_v1.rs
@@ -0,0 +1,347 @@
+//! `code-cpp-ast-v1` — maps a tree-sitter-derived C++ AST
+//! `CanonicalDocument` (one `Block::Code` per semantic unit, each with
+//! `SourceSpan::Code`) to chunks 1:1. A unit longer than
+//! `AST_CHUNK_MAX_LINES` is split into `<symbol> [part i/N]` sub-chunks
+//! at blank-line paragraph boundaries (design §9.1 oversize fallback).
+//!
+//! tree-sitter is intentionally NOT a dependency here: AST work is
+//! parser-side (`kebab-parse-code`, design §6.3). This chunker only
+//! consumes the `CanonicalDocument`.
+//!
+//! `AST_CHUNK_MAX_LINES` is a constant matching
+//! `IngestCodeCfg::default().ast_chunk_max_lines` (200). Per-medium
+//! config threading needs a chunker registry (P+); same deviation
+//! pattern as `pdf-page-v1`'s pinned `chunker_version`
+//! (`tasks/HOTFIXES.md`).
+
+use kebab_core::{
+    Block, BlockId, CanonicalDocument, Chunk, ChunkPolicy, Chunker, ChunkerVersion, DocumentId,
+    SourceSpan, id_for_chunk,
+};
+
+/// Version label reported by `chunker_version()`.
+const VERSION_LABEL: &str = "code-cpp-ast-v1";
+/// Divisor of the byte-length token estimate (`ceil(text.len() / 3)`).
+const BYTES_PER_TOKEN: usize = 3;
+/// Number of leading hex characters of the BLAKE3 digest kept as the policy hash.
+const POLICY_HASH_HEX_LEN: usize = 16;
+/// Units spanning more lines than this are split into `[part i/N]` sub-chunks.
+const AST_CHUNK_MAX_LINES: u32 = 200;
+
+/// Chunker that turns each `Block::Code` unit of a C++ document into one chunk
+/// (or several line-bounded parts when the unit is oversize).
+#[derive(Clone, Copy, Debug, Default)]
+pub struct CodeCppAstV1Chunker;
+
+impl Chunker for CodeCppAstV1Chunker {
+    fn chunker_version(&self) -> ChunkerVersion {
+        ChunkerVersion(VERSION_LABEL.to_string())
+    }
+
+    /// First `POLICY_HASH_HEX_LEN` hex characters of the BLAKE3 digest of the
+    /// canonical-JSON serialization of `policy`.
+    fn policy_hash(&self, policy: &ChunkPolicy) -> String {
+        let bytes = serde_json_canonicalizer::to_vec(policy)
+            .expect("canonical JSON serialization of ChunkPolicy must not fail");
+        let hex = blake3::hash(&bytes).to_hex().to_string();
+        hex[..POLICY_HASH_HEX_LEN].to_string()
+    }
+
+    /// Emits one chunk per code unit, splitting units longer than
+    /// `AST_CHUNK_MAX_LINES` into parts.
+    ///
+    /// # Errors
+    /// Fails, before producing any chunk, if `doc` contains a block that is
+    /// not `Block::Code` or whose span is not `SourceSpan::Code`.
+    fn chunk(
+        &self,
+        doc: &CanonicalDocument,
+        policy: &ChunkPolicy,
+    ) -> anyhow::Result<Vec<Chunk>> {
+        // Validate and destructure in a single pass so the chunking loop
+        // below needs no `unreachable!` arms.
+        let units = doc
+            .blocks
+            .iter()
+            .map(|b| match b {
+                Block::Code(cb) => match &cb.common.source_span {
+                    SourceSpan::Code { line_start, line_end, symbol, lang } => {
+                        Ok((cb, *line_start, *line_end, symbol, lang))
+                    }
+                    _ => Err(anyhow::anyhow!(
+                        "CodeCppAstV1Chunker only handles code docs (got non-Code source_span)"
+                    )),
+                },
+                _ => Err(anyhow::anyhow!(
+                    "CodeCppAstV1Chunker only handles code docs (got non-Code block)"
+                )),
+            })
+            .collect::<anyhow::Result<Vec<_>>>()?;
+
+        let base_policy_hash = self.policy_hash(policy);
+        let chunker_version = self.chunker_version();
+        let mut out: Vec<Chunk> = Vec::with_capacity(units.len());
+
+        for (cb, ls, le, symbol, lang) in units {
+            let block_ids: Vec<BlockId> = vec![cb.common.block_id.clone()];
+            // Saturate both ways: an inverted span counts as one line and a
+            // span ending at `u32::MAX` cannot overflow.
+            let span_lines = le.saturating_sub(ls).saturating_add(1);
+
+            if span_lines <= AST_CHUNK_MAX_LINES {
+                let span = SourceSpan::Code {
+                    line_start: ls,
+                    line_end: le,
+                    symbol: symbol.clone(),
+                    lang: lang.clone(),
+                };
+                out.push(make_chunk(
+                    doc, &chunker_version, &block_ids, &base_policy_hash,
+                    None, span, cb.code.clone(),
+                ));
+            } else {
+                let parts = split_oversize(&cb.code);
+                let n = parts.len();
+                for (i, (off_start, off_end, text)) in parts.into_iter().enumerate() {
+                    let part_ls = ls.saturating_add(off_start);
+                    // `code` can hold more lines than the declared span (e.g. a
+                    // trailing newline); never report a part ending past the unit.
+                    let part_le = ls.saturating_add(off_end).min(le).max(part_ls);
+                    let part_sym = symbol
+                        .as_ref()
+                        .map(|s| format!("{s} [part {}/{n}]", i + 1));
+                    let span = SourceSpan::Code {
+                        line_start: part_ls,
+                        line_end: part_le,
+                        symbol: part_sym,
+                        lang: lang.clone(),
+                    };
+                    out.push(make_chunk(
+                        doc, &chunker_version, &block_ids, &base_policy_hash,
+                        Some(part_ls), span, text,
+                    ));
+                }
+            }
+        }
+
+        tracing::debug!(
+            target: "kebab-chunk",
+            doc_id = %doc.doc_id,
+            chunks = out.len(),
+            "code-cpp-ast-v1 chunked",
+        );
+        Ok(out)
+    }
+}
+
+/// Assembles a `Chunk` for one unit (or one part of an oversize unit).
+///
+/// `split_key` is the absolute first line of a split part; it is appended to
+/// the hash handed to `id_for_chunk` (as `#L<line>`) so that parts sharing a
+/// block id are given different id inputs. `None` marks an unsplit unit.
+#[allow(clippy::too_many_arguments)]
+fn make_chunk(
+    doc: &CanonicalDocument,
+    chunker_version: &ChunkerVersion,
+    block_ids: &[BlockId],
+    base_policy_hash: &str,
+    split_key: Option<u32>,
+    span: SourceSpan,
+    text: String,
+) -> Chunk {
+    let id_hash = match split_key {
+        Some(k) => format!("{base_policy_hash}#L{k}"),
+        None => base_policy_hash.to_string(),
+    };
+    let chunk_id = id_for_chunk(&doc.doc_id, chunker_version, block_ids, &id_hash);
+    let token_estimate = text.len().div_ceil(BYTES_PER_TOKEN);
+    Chunk {
+        chunk_id,
+        doc_id: DocumentId(doc.doc_id.0.clone()),
+        block_ids: block_ids.to_vec(),
+        text,
+        heading_path: Vec::new(),
+        source_spans: vec![span],
+        token_estimate,
+        chunker_version: chunker_version.clone(),
+        policy_hash: base_policy_hash.to_string(),
+    }
+}
+
+/// Split an oversize unit into parts of at most `AST_CHUNK_MAX_LINES` lines.
+///
+/// Each part takes the next `AST_CHUNK_MAX_LINES` lines; when more lines
+/// follow, the cut is pulled back to just after the last blank line found in
+/// the final fifth of that window, or left as a hard cut if there is none.
+/// Returns `(line_offset_start, line_offset_end, text)` where offsets are
+/// 0-based and inclusive within the unit (caller adds the unit's absolute
+/// `line_start`). The result is never empty: `str::split` yields at least one
+/// line, even for an empty `code`.
+fn split_oversize(code: &str) -> Vec<(u32, u32, String)> {
+    let lines: Vec<&str> = code.split('\n').collect();
+    let total = lines.len() as u32;
+    let mut out: Vec<(u32, u32, String)> = Vec::new();
+    let mut start: u32 = 0;
+    while start < total {
+        let mut end = (start + AST_CHUNK_MAX_LINES).min(total);
+        let floor = start + (AST_CHUNK_MAX_LINES * 4 / 5);
+        if end < total {
+            if let Some(b) = (floor.min(end)..end)
+                .rev()
+                .find(|&i| lines[i as usize].trim().is_empty())
+            {
+                end = b + 1;
+            }
+        }
+        let text = lines[start as usize..end as usize].join("\n");
+        out.push((start, end.saturating_sub(1), text));
+        start = end;
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use kebab_core::{
+        Block, CanonicalDocument, ChunkPolicy, Chunker, ChunkerVersion, CodeBlock, CommonBlock,
+        SourceSpan, id_for_block, id_for_doc, AssetId, Lang, Metadata, ParserVersion, Provenance,
+        SourceType, TrustLevel, WorkspacePath,
+    };
+    use time::OffsetDateTime;
+
+    fn code_doc(units: &[(&str, u32, u32, &str)]) -> CanonicalDocument {
+        let wp = WorkspacePath("crates/x/src/a.cpp".into());
+        let aid = AssetId("a".repeat(64));
+        let pv = ParserVersion("code-cpp-v1".into());
+        let doc_id = id_for_doc(&wp, &aid, &pv);
+        let blocks = units
+            .iter()
+            .enumerate()
+            .map(|(i, (sym, ls, le, code))| {
+                let span = SourceSpan::Code {
+                    line_start: *ls,
+                    line_end: *le,
+                    symbol: Some((*sym).to_string()),
+                    lang: Some("cpp".into()),
+                };
+                let bid = id_for_block(&doc_id, "code", &[], i as u32, &span);
+                Block::Code(CodeBlock {
+                    common: CommonBlock { block_id: bid, heading_path: vec![], source_span: span },
+                    lang: Some("cpp".into()),
+                    code: (*code).to_string(),
+                })
+            })
+            .collect();
+        CanonicalDocument {
+            doc_id, source_asset_id: aid, workspace_path: wp, title: "a".into(),
+            lang: Lang("und".into()), blocks,
+            metadata: Metadata {
+                aliases: vec![], tags: vec![],
+                created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
+                updated_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
+                source_type: SourceType::Note, trust_level: TrustLevel::Primary,
+                user_id_alias: None, user: Default::default(),
+                repo: Some("kebab".into()), git_branch: Some("main".into()),
+                git_commit: Some("0".repeat(40)), code_lang: Some("cpp".into()),
+            },
+            provenance: Provenance { events: vec![] },
+            parser_version: pv, schema_version: 1, doc_version: 1,
+            last_chunker_version: None, last_embedding_version: None,
+        }
+    }
+    fn policy() -> ChunkPolicy {
+        ChunkPolicy { target_tokens: 500, overlap_tokens: 80,
+            respect_markdown_headings: false,
+            chunker_version: ChunkerVersion(VERSION_LABEL.into()) }
+    }
+
+    #[test]
+    fn chunker_version_is_code_cpp_ast_v1() {
+        assert_eq!(CodeCppAstV1Chunker.chunker_version(),
+            ChunkerVersion("code-cpp-ast-v1".into()));
+    }
+
+    #[test]
+    fn one_chunk_per_unit_preserves_code_span() {
+        let doc = code_doc(&[
+            ("parse", 1, 3, "int parse() {\n\t// x\n}"),
+            ("print", 5, 7, "void print() {\n\t//\n\treturn;\n}"),
+        ]);
+        let chunks = CodeCppAstV1Chunker.chunk(&doc, &policy()).unwrap();
+        assert_eq!(chunks.len(), 2);
+        for c in &chunks {
+            assert_eq!(c.source_spans.len(), 1);
+            assert!(matches!(c.source_spans[0], SourceSpan::Code { .. }));
+            assert_eq!(c.heading_path, Vec::<String>::new());
+            assert_eq!(c.chunker_version.0, "code-cpp-ast-v1");
+        }
+        match &chunks[0].source_spans[0] {
+            SourceSpan::Code { symbol, line_start, line_end, .. } => {
+                assert_eq!(symbol.as_deref(), Some("parse"));
+                assert_eq!((*line_start, *line_end), (1, 3));
+            }
+            _ => unreachable!(),
+        }
+    }
+
+    #[test]
+    fn oversize_unit_splits_into_parts_with_unique_ids() {
+        let body = (0..500).map(|i| format!("\tx{i} = {i};\n")).collect::<Vec<_>>().join("");
+        let code = format!("int big() {{\n{body}\n}}");
+        let doc = code_doc(&[("big", 1, 502, &code)]);
+        let chunks = CodeCppAstV1Chunker.chunk(&doc, &policy()).unwrap();
+        assert!(chunks.len() >= 2, "oversize unit must split, got {}", chunks.len());
+        for c in &chunks {
+            match &c.source_spans[0] {
+                SourceSpan::Code { symbol, line_start, line_end, .. } => {
+                    assert!(symbol.as_deref().unwrap().starts_with("big [part "),
+                        "part-numbered symbol, got {symbol:?}");
+                    // `code` has 503 lines but the unit spans 1..=502: parts
+                    // must stay inside the declared span.
+                    assert!(1 <= *line_start && *line_start <= *line_end && *line_end <= 502,
+                        "part span {line_start}..={line_end} escapes unit 1..=502");
+                }
+                _ => unreachable!(),
+            }
+        }
+        let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
+        let n = ids.len(); ids.sort(); ids.dedup();
+        assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
+    }
+
+    #[test]
+    fn non_code_doc_errors() {
+        use kebab_core::TextBlock;
+        let mut doc = code_doc(&[("parse", 1, 1, "int parse() {}")]);
+        doc.blocks = vec![Block::Paragraph(TextBlock {
+            common: CommonBlock {
+                block_id: kebab_core::BlockId("b".into()),
+                heading_path: vec![],
+                source_span: SourceSpan::Line { start: 1, end: 1 },
+            },
+            text: "x".into(), inlines: vec![],
+        })];
+        let err = CodeCppAstV1Chunker.chunk(&doc, &policy()).unwrap_err();
+        assert!(err.to_string().contains("CodeCppAstV1Chunker"));
+    }
+
+    #[test]
+    fn deterministic_chunk_ids_1000() {
+        let doc = code_doc(&[("parse", 1, 2, "int parse() {}\n")]);
+        let base: Vec<String> = CodeCppAstV1Chunker.chunk(&doc, &policy())
+            .unwrap().into_iter().map(|c| c.chunk_id.0).collect();
+        for _ in 0..1000 {
+            let again: Vec<String> = CodeCppAstV1Chunker.chunk(&doc, &policy())
+                .unwrap().into_iter().map(|c| c.chunk_id.0).collect();
+            assert_eq!(again, base);
+        }
+    }
+
+    #[test]
+    fn policy_hash_matches_md_heading_v1() {
+        let p = policy();
+        assert_eq!(CodeCppAstV1Chunker.policy_hash(&p),
+            crate::MdHeadingV1Chunker.policy_hash(&p));
+    }
+}
diff --git a/crates/kebab-chunk/src/lib.rs b/crates/kebab-chunk/src/lib.rs
index f1636ea..1be8bd2 100644
--- a/crates/kebab-chunk/src/lib.rs
+++ b/crates/kebab-chunk/src/lib.rs
@@ -16,6 +16,7 @@
 //! It consumes `CanonicalDocument` purely through `kb-core` types.
 
 mod code_c_ast_v1;
+mod code_cpp_ast_v1;
 mod code_go_ast_v1;
 mod code_java_ast_v1;
 mod code_js_ast_v1;
@@ -32,6 +33,7 @@ pub mod manifest_file_v1;
 pub mod code_text_paragraph_v1;
 
 pub use code_c_ast_v1::CodeCAstV1Chunker;
+pub use code_cpp_ast_v1::CodeCppAstV1Chunker;
 pub use code_go_ast_v1::CodeGoAstV1Chunker;
 pub use code_java_ast_v1::CodeJavaAstV1Chunker;
 pub use code_js_ast_v1::CodeJsAstV1Chunker;
diff --git a/crates/kebab-chunk/tests/code_cpp_ast_snapshot.rs b/crates/kebab-chunk/tests/code_cpp_ast_snapshot.rs
new file mode 100644
index 0000000..0b7724f
--- /dev/null
+++ b/crates/kebab-chunk/tests/code_cpp_ast_snapshot.rs
@@ -0,0 +1,200 @@
+//! Snapshot test pinning the `Vec<Chunk>` JSON for a
+//! representative C++ code `CanonicalDocument`.
+//!
+//! This is an integration test. `kebab-parse-code` is intentionally NOT
+//! a dev-dep (design §6.3 / §8 boundary: AST extraction is parser-side).
+//! The `CanonicalDocument` is built inline from hand-crafted `Block::Code`
+//! units, which is the same pattern used in `code_c_ast_v1.rs`'s
+//! internal `code_doc` test helper.
+//!
+//! Set `UPDATE_SNAPSHOTS=1` to re-bake the baseline.
+
+use std::path::PathBuf;
+
+use kebab_chunk::CodeCppAstV1Chunker;
+use kebab_core::{
+    AssetId, Block, CanonicalDocument, ChunkPolicy, Chunker, ChunkerVersion, CodeBlock, CommonBlock,
+    Lang, Metadata, ParserVersion, Provenance, SourceSpan, SourceType, TrustLevel, WorkspacePath,
+    id_for_block, id_for_doc,
+};
+use serde_json::Value;
+use time::OffsetDateTime;
+
+fn fixtures_dir() -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("tests")
+        .join("fixtures")
+}
+
+fn fixed_doc() -> CanonicalDocument {
+    let wp = WorkspacePath("projects/record.cpp".into());
+    let aid = AssetId("c".repeat(64));
+    // Pin parser_version so doc_id / block_ids are reproducible.
+    let pv = ParserVersion("code-cpp-v1".into());
+    let doc_id = id_for_doc(&wp, &aid, &pv);
+
+    // Representative units (C++ specific):
+    //   0. includes + namespace opening (lines 1–4, ≤200)
+    //   1. class definition (lines 6–20, ≤200)
+    //   2. template function (lines 22–25, ≤200)
+    //   3. free fn in the `kebab` namespace (lines 27–29, ≤200)
+    //   4. main fn (lines 31–34, ≤200)
+    let raw_units: Vec<(&str, u32, u32, String)> = vec![
+        (
+            "",
+            1,
+            4,
+            "#include \n#include \n\nnamespace kebab {".to_string(),
+        ),
+        (
+            "kebab::chunk::MdHeadingV1Chunker",
+            6,
+            20,
+            "class MdHeadingV1Chunker {\npublic:\n MdHeadingV1Chunker() = default;\n ~MdHeadingV1Chunker() = default;\n\n std::string chunk_doc(const std::string& doc) {\n return doc;\n }\n\n int operator()(int x) const {\n return x * 2;\n }\n\nprivate:\n int counter_ = 0;\n};".to_string(),
+        ),
+        (
+            "kebab::identity",
+            22,
+            25,
+            "template \nT identity(T value) {\n return value;\n}".to_string(),
+        ),
+        (
+            "kebab::global_helper",
+            27,
+            29,
+            "void global_helper() {\n // free function in kebab namespace\n}".to_string(),
+        ),
+        (
+            "main",
+            31,
+            34,
+            "int main() {\n kebab::chunk::MdHeadingV1Chunker c;\n return 0;\n}".to_string(),
+        ),
+    ];
+
+    let blocks: Vec<Block> = raw_units
+        .iter()
+        .enumerate()
+        .map(|(i, (sym, ls, le, code))| {
+            let span = SourceSpan::Code {
+                line_start: *ls,
+                line_end: *le,
+                symbol: Some((*sym).to_string()),
+                lang: Some("cpp".into()),
+            };
+            let bid = id_for_block(&doc_id, "code", &[], i as u32, &span);
+            Block::Code(CodeBlock {
+                common: CommonBlock {
+                    block_id: bid,
+                    heading_path: vec![],
+                    source_span: span,
+                },
+                lang: Some("cpp".into()),
+                code: code.clone(),
+            })
+        })
+        .collect();
+
+    CanonicalDocument {
+        doc_id,
+        source_asset_id: aid,
+        workspace_path: wp,
+        title: "record.cpp".into(),
+        lang: Lang("und".into()),
+        blocks,
+        metadata: Metadata {
+            aliases: vec![],
+            tags: vec![],
+            created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
+            updated_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
+            source_type: SourceType::Note,
+            trust_level: TrustLevel::Primary,
+            user_id_alias: None,
+            user: Default::default(),
+            repo: Some("kebab".into()),
+            git_branch: Some("main".into()),
+            git_commit: Some("0".repeat(40)),
+            code_lang: Some("cpp".into()),
+        },
+        provenance: Provenance { events: vec![] },
+        parser_version: pv,
+        schema_version: 1,
+        doc_version: 1,
+        last_chunker_version: None,
+        last_embedding_version: None,
+    }
+}
+
+fn fixed_policy() -> ChunkPolicy {
+    ChunkPolicy {
+        target_tokens: 500,
+        overlap_tokens: 80,
+        respect_markdown_headings: false,
+        chunker_version: ChunkerVersion("code-cpp-ast-v1".into()),
+    }
+}
+
+#[test]
+fn code_cpp_ast_chunks_snapshot() {
+    let doc = fixed_doc();
+    let policy = fixed_policy();
+
+    let chunks = CodeCppAstV1Chunker.chunk(&doc, &policy).expect("chunk");
+    let actual = serde_json::to_value(&chunks).unwrap();
+
+    let dir = fixtures_dir();
+    let baseline_path = dir.join("code-sample.cpp.chunks.snapshot.json");
+    let baseline_text = match std::fs::read_to_string(&baseline_path) {
+        Ok(s) => s,
+        Err(_) if std::env::var("UPDATE_SNAPSHOTS").is_ok() => {
+            std::fs::create_dir_all(&dir).unwrap();
+            let pretty = serde_json::to_string_pretty(&actual).unwrap();
+            std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap();
+            return;
+        }
+        Err(e) => panic!(
+            "missing baseline {}; run with UPDATE_SNAPSHOTS=1 to create: {e}",
+            baseline_path.display()
+        ),
+    };
+    let expected: Value = serde_json::from_str(&baseline_text).expect("baseline parses as json");
+
+    if actual != expected {
+        if std::env::var("UPDATE_SNAPSHOTS").is_ok() {
+            let pretty = serde_json::to_string_pretty(&actual).unwrap();
+            std::fs::write(&baseline_path, format!("{pretty}\n")).unwrap();
+            eprintln!("updated baseline {}", baseline_path.display());
+            return;
+        }
+        let pretty = serde_json::to_string_pretty(&actual).unwrap();
+        panic!(
+            "code-cpp-ast-v1 chunks snapshot drift\n\
+             --- expected ({}) ---\n{baseline_text}\n\
+             --- actual ---\n{pretty}\n\
+             If intentional, re-run with UPDATE_SNAPSHOTS=1.",
+            baseline_path.display()
+        );
+    }
+}
+
+/// Determinism cross-check: re-running the same pipeline yields the same
+/// chunk_ids byte-for-byte.
+#[test]
+fn code_cpp_ast_chunks_are_deterministic() {
+    let policy = fixed_policy();
+    let baseline: Vec<String> = CodeCppAstV1Chunker
+        .chunk(&fixed_doc(), &policy)
+        .unwrap()
+        .into_iter()
+        .map(|c| c.chunk_id.0)
+        .collect();
+    for _ in 0..5 {
+        let again: Vec<String> = CodeCppAstV1Chunker
+            .chunk(&fixed_doc(), &policy)
+            .unwrap()
+            .into_iter()
+            .map(|c| c.chunk_id.0)
+            .collect();
+        assert_eq!(again, baseline);
+    }
+}
diff --git a/crates/kebab-chunk/tests/fixtures/code-sample.cpp.chunks.snapshot.json b/crates/kebab-chunk/tests/fixtures/code-sample.cpp.chunks.snapshot.json
new file mode 100644
index 0000000..257d6e9
--- /dev/null
+++ b/crates/kebab-chunk/tests/fixtures/code-sample.cpp.chunks.snapshot.json
@@ -0,0 +1,107 @@
+[
+  {
+    "block_ids": [
+      "53292605459065d170cd36c118e20546"
+    ],
+    "chunk_id": "50a5b324300d9082eac4ce2a422810e1",
+    "chunker_version": "code-cpp-ast-v1",
+    "doc_id": "fff1e1f0a7ff70ef682937470e5d1d28",
+    "heading_path": [],
+    "policy_hash": "71f3c07bb9ec1d09",
+    "source_spans": [
+      {
+        "kind": "code",
+        "lang": "cpp",
+        "line_end": 4,
+        "line_start": 1,
+        "symbol": ""
+      }
+    ],
+    "text": "#include \n#include \n\nnamespace kebab {",
+    "token_estimate": 18
+  },
+  {
+    "block_ids": [
+      "f349acad94c9fa4cf9ad1c0a93e83610"
+    ],
+    "chunk_id": "0e6bc7c522665af8a4b0f66afb9d29c8",
+    "chunker_version": "code-cpp-ast-v1",
+    "doc_id": "fff1e1f0a7ff70ef682937470e5d1d28",
+    "heading_path": [],
+    "policy_hash": "71f3c07bb9ec1d09",
+    "source_spans": [
+      {
+        "kind": "code",
+        "lang": "cpp",
+        "line_end": 20,
+        "line_start": 6,
+        "symbol": "kebab::chunk::MdHeadingV1Chunker"
+      }
+    ],
+    "text": "class MdHeadingV1Chunker {\npublic:\n MdHeadingV1Chunker() = default;\n ~MdHeadingV1Chunker() = default;\n\n std::string chunk_doc(const std::string& doc) {\n return doc;\n }\n\n int operator()(int x) const {\n return x * 2;\n }\n\nprivate:\n int counter_ = 0;\n};",
+    "token_estimate": 95
+  },
+  {
+    "block_ids": [
+      "8b9811387717d0bd4abf84abcc35b8b1"
+    ],
+    "chunk_id": "d9326d252905b665b2adb9a416c20451",
+    "chunker_version": "code-cpp-ast-v1",
+    "doc_id": "fff1e1f0a7ff70ef682937470e5d1d28",
+    "heading_path": [],
+    "policy_hash": "71f3c07bb9ec1d09",
+    "source_spans": [
+      {
+        "kind": "code",
+        "lang": "cpp",
+        "line_end": 25,
+        "line_start": 22,
+        "symbol": "kebab::identity"
+      }
+    ],
+    "text": "template \nT identity(T value) {\n return value;\n}",
+    "token_estimate": 21
+  },
+  {
+    "block_ids": [
+      "1754cb6b971f6a4cb292f144a4f0570b"
+    ],
+    "chunk_id": "56ee5f991de4a413c016da8dc4acfc35",
+    "chunker_version": "code-cpp-ast-v1",
+    "doc_id": "fff1e1f0a7ff70ef682937470e5d1d28",
+    "heading_path": [],
+    "policy_hash": "71f3c07bb9ec1d09",
+    "source_spans": [
+      {
+        "kind": "code",
+        "lang": "cpp",
+        "line_end": 29,
+        "line_start": 27,
+        "symbol": "kebab::global_helper"
+      }
+    ],
+    "text": "void global_helper() {\n // free function in kebab namespace\n}",
+    "token_estimate": 22
+  },
+  {
+    "block_ids": [
+      "14b5f3393d6d25f822f5b70763d24acd"
+    ],
+    "chunk_id": "c0d7c043cdd575c530db3909b54cc906",
+    "chunker_version": "code-cpp-ast-v1",
+    "doc_id": "fff1e1f0a7ff70ef682937470e5d1d28",
+    "heading_path": [],
+    "policy_hash": "71f3c07bb9ec1d09",
+    "source_spans": [
+      {
+        "kind": "code",
+        "lang": "cpp",
+        "line_end": 34,
+        "line_start": 31,
+        "symbol": "main"
+      }
+    ],
+    "text": "int main() {\n kebab::chunk::MdHeadingV1Chunker c;\n return 0;\n}",
+    "token_estimate": 23
+  }
+]
diff --git a/crates/kebab-chunk/tests/fixtures/sample.cpp b/crates/kebab-chunk/tests/fixtures/sample.cpp
new file mode 100644
index 0000000..2b95a60
--- /dev/null
+++ b/crates/kebab-chunk/tests/fixtures/sample.cpp
@@ -0,0 +1,40 @@
+#include
+#include
+
+namespace kebab {
+namespace chunk {
+
+class MdHeadingV1Chunker {
+public:
+    MdHeadingV1Chunker() = default;
+    ~MdHeadingV1Chunker() = default;
+
+    std::string chunk_doc(const std::string& doc) {
+        return doc;
+    }
+
+    int operator()(int x) const {
+        return x * 2;
+    }
+
+private:
+    int counter_ = 0;
+};
+
+template
+T identity(T value) {
+    return value;
+}
+
+} // namespace chunk
+
+void global_helper() {
+    // free function in kebab namespace
+}
+
+} // namespace kebab
+
+int main() {
+    kebab::chunk::MdHeadingV1Chunker c;
+    return 0;
+}