chore(p10-1d-followup): reviewer nit cleanup — C extractor tests + HOTFIXES + cpp snapshot (#157)
This commit was merged in pull request #157.
This commit is contained in:
@@ -16,12 +16,13 @@ tracing = { workspace = true }
|
||||
serde_yaml = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
# kb-parse-md / kb-normalize are dev-only — used by the snapshot integration
|
||||
# test to build a CanonicalDocument from a fixture Markdown file. Forbidden as
|
||||
# regular deps per design §8 (chunker consumes CanonicalDocument from kb-core
|
||||
# only); `cargo tree -p kb-chunk --depth 1` (default scope, excludes dev-deps)
|
||||
# confirms this.
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
kebab-normalize = { path = "../kebab-normalize" }
|
||||
serde_json = { workspace = true }
|
||||
time = { workspace = true }
|
||||
# kebab-parse-md / kebab-normalize / kebab-parse-code are dev-only — used by the
# snapshot integration tests to build a CanonicalDocument from fixture files.
# Forbidden as regular deps per design §8 (chunker consumes CanonicalDocument
# from kebab-core only); `cargo tree -p kebab-chunk --depth 1` (default scope,
# excludes dev-deps) confirms this.
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
kebab-parse-code = { path = "../kebab-parse-code" }
|
||||
kebab-normalize = { path = "../kebab-normalize" }
|
||||
serde_json = { workspace = true }
|
||||
time = { workspace = true }
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
//! Snapshot test pinning the `Vec<Chunk>` JSON for a
|
||||
//! representative C++ code `CanonicalDocument`.
|
||||
//!
|
||||
//! This is an integration test. `kebab-parse-code` is intentionally NOT
|
||||
//! a dev-dep (design §6.3 / §8 boundary: AST extraction is parser-side).
|
||||
//! The `CanonicalDocument` is built inline from hand-crafted `Block::Code`
|
||||
//! units, which is the same pattern used in `code_c_ast_v1.rs`'s
|
||||
//! internal `code_doc` test helper.
|
||||
//! Two complementary tests:
|
||||
//! 1. `code_cpp_ast_chunks_snapshot` — hand-built `fixed_doc()` validates the
|
||||
//! chunker's 1:1 mapping (design §6.3 / §8 boundary: no parse-code dep needed).
|
||||
//! 2. `code_cpp_ast_extractor_snapshot` — invokes `CppAstExtractor` against the
|
||||
//! real `tests/fixtures/sample.cpp` fixture, validating the extractor → chunker
|
||||
//! end-to-end pipeline. `kebab-parse-code` is a dev-dep (same pattern as
|
||||
//! `kebab-parse-md` in Markdown snapshot tests).
|
||||
//!
|
||||
//! Set `UPDATE_SNAPSHOTS=1` to re-bake the baseline.
|
||||
|
||||
@@ -17,6 +19,7 @@ use kebab_core::{
|
||||
Lang, Metadata, ParserVersion, Provenance, SourceSpan, SourceType, TrustLevel, WorkspacePath,
|
||||
id_for_block, id_for_doc,
|
||||
};
|
||||
use kebab_parse_code::CppAstExtractor;
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -134,6 +137,47 @@ fn fixed_policy() -> ChunkPolicy {
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: run the real CppAstExtractor against tests/fixtures/sample.cpp
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
fn extract_cpp_fixture() -> CanonicalDocument {
|
||||
use kebab_core::{
|
||||
AssetId, AssetStorage, Checksum, ExtractConfig, ExtractContext, Extractor, RawAsset,
|
||||
SourceUri, WorkspacePath,
|
||||
};
|
||||
use std::path::PathBuf;
|
||||
|
||||
let bytes = std::fs::read(fixtures_dir().join("sample.cpp")).expect("read sample.cpp fixture");
|
||||
let src = String::from_utf8(bytes).expect("fixture is valid UTF-8");
|
||||
let wp = WorkspacePath("tests/fixtures/sample.cpp".to_string());
|
||||
let asset = RawAsset {
|
||||
asset_id: AssetId("e".repeat(64)),
|
||||
source_uri: SourceUri::File(PathBuf::from("tests/fixtures/sample.cpp")),
|
||||
workspace_path: wp,
|
||||
media_type: kebab_core::MediaType::Code("cpp".to_string()),
|
||||
byte_len: src.len() as u64,
|
||||
checksum: Checksum("f".repeat(64)),
|
||||
discovered_at: time::OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
|
||||
stored: AssetStorage::Reference {
|
||||
path: PathBuf::from("tests/fixtures/sample.cpp"),
|
||||
sha: Checksum("f".repeat(64)),
|
||||
},
|
||||
};
|
||||
let cfg = ExtractConfig::default();
|
||||
let root = PathBuf::from("/tmp");
|
||||
let ctx = ExtractContext {
|
||||
asset: &asset,
|
||||
workspace_root: &root,
|
||||
config: &cfg,
|
||||
};
|
||||
CppAstExtractor::new().extract(&ctx, src.as_bytes()).unwrap()
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 1 (hand-built): chunker-only 1:1 mapping validation
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn code_cpp_ast_chunks_snapshot() {
|
||||
let doc = fixed_doc();
|
||||
@@ -198,3 +242,84 @@ fn code_cpp_ast_chunks_are_deterministic() {
|
||||
assert_eq!(again, baseline);
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test 2 (real extractor): end-to-end extractor → chunker pipeline
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Validates that the real `CppAstExtractor` processes `sample.cpp`, emits
/// every expected named unit, and that the resulting document can be fed
/// through `CodeCppAstV1Chunker`.
///
/// Despite the `_snapshot` suffix, no JSON baseline is compared here: the
/// test asserts symbol presence on the extracted blocks and then smoke-runs
/// the chunker on the same document.
///
/// `sample.cpp` contains:
/// - `#include` directives + nested namespace `kebab::chunk` → glue + struct unit
/// - `class MdHeadingV1Chunker` with methods (ctor, dtor, chunk_doc, operator())
/// - `template <typename T> T identity(T value)` (template fn)
/// - `void kebab::global_helper()` (free fn in namespace)
/// - `int main()` (global free fn)
#[test]
fn code_cpp_ast_extractor_snapshot() {
    // (label used in the failure message, fully-qualified symbol expected)
    const EXPECTED_UNITS: [(&str, &str); 8] = [
        // Namespace-qualified class and its methods.
        ("class", "kebab::chunk::MdHeadingV1Chunker"),
        ("ctor", "kebab::chunk::MdHeadingV1Chunker::MdHeadingV1Chunker"),
        ("dtor", "kebab::chunk::MdHeadingV1Chunker::~MdHeadingV1Chunker"),
        ("chunk_doc", "kebab::chunk::MdHeadingV1Chunker::chunk_doc"),
        ("operator()", "kebab::chunk::MdHeadingV1Chunker::operator()"),
        // Template function (inside kebab::chunk namespace in the fixture).
        ("identity template fn", "kebab::chunk::identity"),
        // Free function in outer namespace.
        ("global_helper", "kebab::global_helper"),
        // Global main.
        ("main", "main"),
    ];

    let doc = extract_cpp_fixture();

    // Symbols of every code block that carries one, in document order.
    let block_syms: Vec<String> = doc
        .blocks
        .iter()
        .filter_map(|b| match b {
            Block::Code(c) => match &c.common.source_span {
                SourceSpan::Code { symbol, .. } => symbol.clone(),
                _ => None,
            },
            _ => None,
        })
        .collect();

    for &(label, symbol) in &EXPECTED_UNITS {
        assert!(
            block_syms.iter().any(|s| s.as_str() == symbol),
            "{label} unit missing: {block_syms:?}"
        );
    }

    // Exercise the chunker half of the pipeline on the real extractor output.
    let chunks = CodeCppAstV1Chunker
        .chunk(&doc, &fixed_policy())
        .expect("chunker failed on real extractor output");
    assert!(!chunks.is_empty(), "chunker produced no chunks for sample.cpp");
}
|
||||
|
||||
/// The extractor → chunker pipeline yields the same result on repeated runs.
///
/// The fixture is extracted twice and the two block lists must be equal;
/// both documents are then chunked under the same policy and the resulting
/// chunk-id sequences must be equal.
#[test]
fn code_cpp_ast_extractor_chunks_deterministic() {
    let first = extract_cpp_fixture();
    let second = extract_cpp_fixture();
    assert_eq!(first.blocks, second.blocks, "extractor output non-deterministic");

    let policy = fixed_policy();
    // Only the chunk ids are compared, not the full chunk contents.
    let ids_of = |doc: &CanonicalDocument| -> Vec<_> {
        let chunks = CodeCppAstV1Chunker.chunk(doc, &policy).unwrap();
        chunks.iter().map(|chunk| chunk.chunk_id.0.clone()).collect()
    };
    let first_ids = ids_of(&first);
    let second_ids = ids_of(&second);
    assert_eq!(first_ids, second_ids, "chunker output non-deterministic");
}
|
||||
|
||||
@@ -333,5 +333,260 @@ fn flush_glue(glue: &mut Vec<(u32, u32)>, units: &mut Vec<(String, u32, u32, boo
|
||||
glue.clear();
|
||||
}
|
||||
|
||||
// Tests for CAstExtractor (snapshot + unit assertions) are added in Task D
|
||||
// alongside the C fixture file. This module is intentionally empty until then.
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Shared test fixtures: a synthetic `RawAsset` builder and a one-call
/// wrapper around `CAstExtractor`.
#[cfg(test)]
pub(crate) mod tests_support {
    use kebab_core::*;
    use std::path::PathBuf;
    use time::OffsetDateTime;

    /// Builds a `RawAsset` of media type `Code(lang)` located at
    /// `workspace_path`.
    ///
    /// Everything except the path and language is fixed: filler id and
    /// checksums, a `byte_len` of 0, and a fixed `discovered_at` timestamp.
    pub fn fixed_code_asset(workspace_path: &str, lang: &str) -> RawAsset {
        let discovered_at = OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap();
        let stored = AssetStorage::Reference {
            path: PathBuf::from(workspace_path),
            sha: Checksum("b".repeat(64)),
        };
        RawAsset {
            asset_id: AssetId("a".repeat(64)),
            source_uri: SourceUri::File(PathBuf::from(workspace_path)),
            workspace_path: WorkspacePath(workspace_path.to_owned()),
            media_type: MediaType::Code(lang.to_owned()),
            byte_len: 0,
            checksum: Checksum("b".repeat(64)),
            discovered_at,
            stored,
        }
    }

    /// Runs `CAstExtractor` over `src`, presenting it as a C asset located at
    /// `path`, and returns the extracted document.
    ///
    /// Panics if the extractor returns an error.
    pub fn extract_c(src: &str, path: &str) -> kebab_core::CanonicalDocument {
        use super::CAstExtractor;
        use kebab_core::Extractor;

        let raw_asset = fixed_code_asset(path, "c");
        let extract_cfg = ExtractConfig::default();
        let workspace_root = PathBuf::from("/tmp");
        let context = ExtractContext {
            asset: &raw_asset,
            workspace_root: &workspace_root,
            config: &extract_cfg,
        };
        let outcome = CAstExtractor::new().extract(&context, src.as_bytes());
        outcome.unwrap()
    }
}
|
||||
|
||||
/// Unit tests for `CAstExtractor`: which C constructs become named units and
/// which ones are reported as glue (`<module>` / `<top-level>`).
#[cfg(test)]
mod tests {
    use super::*;
    use kebab_core::{Block, MediaType, SourceSpan};

    /// Collects, in document order, the symbol of every `Block::Code` whose
    /// span is a `SourceSpan::Code` carrying a symbol.
    fn syms(doc: &kebab_core::CanonicalDocument) -> Vec<String> {
        let mut found = Vec::new();
        for block in doc.blocks.iter() {
            if let Block::Code(code) = block {
                if let SourceSpan::Code { symbol, .. } = &code.common.source_span {
                    // `symbol` is optional; blocks without one are skipped.
                    found.extend(symbol.clone());
                }
            }
        }
        found
    }

    /// True when `symbols` contains an entry equal to `name`.
    fn has(symbols: &[String], name: &str) -> bool {
        symbols.iter().any(|s| s == name)
    }

    #[test]
    fn extractor_supports_only_media_code_c() {
        let extractor = CAstExtractor::new();
        assert!(extractor.supports(&MediaType::Code("c".to_string())));
        assert!(!extractor.supports(&MediaType::Code("cpp".to_string())));
        assert!(!extractor.supports(&MediaType::Code("rust".to_string())));
        assert!(!extractor.supports(&MediaType::Markdown));
    }

    #[test]
    fn c_extractor_simple_function() {
        let doc = tests_support::extract_c("int add(int a, int b) { return a + b; }\n", "x/math.c");
        let s = syms(&doc);
        assert!(has(&s, "add"), "got {s:?}");
    }

    #[test]
    fn c_extractor_pointer_return_function() {
        let doc =
            tests_support::extract_c("int *find(int *arr, int n) { return arr; }\n", "x/find.c");
        let s = syms(&doc);
        assert!(has(&s, "find"), "ptr-return fn missing: {s:?}");
    }

    #[test]
    fn c_extractor_static_function() {
        let doc = tests_support::extract_c("static void helper(void) {}\n", "x/helper.c");
        let s = syms(&doc);
        assert!(has(&s, "helper"), "static fn missing: {s:?}");
    }

    #[test]
    fn c_extractor_extern_function() {
        // An `extern` prototype is a declaration, not a definition, so it is
        // expected to land in glue; with nothing else in the file the glue is
        // reported as "<module>".
        let doc = tests_support::extract_c("extern int compute(int x);\n", "x/compute.c");
        let s = syms(&doc);
        assert!(has(&s, "<module>"), "expected <module> for extern proto: {s:?}");
    }

    #[test]
    fn c_extractor_inline_function() {
        let doc =
            tests_support::extract_c("inline int square(int x) { return x * x; }\n", "x/square.c");
        let s = syms(&doc);
        assert!(has(&s, "square"), "inline fn missing: {s:?}");
    }

    #[test]
    fn c_extractor_named_struct() {
        let doc = tests_support::extract_c("struct Point { int x; int y; };\n", "x/point.c");
        let s = syms(&doc);
        assert!(has(&s, "Point"), "struct missing: {s:?}");
    }

    #[test]
    fn c_extractor_named_enum() {
        let doc = tests_support::extract_c("enum Color { RED, GREEN, BLUE };\n", "x/color.c");
        let s = syms(&doc);
        assert!(has(&s, "Color"), "enum missing: {s:?}");
    }

    #[test]
    fn c_extractor_named_union() {
        let doc = tests_support::extract_c("union Data { int i; float f; };\n", "x/data.c");
        let s = syms(&doc);
        assert!(has(&s, "Data"), "union missing: {s:?}");
    }

    #[test]
    fn c_extractor_anonymous_struct_falls_into_glue() {
        // A variable declared with an anonymous struct type has no struct name
        // to emit, so the file is expected to contain glue only ("<module>").
        let doc = tests_support::extract_c("struct { int x; int y; } origin;\n", "x/anon.c");
        let s = syms(&doc);
        assert!(has(&s, "<module>"), "expected <module> for anon struct: {s:?}");
        // The declared variable's name must not be promoted to a unit.
        assert!(!has(&s, "origin"), "unexpected 'origin' unit: {s:?}");
    }

    #[test]
    fn c_extractor_typedef_struct_falls_into_glue() {
        // `typedef struct { ... } Foo;` — the inner struct_specifier is
        // anonymous and the outer node is a type_definition, so the whole
        // declaration is glue. See HOTFIXES.md 2026-05-21.
        let doc =
            tests_support::extract_c("typedef struct { int x; int y; } Point;\n", "x/typedef.c");
        let s = syms(&doc);
        assert!(has(&s, "<module>"), "expected <module> for typedef struct: {s:?}");
        // The typedef alias must not surface as a Code symbol.
        assert!(!has(&s, "Point"), "unexpected 'Point' unit for typedef struct: {s:?}");
    }

    #[test]
    fn c_extractor_preprocessor_directives_are_glue() {
        let doc = tests_support::extract_c(
            "#include <stdio.h>\n#define MAX 100\n#ifdef DEBUG\n#endif\n",
            "x/macros.c",
        );
        let s = syms(&doc);
        // Preprocessor lines only: no real unit, a single "<module>" block.
        assert!(has(&s, "<module>"), "expected <module> for preproc-only file: {s:?}");
        assert_eq!(s.len(), 1, "expected exactly 1 block: {s:?}");
    }

    #[test]
    fn c_extractor_multiple_functions_correct_count() {
        let doc = tests_support::extract_c(
            "int foo(void) { return 1; }\nint bar(void) { return 2; }\nint baz(void) { return 3; }\n",
            "x/multi.c",
        );
        let s = syms(&doc);
        assert!(has(&s, "foo"), "foo missing: {s:?}");
        assert!(has(&s, "bar"), "bar missing: {s:?}");
        assert!(has(&s, "baz"), "baz missing: {s:?}");
        assert_eq!(s.len(), 3, "expected 3 units: {s:?}");
    }

    #[test]
    fn c_extractor_empty_file_produces_module() {
        let doc = tests_support::extract_c("", "x/empty.c");
        let s = syms(&doc);
        assert_eq!(s, vec!["<module>"], "expected <module>: got {s:?}");
    }

    #[test]
    fn c_extractor_preprocessor_only_produces_module() {
        let doc = tests_support::extract_c(
            "#include <stdlib.h>\n#define VERSION \"1.0\"\n",
            "x/header.c",
        );
        let s = syms(&doc);
        assert!(has(&s, "<module>"), "expected <module> for preproc-only file: {s:?}");
    }

    #[test]
    fn c_extractor_mixed_functions_and_glue() {
        let src = r#"#include <stdio.h>

int compute(int x) {
return x * 2;
}

extern int lookup(int key);

void print_result(int v) {
printf("%d\n", v);
}
"#;
        let doc = tests_support::extract_c(src, "x/mixed.c");
        let s = syms(&doc);
        // Both function definitions must surface as named units, and the
        // non-definition lines must show up as "<top-level>" glue.
        assert!(has(&s, "compute"), "compute missing: {s:?}");
        assert!(has(&s, "print_result"), "print_result missing: {s:?}");
        assert!(has(&s, "<top-level>"), "<top-level> glue missing: {s:?}");
    }

    #[test]
    fn c_extractor_deterministic_across_runs() {
        let src = r#"
struct Node { int val; };
int sum(int a, int b) { return a + b; }
void noop(void) {}
"#;
        // Every re-extraction must reproduce the first run's blocks exactly.
        let baseline = tests_support::extract_c(src, "x/det.c");
        for _round in 0..20 {
            let rerun = tests_support::extract_c(src, "x/det.c");
            assert_eq!(rerun.blocks, baseline.blocks);
        }
    }
}
|
||||
|
||||
@@ -14,6 +14,20 @@ historical contract that was implemented; this file accumulates the
|
||||
deltas so phase 5+ readers can find the live behavior without diffing
|
||||
git history.
|
||||
|
||||
## 2026-05-21 — p10-1D: typedef-wrapped struct/enum in C falls into glue
|
||||
|
||||
**Origin**: PR #156 (p10-1d) code-reviewer review. Verified during dogfood.
|
||||
|
||||
**Symptom**: `typedef struct { ... } Foo;` (and likewise `typedef enum { ... } Foo;`) in a `.c` file does NOT emit a struct-level unit. tree-sitter-c classifies the construct as a top-level `type_definition` with an *anonymous* inner `struct_specifier` (no `name` field), so the extractor's `struct_specifier` arm doesn't fire — the whole declaration falls into glue (`<top-level>`, or `<module>` when the file contains no named unit at all). The named typedef alias `Foo` is therefore not searchable as a symbol.
|
||||
|
||||
**Status**: Consistent with spec p10-1d-c-cpp-ast-chunker.md's Risks/notes ("Anonymous union / struct … anonymous → glue"), but the spec's main body line 22 ("struct_specifier (named, top-level) → 1 unit") suggests this idiom WOULD emit. Tension noted, not yet fixed.
|
||||
|
||||
**Workaround**: search the struct by its field/function names, or use `--code-lang c` to broaden scope. Typedef-aliased struct names won't surface as `Citation::Code.symbol`.
|
||||
|
||||
**Next step**: dogfood real C code for a week+; if this turns out to be a frequent pain point (kernel-style code, libuv, etc.), revisit the extractor to detect `type_definition` → inner `struct_specifier` and emit a synthetic unit named after the typedef alias.
|
||||
|
||||
Cross-link: `tasks/p10/p10-1d-c-cpp-ast-chunker.md` Risks/notes section.
|
||||
|
||||
## 2026-05-20 — p10-1B: Rust 1A-2 symbol path is file-scope-only; 1B+ uses workspace path → module prefix
|
||||
|
||||
**무엇이 바뀌었나**: P10-1A-2 의 Rust `code-rust-ast-v1` chunker 가 생성하는 symbol 은 file-scope mod-path nesting 만 사용한다 (예: `Foo::double`). P10-1B 이후 Python / TypeScript / JavaScript 의 symbol 은 workspace 경로 → module path prefix 를 포함한다 (예: `kebab_eval.metrics.compute_mrr`, `src/Foo.Foo.search`).
|
||||
|
||||
@@ -113,6 +113,7 @@ crates/kebab-parse-code/Cargo.toml [edit] — 위 2 dep 신규 entry.
|
||||
- **Template specialization** (`template<> class Foo<int>`): tree-sitter-cpp 의 `template_declaration` 안의 `class_specifier` name 만 추출 — `Foo` 만 symbol 에 들어가고 `<int>` 미포함. design 의 generic 무시 룰 일관.
|
||||
- **`extern "C"` block 안의 fn**: 일반 fn 처리. 외부 wrapping block 은 glue.
|
||||
- **Anonymous union / struct** (`struct { int x; }` 변수 안에): 흔치 않음 + named 만 unit. anonymous 는 glue.
|
||||
- **typedef-wrapped struct/enum idiom** (`typedef struct { ... } Foo;`) — anonymous inner struct → glue. Named typedef alias 미캡처. dogfood 후 HOTFIXES 검토. See [HOTFIXES.md 2026-05-21 entry](../HOTFIXES.md).
|
||||
- **Macro-heavy code** (Linux kernel 등): `#define FOO(x) ...` 매크로가 function-like 라도 parser 가 fn 으로 인식 안 함. preprocessor glue 로 처리 — symbol 안 잡힘. 의도된 동작 (parser 의 macro expansion 안 함).
|
||||
- **`__attribute__((...))`** annotations: tree-sitter-c 의 attribute 노드는 declarator 옆 sibling. 무시 가능. function name 추출에 영향 없음.
|
||||
- **fixture 크기**: sample.c 는 ~30 line (top-level fn + struct + enum + preprocessor), sample.cpp 는 ~50 line (nested namespace + class + method + template + free fn). oversize fallback 의 별도 검증은 1A-2 의 long_section_snapshot 패턴이 이미 cover (필요 시 별도 fixture).
|
||||
|
||||
Reference in New Issue
Block a user