feat: v0.17.0 PR-B — C typedef-wrapped struct/enum/union → typedef alias unit #160
@@ -880,6 +880,22 @@ fn try_skip_unchanged(
|
||||
// logic self-documenting and guards against future id_for_doc
|
||||
// changes.
|
||||
if existing_doc.parser_version != *current_parser_version {
|
||||
// v0.17.0 PR-B: parser_version bump cascade. Same bytes (same
|
||||
// asset_id) → asset-keyed `stale_chunk_ids_at` is a no-op, but
|
||||
// the stale `documents` row at this workspace_path still
|
||||
// collides with `idx_docs_workspace_path` on the next INSERT
|
||||
// and the LanceDB rows under the old chunk_ids orphan. Sweep
|
||||
// both stores here, before returning Ok(None), so the caller's
|
||||
// full-ingest path lands a clean slate. The `keep_doc_id = ""`
|
||||
// sentinel removes every doc at this path (the new doc_id is
|
||||
// not yet known here — it's computed downstream from the new
|
||||
// PARSER_VERSION).
|
||||
purge_workspace_path_for_parser_bump(app, asset).with_context(|| {
|
||||
format!(
|
||||
"parser-bump orphan purge at {}",
|
||||
asset.workspace_path.0
|
||||
)
|
||||
})?;
|
||||
return Ok(None);
|
||||
}
|
||||
// 3. Chunker unchanged.
|
||||
@@ -1486,6 +1502,53 @@ fn record_image_analysis_failure(
|
||||
warning_notes.push(note);
|
||||
}
|
||||
|
||||
/// v0.17.0 PR-B: parser-bump cascade. When a code extractor ships a
|
||||
/// new `PARSER_VERSION` (e.g. `code-c-v1` → `code-c-v2`), the same
|
||||
/// (workspace_path, asset_id) pair re-emerges with a fresh `doc_id`.
|
||||
/// The existing asset-keyed [`purge_vector_orphans_for_workspace_path`]
|
||||
/// only fires on asset_id changes (file bytes edited) and is a no-op
|
||||
/// here. Without an explicit doc-keyed sweep the next INSERT raises
|
||||
/// `idx_docs_workspace_path` UNIQUE and the LanceDB rows under the
|
||||
/// stale chunk_ids orphan. This helper:
|
||||
///
|
||||
/// 1. Fetches every stale chunk_id at `workspace_path` from SQLite
|
||||
/// (`keep_doc_id = ""` means "all existing docs are stale" —
|
||||
/// `try_skip_unchanged` calls this before the new doc_id is
|
||||
/// computed).
|
||||
/// 2. Deletes the matching vectors from every Lance table (no-op if
|
||||
/// embeddings are disabled).
|
||||
/// 3. Sweeps the SQLite `documents` row (CASCADE drops `blocks` /
|
||||
/// `chunks` / `embedding_records`). The `assets` row stays — same
|
||||
/// bytes, same asset_id, only the derived `doc_id` changed.
|
||||
fn purge_workspace_path_for_parser_bump(
|
||||
app: &App,
|
||||
asset: &RawAsset,
|
||||
) -> anyhow::Result<()> {
|
||||
let path = &asset.workspace_path.0;
|
||||
let stale = app
|
||||
.sqlite
|
||||
.stale_chunk_ids_for_workspace_path_except_doc_id(path, "")
|
||||
.context("SqliteStore::stale_chunk_ids_for_workspace_path_except_doc_id")?;
|
||||
if !stale.is_empty() {
|
||||
if let Some(vec_store) = app.vector().context("App::vector")? {
|
||||
use kebab_core::VectorStore as _;
|
||||
vec_store
|
||||
.delete_by_chunk_ids(&stale)
|
||||
.context("VectorStore::delete_by_chunk_ids (parser-bump orphans)")?;
|
||||
}
|
||||
}
|
||||
app.sqlite
|
||||
.purge_document_at_workspace_path_except_doc_id(path, "")
|
||||
.context("SqliteStore::purge_document_at_workspace_path_except_doc_id")?;
|
||||
tracing::debug!(
|
||||
target: "kebab-app",
|
||||
path = %path,
|
||||
count = stale.len(),
|
||||
"purged orphan vectors + document for parser_version bump"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// HOTFIXES 2026-05-02 P7-3 follow-up: when a tracked file's bytes
|
||||
/// change, `purge_orphan_at_workspace_path` (in `kebab-store-sqlite`)
|
||||
/// sweeps the SQLite chain (documents → blocks / chunks / embedding_records)
|
||||
|
||||
@@ -1145,8 +1145,8 @@ fn tier1_c_ingest_searchable() {
|
||||
.expect("parser.c item present");
|
||||
assert_eq!(
|
||||
c_item.parser_version.as_ref().map(|p| p.0.as_str()),
|
||||
Some("code-c-v1"),
|
||||
"parser_version must be code-c-v1"
|
||||
Some("code-c-v2"),
|
||||
"parser_version must be code-c-v2 (v0.17.0 PR-B: typedef-wrapped struct/enum/union 이 typedef alias unit 으로 방출)"
|
||||
);
|
||||
assert_eq!(
|
||||
c_item.chunker_version.as_ref().map(|c| c.0.as_str()),
|
||||
|
||||
@@ -31,7 +31,7 @@ use time::OffsetDateTime;
|
||||
|
||||
use crate::scaffold::{filename_from_workspace_path, strip_extension};
|
||||
|
||||
pub const PARSER_VERSION: &str = "code-c-v1";
|
||||
pub const PARSER_VERSION: &str = "code-c-v2";
|
||||
|
||||
/// C AST extractor. Per-unit blocks via tree-sitter-c 0.24.2
|
||||
/// (`LANGUAGE: LanguageFn`) parsed by tree-sitter 0.26.
|
||||
@@ -257,13 +257,33 @@ fn build_blocks(
|
||||
flush_glue(&mut glue, &mut units);
|
||||
units.push((name.to_string(), s, e, true));
|
||||
} else {
|
||||
// Anonymous struct/enum/union — glue.
|
||||
// Anonymous struct/enum/union at the top level (not
|
||||
// wrapped in typedef) — glue. typedef-wrapped case
|
||||
// is recovered in the `type_definition` arm below.
|
||||
glue.push((s, e));
|
||||
}
|
||||
}
|
||||
// Everything else: preprocessor directives, declarations
|
||||
// (typedef / global var / fn prototype), type_definition,
|
||||
// linkage_specification, etc. — all collapse into glue.
|
||||
"type_definition" => {
|
||||
// v0.17.0 PR-B: typedef-wrapped anonymous aggregate
|
||||
// recovery. `typedef struct { ... } Foo;` exposes only
|
||||
// the alias `Foo` as a useful symbol — the inner
|
||||
// struct_specifier has no `name` field. Pre-v0.17.0
|
||||
// this whole construct collapsed into glue and hid the
|
||||
// alias from search (HOTFIXES 2026-05-21). v2 recovers
|
||||
// the alias from the `declarator` field and emits a
|
||||
// synthetic unit so `Citation::Code.symbol = "Foo"`.
|
||||
// Plain `typedef int MyInt;` (no inner aggregate) stays
|
||||
// glue — there's no struct body to name.
|
||||
if let Some(name) = recover_typedef_alias(child, source) {
|
||||
flush_glue(&mut glue, &mut units);
|
||||
units.push((name, s, e, true));
|
||||
} else {
|
||||
glue.push((s, e));
|
||||
}
|
||||
}
|
||||
// Everything else: preprocessor directives, plain declarations
|
||||
// (global var / fn prototype), linkage_specification, etc.
|
||||
// — all collapse into glue.
|
||||
_ => {
|
||||
glue.push((s, e));
|
||||
}
|
||||
@@ -323,6 +343,62 @@ fn build_blocks(
|
||||
Ok(blocks)
|
||||
}
|
||||
|
||||
/// v0.17.0 PR-B: try to recover the typedef alias name from a
|
||||
/// `type_definition` node *iff* the inner type-specifier is an
|
||||
/// anonymous struct/enum/union. Returns `None` for any other shape
|
||||
/// (named aggregate handled elsewhere, plain type alias has no body
|
||||
/// worth naming).
|
||||
fn recover_typedef_alias(node: tree_sitter::Node, source: &str) -> Option<String> {
|
||||
let mut has_anon_aggregate = false;
|
||||
let mut cursor = node.walk();
|
||||
for sub in node.children(&mut cursor) {
|
||||
match sub.kind() {
|
||||
"struct_specifier" | "enum_specifier" | "union_specifier" => {
|
||||
if sub.child_by_field_name("name").is_none() {
|
||||
has_anon_aggregate = true;
|
||||
} else {
|
||||
// Named inner aggregate (e.g. `typedef struct Pt {...} P;`)
|
||||
// — the named struct itself is the primary symbol and
|
||||
// is *not* extracted at the top level today (it lives
|
||||
// inside `type_definition`, not as a sibling
|
||||
// `struct_specifier`). For v2 we keep behavior conservative:
|
||||
// return None so the type_definition stays glue, matching
|
||||
// pre-v2 behavior for this minor case. Real-world C tends
|
||||
// to use one of: bare named struct, typedef alias only,
|
||||
// or typedef on anonymous body — the latter is what we fix.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
if !has_anon_aggregate {
|
||||
return None;
|
||||
}
|
||||
let decl = node.child_by_field_name("declarator")?;
|
||||
extract_typedef_alias_name(decl, source).map(str::to_string)
|
||||
}
|
||||
|
||||
/// Extract the typedef alias identifier from a declarator subtree.
|
||||
/// Handles the common shapes: direct `type_identifier`, or one wrapped
|
||||
/// in pointer / function declarator nodes (the alias is always the
|
||||
/// rightmost `type_identifier` descendant).
|
||||
fn extract_typedef_alias_name<'a>(
|
||||
decl: tree_sitter::Node,
|
||||
source: &'a str,
|
||||
) -> Option<&'a str> {
|
||||
if decl.kind() == "type_identifier" {
|
||||
return Some(&source[decl.start_byte()..decl.end_byte()]);
|
||||
}
|
||||
let mut cursor = decl.walk();
|
||||
for sub in decl.children(&mut cursor) {
|
||||
if let Some(found) = extract_typedef_alias_name(sub, source) {
|
||||
return Some(found);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn flush_glue(glue: &mut Vec<(u32, u32)>, units: &mut Vec<(String, u32, u32, bool)>) {
|
||||
if glue.is_empty() {
|
||||
return;
|
||||
@@ -489,20 +565,72 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn c_extractor_typedef_struct_falls_into_glue() {
|
||||
// typedef struct { ... } Foo; — inner struct_specifier is anonymous,
|
||||
// outer node is type_definition → glue. See HOTFIXES.md 2026-05-21.
|
||||
fn c_extractor_typedef_struct_emits_unit() {
|
||||
// v0.17.0 PR-B: `typedef struct { ... } Foo;` was previously a
|
||||
// hotfix-tracked deviation (HOTFIXES.md 2026-05-21) — the inner
|
||||
// struct_specifier is anonymous so the named-struct arm didn't
|
||||
// fire, dropping the whole construct into glue and hiding the
|
||||
// `Foo` alias from symbol search. The v2 extractor recovers the
|
||||
// typedef alias from the `declarator` field on the
|
||||
// `type_definition` node and emits a synthetic unit with that
|
||||
// name. parser_version bumped `code-c-v1` → `code-c-v2`.
|
||||
let src = "typedef struct { int x; int y; } Point;\n";
|
||||
let doc = tests_support::extract_c(src, "x/typedef.c");
|
||||
let s = syms(&doc);
|
||||
// The typedef alias surfaces as a Code symbol.
|
||||
assert!(
|
||||
s.iter().any(|x| x == "Point"),
|
||||
"expected 'Point' unit from typedef alias: {s:?}"
|
||||
);
|
||||
// No `<module>` (the file has exactly one semantic unit now,
|
||||
// the typedef alias — no glue-only fallback needed).
|
||||
assert!(
|
||||
!s.iter().any(|x| x == "<module>"),
|
||||
"no <module> fallback expected when typedef emits a unit: {s:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn c_extractor_typedef_enum_emits_unit() {
    // `typedef enum { A, B } Color;` takes the same typedef-alias
    // synthesis path as the struct case, so a unit named `Color` must
    // appear among the extracted symbols.
    let doc = tests_support::extract_c("typedef enum { A, B } Color;\n", "x/typedef_enum.c");
    let s = syms(&doc);
    let alias_present = s.iter().any(|x| x == "Color");
    assert!(
        alias_present,
        "expected 'Color' unit from typedef enum alias: {s:?}"
    );
}
|
||||
|
||||
#[test]
fn c_extractor_typedef_union_emits_unit() {
    // Same check for `union_specifier`: the alias `IntOrFloat` of an
    // anonymous union must appear among the extracted symbols.
    let doc = tests_support::extract_c(
        "typedef union { int i; float f; } IntOrFloat;\n",
        "x/typedef_union.c",
    );
    let s = syms(&doc);
    let alias_present = s.iter().any(|x| x == "IntOrFloat");
    assert!(
        alias_present,
        "expected 'IntOrFloat' unit from typedef union alias: {s:?}"
    );
}
|
||||
|
||||
#[test]
|
||||
fn c_extractor_typedef_to_existing_type_stays_glue() {
|
||||
// Negative case: `typedef int MyInt;` has no inner struct/enum/
|
||||
// union — there's no struct body to attach the alias to, so the
|
||||
// construct falls into glue (becomes `<module>` when alone).
|
||||
// Confirms the new arm only fires for anonymous-struct typedef.
|
||||
let src = "typedef int MyInt;\n";
|
||||
let doc = tests_support::extract_c(src, "x/typedef_alias.c");
|
||||
let s = syms(&doc);
|
||||
assert!(
|
||||
s.iter().any(|x| x == "<module>"),
|
||||
"expected <module> for typedef struct: {s:?}"
|
||||
"expected <module> for plain typedef alias: {s:?}"
|
||||
);
|
||||
// The typedef alias should NOT surface as a Code symbol
|
||||
assert!(
|
||||
!s.iter().any(|x| x == "Point"),
|
||||
"unexpected 'Point' unit for typedef struct: {s:?}"
|
||||
!s.iter().any(|x| x == "MyInt"),
|
||||
"plain typedef alias must not emit a unit: {s:?}"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -464,6 +464,74 @@ impl SqliteStore {
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// v0.17.0 PR-B: sister of [`Self::stale_chunk_ids_at`] for the
|
||||
/// `parser_version` bump cascade. When `doc_id` depends on
|
||||
/// `parser_version` (design §9) and an extractor ships a new
|
||||
/// `PARSER_VERSION`, the next ingest computes a fresh `doc_id` for
|
||||
/// the *same* `(workspace_path, asset_id)` pair. The existing
|
||||
/// asset_id-keyed [`Self::stale_chunk_ids_at`] does NOT fire (same
|
||||
/// asset), so the legacy `chunks` rows and their LanceDB shadows
|
||||
/// would orphan. This helper queries by `workspace_path` instead,
|
||||
/// excluding the freshly-computed `keep_doc_id` so a re-entry
|
||||
/// during the same ingest doesn't re-sweep the new row.
|
||||
///
|
||||
/// Caller usage: pass the *new* `doc_id` if known; pass an empty
|
||||
/// string when called before the new INSERT (the case in
|
||||
/// `try_skip_unchanged`) — all existing docs at `workspace_path`
|
||||
/// are then collected as stale.
|
||||
pub fn stale_chunk_ids_for_workspace_path_except_doc_id(
|
||||
&self,
|
||||
workspace_path: &str,
|
||||
keep_doc_id: &str,
|
||||
) -> Result<Vec<kebab_core::ChunkId>> {
|
||||
let conn = self.lock_conn();
|
||||
let mut stmt = conn
|
||||
.prepare(
|
||||
"SELECT c.chunk_id
|
||||
FROM chunks c
|
||||
INNER JOIN documents d ON c.doc_id = d.doc_id
|
||||
WHERE d.workspace_path = ?1 AND d.doc_id != ?2",
|
||||
)
|
||||
.map_err(StoreError::from)?;
|
||||
let rows = stmt
|
||||
.query_map(params![workspace_path, keep_doc_id], |row| {
|
||||
row.get::<_, String>(0)
|
||||
})
|
||||
.map_err(StoreError::from)?;
|
||||
let mut out: Vec<kebab_core::ChunkId> = Vec::new();
|
||||
for row in rows {
|
||||
let id = row.map_err(StoreError::from)?;
|
||||
out.push(kebab_core::ChunkId(id));
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// v0.17.0 PR-B: sweep the SQLite document chain (`documents` →
|
||||
/// `blocks` / `chunks` / `embedding_records` via CASCADE) for every
|
||||
/// row at `workspace_path` whose `doc_id` differs from `keep_doc_id`.
|
||||
/// Pair with [`Self::stale_chunk_ids_for_workspace_path_except_doc_id`]
|
||||
/// — caller fetches the chunk_ids first, hands them to
|
||||
/// `VectorStore::delete_by_chunk_ids`, then calls this sweep.
|
||||
/// `assets` row is preserved (same bytes, same asset_id — only the
|
||||
/// derived `doc_id` changed).
|
||||
///
|
||||
/// `keep_doc_id = ""` deletes every doc at `workspace_path`
|
||||
/// (semantics mirror the sister helper above — used by
|
||||
/// `try_skip_unchanged` before the new INSERT exists).
|
||||
pub fn purge_document_at_workspace_path_except_doc_id(
|
||||
&self,
|
||||
workspace_path: &str,
|
||||
keep_doc_id: &str,
|
||||
) -> Result<()> {
|
||||
let conn = self.lock_conn();
|
||||
conn.execute(
|
||||
"DELETE FROM documents WHERE workspace_path = ?1 AND doc_id != ?2",
|
||||
params![workspace_path, keep_doc_id],
|
||||
)
|
||||
.map_err(StoreError::from)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Sweep stale `assets` + `documents` + downstream rows when the file
|
||||
|
||||
@@ -64,6 +64,20 @@ multi-root 도그푸딩(2026-05-20)에서 관찰한 본문 vs 테스트 / glue c
|
||||
|
||||
Cross-link: `tasks/p10/INDEX.md`, `migrations/V002__fts.sql`, design §5.5 / §3.5.
|
||||
|
||||
## 2026-05-24 — v0.17.0 PR-B: C typedef-wrapped struct/enum/union 이 typedef alias unit 으로 방출 (closure of 2026-05-21)
|
||||
|
||||
`crates/kebab-parse-code/src/c.rs::build_blocks` 에 `type_definition` 분기 추가. 내부 anonymous `struct_specifier` / `enum_specifier` / `union_specifier` (name field 없음) 인 typedef 일 때 declarator 의 typedef alias identifier 를 추출해 synthetic unit 방출. named inner aggregate (`typedef struct Pt { ... } P;`) 와 plain alias (`typedef int MyInt;`) 는 기존대로 glue (top-level typedef-wrapped anonymous aggregate 만 v2 의 1차 범위).
|
||||
|
||||
**parser_version cascade**: `PARSER_VERSION` `code-c-v1` → `code-c-v2` bump. design §9 — `doc_id = (workspace_path, asset_id, parser_version)`. 같은 file (asset_id 불변) + 새 parser_version → 새 doc_id. 즉 같은 workspace_path 에 옛 doc_id 와 새 doc_id 가 동시 INSERT 시도 → `idx_docs_workspace_path` UNIQUE 충돌.
|
||||
|
||||
**Same-workspace_path orphan purge (B1 Step 5b)**: `crates/kebab-store-sqlite/src/store.rs` 에 두 helper 신규 — `stale_chunk_ids_for_workspace_path_except_doc_id(workspace_path, keep_doc_id)` (chunk_ids 수집) + `purge_document_at_workspace_path_except_doc_id(workspace_path, keep_doc_id)` (CASCADE document/chunks 제거). `crates/kebab-app/src/lib.rs::try_skip_unchanged` 의 parser_mismatch 분기에서 `purge_workspace_path_for_parser_bump` wrapper 호출 → 옛 chunk_ids 의 LanceDB orphan 도 `delete_by_chunk_ids` 로 정리 후 SQLite document row 제거 → 이후 `Ok(None)` 반환 → caller 가 새 doc_id 로 INSERT. 기존 `purge_orphan_at_workspace_path` (asset_id 변경 케이스) 는 그대로 — bytes 변경 경로 회귀 없음.
|
||||
|
||||
**사용자 영향**: 기존 v0.16.x KB 의 C 파일은 v0.17.0 binary 로 다음 ingest 시 자동 재처리 (parser_version mismatch → cleanup → 새 doc). 명시적 re-ingest 명령 불필요 (다음 `kebab ingest` 가 자연스럽게 처리). `typedef struct {...} Foo;` 가 `Citation::Code.symbol = "Foo"` 로 search 에 노출.
|
||||
|
||||
**미해결 (Risks)**: nested typedef (`typedef struct { struct {...} inner; } Outer;`) 의 inner 익명 struct 는 여전히 glue — v2 의 1차 범위는 top-level typedef alias 만.
|
||||
|
||||
Cross-link: `crates/kebab-parse-code/src/c.rs::recover_typedef_alias`, `tasks/p10/p10-1d-c-cpp-ast-chunker.md` Risks/notes section.
|
||||
|
||||
## 2026-05-21 — p10-2: k8s multi-resource YAML chunk_id collision
|
||||
|
||||
**Origin**: P10 종합 도그푸딩 (`/tmp/kebab-p10-dogfood/`, 16 파일). 한 파일에 2+ k8s document (Deployment + Service, `---` 구분) 인 YAML 이 ingest 실패.
|
||||
@@ -84,11 +98,11 @@ Cross-link: `tasks/p10/p10-2-tier2-resource-aware.md` Risks/notes section.
|
||||
|
||||
**Symptom**: `typedef struct { ... } Foo;` in a `.c` file does NOT emit a struct-level unit. tree-sitter-c classifies the construct as a top-level `type_definition` with an *anonymous* inner `struct_specifier` (no `name` field), so the extractor's `struct_specifier` arm doesn't fire — the whole declaration falls into `<top-level>` glue. The named typedef alias `Foo` is therefore not searchable as a symbol.
|
||||
|
||||
**Status**: Consistent with spec p10-1d-c-cpp-ast-chunker.md's Risks/notes ("Anonymous union / struct … anonymous → glue"), but the spec's main body line 22 ("struct_specifier (named, top-level) → 1 unit") suggests this idiom WOULD emit. Tension noted, not yet fixed.
|
||||
**Status**: ✅ closed — v0.17.0 (2026-05-24) PR-B 에서 extractor 의 `type_definition` 분기 추가로 해소. 영향은 위 2026-05-24 PR-B 절 참조. 이하는 closure 전 round-2 dogfood 관찰 기록 (frozen).
|
||||
|
||||
**Workaround**: search the struct by its field/function names, or use `--code-lang c` to broaden scope. Typedef-aliased struct names won't surface as `Citation::Code.symbol`.
|
||||
**Workaround (pre-v0.17.0)**: search the struct by its field/function names, or use `--code-lang c` to broaden scope. Typedef-aliased struct names won't surface as `Citation::Code.symbol`.
|
||||
|
||||
**Next step**: dogfood real C code for a week+; if this turns out to be a frequent pain point (kernel-style code, libuv, etc.), revisit the extractor to detect `type_definition` → inner `struct_specifier` and emit a synthetic unit named after the typedef alias.
|
||||
**Resolution (v0.17.0)**: extractor 가 top-level `type_definition` 노드를 만나 내부 anonymous `struct_specifier` / `enum_specifier` / `union_specifier` 가 있으면 `declarator` field 의 typedef alias 이름으로 synthetic unit 방출. `PARSER_VERSION` `code-c-v1` → `code-c-v2` bump. design §9 cascade 동작 — 같은 `(workspace_path, asset_id)` 의 `doc_id` 가 새 parser_version 으로 다르게 계산됨. 옛 doc/chunks row + LanceDB orphan 회피용 same-workspace_path orphan purge helper 동반 (`stale_chunk_ids_for_workspace_path_except_doc_id` + `purge_document_at_workspace_path_except_doc_id`).
|
||||
|
||||
Cross-link: `tasks/p10/p10-1d-c-cpp-ast-chunker.md` Risks/notes section.
|
||||
|
||||
|
||||
@@ -113,7 +113,7 @@ crates/kebab-parse-code/Cargo.toml [edit] — 위 2 dep 신규 entry.
|
||||
- **Template specialization** (`template<> class Foo<int>`): tree-sitter-cpp 의 `template_declaration` 안의 `class_specifier` name 만 추출 — `Foo` 만 symbol 에 들어가고 `<int>` 미포함. design 의 generic 무시 룰 일관.
|
||||
- **`extern "C"` block 안의 fn**: 일반 fn 처리. 외부 wrapping block 은 glue.
|
||||
- **Anonymous union / struct** (`struct { int x; }` 변수 안에): 흔치 않음 + named 만 unit. anonymous 는 glue.
|
||||
- **typedef-wrapped struct/enum idiom** (`typedef struct { ... } Foo;`) — anonymous inner struct → glue. Named typedef alias 미캡처. dogfood 후 HOTFIXES 검토. See [HOTFIXES.md 2026-05-21 entry](../HOTFIXES.md).
|
||||
- **typedef-wrapped struct/enum idiom** (`typedef struct { ... } Foo;`) — ✅ v0.17.0 (2026-05-24) PR-B 에서 해소. extractor 의 `type_definition` 분기가 inner anonymous `struct_specifier` / `enum_specifier` / `union_specifier` 를 탐지해 declarator 의 typedef alias 이름으로 synthetic unit 방출. `PARSER_VERSION` `code-c-v1` → `code-c-v2` bump + same-workspace_path orphan purge cascade 동반. **잔여 미해결**: nested typedef (`typedef struct { struct {...} inner; } Outer;`) 의 inner 익명 struct 는 여전히 glue — v2 의 1차 범위는 top-level typedef alias 만. See [HOTFIXES.md 2026-05-21 entry](../HOTFIXES.md) (frozen 관찰) + 2026-05-24 closure entry.
|
||||
- **Macro-heavy code** (Linux kernel 등): `#define FOO(x) ...` 매크로가 function-like 라도 parser 가 fn 으로 인식 안 함. preprocessor glue 로 처리 — symbol 안 잡힘. 의도된 동작 (parser 의 macro expansion 안 함).
|
||||
- **`__attribute__((...))`** annotations: tree-sitter-c 의 attribute 노드는 declarator 옆 sibling. 무시 가능. function name 추출에 영향 없음.
|
||||
- **fixture 크기**: sample.c 는 ~30 line (top-level fn + struct + enum + preprocessor), sample.cpp 는 ~50 line (nested namespace + class + method + template + free fn). oversize fallback 의 별도 검증은 1A-2 의 long_section_snapshot 패턴이 이미 cover (필요 시 별도 fixture).
|
||||
|
||||
Reference in New Issue
Block a user