From a1192ce3b2171e438021815dadac245e4b071e52 Mon Sep 17 00:00:00 2001 From: altair823 Date: Wed, 20 May 2026 14:35:20 +0000 Subject: [PATCH 1/3] =?UTF-8?q?docs(p10-2-followup):=20README=20Mermaid=20?= =?UTF-8?q?chunker=5Fversion=20list=20=E2=80=94=20Java/Kotlin=20+=20Tier?= =?UTF-8?q?=202?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit p10-1C-JK 이후 누락된 code-java-ast-v1 / code-kotlin-ast-v1 + p10-2 의 k8s-manifest-resource-v1 / dockerfile-file-v1 / manifest-file-v1 추가. 표기 단순화를 위해 code-* 는 brace 묶음. Reviewer nit #2 (PR #153 code-reviewer). Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 833432b..a948a10 100644 --- a/README.md +++ b/README.md @@ -132,7 +132,7 @@ flowchart TB subgraph Pipeline["도메인 + 파이프라인"] parse["parse-md / parse-pdf / parse-image / parse-code"] - chunker["chunker (md-heading-v1, pdf-page-v1, code-rust-ast-v1, code-python-ast-v1, code-ts-ast-v1, code-js-ast-v1, code-go-ast-v1)"] + chunker["chunker (md-heading-v1, pdf-page-v1, code-{rust,python,ts,js,go,java,kotlin}-ast-v1, k8s-manifest-resource-v1, dockerfile-file-v1, manifest-file-v1)"] embedder["embedder (fastembed multilingual-e5-large)"] retriever["retriever (lexical / vector / hybrid RRF)"] rag["RAG pipeline"] From b5c12ecb6f591c84729e45cbb503e799abbddf50 Mon Sep 17 00:00:00 2001 From: altair823 Date: Wed, 20 May 2026 14:39:02 +0000 Subject: [PATCH 2/3] docs(p10-2-followup): clarify synthesize_tier2_document path resolution comment Earlier comment claimed the function "mirrors RustAstExtractor pattern" but the two differ: RustAstExtractor joins ctx.workspace_root to handle relative paths, while Tier 2 trusts FsSourceConnector's absolute-path invariant. Rephrase to document the actual rationale + the Kb URI fallback. Reviewer nit #3 (PR #153 code-reviewer). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/lib.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 149b687..585b47f 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -2101,11 +2101,15 @@ fn synthesize_tier2_document( }, ]; - // Resolve abs path for repo detection (mirrors RustAstExtractor pattern). - let workspace_root = std::path::PathBuf::new(); // not needed for detect_repo walk + // Resolve absolute path for repo detection. FsSourceConnector always + // emits absolute paths in SourceUri::File (verified in connector.rs); Kb + // URIs were rejected earlier in ingest_one_code_asset (returns Skipped), + // so the fallback below is purely defensive. This does NOT mirror + // RustAstExtractor — that extractor joins ctx.workspace_root for relative + // paths, but Tier 2 trusts the connector invariant. let abs_path = match &asset.source_uri { kebab_core::SourceUri::File(p) => p.clone(), - kebab_core::SourceUri::Kb(_) => workspace_root, + kebab_core::SourceUri::Kb(_) => std::path::PathBuf::new(), }; let (repo, git_branch, git_commit) = match kebab_parse_code::detect_repo(&abs_path) { Some(r) => (Some(r.name), r.branch, r.commit), From 75c1c7b9112737e4b63142d5ecb42461d0becf58 Mon Sep 17 00:00:00 2001 From: altair823 Date: Wed, 20 May 2026 14:40:37 +0000 Subject: [PATCH 3/3] test(p10-2-followup): cover tier2_shared oversize fallback with >200-line k8s ConfigMap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec p10-2 risks section calls out "거대 ConfigMap" but no test exercised the line-window split branch of tier2_shared::push_chunks_with_oversize. 
This adds a 256-line ConfigMap fixture (generated inline) and asserts: - ≥2 chunks emitted (split happened), - all chunks share symbol `ConfigMap/prod/big`, - chunk_ids all distinct (id_for_chunk's #L{k} suffix disambiguation), - line ranges form a contiguous partition (prev.line_end + 1 == next.line_start). Reviewer nit #1 (PR #153 code-reviewer). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tests/k8s_manifest_resource_v1.rs | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/crates/kebab-chunk/tests/k8s_manifest_resource_v1.rs b/crates/kebab-chunk/tests/k8s_manifest_resource_v1.rs index 42625a0..d3234d7 100644 --- a/crates/kebab-chunk/tests/k8s_manifest_resource_v1.rs +++ b/crates/kebab-chunk/tests/k8s_manifest_resource_v1.rs @@ -203,3 +203,75 @@ rules: other => panic!("expected Code span, got {other:?}"), } } + +/// 200+ line resource exercises `tier2_shared::push_chunks_with_oversize`'s +/// line-window split branch. All chunks must share the same symbol +/// (`{kind}/{namespace}/{name}`); their line ranges must form a contiguous +/// partition; chunk_ids must all differ (the `#L{k}` suffix on `id_for_chunk` +/// ensures uniqueness across windows). Spec p10-2 risks section explicitly +/// flags "거대 ConfigMap" — this test covers that path. +#[test] +fn k8s_oversize_splits_into_line_windows_sharing_symbol() { + // ConfigMap with 250 data keys → ~256 total lines, > AST_CHUNK_MAX_LINES (200). + let mut yaml = String::from( + "apiVersion: v1\nkind: ConfigMap\nmetadata:\n name: big\n namespace: prod\ndata:\n", + ); + for i in 0..250 { + yaml.push_str(&format!(" key{i}: value{i}\n")); + } + + let doc = yaml_doc(&yaml); + let chunks = K8sManifestResourceV1Chunker + .chunk(&doc, &policy()) + .expect("chunk"); + + assert!( + chunks.len() >= 2, + "expected ≥2 chunks for oversize resource, got {}", + chunks.len() + ); + + // Every chunk must share the same symbol + lang.
+ let expected_symbol = "ConfigMap/prod/big"; + for (i, c) in chunks.iter().enumerate() { + match &c.source_spans[0] { + SourceSpan::Code { symbol, lang, .. } => { + assert_eq!( + symbol.as_deref(), + Some(expected_symbol), + "chunk[{i}] symbol must equal `{expected_symbol}`" + ); + assert_eq!(lang.as_deref(), Some("yaml")); + } + other => panic!("chunk[{i}]: expected Code span, got {other:?}"), + } + } + + // chunk_ids must all be distinct (oversize fallback's #L{k} suffix). + let ids: std::collections::HashSet<_> = chunks.iter().map(|c| c.chunk_id.clone()).collect(); + assert_eq!( + ids.len(), + chunks.len(), + "oversize chunks must have distinct chunk_ids (the #L{{k}} suffix should disambiguate)" + ); + + // Line ranges must form a contiguous partition: chunk[i].line_end + 1 == chunk[i+1].line_start. + let ranges: Vec<(u32, u32)> = chunks + .iter() + .map(|c| match &c.source_spans[0] { + SourceSpan::Code { line_start, line_end, .. } => (*line_start, *line_end), + other => panic!("expected Code span, got {other:?}"), + }) + .collect(); + for w in ranges.windows(2) { + let (_, prev_end) = w[0]; + let (next_start, _) = w[1]; + assert_eq!( + prev_end + 1, + next_start, + "line ranges must be contiguous: {} → {} (got gap or overlap)", + prev_end, + next_start + ); + } +}