diff --git a/Cargo.lock b/Cargo.lock index 5530765..13becb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4494,7 +4494,6 @@ dependencies = [ "ignore", "kebab-config", "kebab-core", - "kebab-parse-code", "serde", "serde_json", "tempfile", diff --git a/crates/kebab-core/src/metadata.rs b/crates/kebab-core/src/metadata.rs index bed5cc2..1367705 100644 --- a/crates/kebab-core/src/metadata.rs +++ b/crates/kebab-core/src/metadata.rs @@ -33,7 +33,7 @@ pub struct Metadata { pub git_commit: Option, /// p10-1A-1: programming language identifier (lowercase canonical). null - /// for markdown / pdf / image. Set by `kebab_parse_code::lang::code_lang_for_path`. + /// for markdown / pdf / image. Set by the local-filesystem source connector during ingest. #[serde(default, skip_serializing_if = "Option::is_none")] pub code_lang: Option, } diff --git a/crates/kebab-parse-code/src/lang.rs b/crates/kebab-parse-code/src/lang.rs index 9f974a2..4590e95 100644 --- a/crates/kebab-parse-code/src/lang.rs +++ b/crates/kebab-parse-code/src/lang.rs @@ -1,69 +1,6 @@ -//! Canonical extension → language identifier mapping (spec §3.5). -//! -//! Lowercase canonical identifiers, matching tree-sitter parser conventions: -//! `rust`, `python`, `typescript`, `javascript`, `go`, `java`, `kotlin`, `c`, -//! `cpp`, `yaml`, `toml`, `json`, `shell`, `make`, `dockerfile`. - -use std::path::Path; - -/// Returns the canonical language identifier for a given file path, or -/// `None` if the extension / filename is not recognized. -/// -/// Matching priority: -/// 1. Tier 1 basename exact match (e.g. `Dockerfile`, `Makefile`) -/// 2. Tier 2 basename match (e.g. `Cargo.toml`, `package.json`, `build.gradle`) -/// 3. Tier 2 `Dockerfile.*` prefix variant -/// 4. 
Tier 1 + Tier 2 extension fallback (lowercase) -pub fn code_lang_for_path(path: &Path) -> Option<&'static str> { - if let Some(name) = path.file_name().and_then(|n| n.to_str()) { - // Tier 1 basename exact match - match name { - "Dockerfile" => return Some("dockerfile"), - "Makefile" | "GNUmakefile" => return Some("make"), - _ => {} - } - - // Tier 2 basename match (configuration / manifest files) - match name { - "Cargo.toml" | "pyproject.toml" => return Some("toml"), - "package.json" | "tsconfig.json" => return Some("json"), - "go.mod" => return Some("go-mod"), - "pom.xml" => return Some("xml"), - "build.gradle" => return Some("groovy"), - _ => {} - } - - // Tier 2: `Dockerfile.*` prefix variant (e.g. `Dockerfile.dev`, `Dockerfile.prod`) - if name.starts_with("Dockerfile.") && name.len() > "Dockerfile.".len() { - return Some("dockerfile"); - } - } - - // Extension fallback (Tier 1 + Tier 2) - let ext = path.extension()?.to_str()?.to_ascii_lowercase(); - match ext.as_str() { - // Tier 1 extensions - "rs" => Some("rust"), - "py" | "pyi" => Some("python"), - "ts" | "tsx" | "mts" | "cts" => Some("typescript"), - "js" | "mjs" | "cjs" | "jsx" => Some("javascript"), - "go" => Some("go"), - "java" => Some("java"), - "kt" | "kts" => Some("kotlin"), - "c" | "h" => Some("c"), - "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some("cpp"), - "sh" | "bash" | "zsh" => Some("shell"), - "mk" => Some("make"), - // Tier 2 extensions - "yaml" | "yml" => Some("yaml"), - "toml" => Some("toml"), - "json" => Some("json"), - "xml" => Some("xml"), - "dockerfile" => Some("dockerfile"), - "gradle" => Some("groovy"), - _ => None, - } -} +//! Workspace-relative path → module-path conversion for P10-1B AST extractors +//! (Python dotted form / TS+JS slash form). 본 module 의 `code_lang_for_path` +//! 는 v0.18.0+ 부터 `kebab-source-fs::code_meta` 로 이동. /// p10-1B: workspace-relative Python file path → dotted module-path prefix. 
/// See plan §Task C for the exact rules + tasks/p10/p10-1b for the §3.4 @@ -142,28 +79,4 @@ mod tests { assert_eq!(module_path_for_tsjs("a/b/c.ts"), "a/b/c"); assert_eq!(module_path_for_tsjs("packages/x/src/Foo.ts"), "packages/x/src/Foo"); } - - #[test] - fn tier2_basename_takes_precedence_over_extension() { - assert_eq!(code_lang_for_path(Path::new("Dockerfile")), Some("dockerfile")); - assert_eq!(code_lang_for_path(Path::new("foo/Dockerfile.dev")), Some("dockerfile")); - assert_eq!(code_lang_for_path(Path::new("myapp.dockerfile")), Some("dockerfile")); - assert_eq!(code_lang_for_path(Path::new("repo/Cargo.toml")), Some("toml")); - assert_eq!(code_lang_for_path(Path::new("pyproject.toml")), Some("toml")); - assert_eq!(code_lang_for_path(Path::new("repo/package.json")), Some("json")); - assert_eq!(code_lang_for_path(Path::new("tsconfig.json")), Some("json")); - assert_eq!(code_lang_for_path(Path::new("go.mod")), Some("go-mod")); - assert_eq!(code_lang_for_path(Path::new("pom.xml")), Some("xml")); - assert_eq!(code_lang_for_path(Path::new("build.gradle")), Some("groovy")); - } - - #[test] - fn tier2_extension_fallback() { - assert_eq!(code_lang_for_path(Path::new("k8s/deploy.yaml")), Some("yaml")); - assert_eq!(code_lang_for_path(Path::new("k8s/deploy.yml")), Some("yaml")); - assert_eq!(code_lang_for_path(Path::new("foo/bar.toml")), Some("toml")); - assert_eq!(code_lang_for_path(Path::new("foo/bar.json")), Some("json")); - assert_eq!(code_lang_for_path(Path::new("foo/bar.xml")), Some("xml")); - assert_eq!(code_lang_for_path(Path::new("foo/bar.gradle")), Some("groovy")); - } } diff --git a/crates/kebab-parse-code/src/lib.rs b/crates/kebab-parse-code/src/lib.rs index 7659fdb..3126801 100644 --- a/crates/kebab-parse-code/src/lib.rs +++ b/crates/kebab-parse-code/src/lib.rs @@ -1,17 +1,10 @@ //! `kebab-parse-code` — language-aware parsing for code corpora. //! -//! Phase 1A-1 ships infrastructure only: +//! 
Repo metadata (`detect_repo`) + per-language AST extractors (Rust = P10-1A-2, Python/TS/JS = P10-1B, Go = P10-1C-Go, Java+Kotlin = P10-1C-JK, C+C++ = P10-1D). //! -//! - [`lang::code_lang_for_path`] — extension → language identifier. -//! - [`repo::detect_repo`] — `.git/` walk-up → repo / branch / commit metadata. -//! - [`skip::is_generated_file`] / [`skip::is_oversized`] — pre-ingest skip -//! helpers consulted by `kebab-source-fs`. -//! - [`skip::BUILTIN_BLACKLIST`] — 6-entry safety-net pattern list. +//! lang detect (`code_lang_for_path`) + pre-ingest skip helpers (`is_generated_file`, `is_oversized`, `BUILTIN_BLACKLIST`) 는 v0.18.0+ 부터 `kebab-source-fs::code_meta` 로 이동 — refactor 2026-05-26. //! -//! Per-language parser modules (`rust`, `python`, `typescript`, …) land in -//! later phases (1A-2 onwards). The crate boundary follows other -//! `kebab-parse-*` crates per design §8: must NOT depend on store / embed -//! / llm / rag. +//! 본 crate 의 boundary 는 design §8 — store / embed / llm / rag / UI 의존 금지. 
pub mod c; pub mod cpp; @@ -24,7 +17,6 @@ pub mod python; pub mod repo; pub mod rust; pub(crate) mod scaffold; -pub mod skip; pub mod typescript; pub use c::{PARSER_VERSION as C_PARSER_VERSION, CAstExtractor}; @@ -33,9 +25,8 @@ pub use go::{PARSER_VERSION as GO_PARSER_VERSION, GoAstExtractor}; pub use java::{PARSER_VERSION as JAVA_PARSER_VERSION, JavaAstExtractor}; pub use javascript::{PARSER_VERSION as JS_PARSER_VERSION, JavascriptAstExtractor}; pub use kotlin::{PARSER_VERSION as KOTLIN_PARSER_VERSION, KotlinAstExtractor}; -pub use lang::{code_lang_for_path, module_path_for_python, module_path_for_tsjs}; +pub use lang::{module_path_for_python, module_path_for_tsjs}; pub use python::{PARSER_VERSION as PYTHON_PARSER_VERSION, PythonAstExtractor}; pub use repo::{RepoMeta, detect_repo}; pub use rust::{PARSER_VERSION as RUST_PARSER_VERSION, RustAstExtractor}; -pub use skip::{BUILTIN_BLACKLIST, is_generated_file, is_oversized}; pub use typescript::{PARSER_VERSION as TS_PARSER_VERSION, TypescriptAstExtractor}; diff --git a/crates/kebab-parse-code/src/skip.rs b/crates/kebab-parse-code/src/skip.rs deleted file mode 100644 index eafecf8..0000000 --- a/crates/kebab-parse-code/src/skip.rs +++ /dev/null @@ -1,65 +0,0 @@ -//! Pre-ingest skip helpers (spec §5.2 + §5.3 + §5.4). -//! -//! - [`BUILTIN_BLACKLIST`] — 6 gitignore-style patterns universal across -//! ecosystems. Source of truth: spec §5.2. -//! - [`is_generated_file`] — reads first ~512 bytes, checks for 7 -//! case-insensitive markers. -//! - [`is_oversized`] — byte cap then line cap. - -use anyhow::Result; -use std::fs::File; -use std::io::{BufRead, BufReader, Read}; -use std::path::Path; - -/// 6 built-in gitignore-style patterns. Applied in addition to `.gitignore` -/// + `.kebabignore`. User can override via `.kebabignore` negation -/// (`!pattern`). 
-pub const BUILTIN_BLACKLIST: &[&str] = &[ - "**/node_modules/**", - "**/target/**", - "**/__pycache__/**", - "**/.venv/**", - "**/venv/**", - "**/env/**", -]; - -/// Read first 512 bytes, check for any of 7 case-insensitive generated-file -/// markers. Returns Ok(true) on match, Ok(false) otherwise. -pub fn is_generated_file(path: &Path) -> Result { - let mut buf = [0u8; 512]; - let mut f = File::open(path)?; - let n = f.read(&mut buf)?; - if n == 0 { - return Ok(false); - } - let head = std::str::from_utf8(&buf[..n]).unwrap_or(""); - let lower: String = head.lines().take(10).collect::>().join("\n").to_ascii_lowercase(); - Ok( - lower.contains("@generated") - || lower.contains("code generated by") - || lower.contains("do not edit") - || lower.contains("do not modify") - || lower.contains("automatically generated") - || lower.contains("auto-generated") - || lower.contains("autogenerated"), - ) -} - -/// Check if `path` exceeds `max_bytes` or `max_lines`. Byte cap first -/// (cheap), then line cap (streaming with early exit). 
-pub fn is_oversized(path: &Path, max_bytes: u64, max_lines: u32) -> Result { - let meta = std::fs::metadata(path)?; - if meta.len() > max_bytes { - return Ok(true); - } - let reader = BufReader::new(File::open(path)?); - let mut count: u32 = 0; - for line in reader.lines() { - let _ = line?; - count = count.saturating_add(1); - if count > max_lines { - return Ok(true); - } - } - Ok(false) -} diff --git a/crates/kebab-parse-code/tests/lang.rs b/crates/kebab-parse-code/tests/lang.rs deleted file mode 100644 index 62ec9bd..0000000 --- a/crates/kebab-parse-code/tests/lang.rs +++ /dev/null @@ -1,67 +0,0 @@ -use kebab_parse_code::code_lang_for_path; -use std::path::Path; - -#[test] -fn known_extensions_map_to_canonical_identifiers() { - let cases = [ - ("foo.rs", Some("rust")), - ("foo.py", Some("python")), - ("foo.pyi", Some("python")), - ("foo.ts", Some("typescript")), - ("foo.tsx", Some("typescript")), - ("foo.mts", Some("typescript")), // ESM TS — same grammar - ("foo.cts", Some("typescript")), // CommonJS TS — same grammar - ("foo.js", Some("javascript")), - ("foo.mjs", Some("javascript")), - ("foo.cjs", Some("javascript")), - ("foo.jsx", Some("javascript")), - ("foo.go", Some("go")), - ("foo.java", Some("java")), - ("foo.kt", Some("kotlin")), - ("foo.kts", Some("kotlin")), - ("foo.c", Some("c")), - ("foo.h", Some("c")), - ("foo.cpp", Some("cpp")), - ("foo.cc", Some("cpp")), - ("foo.cxx", Some("cpp")), - ("foo.hpp", Some("cpp")), - ("foo.hh", Some("cpp")), - ("foo.hxx", Some("cpp")), - ("foo.yaml", Some("yaml")), - ("foo.yml", Some("yaml")), - ("foo.toml", Some("toml")), - ("foo.json", Some("json")), - ("foo.sh", Some("shell")), - ("foo.bash", Some("shell")), - ("foo.zsh", Some("shell")), - ("foo.mk", Some("make")), - ]; - for (path, expected) in cases { - assert_eq!( - code_lang_for_path(Path::new(path)), - expected, - "path = {path}" - ); - } -} - -#[test] -fn special_filenames_map_to_identifiers() { - assert_eq!(code_lang_for_path(Path::new("Dockerfile")), 
Some("dockerfile")); - assert_eq!(code_lang_for_path(Path::new("foo.dockerfile")), Some("dockerfile")); - assert_eq!(code_lang_for_path(Path::new("Makefile")), Some("make")); - assert_eq!(code_lang_for_path(Path::new("GNUmakefile")), Some("make")); -} - -#[test] -fn unknown_extension_returns_none() { - assert_eq!(code_lang_for_path(Path::new("foo.docx")), None); - assert_eq!(code_lang_for_path(Path::new("foo")), None); - assert_eq!(code_lang_for_path(Path::new("foo.unknown")), None); -} - -#[test] -fn case_insensitive() { - assert_eq!(code_lang_for_path(Path::new("Foo.RS")), Some("rust")); - assert_eq!(code_lang_for_path(Path::new("FOO.YAML")), Some("yaml")); -} diff --git a/crates/kebab-parse-code/tests/skip.rs b/crates/kebab-parse-code/tests/skip.rs deleted file mode 100644 index b85dafe..0000000 --- a/crates/kebab-parse-code/tests/skip.rs +++ /dev/null @@ -1,74 +0,0 @@ -use kebab_parse_code::skip::{BUILTIN_BLACKLIST, is_generated_file, is_oversized}; -use std::fs; -use tempfile::NamedTempFile; - -#[test] -fn generated_header_markers_trigger_skip() { - let cases = [ - "// @generated\nfn foo() {}\n", - "// Code generated by tonic-build. 
DO NOT EDIT.\nfn x() {}\n", - "/* DO NOT EDIT */\nfn x() {}\n", - "/* do not modify */\nfn x() {}\n", - "// AUTOMATICALLY GENERATED\nfn x() {}\n", - "# auto-generated\ndef x(): pass\n", - "// autogenerated\nfn x() {}\n", - ]; - for content in cases { - let f = NamedTempFile::new().unwrap(); - fs::write(f.path(), content).unwrap(); - assert!(is_generated_file(f.path()).unwrap(), "content: {content:?}"); - } -} - -#[test] -fn normal_code_is_not_flagged_generated() { - let f = NamedTempFile::new().unwrap(); - fs::write(f.path(), "fn main() {\n println!(\"hi\");\n}\n").unwrap(); - assert!(!is_generated_file(f.path()).unwrap()); -} - -#[test] -fn is_generated_returns_false_for_empty_file() { - let f = NamedTempFile::new().unwrap(); - fs::write(f.path(), "").unwrap(); - assert!(!is_generated_file(f.path()).unwrap()); -} - -#[test] -fn oversized_by_bytes_returns_true() { - let f = NamedTempFile::new().unwrap(); - let body: String = "x".repeat(300_000); - fs::write(f.path(), &body).unwrap(); - assert!(is_oversized(f.path(), 262_144, 5_000).unwrap()); -} - -#[test] -fn oversized_by_lines_returns_true() { - let f = NamedTempFile::new().unwrap(); - let body: String = "x\n".repeat(6_000); - fs::write(f.path(), &body).unwrap(); - assert!(is_oversized(f.path(), 262_144, 5_000).unwrap()); -} - -#[test] -fn small_file_returns_false_for_oversize() { - let f = NamedTempFile::new().unwrap(); - fs::write(f.path(), "fn foo() {}\n").unwrap(); - assert!(!is_oversized(f.path(), 262_144, 5_000).unwrap()); -} - -#[test] -fn builtin_blacklist_has_exactly_six_entries() { - assert_eq!(BUILTIN_BLACKLIST.len(), 6); - let expected = [ - "**/node_modules/**", - "**/target/**", - "**/__pycache__/**", - "**/.venv/**", - "**/venv/**", - "**/env/**", - ]; - for pat in expected { - assert!(BUILTIN_BLACKLIST.contains(&pat), "missing pattern: {pat}"); - } -} diff --git a/crates/kebab-source-fs/Cargo.toml b/crates/kebab-source-fs/Cargo.toml index b7236f3..9d5c20e 100644 --- 
a/crates/kebab-source-fs/Cargo.toml +++ b/crates/kebab-source-fs/Cargo.toml @@ -10,7 +10,6 @@ description = "Local filesystem SourceConnector — walks workspace.root + app [dependencies] kebab-core = { path = "../kebab-core" } kebab-config = { path = "../kebab-config" } -kebab-parse-code = { path = "../kebab-parse-code" } anyhow = { workspace = true } serde = { workspace = true } time = { workspace = true } diff --git a/crates/kebab-source-fs/src/code_meta.rs b/crates/kebab-source-fs/src/code_meta.rs new file mode 100644 index 0000000..afb7b53 --- /dev/null +++ b/crates/kebab-source-fs/src/code_meta.rs @@ -0,0 +1,285 @@ +//! Pre-ingest classification + skip helpers for the local-filesystem +//! SourceConnector. Moved from `kebab-parse-code` (refactor 2026-05-26) +//! to drop the 9 tree-sitter grammar drag from this crate's dep tree. +//! +//! `BUILTIN_BLACKLIST` is `pub` because it implements the **frozen contract +//! in design §5.2** (the 6-pattern safety-net list). External integration +//! tests (`tests/code_meta.rs`) verify the contract from outside the module +//! to prevent silent breakage. The 3 helper fns are `pub(crate)` — no +//! external consumer today. + +use std::fs::File; +use std::io::{BufRead, BufReader, Read}; +use std::path::Path; + +use anyhow::Result; + +/// 6 built-in gitignore-style patterns. Applied in addition to `.gitignore` +/// + `.kebabignore`. User can override via `.kebabignore` negation +/// (`!pattern`). +/// +/// Source of truth: design §5.2 (frozen). +pub const BUILTIN_BLACKLIST: &[&str] = &[ + "**/node_modules/**", + "**/target/**", + "**/__pycache__/**", + "**/.venv/**", + "**/venv/**", + "**/env/**", +]; + +/// Returns the canonical language identifier for a given file path, or +/// `None` if the extension / filename is not recognized. +/// +/// Matching priority: +/// 1. Tier 1 basename exact match (e.g. `Dockerfile`, `Makefile`) +/// 2. Tier 2 basename match (e.g. `Cargo.toml`, `package.json`, `build.gradle`) +/// 3. 
Tier 2 `Dockerfile.*` prefix variant +/// 4. Tier 1 + Tier 2 extension fallback (lowercase) +pub(crate) fn code_lang_for_path(path: &Path) -> Option<&'static str> { + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + // Tier 1 basename exact match + match name { + "Dockerfile" => return Some("dockerfile"), + "Makefile" | "GNUmakefile" => return Some("make"), + _ => {} + } + + // Tier 2 basename match (configuration / manifest files) + match name { + "Cargo.toml" | "pyproject.toml" => return Some("toml"), + "package.json" | "tsconfig.json" => return Some("json"), + "go.mod" => return Some("go-mod"), + "pom.xml" => return Some("xml"), + "build.gradle" => return Some("groovy"), + _ => {} + } + + // Tier 2: `Dockerfile.*` prefix variant (e.g. `Dockerfile.dev`, `Dockerfile.prod`) + if name.starts_with("Dockerfile.") && name.len() > "Dockerfile.".len() { + return Some("dockerfile"); + } + } + + // Extension fallback (Tier 1 + Tier 2) + let ext = path.extension()?.to_str()?.to_ascii_lowercase(); + match ext.as_str() { + // Tier 1 extensions + "rs" => Some("rust"), + "py" | "pyi" => Some("python"), + "ts" | "tsx" | "mts" | "cts" => Some("typescript"), + "js" | "mjs" | "cjs" | "jsx" => Some("javascript"), + "go" => Some("go"), + "java" => Some("java"), + "kt" | "kts" => Some("kotlin"), + "c" | "h" => Some("c"), + "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some("cpp"), + "sh" | "bash" | "zsh" => Some("shell"), + "mk" => Some("make"), + // Tier 2 extensions + "yaml" | "yml" => Some("yaml"), + "toml" => Some("toml"), + "json" => Some("json"), + "xml" => Some("xml"), + "dockerfile" => Some("dockerfile"), + "gradle" => Some("groovy"), + _ => None, + } +} + +/// Read first 512 bytes, check for any of 7 case-insensitive generated-file +/// markers. Returns Ok(true) on match, Ok(false) otherwise. 
+pub(crate) fn is_generated_file(path: &Path) -> Result { + let mut buf = [0u8; 512]; + let mut f = File::open(path)?; + let n = f.read(&mut buf)?; + if n == 0 { + return Ok(false); + } + let head = std::str::from_utf8(&buf[..n]).unwrap_or(""); + let lower: String = head.lines().take(10).collect::>().join("\n").to_ascii_lowercase(); + Ok( + lower.contains("@generated") + || lower.contains("code generated by") + || lower.contains("do not edit") + || lower.contains("do not modify") + || lower.contains("automatically generated") + || lower.contains("auto-generated") + || lower.contains("autogenerated"), + ) +} + +/// Check if `path` exceeds `max_bytes` or `max_lines`. Byte cap first +/// (cheap), then line cap (streaming with early exit). +pub(crate) fn is_oversized(path: &Path, max_bytes: u64, max_lines: u32) -> Result { + let meta = std::fs::metadata(path)?; + if meta.len() > max_bytes { + return Ok(true); + } + let reader = BufReader::new(File::open(path)?); + let mut count: u32 = 0; + for line in reader.lines() { + let _ = line?; + count = count.saturating_add(1); + if count > max_lines { + return Ok(true); + } + } + Ok(false) +} + +#[cfg(test)] +mod tests { + use super::{code_lang_for_path, is_generated_file, is_oversized}; + use std::fs; + use std::path::Path; + use tempfile::NamedTempFile; + + // ── code_lang_for_path tests (ex-kebab-parse-code/tests/lang.rs) ───────── + + #[test] + fn known_extensions_map_to_canonical_identifiers() { + let cases = [ + ("foo.rs", Some("rust")), + ("foo.py", Some("python")), + ("foo.pyi", Some("python")), + ("foo.ts", Some("typescript")), + ("foo.tsx", Some("typescript")), + ("foo.mts", Some("typescript")), // ESM TS — same grammar + ("foo.cts", Some("typescript")), // CommonJS TS — same grammar + ("foo.js", Some("javascript")), + ("foo.mjs", Some("javascript")), + ("foo.cjs", Some("javascript")), + ("foo.jsx", Some("javascript")), + ("foo.go", Some("go")), + ("foo.java", Some("java")), + ("foo.kt", Some("kotlin")), + 
("foo.kts", Some("kotlin")), + ("foo.c", Some("c")), + ("foo.h", Some("c")), + ("foo.cpp", Some("cpp")), + ("foo.cc", Some("cpp")), + ("foo.cxx", Some("cpp")), + ("foo.hpp", Some("cpp")), + ("foo.hh", Some("cpp")), + ("foo.hxx", Some("cpp")), + ("foo.yaml", Some("yaml")), + ("foo.yml", Some("yaml")), + ("foo.toml", Some("toml")), + ("foo.json", Some("json")), + ("foo.sh", Some("shell")), + ("foo.bash", Some("shell")), + ("foo.zsh", Some("shell")), + ("foo.mk", Some("make")), + ]; + for (path, expected) in cases { + assert_eq!( + code_lang_for_path(Path::new(path)), + expected, + "path = {path}" + ); + } + } + + #[test] + fn special_filenames_map_to_identifiers() { + assert_eq!(code_lang_for_path(Path::new("Dockerfile")), Some("dockerfile")); + assert_eq!(code_lang_for_path(Path::new("foo.dockerfile")), Some("dockerfile")); + assert_eq!(code_lang_for_path(Path::new("Makefile")), Some("make")); + assert_eq!(code_lang_for_path(Path::new("GNUmakefile")), Some("make")); + } + + #[test] + fn unknown_extension_returns_none() { + assert_eq!(code_lang_for_path(Path::new("foo.docx")), None); + assert_eq!(code_lang_for_path(Path::new("foo")), None); + assert_eq!(code_lang_for_path(Path::new("foo.unknown")), None); + } + + #[test] + fn case_insensitive() { + assert_eq!(code_lang_for_path(Path::new("Foo.RS")), Some("rust")); + assert_eq!(code_lang_for_path(Path::new("FOO.YAML")), Some("yaml")); + } + + #[test] + fn tier2_basename_takes_precedence_over_extension() { + assert_eq!(code_lang_for_path(Path::new("Dockerfile")), Some("dockerfile")); + assert_eq!(code_lang_for_path(Path::new("foo/Dockerfile.dev")), Some("dockerfile")); + assert_eq!(code_lang_for_path(Path::new("myapp.dockerfile")), Some("dockerfile")); + assert_eq!(code_lang_for_path(Path::new("repo/Cargo.toml")), Some("toml")); + assert_eq!(code_lang_for_path(Path::new("pyproject.toml")), Some("toml")); + assert_eq!(code_lang_for_path(Path::new("repo/package.json")), Some("json")); + 
assert_eq!(code_lang_for_path(Path::new("tsconfig.json")), Some("json")); + assert_eq!(code_lang_for_path(Path::new("go.mod")), Some("go-mod")); + assert_eq!(code_lang_for_path(Path::new("pom.xml")), Some("xml")); + assert_eq!(code_lang_for_path(Path::new("build.gradle")), Some("groovy")); + } + + #[test] + fn tier2_extension_fallback() { + assert_eq!(code_lang_for_path(Path::new("k8s/deploy.yaml")), Some("yaml")); + assert_eq!(code_lang_for_path(Path::new("k8s/deploy.yml")), Some("yaml")); + assert_eq!(code_lang_for_path(Path::new("foo/bar.toml")), Some("toml")); + assert_eq!(code_lang_for_path(Path::new("foo/bar.json")), Some("json")); + assert_eq!(code_lang_for_path(Path::new("foo/bar.xml")), Some("xml")); + assert_eq!(code_lang_for_path(Path::new("foo/bar.gradle")), Some("groovy")); + } + + // ── is_generated_file + is_oversized tests (ex-kebab-parse-code/tests/skip.rs) ── + + #[test] + fn generated_header_markers_trigger_skip() { + let cases = [ + "// @generated\nfn foo() {}\n", + "// Code generated by tonic-build. 
DO NOT EDIT.\nfn x() {}\n", + "/* DO NOT EDIT */\nfn x() {}\n", + "/* do not modify */\nfn x() {}\n", + "// AUTOMATICALLY GENERATED\nfn x() {}\n", + "# auto-generated\ndef x(): pass\n", + "// autogenerated\nfn x() {}\n", + ]; + for content in cases { + let f = NamedTempFile::new().unwrap(); + fs::write(f.path(), content).unwrap(); + assert!(is_generated_file(f.path()).unwrap(), "content: {content:?}"); + } + } + + #[test] + fn normal_code_is_not_flagged_generated() { + let f = NamedTempFile::new().unwrap(); + fs::write(f.path(), "fn main() {\n println!(\"hi\");\n}\n").unwrap(); + assert!(!is_generated_file(f.path()).unwrap()); + } + + #[test] + fn is_generated_returns_false_for_empty_file() { + let f = NamedTempFile::new().unwrap(); + fs::write(f.path(), "").unwrap(); + assert!(!is_generated_file(f.path()).unwrap()); + } + + #[test] + fn oversized_by_bytes_returns_true() { + let f = NamedTempFile::new().unwrap(); + let body: String = "x".repeat(300_000); + fs::write(f.path(), &body).unwrap(); + assert!(is_oversized(f.path(), 262_144, 5_000).unwrap()); + } + + #[test] + fn oversized_by_lines_returns_true() { + let f = NamedTempFile::new().unwrap(); + let body: String = "x\n".repeat(6_000); + fs::write(f.path(), &body).unwrap(); + assert!(is_oversized(f.path(), 262_144, 5_000).unwrap()); + } + + #[test] + fn small_file_returns_false_for_oversize() { + let f = NamedTempFile::new().unwrap(); + fs::write(f.path(), "fn foo() {}\n").unwrap(); + assert!(!is_oversized(f.path(), 262_144, 5_000).unwrap()); + } +} diff --git a/crates/kebab-source-fs/src/connector.rs b/crates/kebab-source-fs/src/connector.rs index fd73734..288b8a3 100644 --- a/crates/kebab-source-fs/src/connector.rs +++ b/crates/kebab-source-fs/src/connector.rs @@ -149,7 +149,7 @@ impl FsSourceConnector { // Generated-header sniff (config-gated). 
if self.skip_generated_header - && kebab_parse_code::is_generated_file(&abs_path).unwrap_or(false) + && crate::code_meta::is_generated_file(&abs_path).unwrap_or(false) { fs_skips.skipped_generated = fs_skips.skipped_generated.saturating_add(1); @@ -166,7 +166,7 @@ impl FsSourceConnector { } // Size-cap check (byte or line limit). - if kebab_parse_code::is_oversized( + if crate::code_meta::is_oversized( &abs_path, self.max_file_bytes, self.max_file_lines, diff --git a/crates/kebab-source-fs/src/lib.rs b/crates/kebab-source-fs/src/lib.rs index 6975271..258599a 100644 --- a/crates/kebab-source-fs/src/lib.rs +++ b/crates/kebab-source-fs/src/lib.rs @@ -8,9 +8,11 @@ //! normalization), §7.1 (SourceScope), §7.2 (SourceConnector), §8 (module //! boundaries). +mod code_meta; mod connector; mod hash; mod media; mod walker; +pub use code_meta::BUILTIN_BLACKLIST; // design §5.2 frozen contract — integration test (§5.1) 의 접근 surface. pub use connector::{FsScanSkips, FsSourceConnector}; diff --git a/crates/kebab-source-fs/src/media.rs b/crates/kebab-source-fs/src/media.rs index 0299b72..7ea35f5 100644 --- a/crates/kebab-source-fs/src/media.rs +++ b/crates/kebab-source-fs/src/media.rs @@ -14,7 +14,7 @@ use kebab_core::{AudioType, ImageType, MediaType}; pub(crate) fn media_type_for(path: &Path) -> MediaType { // p10-2: code_lang_for_path is the single source of truth for code lang // (design §3.5). Delegate before falling back to extension branches. - if let Some(lang) = kebab_parse_code::code_lang_for_path(path) { + if let Some(lang) = crate::code_meta::code_lang_for_path(path) { return MediaType::Code(lang.to_string()); } diff --git a/crates/kebab-source-fs/src/walker.rs b/crates/kebab-source-fs/src/walker.rs index cfc8252..4f8a51e 100644 --- a/crates/kebab-source-fs/src/walker.rs +++ b/crates/kebab-source-fs/src/walker.rs @@ -6,7 +6,7 @@ //! - `config.workspace.exclude` (user-supplied per workspace) //! - `/.kebabignore` (user-supplied kebab-specific exclude) //! 
- Built-in safety-net blacklist (`node_modules/`, `target/`, etc. — -//! spec §5.2, applied via `kebab_parse_code::BUILTIN_BLACKLIST`) +//! spec §5.2, applied via `crate::code_meta::BUILTIN_BLACKLIST`) //! - `/.gitignore` (repo-root only, no nested cascade — spec §5.2) //! //! All five are merged via `ignore::overrides::OverrideBuilder`, which @@ -82,7 +82,7 @@ pub(crate) struct WalkOverrides { pub gitignore: Override, /// Matcher built from `/.kebabignore` patterns only. pub kebabignore: Override, - /// Matcher built from `kebab_parse_code::BUILTIN_BLACKLIST` only. + /// Matcher built from `crate::code_meta::BUILTIN_BLACKLIST` only. pub builtin: Override, /// Compiled allow-list from `scope.include`. Empty set = pass all. pub include: GlobSet, @@ -128,7 +128,7 @@ fn build_single_matcher(root: &Path, patterns: &[&str]) -> Result { /// for attribution purposes. fn build_builtin_matcher(root: &Path) -> Result { let mut builder = OverrideBuilder::new(root); - for pat in kebab_parse_code::BUILTIN_BLACKLIST { + for pat in crate::code_meta::BUILTIN_BLACKLIST { // Register the original pattern (matches files inside the dir). builder .add(&format!("!{pat}")) @@ -158,7 +158,7 @@ fn build_single_matcher_owned(root: &Path, patterns: &[String]) -> Result/.gitignore` (root-only, no nested cascade). 
/// /// Each input pattern is registered as an *exclude* (gitignore-style: a @@ -208,7 +208,7 @@ pub(crate) fn build_overrides( .add(&format!("!{pat}")) .with_context(|| format!("invalid .kebabignore pattern: {pat}"))?; } - for pat in kebab_parse_code::BUILTIN_BLACKLIST { + for pat in crate::code_meta::BUILTIN_BLACKLIST { combined_builder .add(&format!("!{pat}")) .with_context(|| format!("built-in blacklist pattern: {pat}"))?; diff --git a/crates/kebab-source-fs/tests/code_meta.rs b/crates/kebab-source-fs/tests/code_meta.rs new file mode 100644 index 0000000..c03c318 --- /dev/null +++ b/crates/kebab-source-fs/tests/code_meta.rs @@ -0,0 +1,17 @@ +use kebab_source_fs::BUILTIN_BLACKLIST; + +#[test] +fn builtin_blacklist_has_exactly_six_entries() { + assert_eq!(BUILTIN_BLACKLIST.len(), 6); + let expected = [ + "**/node_modules/**", + "**/target/**", + "**/__pycache__/**", + "**/.venv/**", + "**/venv/**", + "**/env/**", + ]; + for pat in expected { + assert!(BUILTIN_BLACKLIST.contains(&pat), "missing pattern: {pat}"); + } +} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 79a68d2..e7d3101 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -131,7 +131,7 @@ flowchart TB UI → store/llm/parse 직접 의존 금지. 모든 user-facing 진입은 `kebab-app` facade 만 통한다 (frozen 설계 §8). `kebab-cli` 가 `--config ` flag 를 honor 하려면 `kebab_app::*_with_config(cfg, …)` companion 을 통해 Config 을 명시적으로 thread 하는 패턴 — 자세한 이유는 [tasks/HOTFIXES.md](../tasks/HOTFIXES.md) 의 `--config` 항목. -`kebab-parse-code` 의 외부 tree-sitter grammar crate 의존: P10-1A-2 에서 `tree-sitter-rust` 추가, P10-1B 에서 `tree-sitter-python` / `tree-sitter-typescript` / `tree-sitter-javascript` 추가, P10-1C-Go 에서 `tree-sitter-go` 추가, P10-1C-JK 에서 `tree-sitter-java` / `tree-sitter-kotlin-ng` 추가, P10-1D 에서 `tree-sitter-c` / `tree-sitter-cpp` 추가. 모두 `kebab-parse-code` 에만 격리 (facade 룰 — UI crate / chunker 가 직접 import 금지). Kotlin 은 `tree-sitter-kotlin-ng` 사용 (bare `tree-sitter-kotlin` 은 tree-sitter 0.21–0.23 에 고착 — 사용 불가). 
+`kebab-parse-code` 의 외부 tree-sitter grammar crate 의존: P10-1A-2 에서 `tree-sitter-rust` 추가, P10-1B 에서 `tree-sitter-python` / `tree-sitter-typescript` / `tree-sitter-javascript` 추가, P10-1C-Go 에서 `tree-sitter-go` 추가, P10-1C-JK 에서 `tree-sitter-java` / `tree-sitter-kotlin-ng` 추가, P10-1D 에서 `tree-sitter-c` / `tree-sitter-cpp` 추가. 모두 `kebab-parse-code` 에만 격리 (facade 룰 — UI crate / chunker 가 직접 import 금지). Kotlin 은 `tree-sitter-kotlin-ng` 사용 (bare `tree-sitter-kotlin` 은 tree-sitter 0.21–0.23 에 고착 — 사용 불가). v0.18.0+ 부터 `kebab-source-fs` 는 자체 `code_meta` 모듈 (lang detect + skip helpers + BUILTIN_BLACKLIST) 을 보유, kebab-parse-code 와 분리 (refactor 2026-05-26). ## 디렉토리 구조 diff --git a/docs/superpowers/plans/2026-05-26-source-fs-dep-lightening-plan.md b/docs/superpowers/plans/2026-05-26-source-fs-dep-lightening-plan.md new file mode 100644 index 0000000..6660e18 --- /dev/null +++ b/docs/superpowers/plans/2026-05-26-source-fs-dep-lightening-plan.md @@ -0,0 +1,573 @@ +--- +status: open +target_version: 0.18.0 +spec: docs/superpowers/specs/2026-05-26-source-fs-dep-lightening-spec.md +contract_sections: ["§3.5", "§3.7b", "§5.2", "§8"] +--- + +# kebab-source-fs dep lightening — implementation plan + +> **Round 2 reflection**: 12 step → 10 step. parse-code cleanup 을 atomic clippy gate 로 통합 (구 Step 9+10 합침), 그리고 kebab-core doc / ARCHITECTURE / design §8 / workspace 회귀 / commit 을 single closure step 으로 통합 (구 Step 11+12 합침). 모든 BLOCKER + MAJOR + MINOR + NIT 의 reflection 위치는 §9 closure table 참조. + +## §0 Pre-flight + branch state + +- **Branch**: `refactor/source-fs-dep-lightening` (현재 위치, main 위에서 분기 완료). +- **Base SHA**: `b02ac82` (HOTFIX #15 + S3 NLI 머지 직후, v0.18.0 cut 완료 시점 — spec §1.6 / §5.2 baseline 과 동일). +- **Working dir**: `/home/altair823/kebab`. +- **Env 강제** (CLAUDE.md disk-protection): + - `export CARGO_TARGET_DIR=/build/out/kebab/target` — 본 plan 의 모든 cargo 명령에 적용. `target/` 가 repo root 아래에 생성되지 않게. + - `export TMPDIR=/build/cache/tmp` — 대용량 임시 파일 발생 시 보호. 
+- **Cargo build 직렬화**: 모든 cargo 명령 `-j 1` 강제 (CLAUDE.md "Build / test / lint" — 18 integration-test binary 동시 link 시 OOM). per-crate `-p` 명령은 `-j 1` 없어도 OK 지만, workspace 단위 `--workspace` 만 `-j 1` 필수. 본 plan 은 일관성을 위해 모든 cargo 호출에 `-j 1` 명시. +- **Memory persistence** (`~/.claude/projects/-home-altair823-kebab/memory/MEMORY.md` 의 `feedback_serial_build_only.md` 참조): cargo test/clippy/build 동시 bg 실행 금지. 하나 끝난 후 다음. +- **HOTFIXES.md / HANDOFF.md / README.md / tasks/INDEX.md / 4 frozen task spec 변경 0** (spec §7 명시). +- **workspace `Cargo.toml` version bump 0** (spec NG5). +- **wire schema / Config / V00X migration 영향 0** (spec §7). + +## §1 Approach summary + +Spec §3 의 핵심 sequencing: + +1. **신규 module 부터 작성** — `kebab-source-fs/src/code_meta.rs` 에 4 surface (`BUILTIN_BLACKLIST` `pub`, 3 helper fn `pub(crate)`) 본문 byte-identical 이동 + 12 unit test 이전. +2. **lib.rs 에 module + pub use 등록** — 이 시점부터 양쪽 surface (`kebab_parse_code::...` 와 `crate::code_meta::...`) 공존. cargo check 통과. +3. **5 callsite migration** — `media.rs` (1) → `walker.rs` (2 + 주석 3) → `connector.rs` (2). +4. **`Cargo.toml` 의 `kebab-parse-code` dep 제거** — 본 plan 의 anchor step. G1 + G5 (`cargo tree | grep tree-sitter` 0 줄) 달성. +5. **integration test 신설** — `tests/code_meta.rs::builtin_blacklist_has_exactly_six_entries` 가 design §5.2 frozen contract 의 외부 검증 surface. +6. **parse-code 측 atomic cleanup** — skip.rs 삭제 + lib.rs (skip 줄 + lang 줄) + lang.rs narrow edit (`code_lang_for_path` 함수 + 관련 2 unit test + `use std::path::Path;` import 제거) + tests/{lang,skip}.rs 삭제 + 헤더 doc rewrite. atomic clippy gate 통과. +7. **doc 갱신 + workspace 회귀 + commit** — `kebab-core/src/metadata.rs:36` docstring + `docs/ARCHITECTURE.md` 산문 + frozen design §8 graph 두 줄 + workspace 회귀 + 1 clean commit. design §8 + ARCHITECTURE 갱신은 검증 cli 로 falsifiable acceptance. + +핵심 ordering 보장: +- callsite migration (Step 4-6) 완료 전에 Cargo.toml dep 제거 (Step 7) 금지. 
+- source-fs callsite + Cargo.toml + integration test (Step 4-8) 완료 전에 parse-code 측 surface 삭제 (Step 9) 금지. +- parse-code 측 surface 삭제 (Step 9) 완료 전에 kebab-core docstring 정리 (Step 10 의 첫 action) 의미 없음. + +## §2 Steps (10 steps) + +### Step 1: Pre-flight baseline 측정 + env 확인 + +- **Files affected**: 변경 0 (측정 only). +- **Action**: + - `cd /home/altair823/kebab && git rev-parse HEAD` → `b02ac82` 또는 그 위 commit 확인 (refactor branch 의 base). + - env 확인: `echo $CARGO_TARGET_DIR` 가 `/build/out/...` 인지. 비어있으면 §0 의 export 적용. + - baseline workspace test 수 측정 (PR description 의 "before N passing" 용). **awk 합산 cli 명시** (MINOR #1): + ```sh + cargo test --workspace --no-fail-fast -j 1 2>&1 \ + | awk '/^test result: ok\./ {for(i=1;i<=NF;i++) if($i=="passed;") sum += $(i-1)} END {print sum}' \ + > .omc/state/baseline_N.txt + ``` + `.omc/state/baseline_N.txt` 에 N 값 한 줄 기록 (working dir 의 `.omc/` 는 git untracked — repo 에 들어가지 않음). PR description 에도 inline 인용 (primary record, local file 은 optional convenience). + - 검증: `cargo tree -p kebab-source-fs | grep tree-sitter` → **non-zero 줄** (현재 9 grammar drag 존재 확인 = before-state baseline). +- **Spec reference**: §1.1, §5.2. +- **Exit gate**: + - `git rev-parse HEAD` ≥ `b02ac82` (또는 동일). + - baseline N 기록됨 (`.omc/state/baseline_N.txt` + PR description inline). + - `cargo tree -p kebab-source-fs | grep tree-sitter | wc -l` ≥ 9 (before-state 확인). + +### Step 2: 신규 `crates/kebab-source-fs/src/code_meta.rs` 생성 + +- **Files affected**: `crates/kebab-source-fs/src/code_meta.rs` (신규). +- **Action**: + - 신규 file 작성. module-level doc 은 spec §3.3 의 cross-link wording 그대로 (5-line `//!` 블록 — "Pre-ingest classification ... `BUILTIN_BLACKLIST` is `pub` because ... `tests/code_meta.rs` ... 3 helper fns are `pub(crate)`"). + - `use std::fs::File; use std::io::{BufRead, BufReader, Read}; use std::path::Path; use anyhow::Result;` (kebab-parse-code/src/skip.rs:9-12 + lang.rs:7 의 use 절 합집합). 
+ - `pub const BUILTIN_BLACKLIST: &[&str] = &[...6 entry...];` — kebab-parse-code/src/skip.rs **line 17-24** 본문 byte-identical (MINOR #2 정정 — 17-24 가 실제 const 본문 line range), 단 doc 주석에 spec §3.4 의 "Source of truth: design §5.2 (frozen)" 줄 추가. + - `pub(crate) fn code_lang_for_path(path: &Path) -> Option<&'static str>` — kebab-parse-code/src/lang.rs:17-66 본문 byte-identical (visibility 만 `pub` → `pub(crate)`). + - `pub(crate) fn is_generated_file(path: &Path) -> Result<bool>` — kebab-parse-code/src/skip.rs:28-46 본문 byte-identical (visibility `pub` → `pub(crate)`). + - `pub(crate) fn is_oversized(path: &Path, max_bytes: u64, max_lines: u32) -> Result<bool>` — kebab-parse-code/src/skip.rs:50-65 본문 byte-identical (visibility `pub` → `pub(crate)`). + - `#[cfg(test)] mod tests { ... }` 블록 — 12 unit test 본문 이전 (spec §3.9 table). + - **consolidated imports** (MINOR #4) — `#[cfg(test)] mod tests` 블록 최상단: + ```rust + #[cfg(test)] mod tests { + use super::{is_generated_file, is_oversized, code_lang_for_path}; + use super::BUILTIN_BLACKLIST; // unit tests 에는 미사용이나 import resolver 단순화 — 단, BLACKLIST 6-entry 검증은 integration test (§3.7) 로 분리되므로 본 import 는 실제로는 생략 가능. 본 plan 은 **포함 안 함** (false unused-import warn 회피). + use std::fs; + use std::path::Path; + use tempfile::NamedTempFile; + // ... 12 test fn ... + } + ``` + 실제 적용: `use super::{is_generated_file, is_oversized, code_lang_for_path}; use std::fs; use std::path::Path; use tempfile::NamedTempFile;` — 4 줄. `BUILTIN_BLACKLIST` import 는 unit 측에서 미사용이므로 생략 (사용은 integration test 에서만). + - 12 unit test mapping (spec §3.9): + - `tests/lang.rs` 의 4 test (`known_extensions_map_to_canonical_identifiers`, `special_filenames_map_to_identifiers`, `unknown_extension_returns_none`, `case_insensitive`) — 본문 byte-identical, `use kebab_parse_code::code_lang_for_path;` 줄 제거 (consolidated import 가 대체). + - `src/lang.rs::tests` 의 2 test (`tier2_basename_takes_precedence_over_extension`, `tier2_extension_fallback`) — 본문 byte-identical.
+ - `tests/skip.rs` 의 6 test (`generated_header_markers_trigger_skip`, `normal_code_is_not_flagged_generated`, `is_generated_returns_false_for_empty_file`, `oversized_by_bytes_returns_true`, `oversized_by_lines_returns_true`, `small_file_returns_false_for_oversize`) — 본문 byte-identical, `use kebab_parse_code::skip::{...};` 줄 제거 + `use tempfile::NamedTempFile; use std::fs;` 는 consolidated import 로 대체. + - **이전 안 함**: `builtin_blacklist_has_exactly_six_entries` (= Step 8 의 integration test 로 분리). +- **Spec reference**: §3.2, §3.3, §3.4, §3.7, §3.9. +- **Exit gate**: + - file 신설됨. 이 step 만으로는 `lib.rs` 미등록 → 컴파일러 무시 (`cargo check -p kebab-source-fs -j 1` 변화 없음, untouched dead file). 검증 항목은 Step 3 에 위임. + +### Step 3: `crates/kebab-source-fs/src/lib.rs` 에 module + pub use 등록 + +- **Files affected**: `crates/kebab-source-fs/src/lib.rs`. +- **Action** (spec §3.2): + - 기존 `mod walker;` 다음 줄에 `mod code_meta;` 한 줄 추가. + - 기존 `pub use connector::{FsScanSkips, FsSourceConnector};` 다음 줄에 `pub use code_meta::BUILTIN_BLACKLIST;` 한 줄 추가 (인라인 주석: `// design §5.2 frozen contract — integration test (§5.1) 의 접근 surface.`). + - **변경 안 함**: 기존 `mod connector; mod hash; mod media; mod walker;` (NEW MAJOR #2 의 surface 무근거 확장 회피). +- **Spec reference**: §3.2. +- **Exit gate**: + - `cargo check -p kebab-source-fs -j 1` 통과. + - 이 시점: `crate::code_meta::*` + `kebab_parse_code::*` 양쪽 surface 공존. + - `cargo test -p kebab-source-fs -j 1 code_meta::tests` → 12 passing (12 unit test 모두 통과). + +### Step 4: `crates/kebab-source-fs/src/media.rs` callsite migration + +- **Files affected**: `crates/kebab-source-fs/src/media.rs`. +- **Action** (spec §3.5 row 1): + - line 17: `if let Some(lang) = kebab_parse_code::code_lang_for_path(path) {` → `if let Some(lang) = crate::code_meta::code_lang_for_path(path) {` +- **Spec reference**: §3.5. +- **Exit gate**: + - `cargo check -p kebab-source-fs -j 1` clean (warn-free). + - `cargo test -p kebab-source-fs -j 1 media` 통과. 
+ +### Step 5: `crates/kebab-source-fs/src/walker.rs` callsite migration + 주석 갱신 + +- **Files affected**: `crates/kebab-source-fs/src/walker.rs`. +- **Action** (spec §3.5 row 2-3 + comment row): + - line 131: `for pat in kebab_parse_code::BUILTIN_BLACKLIST {` → `for pat in crate::code_meta::BUILTIN_BLACKLIST {` + - line 211: 동일 패턴. + - line 9 (module-level `//!` 주석): `kebab_parse_code::BUILTIN_BLACKLIST` → `crate::code_meta::BUILTIN_BLACKLIST` + - line 85, 161 (function-level `///` 주석): 동일 패턴. +- **Spec reference**: §3.5. +- **Exit gate**: + - `cargo check -p kebab-source-fs -j 1` clean. + - `cargo test -p kebab-source-fs -j 1 walker` 통과. + +### Step 6: `crates/kebab-source-fs/src/connector.rs` callsite migration + +- **Files affected**: `crates/kebab-source-fs/src/connector.rs`. +- **Action** (spec §3.5 row 4-5): + - line 152: `&& kebab_parse_code::is_generated_file(&abs_path).unwrap_or(false)` → `&& crate::code_meta::is_generated_file(&abs_path).unwrap_or(false)` + - line 169: `if kebab_parse_code::is_oversized(` → `if crate::code_meta::is_oversized(` +- **Spec reference**: §3.5. +- **Exit gate**: + - `cargo check -p kebab-source-fs -j 1` clean. + - **추가 가드** (spec §6.2): `grep -rn "kebab_parse_code\|kebab-parse-code" crates/kebab-source-fs/src/ crates/kebab-source-fs/tests/` → 0 줄. (Cargo.toml 은 제외 — Step 7). + +### Step 7: `crates/kebab-source-fs/Cargo.toml` 에서 `kebab-parse-code` dep 제거 — **anchor** + +- **Files affected**: `crates/kebab-source-fs/Cargo.toml`. +- **Action** (spec §3.8 diff): + - line 13 `kebab-parse-code = { path = "../kebab-parse-code" }` 한 줄 삭제. + - **변경 안 함**: `kebab-core`, `kebab-config`, 기타 모든 dep + `[dev-dependencies]`. +- **Spec reference**: §3.8, G1, G5. +- **Exit gate** — 본 plan 의 **anchor step**, 4 검증 모두 통과 필수: + - `cargo build -p kebab-source-fs -j 1` clean. + - `cargo clippy -p kebab-source-fs --all-targets -j 1 -- -D warnings` clean (workspace pedantic 그대로). 
+ - `cargo test -p kebab-source-fs -j 1` 통과 (기존 integration test 3개 + Step 2 의 12 unit test). + - `cargo tree -p kebab-source-fs | grep tree-sitter | wc -l` → **0 줄** (G5 + spec §5.3). + - 이 step 통과 = **G1 (source-fs dep lightening) 달성 시점**. + +### Step 8: 신규 `crates/kebab-source-fs/tests/code_meta.rs` integration test 생성 + +- **Files affected**: `crates/kebab-source-fs/tests/code_meta.rs` (신규). +- **Action** (spec §3.7, §3.9 의 integration row): + - 신규 file: + ```rust + use kebab_source_fs::BUILTIN_BLACKLIST; + + #[test] + fn builtin_blacklist_has_exactly_six_entries() { + assert_eq!(BUILTIN_BLACKLIST.len(), 6); + let expected = [ + "**/node_modules/**", + "**/target/**", + "**/__pycache__/**", + "**/.venv/**", + "**/venv/**", + "**/env/**", + ]; + for pat in expected { + assert!(BUILTIN_BLACKLIST.contains(&pat), "missing pattern: {pat}"); + } + } + ``` + - `kebab-parse-code/tests/skip.rs:60-74` 의 본문을 import 만 갈아끼우고 byte-identical 이전. +- **Spec reference**: §3.7, §3.9, §5.1. +- **Exit gate**: + - `cargo test -p kebab-source-fs -j 1 code_meta` → 12 passing (unit 12 개만 — name filter 는 test fn 경로에만 매치하므로 substring `code_meta` 가 없는 integration fn `builtin_blacklist_has_exactly_six_entries` 는 이 명령으로 실행되지 않음; integration 1 개는 아래 `--test` 명령으로 검증). + - `cargo test -p kebab-source-fs --test code_meta -j 1` → 1 passing (`--test` flag 로 integration binary 만 선택, false-positive 회피). + +### Step 9: `kebab-parse-code` 측 atomic cleanup — skip.rs 삭제 + lang.rs narrow edit + lib.rs (skip + lang) 재구성 + tests/{lang,skip}.rs 삭제 + 헤더 doc rewrite + +- **Files affected**: + - `crates/kebab-parse-code/src/skip.rs` (삭제). + - `crates/kebab-parse-code/src/lang.rs` (narrow edit). + - `crates/kebab-parse-code/src/lib.rs` (edit — skip 줄 + lang 줄 + 헤더 doc). + - `crates/kebab-parse-code/tests/lang.rs` (삭제). + - `crates/kebab-parse-code/tests/skip.rs` (삭제). +- **Action**: + - **(a) `crates/kebab-parse-code/src/skip.rs` 파일 삭제** (spec §3.6 skip.rs 행). + - **(b) `crates/kebab-parse-code/src/lang.rs` narrow edit** (spec §3.6 lang.rs 행 + BLOCKER #2): + - **line 7 의 `use std::path::Path;` 삭제** (BLOCKER #2 — `code_lang_for_path` 가 유일한 consumer 였음.
보존되는 `module_path_for_python` / `module_path_for_tsjs` 둘 다 `workspace_path: &str` 인자, 보존되는 2 unit test 도 `Path::new(...)` 부재. 미삭제 시 `cargo clippy -- -D warnings` 의 `unused_imports` lint fail). + - 함수 본문 `pub fn code_lang_for_path(path: &Path) -> Option<&'static str> { ... }` (line 17-66) 전체 삭제. + - `#[cfg(test)] mod tests` 안의 `tier2_basename_takes_precedence_over_extension` (line 147-158) + `tier2_extension_fallback` (line 161-168) unit test 삭제. + - **보존**: `pub fn module_path_for_python(...)` (line 77-103), `pub fn module_path_for_tsjs(...)` (line 107-115), `#[cfg(test)] mod tests` 안의 `module_path_for_python_strips_src_roots_and_extensions` (line 122-133), `module_path_for_tsjs_keeps_slashes_and_strips_ext` (line 136-144) — caller 는 본 crate 자체 (`python.rs:78`, `typescript.rs:88`, `javascript.rs:95`). (round 3 MINOR #1 — off-by-one 정정.) + - 헤더 doc (line 1-5) 한 단락 rewrite (spec §3.6 MINOR #2; round 3 MINOR #2 — line 7 `use std::path::Path;` 는 별도 sub-bullet 의 삭제 대상이라 doc range 에서 제외): + - 기존: `//! Canonical extension → language identifier mapping (spec §3.5).\n//!\n//! Lowercase canonical identifiers, matching tree-sitter parser conventions:\n//! \`rust\`, \`python\`, ...\n` + - 신규: `//! Workspace-relative path → module-path conversion for P10-1B AST extractors (Python dotted form / TS+JS slash form). 본 module 의 \`code_lang_for_path\` 는 v0.18.0+ 부터 \`kebab-source-fs::code_meta\` 로 이동.` + - **(c) `crates/kebab-parse-code/src/lib.rs` edit** (spec §3.6 lib.rs 행 전체): + - `pub mod skip;` (line 27) 삭제. + - `pub use lang::{code_lang_for_path, module_path_for_python, module_path_for_tsjs};` → `pub use lang::{module_path_for_python, module_path_for_tsjs};` (`code_lang_for_path` 제거). + - `pub use skip::{BUILTIN_BLACKLIST, is_generated_file, is_oversized};` (line 40) 삭제. + - 헤더 doc `//!` 단락 (line 1-14) rewrite (spec §3.6 MINOR #3): + - 기존: `//! \`kebab-parse-code\` — language-aware parsing for code corpora.\n//!\n//! Phase 1A-1 ships infrastructure only:\n//! ... 4 bullet ... 
//!\n//! Per-language parser modules ...` + - 신규: `//! \`kebab-parse-code\` — language-aware parsing for code corpora.\n//!\n//! Repo metadata (\`detect_repo\`) + per-language AST extractors (Rust = P10-1A-2, Python/TS/JS = P10-1B, Go = P10-1C-Go, Java+Kotlin = P10-1C-JK, C+C++ = P10-1D).\n//!\n//! lang detect (\`code_lang_for_path\`) + pre-ingest skip helpers (\`is_generated_file\`, \`is_oversized\`, \`BUILTIN_BLACKLIST\`) 는 v0.18.0+ 부터 \`kebab-source-fs::code_meta\` 로 이동 — refactor 2026-05-26.\n//!\n//! 본 crate 의 boundary 는 design §8 — store / embed / llm / rag / UI 의존 금지.` + - **(d) `crates/kebab-parse-code/tests/lang.rs` 삭제** (4 test case 가 §3.9 매핑대로 Step 2 의 source-fs unit 으로 이미 이전됨). + - **(e) `crates/kebab-parse-code/tests/skip.rs` 삭제** (7 test case 가 §3.9 매핑대로 Step 2 (6) + Step 8 (1) 으로 이미 이전됨). + - **변경 안 함** (spec §3.6 + §3.8): `crates/kebab-parse-code/Cargo.toml` (CRITICAL #1 — `[dev-dependencies] tempfile` 는 `tests/repo.rs:4` 가 계속 소비), 9 grammar AST extractor file (`c.rs ~ rust.rs`), `repo.rs`, `scaffold.rs`, `tests/repo.rs`. +- **Spec reference**: §3.6 (전체), §6.5. +- **Exit gate** — atomic clippy gate (모든 sub-action 적용 후 단발 검증): + - `cargo check -p kebab-parse-code -j 1` clean. + - `cargo clippy -p kebab-parse-code --all-targets -j 1 -- -D warnings` clean (Path import + 함수 + unit test + tests/{lang,skip}.rs + lib.rs export 모두 동시 정리되어 unused-import / dead-code lint 0). + - `cargo test -p kebab-parse-code -j 1` 통과 (module_path_for_* + AST extractor + repo + 9 grammar 보존). + - **§6.5 sibling 안전망**: `cargo test -p kebab-parse-code -j 1 module_path_for_` → 2 passing (`module_path_for_python_strips_src_roots_and_extensions` + `module_path_for_tsjs_keeps_slashes_and_strips_ext`). + - **추가 가드 — lib.rs 의 skip module 등록 + re-export 0 건** (MAJOR #2 명료화): + ```sh + grep -nE '^pub mod skip|^pub use skip' crates/kebab-parse-code/src/lib.rs | wc -l + ``` + → **0**. 헤더 doc 산문 내 단어 "skip" 은 미터치 (의미 보존). 본 정규식은 declaration / re-export 만 잡고 산문 미스매치. 
+ - **추가 가드 — lang.rs 의 code_lang_for_path 부재**: + ```sh + grep -nE '^pub fn code_lang_for_path|^use std::path::Path' crates/kebab-parse-code/src/lang.rs | wc -l + ``` + → **0**. 함수 정의 + Path import 둘 다 사라졌는지 확인. + - **sub-action 가시성 가드** (round 3 optional GAP #2 — 5 sub-action atomic 의 partial-apply 시 어느 sub-action 빠졌는지 clippy 결과 추적 전에 즉시 가시): + ```sh + test ! -f crates/kebab-parse-code/src/skip.rs # (a) skip.rs 삭제 + ! grep -qE '^pub fn code_lang_for_path' crates/kebab-parse-code/src/lang.rs # (b1) 함수 본문 제거 + ! grep -qE '^use std::path::Path' crates/kebab-parse-code/src/lang.rs # (b2) Path import 제거 (BLOCKER #2) + ! grep -qE '^pub mod skip|^pub use skip|^pub use lang::.*code_lang_for_path' crates/kebab-parse-code/src/lib.rs # (c) skip/lang code_lang_for_path 줄 부재 — round 4 CRITICAL #1: 세 번째 alternative `^pub use lang::.*` anchor 로 한정, 새 헤더 doc 의 backtick 산문 `code_lang_for_path` 산문 매치 회피 + test ! -f crates/kebab-parse-code/tests/lang.rs # (d) tests/lang.rs 삭제 + test ! -f crates/kebab-parse-code/tests/skip.rs # (e) tests/skip.rs 삭제 + ``` + 여섯 줄 모두 exit 0. 한 줄이라도 fail → partial-apply 진단 (clippy 결과 분석 전에 어느 sub-action 빠졌는지 즉시 식별). + +### Step 10: `kebab-core` docstring + `ARCHITECTURE.md` + 설계 §8 graph 갱신 + workspace 회귀 + 1 clean commit — **closure** + +- **Files affected**: + - `crates/kebab-core/src/metadata.rs` (line 36 doc 한 줄). + - `docs/ARCHITECTURE.md` (산문 한 줄 추가). + - `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` (§8 graph 두 줄 — edge 제거 (a) + inline note 추가 (b)). +- **Action 1 — `kebab-core/src/metadata.rs:36` docstring 정리** (spec §3.5 comment-only row, MINOR #5 honest wording): + - 기존: `Set by kebab_parse_code::lang::code_lang_for_path.` + - 신규: `Set by the local-filesystem source connector during ingest.` + - **Rationale (honest wording, MINOR #5)**: backtick inline code 는 rustdoc 자동 intra-doc-link 처리 대상 아님 (대괄호 부재). 
또한 `kebab-core` 는 `kebab-parse-code` 의존 0 (design §8 — kebab-core 는 도메인 타입만, 다른 kebab-* crate 미참조) — cross-crate resolution 시도조차 안 됨. 따라서 본 edit 의 목적은 "broken intra-doc link 회피" 가 아니라 **stale dependency reference 제거** (design §8 cross-crate forbidden 룰 정합 — 변경 후의 surface 위치를 정확히 반영). +- **Action 2 — `docs/ARCHITECTURE.md` 산문 갱신** (spec §7 row "docs/ARCHITECTURE.md"): + - line 134 단락 (현재 wording: `'kebab-parse-code' 의 외부 tree-sitter grammar crate 의존: P10-1A-2 에서 'tree-sitter-rust' 추가, ...`) 끝에 한 줄 추가: + - `v0.18.0+ 부터 'kebab-source-fs' 는 자체 'code_meta' 모듈 (lang detect + skip helpers + BUILTIN_BLACKLIST) 을 보유, kebab-parse-code 와 분리 (refactor 2026-05-26).` + - Mermaid 변경 0 (`srcfs → pcode` arrow 부재). +- **Action 3 — frozen design §8 graph 갱신** (spec §7 row "design §8 graph" + MAJOR #4): + - frozen design 의 line 1460-1461 block: + ```text + ├─> kebab-source-fs + │ └─> kebab-parse-code (p10-1A-1: lang detect / repo detect / skip policy) + ``` + → + ```text + ├─> kebab-source-fs + │ (p10-2 이후: lang detect + skip policy 내장; kebab-parse-code 와 분리) + ``` + - 즉: (a) edge 한 줄 제거, (b) inline note 한 줄 추가. Sibling row `├─> kebab-parse-code\n│ └─> kebab-core ...` (line 1464-1465) 는 그대로 — `kebab-parse-code` 가 워크스페이스 의 별도 crate 로 계속 존재. +- **Action 4 — 변경 0 명시** (spec §7 cross-check): + - `README.md` / `HANDOFF.md` / `tasks/HOTFIXES.md` / `tasks/INDEX.md` 변경 0. + - `tasks/p10/p10-1a-1-code-ingest-framework.md`, `tasks/p10/p10-2-tier2-resource-aware.md`, `docs/superpowers/plans/2026-05-15-p10-1a-1-code-ingest-framework.md`, `docs/superpowers/plans/2026-05-20-p10-2-tier2-resource-aware.md` — frozen 보존 (spec §1.6 + §6.6 — "may" reference 는 contract violation 0). + - workspace `Cargo.toml` `version` bump 0. 양 crate `[features]` 변경 0.
+- **Action 5 — workspace 회귀** (acceptance): + ```sh + cargo clippy --workspace --all-targets -j 1 -- -D warnings + cargo test --workspace --no-fail-fast -j 1 2>&1 \ + | awk '/^test result: ok\./ {for(i=1;i<=NF;i++) if($i=="passed;") sum += $(i-1)} END {print sum}' + cargo test -p kebab-app --test code_ingest_smoke -j 1 # BLOCKER #1 정정 — `--test` 강제 + RUSTDOCFLAGS="-D rustdoc::broken-intra-doc-links" cargo doc -p kebab-core --no-deps -j 1 # MINOR #3 — flag 강제 + cargo build --release -j 1 + cargo tree -p kebab-source-fs | grep tree-sitter | wc -l # → 0 + ``` +- **Action 6 — design §8 + ARCHITECTURE 갱신 검증** (MAJOR #3 — falsifiable acceptance, 3 idempotent grep): + ```sh + # (i) 옛 tree-edge 부재 확인 — 'kebab-source-fs └─> kebab-parse-code (p10-1A-1: ...)' 구문이 사라졌는지 + # (round 3 CRITICAL #1 — 산문 inline note 의 'kebab-parse-code 와 분리' 와 syntactic 구분 위해 tree-edge format 까지 anchor) + ! grep -qE '└─>\s*kebab-parse-code\s*\(p10-1A-1' \ + docs/superpowers/specs/2026-04-27-kebab-final-form-design.md + + # (ii) inline note 추가 확인 + test "$(grep -c 'p10-2 이후: lang detect + skip policy 내장' \ + docs/superpowers/specs/2026-04-27-kebab-final-form-design.md)" -ge 1 + + # (iii) ARCHITECTURE.md 산문 한 줄 확인 + test "$(grep -c 'kebab-source-fs.*code_meta.*kebab-parse-code 와 분리' docs/ARCHITECTURE.md)" -ge 1 + ``` + 세 줄 모두 통과해야 acceptance 충족. +- **Action 7 — 1 clean commit** (spec §5 + plan §5): + - commit message draft (한국어, spec §5.2 의 baseline N 인용): + ```text + refactor(source-fs): drop kebab-parse-code dep — extract code_meta module + + Move 4 surface (BUILTIN_BLACKLIST + 3 helper fn) from kebab-parse-code + into kebab-source-fs::code_meta. Drops 9 tree-sitter grammar drag from + source-fs's dep tree (cargo tree -p kebab-source-fs | grep tree-sitter + → 0 lines). 
+ + Visibility 정책 (mixed): + - BUILTIN_BLACKLIST: pub (design §5.2 frozen contract — integration + test 의 외부 검증 surface) + - 3 helper fn: pub(crate) (source-fs 내부 호출만) + + Test 이전: 12 unit (src/code_meta.rs::tests) + 1 integration + (tests/code_meta.rs::builtin_blacklist_has_exactly_six_entries). + kebab-parse-code 의 module_path_for_python / module_path_for_tsjs 와 + 그 2 unit test 는 보존 (sibling caller = python.rs / typescript.rs / + javascript.rs). + + Spec: docs/superpowers/specs/2026-05-26-source-fs-dep-lightening-spec.md + Design §8 graph: edge 'kebab-source-fs → kebab-parse-code' 제거 + + inline note 추가. + + Verification: + - cargo test --workspace --no-fail-fast -j 1 → baseline N maintained + - cargo test -p kebab-app --test code_ingest_smoke -j 1 → pass + - cargo clippy --workspace --all-targets -j 1 -- -D warnings → clean + - cargo tree -p kebab-source-fs | grep tree-sitter → 0 lines + - workspace.version bump 0, wire schema impact 0, V00X 0. + ``` +- **Spec reference**: §3.5 (comment-only), §5.1, §5.2, §5.3, §6.5, §7. +- **Exit gate** — plan exit gate 와 동일 (acceptance): + - Action 5 의 6 cli 모두 통과. + - Action 6 의 3 idempotent grep 모두 통과. + - 1 commit on `refactor/source-fs-dep-lightening` branch. + +## §3 Step dependency graph + +```text +Step 1 (baseline + env) + ↓ +Step 2 (code_meta.rs 신설 — dead file 까지) + ↓ +Step 3 (lib.rs mod + pub use — 양쪽 surface 공존) + ↓ +Step 4 (media.rs callsite — 1 곳) + ↓ +Step 5 (walker.rs callsite — 2 곳 + 주석 3) + ↓ +Step 6 (connector.rs callsite — 2 곳) + ↓ +Step 7 (Cargo.toml dep 제거) ← **anchor: G1 + G5 달성, source-fs 측 완료** + ↓ +Step 8 (integration test 신설) + ↓ +Step 9 (parse-code atomic cleanup — skip.rs 삭제 + lang.rs narrow + lib.rs + tests 삭제 + 헤더 doc) + ↓ +Step 10 (kebab-core doc + ARCHITECTURE + design §8 + workspace 회귀 + commit) ← **acceptance: G2/G3/G4 달성, plan complete** (NIT #1) +``` + +**Linear chain — 모든 step 직렬, parallelism 0.** 근거: + +- Step 3 가 Step 2 의 file 존재 전제. 
+- Step 4-6 의 각 callsite migration 은 Step 3 의 surface 등록 전제. 순서는 무관하지만 (file 별 독립) plan checklist 의 추적 단순성을 위해 linear. +- Step 7 (Cargo.toml dep 제거) 는 Step 4-6 의 모든 callsite migration 완료 전제. 그렇지 않으면 cargo build fail. +- Step 8 (integration test) 가 Step 7 의 `pub use code_meta::BUILTIN_BLACKLIST;` (= Step 3 에서 등록) + source-fs 의 parse-code 무의존 (= Step 7) 양쪽 전제. +- Step 9 (parse-code 측 surface 삭제) 는 Step 7 + Step 8 의 source-fs 측 완료 후만 안전. atomic clippy gate — skip + lang + Path import + tests 모두 동시 정리되어 `cargo clippy -p kebab-parse-code -- -D warnings` 가 한 번에 통과. +- Step 10 (closure) 가 Step 9 의 `pub use lang::code_lang_for_path` 제거 후. design §8 graph + ARCHITECTURE 갱신 + workspace 회귀 + commit 의 단일 closure step. + +## §4 Verification gate (acceptance) + +Plan exit gate = spec §5 + Step 9 의 atomic gate + Step 10 의 acceptance gate. + +### §4.1 Source-fs 측 (spec §5.1) — Step 8 시점 통과 확인 + +```sh +cargo test -p kebab-source-fs -j 1 code_meta +cargo test -p kebab-source-fs --test code_meta -j 1 # integration binary 단독 검증 +``` + +기대: 첫 명령 12 passing (unit 12 개만 — name filter `code_meta` 는 test fn 경로에만 매치하므로 integration fn `builtin_blacklist_has_exactly_six_entries` 는 선택되지 않음) + 둘째 명령 1 passing (integration 단독). + +### §4.2 Workspace 회귀 (spec §5.2) — Step 10 시점 통과 확인 + +```sh +cargo test --workspace --no-fail-fast -j 1 2>&1 \ + | awk '/^test result: ok\./ {for(i=1;i<=NF;i++) if($i=="passed;") sum += $(i-1)} END {print sum}' +cargo test -p kebab-app --test code_ingest_smoke -j 1 # BLOCKER #1 — --test flag 강제 +``` + +기대: +- workspace test sum: Step 1 의 baseline N 과 동일 (회귀 0). +- code_ingest_smoke: `--test code_ingest_smoke` 가 16+ fn 모두 실행 (substring filter 가 아니라 binary 선택). `test result: ok. N passed; 0 failed` 의 N ≥ 16 확인. + +### §4.3 Clippy + build + dep tree (spec §5.3) — Step 10 시점 통과 확인 + +```sh +cargo clippy --workspace --all-targets -j 1 -- -D warnings +cargo build --release -j 1 +cargo tree -p kebab-source-fs | grep tree-sitter | wc -l +RUSTDOCFLAGS="-D rustdoc::broken-intra-doc-links" cargo doc -p kebab-core --no-deps -j 1 # MINOR #3 +``` + +기대: +- clippy: clean.
+- build: clean release binary. +- `cargo tree` grep: **0 줄** (G5 final acceptance). +- `cargo doc`: 0 broken intra-doc link. + +### §4.4 Design §8 + ARCHITECTURE 갱신 acceptance (MAJOR #3) — Step 10 시점 통과 확인 + +```sh +# (i) 옛 tree-edge 부재 확인 — round 3 CRITICAL #1 정정 (산문 inline note 와 syntactic 구분) +! grep -qE '└─>\s*kebab-parse-code\s*\(p10-1A-1' \ + docs/superpowers/specs/2026-04-27-kebab-final-form-design.md + +# (ii) inline note 추가 확인 +test "$(grep -c 'p10-2 이후: lang detect + skip policy 내장' \ + docs/superpowers/specs/2026-04-27-kebab-final-form-design.md)" -ge 1 + +# (iii) ARCHITECTURE.md 산문 한 줄 확인 +test "$(grep -c 'kebab-source-fs.*code_meta.*kebab-parse-code 와 분리' docs/ARCHITECTURE.md)" -ge 1 +``` + +세 줄 모두 통과해야 G4 acceptance 충족. + +### §4.5 Optional informational only (spec §5.4) — acceptance 가 아님 + +PR description 에 부기 가능. plan exit gate 에 포함 안 함 (MINOR #4 — `informational only`). + +## §5 Commit strategy + +**1 clean commit** 권장 — 본 refactor 는 internal-only + 10 step 이 모두 Step 10 의 verification gate 한 묶음으로 묶임. step-별 atomic commit 으로 쪼개면 중간 commit 이 cargo build 깨진 상태 (예: Step 5 후 Step 6 전) 거나 step 별 의미 단편이라 review 가치 낮음. + +Commit message draft = §2 Step 10 Action 7 의 draft 그대로 유지 (~30 줄, substantive surface — round 2 open-question 답변 4 의 권장). + +**push / PR 생성 0** — team-lead 책임. + +## §6 Risks + mitigation + +### §6.1 중간 단계 cargo build 깨짐 (step ordering 깨짐) + +- **Risk**: Step 4-6 의 callsite migration 중 한 file 만 migrate 하고 Step 7 (Cargo.toml dep 제거) 로 점프하면 다른 file 의 `kebab_parse_code::*` 가 unresolved → cargo build fail. +- **Mitigation**: 각 step 의 exit gate 가 `cargo check -p kebab-source-fs -j 1` 통과 강제. Step 6 의 exit gate 의 보조 grep — `kebab_parse_code` 잔여 0 확인. + +### §6.2 Step 9 의 atomic clippy gate — partial-apply 위험 (BLOCKER #2 + MAJOR #2) + +- **Risk**: Step 9 의 5 sub-action 중 일부만 적용하면: + - skip.rs 파일 남기고 lib.rs 만 제거 → `unused file` (warn 0, but stale). 
+ - lang.rs 의 `code_lang_for_path` 함수만 지우고 `use std::path::Path;` 보존 → `unused_imports` warn → clippy `-D warnings` fail. + - lib.rs 의 `pub use lang::code_lang_for_path` 보존 채로 lang.rs 함수 본문만 삭제 → cargo check 단계 unresolved-name fail. + - 헤더 doc 의 산문 내 "skip" 단어 미터치 (= 의도) 인데, exit gate 의 grep 패턴이 산문까지 매치하면 (= `grep -n "skip"`) self-contradiction. +- **Mitigation**: + - Step 9 가 **atomic step** — 5 sub-action 모두 적용 후만 exit gate 검증. + - Step 9 exit gate 의 grep 패턴이 **declaration / re-export 전용 정규식**: `grep -nE '^pub mod skip|^pub use skip'`. 산문 미스매치 (MAJOR #2 명료화). + - BLOCKER #2 의 Path import 삭제는 Step 9 (b) 의 첫 줄로 명시. + +### §6.3 Sibling `module_path_for_*` 의 accidental drop (CRITICAL #2) + +- **Risk**: Step 9 의 lang.rs narrow edit 시 `module_path_for_python` / `module_path_for_tsjs` 함수 또는 그 unit test 를 같이 지움 → P10-1B AST extractor (`python.rs:78`, `typescript.rs:88`, `javascript.rs:95`) 가 compile fail 또는 e2e fixture 가 runtime fail. +- **Mitigation**: + - Step 9 의 exit gate: `cargo test -p kebab-parse-code -j 1 module_path_for_` → 2 passing 명시. + - Step 10 Action 5 의 workspace 회귀: `cargo test -p kebab-app --test code_ingest_smoke -j 1` 명시 (가장 강한 안전망, BLOCKER #1 정정 후). + - 2 단 cover. + +### §6.4 `kebab-core::metadata.rs` stale reference + +- **Risk**: Step 9 가 `pub use lang::code_lang_for_path` 를 제거하면 `kebab-core/src/metadata.rs:36` 의 backtick inline code `` `kebab_parse_code::lang::code_lang_for_path` `` 가 stale (실제 path 미존재). +- **Rationale 정정 (MINOR #5)**: backtick inline code 는 rustdoc 자동 intra-doc-link 처리 대상 아님 (대괄호 부재) + `kebab-core` 가 `kebab-parse-code` 의존 0 → cross-crate resolution 시도 0. 따라서 "broken intra-doc link 회피" 가 아니라 **stale dependency reference 제거** (design §8 cross-crate forbidden 룰 정합). +- **Mitigation**: Step 10 Action 1 의 doc rewrite — abstract wording ("Set by the local-filesystem source connector during ingest"). 
+- **추가 가드**: Step 10 Action 5 의 `RUSTDOCFLAGS="-D rustdoc::broken-intra-doc-links" cargo doc -p kebab-core --no-deps -j 1` — 만약 향후 누군가 대괄호 link 로 다시 wrapping 하더라도 catch (MINOR #3 — flag 강제). + +### §6.5 4 surface 외 hidden callsite (spec §6.2) + +- **Risk**: 어떤 file 이 `kebab_parse_code::skip::BUILTIN_BLACKLIST` 같은 풀 path 또는 alias / re-export 로 4 surface 를 우회 호출. +- **Mitigation**: + - spec §1.5 의 grep 결과 (NIT #1 보강) — 외부 명시 path consumer 0 확정. + - Step 6 의 추가 가드 grep — source-fs 측 잔여 0 확인. + - Step 9 후 추가 가드: `grep -rn "kebab_parse_code::skip\|kebab_parse_code::lang::code_lang" crates/ --include="*.rs"` → 0 줄 (parse-code 자체 test file 도 Step 9 에서 삭제됨). + +### §6.6 cargo `-j 1` 미준수 시 OOM + +- **Risk**: workspace test (Step 10 의 `cargo test --workspace`) 시 18 integration-test binary 동시 link → linker SIGKILL (CLAUDE.md "Build / test / lint" 문서화된 패턴). 본 plan 의 Step 8 이후 19 integration-test binary 가 됨 (`kebab-source-fs/tests/code_meta.rs` 추가). +- **Note (NIT #2)**: source-fs 는 lance / datafusion 무링크 → 추가 1 개 binary 의 link cost 증분 단발적 + RAM peak 영향 0. 본 plan 의 `-j 1` 룰 자체와 무관 (lance / datafusion 합산 link 폭주 vs 단일 lightweight binary). +- **Mitigation**: 모든 cargo workspace 명령 `-j 1` 명시. plan §0 의 env 룰 강조. + +### §6.7 design §8 graph 갱신 형식 misread + +- **Risk**: Step 10 의 frozen design §8 graph 두 줄 갱신 시 다른 row 영향을 줄 수 있음 (예: `kebab-app` 의 sibling row). +- **Mitigation**: + - Step 10 Action 3 의 정확한 before/after block 인용. line range 1460-1461 만 변경, 다른 row 변경 0. + - Step 10 Action 6 의 3 idempotent grep (MAJOR #3) — falsifiable acceptance. + +## §7 Out of scope (plan-level) + +Spec §8 (out of scope) 전부 + plan-level 추가: + +- `kebab-parse-code` 의 9 tree-sitter grammar feature gating / dynamic loading — v0.19+ candidate. +- `kebab-parse-code/src/repo.rs` ownership 검토. +- `kebab-core::media.rs` 와 `kebab-source-fs::code_meta` 의 medium-vs-lang detection 통합 (Lens 3). +- `kebab-chunk` Tier 2 helper 정리 (Lens 2). +- `kebab-normalize` 흡수, `kebab-parse-types` 추가 정리 (Lens 1 다른 묶음). 
+- `deny.toml` 신설 / cargo-deny CI 도입 (spec §4.6 + §6.7 — design §8 의 미래 state). +- `tasks/INDEX.md` doc-sync (spec §1.6 — INDEX.md 가 stale 한 P10 phase status 표시. 별도 PR). + +**HOTFIXES.md 갱신 0** (spec §7 명시 — design §8 자체를 same-PR 로 갱신하므로 frozen vs ship deviation 0, CLAUDE.md HOTFIXES rule 미트리거). + +## §8 References + +- Spec: `docs/superpowers/specs/2026-05-26-source-fs-dep-lightening-spec.md` (v3, 623 lines, Round 1+2+3 critic APPROVE + round 2 reflection 시 §5.2/§6.5 cli micro-patch 동반). +- Frozen design: `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §3.5, §3.7b, §5.2, §8 (graph block line 1455-1478). +- `docs/ARCHITECTURE.md` line 134 단락 — same-PR 갱신 target. +- Frozen task spec 보존 (spec §1.6, §6.6 분석 결과): + - `tasks/p10/p10-1a-1-code-ingest-framework.md` line 23 ("may" reference, contract violation 0). + - `tasks/p10/p10-2-tier2-resource-aware.md`. + - `docs/superpowers/plans/2026-05-15-p10-1a-1-code-ingest-framework.md`. + - `docs/superpowers/plans/2026-05-20-p10-2-tier2-resource-aware.md`. +- Sibling caller evidence (CRITICAL #2 안전망): + - `crates/kebab-parse-code/src/python.rs:78`. + - `crates/kebab-parse-code/src/typescript.rs:88`. + - `crates/kebab-parse-code/src/javascript.rs:95`. + - `crates/kebab-app/tests/code_ingest_smoke.rs:165, 242, 319`. +- Baseline evidence (CRITICAL #1): + - `crates/kebab-parse-code/tests/repo.rs:4` — `tempfile::TempDir` 사용 (dev-dep 보존 정당화). +- BLOCKER #1 evidence (verifier-plan round 1): + - `crates/kebab-app/tests/code_ingest_smoke.rs` 의 16+ `#[test]` fn 중 substring `code_ingest_smoke` 0건 — bare `cargo test -p kebab-app code_ingest_smoke` 는 false-positive PASS (0 tests run + exit 0). `--test code_ingest_smoke` 강제. 
+ +## §9 Round 2 closure status + +| Finding | Severity | 반영 | 위치 | +|---------|----------|------|------| +| **BLOCKER #1** `cargo test code_ingest_smoke` substring filter false-positive | BLOCKER | **reflected** | spec §5.2 + §6.5 cli 정정 (`--test code_ingest_smoke`), plan Step 10 Action 5, plan §4.2, plan §6.3 mitigation. | +| **BLOCKER #2** lang.rs `use std::path::Path;` dead import → clippy fail | BLOCKER | **reflected** | plan Step 9 (b) 의 첫 줄 명시, plan §6.2 mitigation, plan §8 references (BLOCKER #2 evidence). | +| **MAJOR #2** Step 9 grep "skip" 가드 자가 모순 | MAJOR | **reflected** (option a — declaration-only regex) | plan Step 9 exit gate 의 `grep -nE '^pub mod skip\|^pub use skip'` 패턴 명시, plan §6.2 mitigation. | +| **MAJOR #3** design §8 갱신 검증 acceptance 누락 | MAJOR | **reflected** | plan Step 10 Action 6 의 3 idempotent grep, plan §4.4 falsifiable acceptance, plan §6.7 mitigation. | +| **MINOR #1** baseline N awk cli 미명시 | MINOR | **reflected** | plan Step 1 Action 의 awk one-liner, plan §4.2 동일 cli 재인용. | +| **MINOR #2** skip.rs line range 14-24 → 17-24 | MINOR | **reflected** | plan Step 2 Action — `skip.rs **line 17-24**` 정정. | +| **MINOR #3** rustdoc broken-intra-doc-links flag 강제 | MINOR | **reflected** | plan Step 10 Action 5 의 `RUSTDOCFLAGS="-D rustdoc::broken-intra-doc-links"`, plan §4.3 동일. | +| **MINOR #4** Step 2 consolidated imports 불명확 | MINOR | **reflected** | plan Step 2 Action 의 4 줄 import 블록 명시. | +| **MINOR #5** Step 11 rustdoc broken-link rationale 의심 | MINOR | **reflected** (honest wording) | plan Step 10 Action 1 Rationale 단락 — "stale dependency reference 제거 (design §8 cross-crate forbidden)", plan §6.4 동일 rationale. | +| **MINOR #6** Step 9+10 합치기 + Step 11→12 흡수 (10 step) | MINOR | **reflected** | plan 전체 — 12 → 10 step. Step 9 atomic clippy gate, Step 10 closure step. | +| **NIT #1** §3 dep graph 에 Step 10 acceptance annotation | NIT | **reflected** | plan §3 — `Step 10 ... ← **acceptance: G2/G3/G4 달성, plan complete**` annotation. 
| +| **NIT #2** §6.6 link cost rationale 보강 | NIT | **reflected** | plan §6.6 — "source-fs 는 lance / datafusion 무링크 → 추가 1 개 binary 의 link cost 증분 단발적 + RAM peak 영향 0" 한 줄 inline. | + +**Round 2 closure summary**: 2 BLOCKER + 1 MAJOR (BLOCKER #2 = critic MAJOR #1 dedup) + 2 MAJOR (verifier-plan Gap #2/#3) + 6 MINOR + 2 NIT = **13 finding 모두 reflected**, rejection 0. Spec micro-patch 2 곳 동반 (spec §5.2 + §6.5, cli 1 줄씩, round 1-3 closure 영향 0). + +### §9.1 Spec micro-patch summary (round 2) + +| Spec section | Edit | Rationale | +|--------------|------|-----------| +| §5.2 (line 467) | `cargo test -p kebab-app code_ingest_smoke -j 1` → `cargo test -p kebab-app --test code_ingest_smoke -j 1` + 산문 한 단락 (false-positive 회피 근거) | BLOCKER #1 | +| §6.5 (정정된 안전망 두 번째 cli) | `cargo test -p kebab-app code_ingest_smoke -j 1` → `cargo test -p kebab-app --test code_ingest_smoke -j 1` + 인라인 cross-link "(`--test` flag 강제 — verifier-plan round 1 Gap #1)" | BLOCKER #1 | + +두 edit 모두 wording-only — spec round 1-3 closure status table (§10, §10.1) 의 finding-to-edit 매핑에 영향 0. spec round 4 critic 진입 불요 (verifier-plan round 1 의 Gap #1 가 cli precision 정정이므로 plan reflection 의 부수 작업). + +### §9.2 Round 3 closure status + +| Finding | Severity | 반영 | 위치 | +|---------|----------|------|------| +| **NEW CRITICAL #1** Step 10 Action 6 (i) grep self-contradictory — 'kebab-source-fs' 줄 다음 prose 의 'kebab-parse-code 와 분리' substring 이 grep 에 매치 → 올바른 edit 적용 후에도 영구 FAIL | CRITICAL | **reflected** | plan Step 10 Action 6 의 (i) 와 plan §4.4 의 (i) 두 곳 모두 `! grep -qE '└─>\s*kebab-parse-code\s*\(p10-1A-1' ...` 로 교체 — tree-edge format anchor 가 산문과 syntactic 구분. plan §6.7 mitigation cross-link 도 본 정정이 자동 cover (mitigation 본문이 "정확한 before/after block + 3 idempotent grep" wording 만 사용, 본 정정 후에도 의미 보존). | +| **NEW MINOR #1** lang.rs 보존 unit test line range off-by-one | MINOR | **reflected** | plan Step 9 (b) 보존 sub-bullet — `(line 121-133)` → `(line 122-133)`, `(line 135-144)` → `(line 136-144)` + 정정 근거 inline. 
| +| **NEW MINOR #2** lang.rs 헤더 doc rewrite "line 1-7" 표기 | MINOR | **reflected** | plan Step 9 (b) 의 헤더 doc sub-bullet — `(line 1-7)` → `(line 1-5)` + line 7 의 Path import 는 별도 sub-bullet 소관임을 명시. | +| **NEW NIT #1** double-space cosmetic | NIT | **reflected** | plan 본문 4 위치 (line 266 `cargo test --workspace`, line 268 `cargo test -p kebab-app`, line 271 `cargo tree -p kebab-source-fs`, line 388 동일) — `replace_all` 로 single-space normalize. bash 무영향. | +| **NEW (Optional) GAP #2** Step 9 sub-action 가시성 보강 | NIT (optional, low-severity) | **reflected** (적용) | plan Step 9 exit gate 끝에 "sub-action 가시성 가드" 6 줄 추가 — `test ! -f skip.rs`, lang.rs Path import + 함수 부재, lib.rs 의 skip + code_lang_for_path 부재 (third alternative 를 `^pub use lang::.*` anchor 로 한정 — round 4 critic-plan 보강, 새 헤더 doc breadcrumb 산문 매치 회피), tests/{lang,skip}.rs 부재. partial-apply 시 clippy 결과 분석 전에 어느 sub-action 빠졌는지 직접 식별. | + +**Round 3 closure summary**: 1 NEW CRITICAL + 2 NEW MINOR + 1 NEW NIT + 1 NEW OPTIONAL = **5 finding 모두 reflected**, rejection 0. spec edit 0 (round 3 의 모든 정정은 plan 본문 단독). round 2 closure (13 row) 영향 0 — round 3 정정은 round 2 가 확립한 atomic structure 위에 cli precision 만 보강. + +### §9.3 Round 4 closure status + +| Finding | Severity | 반영 | 위치 | +|---------|----------|------|------| +| **NEW CRITICAL #1** Step 9 sub-action 가드 (c) 줄의 세 번째 alternative `code_lang_for_path` 가 anchor 부재 → 새 헤더 doc 의 backtick 산문 `` `code_lang_for_path` `` 매치 → gate 영구 false-FAIL (round 2 MAJOR #2 와 동일 class — 산문 substring × unanchored regex) | CRITICAL | **reflected** | plan Step 9 exit gate 의 sub-action 가시성 가드 (c) 줄 — `code_lang_for_path` → `^pub use lang::.*code_lang_for_path` (re-export 라인만 매치, doc comment 산문 미터치). §9.2 GAP #2 row 의 wording 도 round 4 보강 inline cross-link. | + +**Round 4 closure summary**: 1 NEW CRITICAL = **1 finding reflected**, rejection 0. spec edit 0, plan 변경 line ≤ 2 (regex 1 줄 + §9.2 wording 한 alternative 추가). 
round 1-3 closure 영향 0 (anchored alternative 의 추가는 기존 sub-action 의 부재 검증 의미 동일, 새 산문 매치 회피만 보강). diff --git a/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md b/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md index 35aea1c..a479209 100644 --- a/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md +++ b/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md @@ -1458,7 +1458,7 @@ pub trait JobRepo { kebab-cli, kebab-tui, kebab-desktop └─> kebab-app ├─> kebab-source-fs - │ └─> kebab-parse-code (p10-1A-1: lang detect / repo detect / skip policy) + │ (p10-2 이후: lang detect + skip policy 내장; kebab-parse-code 와 분리) ├─> kebab-parse-md / kebab-parse-pdf / kebab-parse-image / kebab-parse-audio │ └─> kebab-parse-types (parser intermediate) ├─> kebab-parse-code diff --git a/docs/superpowers/specs/2026-05-26-source-fs-dep-lightening-spec.md b/docs/superpowers/specs/2026-05-26-source-fs-dep-lightening-spec.md new file mode 100644 index 0000000..95d39b9 --- /dev/null +++ b/docs/superpowers/specs/2026-05-26-source-fs-dep-lightening-spec.md @@ -0,0 +1,623 @@ +--- +status: drafting +target_version: 0.18.0 # 0.18.0 release 의 후속 internal-refactor PR — workspace.version bump 없음 (§7 NG5 + CLAUDE.md §Release 룰 3 트리거 미충족). +contract_sections: ["§3.5 (MediaType::Code dispatch)", "§3.7b (parser intermediate boundary)", "§5.2 (ingest skip policy)", "§8 (allowed deps)"] +related_specs: + - docs/superpowers/specs/2026-04-27-kebab-final-form-design.md + - tasks/p10/p10-1a-1-code-ingest-framework.md # frozen — "Source-fs *may* depend on kebab-parse-code" (line 23) 의 `may` 가 의무 아니므로 본 refactor 가 task spec contract 침범 0. 
+--- + +# kebab-source-fs dep lightening — 9 tree-sitter grammars drag 제거 + +## §1 Background + evidence chain + +### §1.1 현재 의존 그래프 + +`crates/kebab-source-fs/Cargo.toml` (현재 HEAD, refactor/source-fs-dep-lightening branch base = b02ac82) — `[dependencies]` 인용: + +```toml +[dependencies] +kebab-core = { path = "../kebab-core" } +kebab-config = { path = "../kebab-config" } +kebab-parse-code = { path = "../kebab-parse-code" } # ← 본 spec 의 제거 대상 +anyhow = { workspace = true } +serde = { workspace = true } +... +``` + +`crates/kebab-parse-code/Cargo.toml` — 본 의존이 transitively 끌어오는 무게: + +```toml +[dependencies] +kebab-core = { path = "../kebab-core" } +anyhow = { workspace = true } +gix = { workspace = true } +serde_json = { workspace = true } +time = { workspace = true } +tracing = { workspace = true } +tree-sitter = { workspace = true } +tree-sitter-rust = { workspace = true } +tree-sitter-python = { workspace = true } +tree-sitter-typescript = { workspace = true } +tree-sitter-javascript = { workspace = true } +tree-sitter-go = { workspace = true } +tree-sitter-java = { workspace = true } +tree-sitter-kotlin-ng = { workspace = true } +tree-sitter-c = { workspace = true } +tree-sitter-cpp = { workspace = true } +``` + +즉 `kebab-source-fs` build → `kebab-parse-code` build → tree-sitter core + 9 grammar crates (C-compiled grammars + libstdc++ on cpp) drag. + +#### ASCII dep graph — before / after (NIT #3 반영) + +```text +before: + kebab-source-fs ──> kebab-parse-code ──> [tree-sitter + 9 grammar crates] + \─> kebab-core + \─> kebab-config + +after: + kebab-source-fs ──> kebab-core + \─> kebab-config + (4 helper surface 가 kebab-source-fs::code_meta 내부로 이전) +``` + +### §1.2 Drag 의 cost (qualitative) + +정량 benchmark 는 본 spec 의 acceptance 에 포함하지 않는다 (workspace.version touch 0 + clean-build 측정 = 비용 대비 noise 큼). 정성적으로: + +- `target/` 의 incremental compile artifact 가 9 grammar 별 `.o` + crate 별 metadata 로 누적. CLAUDE.md "90+ GB after a few task cycles" 의 일부. 
+- `cargo test -p kebab-source-fs` 가 link 단계에서 9 grammar object 를 끌어들임. +- 미래 `kebab-cli` / `kebab-mcp` 가 `kebab-source-fs` 만 의존 (code ingest 비활성 사용자) 하는 시나리오에도 9 grammar drag 가 강제됨. + +본 cost 의 정량 측정은 **§5.4 informational only** 로 두고 acceptance 에서 분리. + +### §1.3 4 surface — callsite (step 2 결과) + +`grep -rn "kebab_parse_code\|kebab-parse-code" crates/kebab-source-fs/` 결과: + +``` +Cargo.toml:13 kebab-parse-code = { path = "../kebab-parse-code" } +src/media.rs:17 if let Some(lang) = kebab_parse_code::code_lang_for_path(path) { +src/walker.rs:9 //! spec §5.2, applied via `kebab_parse_code::BUILTIN_BLACKLIST`) +src/walker.rs:85 /// Matcher built from `kebab_parse_code::BUILTIN_BLACKLIST` only. +src/walker.rs:131 for pat in kebab_parse_code::BUILTIN_BLACKLIST { +src/walker.rs:161 /// built-in safety-net blacklist (`kebab_parse_code::BUILTIN_BLACKLIST`), +src/walker.rs:211 for pat in kebab_parse_code::BUILTIN_BLACKLIST { +src/connector.rs:152 && kebab_parse_code::is_generated_file(&abs_path).unwrap_or(false) +src/connector.rs:169 if kebab_parse_code::is_oversized( +``` + +→ **공식 surface = 4 개** (round 1 의 "3 leaf" 추정에서 누락된 `BUILTIN_BLACKLIST` 포함): + +| # | Surface | Kind | Real callsite count | +|---|---------|------|--------------------| +| 1 | `code_lang_for_path(&Path) -> Option<&'static str>` | fn | 1 (media.rs:17) | +| 2 | `is_generated_file(&Path) -> Result<bool>` | fn | 1 (connector.rs:152) | +| 3 | `is_oversized(&Path, u64, u32) -> Result<bool>` | fn | 1 (connector.rs:169) | +| 4 | `BUILTIN_BLACKLIST: &[&str]` (6 patterns) | `pub const` | 2 (walker.rs:131, 211) | + +### §1.4 tree-sitter 미사용 검증 (step 3 결과) + +3 leaf + 1 const 의 정의 file (`kebab-parse-code/src/lang.rs` + `kebab-parse-code/src/skip.rs`) 양쪽에 `grep -n "tree_sitter\|tree-sitter"`: + +``` +lang.rs:3: //! Lowercase canonical identifiers, matching tree-sitter parser conventions: +``` + +→ `lang.rs` 의 단 한 줄 — **docstring**.
본문 use 절: + +- `lang.rs::code_lang_for_path`: `use std::path::Path;` — pure pattern match on `path.file_name()` / `path.extension()`. +- `skip.rs::is_generated_file`: `use anyhow::Result; use std::fs::File; use std::io::Read;` — 첫 512 byte 읽고 marker string 검사. +- `skip.rs::is_oversized`: `use anyhow::Result; use std::fs::{File, metadata}; use std::io::{BufRead, BufReader};` — `metadata.len()` → line iter. +- `skip.rs::BUILTIN_BLACKLIST`: `pub const &[&str] = &[...]` (6 entries). + +→ tree-sitter / grammar crate 의존 0. 이동 가능 확정. + +### §1.5 Consumer 검증 (step 4 결과 — destination 결정 핵심) + +`grep -rn "code_lang_for_path\|is_generated_file\|is_oversized\|BUILTIN_BLACKLIST" crates/ --include="*.rs"` 결과에서 *kebab-parse-code 외부* 호출자: + +| Crate / file | Surface | Kind | +|--------------|---------|------| +| `kebab-source-fs/src/media.rs:17` | `code_lang_for_path` | **real call** | +| `kebab-source-fs/src/connector.rs:152` | `is_generated_file` | **real call** | +| `kebab-source-fs/src/connector.rs:169` | `is_oversized` | **real call** | +| `kebab-source-fs/src/walker.rs:131, 211` | `BUILTIN_BLACKLIST` | **real ref** | +| `kebab-core/src/metadata.rs:36` | `code_lang_for_path` | **docstring only** (no actual call) | + +→ **실 호출 consumer = `kebab-source-fs` 단일.** 그 외 `kebab-parse-code` 자체 tests (`tests/lang.rs`, `tests/skip.rs`) 에 호출 — destination 이동 시 함께 옮긴다. + +부가 verification (NIT #1 반영) — `kebab_parse_code::skip::*` / `kebab_parse_code::lang::*` 명시 path 의 외부 ref: + +``` +$ grep -rn "kebab_parse_code::skip\|kebab_parse_code::lang::code_lang" crates/ --include="*.rs" +kebab-parse-code/tests/lang.rs:1 use kebab_parse_code::code_lang_for_path; (re-export path) +kebab-parse-code/tests/skip.rs:1 use kebab_parse_code::skip::{BUILTIN_BLACKLIST, ...}; (전체 path) +``` + +→ 모두 `kebab-parse-code` 자체 test, 외부 명시 path consumer 0. 
+ +### §1.6 설계 contract / phase status (step 5/6 결과) + +- `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §8 graph (현 frozen): + ```text + kebab-source-fs + └─> kebab-parse-code (p10-1A-1: lang detect / repo detect / skip policy) + ``` + → 본 spec 가 §8 의 해당 한 줄을 **edge 제거 + inline note 추가** (§7 MAJOR #4 반영) 형태로 갱신. +- `docs/ARCHITECTURE.md` Mermaid `srcfs → pcode` arrow 부재 (현 Mermaid 에 미표시) → Mermaid 변경 0, 산문 한 단락만 갱신. +- `tasks/INDEX.md`: P10 phase status — INDEX.md 의 dashboard 가 `p10-1B` / `p10-1C-Go` / `p10-1C-JK` 를 "PR 오픈" 으로 표시하나, branch base `b02ac82` 의 코드 트리에는 `module_path_for_python` (`lang.rs:77`), `python.rs` / `typescript.rs` / `javascript.rs` AST extractor, `go.rs` extractor, `java.rs`, `kotlin.rs` 모두 **이미 머지된 상태** (v0.18.0 cut 포함). INDEX.md 가 stale 한 것으로 추정 — 별도 doc-sync 영역. 본 refactor 와 conflict 가능 영역: 3 sub-task 의 코드는 모두 `kebab-chunk` 의 chunker 추가 (kebab-source-fs 의 dep 변경과 path 분리) → conflict 0. v0.18.0 cut 완료 (2026-05-26), active code-ingest PR / fb-* 진행 0. +- **task spec frozen 보존** (CLAUDE.md "Task specs themselves stay frozen…"): `tasks/p10/p10-1a-1-code-ingest-framework.md` line 23 "Source-fs **may** depend on `kebab-parse-code`" — "may" (의무 아니라 허용) 이므로 본 refactor 가 frozen task spec 의 contract 침범 0. 동일 logic 으로 `tasks/p10/p10-2`, `docs/superpowers/plans/2026-05-15-p10-1a-1`, `docs/superpowers/plans/2026-05-20-p10-2` 모두 frozen 보존. design §8 본체만 same-PR 갱신. + +--- + +## §2 Goals + non-goals + +### Goals (G) + +- **G1**: `kebab-source-fs/Cargo.toml` 에서 `kebab-parse-code` dep 제거. +- **G2**: 4 surface (`code_lang_for_path`, `is_generated_file`, `is_oversized`, `BUILTIN_BLACKLIST`) 의 callsite + 의미 (signature, return type, behavior, error variant) 보존. +- **G3**: 기존 unit test (`kebab-parse-code/tests/lang.rs`, `tests/skip.rs`) 의 cover 가 destination 으로 1:1 이동 + design §5.2 의 frozen contract (6 BUILTIN_BLACKLIST entry) 검증이 외부 시점에서 가능하게 유지. baseline 회귀 0. 
+- **G4**: design §8 의 allowed-deps graph 갱신 — `kebab-source-fs → kebab-parse-code` edge 제거 + `kebab-source-fs (lang detect + skip policy 내장)` inline note 추가. +- **G5**: `cargo tree -p kebab-source-fs` 결과의 dep tree 에서 `tree-sitter*` 부재 (objective acceptance — §5.3). + +### Non-goals (NG) + +- **NG1**: `kebab-parse-code` 의 9 tree-sitter grammar 자체 정리 / 동적 로딩 / feature gate. +- **NG2**: `kebab-source-fs::media.rs` 의 extension-match logic 재설계. +- **NG3**: `kebab-parse-code/src/lang.rs` 의 sibling `module_path_for_python` / `module_path_for_tsjs` 이동 — caller 는 본 crate 자체 (`python.rs:78`, `typescript.rs:88`, `javascript.rs:95`). concern 분리 (lang **detection** vs module-path **derivation**). §1.5 / §6.5 / §6.6 참조. +- **NG4**: `kebab-parse-code/src/repo.rs` 의 `detect_repo` / `RepoMeta` 이동 — kebab-source-fs 가 호출 안 함. +- **NG5**: workspace `Cargo.toml` 의 `version` bump — internal refactor (wire 변경 0). CLAUDE.md "Release / binary version bump" 3 트리거 (dogfooding 필요, schema/wire breaking, frozen design 변경) 모두 미충족. frontmatter `target_version: 0.18.0` 의 의미 = "본 PR 머지 시 워크스페이스 version 이 0.18.0 그대로 유지된다" (= NG5 와 정합). +- **NG6**: V00X SQLite migration / wire schema major bump. +- **NG7**: `kebab-core::media.rs` 와의 medium-detection 통합 (Lens 3 별도). + +--- + +## §3 Design + +### §3.1 Destination 선택 — Option B (`kebab-source-fs::code_meta`) + +| 후보 | 호환 | 트레이드오프 | 결정 | +|------|------|------------|------| +| **A. `kebab-core::code`** | OK | "kebab-core: domain types only" 룰 약 stretch (`kebab-core::media.rs` 의 precedent 는 enum 정의지만 본 helper 는 IO + match). 미래 2nd consumer 우월. | ✗ | +| **B. `kebab-source-fs::code_meta`** | OK | core 룰 0 stretch. dep graph 단순화 최대. 미래 2nd consumer 등장 시 promote 필요. | **✓ 채택** | +| **C. 신규 crate `kebab-code-meta`** | OK | workspace member 추가 ceremony. 1 consumer 대비 과함. | ✗ | + +**채택 근거** (consumer count = 1, leaf + const 가 pure logic): + +- Investigation step 4 결과로 외부 consumer = `kebab-source-fs` 1 개 확정. +- §8 boundary rule stretch 0 (core 영역 미침범). 
+- 미래 2nd consumer 발생 시 cost = visibility 확장 (§3.3 의 mixed-visibility 정책 참조). 본 작업 cost 와 비교해 deferred decision cost 낮음. + +### §3.2 Module placement + +신규 module + 기존 source 의 변경: + +```text +crates/kebab-source-fs/src/ +├── lib.rs +├── code_meta.rs ← 신규: 4 surface. lang detect + skip helpers + blacklist. +├── connector.rs ← edit: kebab_parse_code:: prefix 2 곳 (line 152, 169) → crate::code_meta:: 로 교체. +├── hash.rs +├── media.rs ← edit: kebab_parse_code:: prefix 1 곳 → crate::code_meta:: 로 교체. +└── walker.rs ← edit: kebab_parse_code:: prefix 2 곳 (`for pat in` loop, line 131, 211) + comment 3 곳 → crate::code_meta:: 로 교체. +``` + +기존 `lib.rs` 의 module 선언 (branch base `b02ac82` 인용): + +```rust +// 현재 (branch base b02ac82) ────────────────────────────────────── +mod connector; +mod hash; +mod media; +mod walker; + +pub use connector::{FsScanSkips, FsSourceConnector}; +``` + +본 refactor 후: + +```rust +// 본 spec 적용 후 ───────────────────────────────────────────────── +mod connector; +mod hash; +mod media; +mod walker; +mod code_meta; // 신규 — visibility 정책은 §3.3 참조. + +pub use connector::{FsScanSkips, FsSourceConnector}; +pub use code_meta::BUILTIN_BLACKLIST; // §3.3 frozen contract — integration test (§5.1) 의 접근 surface. +``` + +→ `BUILTIN_BLACKLIST` 의 `pub use` 한 줄이 신규 추가되는 **유일한 외부 surface 증가** (현재의 `kebab_parse_code::skip::BUILTIN_BLACKLIST` 외부 ref 가 `kebab_source_fs::BUILTIN_BLACKLIST` 로 대칭 이동, 사실상 net surface 변화 0). 3 helper fn 은 `pub(crate)` 라서 `pub use` 미발생. §7 의 "wire/surface 변경 0" claim 과 정합 — 본 한 줄은 **internal Rust crate-API surface** (wire/CLI/TUI/MCP 의 user-facing surface 아님) 의 minimal 이동. + +### §3.3 Visibility 정책 — mixed `pub` / `pub(crate)` (MAJOR #1 반영) + +회차 1 critic MAJOR #1 의 트레이드오프: **모두 `pub(crate)` 로 좁히면 design §5.2 frozen contract (6 BUILTIN_BLACKLIST entry) 의 검증이 같은 module 안으로 한정 → silent breakage 가능**.
해결책 = **per-surface 차등 visibility**: + +| Surface | Visibility | 근거 | +|---------|-----------|------| +| `BUILTIN_BLACKLIST` | **`pub`** | design §5.2 의 frozen contract (6 entry, 정확 list). 외부 integration test (§5.1) 가 검증 surface 로 사용. | +| `code_lang_for_path` | `pub(crate)` | source-fs 내부 호출만 (media.rs). 미래 2nd consumer 시 promote. | +| `is_generated_file` | `pub(crate)` | source-fs 내부 호출만 (connector.rs). | +| `is_oversized` | `pub(crate)` | source-fs 내부 호출만 (connector.rs). | + +`code_meta.rs` 의 module-level doc 첫 줄에 본 visibility 정책을 cross-link: + +```rust +//! Pre-ingest classification + skip helpers for the local-filesystem +//! SourceConnector. Moved from `kebab-parse-code` (refactor 2026-05-26) +//! to drop the 9 tree-sitter grammar drag from this crate's dep tree. +//! +//! `BUILTIN_BLACKLIST` is `pub` because it implements the **frozen contract +//! in design §5.2** (the 6-pattern safety-net list). External integration +//! tests (`tests/code_meta.rs`) verify the contract from outside the module +//! to prevent silent breakage. The 3 helper fns are `pub(crate)` — no +//! external consumer today. +``` + +### §3.4 Function + const signatures (보존 — 1:1) + +```rust +// crates/kebab-source-fs/src/code_meta.rs (신규) + +use std::fs::File; +use std::io::{BufRead, BufReader, Read}; +use std::path::Path; + +use anyhow::Result; + +/// 6 built-in gitignore-style patterns. Applied in addition to `.gitignore` +/// + `.kebabignore`. User can override via `.kebabignore` negation (`!pattern`). +/// +/// Source of truth: design §5.2 (frozen). +pub const BUILTIN_BLACKLIST: &[&str] = &[ + "**/node_modules/**", + "**/target/**", + "**/__pycache__/**", + "**/.venv/**", + "**/venv/**", + "**/env/**", +]; + +/// Returns the canonical language identifier for a given file path. +/// 본문은 [kebab-parse-code/src/lang.rs:17] 와 byte-identical 보존. +pub(crate) fn code_lang_for_path(path: &Path) -> Option<&'static str> { /* ... 
*/ } + +/// Read first 512 bytes; check 7 case-insensitive generated-file markers. +/// 본문은 [kebab-parse-code/src/skip.rs:28] 와 byte-identical 보존. +pub(crate) fn is_generated_file(path: &Path) -> Result<bool> { /* ... */ } + +/// Check if `path` exceeds `max_bytes` or `max_lines` (byte cap then line cap). +/// 본문은 [kebab-parse-code/src/skip.rs:50] 와 byte-identical 보존. +pub(crate) fn is_oversized(path: &Path, max_bytes: u64, max_lines: u32) -> Result<bool> { /* ... */ } +``` + +→ 시그니처 / 본문 / error type / return type 변경 0. visibility 만 §3.3 의 정책 적용. + +### §3.5 Callsite migration + +| File | Line | Before | After | +|------|------|--------|-------| +| `kebab-source-fs/src/media.rs` | 17 | `if let Some(lang) = kebab_parse_code::code_lang_for_path(path) {` | `if let Some(lang) = crate::code_meta::code_lang_for_path(path) {` | +| `kebab-source-fs/src/walker.rs` | 131 | `for pat in kebab_parse_code::BUILTIN_BLACKLIST {` | `for pat in crate::code_meta::BUILTIN_BLACKLIST {` | +| `kebab-source-fs/src/walker.rs` | 211 | `for pat in kebab_parse_code::BUILTIN_BLACKLIST {` | `for pat in crate::code_meta::BUILTIN_BLACKLIST {` | +| `kebab-source-fs/src/connector.rs` | 152 | `&& kebab_parse_code::is_generated_file(&abs_path).unwrap_or(false)` | `&& crate::code_meta::is_generated_file(&abs_path).unwrap_or(false)` | +| `kebab-source-fs/src/connector.rs` | 169 | `if kebab_parse_code::is_oversized(` | `if crate::code_meta::is_oversized(` | + +Comment-only update: + +| File | Line | Action | +|------|------|--------| +| `kebab-source-fs/src/walker.rs` | 9, 85, 161 | `kebab_parse_code::BUILTIN_BLACKLIST` → `crate::code_meta::BUILTIN_BLACKLIST` | +| `kebab-core/src/metadata.rs` | 36 | doc 주석 — 이동 후 더 이상 존재하지 않는 path 를 가리키는 stale reference 제거 위해 abstract wording 으로 정리 (MINOR #5; backtick inline code 는 대괄호가 없어 rustdoc intra-doc link 처리 대상이 아니므로 broken-link 회피가 목적은 아님): `Set by kebab_parse_code::lang::code_lang_for_path.` → `Set by the local-filesystem source connector during ingest.` | + +### §3.6 kebab-parse-code 측 cleanup + +| Path | Action | 비고 | +|------|--------|------| +|
`crates/kebab-parse-code/src/skip.rs` | **삭제** | 본 file 의 모든 surface (3개) 가 source-fs 로 이동, 자체 사용처 0. | +| `crates/kebab-parse-code/src/lang.rs` | edit (narrow — `code_lang_for_path` + 관련 unit test 만 제거) | `code_lang_for_path` 함수 + `#[cfg(test)] mod tests::tier2_basename_takes_precedence_over_extension` + `#[cfg(test)] mod tests::tier2_extension_fallback` 만 제거. **`module_path_for_python`, `module_path_for_tsjs` + 그 두 unit test (`module_path_for_python_strips_src_roots_and_extensions`, `module_path_for_tsjs_keeps_slashes_and_strips_ext`) 보존** — caller 는 본 crate 자체 (`src/{python,typescript,javascript}.rs`). 헤더 doc 한 단락 rewrite (MINOR #2): "Lowercase canonical identifiers, matching tree-sitter parser conventions:" → "Workspace-relative path → module-path conversion for P10-1B AST extractors (Python dotted form / TS+JS slash form)." | +| `crates/kebab-parse-code/src/lib.rs` | edit | (a) `pub mod skip;` line 삭제. (b) `pub use lang::{code_lang_for_path, module_path_for_python, module_path_for_tsjs};` 의 `code_lang_for_path` 만 제거 (sibling 2 개 보존). (c) `pub use skip::{BUILTIN_BLACKLIST, is_generated_file, is_oversized};` line 전체 삭제. (d) `//!` 헤더 doc "Phase 1A-1 ships infrastructure only" 단락 rewrite (MINOR #3): infrastructure-only wording → "Repo metadata (`detect_repo`) + per-language AST extractors (Rust = P10-1A-2, Python/TS/JS = P10-1B, Go = P10-1C-Go, Java+Kotlin = P10-1C-JK, C+C++ = P10-1D)." | +| `crates/kebab-parse-code/tests/lang.rs` | **이동** | 본 test 가 `code_lang_for_path` 만 검증. 본문 4 case (`known_extensions_map_to_canonical_identifiers`, `special_filenames_map_to_identifiers`, `unknown_extension_returns_none`, `case_insensitive`) → `kebab-source-fs/tests/code_meta.rs` 의 integration test 로 이전 (§3.7 참조). (NIT #2 — "삭제 또는 이동" OR 단일화) | +| `crates/kebab-parse-code/tests/skip.rs` | **이동** | 7 test case 모두 → `kebab-source-fs/tests/code_meta.rs` 로 이전 (§3.7 참조). 
| +| `crates/kebab-parse-code/Cargo.toml` | **변경 0** | `[dev-dependencies] tempfile` 는 `tests/repo.rs:4 use tempfile::TempDir;` 가 계속 소비하므로 **유지** (CRITICAL #1). | +| `crates/kebab-parse-code/src/c.rs ~ rust.rs` (9 grammar AST extractor file) | **변경 0** | tree-sitter 본진. surface 보존. | + +### §3.7 Test placement — integration over unit (MAJOR #5 반영) + +신규 `crates/kebab-source-fs/tests/code_meta.rs` integration test 로 cover (unit 이 아님). + +**근거 (round 1 MAJOR #5 의 약한 unit-test 정당화 보강)**: + +- (a) `BUILTIN_BLACKLIST` 가 `pub` (§3.3) — design §5.2 frozen contract 의 외부 검증 surface 로서 integration test 가 자연. +- (b) source-fs 가 이미 integration test 3 개 (`include_allowlist.rs`, `snapshot_tree1.rs`, `symlink_cycle.rs`) 보유 — 단일 패턴 일관. +- (c) `code_lang_for_path` / `is_generated_file` / `is_oversized` 는 `pub(crate)` 라서 integration test 가 직접 호출 불가. 따라서 **mixed placement**: + - `BUILTIN_BLACKLIST` 6-entry contract → `tests/code_meta.rs` (integration). + - 3 helper fn 의 detail behavior (lang detection / generated marker / size cap) → `src/code_meta.rs` 의 `#[cfg(test)] mod tests` (unit, `pub(crate)` 접근 가능). +- (d) link 단계 추가 binary 1 개 (`tests/code_meta.rs`) — kebab-source-fs 가 lance / datafusion 미링크. CLAUDE.md `-j 1` 강제 트리거 (= lance/datafusion 합산 link 폭주) 에 영향 0 — source-fs 의 build cost 증분은 단발적이며 `-j 1` 강제와 무관. + +### §3.8 Cargo.toml diff + +`crates/kebab-source-fs/Cargo.toml` — 13번 줄 한 줄 삭제: + +```diff + [dependencies] + kebab-core = { path = "../kebab-core" } + kebab-config = { path = "../kebab-config" } +-kebab-parse-code = { path = "../kebab-parse-code" } + anyhow = { workspace = true } +``` + +`crates/kebab-parse-code/Cargo.toml` — **변경 0** (CRITICAL #1: `tempfile` 은 `tests/repo.rs` 가 계속 소비). + +Workspace `Cargo.toml` — 변경 0. 
+ +### §3.9 Test 이동 path + +`kebab-source-fs/tests/code_meta.rs` (integration — `BUILTIN_BLACKLIST` 검증) + `kebab-source-fs/src/code_meta.rs` 의 `#[cfg(test)] mod tests` (unit — 3 helper fn 검증) 로 split: + +| 원본 (kebab-parse-code) | 목적지 | 비고 | +|----------------------|---------|------| +| `tests/lang.rs::known_extensions_map_to_canonical_identifiers` | source-fs unit (`pub(crate)` → 내부 호출) | 32 case | +| `tests/lang.rs::special_filenames_map_to_identifiers` | 동일 | Dockerfile / Makefile / GNUmakefile | +| `tests/lang.rs::unknown_extension_returns_none` | 동일 | 3 case | +| `tests/lang.rs::case_insensitive` | 동일 | `Foo.RS`, `FOO.YAML` | +| `src/lang.rs::tests::tier2_basename_takes_precedence_over_extension` | 동일 (unit) | tier2 basename | +| `src/lang.rs::tests::tier2_extension_fallback` | 동일 (unit) | tier2 ext | +| `tests/skip.rs::generated_header_markers_trigger_skip` | source-fs unit | 7 marker | +| `tests/skip.rs::normal_code_is_not_flagged_generated` | 동일 | | +| `tests/skip.rs::is_generated_returns_false_for_empty_file` | 동일 | | +| `tests/skip.rs::oversized_by_bytes_returns_true` | 동일 | | +| `tests/skip.rs::oversized_by_lines_returns_true` | 동일 | | +| `tests/skip.rs::small_file_returns_false_for_oversize` | 동일 | | +| `tests/skip.rs::builtin_blacklist_has_exactly_six_entries` | **integration** (`tests/code_meta.rs`) | `BUILTIN_BLACKLIST` 가 `pub` → 외부 검증 | + +Tempfile 의존: source-fs 의 `[dev-dependencies]` 에 이미 존재 (line 25: `tempfile = "3"`). + +--- + +## §4 Open questions + +### §4.1 `code_lang_for_path` 의 미래 second consumer + +- 현재: `source-fs::media.rs` 만 호출. `MediaType::Code(lang)` 가 downstream chunker (kebab-chunk) 의 dispatch key 가 되어 chunker 가 lang 을 직접 query 할 필요 없음. +- 미래 risk: `kebab-chunk` Tier 1 dispatch 가 path → lang 재 derivate 필요해질 경우 → `pub(crate)` → `pub` promote 필요. cost = visibility 한 줄 변경 + chunk crate 가 source-fs 의존 추가. deferred. + +### §4.2 `is_generated_file` 의 7-marker sniff logic 갱신 시 ownership + +- 본 spec 머지 후 ownership = source-fs maintainer (의도). 
명문화 = `code_meta.rs` 의 module-level doc (§3.3) 의 "Moved from `kebab-parse-code` (refactor 2026-05-26)" 한 줄. + +### §4.3 `BUILTIN_BLACKLIST` 6 entry = design §5.2 frozen contract + +- ownership 이전 (parse-code → source-fs) 가 §5.2 의 "frozen" 의미와 충돌 0 — frozen 은 6 entry 내용 자체. owner crate 위치 변경은 frozen 대상 아님. +- 외부 검증: integration test (`tests/code_meta.rs`) 가 6 entry 의 byte-identical 보존 검증 (`assert_eq!(BUILTIN_BLACKLIST.len(), 6)` + 6 string 의 `contains` 검증). + +### §4.4 `cargo tree -p kebab-source-fs | grep tree-sitter` = 0 의 transitive scope + +- 본 acceptance 는 **검증 시점 snapshot**. 미래에 `kebab-config` / `kebab-core` 가 tree-sitter 끌어오면 본 acceptance 가 자동 fail — 단 그 경우 별도 spec 가 책임. + +### §4.5 build-time benchmark = optional informational only (§5.4) + +- 정량 측정은 acceptance 에서 분리. PR description 에 부기 권장이나 강제 아님. + +### §4.6 cargo-deny / workspace `deny.toml` (What's Missing #1) + +- 현 시점 repo 에 `deny.toml` 부재 (`ls /home/altair823/kebab/deny.toml` 결과: No such file). design §8 의 "cargo deny + workspace deny.toml + CI 체크로 강제" 는 frozen 의 미래 상태, 본 spec 머지 시점 미적용. 본 spec 가 deny.toml 신설 / 갱신 강제 안 함. 미래 cargo-deny 도입 시 본 refactor 의 edge 제거가 enforcement 와 정합 (= source-fs 에서 parse-code dep ban rule 가능). + +### §4.7 Future risk: parse-code 가 source-fs 를 reverse-import 욕구 추가 (What's Missing #4) + +- 가설: parse-code 의 AST extractor 가 어떤 helper 를 위해 source-fs 의 `code_meta` 를 호출하고 싶어질 경우 → 의존 cycle (source-fs → parse-code 가 끊겼는데, parse-code → source-fs 가 생기면 본 refactor 의 의도 무효화) 또는 design §8 forbidden edge. +- mitigation: 본 spec 의 destination 결정 (Option B) 가 future-coupling risk 를 키움. 만약 parse-code 가 lang detect 가 필요해지면 Option A (kebab-core::code) 로 promote 가 올바른 방향 — 즉 본 spec 의 `pub(crate)` choice (§3.3) 가 사실상 reverse-import risk 의 신호기 역할 (외부 호출 0 보장). + +--- + +## §5 Verification plan + +§5.4 는 **informational only**, acceptance 에서 분리 (MINOR #4). 
+ +### §5.1 Unit + integration tests (source-fs) + +신규 `crates/kebab-source-fs/src/code_meta.rs::tests` (unit) + `crates/kebab-source-fs/tests/code_meta.rs` (integration) 에 다음 test name 이 모두 존재: + +**Unit (`src/code_meta.rs`)** — `pub(crate)` helper 검증: +``` +known_extensions_map_to_canonical_identifiers +special_filenames_map_to_identifiers +unknown_extension_returns_none +case_insensitive +tier2_basename_takes_precedence_over_extension +tier2_extension_fallback +generated_header_markers_trigger_skip +normal_code_is_not_flagged_generated +is_generated_returns_false_for_empty_file +oversized_by_bytes_returns_true +oversized_by_lines_returns_true +small_file_returns_false_for_oversize +``` + +**Integration (`tests/code_meta.rs`)** — `pub const` 검증: +``` +builtin_blacklist_has_exactly_six_entries +``` + +기대: `cargo test -p kebab-source-fs --lib code_meta::tests` → 12 passing (unit), `cargo test -p kebab-source-fs --test code_meta` → 1 passing (integration), 합계 13. bare `cargo test -p kebab-source-fs code_meta` 는 substring test-name filter 로 해석돼 integration binary 의 `builtin_blacklist_has_exactly_six_entries` (이름에 `code_meta` 미포함) 가 매치되지 않으므로 12 passing 에 그침 — 13 검증 용도로 사용 금지 (§5.2 의 `code_ingest_smoke` 와 동일한 함정). + +기존 `kebab-source-fs/src/{connector,media,walker}.rs::tests` + `kebab-source-fs/tests/{include_allowlist,snapshot_tree1,symlink_cycle}.rs` **변경 0** — callsite prefix 만 갱신. + +### §5.2 Workspace 회귀 + +```sh +cargo test --workspace --no-fail-fast -j 1 +``` + +기대: branch base `b02ac82` 에서 실측한 N passing 을 유지. **N = implementation phase 의 PR description 에 baseline 측정 결과로 명시** (MAJOR #2 — round 1 의 "1313" 은 v0.18.0 cut 시점 추정, b02ac82 = HOTFIX #15 + S3 NLI 머지 후 시점). 측정 방법: + +```sh +cargo test --workspace --no-fail-fast -j 1 2>&1 | tail -50 # baseline N 추출 +``` + +`-j 1` 필수 (CLAUDE.md "Build / test / lint" — 18 integration-test binary 동시 link 시 OOM). + +추가로 **가장 강한 안전망 (What's Missing #3 강조)**: + +```sh +cargo test -p kebab-app --test code_ingest_smoke -j 1 +``` + +`--test code_ingest_smoke` 는 integration test **binary** (file 이름 = binary 이름) 를 선택 — bare `cargo test -p kebab-app code_ingest_smoke` 는 substring test-name filter 로 해석돼 16+ fn 중 매치 0건 → "0 tests run" + exit 0 의 false-positive PASS 가 나므로 사용 금지 (verifier-plan round 1 Gap #1).
이 e2e fixture 가 `module_path_for_python` / `module_path_for_tsjs` 사용을 dogfooding KB 흐름으로 검증 (`code_ingest_smoke.rs:165, 242, 319` 의 doc-comment + fixture). 회귀 시 본 명령이 fail. + +### §5.3 Clippy + build + dep tree + +```sh +cargo clippy --workspace --all-targets -j 1 -- -D warnings +cargo build --release -j 1 +cargo tree -p kebab-source-fs | grep tree-sitter +``` + +기대: +- clippy: clean (workspace pedantic + inline 30+ allow 그대로). +- build: clean release binary. +- `cargo tree` grep: **0 줄 출력**. + +### §5.4 Optional: build time benchmark (informational only — NOT acceptance) + +```sh +cargo clean +time cargo build -p kebab-source-fs --release -j 1 # baseline +# checkout refactor branch +cargo clean +time cargo build -p kebab-source-fs --release -j 1 # after refactor +``` + +PR description 에 부기. + +--- + +## §6 Risks + +### §6.1 Destination 의 §8 stretch (Option B 채택 시 0) + +§3.1 의 채택 근거로 해소. + +### §6.2 4 surface 외 hidden callsite + +- Investigation step 2 + NIT #1 의 보조 grep 으로 검증 완료. alias / re-export 0. +- 추가 가드: refactor 머지 직전 `grep -rn "parse_code\|parse-code" crates/kebab-source-fs/` 재확인 (implementation phase 의 plan checklist). + +### §6.3 `BUILTIN_BLACKLIST` 의 link-time / .rodata 영향 + +- const 가 source-fs 로 이동 시 binary 의 `.rodata` 위치 변경 only. 의미 변경 0. + +### §6.4 `kebab_parse_code::skip` 의 외부 ref + +- NIT #1 grep 결과 매치 2 곳 모두 parse-code 자체 test → §3.6 의 test 이동과 함께 해소. 안전. + +### §6.5 `lang.rs` narrow edit 시 sibling `module_path_for_*` accidental drop (CRITICAL #2 반영) + +- Round 1 의 잘못된 회귀 catch crate: `kebab-chunk` 가 `module_path_for_*` 호출 0. 
정정된 caller: + - `kebab-parse-code/src/python.rs:78` + - `kebab-parse-code/src/typescript.rs:88` + - `kebab-parse-code/src/javascript.rs:95` + - `kebab-app/tests/code_ingest_smoke.rs:165, 242, 319` (e2e fixture, doc-comment + 동작 검증) +- 정정된 안전망: + - `cargo test -p kebab-parse-code --no-fail-fast -j 1 module_path_for_` — sibling unit test (`module_path_for_python_strips_src_roots_and_extensions` + `module_path_for_tsjs_keeps_slashes_and_strips_ext`) 가 fail 하면 catch. + - `cargo test -p kebab-app --test code_ingest_smoke -j 1` — P10-1B e2e fixture 가 fail 하면 catch (`--test` flag 강제 — verifier-plan round 1 Gap #1). +- 둘 다 §5.2 의 workspace 회귀 + 본 §6.5 의 명시 cli 양쪽으로 cover. + +### §6.6 ARCHITECTURE.md / design §8 drift + +- §7 의 ARCHITECTURE.md + design §8 갱신을 same-PR 로 진행. CLAUDE.md "Changing the design doc requires updating every referencing task spec in the same PR" 룰 — frozen task spec (`tasks/p10/p10-1a-1`, `tasks/p10/p10-2`, `plans/2026-05-15-p10-1a-1`, `plans/2026-05-20-p10-2`) 는 §1.6 의 분석 (모두 "may" 수준의 reference, contract violation 0) 으로 frozen 보존. design §8 의 graph block 만 갱신. + +### §6.7 cargo-deny enforcement 의 의도-vs-현실 gap (§4.6 cross-link) + +- design §8 의 "cargo deny + deny.toml + CI 체크" frozen wording 이 현 시점 미적용. 본 spec 머지가 enforcement gap 신설 아님 — 이미 존재하는 gap 의 영향 받지 않음. + +--- + +## §7 Wire / surface impact + +| Surface | 변경 | 비고 | +|---------|------|------| +| wire schema (`*.v1`) | 0 | 본 4 surface 는 wire 출력에 미surface. | +| CLI subcommand / flag / `--json` field / exit code | 0 | | +| TUI / desktop / MCP | 0 | | +| **Cargo workspace.version** | **0 bump** | CLAUDE.md "Release / binary version bump" 3 트리거 미충족. frontmatter `target_version: 0.18.0` = "본 PR 머지 시 0.18.0 그대로". | +| **Cargo features** | **0** (MINOR #6) | `kebab-source-fs` / `kebab-parse-code` 양 crate 의 `[features]` 변경 0. 
| +| **parser_version cascade** | **0** (MINOR #6) | design §9 의 cascade identifier (`parser_version`, `chunker_version`, `embedding_version`, `prompt_template_version`, `index_version`) 변경 0. 회귀 시 cascade 영향 0. | +| `Config` / `KEBAB_*` env | 0 | `ingest.code.skip_generated_header` / `max_file_bytes` / `max_file_lines` 는 의미 + 위치 그대로 (kebab-config 의 `IngestCodeCfg`). callsite 만 source-fs internal 로 정리. | +| SQLite migration (V00X) | 0 | DDL 미접촉. | +| README | 변경 0 | | +| HANDOFF.md | 변경 0 | phase 단위 변화 0 (single-crate internal refactor). | +| **docs/ARCHITECTURE.md** | **갱신 (same-PR)** | (a) "kebab-parse-code 의 외부 tree-sitter grammar crate 의존" 산문 끝에 한 줄 추가 — "v0.18.0+ 부터 `kebab-source-fs` 는 자체 `code_meta` 모듈 (lang detect + skip helpers + BUILTIN_BLACKLIST) 을 보유, `kebab-parse-code` 와 분리." (b) Mermaid 변경 0 (`srcfs → pcode` arrow 미포함). | +| **design §8 graph** | **갱신 (same-PR — MAJOR #4 반영, 두 줄)** | (a) **edge 제거**: 기존 `kebab-source-fs └─> kebab-parse-code (p10-1A-1: lang detect / repo detect / skip policy)` 라인 삭제. (b) **inline note 추가**: `kebab-source-fs` row 아래 `(p10-2 이후: lang detect + skip policy 내장; kebab-parse-code 와 분리)` 한 줄 보강. | +| tasks/HOTFIXES.md | **추가 불필요** | design §8 자체를 갱신하므로 frozen vs ship deviation 0. CLAUDE.md HOTFIXES rule 미트리거. | +| referencing task spec | **frozen 보존** | §1.6 분석: `tasks/p10/p10-1a-1` line 23 의 "may" reference 는 contract violation 0 → frozen. `tasks/p10/p10-2`, `plans/2026-05-15-p10-1a-1`, `plans/2026-05-20-p10-2` 동일. | +| tasks/INDEX.md | 변경 0 | phase 단위 신규 task 아님. | + +--- + +## §8 Out of scope + +- Lens 1 다른 묶음 (`kebab-normalize` 흡수, `kebab-parse-types` 추가 정리) — 별도 spec. +- Lens 2 (`kebab-chunk` Tier 2 helper 정리) — 별도. +- Lens 3 (Extractor dispatch unification) — system-architect post-refactor report 의 차기 candidate, 별도. +- `kebab-parse-code` 의 9 tree-sitter grammar feature gating / dynamic loading — v0.19+ candidate, 별도. +- `kebab-parse-code/src/repo.rs` ownership 검토 — 본 spec 범위 밖. 
+- `kebab-core::media.rs` 와 `kebab-source-fs::code_meta` 의 medium-vs-lang detection 통합. +- `deny.toml` 신설 / cargo-deny CI 도입 — frozen design §8 의 미래 state, 별도. + +--- + +## §9 References + +- `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §3.5, §3.7b, §5.2, §8. +- `tasks/p10/p10-1a-1-code-ingest-framework.md` (frozen — line 23 "may" reference 보존). +- `tasks/INDEX.md` — P10 phase status (모두 ✅ 머지, v0.18.0 cut 완료 2026-05-26). +- `tasks/HOTFIXES.md` — 본 spec 머지 시 항목 추가 불필요. +- Investigation grep evidence — §1.3 + §1.5. +- Workspace `Cargo.toml` workspace.version = `0.18.0` (target frontmatter 동일, bump 미실시). +- `kebab-parse-code/tests/repo.rs:4` — `tempfile::TempDir` 사용 (CRITICAL #1 의 baseline evidence). +- `kebab-parse-code/src/python.rs:78`, `typescript.rs:88`, `javascript.rs:95` — `module_path_for_*` 실 caller (CRITICAL #2 의 baseline evidence). +- `kebab-app/tests/code_ingest_smoke.rs` — P10-1B e2e fixture (§5.2 의 가장 강한 안전망). + +--- + +## §10 Round 1 critic closure status + +| Finding | Severity | 반영 | 위치 | +|---------|----------|------|------| +| **CRITICAL #1** tempfile dev-dep 삭제 claim 거짓 | CRITICAL | **reflected** | §3.6 (Cargo.toml row 변경 0), §3.8 (`tempfile` 줄 삭제 제거), §9 (`tests/repo.rs:4` evidence). | +| **CRITICAL #2** module_path_for_* 회귀 catch crate 오인 | CRITICAL | **reflected** | §6.5 (verify 명령 교체 — `kebab-chunk` → `kebab-parse-code` + `kebab-app code_ingest_smoke`), §5.2 (e2e fixture cli 명시), §9 (caller evidence 3 file). | +| **MAJOR #1** BUILTIN_BLACKLIST `pub(crate)` 가 frozen verifiability erode | MAJOR | **reflected** (Option A 채택) | §3.3 (mixed visibility 정책 — `BUILTIN_BLACKLIST` `pub`, 3 fn `pub(crate)`), §3.7 (integration test 로 6-entry contract 보존), §4.3, §5.1. | +| **MAJOR #2** 1313 baseline 시점 부정확 | MAJOR | **reflected** | §5.2 (wording → "branch base b02ac82 에서 실측 N passing", 측정 방법 cli 명시). 
| +| **MAJOR #3** frontmatter target_version vs NG5 충돌 | MAJOR | **reflected** | frontmatter (target_version: 0.18.0 + 주석으로 의미 명시), §2 NG5 (frontmatter cross-link), §7 Cargo workspace.version row. | +| **MAJOR #4** design §8 graph 갱신 scope 부족 | MAJOR | **reflected** | §7 design §8 row (edge 제거 (a) + inline note 추가 (b) 두 줄). | +| **MAJOR #5** §3.7 unit test 정당화 약함 | MAJOR | **reflected** | §3.7 (4 가지 근거 재작성 — (a) frozen contract integration surface, (b) source-fs 의 integration test 패턴 일관, (c) `pub(crate)` 접근 위한 unit 필수성, (d) link cost 분석). | +| **MINOR #1** "edit (split)" wording | MINOR | **reflected** | §3.6 ("edit (narrow — code_lang_for_path + 관련 unit test 만 제거)"). | +| **MINOR #2** lang.rs 헤더 doc 갱신 명시 | MINOR | **reflected** | §3.6 lang.rs 행 ("Workspace-relative path → module-path conversion for P10-1B AST extractors…"). | +| **MINOR #3** lib.rs 헤더 doc 단락 rewrite | MINOR | **reflected** | §3.6 lib.rs 행 ("Repo metadata + per-language AST extractors…"). | +| **MINOR #4** §5.4 informational only 명시 | MINOR | **reflected** | §5 시작부 한 줄 + §5.4 제목 "informational only — NOT acceptance". | +| **MINOR #5** metadata.rs:36 abstract wording | MINOR | **reflected** | §3.5 ("Set by the local-filesystem source connector during ingest"). | +| **MINOR #6** wire/surface table 에 features + cascade row | MINOR | **reflected** | §7 (2 row 추가 — Cargo features = 0, parser_version cascade = 0). | +| **NIT #1** §1.5 또는 §6.2 끝에 추가 grep | NIT | **reflected** | §1.5 끝부 ("부가 verification" 블록 — `kebab_parse_code::skip\|kebab_parse_code::lang::code_lang` grep 결과). | +| **NIT #2** §3.5 "삭제 또는 이동" OR 단일화 | NIT | **reflected** | §3.6 ("이동 — 본문은 §3.7 참조"). | +| **NIT #3** ASCII before/after dep graph | NIT | **reflected** | §1.1 끝부 ASCII block. | +| **What's Missing #1** cargo deny / deny.toml | — | **reflected** | §4.6 + §6.7 + §8 (현 미적용, 본 spec 가 신설 강제 아님). 
| +| **What's Missing #2** task spec frozen contract rule | — | **reflected** | §1.6 (4 referencing task/plan 의 "may" reference 분석 — frozen 보존), §6.6, §7 (referencing task spec row), frontmatter `related_specs` cross-link. | +| **What's Missing #3** kebab-app code_ingest_smoke 가 가장 강한 안전망 | — | **reflected** | §5.2 ("가장 강한 안전망" 블록 + e2e fixture 의 module_path_for_* 검증 인용), §6.5 verify 명령. | +| **What's Missing #4** future risk: parse-code reverse-import | — | **reflected** | §4.7 (가설 + mitigation — `pub(crate)` 가 reverse-import risk 신호기). | + +**Round 1 closure summary**: 2 CRITICAL + 5 MAJOR + 6 MINOR + 3 NIT + 4 What's Missing = **20 finding 모두 reflected**, rejection 0. + +### §10.1 Round 2 critic 후속 closure (v2 → v3) + +| Finding | Severity | 반영 | 위치 | +|---------|----------|------|------| +| **NEW MAJOR #1** §1.6 P10 phase status 사실 오류 (INDEX.md stale 미언급) | MAJOR | **reflected** (option a wording — honest INDEX.md stale 알림 + conflict 0 진술) | §1.6 P10 status 단락 재작성. | +| **NEW MAJOR #2** §3.2 lib.rs 예시 의 surface 무근거 확장 (`pub mod connector` / `pub mod media`) | MAJOR | **reflected** (Option A 채택 — `mod` 보존 + `pub use code_meta::BUILTIN_BLACKLIST` 한 줄 신규) | §3.2 (before/after lib.rs 두 블록 + net surface 변화 0 분석 한 단락), §7 의 wire/surface 변경 0 claim 과 정합 확인 inline 명시. | +| **NEW MINOR #1** §3.7 (d) link cost 부정확 wording (18 → 19 비교) | MINOR | **reflected** | §3.7 (d) wording 정정 — "lance/datafusion 합산 link 폭주에 영향 0 — single binary 단발적 증분, `-j 1` 강제와 무관". | + +**Round 2 closure summary**: 0 CRITICAL + 2 NEW MAJOR + 1 NEW MINOR = **3 finding 모두 reflected**, rejection 0. Round 3 critic 의 verify review 준비 완료.