From 682f7dd3a25159a7a6c9bf040ef20dc3596554da Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Fri, 15 May 2026 16:53:21 +0900 Subject: [PATCH] feat(p10-1a-1): add [ingest.code] config section Add IngestCfg + IngestCodeCfg structs with serde defaults and embed ingest: IngestCfg into the top-level Config. Existing configs without an [ingest] section continue to load unchanged. Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-config/src/lib.rs | 103 +++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/crates/kebab-config/src/lib.rs b/crates/kebab-config/src/lib.rs index 02ab5ca..b9d1c4d 100644 --- a/crates/kebab-config/src/lib.rs +++ b/crates/kebab-config/src/lib.rs @@ -45,6 +45,11 @@ pub struct Config { /// `dark`). #[serde(default = "UiCfg::defaults")] pub ui: UiCfg, + /// p10-1A-1: code ingest settings. `#[serde(default)]` so existing + /// config files without an `[ingest]` / `[ingest.code]` section + /// load cleanly with built-in defaults. + #[serde(default)] + pub ingest: IngestCfg, /// p9-fb-05: directory of the on-disk config file this `Config` /// was loaded from, if any. Populated by `Config::from_file` / /// `Config::load` — never serialized (`#[serde(skip)]`). Used by @@ -265,6 +270,60 @@ impl UiCfg { } } +/// p10-1A-1: top-level ingest configuration wrapper. Contains per-media-type +/// sub-sections; currently only `code` is defined. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct IngestCfg { + pub code: IngestCodeCfg, +} + +impl Default for IngestCfg { + fn default() -> Self { + Self { + code: IngestCodeCfg::default(), + } + } +} + +/// p10-1A-1: settings for the code ingest pipeline. All fields have +/// reasonable defaults so the user need not set anything in `config.toml` +/// to get working code ingest. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(default)] +pub struct IngestCodeCfg { + /// Generated header sniff. Reads first ~512 bytes, checks 7 markers. + pub skip_generated_header: bool, + /// Max byte size per file. Bigger files skipped. + pub max_file_bytes: u64, + /// Max line count per file. Bigger files skipped (byte cap checked first). + pub max_file_lines: u32, + /// User extra skip globs (gitignore syntax). Applied on top of built-in + /// + `.gitignore` + `.kebabignore`. + pub extra_skip_globs: Vec, + /// AST chunk size cap. Functions/classes longer than this fall back to + /// paragraph-based split (1A-2 and later). + pub ast_chunk_max_lines: u32, + /// Tier 3 fallback chunker: lines per chunk. + pub fallback_lines_per_chunk: u32, + /// Tier 3 fallback chunker: line overlap between adjacent chunks. + pub fallback_lines_overlap: u32, +} + +impl Default for IngestCodeCfg { + fn default() -> Self { + Self { + skip_generated_header: true, + max_file_bytes: 262_144, + max_file_lines: 5_000, + extra_skip_globs: vec![], + ast_chunk_max_lines: 200, + fallback_lines_per_chunk: 80, + fallback_lines_overlap: 20, + } + } +} + impl Config { /// Defaults per design §6.4. pub fn defaults() -> Self { @@ -336,6 +395,7 @@ impl Config { }, image: ImageCfg::defaults(), ui: UiCfg::defaults(), + ingest: IngestCfg::default(), // p9-fb-05: defaults are not loaded from disk, so no // source_dir. Relative `workspace.root` (rare with // defaults) falls back to caller `cwd` via the @@ -1060,6 +1120,49 @@ max_context_tokens = 8000 } } } + + #[test] + fn ingest_code_cfg_defaults() { + let cfg: IngestCodeCfg = toml::from_str("").unwrap(); + assert_eq!(cfg.max_file_bytes, 262_144); + assert_eq!(cfg.max_file_lines, 5_000); + assert!(cfg.skip_generated_header); + assert!(cfg.extra_skip_globs.is_empty()); + assert_eq!(cfg.ast_chunk_max_lines, 200); + assert_eq!(cfg.fallback_lines_per_chunk, 80); + assert_eq!(cfg.fallback_lines_overlap, 20); + } + + #[test] + fn ingest_code_cfg_user_override() { + let toml = r#" + max_file_bytes = 1048576 + max_file_lines = 20000 + skip_generated_header = false + extra_skip_globs = ["**/fixtures/**", "**/snapshots/**"] + "#; + let cfg: IngestCodeCfg = toml::from_str(toml).unwrap(); + assert_eq!(cfg.max_file_bytes, 1_048_576); + assert_eq!(cfg.max_file_lines, 20_000); + assert!(!cfg.skip_generated_header); + assert_eq!(cfg.extra_skip_globs.len(), 2); + } + + #[test] + fn config_with_ingest_code_section() { + // Build a full valid Config serialization and patch only the + // [ingest.code] field we care about — avoids having to enumerate + // every required Config field in the test fixture. + let base = Config::defaults(); + let mut toml_text = toml::to_string(&base).unwrap(); + // Inject max_file_bytes override into the [ingest.code] table. + toml_text = toml_text.replace( + "max_file_bytes = 262144", + "max_file_bytes = 524288", + ); + let cfg: Config = toml::from_str(&toml_text).unwrap(); + assert_eq!(cfg.ingest.code.max_file_bytes, 524_288); + } } #[cfg(test)]