diff --git a/Cargo.lock b/Cargo.lock index 499f233..f330c3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4345,7 +4345,6 @@ version = "0.6.0" dependencies = [ "anyhow", "gix", - "kebab-core", "tempfile", ] @@ -4460,6 +4459,7 @@ dependencies = [ "ignore", "kebab-config", "kebab-core", + "kebab-parse-code", "serde", "serde_json", "tempfile", diff --git a/crates/kebab-source-fs/src/connector.rs b/crates/kebab-source-fs/src/connector.rs index d3cdb1f..599673e 100644 --- a/crates/kebab-source-fs/src/connector.rs +++ b/crates/kebab-source-fs/src/connector.rs @@ -66,7 +66,7 @@ impl FsSourceConnector { fn resolve_scan_params( &self, scope: &SourceScope, - ) -> Result<(PathBuf, Vec, WalkOverrides)> { + ) -> Result<(PathBuf, WalkOverrides)> { let root = if scope.root.as_os_str().is_empty() { self.default_root.clone() } else { @@ -78,7 +78,7 @@ impl FsSourceConnector { let kbignore = read_kbignore(&root)?; let overrides = build_overrides(&root, &excludes, &kbignore)?; - Ok((root, kbignore, overrides)) + Ok((root, overrides)) } /// Scan the workspace and return the accepted assets together with @@ -92,10 +92,7 @@ impl FsSourceConnector { &self, scope: &SourceScope, ) -> Result<(Vec, FsScanSkips)> { - let (root, _kbignore, overrides) = self.resolve_scan_params(scope)?; - - // Suppress unused-variable warning — kbignore patterns are already - // baked into `overrides`; we don't need them again here. + let (root, overrides) = self.resolve_scan_params(scope)?; log_scope_include_warning(scope); diff --git a/crates/kebab-source-fs/src/walker.rs b/crates/kebab-source-fs/src/walker.rs index e963c0b..8eaf0ff 100644 --- a/crates/kebab-source-fs/src/walker.rs +++ b/crates/kebab-source-fs/src/walker.rs @@ -21,6 +21,16 @@ //! `follow_links(true)`; we layer our own visited-set on top, keyed by the //! canonical path of every entry, and skip any entry we've already seen. //! +//! ## Per-source skip attribution (spec §5.5) +//! +//! 
`walk_files_with_skips` takes a `WalkOverrides` struct that carries +//! both a `combined` matcher (used for the actual walk decision) and three +//! per-source matchers (`gitignore`, `kebabignore`, `builtin`). When an +//! entry is excluded, `classify_skip` probes the per-source matchers in +//! priority order (built-in > gitignore > kebabignore) to determine which +//! `IngestReport` counter should be incremented — without requiring a +//! second walker pass over the filesystem. +//! //! ## Why `walkdir` instead of `ignore::WalkBuilder`? //! //! `ignore::WalkBuilder` bundles gitignore semantics + cycle detection in diff --git a/crates/kebab-source-fs/tests/symlink_cycle.rs b/crates/kebab-source-fs/tests/symlink_cycle.rs index 1bd8d50..52fbcaa 100644 --- a/crates/kebab-source-fs/tests/symlink_cycle.rs +++ b/crates/kebab-source-fs/tests/symlink_cycle.rs @@ -9,7 +9,7 @@ //! Expected: `scan` returns in O(seconds), every emitted path is unique, //! and `alpha.md` appears at least once. //! -//! The cycle guard lives in `walker::walk_files`; this test exists to +//! The cycle guard lives in `walker::walk_files_with_skips`; this test exists to //! prove it catches the realistic shape (cycle through one or more //! symlinks) end-to-end via the public API. @@ -100,7 +100,7 @@ fn two_step_directory_cycle_visited_set_breaks_loop() { // // Without the visited-set, walkdir would descend // a → a/loop (=b) → a/loop/loop (=a) → … forever. - // The canonical-path visited-set in `walker::walk_files` must break + // The canonical-path visited-set in `walker::walk_files_with_skips` must break // the loop and yield a finite, deterministic result. let dir = tempfile::tempdir().unwrap(); let root = dir.path();