refactor(rename): kb crates → kebab — Cargo packages, folders, Rust modules
프로젝트 이름 `kb` → `kebab` rename 의 첫 단계. - workspace `Cargo.toml`: members `crates/kb-*` → `crates/kebab-*`, repository URL `altair823/kb` → `altair823/kebab`. - 18 crate 폴더 rename via `git mv` (history 보존). - 각 crate `Cargo.toml`: `name = "kb-*"` → `"kebab-*"`, path deps `../kb-*` → `../kebab-*`. - 모든 `.rs`: `kb_<id>` snake-case 모듈 path 18 개 (`kb_core`, `kb_config`, `kb_app`, `kb_cli`, `kb_eval`, `kb_search`, `kb_chunk`, `kb_normalize`, `kb_source_fs`, `kb_parse_md`, `kb_parse_types`, `kb_store_sqlite`, `kb_store_vector`, `kb_embed`, `kb_embed_local`, `kb_llm`, `kb_llm_local`, `kb_rag`) → `kebab_<id>` 일괄 sed (단어 경계 \\b 사용해 영어 문장 안의 "kb" 약어 미오염). CLI binary 이름 (`[[bin]] name = "kb"`), 환경변수 `KB_*`, XDG paths, tracing target, 그리고 docs sweep 은 다음 commit 에서. ## 검증 - `cargo check --workspace` clean — 모든 crate 빌드 통과 후 commit. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
139
crates/kebab-source-fs/tests/snapshot_tree1.rs
Normal file
139
crates/kebab-source-fs/tests/snapshot_tree1.rs
Normal file
@@ -0,0 +1,139 @@
|
||||
//! Snapshot + determinism tests against `fixtures/source-fs/tree-1`.
|
||||
//!
|
||||
//! Layout (committed under `<repo>/fixtures/source-fs/tree-1/`):
|
||||
//!
|
||||
//! ```
|
||||
//! tree-1/
|
||||
//! ├── README.md
|
||||
//! ├── notes/
|
||||
//! │ ├── alpha.md
|
||||
//! │ └── beta.md
|
||||
//! ├── ignored/
|
||||
//! │ └── skip.tmp # excluded by .kbignore
|
||||
//! ├── .kbignore # contains: *.tmp
|
||||
//! └── .DS_Store # implicitly excluded
|
||||
//! ```
|
||||
//!
|
||||
//! Two assertions:
|
||||
//! 1. Snapshot stability — `scan` output (with `discovered_at` stripped)
|
||||
//! matches the committed baseline JSON byte-for-byte.
|
||||
//! 2. Determinism — running `scan` twice produces byte-identical JSON
|
||||
//! after stripping `discovered_at`.
|
||||
//!
|
||||
//! `discovered_at` is wall-clock and intentionally NOT part of the
|
||||
//! contract: the task spec says strip it before comparison.
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{SourceConnector, SourceScope};
|
||||
use kebab_source_fs::FsSourceConnector;
|
||||
use serde_json::Value;
|
||||
|
||||
/// Repo root, derived from `CARGO_MANIFEST_DIR` (= `crates/kb-source-fs`).
|
||||
fn repo_root() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
.unwrap()
|
||||
.parent()
|
||||
.unwrap()
|
||||
.to_path_buf()
|
||||
}
|
||||
|
||||
fn fixture_root() -> PathBuf {
|
||||
repo_root().join("fixtures/source-fs/tree-1")
|
||||
}
|
||||
|
||||
fn baseline_path() -> PathBuf {
|
||||
repo_root().join("fixtures/source-fs/tree-1.snapshot.json")
|
||||
}
|
||||
|
||||
fn cfg_for_fixture(root: &str) -> Config {
|
||||
let mut c = Config::defaults();
|
||||
c.workspace.root = root.to_string();
|
||||
// Clear default excludes (`.git/**`, `node_modules/**`, `.obsidian/**`)
|
||||
// so the snapshot is purely a function of the fixture + .kbignore +
|
||||
// baked-in default-excludes.
|
||||
c.workspace.exclude.clear();
|
||||
c
|
||||
}
|
||||
|
||||
/// Run `scan` against the fixture and return the JSON value with every
|
||||
/// `discovered_at` field replaced by the literal string "<stripped>".
|
||||
/// Also strip `source_uri.value` and `stored.path` because they contain
|
||||
/// absolute paths that vary by checkout location — the snapshot must be
|
||||
/// portable across machines and CI checkout dirs.
|
||||
fn scan_and_strip() -> Value {
|
||||
let root = fixture_root();
|
||||
let cfg = cfg_for_fixture(root.to_str().unwrap());
|
||||
let conn = FsSourceConnector::new(&cfg).expect("connector init");
|
||||
let assets = conn
|
||||
.scan(&SourceScope::default())
|
||||
.expect("scan must succeed against committed fixture");
|
||||
|
||||
let mut v = serde_json::to_value(&assets).expect("serialize");
|
||||
if let Value::Array(items) = &mut v {
|
||||
for item in items {
|
||||
if let Value::Object(map) = item {
|
||||
map.insert(
|
||||
"discovered_at".to_string(),
|
||||
Value::String("<stripped>".to_string()),
|
||||
);
|
||||
// source_uri = { kind: "file", value: "<abs>" } — strip value.
|
||||
if let Some(Value::Object(s)) = map.get_mut("source_uri") {
|
||||
if s.contains_key("value") {
|
||||
s.insert("value".to_string(), Value::String("<stripped>".to_string()));
|
||||
}
|
||||
}
|
||||
// stored = { kind: "copied"|"reference", path: "<abs>", ... } — strip path.
|
||||
if let Some(Value::Object(s)) = map.get_mut("stored") {
|
||||
if s.contains_key("path") {
|
||||
s.insert("path".to_string(), Value::String("<stripped>".to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
v
|
||||
}
|
||||
|
||||
/// Compares the stripped scan of the fixture against the committed
/// baseline JSON.
///
/// When the `KB_REGEN_SNAPSHOT` environment variable is set, the baseline
/// is (re)written from the current scan and the test then fails on purpose,
/// so a regeneration run is never mistaken for a verification run.
#[test]
fn tree_1_snapshot_matches_baseline() {
    let actual = scan_and_strip();

    // If KB_REGEN_SNAPSHOT is set, (re)write the baseline and exit
    // *before* attempting to read it. This is the only path that may
    // create the file from scratch.
    if std::env::var_os("KB_REGEN_SNAPSHOT").is_some() {
        let pretty = serde_json::to_string_pretty(&actual).unwrap() + "\n";
        std::fs::write(baseline_path(), pretty).expect("write baseline");
        panic!("regenerated baseline; rerun without KB_REGEN_SNAPSHOT to verify");
    }

    // The hint must name the Cargo package (`kebab-source-fs`) exactly as
    // `cargo test -p` expects it, so the command can be pasted as-is.
    let baseline_text = std::fs::read_to_string(baseline_path()).unwrap_or_else(|_| {
        panic!(
            "missing baseline at {} — regenerate via `KB_REGEN_SNAPSHOT=1 cargo test \
             -p kebab-source-fs --test snapshot_tree1 -- tree_1_snapshot_matches_baseline`",
            baseline_path().display()
        )
    });
    let expected: Value = serde_json::from_str(&baseline_text)
        .expect("baseline JSON must parse");

    // Compare parsed values; print both sides pretty-printed on drift so the
    // difference is readable in the test output.
    if actual != expected {
        let actual_pretty = serde_json::to_string_pretty(&actual).unwrap();
        let expected_pretty = serde_json::to_string_pretty(&expected).unwrap();
        panic!(
            "snapshot drift.\n--- expected ---\n{expected_pretty}\n--- actual ---\n{actual_pretty}\n"
        );
    }
}
|
||||
|
||||
/// Two back-to-back scans of the fixture must serialize to the same JSON
/// text once the masked fields are stripped.
#[test]
fn tree_1_scan_is_deterministic() {
    let first_scan = scan_and_strip();
    let second_scan = scan_and_strip();
    assert_eq!(
        serde_json::to_string(&first_scan).unwrap(),
        serde_json::to_string(&second_scan).unwrap(),
        "two consecutive scans diverged"
    );
}
|
||||
160
crates/kebab-source-fs/tests/symlink_cycle.rs
Normal file
160
crates/kebab-source-fs/tests/symlink_cycle.rs
Normal file
@@ -0,0 +1,160 @@
|
||||
//! Integration test: a `notes/` symlink whose target points back at the
|
||||
//! workspace root MUST NOT cause `scan` to loop forever or panic.
|
||||
//!
|
||||
//! Layout (built per-test in a tempdir):
|
||||
//! root/
|
||||
//! ├── alpha.md
|
||||
//! └── notes/ (symlink → root) ← cycle: root → notes → root → …
|
||||
//!
|
||||
//! Expected: `scan` returns in O(seconds), every emitted path is unique,
|
||||
//! and `alpha.md` appears at least once.
|
||||
//!
|
||||
//! The cycle guard lives in `walker::walk_files`; this test exists to
|
||||
//! prove it catches the realistic shape (cycle through one or more
|
||||
//! symlinks) end-to-end via the public API.
|
||||
|
||||
#![cfg(unix)]
|
||||
|
||||
use std::os::unix::fs::symlink;
|
||||
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{SourceConnector, SourceScope};
|
||||
use kebab_source_fs::FsSourceConnector;
|
||||
|
||||
fn cfg_with_root(root: &str) -> Config {
|
||||
let mut c = Config::defaults();
|
||||
c.workspace.root = root.to_string();
|
||||
c.workspace.exclude.clear();
|
||||
c
|
||||
}
|
||||
|
||||
/// A `notes` symlink that points back at the workspace root must not make
/// `scan` loop or fail: every emitted path is unique and `alpha.md` is found.
#[test]
fn symlink_cycle_does_not_loop_or_crash() {
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();

    std::fs::write(root.join("alpha.md"), b"alpha").unwrap();
    // root/notes → root: following the link leads straight back to the start.
    symlink(root, root.join("notes")).unwrap();

    let config = cfg_with_root(root.to_str().unwrap());
    let connector = FsSourceConnector::new(&config).expect("connector init");
    let assets = connector
        .scan(&SourceScope::default())
        .expect("scan must return, not loop");

    // No workspace path may be emitted twice.
    let mut seen = std::collections::HashSet::new();
    for path in assets.iter().map(|asset| &asset.workspace_path.0) {
        assert!(seen.insert(path), "duplicate workspace_path: {}", path);
    }

    // The one real file must be present.
    let found_alpha = assets
        .iter()
        .any(|asset| asset.workspace_path.0 == "alpha.md");
    assert!(
        found_alpha,
        "expected alpha.md in scan output, got: {:?}",
        assets
            .iter()
            .map(|asset| &asset.workspace_path.0)
            .collect::<Vec<_>>()
    );
}
|
||||
|
||||
/// Two symlinks that only point at each other (and at nothing real) must
/// not prevent `scan` from completing and reporting `alpha.md`.
#[test]
fn dangling_symlink_pseudo_cycle_does_not_crash() {
    // Layout:
    //   root/
    //   ├── alpha.md
    //   ├── a → b   (b is not a real file/dir)
    //   └── b → a   (a is not a real file/dir)
    //
    // Neither link resolves to anything, so this is NOT a genuine two-step
    // directory cycle (that case is covered by
    // `two_step_directory_cycle_visited_set_breaks_loop`). The point here is
    // only that broken-link pseudo-cycles are tolerated without crashing or
    // looping.
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let (link_a, link_b) = (root.join("a"), root.join("b"));

    std::fs::write(root.join("alpha.md"), b"alpha").unwrap();
    symlink(&link_b, &link_a).unwrap();
    symlink(&link_a, &link_b).unwrap();

    let config = cfg_with_root(root.to_str().unwrap());
    let connector = FsSourceConnector::new(&config).expect("connector init");

    // a→b→a never reaches a real directory, yet the scan has to finish
    // and still surface alpha.md.
    let assets = connector
        .scan(&SourceScope::default())
        .expect("scan must return");
    assert!(assets
        .iter()
        .any(|asset| asset.workspace_path.0 == "alpha.md"));
}
|
||||
|
||||
/// A genuine two-directory symlink cycle must yield a finite, repeatable
/// scan result that contains both real files and no duplicate paths.
#[test]
fn two_step_directory_cycle_visited_set_breaks_loop() {
    // Layout (both `loop` links target a real directory):
    //   root/
    //   ├── a/
    //   │   ├── inside_a.md
    //   │   └── loop → ../b
    //   └── b/
    //       ├── inside_b.md
    //       └── loop → ../a
    //
    // Without the visited-set, walkdir would descend
    //   a → a/loop (=b) → a/loop/loop (=a) → … forever.
    // The canonical-path visited-set in `walker::walk_files` has to cut
    // that chain and produce a finite, deterministic listing.
    let workspace = tempfile::tempdir().unwrap();
    let root = workspace.path();
    let (dir_a, dir_b) = (root.join("a"), root.join("b"));

    std::fs::create_dir(&dir_a).unwrap();
    std::fs::create_dir(&dir_b).unwrap();
    std::fs::write(dir_a.join("inside_a.md"), b"a-content").unwrap();
    std::fs::write(dir_b.join("inside_b.md"), b"b-content").unwrap();
    // Relative targets keep each link aimed at its sibling directory no
    // matter where the tempdir was created.
    symlink("../b", dir_a.join("loop")).unwrap();
    symlink("../a", dir_b.join("loop")).unwrap();

    let config = cfg_with_root(root.to_str().unwrap());
    let connector = FsSourceConnector::new(&config).expect("connector init");

    // Scan twice: both runs must terminate AND list the same
    // workspace paths in the same order.
    let v1 = connector
        .scan(&SourceScope::default())
        .expect("scan must return");
    let v2 = connector
        .scan(&SourceScope::default())
        .expect("scan must return");

    let names1 = v1
        .iter()
        .map(|asset| asset.workspace_path.0.clone())
        .collect::<Vec<String>>();
    let names2 = v2
        .iter()
        .map(|asset| asset.workspace_path.0.clone())
        .collect::<Vec<String>>();
    assert_eq!(names1, names2, "scan must be deterministic across runs");

    // The same workspace path must never be emitted twice via the cycle.
    let mut seen = std::collections::HashSet::new();
    for name in &names1 {
        assert!(seen.insert(name), "duplicate workspace_path: {}", name);
    }

    // Both real files have to show up. Which relative path they appear
    // under depends on the side of the cycle the walker entered first, so
    // only the basename is checked.
    assert!(
        names1.iter().any(|name| name.ends_with("inside_a.md")),
        "expected inside_a.md in scan output, got: {names1:?}"
    );
    assert!(
        names1.iter().any(|name| name.ends_with("inside_b.md")),
        "expected inside_b.md in scan output, got: {names1:?}"
    );

    // Sanity bound: two real files plus a working cycle guard give a tiny
    // listing. Reaching 50 entries means the visited set has regressed.
    assert!(
        v1.len() < 50,
        "scan emitted {} assets — cycle guard likely regressed: {:?}",
        v1.len(),
        names1
    );
}
|
||||
Reference in New Issue
Block a user