diff --git a/crates/kb-source-fs/tests/snapshot_tree1.rs b/crates/kb-source-fs/tests/snapshot_tree1.rs
new file mode 100644
index 0000000..6eb0ed9
--- /dev/null
+++ b/crates/kb-source-fs/tests/snapshot_tree1.rs
@@ -0,0 +1,163 @@
+//! Snapshot + determinism tests against `fixtures/source-fs/tree-1`.
+//!
+//! Layout (committed under `/fixtures/source-fs/tree-1/`):
+//!
+//! ```text
+//! tree-1/
+//! ├── README.md
+//! ├── notes/
+//! │   ├── alpha.md
+//! │   └── beta.md
+//! ├── ignored/
+//! │   └── skip.tmp       # excluded by .kbignore
+//! ├── .kbignore          # contains: *.tmp
+//! └── .DS_Store          # implicitly excluded
+//! ```
+//!
+//! Two assertions:
+//!   1. Snapshot stability — `scan` output (with the volatile fields
+//!      blanked) is structurally equal to the committed baseline JSON.
+//!      Both sides are compared as parsed `serde_json::Value`s, so the
+//!      baseline file's whitespace is not significant.
+//!   2. Determinism — running `scan` twice produces byte-identical JSON
+//!      after blanking the volatile fields.
+//!
+//! Volatile fields, replaced by `""` before any comparison:
+//!   * `discovered_at` — wall-clock and intentionally NOT part of the
+//!     contract: the task spec says strip it before comparison.
+//!   * `source_uri.value` and `stored.path` — absolute paths that vary
+//!     by checkout location.
+
+use std::path::PathBuf;
+
+use kb_config::Config;
+use kb_core::{SourceConnector, SourceScope};
+use kb_source_fs::FsSourceConnector;
+use serde_json::Value;
+
+/// Repo root, derived from `CARGO_MANIFEST_DIR` (= `crates/kb-source-fs`).
+fn repo_root() -> PathBuf {
+    PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        // `ancestors()` yields the path itself first, so index 2 is two
+        // levels up: `crates/kb-source-fs` -> `crates` -> repo root.
+        .ancestors()
+        .nth(2)
+        .expect("CARGO_MANIFEST_DIR must sit two levels below the repo root")
+        .to_path_buf()
+}
+
+/// Directory of the committed `tree-1` fixture that the scans run against.
+fn fixture_root() -> PathBuf {
+    repo_root().join("fixtures/source-fs/tree-1")
+}
+
+/// Location of the committed baseline JSON for the `tree-1` fixture.
+fn baseline_path() -> PathBuf {
+    repo_root().join("fixtures/source-fs/tree-1.snapshot.json")
+}
+
+/// Build the default `Config` with its workspace root set to `root` and
+/// its `workspace.exclude` list emptied.
+fn cfg_for_fixture(root: &str) -> Config {
+    let mut c = Config::defaults();
+    c.workspace.root = root.to_string();
+    // Clear default excludes (`.git/**`, `node_modules/**`, `.obsidian/**`)
+    // so the snapshot is purely a function of the fixture + .kbignore +
+    // baked-in default-excludes.
+    c.workspace.exclude.clear();
+    c
+}
+
+/// Overwrite `key` in `obj` with an empty string if the key is present.
+/// An absent key stays absent, so the object's shape is unchanged.
+fn blank_if_present(obj: &mut serde_json::Map<String, Value>, key: &str) {
+    if let Some(slot) = obj.get_mut(key) {
+        *slot = Value::String(String::new());
+    }
+}
+
+/// Run `scan` against the fixture and return the JSON value with every
+/// `discovered_at` field replaced by an empty string.
+/// Also blank `source_uri.value` and `stored.path` because they contain
+/// absolute paths that vary by checkout location — the snapshot must be
+/// portable across machines and CI checkout dirs.
+///
+/// # Panics
+///
+/// Panics if the connector cannot be built, the scan fails, or the
+/// serialized output is not an array of objects. Silently skipping the
+/// blanking step would let volatile fields leak into the comparison.
+fn scan_and_strip() -> Value {
+    let root_dir = fixture_root();
+    let root = root_dir
+        .to_str()
+        .expect("fixture path must be valid UTF-8");
+    let cfg = cfg_for_fixture(root);
+    let conn = FsSourceConnector::new(&cfg).expect("connector init");
+    let assets = conn
+        .scan(&SourceScope::default())
+        .expect("scan must succeed against committed fixture");
+
+    let mut v = serde_json::to_value(&assets).expect("serialize");
+    let items = v
+        .as_array_mut()
+        .expect("scan output must serialize to a JSON array");
+    for item in items {
+        let map = item
+            .as_object_mut()
+            .expect("every scanned asset must serialize to a JSON object");
+        map.insert("discovered_at".to_string(), Value::String(String::new()));
+        // source_uri = { kind: "file", value: "" } — blank value.
+        if let Some(Value::Object(uri)) = map.get_mut("source_uri") {
+            blank_if_present(uri, "value");
+        }
+        // stored = { kind: "copied"|"reference", path: "", ... } — blank path.
+        if let Some(Value::Object(stored)) = map.get_mut("stored") {
+            blank_if_present(stored, "path");
+        }
+    }
+    v
+}
+
+#[test]
+fn tree_1_snapshot_matches_baseline() {
+    let actual = scan_and_strip();
+
+    // If KB_REGEN_SNAPSHOT is set, (re)write the baseline and exit
+    // *before* attempting to read it. This is the only path that may
+    // create the file from scratch.
+    if std::env::var_os("KB_REGEN_SNAPSHOT").is_some() {
+        let pretty = serde_json::to_string_pretty(&actual).unwrap() + "\n";
+        std::fs::write(baseline_path(), pretty).expect("write baseline");
+        panic!("regenerated baseline; rerun without KB_REGEN_SNAPSHOT to verify");
+    }
+
+    // Keep the underlying I/O error in the message: the read can fail for
+    // reasons other than a missing file (e.g. permissions, invalid UTF-8).
+    let baseline_text = std::fs::read_to_string(baseline_path()).unwrap_or_else(|err| {
+        panic!(
+            "cannot read baseline at {}: {err} — regenerate via `KB_REGEN_SNAPSHOT=1 cargo test \
+             -p kb-source-fs --test snapshot_tree1 -- tree_1_snapshot_matches_baseline`",
+            baseline_path().display()
+        )
+    });
+    let expected: Value = serde_json::from_str(&baseline_text)
+        .expect("baseline JSON must parse");
+
+    if actual != expected {
+        let actual_pretty = serde_json::to_string_pretty(&actual).unwrap();
+        let expected_pretty = serde_json::to_string_pretty(&expected).unwrap();
+        panic!(
+            "snapshot drift.\n--- expected ---\n{expected_pretty}\n--- actual ---\n{actual_pretty}\n"
+        );
+    }
+}
+
+#[test]
+fn tree_1_scan_is_deterministic() {
+    let v1 = scan_and_strip();
+    let v2 = scan_and_strip();
+    let s1 = serde_json::to_string(&v1).unwrap();
+    let s2 = serde_json::to_string(&v2).unwrap();
+    assert_eq!(s1, s2, "two consecutive scans diverged");
+}
diff --git a/fixtures/source-fs/tree-1.snapshot.json b/fixtures/source-fs/tree-1.snapshot.json
new file mode 100644
index 0000000..350d53c
--- /dev/null
+++ b/fixtures/source-fs/tree-1.snapshot.json
@@ -0,0 +1,68 @@
+[
+  {
+    "asset_id": "bd6e5649e546d6ac94c3269ffe7192c5",
+    "byte_len": 6,
+    "checksum": "f6b71def043f1fd92f2d34969a7272a9d134730551de8c9754c4be79fbc0aef3",
+    "discovered_at": "",
+    "media_type": {
+      "other": ""
+    },
+    "source_uri": {
+      "kind": "file",
+      "value": ""
+    },
+    "stored": {
+      "kind": "copied",
+      "path": ""
+    },
+    "workspace_path": ".kbignore"
+  },
+  {
+    "asset_id": "ba6cd31cab86eff7a86638ee76494bcf",
+    "byte_len": 169,
+    "checksum": "b0124489083674f6ad99a57ee5fc425feb71754a538a97a1ab580e8eb9b1f1c1",
+    "discovered_at": "",
+    "media_type": "markdown",
+    "source_uri": {
+      "kind": "file",
+      "value": ""
+    },
+    "stored": {
+      "kind": "copied",
+      "path": ""
+    },
+    "workspace_path": "README.md"
+  },
+  {
+    "asset_id": "3381fcc34cf9415a391ba6b0dc6037c5",
+    "byte_len": 11,
+    "checksum": "e9fa9a5e0725d7bf6ec9d1565d3921eb6b62aa7f0db40c1c3ffebda7475d4258",
+    "discovered_at": "",
+    "media_type": "markdown",
+    "source_uri": {
+      "kind": "file",
+      "value": ""
+    },
+    "stored": {
+      "kind": "copied",
+      "path": ""
+    },
+    "workspace_path": "notes/alpha.md"
+  },
+  {
+    "asset_id": "e300aa98aec843d2df1dd8f43702b257",
+    "byte_len": 10,
+    "checksum": "3e4df2f43563730d61672ce67a9bf479bc7c7a2f1384e2081d52e06f143353ed",
+    "discovered_at": "",
+    "media_type": "markdown",
+    "source_uri": {
+      "kind": "file",
+      "value": ""
+    },
+    "stored": {
+      "kind": "copied",
+      "path": ""
+    },
+    "workspace_path": "notes/beta.md"
+  }
+]
diff --git a/fixtures/source-fs/tree-1/.DS_Store b/fixtures/source-fs/tree-1/.DS_Store
new file mode 100644
index 0000000..c24ae00
--- /dev/null
+++ b/fixtures/source-fs/tree-1/.DS_Store
@@ -0,0 +1 @@
+macOS Finder metadata placeholder. Implicitly excluded by FsSourceConnector.
diff --git a/fixtures/source-fs/tree-1/.kbignore b/fixtures/source-fs/tree-1/.kbignore
new file mode 100644
index 0000000..1944fd6
--- /dev/null
+++ b/fixtures/source-fs/tree-1/.kbignore
@@ -0,0 +1 @@
+*.tmp
diff --git a/fixtures/source-fs/tree-1/README.md b/fixtures/source-fs/tree-1/README.md
new file mode 100644
index 0000000..1948784
--- /dev/null
+++ b/fixtures/source-fs/tree-1/README.md
@@ -0,0 +1,5 @@
+# tree-1
+
+Fixture for `kb-source-fs` snapshot tests. Contents are intentionally tiny
+and stable — bumping a byte here will require regenerating the snapshot
+baseline.
diff --git a/fixtures/source-fs/tree-1/ignored/skip.tmp b/fixtures/source-fs/tree-1/ignored/skip.tmp
new file mode 100644
index 0000000..73cdccd
--- /dev/null
+++ b/fixtures/source-fs/tree-1/ignored/skip.tmp
@@ -0,0 +1 @@
+should be excluded by .kbignore
diff --git a/fixtures/source-fs/tree-1/notes/alpha.md b/fixtures/source-fs/tree-1/notes/alpha.md
new file mode 100644
index 0000000..b01a892
--- /dev/null
+++ b/fixtures/source-fs/tree-1/notes/alpha.md
@@ -0,0 +1 @@
+alpha note
diff --git a/fixtures/source-fs/tree-1/notes/beta.md b/fixtures/source-fs/tree-1/notes/beta.md
new file mode 100644
index 0000000..e689c5e
--- /dev/null
+++ b/fixtures/source-fs/tree-1/notes/beta.md
@@ -0,0 +1 @@
+beta note