p1-1: address review (AssetStorage doc-comment, real directory-cycle test)
- Document AssetStorage::Copied / Reference path semantics so P1-6 (asset writer) knows that at scan time `Copied.path` is the SOURCE path and the writer is responsible for both copying bytes AND overwriting `path` with the destination. - Rename the dangling-symlink test to make its scope explicit (`dangling_symlink_pseudo_cycle_does_not_crash`); the prior name implied a real two-step directory cycle but the targets were broken links. - Add `two_step_directory_cycle_visited_set_breaks_loop`: builds `a/loop -> ../b` and `b/loop -> ../a` over real directories with real files, asserting scan terminates with a finite, deterministic asset list — exercises the canonical-path visited-set in walker::walk_files. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -40,6 +40,18 @@ impl WorkspacePath {
|
||||
}
|
||||
}
|
||||
|
||||
/// On-disk storage decision for a `RawAsset`.
|
||||
///
|
||||
/// **Important convention** — `path` field semantics differ by variant:
|
||||
///
|
||||
/// - `Copied { path }`: at scan time, `path` is the **source** path on the
|
||||
/// user's filesystem. The asset writer (P1-6) is responsible for actually
|
||||
/// copying the bytes into the workspace asset store, AND for overwriting
|
||||
/// `path` with the destination path after the copy completes.
|
||||
///
|
||||
/// - `Reference { path, sha }`: `path` is always the **source** path. No
|
||||
/// bytes are ever copied; downstream readers stream from `path` directly.
|
||||
/// `sha` is the BLAKE3 full hex (matches `RawAsset::checksum`).
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase", tag = "kind")]
|
||||
pub enum AssetStorage {
|
||||
|
||||
@@ -61,11 +61,17 @@ fn symlink_cycle_does_not_loop_or_crash() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two_step_symlink_cycle_does_not_loop() {
|
||||
fn dangling_symlink_pseudo_cycle_does_not_crash() {
|
||||
// root/
|
||||
// ├── alpha.md
|
||||
// ├── a → b
|
||||
// └── b → a
|
||||
// ├── a → b (b does not exist as a real file/dir)
|
||||
// └── b → a (a does not exist as a real file/dir)
|
||||
//
|
||||
// Both symlinks are dangling — neither resolves to anything. This is
|
||||
// NOT a real two-step directory cycle (see
|
||||
// `two_step_directory_cycle_visited_set_breaks_loop` for that case);
|
||||
// it merely verifies the scan tolerates broken-link pseudo-cycles
|
||||
// without crashing or looping.
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let root = dir.path();
|
||||
std::fs::write(root.join("alpha.md"), b"alpha").unwrap();
|
||||
@@ -80,3 +86,75 @@ fn two_step_symlink_cycle_does_not_loop() {
|
||||
let v = conn.scan(&SourceScope::default()).expect("scan must return");
|
||||
assert!(v.iter().any(|a| a.workspace_path.0 == "alpha.md"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two_step_directory_cycle_visited_set_breaks_loop() {
|
||||
// Real two-step directory cycle through symlinks:
|
||||
// root/
|
||||
// ├── a/
|
||||
// │ ├── inside_a.md
|
||||
// │ └── loop → ../b (symlink, target IS a real directory)
|
||||
// └── b/
|
||||
// ├── inside_b.md
|
||||
// └── loop → ../a (symlink, target IS a real directory)
|
||||
//
|
||||
// Without the visited-set, walkdir would descend
|
||||
// a → a/loop (=b) → a/loop/loop (=a) → … forever.
|
||||
// The canonical-path visited-set in `walker::walk_files` must break
|
||||
// the loop and yield a finite, deterministic result.
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let root = dir.path();
|
||||
std::fs::create_dir(root.join("a")).unwrap();
|
||||
std::fs::create_dir(root.join("b")).unwrap();
|
||||
std::fs::write(root.join("a/inside_a.md"), b"a-content").unwrap();
|
||||
std::fs::write(root.join("b/inside_b.md"), b"b-content").unwrap();
|
||||
// Use relative targets so the symlink truly points at the sibling
|
||||
// directory regardless of where the tempdir lives.
|
||||
symlink("../b", root.join("a/loop")).unwrap();
|
||||
symlink("../a", root.join("b/loop")).unwrap();
|
||||
|
||||
let conn = FsSourceConnector::new(&cfg_with_root(root.to_str().unwrap()))
|
||||
.expect("connector init");
|
||||
|
||||
// Run scan twice — both must terminate AND produce identical
|
||||
// workspace_path lists (visited-set is deterministic per scan).
|
||||
let v1 = conn.scan(&SourceScope::default()).expect("scan must return");
|
||||
let v2 = conn.scan(&SourceScope::default()).expect("scan must return");
|
||||
|
||||
let names1: Vec<String> = v1.iter().map(|a| a.workspace_path.0.clone()).collect();
|
||||
let names2: Vec<String> = v2.iter().map(|a| a.workspace_path.0.clone()).collect();
|
||||
assert_eq!(names1, names2, "scan must be deterministic across runs");
|
||||
|
||||
// No duplicate workspace paths (visited-set should suppress
|
||||
// re-emission of the same canonical file via the cycle).
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
for asset in &v1 {
|
||||
assert!(
|
||||
seen.insert(asset.workspace_path.0.clone()),
|
||||
"duplicate workspace_path: {}",
|
||||
asset.workspace_path.0
|
||||
);
|
||||
}
|
||||
|
||||
// Both real files must appear at least once. Their exact relative
|
||||
// paths depend on which side of the cycle the walker descended into
|
||||
// first; assert by basename to keep the check robust.
|
||||
assert!(
|
||||
v1.iter().any(|a| a.workspace_path.0.ends_with("inside_a.md")),
|
||||
"expected inside_a.md in scan output, got: {names1:?}"
|
||||
);
|
||||
assert!(
|
||||
v1.iter().any(|a| a.workspace_path.0.ends_with("inside_b.md")),
|
||||
"expected inside_b.md in scan output, got: {names1:?}"
|
||||
);
|
||||
|
||||
// Sanity bound: with two real files and a working cycle guard the
|
||||
// output should be tiny. If we ever produce >50 entries the visited
|
||||
// set has regressed.
|
||||
assert!(
|
||||
v1.len() < 50,
|
||||
"scan emitted {} assets — cycle guard likely regressed: {:?}",
|
||||
v1.len(),
|
||||
names1
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user