// kebab/crates/kebab-store-sqlite/tests/asset_writer.rs

//! Asset writer tests: copy mode (file written with mode 0o644), reference
//! mode (no copy, row records the source path), workspace-path orphan sweep,
//! invalid `AssetId` rejection, and checksum mismatch (Conflict).

use std::path::PathBuf;

use kebab_core::{AssetId, AssetStorage, Checksum, MediaType, RawAsset, SourceUri, WorkspacePath};
use kebab_store_sqlite::SqliteStore;
use time::OffsetDateTime;

mod common;

/// Builds a deterministic `RawAsset` fixture shared by the tests in this file.
///
/// * `_bytes` — accepted for call-site symmetry but not read here.
/// * `byte_len` — stored verbatim in the asset's `byte_len` field.
/// * `declared_checksum` — stored verbatim as the asset's `checksum`.
///
/// Everything else is fixed: the id is 32 `a` characters, the workspace path
/// is `notes/foo.md`, the media type is Markdown, and `stored` is a
/// `Reference` to `/some/source.md` with an all-zero 64-character sha.
fn fixed_asset(_bytes: &[u8], byte_len: u64, declared_checksum: &str) -> RawAsset {
    // The same on-disk location backs both the source URI and the storage
    // reference, so build it once and clone.
    let source = PathBuf::from("/some/source.md");
    let workspace_path = WorkspacePath::new("notes/foo.md".into()).unwrap();
    let discovered_at = OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap();
    let placeholder_sha = Checksum("0".repeat(64));

    RawAsset {
        // Hand-built 32-character id (the shape the core newtype expects).
        asset_id: AssetId("a".repeat(32)),
        source_uri: SourceUri::File(source.clone()),
        workspace_path,
        media_type: MediaType::Markdown,
        byte_len,
        checksum: Checksum(declared_checksum.into()),
        discovered_at,
        stored: AssetStorage::Reference {
            path: source,
            sha: placeholder_sha,
        },
    }
}

/// Hashes `bytes` with `blake3::hash` and returns the digest rendered as a
/// hex `String`.
fn b3_full_hex(bytes: &[u8]) -> String {
    let digest = blake3::hash(bytes);
    digest.to_hex().to_string()
}

/// Copy mode: with a threshold of 100 the payload must be written under
/// `data_dir/assets/<first two id chars>/<id>`, carry mode 0o644 on Unix, and
/// the `assets` row must record `storage_kind = "copied"`.
#[test]
fn copy_mode_writes_file_with_0o644_and_correct_bytes() {
    let env = common::TestEnv::with_threshold(100);
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let payload = b"hello, sqlite";
    let checksum = b3_full_hex(payload);
    let asset = fixed_asset(payload, payload.len() as u64, &checksum);

    store.put_asset_with_bytes(&asset, payload).expect("write");

    // Expected location: data_dir/assets/aa/aaaaaa…aa (sharded by id prefix).
    let id = &asset.asset_id.0;
    let shard = &id[..2];
    let dest = env.data_dir().join("assets").join(shard).join(id);
    assert!(dest.exists(), "asset file not written at {}", dest.display());

    // The published bytes must match what was handed to the store.
    let on_disk = std::fs::read(&dest).unwrap();
    assert_eq!(on_disk, payload);

    // Permission bits are only meaningful on Unix.
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let perms = std::fs::metadata(&dest).unwrap().permissions();
        let mode = perms.mode() & 0o777;
        assert_eq!(mode, 0o644, "expected 0o644, got 0o{mode:o}");
    }

    // The database row must say the asset was copied.
    let storage_kind: String = env.with_conn(|c| {
        c.query_row(
            "SELECT storage_kind FROM assets WHERE asset_id = ?",
            [id],
            |r| r.get(0),
        )
    });
    assert_eq!(storage_kind, "copied");
}

/// Reference mode: with a threshold of 0 no file may appear under the assets
/// tree, and the row must record `storage_kind = "reference"` together with
/// the original source path.
#[test]
fn reference_mode_does_not_write_file_but_records_path() {
    // A zero copy threshold sends every asset down the reference branch.
    let env = common::TestEnv::with_threshold(0);
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let payload = b"big-pretend-bytes";
    let checksum = b3_full_hex(payload);
    // The declared byte_len is non-zero (1) so that, against a zero
    // threshold, the size check falls through to the reference branch.
    let asset = RawAsset {
        source_uri: SourceUri::File(PathBuf::from("/path/to/original.md")),
        ..fixed_asset(payload, 1, &checksum)
    };

    store.put_asset_with_bytes(&asset, payload).expect("ref write");

    // Nothing may have been copied to the sharded destination.
    let id = &asset.asset_id.0;
    let shard = &id[..2];
    let dest = env.data_dir().join("assets").join(shard).join(id);
    assert!(!dest.exists(), "reference mode must not copy bytes");

    // The row points back at the original file instead.
    let (storage_kind, storage_path): (String, String) = env.with_conn(|c| {
        c.query_row(
            "SELECT storage_kind, storage_path FROM assets WHERE asset_id = ?",
            [id],
            |r| Ok((r.get(0)?, r.get(1)?)),
        )
    });
    assert_eq!(storage_kind, "reference");
    assert_eq!(storage_path, "/path/to/original.md");
}

/// Regression test for HOTFIXES 2026-05-02 P7-3: a stale `assets` row that
/// already owns the target `workspace_path` under a different `asset_id` must
/// be swept so the new insert succeeds.
#[test]
fn put_asset_with_bytes_sweeps_workspace_path_orphan() {
    // Background (HOTFIXES 2026-05-02 P7-3): the upsert only handled
    // `ON CONFLICT(asset_id)`, so a workspace_path UNIQUE conflict made
    // re-ingesting an edited file unrecoverable. The fix,
    // `purge_orphan_at_workspace_path`, removes the stale
    // documents → assets chain before the new INSERT lands.
    //
    // Only the no-documents flavour (a bare asset row) is covered here,
    // i.e. the put_asset_with_bytes path. The documents-cascade flavour
    // is covered end-to-end by `kebab-app::tests::pdf_pipeline::
    // re_ingest_edited_pdf_produces_new_doc_id`.
    let env = common::TestEnv::with_threshold(100);
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    // Seed a row that owns `notes/foo.md` under a *different* asset_id,
    // standing in for an earlier ingest of an older byte version.
    let stale_id = "b".repeat(32);
    env.with_conn(|c| {
        c.execute(
            "INSERT INTO assets (
                asset_id, source_uri, workspace_path, media_type, byte_len,
                checksum, storage_kind, storage_path, discovered_at
            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            rusqlite::params![
                stale_id,
                "file:///elsewhere/foo.md",
                "notes/foo.md",
                "\"markdown\"",
                7_i64,
                "0".repeat(64),
                "reference",
                "/elsewhere/foo.md",
                "2024-01-01T00:00:00Z",
            ],
        )
    });

    let payload = b"hello, sqlite";
    let checksum = b3_full_hex(payload);
    let asset = fixed_asset(payload, payload.len() as u64, &checksum);

    store
        .put_asset_with_bytes(&asset, payload)
        .expect("orphan sweep + INSERT must succeed");

    // Counts the `assets` rows carrying the given id.
    let rows_with_id = |id: &str| -> i64 {
        env.with_conn(|c| {
            c.query_row(
                "SELECT COUNT(*) FROM assets WHERE asset_id = ?",
                rusqlite::params![id],
                |row| row.get(0),
            )
        })
    };

    // The stale row is gone and the new row holds the workspace_path.
    assert_eq!(rows_with_id(&stale_id), 0, "stale asset_id must be purged");
    assert_eq!(
        rows_with_id(&asset.asset_id.0),
        1,
        "new asset_id must own the workspace_path slot"
    );

    // The new asset's bytes are visible at their final destination.
    let id = &asset.asset_id.0;
    let shard = &id[..2];
    let dest = env.data_dir().join("assets").join(shard).join(id);
    assert!(
        dest.exists(),
        "new asset bytes must be visible at {}",
        dest.display()
    );
}

/// The store must refuse an `AssetId` that is not a well-formed id, and must
/// do so before any bytes are staged under `data_dir/assets/`.
#[test]
fn put_asset_with_bytes_rejects_invalid_asset_id() {
    // `kebab_core::AssetId(pub String)` lets a hand-construction bypass the
    // 32-hex `FromStr` invariant. The store boundary must reject any ID
    // whose shape would let path construction escape `data_dir/assets/`.
    let env = common::TestEnv::with_threshold(100);
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    // 32 chars but contains a `/` — would let `assets_path_for` stitch
    // together a path outside the shard tree.
    let evil_id = "../etc/passwd_padded_to_xx_xxxxx".to_string();
    assert_eq!(evil_id.len(), 32, "test fixture must be 32 chars to exercise length-only checks");
    let mut asset = fixed_asset(b"x", 1, &b3_full_hex(b"x"));
    // `evil_id` is not needed afterwards, so move it instead of cloning.
    asset.asset_id = AssetId(evil_id);

    let err = store
        .put_asset_with_bytes(&asset, b"x")
        .expect_err("must reject non-hex AssetId");
    let msg = format!("{err:#}");
    assert!(
        msg.contains("invalid AssetId shape"),
        "expected AssetId-shape rejection, got: {msg}"
    );

    // And the bytes must NOT have been staged anywhere under the assets
    // tree (no I/O should have happened before validation).
    let assets_dir = env.data_dir().join("assets");
    if assets_dir.exists() {
        for entry in std::fs::read_dir(&assets_dir).unwrap().flatten() {
            let path = entry.path();
            // Anything directly under `assets/` that is not a directory
            // (e.g. a staged temp file) is itself an artifact. Report it
            // explicitly rather than letting `read_dir` on a non-directory
            // fail with an unrelated I/O error.
            if !path.is_dir() {
                panic!(
                    "invalid AssetId still produced filesystem artifact at {}",
                    path.display()
                );
            }
            // Look one level into each shard dir and require it to be empty.
            if let Some(sub) = std::fs::read_dir(&path).unwrap().flatten().next() {
                panic!(
                    "invalid AssetId still produced filesystem artifact at {}",
                    sub.path().display()
                );
            }
        }
    }
}

/// Supplying bytes whose hash differs from the asset's declared checksum
/// must make `put_asset_with_bytes` fail with a conflict-style error.
#[test]
fn checksum_mismatch_returns_conflict() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let payload = b"the real bytes";
    // Declare the checksum of an unrelated payload so it cannot match.
    let tampered = b3_full_hex(b"different bytes");
    let asset = fixed_asset(payload, payload.len() as u64, &tampered);

    let err = store
        .put_asset_with_bytes(&asset, payload)
        .expect_err("must reject checksum mismatch");
    let msg = format!("{err:#}");

    // Either wording is accepted as evidence of the conflict error.
    let looks_like_conflict = ["checksum mismatch", "conflict"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(
        looks_like_conflict,
        "expected Conflict-flavoured error, got: {msg}"
    );
}