두 번째 commit. 사용자 facing surface (CLI binary, env vars, XDG paths) + 코드 안 single-letter token (`KB_`, `kb.sqlite`, `/kb/`, tracing target) 일괄 rename. 그리고 3 개 file rename: - 디자인 doc `2026-04-27-kb-final-form-design.md` → `2026-04-27-kebab-final-form-design.md` - 최초 보고서 `kb_local_rust_report.md` → `kebab_local_rust_report.md` - workspace ignore `.kbignore` → `.kebabignore` ## 변경 - `crates/kebab-cli/Cargo.toml`: `[[bin]] name = "kb"` → `"kebab"`. - `crates/kebab-cli/src/main.rs`: `#[command(name = "kb", …)]` → `name = "kebab"`. - 모든 `KB_*` env var (코드 + doc + 테스트) → `KEBAB_*`. apply_env prefix 매칭 + 30+ 개 setting 키 모두. - XDG paths: `~/.config/kb` / `~/.local/share/kb` / `~/.cache/kb` / `~/.local/state/kb` → `~/.config/kebab` 등. config defaults + expand_path tests + paths.rs 의 hardcode 모두. - SQLite filename: `kb.sqlite` → `kebab.sqlite` (`SQLITE_FILE` const + 테스트 hardcode 모두). - tracing target: `target: "kb-*"` → `"kebab-*"` (10+ 곳). - snapshot fixture: `.kbignore` → `.kebabignore` (`fixtures/source-fs/ tree-1.snapshot.json` 갱신). ## 검증 - `cargo test --workspace -j 1` clean (linker OOM 회피 위해 직렬). - `cargo clippy --workspace --all-targets -- -D warnings` clean. 다음 commit 에서 docs sweep. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
120 lines
4.1 KiB
Rust
120 lines
4.1 KiB
Rust
//! Snapshot test: a fixed corpus + fixed query produces a stable
|
|
//! `Vec<VectorHit>` JSON. Pinning the snapshot here catches accidental
|
|
//! drift in score scaling, payload shape, or top-k ordering.
|
|
//!
|
|
//! This test is `#[ignore]` and requires AVX-capable hardware. Run
|
|
//! with `cargo test -p kb-store-vector -- --ignored snapshot`.
|
|
//!
|
|
//! The committed fixture at `tests/fixtures/vector/run-1.json` is a
|
|
//! placeholder until first regenerated on AVX hardware. The test
|
|
//! detects the placeholder via its `_comment` field and panics with
|
|
//! a clear "regenerate me" message — see `assert_no_placeholder`
|
|
//! below.
|
|
|
|
use std::path::PathBuf;
|
|
|
|
use kebab_core::{SearchFilters, VectorStore};
|
|
use serde_json::json;
|
|
|
|
mod common;
|
|
use common::{TestEnv, make_record, require_avx_or_panic};
|
|
|
|
/// Model identifier handed to `make_record` for every record seeded by the
/// snapshot test in this file.
// NOTE(review): presumably the embedding-model tag stored on each record —
// confirm against `common::make_record`.
const MODEL: &str = "snapshot-model";
|
|
|
|
#[test]
|
|
#[ignore = "requires AVX-capable hardware (LanceDB)"]
|
|
fn vector_hits_snapshot_run_1() {
|
|
require_avx_or_panic();
|
|
let env = TestEnv::new();
|
|
// Fixed deterministic corpus: 4 unit-norm vectors, each with a
|
|
// known doc / chunk / heading. The query points squarely at
|
|
// chunk 0 so the expected ordering is 0, then the others by
|
|
// distance from dir(0).
|
|
let corpus = vec![
|
|
(0u8, vec![1.0_f32, 0.0, 0.0, 0.0], "alpha", &["A"][..]),
|
|
(1u8, vec![0.95_f32, 0.31, 0.0, 0.0], "beta", &["A", "B"][..]),
|
|
(2u8, vec![0.0_f32, 1.0, 0.0, 0.0], "gamma", &["B"][..]),
|
|
(3u8, vec![0.0_f32, 0.0, 1.0, 0.0], "delta", &[][..]),
|
|
];
|
|
|
|
let mut recs = Vec::new();
|
|
for (i, vec, text, headings) in &corpus {
|
|
let rec = make_record(*i, *i, vec.clone(), text, headings, MODEL);
|
|
env.seed_chunk(
|
|
&rec.chunk_id.0,
|
|
&rec.doc_id.0,
|
|
&format!("notes/{i}.md"),
|
|
"en",
|
|
&[],
|
|
"primary",
|
|
);
|
|
recs.push(rec);
|
|
}
|
|
env.vector.upsert(&recs).unwrap();
|
|
|
|
let q = vec![1.0_f32, 0.0, 0.0, 0.0];
|
|
let hits = env.vector.search(&q, 3, &SearchFilters::default()).unwrap();
|
|
|
|
// The snapshot pins:
|
|
// - top-3 chunk_id ordering (by score desc)
|
|
// - payload shape: { doc_id, text, heading_path }
|
|
// - that scores live in [0, 1] and are sorted descending
|
|
let actual = json!(
|
|
hits.iter().map(|h| json!({
|
|
"chunk_id": h.chunk_id.0,
|
|
"score_in_unit_interval": (0.0..=1.0).contains(&h.score),
|
|
"payload": h.payload,
|
|
})).collect::<Vec<_>>()
|
|
);
|
|
|
|
let fixture = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
|
.join("tests")
|
|
.join("fixtures")
|
|
.join("vector")
|
|
.join("run-1.json");
|
|
|
|
if std::env::var_os("KEBAB_UPDATE_SNAPSHOTS").is_some() {
|
|
std::fs::create_dir_all(fixture.parent().unwrap()).unwrap();
|
|
std::fs::write(&fixture, serde_json::to_string_pretty(&actual).unwrap())
|
|
.unwrap();
|
|
return;
|
|
}
|
|
|
|
let expected: serde_json::Value =
|
|
serde_json::from_str(&std::fs::read_to_string(&fixture).unwrap_or_else(
|
|
|_| panic!(
|
|
"missing snapshot fixture at {}; run with KEBAB_UPDATE_SNAPSHOTS=1 to create",
|
|
fixture.display()
|
|
),
|
|
))
|
|
.unwrap();
|
|
|
|
// Refuse to silently "pass" when the fixture is the committed
|
|
// placeholder. The placeholder JSON carries a `_comment` field
|
|
// with regeneration instructions; production fixtures (a captured
|
|
// hits array) do not.
|
|
if expected.get("_comment").is_some() {
|
|
panic!(
|
|
"snapshot fixture is a placeholder — regenerate on AVX hardware then commit. \
|
|
Path: {}. To regenerate: \
|
|
`KEBAB_UPDATE_SNAPSHOTS=1 cargo test -p kb-store-vector -- --ignored snapshot`.",
|
|
fixture.display()
|
|
);
|
|
}
|
|
|
|
assert_eq!(
|
|
actual, expected,
|
|
"snapshot drift; rerun with KEBAB_UPDATE_SNAPSHOTS=1 to regenerate"
|
|
);
|
|
|
|
// Independent guard: scores must be non-increasing.
|
|
for w in hits.windows(2) {
|
|
assert!(
|
|
w[0].score >= w[1].score,
|
|
"scores not in descending order: {} then {}",
|
|
w[0].score,
|
|
w[1].score
|
|
);
|
|
}
|
|
}
|