refactor(rename): kb crates → kebab — Cargo packages, folders, Rust modules

프로젝트 이름 `kb` → `kebab` rename 의 첫 단계.

- workspace `Cargo.toml`: members `crates/kb-*` → `crates/kebab-*`,
  repository URL `altair823/kb` → `altair823/kebab`.
- 18 crate 폴더 rename via `git mv` (history 보존).
- 각 crate `Cargo.toml`: `name = "kb-*"` → `"kebab-*"`, path deps
  `../kb-*` → `../kebab-*`.
- 모든 `.rs`: `kb_<id>` snake-case 모듈 path 18 개 (`kb_core`,
  `kb_config`, `kb_app`, `kb_cli`, `kb_eval`, `kb_search`, `kb_chunk`,
  `kb_normalize`, `kb_source_fs`, `kb_parse_md`, `kb_parse_types`,
  `kb_store_sqlite`, `kb_store_vector`, `kb_embed`, `kb_embed_local`,
  `kb_llm`, `kb_llm_local`, `kb_rag`) → `kebab_<id>` 일괄 sed (단어
  경계 \\b 사용해 영어 문장 안의 "kb" 약어 미오염).

CLI binary 이름 (`[[bin]] name = "kb"`), 환경변수 `KB_*`, XDG paths,
tracing target, 그리고 docs sweep 은 다음 commit 에서.

## 검증

- `cargo check --workspace` clean — 모든 crate 빌드 통과 후 commit.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-02 03:28:08 +00:00
parent 2aecbf3d9f
commit 911fb49550
143 changed files with 727 additions and 727 deletions

View File

@@ -0,0 +1,176 @@
//! Integration tests for `MockEmbedder`. Gated behind the `mock` feature.
//!
//! Canonical invocation: `cargo test -p kb-embed --features mock`.
#![cfg(feature = "mock")]
use kebab_embed::{
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, MockEmbedder,
assert_unit_norm, assert_vector_shape,
};
use proptest::prelude::*;
fn mk(dims: usize) -> MockEmbedder {
MockEmbedder::new(
EmbeddingModelId("mock-test".into()),
EmbeddingVersion("0".into()),
dims,
)
}
#[test]
fn dyn_dispatch_through_box() {
let e: Box<dyn Embedder> = Box::new(mk(8));
assert_eq!(e.dimensions(), 8);
assert_eq!(e.model_id(), EmbeddingModelId("mock-test".into()));
assert_eq!(e.model_version(), EmbeddingVersion("0".into()));
let inputs = [EmbeddingInput {
text: "a fox",
kind: EmbeddingKind::Document,
}];
let v = e.embed(&inputs).expect("embed via box");
assert_eq!(v.len(), 1);
assert_vector_shape(&v, 8);
}
#[test]
fn identical_input_yields_byte_identical_vector() {
let e = mk(16);
let a = e
.embed(&[EmbeddingInput {
text: "the quick brown fox",
kind: EmbeddingKind::Document,
}])
.unwrap();
let b = e
.embed(&[EmbeddingInput {
text: "the quick brown fox",
kind: EmbeddingKind::Document,
}])
.unwrap();
// Vec<Vec<f32>> equality is byte-equal because we did not mutate
// either side and the hash + normalization path is pure.
assert_eq!(a, b);
}
#[test]
fn document_and_query_kinds_differ_for_same_text() {
let e = mk(32);
let inputs = [
EmbeddingInput {
text: "needle in haystack",
kind: EmbeddingKind::Document,
},
EmbeddingInput {
text: "needle in haystack",
kind: EmbeddingKind::Query,
},
];
let v = e.embed(&inputs).unwrap();
assert_eq!(v.len(), 2);
assert_vector_shape(&v, 32);
assert_ne!(
v[0], v[1],
"Document and Query kinds must produce different vectors for identical text"
);
}
#[test]
fn dimensions_match_construction() {
for dims in [1usize, 4, 64, 384, 768, 1024] {
let e = mk(dims);
assert_eq!(e.dimensions(), dims);
let v = e
.embed(&[EmbeddingInput {
text: "x",
kind: EmbeddingKind::Document,
}])
.unwrap();
assert_vector_shape(&v, dims);
}
}
#[test]
fn different_seeds_produce_different_vectors() {
let a = MockEmbedder::with_seed(
EmbeddingModelId("m".into()),
EmbeddingVersion("0".into()),
16,
0,
);
let b = MockEmbedder::with_seed(
EmbeddingModelId("m".into()),
EmbeddingVersion("0".into()),
16,
1,
);
let inputs = [EmbeddingInput {
text: "same input",
kind: EmbeddingKind::Document,
}];
assert_ne!(a.embed(&inputs).unwrap(), b.embed(&inputs).unwrap());
}
proptest! {
#![proptest_config(ProptestConfig {
cases: 100,
..ProptestConfig::default()
})]
/// 100 random `(text, kind)` pairs: every output vector must have
/// `len == dimensions`, contain only finite floats, contain no NaNs,
/// be L2 unit-norm within tolerance, be re-deterministic across calls,
/// differ between Document/Query kinds, and differ between distinct texts.
#[test]
fn random_inputs_yield_well_formed_vectors(
text in ".{0,256}",
text2 in ".{0,256}",
is_query in any::<bool>(),
// dims ≥ 2: a 1-dim unit-norm vector has only two possible values
// (`[1.0]` or `[-1.0]`), which makes the kind/text differential
// assertions degenerate. Pick a floor of 2 so the differentials
// exercise non-degenerate vector space.
dims in 2usize..=128,
) {
// Skip degenerate case where the two random texts collide; the
// "distinct text → distinct vector" assertion below requires them to
// differ.
prop_assume!(text != text2);
let e = mk(dims);
let kind = if is_query { EmbeddingKind::Query } else { EmbeddingKind::Document };
let v = e.embed(&[EmbeddingInput { text: &text, kind }]).unwrap();
prop_assert_eq!(v.len(), 1);
prop_assert_eq!(v[0].len(), dims);
for x in &v[0] {
prop_assert!(x.is_finite(), "component {x} not finite");
prop_assert!(!x.is_nan(), "component {x} is NaN");
}
// L2 unit-norm within tolerance. `5e-4` is a safe upper bound up to
// dims = 128 here (would-be floor: f32::EPSILON × √dims).
assert_unit_norm(&v, 5e-4);
// Re-determinism: embedding `text` as Document twice → byte-equal.
let doc_a = e
.embed(&[EmbeddingInput { text: &text, kind: EmbeddingKind::Document }])
.unwrap();
let doc_b = e
.embed(&[EmbeddingInput { text: &text, kind: EmbeddingKind::Document }])
.unwrap();
prop_assert_eq!(&doc_a, &doc_b, "Doc(text) must be byte-equal across calls");
// Kind differential: Doc(text) != Query(text).
let q = e
.embed(&[EmbeddingInput { text: &text, kind: EmbeddingKind::Query }])
.unwrap();
prop_assert_ne!(&doc_a, &q, "Doc(text) must differ from Query(text)");
// Text differential: Doc(text) != Doc(text2) when text != text2.
let doc_other = e
.embed(&[EmbeddingInput { text: &text2, kind: EmbeddingKind::Document }])
.unwrap();
prop_assert_ne!(&doc_a, &doc_other, "distinct texts must yield distinct Doc vectors");
}
}