diff --git a/crates/kebab-embed-local/Cargo.toml b/crates/kebab-embed-local/Cargo.toml index 41c30c8..452b2f1 100644 --- a/crates/kebab-embed-local/Cargo.toml +++ b/crates/kebab-embed-local/Cargo.toml @@ -5,14 +5,14 @@ edition = { workspace = true } rust-version = { workspace = true } license = { workspace = true } repository = { workspace = true } -description = "Local fastembed-rs adapter implementing kb_core::Embedder (multilingual-e5-small default)" +description = "Local fastembed-rs adapter implementing kb_core::Embedder (multilingual-e5-large default, e5-small backwards-compat)" [dependencies] kebab-config = { path = "../kebab-config" } kebab-embed = { path = "../kebab-embed" } # Default features bring `ort-download-binaries` (bundled ONNX runtime) # and `hf-hub-native-tls` (first-run model download). No extra features -# needed for the multilingual-e5-small path. +# needed for the multilingual-e5-{small,large} paths. fastembed = { workspace = true } tracing = { workspace = true } anyhow = { workspace = true } diff --git a/crates/kebab-embed-local/src/lib.rs b/crates/kebab-embed-local/src/lib.rs index fbf57fe..2033515 100644 --- a/crates/kebab-embed-local/src/lib.rs +++ b/crates/kebab-embed-local/src/lib.rs @@ -1,8 +1,9 @@ //! `kb-embed-local` — `FastembedEmbedder`, a local ONNX-backed //! [`Embedder`](kebab_embed::Embedder) implementation. //! -//! Wraps [`fastembed::TextEmbedding`] for the default `multilingual-e5-small` -//! (384-dim) model. Honors `config.models.embedding.batch_size` and applies +//! Wraps [`fastembed::TextEmbedding`]. Default is `multilingual-e5-large` +//! (1024-dim, p9-fb-39b); `multilingual-e5-small` (384-dim) is also supported +//! for backwards-compat. Honors `config.models.embedding.batch_size` and applies //! the e5 prefix convention (§11.3 of the design report): //! //! * `EmbeddingKind::Document` → `"passage: "` prefix @@ -69,9 +70,9 @@ impl FastembedEmbedder { .with_context(|| format!("create fastembed cache dir {}", cache_dir.display()))?; // 2. Resolve the fastembed enum variant from - // `config.models.embedding.model`. Currently only the default - // `multilingual-e5-small` is wired; other model names error - // out with a clear message rather than silently misconfiguring. + // `config.models.embedding.model`. Currently `multilingual-e5-large` + // (default) and `multilingual-e5-small` are wired; other model names + // error out with a clear message rather than silently misconfiguring. let model_name = resolve_model(&config.models.embedding.model)?; // 3. Verify dim match BEFORE loading the model — if the config @@ -100,7 +101,7 @@ impl FastembedEmbedder { target: "kebab-embed-local", model = %config.models.embedding.model, cache_dir = %cache_dir.display(), - "loading embedding model (first run will download ~470MB)" + "loading embedding model (first run downloads model weights — ~470MB for e5-small, ~1.3GB for e5-large)" ); let inner = TextEmbedding::try_new(opts) .context("fastembed: TextEmbedding::try_new")?; diff --git a/crates/kebab-embed-local/tests/embed_model.rs b/crates/kebab-embed-local/tests/embed_model.rs index fee9c5f..2212184 100644 --- a/crates/kebab-embed-local/tests/embed_model.rs +++ b/crates/kebab-embed-local/tests/embed_model.rs @@ -3,10 +3,11 @@ //! //! ## Why every test in this file is `#[ignore]` //! -//! The first call to `FastembedEmbedder::new` downloads ~470 MB of -//! weights from Hugging Face into `data_dir/models/fastembed/`. Doing -//! that on every `cargo test` invocation is wasteful, so the bare -//! invocation skips this file entirely. +//! The first call to `FastembedEmbedder::new` downloads ~1.3 GB of +//! weights (multilingual-e5-large per p9-fb-39b default) from Hugging +//! Face into `data_dir/models/fastembed/`. Doing that on every +//! `cargo test` invocation is wasteful, so the bare invocation skips +//! this file entirely. //! //! Run the full suite with: //! ```text @@ -58,19 +59,20 @@ fn shared_embedder() -> &'static FastembedEmbedder { // ─── construction ───────────────────────────────────────────────────── #[test] -#[ignore = "downloads ~470MB ONNX model on first run; CI-only"] -fn default_config_constructs_with_dims_384() { +#[ignore = "downloads ~1.3GB ONNX model on first run; CI-only"] +fn default_config_constructs_with_dims_1024() { + // p9-fb-39b: default flipped to multilingual-e5-large (1024 dim). let emb = shared_embedder(); - assert_eq!(emb.dimensions(), 384); - assert_eq!(emb.model_id().0, "multilingual-e5-small"); + assert_eq!(emb.dimensions(), 1024); + assert_eq!(emb.model_id().0, "multilingual-e5-large"); assert_eq!(emb.model_version().0, "v1"); } #[test] -#[ignore = "downloads ~470MB ONNX model on first run; CI-only"] +#[ignore = "downloads ~1.3GB ONNX model on first run; CI-only"] fn mismatched_dims_in_config_errors_at_construction() { let (mut cfg, _tmp) = test_config(); - cfg.models.embedding.dimensions = 512; // model is 384 + cfg.models.embedding.dimensions = 512; // model is 1024 (e5-large default) // `FastembedEmbedder` deliberately does not implement `Debug` // (its inner ONNX session has no useful debug shape), so we // can't use `expect_err`; match the Result manually. @@ -80,7 +82,7 @@ fn mismatched_dims_in_config_errors_at_construction() { }; let msg = format!("{err}"); assert!(msg.contains("dimension mismatch"), "msg={msg}"); - assert!(msg.contains("384"), "msg={msg}"); + assert!(msg.contains("1024"), "msg={msg}"); assert!(msg.contains("512"), "msg={msg}"); } @@ -104,8 +106,8 @@ fn document_and_query_yield_different_vectors() { ]) .expect("embed two inputs"); assert_eq!(out.len(), 2); - assert_eq!(out[0].len(), 384); - assert_eq!(out[1].len(), 384); + assert_eq!(out[0].len(), 1024); + assert_eq!(out[1].len(), 1024); // Both vectors are L2-normalized → cosine similarity == dot product. let cos: f32 = out[0] @@ -142,11 +144,11 @@ fn output_vectors_are_l2_normalized() { ]; let out = emb.embed(&inputs).expect("embed"); // Per `kebab_embed::assert_unit_norm` docs: `5e-4` is the safe bound at - // 384 dims (f32::EPSILON × √384 ≈ 2.3e-6, but ONNX kernels add + // 1024 dims (f32::EPSILON × √1024 ≈ 2.3e-6, but ONNX kernels add // their own per-component noise; 1e-3 is very generous and matches // the spec's `± 1e-3`). kebab_embed::assert_unit_norm(&out, 1e-3); - kebab_embed::assert_vector_shape(&out, 384); + kebab_embed::assert_vector_shape(&out, 1024); } // ─── determinism ────────────────────────────────────────────────────── @@ -254,7 +256,7 @@ fn snapshot_aggregate_hash_is_stable() { // Round every component to 4 decimal places, hash deterministically. let mut hasher = DefaultHasher::new(); for (i, v) in out.iter().enumerate() { - assert_eq!(v.len(), 384, "row {i} dim mismatch"); + assert_eq!(v.len(), 1024, "row {i} dim mismatch"); for x in v { let rounded: i32 = (*x * 1.0e4).round() as i32; rounded.hash(&mut hasher); diff --git a/tasks/HOTFIXES.md b/tasks/HOTFIXES.md index 765b9d1..d6c9938 100644 --- a/tasks/HOTFIXES.md +++ b/tasks/HOTFIXES.md @@ -26,7 +26,9 @@ git history. **search/ask 결과**: re-embed 전까지는 empty hit (새 모델에 데이터가 없음). `kebab ingest` 후 정상 검색 가능. -**Spec contract 와의 관계**: design §5 (storage) + §9 (versioning cascade) 의 embedding_model.id / dimensions 변경. embedding_version bump 아님 — 동일 binary 에서 config 만 변경해도 진행 가능 (cascade rule 준수). +**Spec contract 와의 관계**: design §5 (storage) + §9 (versioning cascade) 의 embedding_model.id / dimensions 변경. wire 의 `embedding_version` 필드 (kebab-app schema.v1.models.embedding_version 가 config.models.embedding.model 값을 그대로 emit) 변경 — CLAUDE.md cascade rule 의 release 트리거. 본 PR 머지 후 `chore: bump version 0.5 → 0.6` + tag 필요. + +**Spec deviation**: design `2026-05-10-p9-fb-39b-embedding-upgrade-design.md` 의 §Migration policy + §Public surface delta 가 `LanceVectorStore::open` 안 신규 `error.v1.code = "embedding_dim_mismatch"` 명시했으나 구현 제외. 이유: LanceDB tables 가 `(model, dim)` namespaced — silent orphan + empty-hit 으로 surface (hard error 아님). 명시 error 필요 시 별도 startup health check 작업 필요 (fb-39c 후보 또는 doctor 확장). ## 2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1 diff --git a/tasks/p9/p9-fb-39b-embedding-upgrade.md b/tasks/p9/p9-fb-39b-embedding-upgrade.md index 6a4e990..ba4c66b 100644 --- a/tasks/p9/p9-fb-39b-embedding-upgrade.md +++ b/tasks/p9/p9-fb-39b-embedding-upgrade.md @@ -62,14 +62,15 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — Claude Code 가 kebab CLI ## Backward compat notes - Pre-fb-39b user 가 config 에서 명시하지 않은 embedding → new default (large) 자동 적용. TOML 에 `model = "multilingual-e5-small"` 명시하면 유지. -- `kebab_app::config::Config` 의 `embedding_model` field 는 Optional 이므로 old config (small) 도 parse 성공 (v1 설계 §9 cascade 규칙). +- `kebab-config` 의 `EmbeddingCfg.model` 은 String 필드 — TOML 에 명시한 값이 default 를 override (serde 기본 동작). - Orphan LanceDB table (`chunk_embeddings_multilingual-e5-small_384`) 은 다음 `kebab ingest` 실행 후 stale 취급 — 사용자가 수동 `kebab reset --vector-only` 로 정리 가능. ## Binary version bump -- 0.6.0 → 0.7.0 (design §9 cascade rule: embedding_model change = minor bump). +- 0.5.0 → 0.6.0 (current Cargo.toml = 0.5.0; embedding_version cascade triggers minor bump per design §9). - Release notes: `embedding default: multilingual-e5-small (384d) → multilingual-e5-large (1024d), P@k metric ↑`. ## Post-merge deviation -None — 설계 contract 대로 구현 완료. +- **`embedding_dim_mismatch` ErrorV1 dropped**: design spec §Migration policy 가 `LanceVectorStore::open` 안 dim mismatch 감지 + 신규 `error.v1.code = "embedding_dim_mismatch"` 를 명시했으나 구현에서 제외. 이유: LanceDB tables 가 `(model, dim)` namespaced (`crates/kebab-store-vector/src/paths.rs:21`) — 신규 model 변경 시 새 table 자동 생성, 옛 table orphan. dim mismatch 가 hard error 되지 않고 검색 결과 0건 (silent precision loss) 으로 surface. HOTFIXES 항목이 documentation source. 명시 error 가 의미 있으려면 별도 startup health check 필요 — fb-39c 후보 또는 v0.7.0 의 doctor 확장. +- 영향: 사용자가 model 변경 후 `kebab ingest` 안 하면 검색 결과 0건. README + SMOKE walkthrough 가 reset --vector-only && ingest 시퀀스 안내.