# - kebab-embed-candle: `metal` feature → candle metal backend; select_device()
#   picks Device::new_metal(0) (CPU fallback) under the feature, else Device::Cpu.
#   .contiguous() before to_vec2 (Metal rejects strided views; CPU tolerates).
# - feature passthrough: kebab-app/embed_metal → kebab-cli/embed_metal.
#   Build on macOS: cargo build --release --features embed_metal.
# - default (non-metal) path unchanged: clippy 0, candle units + thread_cap + parity pass.
# - README + HOTFIXES: Mac-GPU-ingest → copy sqlite+lancedb → server CPU-query workflow.
# - version 0.22.0 → 0.23.0 (opt-in build surface).
#
# macOS-only compile; Metal execution/speed/parity validated by user on M4 Pro
# (not buildable on the Linux CI/dev machine).
#
# Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
48 lines
1.9 KiB
TOML
48 lines
1.9 KiB
TOML
# Package metadata for the candle-based embedder crate. Every `*.workspace`
# key takes its value from the workspace root manifest rather than being
# spelled out here, so only `name` and `description` are crate-specific.
[package]
name = "kebab-embed-candle"
version.workspace = true
edition.workspace = true
license.workspace = true
repository.workspace = true
rust-version.workspace = true
description = "Pure-Rust candle adapter implementing kb_core::Embedder (multilingual-e5-large, NUMA-safe thread cap)"

[dependencies]
# Workspace-internal crates, referenced by relative path.
kebab-config = { path = "../kebab-config" }
kebab-core = { path = "../kebab-core" }
# candle stack, taken from crates.io on the 0.10.x line. A bare "0.10.2" is a
# caret requirement (>=0.10.2, <0.11.0); the exact release is the one the
# workspace lockfile records. These are the same versions the Phase 0 spike
# compiled, so its build artifacts are reused.
candle-core = "0.10.2"
candle-nn = "0.10.2"
candle-transformers = "0.10.2"
hf-hub = { version = "0.4", features = ["ureq"] }
tokenizers = "0.21"
# Thread cap: candle's CPU threads are sized by a one-shot global rayon pool
# (the Phase 0 spike proved RAYON_NUM_THREADS caps candle). On a NUMA host this
# keeps onnxruntime's hard-coded 48-intra-op heap corruption at bay.
rayon = "1"
# Version and features inherited from the workspace manifest.
anyhow = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }

[features]
# Opt-in Apple Silicon GPU (Metal) support; macOS-only. Turning it on enables
# the metal backend in each candle crate, and `select_device()` then picks
# Metal, falling back to CPU on failure. This lets an M-series Mac ingest
# e5-large on the GPU (10×+ vs CPU). Both paths run the same model, so the
# resulting vectors are cross-compatible and the Linux server can serve
# queries on CPU candle.
metal = [
    "candle-core/metal",
    "candle-nn/metal",
    "candle-transformers/metal",
]

[dev-dependencies]
# Test-only additions. Cargo compiles the targets under tests/ against every
# entry in [dependencies] as well as the entries listed here, so kebab-config,
# kebab-core and rayon (already normal dependencies of this crate) do not need
# to be declared a second time for tests/parity.rs and tests/thread_cap.rs.
kebab-embed-local = { path = "../kebab-embed-local" }
tempfile = { workspace = true }

# Lint configuration is inherited from the workspace manifest.
[lints]
workspace = true