spike(embed-candle): candle e5-large 타당성 검증 — VERDICT PASS

Track 1 / Phase 0 격리 스파이크. candle(순수 Rust)로
intfloat/multilingual-e5-large 를 돌려 기존 onnxruntime
FastembedEmbedder 와 비교.

결과:
- 패리티: 한/영 10문장 cosine min=mean=1.000000 (완전 일치)
- padding_idx: XLM-R 규약 정상 (소스 + 패리티 이중 확인)
- 스레드 제어: RAYON_NUM_THREADS=4 설정으로 컴퓨트 스레드가 12→4 로 제한됨을 확인
  (fastembed 4.9.1 의 48 하드코딩 + override 불가 문제가 candle 에는 구조적으로 없음)
- latency: batch=32 기준 candle 2.161s vs fastembed 0.536s (candle 이 ~4× 느림; 단 candle 4 스레드 vs fastembed 12 스레드로 스레드 수가 다른 조건)

→ candle 본 구현 진행 권고 (GREEN). 상세 SPIKE_REPORT.md.

candle 의존성은 crates/spike-embed-candle 에만 격리되어 있으며, 프로덕션
crate 의 동작 변경은 없음. 결정적인 NUMA 검증은 대상 듀얼소켓 서버에서
사용자가 직접 실행해야 함 (meta-spec §4.3).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-01 14:23:51 +00:00
parent 980e20fd8d
commit 76841af7d3
6 changed files with 849 additions and 2 deletions

401
Cargo.lock generated
View File

@@ -827,6 +827,20 @@ name = "bytemuck"
version = "1.25.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
dependencies = [
"bytemuck_derive",
]
[[package]]
name = "bytemuck_derive"
version = "1.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "byteorder"
@@ -874,6 +888,65 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "candle-core"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6bd9895436c1ba5dc1037a19935d084b838db066ff4e15ef7dded020b7c12a4a"
dependencies = [
"byteorder",
"float8",
"gemm",
"half",
"libm",
"memmap2",
"num-traits",
"num_cpus",
"rand 0.9.4",
"rand_distr 0.5.1",
"rayon",
"safetensors",
"thiserror 2.0.18",
"tokenizers 0.22.2",
"yoke",
"zip",
]
[[package]]
name = "candle-nn"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9317a09d6530b758990ed7f625ac69ff43653bc9ee28b0464644ad1169ada87"
dependencies = [
"candle-core",
"half",
"libc",
"num-traits",
"rayon",
"safetensors",
"serde",
"thiserror 2.0.18",
]
[[package]]
name = "candle-transformers"
version = "0.10.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f59d08c89e9f4af9c464e2f3a8e16199e7cc601e6f34538c2cfbb42b623b1783"
dependencies = [
"byteorder",
"candle-core",
"candle-nn",
"fancy-regex",
"num-traits",
"rand 0.9.4",
"rayon",
"serde",
"serde_json",
"serde_plain",
"tracing",
]
[[package]]
name = "cassowary"
version = "0.3.0"
@@ -2238,6 +2311,22 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
[[package]]
name = "dyn-stack"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c4713e43e2886ba72b8271aa66c93d722116acf7a75555cce11dcde84388fe8"
dependencies = [
"bytemuck",
"dyn-stack-macros",
]
[[package]]
name = "dyn-stack-macros"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9"
[[package]]
name = "earcutr"
version = "0.4.3"
@@ -2278,6 +2367,18 @@ dependencies = [
"encoding_rs",
]
[[package]]
name = "enum-as-inner"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "equator"
version = "0.4.2"
@@ -2319,6 +2420,9 @@ name = "esaxx-rs"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
dependencies = [
"cc",
]
[[package]]
name = "ethnum"
@@ -2374,6 +2478,17 @@ version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
[[package]]
name = "fancy-regex"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
dependencies = [
"bit-set",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "fast-float2"
version = "0.2.3"
@@ -2400,7 +2515,7 @@ dependencies = [
"ort-sys",
"rayon",
"serde_json",
"tokenizers",
"tokenizers 0.21.4",
]
[[package]]
@@ -2480,6 +2595,18 @@ dependencies = [
"zlib-rs",
]
[[package]]
name = "float8"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c2d1f04709a8ac06e8e8042875a3c466cc4832d3c1a18dbcb9dba3c6e83046bc"
dependencies = [
"half",
"num-traits",
"rand 0.9.4",
"rand_distr 0.5.1",
]
[[package]]
name = "float_next_after"
version = "1.0.0"
@@ -2657,6 +2784,125 @@ dependencies = [
"slab",
]
[[package]]
name = "gemm"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb"
dependencies = [
"dyn-stack",
"gemm-c32",
"gemm-c64",
"gemm-common",
"gemm-f16",
"gemm-f32",
"gemm-f64",
"num-complex",
"num-traits",
"paste",
"raw-cpuid",
"seq-macro",
]
[[package]]
name = "gemm-c32"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c"
dependencies = [
"dyn-stack",
"gemm-common",
"num-complex",
"num-traits",
"paste",
"raw-cpuid",
"seq-macro",
]
[[package]]
name = "gemm-c64"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f"
dependencies = [
"dyn-stack",
"gemm-common",
"num-complex",
"num-traits",
"paste",
"raw-cpuid",
"seq-macro",
]
[[package]]
name = "gemm-common"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88027625910cc9b1085aaaa1c4bc46bb3a36aad323452b33c25b5e4e7c8e2a3e"
dependencies = [
"bytemuck",
"dyn-stack",
"half",
"libm",
"num-complex",
"num-traits",
"once_cell",
"paste",
"pulp",
"raw-cpuid",
"rayon",
"seq-macro",
"sysctl",
]
[[package]]
name = "gemm-f16"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e"
dependencies = [
"dyn-stack",
"gemm-common",
"gemm-f32",
"half",
"num-complex",
"num-traits",
"paste",
"raw-cpuid",
"rayon",
"seq-macro",
]
[[package]]
name = "gemm-f32"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c"
dependencies = [
"dyn-stack",
"gemm-common",
"num-complex",
"num-traits",
"paste",
"raw-cpuid",
"seq-macro",
]
[[package]]
name = "gemm-f64"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a"
dependencies = [
"dyn-stack",
"gemm-common",
"num-complex",
"num-traits",
"paste",
"raw-cpuid",
"seq-macro",
]
[[package]]
name = "generator"
version = "0.8.8"
@@ -3475,9 +3721,12 @@ version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
dependencies = [
"bytemuck",
"cfg-if",
"crunchy",
"num-traits",
"rand 0.9.4",
"rand_distr 0.5.1",
"zerocopy",
]
@@ -3526,6 +3775,8 @@ dependencies = [
"allocator-api2",
"equivalent",
"foldhash 0.2.0",
"serde",
"serde_core",
]
[[package]]
@@ -3578,16 +3829,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
dependencies = [
"dirs 6.0.0",
"futures",
"http",
"indicatif",
"libc",
"log",
"native-tls",
"num_cpus",
"rand 0.9.4",
"reqwest 0.12.28",
"serde",
"serde_json",
"thiserror 2.0.18",
"tokio",
"ureq",
"windows-sys 0.60.2",
]
@@ -4490,7 +4744,7 @@ dependencies = [
"ort",
"serde",
"tempfile",
"tokenizers",
"tokenizers 0.21.4",
"tracing",
]
@@ -5761,6 +6015,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
dependencies = [
"libc",
"stable_deref_trait",
]
[[package]]
@@ -6026,6 +6281,7 @@ version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
dependencies = [
"bytemuck",
"num-traits",
]
@@ -6730,6 +6986,29 @@ dependencies = [
"unicase",
]
[[package]]
name = "pulp"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632"
dependencies = [
"bytemuck",
"cfg-if",
"libm",
"num-complex",
"paste",
"pulp-wasm-simd-flag",
"raw-cpuid",
"reborrow",
"version_check",
]
[[package]]
name = "pulp-wasm-simd-flag"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0"
[[package]]
name = "pxfm"
version = "0.1.29"
@@ -7051,6 +7330,15 @@ dependencies = [
"rgb",
]
[[package]]
name = "raw-cpuid"
version = "11.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
dependencies = [
"bitflags 2.11.1",
]
[[package]]
name = "rawpointer"
version = "0.2.1"
@@ -7088,6 +7376,12 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "reborrow"
version = "0.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430"
[[package]]
name = "recursive"
version = "0.1.1"
@@ -7618,6 +7912,17 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15"
[[package]]
name = "safetensors"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5"
dependencies = [
"hashbrown 0.16.1",
"serde",
"serde_json",
]
[[package]]
name = "same-file"
version = "1.0.6"
@@ -7798,6 +8103,15 @@ dependencies = [
"serde_json",
]
[[package]]
name = "serde_plain"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50"
dependencies = [
"serde",
]
[[package]]
name = "serde_repr"
version = "0.1.20"
@@ -8083,6 +8397,23 @@ dependencies = [
"smallvec",
]
[[package]]
name = "spike-embed-candle"
version = "0.0.0"
dependencies = [
"anyhow",
"candle-core",
"candle-nn",
"candle-transformers",
"hf-hub",
"kebab-config",
"kebab-embed",
"kebab-embed-local",
"rayon",
"serde_json",
"tokenizers 0.21.4",
]
[[package]]
name = "spm_precompiled"
version = "0.1.4"
@@ -8281,6 +8612,20 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "sysctl"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
dependencies = [
"bitflags 2.11.1",
"byteorder",
"enum-as-inner",
"libc",
"thiserror 1.0.69",
"walkdir",
]
[[package]]
name = "system-configuration"
version = "0.7.0"
@@ -8637,6 +8982,40 @@ name = "tokenizers"
version = "0.21.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476"
dependencies = [
"ahash",
"aho-corasick",
"compact_str 0.9.0",
"dary_heap",
"derive_builder",
"esaxx-rs",
"getrandom 0.3.4",
"indicatif",
"itertools 0.14.0",
"log",
"macro_rules_attribute",
"monostate",
"onig",
"paste",
"rand 0.9.4",
"rayon",
"rayon-cond",
"regex",
"regex-syntax",
"serde",
"serde_json",
"spm_precompiled",
"thiserror 2.0.18",
"unicode-normalization-alignments",
"unicode-segmentation",
"unicode_categories",
]
[[package]]
name = "tokenizers"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223"
dependencies = [
"ahash",
"aho-corasick",
@@ -9076,6 +9455,12 @@ dependencies = [
"rand 0.9.4",
]
[[package]]
name = "typed-path"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e"
[[package]]
name = "typenum"
version = "1.20.0"
@@ -10131,6 +10516,18 @@ dependencies = [
"syn 2.0.117",
]
[[package]]
name = "zip"
version = "7.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c42e33efc22a0650c311c2ef19115ce232583abbe80850bc8b66509ebef02de0"
dependencies = [
"crc32fast",
"indexmap 2.14.0",
"memchr",
"typed-path",
]
[[package]]
name = "zlib-rs"
version = "0.6.3"