spike(embed-candle): candle e5-large 타당성 검증 — VERDICT PASS
Track 1 / Phase 0 격리 스파이크. candle(순수 Rust)로 intfloat/multilingual-e5-large 를 돌려 기존 onnxruntime FastembedEmbedder 와 비교.

결과:
- 패리티: 한/영 10문장 cosine min=mean=1.000000 (완전 일치)
- padding_idx: XLM-R 규약 정상 (소스 + 패리티 이중 확인)
- 스레드 제어: RAYON_NUM_THREADS=4 로 컴퓨트 스레드 12→4 캡 확인 (fastembed 4.9.1 의 48-하드코딩+override불가 문제 구조적 부재)
- latency: batch=32 candle 2.161s vs fastembed 0.536s (~4×, 4 vs 12 스레드)

→ candle 본 구현 진행 권고 (GREEN). 상세 SPIKE_REPORT.md.

candle 의존성은 crates/spike-embed-candle 에만 격리. 프로덕션 crate 동작 변경 없음. 결정적 NUMA 검증은 그 듀얼소켓 서버에서 사용자 실행 필요 (meta-spec §4.3).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
401
Cargo.lock
generated
401
Cargo.lock
generated
@@ -827,6 +827,20 @@ name = "bytemuck"
|
||||
version = "1.25.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck_derive"
|
||||
version = "1.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
@@ -874,6 +888,65 @@ dependencies = [
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "candle-core"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bd9895436c1ba5dc1037a19935d084b838db066ff4e15ef7dded020b7c12a4a"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"float8",
|
||||
"gemm",
|
||||
"half",
|
||||
"libm",
|
||||
"memmap2",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"rand 0.9.4",
|
||||
"rand_distr 0.5.1",
|
||||
"rayon",
|
||||
"safetensors",
|
||||
"thiserror 2.0.18",
|
||||
"tokenizers 0.22.2",
|
||||
"yoke",
|
||||
"zip",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "candle-nn"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9317a09d6530b758990ed7f625ac69ff43653bc9ee28b0464644ad1169ada87"
|
||||
dependencies = [
|
||||
"candle-core",
|
||||
"half",
|
||||
"libc",
|
||||
"num-traits",
|
||||
"rayon",
|
||||
"safetensors",
|
||||
"serde",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "candle-transformers"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f59d08c89e9f4af9c464e2f3a8e16199e7cc601e6f34538c2cfbb42b623b1783"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"candle-core",
|
||||
"candle-nn",
|
||||
"fancy-regex",
|
||||
"num-traits",
|
||||
"rand 0.9.4",
|
||||
"rayon",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_plain",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cassowary"
|
||||
version = "0.3.0"
|
||||
@@ -2238,6 +2311,22 @@ version = "1.0.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555"
|
||||
|
||||
[[package]]
|
||||
name = "dyn-stack"
|
||||
version = "0.13.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c4713e43e2886ba72b8271aa66c93d722116acf7a75555cce11dcde84388fe8"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"dyn-stack-macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dyn-stack-macros"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1d926b4d407d372f141f93bb444696142c29d32962ccbd3531117cf3aa0bfa9"
|
||||
|
||||
[[package]]
|
||||
name = "earcutr"
|
||||
version = "0.4.3"
|
||||
@@ -2278,6 +2367,18 @@ dependencies = [
|
||||
"encoding_rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "enum-as-inner"
|
||||
version = "0.6.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a1e6a265c649f3f5979b601d26f1d05ada116434c87741c9493cb56218f76cbc"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equator"
|
||||
version = "0.4.2"
|
||||
@@ -2319,6 +2420,9 @@ name = "esaxx-rs"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ethnum"
|
||||
@@ -2374,6 +2478,17 @@ version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a"
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72cf461f865c862bb7dc573f643dd6a2b6842f7c30b07882b56bd148cc2761b8"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fast-float2"
|
||||
version = "0.2.3"
|
||||
@@ -2400,7 +2515,7 @@ dependencies = [
|
||||
"ort-sys",
|
||||
"rayon",
|
||||
"serde_json",
|
||||
"tokenizers",
|
||||
"tokenizers 0.21.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2480,6 +2595,18 @@ dependencies = [
|
||||
"zlib-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "float8"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c2d1f04709a8ac06e8e8042875a3c466cc4832d3c1a18dbcb9dba3c6e83046bc"
|
||||
dependencies = [
|
||||
"half",
|
||||
"num-traits",
|
||||
"rand 0.9.4",
|
||||
"rand_distr 0.5.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "float_next_after"
|
||||
version = "1.0.0"
|
||||
@@ -2657,6 +2784,125 @@ dependencies = [
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-c32",
|
||||
"gemm-c64",
|
||||
"gemm-common",
|
||||
"gemm-f16",
|
||||
"gemm-f32",
|
||||
"gemm-f64",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-c32"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-c64"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-common"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "88027625910cc9b1085aaaa1c4bc46bb3a36aad323452b33c25b5e4e7c8e2a3e"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"dyn-stack",
|
||||
"half",
|
||||
"libm",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pulp",
|
||||
"raw-cpuid",
|
||||
"rayon",
|
||||
"seq-macro",
|
||||
"sysctl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f16"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"gemm-f32",
|
||||
"half",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"rayon",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f32"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f64"
|
||||
version = "0.19.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generator"
|
||||
version = "0.8.8"
|
||||
@@ -3475,9 +3721,12 @@ version = "2.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"cfg-if",
|
||||
"crunchy",
|
||||
"num-traits",
|
||||
"rand 0.9.4",
|
||||
"rand_distr 0.5.1",
|
||||
"zerocopy",
|
||||
]
|
||||
|
||||
@@ -3526,6 +3775,8 @@ dependencies = [
|
||||
"allocator-api2",
|
||||
"equivalent",
|
||||
"foldhash 0.2.0",
|
||||
"serde",
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -3578,16 +3829,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97"
|
||||
dependencies = [
|
||||
"dirs 6.0.0",
|
||||
"futures",
|
||||
"http",
|
||||
"indicatif",
|
||||
"libc",
|
||||
"log",
|
||||
"native-tls",
|
||||
"num_cpus",
|
||||
"rand 0.9.4",
|
||||
"reqwest 0.12.28",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror 2.0.18",
|
||||
"tokio",
|
||||
"ureq",
|
||||
"windows-sys 0.60.2",
|
||||
]
|
||||
@@ -4490,7 +4744,7 @@ dependencies = [
|
||||
"ort",
|
||||
"serde",
|
||||
"tempfile",
|
||||
"tokenizers",
|
||||
"tokenizers 0.21.4",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
@@ -5761,6 +6015,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -6026,6 +6281,7 @@ version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
@@ -6730,6 +6986,29 @@ dependencies = [
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulp"
|
||||
version = "0.22.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2e205bb30d5b916c55e584c22201771bcf2bad9aabd5d4127f38387140c38632"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"cfg-if",
|
||||
"libm",
|
||||
"num-complex",
|
||||
"paste",
|
||||
"pulp-wasm-simd-flag",
|
||||
"raw-cpuid",
|
||||
"reborrow",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulp-wasm-simd-flag"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0"
|
||||
|
||||
[[package]]
|
||||
name = "pxfm"
|
||||
version = "0.1.29"
|
||||
@@ -7051,6 +7330,15 @@ dependencies = [
|
||||
"rgb",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "raw-cpuid"
|
||||
version = "11.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rawpointer"
|
||||
version = "0.2.1"
|
||||
@@ -7088,6 +7376,12 @@ dependencies = [
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reborrow"
|
||||
version = "0.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430"
|
||||
|
||||
[[package]]
|
||||
name = "recursive"
|
||||
version = "0.1.1"
|
||||
@@ -7618,6 +7912,17 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15"
|
||||
|
||||
[[package]]
|
||||
name = "safetensors"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5"
|
||||
dependencies = [
|
||||
"hashbrown 0.16.1",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.6"
|
||||
@@ -7798,6 +8103,15 @@ dependencies = [
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_plain"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9ce1fc6db65a611022b23a0dec6975d63fb80a302cb3388835ff02c097258d50"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_repr"
|
||||
version = "0.1.20"
|
||||
@@ -8083,6 +8397,23 @@ dependencies = [
|
||||
"smallvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spike-embed-candle"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"candle-core",
|
||||
"candle-nn",
|
||||
"candle-transformers",
|
||||
"hf-hub",
|
||||
"kebab-config",
|
||||
"kebab-embed",
|
||||
"kebab-embed-local",
|
||||
"rayon",
|
||||
"serde_json",
|
||||
"tokenizers 0.21.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "spm_precompiled"
|
||||
version = "0.1.4"
|
||||
@@ -8281,6 +8612,20 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sysctl"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
"byteorder",
|
||||
"enum-as-inner",
|
||||
"libc",
|
||||
"thiserror 1.0.69",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.7.0"
|
||||
@@ -8637,6 +8982,40 @@ name = "tokenizers"
|
||||
version = "0.21.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"aho-corasick",
|
||||
"compact_str 0.9.0",
|
||||
"dary_heap",
|
||||
"derive_builder",
|
||||
"esaxx-rs",
|
||||
"getrandom 0.3.4",
|
||||
"indicatif",
|
||||
"itertools 0.14.0",
|
||||
"log",
|
||||
"macro_rules_attribute",
|
||||
"monostate",
|
||||
"onig",
|
||||
"paste",
|
||||
"rand 0.9.4",
|
||||
"rayon",
|
||||
"rayon-cond",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"spm_precompiled",
|
||||
"thiserror 2.0.18",
|
||||
"unicode-normalization-alignments",
|
||||
"unicode-segmentation",
|
||||
"unicode_categories",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokenizers"
|
||||
version = "0.22.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223"
|
||||
dependencies = [
|
||||
"ahash",
|
||||
"aho-corasick",
|
||||
@@ -9076,6 +9455,12 @@ dependencies = [
|
||||
"rand 0.9.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "typed-path"
|
||||
version = "0.12.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e"
|
||||
|
||||
[[package]]
|
||||
name = "typenum"
|
||||
version = "1.20.0"
|
||||
@@ -10131,6 +10516,18 @@ dependencies = [
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zip"
|
||||
version = "7.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c42e33efc22a0650c311c2ef19115ce232583abbe80850bc8b66509ebef02de0"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"indexmap 2.14.0",
|
||||
"memchr",
|
||||
"typed-path",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zlib-rs"
|
||||
version = "0.6.3"
|
||||
|
||||
Reference in New Issue
Block a user