Compare commits
8 Commits
v0.22.0
...
8bfa4ba76e
| Author | SHA1 | Date | |
|---|---|---|---|
| 8bfa4ba76e | |||
| ad0ccf4ccf | |||
| b351523e51 | |||
| a48b055358 | |||
| 581e1d5d55 | |||
| c17d6e67a8 | |||
| af8fd34716 | |||
| 369aeb3d24 |
452
Cargo.lock
generated
452
Cargo.lock
generated
@@ -712,6 +712,12 @@ dependencies = [
|
||||
"cpufeatures 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a"
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.10.4"
|
||||
@@ -895,23 +901,42 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6bd9895436c1ba5dc1037a19935d084b838db066ff4e15ef7dded020b7c12a4a"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"candle-metal-kernels",
|
||||
"candle-ug",
|
||||
"float8",
|
||||
"gemm",
|
||||
"gemm 0.19.0",
|
||||
"half",
|
||||
"libm",
|
||||
"memmap2",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"objc2-foundation",
|
||||
"objc2-metal",
|
||||
"rand 0.9.4",
|
||||
"rand_distr 0.5.1",
|
||||
"rayon",
|
||||
"safetensors",
|
||||
"safetensors 0.7.0",
|
||||
"thiserror 2.0.18",
|
||||
"tokenizers 0.22.2",
|
||||
"yoke",
|
||||
"yoke 0.8.2",
|
||||
"zip",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "candle-metal-kernels"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b6b5a4cae6b4e1ab0efcee4dc05272d11b374a3d1ba121b3a961e36be54ab60"
|
||||
dependencies = [
|
||||
"half",
|
||||
"objc2",
|
||||
"objc2-foundation",
|
||||
"objc2-metal",
|
||||
"once_cell",
|
||||
"thiserror 2.0.18",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "candle-nn"
|
||||
version = "0.10.2"
|
||||
@@ -919,11 +944,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a9317a09d6530b758990ed7f625ac69ff43653bc9ee28b0464644ad1169ada87"
|
||||
dependencies = [
|
||||
"candle-core",
|
||||
"candle-metal-kernels",
|
||||
"half",
|
||||
"libc",
|
||||
"num-traits",
|
||||
"objc2-metal",
|
||||
"rayon",
|
||||
"safetensors",
|
||||
"safetensors 0.7.0",
|
||||
"serde",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
@@ -947,6 +974,16 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "candle-ug"
|
||||
version = "0.10.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca0fc3167cbc99c8ec1be618cb620aa21dca95038f118c3579a79370e3dc5f77"
|
||||
dependencies = [
|
||||
"ug",
|
||||
"ug-metal",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cassowary"
|
||||
version = "0.3.0"
|
||||
@@ -1210,6 +1247,17 @@ version = "0.8.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||
|
||||
[[package]]
|
||||
name = "core-graphics-types"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"core-foundation 0.9.4",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "counter"
|
||||
version = "0.7.1"
|
||||
@@ -2637,7 +2685,28 @@ version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
|
||||
dependencies = [
|
||||
"foreign-types-shared",
|
||||
"foreign-types-shared 0.1.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965"
|
||||
dependencies = [
|
||||
"foreign-types-macros",
|
||||
"foreign-types-shared 0.3.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types-macros"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -2646,6 +2715,12 @@ version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
|
||||
|
||||
[[package]]
|
||||
name = "foreign-types-shared"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b"
|
||||
|
||||
[[package]]
|
||||
name = "form_urlencoded"
|
||||
version = "1.2.2"
|
||||
@@ -2784,6 +2859,26 @@ dependencies = [
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm"
|
||||
version = "0.18.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab96b703d31950f1aeddded248bc95543c9efc7ac9c4a21fda8703a83ee35451"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-c32 0.18.2",
|
||||
"gemm-c64 0.18.2",
|
||||
"gemm-common 0.18.2",
|
||||
"gemm-f16 0.18.2",
|
||||
"gemm-f32 0.18.2",
|
||||
"gemm-f64 0.18.2",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm"
|
||||
version = "0.19.0"
|
||||
@@ -2791,12 +2886,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "aa0673db364b12263d103b68337a68fbecc541d6f6b61ba72fe438654709eacb"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-c32",
|
||||
"gemm-c64",
|
||||
"gemm-common",
|
||||
"gemm-f16",
|
||||
"gemm-f32",
|
||||
"gemm-f64",
|
||||
"gemm-c32 0.19.0",
|
||||
"gemm-c64 0.19.0",
|
||||
"gemm-common 0.19.0",
|
||||
"gemm-f16 0.19.0",
|
||||
"gemm-f32 0.19.0",
|
||||
"gemm-f64 0.19.0",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-c32"
|
||||
version = "0.18.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6db9fd9f40421d00eea9dd0770045a5603b8d684654816637732463f4073847"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common 0.18.2",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
@@ -2811,7 +2921,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "086936dbdcb99e37aad81d320f98f670e53c1e55a98bee70573e83f95beb128c"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"gemm-common 0.19.0",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-c64"
|
||||
version = "0.18.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dfcad8a3d35a43758330b635d02edad980c1e143dc2f21e6fd25f9e4eada8edf"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common 0.18.2",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
@@ -2826,7 +2951,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20c8aeeeec425959bda4d9827664029ba1501a90a0d1e6228e48bef741db3a3f"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"gemm-common 0.19.0",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
@@ -2834,6 +2959,27 @@ dependencies = [
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-common"
|
||||
version = "0.18.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a352d4a69cbe938b9e2a9cb7a3a63b7e72f9349174a2752a558a8a563510d0f3"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"dyn-stack",
|
||||
"half",
|
||||
"libm",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pulp 0.21.5",
|
||||
"raw-cpuid",
|
||||
"rayon",
|
||||
"seq-macro",
|
||||
"sysctl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-common"
|
||||
version = "0.19.0"
|
||||
@@ -2848,13 +2994,31 @@ dependencies = [
|
||||
"num-traits",
|
||||
"once_cell",
|
||||
"paste",
|
||||
"pulp",
|
||||
"pulp 0.22.2",
|
||||
"raw-cpuid",
|
||||
"rayon",
|
||||
"seq-macro",
|
||||
"sysctl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f16"
|
||||
version = "0.18.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cff95ae3259432f3c3410eaa919033cd03791d81cebd18018393dc147952e109"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common 0.18.2",
|
||||
"gemm-f32 0.18.2",
|
||||
"half",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"rayon",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f16"
|
||||
version = "0.19.0"
|
||||
@@ -2862,8 +3026,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3df7a55202e6cd6739d82ae3399c8e0c7e1402859b30e4cb780e61525d9486e"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"gemm-f32",
|
||||
"gemm-common 0.19.0",
|
||||
"gemm-f32 0.19.0",
|
||||
"half",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
@@ -2873,6 +3037,21 @@ dependencies = [
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f32"
|
||||
version = "0.18.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bc8d3d4385393304f407392f754cd2dc4b315d05063f62cf09f47b58de276864"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common 0.18.2",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f32"
|
||||
version = "0.19.0"
|
||||
@@ -2880,7 +3059,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "02e0b8c9da1fbec6e3e3ab2ce6bc259ef18eb5f6f0d3e4edf54b75f9fd41a81c"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"gemm-common 0.19.0",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
"raw-cpuid",
|
||||
"seq-macro",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gemm-f64"
|
||||
version = "0.18.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35b2a4f76ce4b8b16eadc11ccf2e083252d8237c1b589558a49b0183545015bd"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common 0.18.2",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
@@ -2895,7 +3089,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "056131e8f2a521bfab322f804ccd652520c79700d81209e9d9275bbdecaadc6a"
|
||||
dependencies = [
|
||||
"dyn-stack",
|
||||
"gemm-common",
|
||||
"gemm-common 0.19.0",
|
||||
"num-complex",
|
||||
"num-traits",
|
||||
"paste",
|
||||
@@ -4067,7 +4261,7 @@ checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"potential_utf",
|
||||
"yoke",
|
||||
"yoke 0.8.2",
|
||||
"zerofrom",
|
||||
"zerovec",
|
||||
]
|
||||
@@ -4134,7 +4328,7 @@ dependencies = [
|
||||
"displaydoc",
|
||||
"icu_locale_core",
|
||||
"writeable",
|
||||
"yoke",
|
||||
"yoke 0.8.2",
|
||||
"zerofrom",
|
||||
"zerotrie",
|
||||
"zerovec",
|
||||
@@ -4530,7 +4724,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-app"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64 0.22.1",
|
||||
@@ -4577,7 +4771,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-chunk"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
@@ -4595,7 +4789,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-cli"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
@@ -4616,7 +4810,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-config"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"dirs 5.0.1",
|
||||
@@ -4632,7 +4826,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-core"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
@@ -4646,7 +4840,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-embed"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
@@ -4660,7 +4854,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-embed-candle"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"candle-core",
|
||||
@@ -4679,7 +4873,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-embed-local"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"fastembed",
|
||||
@@ -4692,7 +4886,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-eval"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kebab-app",
|
||||
@@ -4711,7 +4905,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-llm"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kebab-core",
|
||||
@@ -4720,7 +4914,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-llm-local"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kebab-config",
|
||||
@@ -4737,7 +4931,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-mcp"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kebab-app",
|
||||
@@ -4755,7 +4949,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-nli"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"hf-hub",
|
||||
@@ -4770,7 +4964,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-parse-code"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"gix",
|
||||
@@ -4793,7 +4987,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-parse-image"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"ab_glyph",
|
||||
"anyhow",
|
||||
@@ -4817,7 +5011,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-parse-md"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kebab-core",
|
||||
@@ -4834,7 +5028,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-parse-pdf"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
@@ -4849,7 +5043,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-rag"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
@@ -4871,7 +5065,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-search"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"globset",
|
||||
@@ -4890,7 +5084,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-source-fs"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
@@ -4908,7 +5102,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-store-sqlite"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
@@ -4928,7 +5122,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-store-vector"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"arrow",
|
||||
@@ -4952,7 +5146,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "kebab-tui"
|
||||
version = "0.22.0"
|
||||
version = "0.24.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"crossterm",
|
||||
@@ -5626,6 +5820,16 @@ dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libloading"
|
||||
version = "0.8.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-link",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
version = "0.2.16"
|
||||
@@ -5942,6 +6146,15 @@ version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30"
|
||||
|
||||
[[package]]
|
||||
name = "malloc_buf"
|
||||
version = "0.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maplit"
|
||||
version = "1.0.2"
|
||||
@@ -6038,6 +6251,21 @@ dependencies = [
|
||||
"stable_deref_trait",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "metal"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
"block",
|
||||
"core-graphics-types",
|
||||
"foreign-types 0.5.0",
|
||||
"log",
|
||||
"objc",
|
||||
"paste",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mime"
|
||||
version = "0.3.17"
|
||||
@@ -6401,6 +6629,15 @@ version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
|
||||
|
||||
[[package]]
|
||||
name = "objc"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1"
|
||||
dependencies = [
|
||||
"malloc_buf",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2"
|
||||
version = "0.6.4"
|
||||
@@ -6410,12 +6647,50 @@ dependencies = [
|
||||
"objc2-encode",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-core-foundation"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
"dispatch2",
|
||||
"objc2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-encode"
|
||||
version = "4.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ef25abbcd74fb2609453eb695bd2f860d389e457f67dc17cafc8b8cbc89d0c33"
|
||||
|
||||
[[package]]
|
||||
name = "objc2-foundation"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
"block2",
|
||||
"libc",
|
||||
"objc2",
|
||||
"objc2-core-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "objc2-metal"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a0125f776a10d00af4152d74616409f0d4a2053a6f57fa5b7d6aa2854ac04794"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
"block2",
|
||||
"dispatch2",
|
||||
"objc2",
|
||||
"objc2-core-foundation",
|
||||
"objc2-foundation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "object"
|
||||
version = "0.37.3"
|
||||
@@ -6497,7 +6772,7 @@ checksum = "f38c4372413cdaaf3cc79dd92d29d7d9f5ab09b51b10dded508fb90bb70b9222"
|
||||
dependencies = [
|
||||
"bitflags 2.11.1",
|
||||
"cfg-if",
|
||||
"foreign-types",
|
||||
"foreign-types 0.3.2",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"openssl-macros",
|
||||
@@ -7006,6 +7281,20 @@ dependencies = [
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulp"
|
||||
version = "0.21.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96b86df24f0a7ddd5e4b95c94fc9ed8a98f1ca94d3b01bdce2824097e7835907"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"cfg-if",
|
||||
"libm",
|
||||
"num-complex",
|
||||
"reborrow",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulp"
|
||||
version = "0.22.2"
|
||||
@@ -7932,6 +8221,16 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd29631678d6fb0903b69223673e122c32e9ae559d0960a38d574695ebc0ea15"
|
||||
|
||||
[[package]]
|
||||
name = "safetensors"
|
||||
version = "0.4.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44560c11236a6130a46ce36c836a62936dc81ebf8c36a37947423571be0e55b6"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "safetensors"
|
||||
version = "0.7.0"
|
||||
@@ -9470,6 +9769,41 @@ version = "1.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de"
|
||||
|
||||
[[package]]
|
||||
name = "ug"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "76b761acf8af3494640d826a8609e2265e19778fb43306c7f15379c78c9b05b0"
|
||||
dependencies = [
|
||||
"gemm 0.18.2",
|
||||
"half",
|
||||
"libloading",
|
||||
"memmap2",
|
||||
"num",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"rayon",
|
||||
"safetensors 0.4.5",
|
||||
"serde",
|
||||
"thiserror 1.0.69",
|
||||
"tracing",
|
||||
"yoke 0.7.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ug-metal"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f7adf545a99a086d362efc739e7cf4317c18cbeda22706000fd434d70ea3d95"
|
||||
dependencies = [
|
||||
"half",
|
||||
"metal",
|
||||
"objc",
|
||||
"serde",
|
||||
"thiserror 1.0.69",
|
||||
"ug",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unarray"
|
||||
version = "0.1.4"
|
||||
@@ -10416,6 +10750,18 @@ version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a5a4b21e1a62b67a2970e6831bc091d7b87e119e7f9791aef9702e3bef04448"
|
||||
|
||||
[[package]]
|
||||
name = "yoke"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"stable_deref_trait",
|
||||
"yoke-derive 0.7.5",
|
||||
"zerofrom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yoke"
|
||||
version = "0.8.2"
|
||||
@@ -10423,10 +10769,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
|
||||
dependencies = [
|
||||
"stable_deref_trait",
|
||||
"yoke-derive",
|
||||
"yoke-derive 0.8.2",
|
||||
"zerofrom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yoke-derive"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
"synstructure",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "yoke-derive"
|
||||
version = "0.8.2"
|
||||
@@ -10493,7 +10851,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
|
||||
dependencies = [
|
||||
"displaydoc",
|
||||
"yoke",
|
||||
"yoke 0.8.2",
|
||||
"zerofrom",
|
||||
]
|
||||
|
||||
@@ -10503,7 +10861,7 @@ version = "0.11.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
|
||||
dependencies = [
|
||||
"yoke",
|
||||
"yoke 0.8.2",
|
||||
"zerofrom",
|
||||
"zerovec-derive",
|
||||
]
|
||||
|
||||
@@ -31,7 +31,7 @@ edition = "2024"
|
||||
rust-version = "1.85"
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/altair823/kebab"
|
||||
version = "0.22.0" # v0.22.0 — candle 임베딩 provider (NUMA-안전, opt-in `provider=candle` + `num_threads`/KEBAB_EMBED_THREADS). fastembed default 불변, embedding_version 유지(재색인 0). — CLAUDE.md §Release 도그푸딩 트리거
|
||||
version = "0.24.0" # v0.24.0 — 상세 ingest 진행 로깅: 신규 wire 이벤트 asset_chunked / expansion_progress / asset_timings (ingest_progress.v1 additive), CLI 진행바 sub-message + phase timing 한 줄. asset 내부 parse/chunk/expansion/embed/store 가시화. wire v1 backward-compat. — CLAUDE.md §Release
|
||||
|
||||
# pre-v0.18 workspace-wide cleanup: enable clippy::pedantic group with
|
||||
# intentional allow-list. The allowed lints are either cosmetic (doc style),
|
||||
|
||||
40
README.md
40
README.md
@@ -51,7 +51,24 @@ embedding 벡터와 별칭 LLM 결과를 청크 **내용 해시** 로 캐싱한
|
||||
|
||||
### 외부 계산 + 로컬 검색 워크플로
|
||||
|
||||
search/ask 는 asset 파일 없이 `kebab.sqlite` + `lancedb` 만으로 동작한다. 비싼 색인(임베딩·OCR·별칭 생성)을 성능 좋은 서버에서 수행한 뒤, 이 두 산출물만 로컬로 복사하면 그대로 검색·질문할 수 있다.
|
||||
search/ask 는 원본 파일 없이 KB 산출물만으로 동작한다 (청크 본문이 SQLite 에 저장되고 문서 경로는 상대경로로 기록됨). 비싼 색인(임베딩·OCR·별칭 생성)을 성능 좋은 머신에서 수행한 뒤(예: Apple Silicon 맥에서 candle Metal GPU), **두 산출물만** 다른 머신(예: NUMA 서버)으로 복사하면 그대로 검색·질문할 수 있다.
|
||||
|
||||
**무엇을 복사하나 — `[storage]` 에서 정의된 두 경로:**
|
||||
|
||||
| 복사 대상 | config 키 (`[storage]`) | 기본 경로 | 내용 |
|
||||
|-----------|------------------------|-----------|------|
|
||||
| `kebab.sqlite` | `sqlite = "{data_dir}/kebab.sqlite"` | `{data_dir}/kebab.sqlite` | 문서·청크·본문·FTS5·메타 |
|
||||
| `lancedb/` | `vector_dir = "{data_dir}/lancedb"` | `{data_dir}/lancedb/` | 임베딩 벡터 |
|
||||
|
||||
`{data_dir}` 는 `[storage].data_dir` (예: `~/.local/share/kebab`). `models/`(`model_dir`)·`assets/`(`asset_dir`)는 **복사 불필요** — 모델은 각 머신이 자기 캐시를 받고, asset 원본 바이트는 검색·질문에 쓰이지 않는다 (단일파일/`stdin` 색인의 원본 재읽기·재색인까지 보존하려면 `assets/` 도 함께 복사).
|
||||
|
||||
```bash
|
||||
# ingest 가 끝난(쓰기 없는) 상태에서 복사
|
||||
rsync -a <src-data_dir>/kebab.sqlite user@server:<dst-data_dir>/
|
||||
rsync -a <src-data_dir>/lancedb/ user@server:<dst-data_dir>/lancedb/
|
||||
```
|
||||
|
||||
조건: **양쪽 동일 `kebab` 버전 + 동일 임베딩 모델/차원** (`[models.embedding].model`·`dimensions`). provider 는 달라도 됨 (예: 맥 `candle`/Metal ↔ 서버 `candle`/CPU 또는 `fastembed` — 같은 모델이면 벡터 호환). 복사는 반드시 ingest 가 돌지 않을 때.
|
||||
|
||||
### 멀티미디어 색인
|
||||
|
||||
@@ -70,7 +87,7 @@ Markdown · PDF · 이미지(OCR + caption) · 소스코드(Rust/Python/TS/JS/Go
|
||||
| 명령 | 동작 |
|
||||
|------|------|
|
||||
| `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 |
|
||||
| `kebab ingest [<path>]` | 워크스페이스 스캔 후 새/변경 문서 색인 (idempotent · incremental, `--force-reingest` 로 강제 재처리). 미지원 확장자는 자동 skip |
|
||||
| `kebab ingest [<path>]` | 워크스페이스 스캔 후 새/변경 문서 색인 (idempotent · incremental, `--force-reingest` 로 강제 재처리). 미지원 확장자는 자동 skip. 진행바는 문서별 청크 수 · 별칭 확장 라이브 카운터 · 문서 종료 시 phase별 소요시간(parse/chunk/expand/embed/store)을 표시 (`--json` 은 `asset_chunked`/`expansion_progress`/`asset_timings` 이벤트로) |
|
||||
| `kebab ingest-file <path>` | 단일 파일 ingest (workspace 외부 가능 — `_external/` 로 deterministic copy) |
|
||||
| `kebab ingest-stdin --title <T>` | stdin 의 markdown 본문 ingest |
|
||||
| `kebab search --mode {lexical,vector,hybrid} "<query>" [flags]` | 검색 (default hybrid = RRF fusion, citation 포함). 필터/budget flag 는 `--help` |
|
||||
@@ -108,6 +125,25 @@ dimensions = 1024 # config 와 LanceDB stored dim 불일치 시
|
||||
num_threads = 0 # candle 전용 CPU 스레드 캡 (0=auto=#cores).
|
||||
# env KEBAB_EMBED_THREADS 가 우선. NUMA 노드 바인딩은
|
||||
# numactl 과 조합. fastembed provider 는 무시.
|
||||
```
|
||||
|
||||
**Apple Silicon GPU 가속 (candle / macOS)**: M-시리즈 맥에서 candle 임베딩을
|
||||
GPU(Metal)로 돌리면 CPU 대비 대용량 ingest 가 크게 빨라진다. 빌드 또는 설치 시
|
||||
`embed_metal` feature 를 켠다:
|
||||
|
||||
```bash
|
||||
# 빌드만:
|
||||
cargo build --release --features embed_metal
|
||||
# 전역 설치 (~/.cargo/bin/kebab):
|
||||
cargo install --path crates/kebab-cli --features embed_metal --locked
|
||||
```
|
||||
|
||||
벡터는 CPU candle 과 동일 모델이라 호환되므로, 맥에서 GPU 로 색인한
|
||||
`kebab.sqlite` + `lancedb/` 를 그대로 Linux 서버(CPU candle)로 복사해 질의할 수
|
||||
있다. 색인 로그에 `candle device = Metal (GPU)` 가 보이면 GPU 사용 중. metal
|
||||
feature 는 macOS 전용 (Linux/서버는 기본 CPU 빌드).
|
||||
|
||||
```toml
|
||||
|
||||
[models.llm]
|
||||
endpoint = "http://localhost:11434" # Ollama host:port
|
||||
|
||||
@@ -100,6 +100,8 @@ reqwest = { version = "0.12", default-features = false, features = ["blocki
|
||||
# disable path 없음; 이 feature 는 spec §6.3 명시를 honor 하는 role 만.
|
||||
default = ["fts_korean_morphological"]
|
||||
fts_korean_morphological = []
|
||||
# opt-in (macOS): candle embedder runs on the Apple Silicon GPU. See kebab-embed-candle.
|
||||
embed_metal = ["kebab-embed-candle/metal"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
@@ -47,11 +47,21 @@ pub struct AggregateCounts {
|
||||
///
|
||||
/// ```text
|
||||
/// ScanStarted < ScanCompleted
|
||||
/// < (AssetStarted [< (PdfOcrStarted < PdfOcrFinished)*] < AssetFinished)*
|
||||
/// < ( AssetStarted
|
||||
/// [< (PdfOcrStarted < PdfOcrFinished)*]
|
||||
/// [< AssetChunked]
|
||||
/// [< ExpansionProgress*]
|
||||
/// [< AssetTimings]
|
||||
/// < AssetFinished )*
|
||||
/// < (Completed | Aborted)
|
||||
/// ```
|
||||
///
|
||||
/// `[]` = optional, per-PDF asset only (v0.20.0 sub-item 1).
|
||||
/// `[]` = optional. `PdfOcr*` is per-PDF asset only (v0.20.0 sub-item 1).
|
||||
/// `AssetChunked` / `ExpansionProgress` / `AssetTimings` are the v0.24.0
|
||||
/// asset-internal phase events: `AssetChunked` fires once right after
|
||||
/// chunking (markdown / image / PDF); `ExpansionProgress` is a throttled
|
||||
/// counter through the alias-expansion loop (markdown, expansion enabled
|
||||
/// only); `AssetTimings` reports per-phase wall-clock once (markdown only).
|
||||
///
|
||||
/// Embed-batch events (`embed_batch_started` / `embed_batch_finished`
|
||||
/// in §2.4a) are reserved for a future iteration and are not emitted
|
||||
@@ -82,6 +92,41 @@ pub enum IngestEvent {
|
||||
result: IngestItemKind,
|
||||
chunks: u32,
|
||||
},
|
||||
/// v0.24.0 (additive): emitted right after an asset is chunked, before
|
||||
/// expansion / embed / store. Surfaces "this document is N chunks"
|
||||
/// immediately so a single large document no longer looks frozen at
|
||||
/// `idx/total` while its per-chunk phases churn. `chunks` is the chunk
|
||||
/// count for asset `idx`.
|
||||
AssetChunked { idx: u32, total: u32, chunks: u32 },
|
||||
/// v0.24.0 (additive): throttled progress through the per-chunk
|
||||
/// expansion (alias-LLM) loop — the slowest inner phase for large
|
||||
/// documents (~1–4s per chunk against a remote GPU Ollama). `done` is
|
||||
/// the number of chunks processed so far (cache hits included, so the
|
||||
/// counter still advances on a warm re-run); `chunks` is the asset's
|
||||
/// total chunk count. Emitted at most every 25 chunks or once per
|
||||
/// second (see the loop in `ingest_one_asset`), plus a final
|
||||
/// `done == chunks` frame.
|
||||
ExpansionProgress {
|
||||
idx: u32,
|
||||
total: u32,
|
||||
done: u32,
|
||||
chunks: u32,
|
||||
},
|
||||
/// v0.24.0 (additive): per-phase wall-clock (milliseconds) for asset
|
||||
/// `idx`, emitted once the asset's markdown pipeline finishes. Lets a
|
||||
/// user see *where* the time went (parse / chunk / expansion / embed /
|
||||
/// store) without parsing logs. Only the markdown path emits this; the
|
||||
/// image / PDF paths surface `AssetChunked` but skip phase timing (their
|
||||
/// phase shapes differ — OCR / caption rather than expansion).
|
||||
AssetTimings {
|
||||
idx: u32,
|
||||
total: u32,
|
||||
parse_ms: u64,
|
||||
chunk_ms: u64,
|
||||
expansion_ms: u64,
|
||||
embed_ms: u64,
|
||||
store_ms: u64,
|
||||
},
|
||||
/// Run finished normally. `counts` is the final aggregate.
|
||||
Completed { counts: AggregateCounts },
|
||||
/// Run finished by user cancellation. `counts` is the partial
|
||||
@@ -199,6 +244,79 @@ mod tests {
|
||||
assert_eq!(v.get("media").and_then(|s| s.as_str()), Some("markdown"));
|
||||
}
|
||||
|
||||
#[test]
fn asset_chunked_serializes_with_discriminator() {
    // `AssetChunked` is a v0.24.0 additive variant: its `kind` tag must be
    // the snake_case string `asset_chunked` so wire v1 consumers can branch
    // on it cleanly.
    let event = IngestEvent::AssetChunked {
        idx: 3,
        total: 10,
        chunks: 142,
    };
    let json = serde_json::to_value(&event).unwrap();

    // Discriminator first, then the payload fields that are checked.
    let kind = json.get("kind").and_then(|k| k.as_str());
    assert_eq!(kind, Some("asset_chunked"));

    let idx = json.get("idx").and_then(serde_json::Value::as_u64);
    assert_eq!(idx, Some(3));

    let chunks = json.get("chunks").and_then(serde_json::Value::as_u64);
    assert_eq!(chunks, Some(142));
}
|
||||
|
||||
#[test]
fn expansion_progress_serializes_with_discriminator() {
    // Serialize an `ExpansionProgress` frame and check the `kind` tag plus
    // the two counters (`done`, `chunks`) that consumers read.
    let event = IngestEvent::ExpansionProgress {
        idx: 1,
        total: 5,
        done: 25,
        chunks: 200,
    };
    let json = serde_json::to_value(&event).unwrap();

    let kind = json.get("kind").and_then(|k| k.as_str());
    assert_eq!(kind, Some("expansion_progress"));

    let done = json.get("done").and_then(serde_json::Value::as_u64);
    assert_eq!(done, Some(25));

    let chunks = json.get("chunks").and_then(serde_json::Value::as_u64);
    assert_eq!(chunks, Some(200));
}
|
||||
|
||||
#[test]
fn asset_timings_serializes_all_phase_fields() {
    // Serialize an `AssetTimings` frame with a distinct value per phase so a
    // swapped or missing field is caught.
    let event = IngestEvent::AssetTimings {
        idx: 2,
        total: 7,
        parse_ms: 12,
        chunk_ms: 3,
        expansion_ms: 45_000,
        embed_ms: 800,
        store_ms: 20,
    };
    let json = serde_json::to_value(&event).unwrap();

    let kind = json.get("kind").and_then(|k| k.as_str());
    assert_eq!(kind, Some("asset_timings"));

    // All five phase fields are present (plain u64, always serialized).
    let expected: [(&str, u64); 5] = [
        ("parse_ms", 12),
        ("chunk_ms", 3),
        ("expansion_ms", 45_000),
        ("embed_ms", 800),
        ("store_ms", 20),
    ];
    for (field, want) in expected {
        let got = json.get(field).and_then(serde_json::Value::as_u64);
        assert_eq!(got, Some(want), "field {field}");
    }
}
|
||||
|
||||
#[test]
|
||||
fn ingest_event_completed_has_counts() {
|
||||
let ev = IngestEvent::Completed {
|
||||
|
||||
@@ -480,6 +480,8 @@ pub fn ingest_with_config_opts(
|
||||
let item = ingest_one_asset(
|
||||
&app,
|
||||
&asset,
|
||||
idx,
|
||||
scanned_count,
|
||||
&parser_version,
|
||||
&chunk_policy,
|
||||
embedder.as_ref(),
|
||||
@@ -1100,6 +1102,8 @@ fn embed_with_cache(
|
||||
fn ingest_one_asset(
|
||||
app: &App,
|
||||
asset: &RawAsset,
|
||||
idx: u32,
|
||||
total: u32,
|
||||
parser_version: &ParserVersion,
|
||||
chunk_policy: &ChunkPolicy,
|
||||
embedder: Option<&Arc<dyn Embedder + Send + Sync>>,
|
||||
@@ -1132,18 +1136,23 @@ fn ingest_one_asset(
|
||||
return ingest_one_image_asset(
|
||||
app,
|
||||
asset,
|
||||
idx,
|
||||
total,
|
||||
chunk_policy,
|
||||
embedder,
|
||||
vector_store,
|
||||
existing_doc_ids,
|
||||
image_pipeline,
|
||||
force_reingest,
|
||||
progress,
|
||||
);
|
||||
}
|
||||
MediaType::Pdf => {
|
||||
return ingest_one_pdf_asset(
|
||||
app,
|
||||
asset,
|
||||
idx,
|
||||
total,
|
||||
chunk_policy,
|
||||
embedder,
|
||||
vector_store,
|
||||
@@ -1252,6 +1261,10 @@ fn ingest_one_asset(
|
||||
return Ok(item);
|
||||
}
|
||||
|
||||
// v0.24.0 phase timing: parse spans from here (byte read) through
|
||||
// `build_canonical_document`, i.e. everything before the chunker runs.
|
||||
let t_parse = std::time::Instant::now();
|
||||
|
||||
let bytes = std::fs::read(&path)
|
||||
.with_context(|| format!("read asset bytes from {}", path.display()))?;
|
||||
|
||||
@@ -1286,9 +1299,26 @@ fn ingest_one_asset(
|
||||
build_canonical_document(asset, metadata, parsed_blocks, parser_version, all_warnings)
|
||||
.context("kb-parse-md::build_canonical_document")?;
|
||||
|
||||
let parse_ms = u64::try_from(t_parse.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
let t_chunk = std::time::Instant::now();
|
||||
let mut chunks = MdHeadingV1Chunker
|
||||
.chunk(&canonical, chunk_policy)
|
||||
.context("kb-chunk::MdHeadingV1Chunker::chunk")?;
|
||||
let chunk_ms = u64::try_from(t_chunk.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
// v0.24.0: surface the chunk count immediately, before the (potentially
|
||||
// very slow) expansion / embed phases — so a single large document no
|
||||
// longer looks frozen at `idx/total` while its chunks churn.
|
||||
let total_chunks = u32::try_from(chunks.len()).unwrap_or(u32::MAX);
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::AssetChunked {
|
||||
idx,
|
||||
total,
|
||||
chunks: total_chunks,
|
||||
},
|
||||
);
|
||||
|
||||
// Phase 2 doc-side expansion: flag on 이면 청크당 별칭 생성 (fail-soft).
|
||||
// derivation cache(§3.4): 같은 청크 text + 같은 alias version_key 면 LLM
|
||||
@@ -1296,6 +1326,7 @@ fn ingest_one_asset(
|
||||
let mut alias_cache_hit = 0_usize;
|
||||
let mut alias_cache_miss = 0_usize;
|
||||
let mut alias_touch_keys: Vec<String> = Vec::new();
|
||||
let t_expansion = std::time::Instant::now();
|
||||
if app.config.ingest.expansion.enabled {
|
||||
let exp = &app.config.ingest.expansion;
|
||||
let alias_version_key = format!(
|
||||
@@ -1313,6 +1344,12 @@ fn ingest_one_asset(
|
||||
Ok(llm) => {
|
||||
let generator =
|
||||
crate::expansion::ExpansionGenerator::new(&llm, exp.max_aliases_per_chunk);
|
||||
// v0.24.0: throttled live counter through the per-chunk
|
||||
// expansion loop. Emit at most every 25 chunks or once per
|
||||
// second — never per chunk (would flood the mpsc channel).
|
||||
let mut done: u32 = 0;
|
||||
let mut last_emit = std::time::Instant::now();
|
||||
let mut last_done: u32 = 0;
|
||||
for chunk in &mut chunks {
|
||||
let key = kebab_core::derivation_cache_key(
|
||||
"alias",
|
||||
@@ -1345,6 +1382,40 @@ fn ingest_one_asset(
|
||||
.derivation_cache_put(&key, "alias", a.as_bytes())?;
|
||||
}
|
||||
}
|
||||
// Cache hits count toward `done` too (the brief: show the
|
||||
// warm-run fast-forward). Throttle: every 25 chunks or
|
||||
// ≥1s since the last emit.
|
||||
done += 1;
|
||||
if done % 25 == 0
|
||||
|| last_emit.elapsed() >= std::time::Duration::from_secs(1)
|
||||
{
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::ExpansionProgress {
|
||||
idx,
|
||||
total,
|
||||
done,
|
||||
chunks: total_chunks,
|
||||
},
|
||||
);
|
||||
last_emit = std::time::Instant::now();
|
||||
last_done = done;
|
||||
}
|
||||
}
|
||||
// Final frame so the counter lands on done == chunks — but only
|
||||
// if the last in-loop emit didn't already report this `done`
|
||||
// (avoids a duplicate frame when chunks is a multiple of the
|
||||
// throttle, and skips a 0/0 frame when there are no chunks).
|
||||
if done != last_done {
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::ExpansionProgress {
|
||||
idx,
|
||||
total,
|
||||
done,
|
||||
chunks: total_chunks,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -1355,6 +1426,7 @@ fn ingest_one_asset(
|
||||
}
|
||||
}
|
||||
}
|
||||
let expansion_ms = u64::try_from(t_expansion.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
// Stamp chunker + embedding versions so Task 7's skip detection has
|
||||
// data on the second run.
|
||||
@@ -1367,7 +1439,7 @@ fn ingest_one_asset(
|
||||
// (per-document tx semantics per design §5.8); composing them is
|
||||
// the kb-app job. A failure mid-way leaves the DB in a state the
|
||||
// next ingest run can re-converge (UPSERT + DELETE-then-INSERT).
|
||||
purge_vector_orphans_for_workspace_path(app, asset, vector_store)?;
|
||||
let t_store = std::time::Instant::now();
|
||||
app.sqlite
|
||||
.put_asset_with_bytes(asset, &bytes)
|
||||
.context("DocumentStore::put_asset_with_bytes")?;
|
||||
@@ -1380,8 +1452,16 @@ fn ingest_one_asset(
|
||||
app.sqlite
|
||||
.put_chunks(&canonical.doc_id, &chunks)
|
||||
.context("DocumentStore::put_chunks")?;
|
||||
let store_ms = u64::try_from(t_store.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
// Embed + vector upsert (only when both sides are configured).
|
||||
let t_embed = std::time::Instant::now();
|
||||
// Stale-vector purge is LanceDB I/O, so it belongs to the embed/vector
|
||||
// phase — not the SQLite `store` phase. Keeping it here makes `store_ms`
|
||||
// mean "SQLite persist only" and `embed_ms` cover all vector-store work
|
||||
// (purge + upsert), so per-phase timings attribute the bottleneck
|
||||
// correctly (review fix). Runs before any new upsert, as before.
|
||||
purge_vector_orphans_for_workspace_path(app, asset, vector_store)?;
|
||||
let mut emb_cache_hit = 0_usize;
|
||||
let mut emb_cache_miss = 0_usize;
|
||||
if let (Some(emb), Some(vec_store)) = (embedder, vector_store) {
|
||||
@@ -1511,6 +1591,22 @@ fn ingest_one_asset(
|
||||
}
|
||||
}
|
||||
|
||||
let embed_ms = u64::try_from(t_embed.elapsed().as_millis()).unwrap_or(u64::MAX);
|
||||
|
||||
// v0.24.0: phase-timing breakdown for this asset (markdown path only).
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::AssetTimings {
|
||||
idx,
|
||||
total,
|
||||
parse_ms,
|
||||
chunk_ms,
|
||||
expansion_ms,
|
||||
embed_ms,
|
||||
store_ms,
|
||||
},
|
||||
);
|
||||
|
||||
// 히트한 alias 키들의 last_used_at 갱신(LRU 보존, §3.5).
|
||||
app.sqlite.derivation_cache_touch(&alias_touch_keys)?;
|
||||
|
||||
@@ -1564,12 +1660,15 @@ fn ingest_one_asset(
|
||||
fn ingest_one_image_asset(
|
||||
app: &App,
|
||||
asset: &RawAsset,
|
||||
idx: u32,
|
||||
total: u32,
|
||||
chunk_policy: &ChunkPolicy,
|
||||
embedder: Option<&Arc<dyn Embedder + Send + Sync>>,
|
||||
vector_store: Option<&Arc<kebab_store_vector::LanceVectorStore>>,
|
||||
existing_doc_ids: &std::collections::HashSet<String>,
|
||||
image_pipeline: &ImagePipeline<'_>,
|
||||
force_reingest: bool,
|
||||
progress: Option<&std::sync::mpsc::Sender<crate::ingest_progress::IngestEvent>>,
|
||||
) -> anyhow::Result<kebab_core::IngestItem> {
|
||||
let ocr_engine = image_pipeline.ocr_engine;
|
||||
let caption_llm = image_pipeline.caption_llm;
|
||||
@@ -1722,6 +1821,17 @@ fn ingest_one_image_asset(
|
||||
.chunk(&canonical, chunk_policy)
|
||||
.context("kb-chunk::MdHeadingV1Chunker::chunk (image)")?;
|
||||
|
||||
// v0.24.0: surface chunk count for the image path too (phase timing is
|
||||
// markdown-only, but AssetChunked is consistent across media).
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::AssetChunked {
|
||||
idx,
|
||||
total,
|
||||
chunks: u32::try_from(chunks.len()).unwrap_or(u32::MAX),
|
||||
},
|
||||
);
|
||||
|
||||
// 5. Persist + embed — identical sequence to markdown.
|
||||
// Stamp chunker + embedding versions (image uses MdHeadingV1Chunker
|
||||
// for its single-block doc, so we record that version).
|
||||
@@ -2127,6 +2237,8 @@ fn sweep_deleted_files(
|
||||
fn ingest_one_pdf_asset(
|
||||
app: &App,
|
||||
asset: &RawAsset,
|
||||
idx: u32,
|
||||
total: u32,
|
||||
chunk_policy: &ChunkPolicy,
|
||||
embedder: Option<&Arc<dyn Embedder + Send + Sync>>,
|
||||
vector_store: Option<&Arc<kebab_store_vector::LanceVectorStore>>,
|
||||
@@ -2330,6 +2442,16 @@ fn ingest_one_pdf_asset(
|
||||
.chunk(&canonical, chunk_policy)
|
||||
.context("kb-chunk::PdfPageV1Chunker::chunk")?;
|
||||
|
||||
// v0.24.0: surface chunk count for the PDF path too.
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::AssetChunked {
|
||||
idx,
|
||||
total,
|
||||
chunks: u32::try_from(chunks.len()).unwrap_or(u32::MAX),
|
||||
},
|
||||
);
|
||||
|
||||
// Stamp chunker + embedding versions so Task 7's skip detection has
|
||||
// data on the second run.
|
||||
canonical.last_chunker_version = Some(chunker.chunker_version());
|
||||
|
||||
@@ -69,40 +69,74 @@ fn progress_event_sequence_matches_design_section_2_4a() {
|
||||
other => panic!("expected Completed last, got {other:?}"),
|
||||
}
|
||||
|
||||
// Middle: 3 AssetStarted/AssetFinished pairs in monotonic idx order.
|
||||
let asset_events: Vec<&IngestEvent> = events[2..events.len() - 1].iter().collect();
|
||||
assert_eq!(
|
||||
asset_events.len(),
|
||||
6,
|
||||
"expected 3 (Started + Finished) pairs, got {asset_events:?}"
|
||||
);
|
||||
for (chunk_idx, pair) in asset_events.chunks(2).enumerate() {
|
||||
let expected_idx = chunk_idx as u32 + 1;
|
||||
match (pair[0], pair[1]) {
|
||||
(
|
||||
IngestEvent::AssetStarted {
|
||||
idx: si,
|
||||
total: st,
|
||||
media,
|
||||
..
|
||||
},
|
||||
IngestEvent::AssetFinished {
|
||||
idx: fi,
|
||||
total: ft,
|
||||
result,
|
||||
chunks,
|
||||
},
|
||||
) => {
|
||||
assert_eq!(*si, expected_idx, "Started idx mismatch: {pair:?}");
|
||||
assert_eq!(*fi, expected_idx, "Finished idx mismatch: {pair:?}");
|
||||
assert_eq!(*st, 3, "Started total mismatch");
|
||||
assert_eq!(*ft, 3, "Finished total mismatch");
|
||||
assert_eq!(media, "markdown", "fixture is markdown only");
|
||||
assert_eq!(*result, IngestItemKind::New, "first ingest → New");
|
||||
assert!(*chunks >= 1, "chunks: {pair:?}");
|
||||
// Middle (v0.24.0 ordering invariant §2.4a): per asset the stream is
|
||||
// AssetStarted < AssetChunked < [ExpansionProgress*] < AssetTimings
|
||||
// < AssetFinished
|
||||
// Expansion is disabled in the lexical fixture, so no ExpansionProgress
|
||||
// frames appear here — but AssetChunked + AssetTimings are emitted for
|
||||
// every markdown asset.
|
||||
let middle = &events[2..events.len() - 1];
|
||||
|
||||
// 3 AssetStarted events, monotonic idx 1..=3, all markdown, total = 3.
|
||||
let started: Vec<u32> = middle
|
||||
.iter()
|
||||
.filter_map(|e| match e {
|
||||
IngestEvent::AssetStarted {
|
||||
idx, total, media, ..
|
||||
} => {
|
||||
assert_eq!(*total, 3, "Started total mismatch: {e:?}");
|
||||
assert_eq!(media, "markdown", "fixture is markdown only: {e:?}");
|
||||
Some(*idx)
|
||||
}
|
||||
other => panic!("expected Started+Finished pair, got {other:?}"),
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(started, vec![1, 2, 3], "AssetStarted idx order: {middle:?}");
|
||||
|
||||
// 3 AssetFinished events, monotonic idx 1..=3, each New with ≥1 chunk.
|
||||
let finished: Vec<u32> = middle
|
||||
.iter()
|
||||
.filter_map(|e| match e {
|
||||
IngestEvent::AssetFinished {
|
||||
idx,
|
||||
total,
|
||||
result,
|
||||
chunks,
|
||||
} => {
|
||||
assert_eq!(*total, 3, "Finished total mismatch: {e:?}");
|
||||
assert_eq!(*result, IngestItemKind::New, "first ingest → New: {e:?}");
|
||||
assert!(*chunks >= 1, "chunks: {e:?}");
|
||||
Some(*idx)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(finished, vec![1, 2, 3], "AssetFinished idx order: {middle:?}");
|
||||
|
||||
// v0.24.0 additive events: exactly one AssetChunked + one AssetTimings
|
||||
// per asset, each strictly bracketed by that asset's Started / Finished.
|
||||
for target in 1u32..=3 {
|
||||
let started_at = middle
|
||||
.iter()
|
||||
.position(|e| matches!(e, IngestEvent::AssetStarted { idx, .. } if *idx == target))
|
||||
.unwrap_or_else(|| panic!("missing AssetStarted for idx {target}: {middle:?}"));
|
||||
let finished_at = middle
|
||||
.iter()
|
||||
.position(|e| matches!(e, IngestEvent::AssetFinished { idx, .. } if *idx == target))
|
||||
.unwrap_or_else(|| panic!("missing AssetFinished for idx {target}: {middle:?}"));
|
||||
let chunked_at = middle
|
||||
.iter()
|
||||
.position(|e| matches!(e, IngestEvent::AssetChunked { idx, chunks, .. } if *idx == target && *chunks >= 1))
|
||||
.unwrap_or_else(|| panic!("missing AssetChunked for idx {target}: {middle:?}"));
|
||||
let timings_at = middle
|
||||
.iter()
|
||||
.position(|e| matches!(e, IngestEvent::AssetTimings { idx, .. } if *idx == target))
|
||||
.unwrap_or_else(|| panic!("missing AssetTimings for idx {target}: {middle:?}"));
|
||||
assert!(
|
||||
started_at < chunked_at && chunked_at < timings_at && timings_at < finished_at,
|
||||
"idx {target} ordering: started={started_at} chunked={chunked_at} \
|
||||
timings={timings_at} finished={finished_at}: {middle:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -51,5 +51,10 @@ tempfile = { workspace = true }
|
||||
rusqlite = { workspace = true }
|
||||
time = { workspace = true }
|
||||
|
||||
[features]
|
||||
# opt-in (macOS): build the `kebab` binary with candle on the Apple Silicon GPU.
|
||||
# cargo build --release --features embed_metal
|
||||
embed_metal = ["kebab-app/embed_metal"]
|
||||
|
||||
[lints]
|
||||
workspace = true
|
||||
|
||||
@@ -632,6 +632,24 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
.map(|v| v.eq_ignore_ascii_case("plain"))
|
||||
.unwrap_or(false);
|
||||
let mode = progress::ProgressMode::from_flags(cli.json, cli.quiet, plain_env);
|
||||
|
||||
// Surface the active embedding backend/device on the terminal so the
|
||||
// user sees it without grepping kb.log (the per-device tracing line
|
||||
// only lands in the log file at --verbose). Suppressed under
|
||||
// --json/--quiet. The Metal note reflects the build (`embed_metal`);
|
||||
// the confirmed runtime device is in kb.log (`candle device = ...`).
|
||||
if !cli.json && !cli.quiet {
|
||||
let backend = match cfg.models.embedding.provider.as_str() {
|
||||
"candle" if cfg!(feature = "embed_metal") => "candle (Metal/GPU 빌드)",
|
||||
"candle" => "candle (CPU, 순수 Rust)",
|
||||
"fastembed" | "onnx" | "" => "fastembed (onnxruntime)",
|
||||
"none" => "비활성 (lexical-only)",
|
||||
other => other,
|
||||
};
|
||||
eprintln!("임베딩 백엔드: {backend} · 모델 {} ({}-dim)",
|
||||
cfg.models.embedding.model, cfg.models.embedding.dimensions);
|
||||
}
|
||||
|
||||
let (tx, rx) = std::sync::mpsc::channel::<kebab_app::IngestEvent>();
|
||||
let display_handle =
|
||||
std::thread::spawn(move || progress::ProgressDisplay::new(mode).run(rx));
|
||||
|
||||
@@ -157,6 +157,54 @@ impl ProgressDisplay {
|
||||
// in Completed handles the final state. No per-asset bar update
|
||||
// here avoids the duplicate-frame artifact in TTY scrollback.
|
||||
}
|
||||
// v0.24.0: asset-internal phase visibility. AssetChunked /
|
||||
// ExpansionProgress use the bar *message* (live sub-progress for
|
||||
// the current asset) — distinct from the per-file position draw,
|
||||
// so a single large document no longer looks frozen. AssetTimings
|
||||
// prints a one-line breakdown when the asset finishes.
|
||||
IngestEvent::AssetChunked { idx, total, chunks } => {
|
||||
if let Some(bar) = self.bar.as_ref() {
|
||||
bar.set_message(format!("→ {chunks} chunks"));
|
||||
}
|
||||
if !tty && !quiet {
|
||||
let mut err = std::io::stderr().lock();
|
||||
let _ = writeln!(err, "ingest: {idx}/{total} → {chunks} chunks");
|
||||
}
|
||||
}
|
||||
IngestEvent::ExpansionProgress {
|
||||
done, chunks, ..
|
||||
} => {
|
||||
if let Some(bar) = self.bar.as_ref() {
|
||||
bar.set_message(format!("별칭 확장 {done}/{chunks}"));
|
||||
}
|
||||
// Non-TTY: suppressed by default — throttled though it is, one
|
||||
// line per emit would still spam CI logs. The bar message
|
||||
// covers the interactive case; --json carries every frame.
|
||||
}
|
||||
IngestEvent::AssetTimings {
|
||||
parse_ms,
|
||||
chunk_ms,
|
||||
expansion_ms,
|
||||
embed_ms,
|
||||
store_ms,
|
||||
..
|
||||
} => {
|
||||
if let Some(bar) = self.bar.as_ref() {
|
||||
bar.set_message("");
|
||||
}
|
||||
if !quiet {
|
||||
let mut err = std::io::stderr().lock();
|
||||
let _ = writeln!(
|
||||
err,
|
||||
" ⏱ parse {} · chunk {} · expand {} · embed {} · store {}",
|
||||
fmt_ms(*parse_ms),
|
||||
fmt_ms(*chunk_ms),
|
||||
fmt_ms(*expansion_ms),
|
||||
fmt_ms(*embed_ms),
|
||||
fmt_ms(*store_ms),
|
||||
);
|
||||
}
|
||||
}
|
||||
IngestEvent::Completed { counts } => {
|
||||
if let Some(bar) = self.bar.take() {
|
||||
bar.finish_and_clear();
|
||||
@@ -239,6 +287,17 @@ fn emit_json(event: &IngestEvent) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Format a phase duration, given in milliseconds, for the human-mode
/// `AssetTimings` line.
///
/// Values below one second are printed as whole milliseconds (`999ms`);
/// anything from 1000 ms upward is printed as seconds with one decimal
/// place, so a 45-second expansion reads `45.0s` rather than `45000ms`.
fn fmt_ms(ms: u64) -> String {
    match ms {
        // Sub-second: keep the exact millisecond count.
        0..=999 => format!("{ms}ms"),
        // One second or more: collapse to one-decimal seconds.
        _ => {
            let secs = ms as f64 / 1000.0;
            format!("{secs:.1}s")
        }
    }
}
|
||||
|
||||
/// Format the current wall-clock as RFC 3339 — used by `wire_ingest_progress`
|
||||
/// so every emitted event carries an `ts` field per §2.4a / the wire schema.
|
||||
pub(crate) fn now_rfc3339() -> anyhow::Result<String> {
|
||||
@@ -285,6 +344,15 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn fmt_ms_switches_unit_at_one_second() {
    // Table of (input milliseconds, expected rendering): the unit flips from
    // `ms` to one-decimal seconds exactly at 1000 ms.
    let cases = [
        (0, "0ms"),
        (999, "999ms"),
        (1000, "1.0s"),
        (45_000, "45.0s"),
        (1500, "1.5s"),
    ];
    for (ms, want) in cases {
        assert_eq!(fmt_ms(ms), want);
    }
}
|
||||
|
||||
#[test]
|
||||
fn now_rfc3339_parses_back() {
|
||||
let s = now_rfc3339().unwrap();
|
||||
|
||||
@@ -25,6 +25,14 @@ rayon = "1"
|
||||
anyhow = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
|
||||
[features]
|
||||
# opt-in: run candle on the Apple Silicon GPU (Metal). macOS-only — the build
|
||||
# enables candle's metal backend and `select_device()` picks Metal (CPU fallback
|
||||
# on failure). Lets an M-series Mac ingest e5-large on GPU (10×+ vs CPU); the
|
||||
# resulting vectors are cross-compatible with the CPU path (same model), so the
|
||||
# Linux server can serve queries on CPU candle.
|
||||
metal = ["candle-core/metal", "candle-nn/metal", "candle-transformers/metal"]
|
||||
|
||||
[dev-dependencies]
|
||||
# Integration-test binaries can only see the library's public API + these,
|
||||
# not the library's own (non-dev) dependencies — so rayon/kebab-config/kebab-core
|
||||
|
||||
@@ -128,7 +128,7 @@ impl CandleEmbedder {
|
||||
std::fs::create_dir_all(&cache_dir)
|
||||
.with_context(|| format!("create candle cache dir {}", cache_dir.display()))?;
|
||||
|
||||
let device = Device::Cpu;
|
||||
let device = select_device();
|
||||
|
||||
// 3. Fetch model files via hf-hub into the candle cache.
|
||||
tracing::info!(
|
||||
@@ -250,7 +250,9 @@ impl CandleEmbedder {
|
||||
let norm = mean.sqr()?.sum_keepdim(1)?.sqrt()?;
|
||||
let normalized = mean.broadcast_div(&norm)?;
|
||||
|
||||
Ok(normalized.to_vec2::<f32>()?)
|
||||
// `.contiguous()` before host copy: broadcast ops can leave a strided
|
||||
// view, which `to_vec2` rejects on the Metal backend (CPU tolerates it).
|
||||
Ok(normalized.contiguous()?.to_vec2::<f32>()?)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -307,6 +309,32 @@ fn prefix_input(input: &EmbeddingInput<'_>) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// Select the compute device. Built with the `metal` feature (Apple Silicon
|
||||
/// GPU), try Metal and fall back to CPU on failure; otherwise CPU. Metal only
|
||||
/// compiles/runs on macOS — the Linux server builds the CPU path. e5-large
|
||||
/// vectors are model-defined, so Metal-produced and CPU-produced embeddings are
|
||||
/// cross-compatible (a Mac can ingest on GPU, the server query on CPU).
|
||||
fn select_device() -> Device {
|
||||
#[cfg(feature = "metal")]
|
||||
{
|
||||
match Device::new_metal(0) {
|
||||
Ok(d) => {
|
||||
tracing::info!(target: "kebab-embed-candle", "candle device = Metal (GPU)");
|
||||
return d;
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
target: "kebab-embed-candle",
|
||||
error = %e,
|
||||
"Metal device unavailable; falling back to CPU"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
tracing::info!(target: "kebab-embed-candle", "candle device = CPU");
|
||||
Device::Cpu
|
||||
}
|
||||
|
||||
/// Apply a one-shot global rayon thread cap (the NUMA-safety lever). Returns
|
||||
/// `true` if this call set the pool, `false` if it was already initialized
|
||||
/// (cap not applied) or `n_threads == 0`. `#[doc(hidden)] pub` so the
|
||||
|
||||
@@ -154,7 +154,14 @@ fn apply_event(state: &mut IngestState, event: IngestEvent) {
|
||||
}
|
||||
// v0.20.0 sub-item 1: per-page PDF OCR events — TUI does not
|
||||
// surface per-page OCR progress in v1; no counter to update.
|
||||
IngestEvent::PdfOcrStarted { .. } | IngestEvent::PdfOcrFinished { .. } => {}
|
||||
IngestEvent::PdfOcrStarted { .. }
|
||||
| IngestEvent::PdfOcrFinished { .. }
|
||||
// v0.24.0 asset-internal phase events: the status-bar reducer tracks
|
||||
// per-asset counters, not sub-asset phase progress, so these are
|
||||
// no-ops here (the CLI / --json surfaces render them).
|
||||
| IngestEvent::AssetChunked { .. }
|
||||
| IngestEvent::ExpansionProgress { .. }
|
||||
| IngestEvent::AssetTimings { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,9 @@
|
||||
"scan_completed",
|
||||
"asset_started",
|
||||
"asset_finished",
|
||||
"asset_chunked",
|
||||
"expansion_progress",
|
||||
"asset_timings",
|
||||
"embed_batch_started",
|
||||
"embed_batch_finished",
|
||||
"pdf_ocr_started",
|
||||
@@ -33,7 +36,13 @@
|
||||
"enum": ["new", "updated", "skipped", "error"],
|
||||
"description": "asset_finished: per-asset outcome (mirrors `ingest_report.v1.items[].kind`)."
|
||||
},
|
||||
"chunks": { "type": "integer", "minimum": 0, "description": "asset_finished: chunk count produced for this asset." },
|
||||
"chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked / expansion_progress (v0.24.0): chunk count produced for this asset." },
|
||||
"done": { "type": "integer", "minimum": 0, "description": "expansion_progress (v0.24.0, additive): chunks processed so far in the per-chunk alias-expansion loop (cache hits included). Throttled: emitted at most every 25 chunks or once per second, plus a final frame where done == chunks." },
|
||||
"parse_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): parse phase wall-clock (ms). Markdown path only." },
|
||||
"chunk_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): chunk phase wall-clock (ms). Markdown path only." },
|
||||
"expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): alias-expansion phase wall-clock (ms). Markdown path only; 0 when expansion is disabled." },
|
||||
"embed_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): embed + vector phase wall-clock (ms) — embedding, vector upsert, and stale-vector purge. Markdown path only." },
|
||||
"store_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): SQLite persist phase wall-clock (ms) — put_asset/document/blocks/chunks only. Markdown path only." },
|
||||
"n_chunks": { "type": "integer", "minimum": 0, "description": "embed_batch_started / embed_batch_finished: chunks in this embedding batch." },
|
||||
"ms": { "type": "integer", "minimum": 0, "description": "embed_batch_finished / pdf_ocr_finished: wall-clock duration (ms). pdf_ocr_finished skip path 의 의미는 mixed (DCTDecode 부재 시 0, engine 실패 시 latency-before-bail)." },
|
||||
"chars": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished: char count of OCR result. Skip 시 0." },
|
||||
|
||||
@@ -14,6 +14,102 @@ historical contract that was implemented; this file accumulates the
|
||||
deltas so phase 5+ readers can find the live behavior without diffing
|
||||
git history.
|
||||
|
||||
## 2026-06-02 — 상세 ingest 진행 로깅 (asset 내부 phase 가시화, v0.24.0)
|
||||
|
||||
**무엇이 문제였나.** ingest 진행 이벤트가 asset(문서) 단위(`asset_started` /
|
||||
`asset_finished`)뿐이라 한 문서 내부의 parse / chunk / **expansion(별칭 LLM,
|
||||
청크당 순차 호출)** / embed / store 가 깜깜했다. expansion 은 청크당 약 1–4s
|
||||
(원격 GPU Ollama)이고 큰 문서는 청크 수백~천 개 → 그 한 문서에서 수십 분이
|
||||
걸리는데, 진행바는 `1/5150` 에 멈춘 듯 보여 사용자가 병목을 못 봤다.
|
||||
|
||||
**무엇을 추가했나 (wire `ingest_progress.v1` additive, 호환 유지).**
|
||||
`IngestEvent` 에 세 변이 추가 — `#[serde(tag="kind")]` 라 신규 `kind` 추가는
|
||||
wire v1 호환:
|
||||
|
||||
- `asset_chunked { idx, total, chunks }` — 청킹 직후(expansion/embed 전) 즉시
|
||||
"이 문서가 N청크" 노출. markdown / image / pdf 세 경로 모두 emit.
|
||||
- `expansion_progress { idx, total, done, chunks }` — expansion 루프 중
|
||||
**스로틀** 발신(매 25청크 또는 ≥1s; 종료 시 마지막 발신이 `done == chunks` 가 아니었다면 최종 프레임 1개 추가).
|
||||
캐시 히트 청크도 `done` 에 포함(warm 재색인 fast-forward 가시화). 채널 폭주
|
||||
방지 — 매 청크 emit 금지.
|
||||
- `asset_timings { idx, total, parse_ms, chunk_ms, expansion_ms, embed_ms,
|
||||
store_ms }` — asset 처리 phase 별 소요시간. **markdown 경로만** emit
|
||||
(image/pdf 는 phase shape 가 달라 생략; AssetChunked 만 emit).
|
||||
|
||||
**설계 결정 — AssetTimings 이벤트 vs AssetFinished 필드.** IMPL_BRIEF §1 은
|
||||
`AssetFinished` 에 optional phase-timing 필드를, §2 는 대안으로 신규
|
||||
`AssetTimings` 이벤트를 제시(권장). 후자를 택함 — `AssetFinished` 는 호출부
|
||||
(`ingest_with_config_progress` 루프)에서 만들어지는데 timing 데이터는
|
||||
`ingest_one_asset` 내부에만 있어, 필드를 채우려면 `kebab_core::IngestItem`
|
||||
(wire-stable struct) 변경 또는 별도 plumbing 이 필요. `ingest_one_asset` 가
|
||||
`progress` 핸들을 이미 들고 있으므로 새 이벤트를 직접 emit 하는 쪽이 crate
|
||||
경계(kebab-core 불변)도 지키고 더 깔끔. `AssetFinished` 는 손대지 않음.
|
||||
|
||||
**CLI 렌더(`kebab-cli` progress.rs).** `asset_chunked` → 진행바 message `→ N
|
||||
chunks`. `expansion_progress` → message `별칭 확장 {done}/{chunks}` (라이브).
|
||||
`asset_timings` → asset 종료 시 `⏱ parse Xs · chunk Ys · expand Zs · embed Ws
|
||||
· store Vs` 한 줄(`fmt_ms`: <1s 는 ms, ≥1s 는 1-decimal 초). `--json` 은
|
||||
`emit_json` 이 임의 이벤트를 직렬화하므로 자동 처리. `--quiet` 억제, 비-TTY
|
||||
expansion_progress 는 로그 폭주 방지로 기본 억제(진행바 message 로 커버).
|
||||
|
||||
**검증.** `cargo clippy --workspace --all-targets -- -D warnings` exit 0,
|
||||
`cargo test -p kebab-app -p kebab-cli` exit 0. 단위 테스트: ingest_progress.rs
|
||||
(3 신규 변이 직렬화 `kind` 판별 + 순서 불변식 재작성), progress.rs(`fmt_ms` 단위
|
||||
전환), 통합(`--json`/human stderr 에 새 이벤트 흐름). 실동작 smoke: 2-문서 ingest
|
||||
의 `--json` 에 `asset_chunked`/`asset_timings` 출현 + human `⏱ parse…·store…` 라인
|
||||
확인. expansion 라이브 카운터는 원격 LLM 필요라 단위/통합으로 커버.
|
||||
|
||||
**리뷰 반영.** (1) `store_ms` 경계 정정 — stale-vector orphan purge(LanceDB I/O)를
|
||||
`store_ms`(SQLite persist 전용)에서 빼 `embed_ms`(vector phase)로 이동. 진단
|
||||
정확도: store_ms 가 이제 SQLite put_* 만 의미(편집 재색인 시 920ms 가 실은 벡터
|
||||
삭제였던 오귀속 제거). purge 는 여전히 unconditional + 새 upsert 이전 실행 —
|
||||
기능 동등. (2) 최종 `expansion_progress` 프레임을 `done != last_done` 로 가드 —
|
||||
chunks 가 throttle 배수일 때의 중복 프레임 + chunks==0 시 0/0 프레임 제거.
|
||||
|
||||
**알려진 한계.** image/pdf 경로는 phase timing 없음(AssetChunked 만).
|
||||
expansion_progress 비-TTY 억제는 의도적(필요 시 `--json` 으로 전량 관측).
|
||||
|
||||
## 2026-06-02 — ingest 백엔드/디바이스 표시 + KB 이전 문서 (v0.23.1)
|
||||
|
||||
**동기.** Metal 빌드가 실제로 GPU 를 쓰는지 사용자가 터미널에서 못 봐서 Activity
|
||||
Monitor 로 확인해야 했다(`select_device()` 의 device 로그는 kb.log 파일로만, 기본
|
||||
EnvFilter=warn 이라 `--verbose` 필요). 또 "어떤 DB 파일을 옮기나" 가 README 에
|
||||
구체적이지 않았다.
|
||||
|
||||
**무엇.** (1) `kebab-cli` ingest 시작 시 임베딩 백엔드/모델/차원을 stderr 한 줄로
|
||||
표시(`임베딩 백엔드: candle (Metal/GPU 빌드) · 모델 …`), `--json`/`--quiet` 에선
|
||||
억제. Metal 표기는 `cfg!(feature="embed_metal")` 기반(빌드 사실); 확정 런타임
|
||||
디바이스는 여전히 kb.log(`candle device = …`). (2) README "외부 계산 + 로컬 검색"
|
||||
절에 복사 대상 2개(`kebab.sqlite`/`sqlite`, `lancedb/`/`vector_dir`)와 `[storage]`
|
||||
config 키·`models/`·`assets/` 복사 불필요·동일 버전/모델 조건·rsync 예시 추가.
|
||||
|
||||
**범위.** CLI 출력 + 문서만. 동작·wire·schema·벡터 변경 없음. 버전 0.23.0 → 0.23.1.
|
||||
|
||||
## 2026-06-02 — candle Metal(Apple Silicon GPU) opt-in build feature
|
||||
|
||||
**동기.** candle CPU 임베딩은 e5-large/512-tok 에서 약 1.5–1.9 s/chunk 로 느리고,
|
||||
코어를 더 줘도(rayon/MKL) 안 빨라진다(병목=커널 효율). 대용량 코퍼스(수만 청크)는
|
||||
CPU 로는 수 시간. 사용자 워크플로: **M4 Pro 맥에서 GPU 로 빠르게 색인 → sqlite +
|
||||
lancedb 만 Linux NUMA 서버로 복사 → 서버는 CPU candle 로 질의** (벡터 동일 모델이라
|
||||
호환, KB 이식성은 06-01 항목 + workspace_path 상대경로 + chunks.text 저장으로 확인).
|
||||
|
||||
**무엇.** `kebab-embed-candle` 에 `metal` feature 추가 →
|
||||
`candle-core/-nn/-transformers` 의 metal 백엔드 활성. `select_device()` 가 metal
|
||||
빌드 시 `Device::new_metal(0)` 선택(실패 시 CPU fallback), 비-metal 빌드는 기존
|
||||
`Device::Cpu` 그대로. host 복사 전 `.contiguous()` 추가(Metal 의 strided view 가
|
||||
`to_vec2` 거부 — CPU 는 허용). feature passthrough: `kebab-app/embed_metal` →
|
||||
`kebab-cli/embed_metal`. 빌드: `cargo build --release --features embed_metal`(macOS).
|
||||
|
||||
**제약 / 검증 분담.** metal 은 **macOS 전용 컴파일** — Linux CPU 머신(개발/서버)은
|
||||
비-metal 경로만 빌드(검증: clippy 0 + candle 단위 6 + thread_cap + parity, exit 0).
|
||||
**Metal 실행·속도·벡터 패리티(GPU vs CPU)는 M4 Pro 에서 사용자 검증** (Claude 의
|
||||
Linux 환경에서 불가). 로그 `candle device = Metal (GPU)` 로 GPU 사용 확인.
|
||||
|
||||
**호환성.** default(비-metal) 동작·벡터 불변. wire/schema 변경 없음. 버전 0.22.0 →
|
||||
**0.23.0** (신규 opt-in build feature surface).
|
||||
|
||||
amends: `docs/superpowers/specs/2026-06-01-embed-candle-track-spec.md` (§10 후속 — GPU 가속).
|
||||
|
||||
## 2026-06-01 — candle 임베딩 provider (NUMA double-free 회피, opt-in)
|
||||
|
||||
**무엇이 문제였나.** 듀얼소켓 NUMA 서버에서 `provider=fastembed`(onnxruntime)로
|
||||
|
||||
Reference in New Issue
Block a user