feat(p10-1a-1): kebab-parse-code crate (lang + repo + skip)
Tasks 5-8: new `kebab-parse-code` crate with three infrastructure modules for the code ingest framework. Ships lang.rs (extension→language identifier mapping), repo.rs (.git walk-up via gix 0.70 for RepoMeta), and skip.rs (BUILTIN_BLACKLIST, is_generated_file, is_oversized). 14 integration tests across three test files, all passing; clippy -D warnings clean. Note: gix pinned to 0.70 (not 0.83 as originally suggested) because 0.83 fails to compile against Rust 1.94.1 due to non-exhaustive match patterns in gix-hash. 0.70 resolves cleanly and has identical head_name/head_id API. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
660
Cargo.lock
generated
660
Cargo.lock
generated
@@ -755,6 +755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"serde",
|
||||
]
|
||||
|
||||
@@ -931,6 +932,15 @@ version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
|
||||
|
||||
[[package]]
|
||||
name = "clru"
|
||||
version = "0.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "197fd99cb113a8d5d9b6376f3aa817f32c1078f2343b714fff7d2ca44fdf67d5"
|
||||
dependencies = [
|
||||
"hashbrown 0.16.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "color_quant"
|
||||
version = "1.1.0"
|
||||
@@ -2140,6 +2150,12 @@ version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc"
|
||||
|
||||
[[package]]
|
||||
name = "dunce"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
|
||||
|
||||
[[package]]
|
||||
name = "dyn-clone"
|
||||
version = "1.0.20"
|
||||
@@ -2302,6 +2318,15 @@ dependencies = [
|
||||
"tokenizers",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "faster-hex"
|
||||
version = "0.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a2a2b11eda1d40935b26cf18f6833c526845ae8c41e58d09af6adeb6f0269183"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fastrand"
|
||||
version = "2.4.1"
|
||||
@@ -2738,6 +2763,583 @@ dependencies = [
|
||||
"weezl",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix"
|
||||
version = "0.70.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "736f14636705f3a56ea52b553e67282519418d9a35bb1e90b3a9637a00296b68"
|
||||
dependencies = [
|
||||
"gix-actor",
|
||||
"gix-commitgraph",
|
||||
"gix-config",
|
||||
"gix-date",
|
||||
"gix-diff",
|
||||
"gix-discover",
|
||||
"gix-features",
|
||||
"gix-fs",
|
||||
"gix-glob",
|
||||
"gix-hash",
|
||||
"gix-hashtable",
|
||||
"gix-index",
|
||||
"gix-lock",
|
||||
"gix-object",
|
||||
"gix-odb",
|
||||
"gix-pack",
|
||||
"gix-path",
|
||||
"gix-protocol",
|
||||
"gix-ref",
|
||||
"gix-refspec",
|
||||
"gix-revision",
|
||||
"gix-revwalk",
|
||||
"gix-sec",
|
||||
"gix-shallow",
|
||||
"gix-tempfile",
|
||||
"gix-trace",
|
||||
"gix-traverse",
|
||||
"gix-url",
|
||||
"gix-utils",
|
||||
"gix-validate 0.9.4",
|
||||
"once_cell",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-actor"
|
||||
version = "0.33.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20018a1a6332e065f1fcc8305c1c932c6b8c9985edea2284b3c79dc6fa3ee4b2"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-date",
|
||||
"gix-utils",
|
||||
"itoa",
|
||||
"thiserror 2.0.18",
|
||||
"winnow 0.6.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-bitmap"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d982fc7ef0608e669851d0d2a6141dae74c60d5a27e8daa451f2a4857bbf41e2"
|
||||
dependencies = [
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-chunk"
|
||||
version = "0.4.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5c356b3825677cb6ff579551bb8311a81821e184453cbd105e2fc5311b288eeb"
|
||||
dependencies = [
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-command"
|
||||
version = "0.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cb410b84d6575db45e62025a9118bdbf4d4b099ce7575a76161e898d9ca98df1"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-path",
|
||||
"gix-trace",
|
||||
"shell-words",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-commitgraph"
|
||||
version = "0.26.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e23a8ec2d8a16026a10dafdb6ed51bcfd08f5d97f20fa52e200bc50cb72e4877"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-chunk",
|
||||
"gix-features",
|
||||
"gix-hash",
|
||||
"memmap2",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-config"
|
||||
version = "0.43.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "377c1efd2014d5d469e0b3cd2952c8097bce9828f634e04d5665383249f1d9e9"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-config-value",
|
||||
"gix-features",
|
||||
"gix-glob",
|
||||
"gix-path",
|
||||
"gix-ref",
|
||||
"gix-sec",
|
||||
"memchr",
|
||||
"once_cell",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
"unicode-bom",
|
||||
"winnow 0.6.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-config-value"
|
||||
version = "0.14.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8dc2c844c4cf141884678cabef736fd91dd73068b9146e6f004ba1a0457944b6"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bstr",
|
||||
"gix-path",
|
||||
"libc",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-date"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "daa30058ec7d3511fbc229e4f9e696a35abd07ec5b82e635eff864a2726217e4"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"itoa",
|
||||
"jiff",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-diff"
|
||||
version = "0.50.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62afb7f4ca0acdf4e9dad92065b2eb1bf2993bcc5014b57bc796e3a365b17c4d"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-hash",
|
||||
"gix-object",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-discover"
|
||||
version = "0.38.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d0c2414bdf04064e0f5a5aa029dfda1e663cf9a6c4bfc8759f2d369299bb65d8"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"dunce",
|
||||
"gix-fs",
|
||||
"gix-hash",
|
||||
"gix-path",
|
||||
"gix-ref",
|
||||
"gix-sec",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-features"
|
||||
version = "0.40.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8bfdd4838a8d42bd482c9f0cb526411d003ee94cc7c7b08afe5007329c71d554"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"flate2",
|
||||
"gix-hash",
|
||||
"gix-trace",
|
||||
"gix-utils",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"prodash",
|
||||
"sha1_smol",
|
||||
"thiserror 2.0.18",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-fs"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "182e7fa7bfdf44ffb7cfe7451b373cdf1e00870ac9a488a49587a110c562063d"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"gix-features",
|
||||
"gix-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-glob"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4e9c7249fa0a78f9b363aa58323db71e0a6161fd69860ed6f48dedf0ef3a314e"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bstr",
|
||||
"gix-features",
|
||||
"gix-path",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-hash"
|
||||
version = "0.16.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e81c5ec48649b1821b3ed066a44efb95f1a268b35c1d91295e61252539fbe9f8"
|
||||
dependencies = [
|
||||
"faster-hex",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-hashtable"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "189130bc372accd02e0520dc5ab1cef318dcc2bc829b76ab8d84bbe90ac212d1"
|
||||
dependencies = [
|
||||
"gix-hash",
|
||||
"hashbrown 0.14.5",
|
||||
"parking_lot",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-index"
|
||||
version = "0.38.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "acd12e3626879369310fffe2ac61acc828613ef656b50c4ea984dd59d7dc85d8"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bstr",
|
||||
"filetime",
|
||||
"fnv",
|
||||
"gix-bitmap",
|
||||
"gix-features",
|
||||
"gix-fs",
|
||||
"gix-hash",
|
||||
"gix-lock",
|
||||
"gix-object",
|
||||
"gix-traverse",
|
||||
"gix-utils",
|
||||
"gix-validate 0.9.4",
|
||||
"hashbrown 0.14.5",
|
||||
"itoa",
|
||||
"libc",
|
||||
"memmap2",
|
||||
"rustix 0.38.44",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-lock"
|
||||
version = "16.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9739815270ff6940968441824d162df9433db19211ca9ba8c3fc1b50b849c642"
|
||||
dependencies = [
|
||||
"gix-tempfile",
|
||||
"gix-utils",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-object"
|
||||
version = "0.47.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ddc4b3a0044244f0fe22347fb7a79cca165e37829d668b41b85ff46a43e5fd68"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-actor",
|
||||
"gix-date",
|
||||
"gix-features",
|
||||
"gix-hash",
|
||||
"gix-hashtable",
|
||||
"gix-path",
|
||||
"gix-utils",
|
||||
"gix-validate 0.9.4",
|
||||
"itoa",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
"winnow 0.6.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-odb"
|
||||
version = "0.67.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3e93457df69cd09573608ce9fa4f443fbd84bc8d15d8d83adecd471058459c1b"
|
||||
dependencies = [
|
||||
"arc-swap",
|
||||
"gix-date",
|
||||
"gix-features",
|
||||
"gix-fs",
|
||||
"gix-hash",
|
||||
"gix-hashtable",
|
||||
"gix-object",
|
||||
"gix-pack",
|
||||
"gix-path",
|
||||
"gix-quote",
|
||||
"parking_lot",
|
||||
"tempfile",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-pack"
|
||||
version = "0.57.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fc13a475b3db735617017fb35f816079bf503765312d4b1913b18cf96f3fa515"
|
||||
dependencies = [
|
||||
"clru",
|
||||
"gix-chunk",
|
||||
"gix-features",
|
||||
"gix-hash",
|
||||
"gix-hashtable",
|
||||
"gix-object",
|
||||
"gix-path",
|
||||
"memmap2",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-packetline"
|
||||
version = "0.18.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "123844a70cf4d5352441dc06bab0da8aef61be94ec239cb631e0ba01dc6d3a04"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"faster-hex",
|
||||
"gix-trace",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-path"
|
||||
version = "0.10.22"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7cb06c3e4f8eed6e24fd915fa93145e28a511f4ea0e768bae16673e05ed3f366"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-trace",
|
||||
"gix-validate 0.10.1",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-protocol"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c61bd61afc6b67d213241e2100394c164be421e3f7228d3521b04f48ca5ba90"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-date",
|
||||
"gix-features",
|
||||
"gix-hash",
|
||||
"gix-ref",
|
||||
"gix-shallow",
|
||||
"gix-transport",
|
||||
"gix-utils",
|
||||
"maybe-async",
|
||||
"thiserror 2.0.18",
|
||||
"winnow 0.6.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-quote"
|
||||
version = "0.4.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e49357fccdb0c85c0d3a3292a9f6db32d9b3535959b5471bb9624908f4a066c6"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-utils",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-ref"
|
||||
version = "0.50.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47adf4c5f933429f8554e95d0d92eee583cfe4b95d2bf665cd6fd4a1531ee20c"
|
||||
dependencies = [
|
||||
"gix-actor",
|
||||
"gix-features",
|
||||
"gix-fs",
|
||||
"gix-hash",
|
||||
"gix-lock",
|
||||
"gix-object",
|
||||
"gix-path",
|
||||
"gix-tempfile",
|
||||
"gix-utils",
|
||||
"gix-validate 0.9.4",
|
||||
"memmap2",
|
||||
"thiserror 2.0.18",
|
||||
"winnow 0.6.26",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-refspec"
|
||||
version = "0.28.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59650228d8f612f68e7f7a25f517fcf386c5d0d39826085492e94766858b0a90"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-hash",
|
||||
"gix-revision",
|
||||
"gix-validate 0.9.4",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-revision"
|
||||
version = "0.32.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fe28bbccca55da6d66e6c6efc6bb4003c29d407afd8178380293729733e6b53"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"bstr",
|
||||
"gix-commitgraph",
|
||||
"gix-date",
|
||||
"gix-hash",
|
||||
"gix-hashtable",
|
||||
"gix-object",
|
||||
"gix-revwalk",
|
||||
"gix-trace",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-revwalk"
|
||||
version = "0.18.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4ecb80c235b1e9ef2b99b23a81ea50dd569a88a9eb767179793269e0e616247"
|
||||
dependencies = [
|
||||
"gix-commitgraph",
|
||||
"gix-date",
|
||||
"gix-hash",
|
||||
"gix-hashtable",
|
||||
"gix-object",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-sec"
|
||||
version = "0.10.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "47aeb0f13de9ef2f3033f5ff218de30f44db827ac9f1286f9ef050aacddd5888"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"gix-path",
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-shallow"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ab72543011e303e52733c85bef784603ef39632ddf47f69723def52825e35066"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-hash",
|
||||
"gix-lock",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-tempfile"
|
||||
version = "16.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2558f423945ef24a8328c55d1fd6db06b8376b0e7013b1bb476cc4ffdf678501"
|
||||
dependencies = [
|
||||
"gix-fs",
|
||||
"libc",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-trace"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6f23569e55f2ffaf958617353b9734a7d52a7c19c439eeaa5e3efc217fd2270e"
|
||||
|
||||
[[package]]
|
||||
name = "gix-transport"
|
||||
version = "0.45.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11187418489477b1b5b862ae1aedbbac77e582f2c4b0ef54280f20cfe5b964d9"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-command",
|
||||
"gix-features",
|
||||
"gix-packetline",
|
||||
"gix-quote",
|
||||
"gix-sec",
|
||||
"gix-url",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-traverse"
|
||||
version = "0.44.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2bec70e53896586ef32a3efa7e4427b67308531ed186bb6120fb3eca0f0d61b4"
|
||||
dependencies = [
|
||||
"bitflags",
|
||||
"gix-commitgraph",
|
||||
"gix-date",
|
||||
"gix-hash",
|
||||
"gix-hashtable",
|
||||
"gix-object",
|
||||
"gix-revwalk",
|
||||
"smallvec",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-url"
|
||||
version = "0.29.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "29218c768b53dd8f116045d87fec05b294c731a4b2bdd257eeca2084cc150b13"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"gix-features",
|
||||
"gix-path",
|
||||
"percent-encoding",
|
||||
"thiserror 2.0.18",
|
||||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-utils"
|
||||
version = "0.1.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ff08f24e03ac8916c478c8419d7d3c33393da9bb41fa4c24455d5406aeefd35f"
|
||||
dependencies = [
|
||||
"fastrand",
|
||||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-validate"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "34b5f1253109da6c79ed7cf6e1e38437080bb6d704c76af14c93e2f255234084"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gix-validate"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b1e63a5b516e970a594f870ed4571a8fdcb8a344e7bd407a20db8bd61dbfde4"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"thiserror 2.0.18",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.3"
|
||||
@@ -3737,6 +4339,16 @@ dependencies = [
|
||||
"unicode-normalization",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kebab-parse-code"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"gix",
|
||||
"kebab-core",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kebab-parse-image"
|
||||
version = "0.6.0"
|
||||
@@ -4846,6 +5458,17 @@ dependencies = [
|
||||
"thread-tree",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maybe-async"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "746873a384ad60adc5db74471dfaba74bd278afbdcfd81db93fafcdfc8b5ca0c"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "maybe-rayon"
|
||||
version = "0.1.1"
|
||||
@@ -5702,6 +6325,16 @@ dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prodash"
|
||||
version = "29.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f04bb108f648884c23b98a0e940ebc2c93c0c3b89f04dbaf7eb8256ce617d1bc"
|
||||
dependencies = [
|
||||
"log",
|
||||
"parking_lot",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "profiling"
|
||||
version = "1.0.17"
|
||||
@@ -6841,6 +7474,12 @@ dependencies = [
|
||||
"unsafe-libyaml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sha1_smol"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"
|
||||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.10.9"
|
||||
@@ -6861,6 +7500,12 @@ dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shell-words"
|
||||
version = "1.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dc6fe69c597f9c37bfeeeeeb33da3530379845f10be461a66d16d03eca2ded77"
|
||||
|
||||
[[package]]
|
||||
name = "shellexpand"
|
||||
version = "3.1.2"
|
||||
@@ -7889,6 +8534,12 @@ version = "2.9.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dbc4bc3a9f746d862c45cb89d705aa10f187bb96c76001afab07a0d35ce60142"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-bom"
|
||||
version = "2.0.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7eec5d1121208364f6793f7d2e222bf75a915c19557537745b195b253dd64217"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.24"
|
||||
@@ -8587,6 +9238,15 @@ version = "0.53.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.6.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e90edd2ac1aa278a5c4599b1d89cf03074b610800f866d4026dc199d7929a28"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winnow"
|
||||
version = "0.7.15"
|
||||
|
||||
@@ -23,6 +23,7 @@ members = [
|
||||
"crates/kebab-parse-pdf",
|
||||
"crates/kebab-tui",
|
||||
"crates/kebab-mcp",
|
||||
"crates/kebab-parse-code",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
@@ -81,6 +82,10 @@ rmcp = { version = "1.6", default-features = false, features = ["server"
|
||||
# sync via reqwest::blocking — wiremock is dev-only there).
|
||||
wiremock = "0.6"
|
||||
base64 = "0.22"
|
||||
# Pure-Rust git library for repo metadata detection (kebab-parse-code).
|
||||
# No `git` binary required. Default features are disabled; only the `revision`
|
||||
# feature is enabled, which covers the HEAD name + commit SHA queries used here.
|
||||
gix = { version = "0.70", default-features = false, features = ["revision"] }
|
||||
|
||||
# Disk-footprint trim for dev / test builds. Codegen, opt-level, and
|
||||
# behavior are unchanged — only DWARF debug info is reduced (line
|
||||
|
||||
13
crates/kebab-parse-code/Cargo.toml
Normal file
13
crates/kebab-parse-code/Cargo.toml
Normal file
@@ -0,0 +1,13 @@
|
||||
[package]
|
||||
name = "kebab-parse-code"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
license = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
gix = { workspace = true }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
42
crates/kebab-parse-code/src/lang.rs
Normal file
42
crates/kebab-parse-code/src/lang.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
//! Canonical extension → language identifier mapping (spec §3.5).
|
||||
//!
|
||||
//! Lowercase canonical identifiers, matching tree-sitter parser conventions:
|
||||
//! `rust`, `python`, `typescript`, `javascript`, `go`, `java`, `kotlin`, `c`,
|
||||
//! `cpp`, `yaml`, `toml`, `json`, `shell`, `make`, `dockerfile`.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
/// Maps a file path to its canonical, lowercase language identifier.
///
/// Lookup order:
/// 1. The whole file name, compared case-sensitively (`Dockerfile`,
///    `Makefile`, `GNUmakefile`).
/// 2. The file extension, compared ASCII-case-insensitively.
///
/// Returns `None` when neither step recognizes the path, including when the
/// path has no extension or the extension is not valid UTF-8.
pub fn code_lang_for_path(path: &Path) -> Option<&'static str> {
    // Whole-filename matches win over the extension table.
    let by_name = match path.file_name().and_then(|name| name.to_str()) {
        Some("Dockerfile") => Some("dockerfile"),
        Some("Makefile" | "GNUmakefile") => Some("make"),
        _ => None,
    };
    if by_name.is_some() {
        return by_name;
    }

    let extension = path.extension()?.to_str()?.to_ascii_lowercase();
    let lang = match extension.as_str() {
        "rs" => "rust",
        "py" | "pyi" => "python",
        "ts" | "tsx" => "typescript",
        "js" | "mjs" | "cjs" | "jsx" => "javascript",
        "go" => "go",
        "java" => "java",
        "kt" | "kts" => "kotlin",
        "c" | "h" => "c",
        "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => "cpp",
        "yaml" | "yml" => "yaml",
        "toml" => "toml",
        "json" => "json",
        "sh" | "bash" | "zsh" => "shell",
        "mk" => "make",
        "dockerfile" => "dockerfile",
        _ => return None,
    };
    Some(lang)
}
|
||||
22
crates/kebab-parse-code/src/lib.rs
Normal file
22
crates/kebab-parse-code/src/lib.rs
Normal file
@@ -0,0 +1,22 @@
|
||||
//! `kebab-parse-code` — language-aware parsing for code corpora.
|
||||
//!
|
||||
//! Phase 1A-1 ships infrastructure only:
|
||||
//!
|
||||
//! - [`lang::code_lang_for_path`] — extension → language identifier.
|
||||
//! - [`repo::detect_repo`] — `.git/` walk-up → repo / branch / commit metadata.
|
||||
//! - [`skip::is_generated_file`] / [`skip::is_oversized`] — pre-ingest skip
|
||||
//! helpers consulted by `kebab-source-fs`.
|
||||
//! - [`skip::BUILTIN_BLACKLIST`] — 6-entry safety-net pattern list.
|
||||
//!
|
||||
//! Per-language parser modules (`rust`, `python`, `typescript`, …) land in
|
||||
//! later phases (1A-2 onwards). The crate boundary follows other
|
||||
//! `kebab-parse-*` crates per design §8: must NOT depend on store / embed
|
||||
//! / llm / rag.
|
||||
|
||||
pub mod lang;
|
||||
pub mod repo;
|
||||
pub mod skip;
|
||||
|
||||
pub use lang::code_lang_for_path;
|
||||
pub use repo::{RepoMeta, detect_repo};
|
||||
pub use skip::{BUILTIN_BLACKLIST, is_generated_file, is_oversized};
|
||||
61
crates/kebab-parse-code/src/repo.rs
Normal file
61
crates/kebab-parse-code/src/repo.rs
Normal file
@@ -0,0 +1,61 @@
|
||||
//! Git repo auto-detection (spec §5.1).
|
||||
//!
|
||||
//! Walks up from `path` looking for a `.git/` directory. If found, reads
|
||||
//! repo dir name, current branch, and HEAD commit using `gix` (pure Rust;
|
||||
//! no `git` binary on PATH required).
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
/// Metadata describing the git repository that contains a path.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct RepoMeta {
    /// Name of the directory that holds the `.git` entry.
    pub name: String,
    /// Short name of the branch HEAD points at, `"detached"` for a detached
    /// HEAD, or `None` when it was not read.
    pub branch: Option<String>,
    /// Commit id at HEAD rendered as a hex string, or `None` when it was not
    /// read (for example in a repository without commits).
    pub commit: Option<String>,
}
|
||||
|
||||
/// Walk up from `path` until a `.git/` directory is found. Returns repo
|
||||
/// metadata, or `None` if no repo boundary is reached before the filesystem
|
||||
/// root.
|
||||
///
|
||||
/// - `name`: directory name containing `.git/`.
|
||||
/// - `branch`: current HEAD branch, or `"detached"` if detached HEAD, or
|
||||
/// `None` if branch can't be read.
|
||||
/// - `commit`: 40-hex commit SHA at HEAD, or `None` if empty repo / read
|
||||
/// failure.
|
||||
///
|
||||
/// `.git/` as a file (worktree marker / submodule) returns `None` for
|
||||
/// `branch` and `commit` and falls back to the parent dir name for `name`.
|
||||
pub fn detect_repo(path: &Path) -> Option<RepoMeta> {
|
||||
let mut cur = if path.is_dir() { path } else { path.parent()? };
|
||||
loop {
|
||||
let dotgit = cur.join(".git");
|
||||
if dotgit.is_dir() {
|
||||
let name = cur.file_name()?.to_string_lossy().into_owned();
|
||||
let (branch, commit) = read_head(cur);
|
||||
return Some(RepoMeta { name, branch, commit });
|
||||
} else if dotgit.is_file() {
|
||||
let name = cur.file_name()?.to_string_lossy().into_owned();
|
||||
return Some(RepoMeta { name, branch: None, commit: None });
|
||||
}
|
||||
cur = cur.parent()?;
|
||||
}
|
||||
}
|
||||
|
||||
fn read_head(repo_dir: &Path) -> (Option<String>, Option<String>) {
|
||||
match gix::open(repo_dir) {
|
||||
Ok(repo) => {
|
||||
let branch = repo
|
||||
.head_name()
|
||||
.ok()
|
||||
.flatten()
|
||||
.map(|n| n.shorten().to_string())
|
||||
.or_else(|| Some("detached".to_string()));
|
||||
let commit = repo
|
||||
.head_id()
|
||||
.ok()
|
||||
.map(|id| id.to_string());
|
||||
(branch, commit)
|
||||
}
|
||||
Err(_) => (None, None),
|
||||
}
|
||||
}
|
||||
65
crates/kebab-parse-code/src/skip.rs
Normal file
65
crates/kebab-parse-code/src/skip.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
//! Pre-ingest skip helpers (spec §5.2 + §5.3 + §5.4).
|
||||
//!
|
||||
//! - [`BUILTIN_BLACKLIST`] — 6 gitignore-style patterns universal across
|
||||
//! ecosystems. Source of truth: spec §5.2.
|
||||
//! - [`is_generated_file`] — reads first ~512 bytes, checks for 7
|
||||
//! case-insensitive markers.
|
||||
//! - [`is_oversized`] — byte cap then line cap.
|
||||
|
||||
use anyhow::Result;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Read};
|
||||
use std::path::Path;
|
||||
|
||||
/// Built-in gitignore-style exclusion patterns (6 entries).
///
/// These are applied in addition to `.gitignore` and `.kebabignore`. A user
/// can re-include a path with a negated pattern (`!pattern`) in
/// `.kebabignore`.
pub const BUILTIN_BLACKLIST: &[&str] = &[
    "**/node_modules/**",
    "**/target/**",
    "**/__pycache__/**",
    "**/.venv/**",
    "**/venv/**",
    "**/env/**",
];
|
||||
|
||||
/// Reports whether `path` looks like a generated file.
///
/// Reads up to the first 512 bytes of the file and searches the first 10
/// lines of that header, case-insensitively, for any of 7 generated-file
/// markers (`@generated`, `code generated by`, `do not edit`,
/// `do not modify`, `automatically generated`, `auto-generated`,
/// `autogenerated`).
///
/// The header is decoded lossily: bytes that are not valid UTF-8 — including
/// a multi-byte character cut in half by the 512-byte limit — are replaced
/// rather than causing the whole header to be discarded, so an ASCII marker
/// is still found in files containing non-ASCII text.
///
/// Returns `Ok(true)` on a match and `Ok(false)` otherwise (including for an
/// empty file).
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read.
pub fn is_generated_file(path: &Path) -> Result<bool> {
    const HEAD_BYTES: u64 = 512;
    const HEAD_LINES: usize = 10;
    const MARKERS: [&str; 7] = [
        "@generated",
        "code generated by",
        "do not edit",
        "do not modify",
        "automatically generated",
        "auto-generated",
        "autogenerated",
    ];

    // `take` + `read_to_end` keeps reading until the limit or EOF, so a short
    // read from the OS cannot truncate the header early.
    let mut head = Vec::new();
    File::open(path)?.take(HEAD_BYTES).read_to_end(&mut head)?;

    let text = String::from_utf8_lossy(&head);
    // No marker contains a newline, so checking line by line is equivalent
    // to checking the joined header.
    let found = text.lines().take(HEAD_LINES).any(|line| {
        let line = line.to_ascii_lowercase();
        MARKERS.iter().any(|marker| line.contains(marker))
    });
    Ok(found)
}
|
||||
|
||||
/// Reports whether `path` exceeds `max_bytes` or `max_lines`.
///
/// The byte cap is checked first from file metadata (cheap). Only if the
/// file is within the byte cap are its lines counted, streaming and stopping
/// as soon as the count exceeds `max_lines`.
///
/// Lines are counted on raw bytes (`\n`-terminated, with a final
/// unterminated line counted as a line), so files that are not valid UTF-8
/// are measured instead of producing an error.
///
/// # Errors
///
/// Returns an error if the file metadata cannot be read, or the file cannot
/// be opened or read.
pub fn is_oversized(path: &Path, max_bytes: u64, max_lines: u32) -> Result<bool> {
    if std::fs::metadata(path)?.len() > max_bytes {
        return Ok(true);
    }

    let mut reader = BufReader::new(File::open(path)?);
    // One buffer reused for every line; its size is bounded by the byte cap
    // checked above.
    let mut line = Vec::new();
    let mut count: u32 = 0;
    loop {
        line.clear();
        if reader.read_until(b'\n', &mut line)? == 0 {
            // EOF reached without exceeding the line cap.
            return Ok(false);
        }
        count = count.saturating_add(1);
        if count > max_lines {
            return Ok(true);
        }
    }
}
|
||||
64
crates/kebab-parse-code/tests/lang.rs
Normal file
64
crates/kebab-parse-code/tests/lang.rs
Normal file
@@ -0,0 +1,64 @@
|
||||
use kebab_parse_code::code_lang_for_path;
|
||||
use std::path::Path;
|
||||
|
||||
/// Every supported extension resolves to its canonical language identifier.
#[test]
fn known_extensions_map_to_canonical_identifiers() {
    // (language, space-separated extensions that must map to it)
    let table = [
        ("rust", "rs"),
        ("python", "py pyi"),
        ("typescript", "ts tsx"),
        ("javascript", "js mjs cjs jsx"),
        ("go", "go"),
        ("java", "java"),
        ("kotlin", "kt kts"),
        ("c", "c h"),
        ("cpp", "cpp cc cxx hpp hh hxx"),
        ("yaml", "yaml yml"),
        ("toml", "toml"),
        ("json", "json"),
        ("shell", "sh bash zsh"),
        ("make", "mk"),
    ];
    for (lang, extensions) in table {
        for ext in extensions.split_whitespace() {
            let path = format!("foo.{ext}");
            assert_eq!(
                code_lang_for_path(Path::new(&path)),
                Some(lang),
                "path = {path}"
            );
        }
    }
}
|
||||
|
||||
/// Well-known file names (and the `.dockerfile` suffix) resolve to a language.
#[test]
fn special_filenames_map_to_identifiers() {
    let expectations = [
        ("Dockerfile", "dockerfile"),
        ("foo.dockerfile", "dockerfile"),
        ("Makefile", "make"),
    ];
    for (file_name, lang) in expectations {
        assert_eq!(code_lang_for_path(Path::new(file_name)), Some(lang));
    }
}
|
||||
|
||||
/// Unrecognised or missing extensions yield no language.
#[test]
fn unknown_extension_returns_none() {
    for file_name in ["foo.docx", "foo", "foo.unknown"] {
        assert_eq!(code_lang_for_path(Path::new(file_name)), None);
    }
}
|
||||
|
||||
/// Upper-case extensions are recognised just like lower-case ones.
#[test]
fn case_insensitive() {
    let shouting = [("Foo.RS", "rust"), ("FOO.YAML", "yaml")];
    for (file_name, lang) in shouting {
        assert_eq!(code_lang_for_path(Path::new(file_name)), Some(lang));
    }
}
|
||||
62
crates/kebab-parse-code/tests/repo.rs
Normal file
62
crates/kebab-parse-code/tests/repo.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use kebab_parse_code::repo::detect_repo;
|
||||
use std::fs;
|
||||
use std::process::Command;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Turn `root` into a git repository containing a single commit.
///
/// Runs the `git` CLI in `root`: initialises the repository, sets a local
/// user identity, writes `README.md`, stages it and commits it.
///
/// # Panics
///
/// Panics if `git` cannot be spawned, if any `git` invocation exits with a
/// non-zero status, or if `README.md` cannot be written. Checking the exit
/// status makes a broken fixture fail here instead of surfacing later as a
/// confusing assertion failure in the test that uses it.
fn init_git_repo(root: &std::path::Path) {
    let run = |args: &[&str]| {
        let status = Command::new("git")
            .args(args)
            .current_dir(root)
            .status()
            .expect("git command failed");
        assert!(
            status.success(),
            "`git {}` exited with {status}",
            args.join(" ")
        );
    };

    run(&["init", "-q"]);
    // A repo-local identity so `git commit` works on machines without a
    // global git configuration.
    run(&["config", "user.email", "test@test"]);
    run(&["config", "user.name", "test"]);
    fs::write(root.join("README.md"), "hi").unwrap();
    run(&["add", "README.md"]);
    run(&["commit", "-q", "-m", "init"]);
}
|
||||
|
||||
/// A file in a plain temporary directory tree is not reported as part of a repo.
#[test]
fn detect_repo_returns_none_outside_git() {
    let sandbox = TempDir::new().unwrap();
    let dir = sandbox.path().join("a/b");
    fs::create_dir_all(&dir).unwrap();

    let file = dir.join("c.txt");
    fs::write(&file, "x").unwrap();

    let detected = detect_repo(&file);
    assert!(detected.is_none());
}
|
||||
|
||||
/// A file nested below the repository root is still attributed to that repo,
/// with name, branch and a 40-character commit id filled in.
#[test]
fn detect_repo_walks_up_to_git_dir() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path().join("myrepo");
    fs::create_dir_all(&root).unwrap();
    init_git_repo(&root);

    // Two directory levels below the repository root.
    let src_dir = root.join("src/deep");
    fs::create_dir_all(&src_dir).unwrap();
    let file = src_dir.join("file.rs");
    fs::write(&file, "x").unwrap();

    let meta = detect_repo(&file).expect("should detect repo");
    assert_eq!(meta.name, "myrepo");
    assert!(meta.branch.is_some());
    assert!(meta.commit.is_some());
    assert_eq!(meta.commit.as_ref().map(|sha| sha.len()), Some(40));
}
|
||||
|
||||
/// Two files in the same repository report the same repo name and commit.
#[test]
fn detect_repo_caches_per_path_call_for_repeated_files_in_same_repo() {
    let sandbox = TempDir::new().unwrap();
    let root = sandbox.path().join("myrepo");
    fs::create_dir_all(&root).unwrap();
    init_git_repo(&root);

    let files = ["a.rs", "b.rs"].map(|file_name| root.join(file_name));
    for file in &files {
        fs::write(file, "x").unwrap();
    }

    let first = detect_repo(&files[0]).unwrap();
    let second = detect_repo(&files[1]).unwrap();
    assert_eq!(first.name, second.name);
    assert_eq!(first.commit, second.commit);
}
|
||||
74
crates/kebab-parse-code/tests/skip.rs
Normal file
74
crates/kebab-parse-code/tests/skip.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
use kebab_parse_code::skip::{BUILTIN_BLACKLIST, is_generated_file, is_oversized};
|
||||
use std::fs;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
/// Each of the seven header markers, in assorted casing and comment styles,
/// flags the file as generated.
#[test]
fn generated_header_markers_trigger_skip() {
    [
        "// @generated\nfn foo() {}\n",
        "// Code generated by tonic-build. DO NOT EDIT.\nfn x() {}\n",
        "/* DO NOT EDIT */\nfn x() {}\n",
        "/* do not modify */\nfn x() {}\n",
        "// AUTOMATICALLY GENERATED\nfn x() {}\n",
        "# auto-generated\ndef x(): pass\n",
        "// autogenerated\nfn x() {}\n",
    ]
    .into_iter()
    .for_each(|content| {
        let tmp = NamedTempFile::new().unwrap();
        fs::write(tmp.path(), content).unwrap();
        let flagged = is_generated_file(tmp.path()).unwrap();
        assert!(flagged, "content: {content:?}");
    });
}
|
||||
|
||||
/// Ordinary hand-written code without any marker is not flagged.
#[test]
fn normal_code_is_not_flagged_generated() {
    let tmp = NamedTempFile::new().unwrap();
    fs::write(tmp.path(), "fn main() {\n println!(\"hi\");\n}\n").unwrap();

    let flagged = is_generated_file(tmp.path()).unwrap();
    assert!(!flagged);
}
|
||||
|
||||
/// A zero-length file is readable and is not flagged as generated.
#[test]
fn is_generated_returns_false_for_empty_file() {
    let tmp = NamedTempFile::new().unwrap();
    fs::write(tmp.path(), "").unwrap();

    let flagged = is_generated_file(tmp.path()).unwrap();
    assert!(!flagged);
}
|
||||
|
||||
/// A single-line file larger than the byte cap is oversized.
#[test]
fn oversized_by_bytes_returns_true() {
    let tmp = NamedTempFile::new().unwrap();
    // 300 000 bytes on one line: above the 262 144-byte cap, below the line cap.
    let payload = "x".repeat(300_000);
    fs::write(tmp.path(), &payload).unwrap();

    let oversized = is_oversized(tmp.path(), 262_144, 5_000).unwrap();
    assert!(oversized);
}
|
||||
|
||||
/// A small file with more lines than the line cap is oversized.
#[test]
fn oversized_by_lines_returns_true() {
    let tmp = NamedTempFile::new().unwrap();
    // 6 000 two-byte lines: far below the byte cap, above the 5 000-line cap.
    let payload = "x\n".repeat(6_000);
    fs::write(tmp.path(), &payload).unwrap();

    let oversized = is_oversized(tmp.path(), 262_144, 5_000).unwrap();
    assert!(oversized);
}
|
||||
|
||||
/// A one-line file is within both caps.
#[test]
fn small_file_returns_false_for_oversize() {
    let tmp = NamedTempFile::new().unwrap();
    fs::write(tmp.path(), "fn foo() {}\n").unwrap();

    let oversized = is_oversized(tmp.path(), 262_144, 5_000).unwrap();
    assert!(!oversized);
}
|
||||
|
||||
/// The built-in blacklist holds exactly the six expected patterns.
#[test]
fn builtin_blacklist_has_exactly_six_entries() {
    assert_eq!(BUILTIN_BLACKLIST.len(), 6);

    [
        "**/node_modules/**",
        "**/target/**",
        "**/__pycache__/**",
        "**/.venv/**",
        "**/venv/**",
        "**/env/**",
    ]
    .into_iter()
    .for_each(|pat| {
        assert!(BUILTIN_BLACKLIST.contains(&pat), "missing pattern: {pat}");
    });
}
|
||||
Reference in New Issue
Block a user