Tasks 5-8: new `kebab-parse-code` crate with three infrastructure modules for the code ingest framework. Ships lang.rs (extension→language identifier mapping), repo.rs (.git walk-up via gix 0.70 for RepoMeta), and skip.rs (BUILTIN_BLACKLIST, is_generated_file, is_oversized). 14 integration tests across three test files, all passing; clippy -D warnings clean. Note: gix pinned to 0.70 (not 0.83 as originally suggested) because 0.83 fails to compile against Rust 1.94.1 due to non-exhaustive match patterns in gix-hash. 0.70 resolves cleanly and has identical head_name/head_id API. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
75 lines
2.2 KiB
Rust
75 lines
2.2 KiB
Rust
use kebab_parse_code::skip::{BUILTIN_BLACKLIST, is_generated_file, is_oversized};
|
|
use std::fs;
|
|
use tempfile::NamedTempFile;
|
|
|
|
/// Every sample below opens with a comment marking the file as
/// machine-produced; `is_generated_file` must report `true` for each one.
#[test]
fn generated_header_markers_trigger_skip() {
    // The samples vary the marker wording, letter case, and comment syntax
    // (`//`, `/* */`, `#`).
    const SAMPLES: [&str; 7] = [
        "// @generated\nfn foo() {}\n",
        "// Code generated by tonic-build. DO NOT EDIT.\nfn x() {}\n",
        "/* DO NOT EDIT */\nfn x() {}\n",
        "/* do not modify */\nfn x() {}\n",
        "// AUTOMATICALLY GENERATED\nfn x() {}\n",
        "# auto-generated\ndef x(): pass\n",
        "// autogenerated\nfn x() {}\n",
    ];

    for &content in &SAMPLES {
        // A fresh temp file per sample keeps the cases independent.
        let scratch = NamedTempFile::new().unwrap();
        fs::write(scratch.path(), content).unwrap();

        let flagged = is_generated_file(scratch.path()).unwrap();
        assert!(flagged, "content: {content:?}");
    }
}
|
|
|
|
/// A small hand-written program with no marker comment must not be
/// reported as generated.
#[test]
fn normal_code_is_not_flagged_generated() {
    let source = "fn main() {\n println!(\"hi\");\n}\n";

    let scratch = NamedTempFile::new().unwrap();
    fs::write(scratch.path(), source).unwrap();

    let flagged = is_generated_file(scratch.path()).unwrap();
    assert!(!flagged);
}
|
|
|
|
/// A zero-length file must yield `Ok(false)` from `is_generated_file`,
/// not an error and not a positive match.
#[test]
fn is_generated_returns_false_for_empty_file() {
    let scratch = NamedTempFile::new().unwrap();
    // Explicitly write empty content so the file's state does not depend on
    // how the temp file was created.
    fs::write(scratch.path(), "").unwrap();

    let flagged = is_generated_file(scratch.path()).unwrap();
    assert!(!flagged);
}
|
|
|
|
/// A 300,000-byte file with no newlines exceeds the 262,144-byte limit
/// passed to `is_oversized`, so the check must return `true`.
#[test]
fn oversized_by_bytes_returns_true() {
    let scratch = NamedTempFile::new().unwrap();
    let payload = "x".repeat(300_000);
    fs::write(scratch.path(), payload.as_bytes()).unwrap();

    let too_big = is_oversized(scratch.path(), 262_144, 5_000).unwrap();
    assert!(too_big);
}
|
|
|
|
/// 6,000 two-byte lines total only 12,000 bytes (under the byte limit) but
/// exceed the 5,000-line limit, so `is_oversized` must return `true`.
#[test]
fn oversized_by_lines_returns_true() {
    let scratch = NamedTempFile::new().unwrap();
    let payload = "x\n".repeat(6_000);
    fs::write(scratch.path(), payload.as_bytes()).unwrap();

    let too_big = is_oversized(scratch.path(), 262_144, 5_000).unwrap();
    assert!(too_big);
}
|
|
|
|
/// A one-line file is well inside both limits passed to `is_oversized`,
/// so the check must return `false`.
#[test]
fn small_file_returns_false_for_oversize() {
    let source = "fn foo() {}\n";

    let scratch = NamedTempFile::new().unwrap();
    fs::write(scratch.path(), source).unwrap();

    let too_big = is_oversized(scratch.path(), 262_144, 5_000).unwrap();
    assert!(!too_big);
}
|
|
|
|
/// Pins the contents of `BUILTIN_BLACKLIST`: it must hold exactly six
/// patterns, and each pattern listed here must be present.
#[test]
fn builtin_blacklist_has_exactly_six_entries() {
    // Check the length first so an unexpected extra entry is caught even
    // when every expected pattern is present.
    assert_eq!(BUILTIN_BLACKLIST.len(), 6);

    let expected = [
        "**/node_modules/**",
        "**/target/**",
        "**/__pycache__/**",
        "**/.venv/**",
        "**/venv/**",
        "**/env/**",
    ];

    expected.iter().for_each(|pat| {
        assert!(BUILTIN_BLACKLIST.contains(pat), "missing pattern: {pat}");
    });
}
|