Tasks 5-8: new `kebab-parse-code` crate with three infrastructure modules for the code ingest framework. Ships lang.rs (extension→language identifier mapping), repo.rs (.git walk-up via gix 0.70 for RepoMeta), and skip.rs (BUILTIN_BLACKLIST, is_generated_file, is_oversized). 14 integration tests across three test files, all passing; clippy -D warnings clean. Note: gix pinned to 0.70 (not 0.83 as originally suggested) because 0.83 fails to compile against Rust 1.94.1 due to non-exhaustive match patterns in gix-hash. 0.70 resolves cleanly and has identical head_name/head_id API. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
43 lines
1.6 KiB
Rust
43 lines
1.6 KiB
Rust
//! Canonical extension → language identifier mapping (spec §3.5).
|
|
//!
|
|
//! Lowercase canonical identifiers, matching tree-sitter parser conventions:
|
|
//! `rust`, `python`, `typescript`, `javascript`, `go`, `java`, `kotlin`, `c`,
|
|
//! `cpp`, `yaml`, `toml`, `json`, `shell`, `make`, `dockerfile`.
|
|
|
|
use std::path::Path;
|
|
|
|
/// Returns the canonical language identifier for a given file path, or
/// `None` if the extension / filename is not recognized.
///
/// Matching priority:
/// 1. exact, case-sensitive filename match (`Dockerfile`, `Makefile`,
///    `makefile`, `GNUmakefile`)
/// 2. ASCII-case-insensitive extension match (so `main.RS` and `main.rs`
///    both map to `rust`, and `api.Dockerfile` maps to `dockerfile`)
///
/// A path without an extension that is not one of the well-known filenames,
/// or whose extension is not valid UTF-8, yields `None`.
pub fn code_lang_for_path(path: &Path) -> Option<&'static str> {
    // Well-known build files carry no extension, so they are identified by
    // their full name before the extension table is consulted.
    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
        match name {
            "Dockerfile" => return Some("dockerfile"),
            // GNU make searches for `GNUmakefile`, `makefile` and `Makefile`
            // by default, so all three spellings are accepted.
            "Makefile" | "makefile" | "GNUmakefile" => return Some("make"),
            _ => {}
        }
    }

    // Normalise the extension once so the table below only needs lowercase
    // entries.
    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
    match ext.as_str() {
        "rs" => Some("rust"),
        "py" | "pyi" => Some("python"),
        "ts" | "tsx" => Some("typescript"),
        "js" | "mjs" | "cjs" | "jsx" => Some("javascript"),
        "go" => Some("go"),
        "java" => Some("java"),
        "kt" | "kts" => Some("kotlin"),
        // `.h` is ambiguous between C and C++; it is classified as C here.
        "c" | "h" => Some("c"),
        "cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some("cpp"),
        "yaml" | "yml" => Some("yaml"),
        "toml" => Some("toml"),
        "json" => Some("json"),
        "sh" | "bash" | "zsh" => Some("shell"),
        "mk" => Some("make"),
        // Covers the `<name>.dockerfile` / `<name>.Dockerfile` convention.
        "dockerfile" => Some("dockerfile"),
        _ => None,
    }
}
|