Adds basename-first matching for Dockerfile / Cargo.toml / pyproject.toml / package.json / tsconfig.json / go.mod / pom.xml / build.gradle plus Dockerfile.* prefix variant. Extension fallback adds .yaml/.yml/.dockerfile/ .toml/.json/.xml/.gradle → yaml/dockerfile/toml/json/xml/groovy. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
170 lines
7.3 KiB
Rust
170 lines
7.3 KiB
Rust
//! Canonical extension → language identifier mapping (spec §3.5).
|
|
//!
|
|
//! Lowercase canonical identifiers, matching tree-sitter parser conventions:
|
|
//! `rust`, `python`, `typescript`, `javascript`, `go`, `java`, `kotlin`, `c`,
|
|
//! `cpp`, `yaml`, `toml`, `json`, `shell`, `make`, `dockerfile`.
|
|
|
|
use std::path::Path;
|
|
|
|
/// Returns the canonical language identifier for a given file path, or
|
|
/// `None` if the extension / filename is not recognized.
|
|
///
|
|
/// Matching priority:
|
|
/// 1. Tier 1 basename exact match (e.g. `Dockerfile`, `Makefile`)
|
|
/// 2. Tier 2 basename match (e.g. `Cargo.toml`, `package.json`, `build.gradle`)
|
|
/// 3. Tier 2 `Dockerfile.*` prefix variant
|
|
/// 4. Tier 1 + Tier 2 extension fallback (lowercase)
|
|
pub fn code_lang_for_path(path: &Path) -> Option<&'static str> {
|
|
if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
|
|
// Tier 1 basename exact match
|
|
match name {
|
|
"Dockerfile" => return Some("dockerfile"),
|
|
"Makefile" | "GNUmakefile" => return Some("make"),
|
|
_ => {}
|
|
}
|
|
|
|
// Tier 2 basename match (configuration / manifest files)
|
|
match name {
|
|
"Cargo.toml" | "pyproject.toml" => return Some("toml"),
|
|
"package.json" | "tsconfig.json" => return Some("json"),
|
|
"go.mod" => return Some("go-mod"),
|
|
"pom.xml" => return Some("xml"),
|
|
"build.gradle" => return Some("groovy"),
|
|
_ => {}
|
|
}
|
|
|
|
// Tier 2: `Dockerfile.*` prefix variant (e.g. `Dockerfile.dev`, `Dockerfile.prod`)
|
|
if name.starts_with("Dockerfile.") && name.len() > "Dockerfile.".len() {
|
|
return Some("dockerfile");
|
|
}
|
|
}
|
|
|
|
// Extension fallback (Tier 1 + Tier 2)
|
|
let ext = path.extension()?.to_str()?.to_ascii_lowercase();
|
|
match ext.as_str() {
|
|
// Tier 1 extensions
|
|
"rs" => Some("rust"),
|
|
"py" | "pyi" => Some("python"),
|
|
"ts" | "tsx" | "mts" | "cts" => Some("typescript"),
|
|
"js" | "mjs" | "cjs" | "jsx" => Some("javascript"),
|
|
"go" => Some("go"),
|
|
"java" => Some("java"),
|
|
"kt" | "kts" => Some("kotlin"),
|
|
"c" | "h" => Some("c"),
|
|
"cpp" | "cc" | "cxx" | "hpp" | "hh" | "hxx" => Some("cpp"),
|
|
"sh" | "bash" | "zsh" => Some("shell"),
|
|
"mk" => Some("make"),
|
|
// Tier 2 extensions
|
|
"yaml" | "yml" => Some("yaml"),
|
|
"toml" => Some("toml"),
|
|
"json" => Some("json"),
|
|
"xml" => Some("xml"),
|
|
"dockerfile" => Some("dockerfile"),
|
|
"gradle" => Some("groovy"),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
/// p10-1B: workspace-relative Python file path → dotted module-path prefix.
|
|
/// See plan §Task C for the exact rules + tasks/p10/p10-1b for the §3.4
|
|
/// design contract.
|
|
///
|
|
/// Stripped source-roots: `src/`, `lib/`, and `crates/<crate>/src/`.
|
|
/// `tests/`, `examples/`, and `benches/` are intentionally NOT stripped —
|
|
/// they appear in test/example/bench namespaces and dropping them would
|
|
/// conflate identical symbol names across conventional Python directories
|
|
/// (e.g. `tests/test_foo.py` → `tests.test_foo`, not `test_foo`).
|
|
pub fn module_path_for_python(workspace_path: &str) -> String {
|
|
let mut p: &str = workspace_path;
|
|
if let Some(rest) = p.strip_prefix("crates/") {
|
|
if let Some(slash) = rest.find('/') {
|
|
let after = &rest[slash + 1..];
|
|
if let Some(stripped) = after.strip_prefix("src/") {
|
|
p = stripped;
|
|
}
|
|
}
|
|
} else if let Some(stripped) = p.strip_prefix("src/") {
|
|
p = stripped;
|
|
} else if let Some(stripped) = p.strip_prefix("lib/") {
|
|
p = stripped;
|
|
}
|
|
let p = match p.strip_suffix(".py") {
|
|
Some(s) => s,
|
|
None => p.strip_suffix(".pyi").unwrap_or(p),
|
|
};
|
|
let p = if let Some(parent) = p.strip_suffix("/__init__") {
|
|
parent
|
|
} else if p == "__init__" {
|
|
""
|
|
} else {
|
|
p
|
|
};
|
|
p.replace('/', ".")
|
|
}
|
|
|
|
/// p10-1B: workspace-relative TS/JS file path → path-style prefix
|
|
/// (no slash replacement, no source-root strip). See plan §Task C.
|
|
pub fn module_path_for_tsjs(workspace_path: &str) -> String {
|
|
let p = workspace_path;
|
|
for ext in [".tsx", ".mts", ".cts", ".ts", ".jsx", ".mjs", ".cjs", ".js"] {
|
|
if let Some(stripped) = p.strip_suffix(ext) {
|
|
return stripped.to_string();
|
|
}
|
|
}
|
|
p.to_string()
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn module_path_for_python_strips_src_roots_and_extensions() {
|
|
assert_eq!(module_path_for_python("kebab_eval/metrics.py"), "kebab_eval.metrics");
|
|
assert_eq!(module_path_for_python("kebab_eval/__init__.py"), "kebab_eval");
|
|
assert_eq!(module_path_for_python("src/foo/bar.py"), "foo.bar");
|
|
assert_eq!(module_path_for_python("crates/x/src/foo/bar.py"), "foo.bar");
|
|
assert_eq!(module_path_for_python("a/b/c.pyi"), "a.b.c");
|
|
assert_eq!(module_path_for_python("standalone.py"), "standalone");
|
|
assert_eq!(module_path_for_python("src/__init__.py"), "");
|
|
// `tests/` is NOT a stripped source-root — it is preserved as
|
|
// part of the module path so test symbols stay namespaced.
|
|
assert_eq!(module_path_for_python("tests/test_foo.py"), "tests.test_foo");
|
|
}
|
|
|
|
#[test]
|
|
fn module_path_for_tsjs_keeps_slashes_and_strips_ext() {
|
|
for ext in ["ts", "tsx", "mts", "cts", "js", "jsx", "mjs", "cjs"] {
|
|
let p = format!("src/search/retriever/Retriever.{ext}");
|
|
assert_eq!(module_path_for_tsjs(&p), "src/search/retriever/Retriever");
|
|
}
|
|
assert_eq!(module_path_for_tsjs("foo.ts"), "foo");
|
|
assert_eq!(module_path_for_tsjs("a/b/c.ts"), "a/b/c");
|
|
assert_eq!(module_path_for_tsjs("packages/x/src/Foo.ts"), "packages/x/src/Foo");
|
|
}
|
|
|
|
#[test]
|
|
fn tier2_basename_takes_precedence_over_extension() {
|
|
assert_eq!(code_lang_for_path(Path::new("Dockerfile")), Some("dockerfile"));
|
|
assert_eq!(code_lang_for_path(Path::new("foo/Dockerfile.dev")), Some("dockerfile"));
|
|
assert_eq!(code_lang_for_path(Path::new("myapp.dockerfile")), Some("dockerfile"));
|
|
assert_eq!(code_lang_for_path(Path::new("repo/Cargo.toml")), Some("toml"));
|
|
assert_eq!(code_lang_for_path(Path::new("pyproject.toml")), Some("toml"));
|
|
assert_eq!(code_lang_for_path(Path::new("repo/package.json")), Some("json"));
|
|
assert_eq!(code_lang_for_path(Path::new("tsconfig.json")), Some("json"));
|
|
assert_eq!(code_lang_for_path(Path::new("go.mod")), Some("go-mod"));
|
|
assert_eq!(code_lang_for_path(Path::new("pom.xml")), Some("xml"));
|
|
assert_eq!(code_lang_for_path(Path::new("build.gradle")), Some("groovy"));
|
|
}
|
|
|
|
#[test]
|
|
fn tier2_extension_fallback() {
|
|
assert_eq!(code_lang_for_path(Path::new("k8s/deploy.yaml")), Some("yaml"));
|
|
assert_eq!(code_lang_for_path(Path::new("k8s/deploy.yml")), Some("yaml"));
|
|
assert_eq!(code_lang_for_path(Path::new("foo/bar.toml")), Some("toml"));
|
|
assert_eq!(code_lang_for_path(Path::new("foo/bar.json")), Some("json"));
|
|
assert_eq!(code_lang_for_path(Path::new("foo/bar.xml")), Some("xml"));
|
|
assert_eq!(code_lang_for_path(Path::new("foo/bar.gradle")), Some("groovy"));
|
|
}
|
|
}
|