diff --git a/crates/kebab-parse-code/src/lang.rs b/crates/kebab-parse-code/src/lang.rs index 2fa992f..19fbb38 100644 --- a/crates/kebab-parse-code/src/lang.rs +++ b/crates/kebab-parse-code/src/lang.rs @@ -24,7 +24,7 @@ pub fn code_lang_for_path(path: &Path) -> Option<&'static str> { match ext.as_str() { "rs" => Some("rust"), "py" | "pyi" => Some("python"), - "ts" | "tsx" => Some("typescript"), + "ts" | "tsx" | "mts" | "cts" => Some("typescript"), "js" | "mjs" | "cjs" | "jsx" => Some("javascript"), "go" => Some("go"), "java" => Some("java"), @@ -82,7 +82,7 @@ pub fn module_path_for_python(workspace_path: &str) -> String { /// (no slash replacement, no source-root strip). See plan §Task C. pub fn module_path_for_tsjs(workspace_path: &str) -> String { let p = workspace_path; - for ext in [".tsx", ".ts", ".jsx", ".mjs", ".cjs", ".js"] { + for ext in [".tsx", ".mts", ".cts", ".ts", ".jsx", ".mjs", ".cjs", ".js"] { if let Some(stripped) = p.strip_suffix(ext) { return stripped.to_string(); } @@ -110,7 +110,7 @@ mod tests { #[test] fn module_path_for_tsjs_keeps_slashes_and_strips_ext() { - for ext in ["ts", "tsx", "js", "jsx", "mjs", "cjs"] { + for ext in ["ts", "tsx", "mts", "cts", "js", "jsx", "mjs", "cjs"] { let p = format!("src/search/retriever/Retriever.{ext}"); assert_eq!(module_path_for_tsjs(&p), "src/search/retriever/Retriever"); } diff --git a/crates/kebab-parse-code/src/typescript.rs b/crates/kebab-parse-code/src/typescript.rs index 2fb4e97..82232d6 100644 --- a/crates/kebab-parse-code/src/typescript.rs +++ b/crates/kebab-parse-code/src/typescript.rs @@ -173,8 +173,9 @@ impl Extractor for TypescriptAstExtractor { } /// Select the tree-sitter grammar based on the workspace path's -/// extension. `.tsx` → TSX grammar; everything else (`.ts`, `.d.ts`, -/// missing extension) → TypeScript grammar. +/// extension. `.tsx` → TSX grammar; everything else (`.ts`, `.mts`, +/// `.cts`, `.d.ts`, missing extension) → TypeScript grammar (the JSX- +/// agnostic variants all share one grammar in tree-sitter-typescript 0.23). fn select_grammar(workspace_path: &str) -> tree_sitter::Language { if workspace_path.ends_with(".tsx") { tree_sitter_typescript::LANGUAGE_TSX.into() diff --git a/crates/kebab-parse-code/tests/lang.rs b/crates/kebab-parse-code/tests/lang.rs index f7db0a9..62ec9bd 100644 --- a/crates/kebab-parse-code/tests/lang.rs +++ b/crates/kebab-parse-code/tests/lang.rs @@ -9,6 +9,8 @@ fn known_extensions_map_to_canonical_identifiers() { ("foo.pyi", Some("python")), ("foo.ts", Some("typescript")), ("foo.tsx", Some("typescript")), + ("foo.mts", Some("typescript")), // ESM TS — same grammar + ("foo.cts", Some("typescript")), // CommonJS TS — same grammar ("foo.js", Some("javascript")), ("foo.mjs", Some("javascript")), ("foo.cjs", Some("javascript")), diff --git a/crates/kebab-source-fs/src/media.rs b/crates/kebab-source-fs/src/media.rs index c84ce7f..5f940ec 100644 --- a/crates/kebab-source-fs/src/media.rs +++ b/crates/kebab-source-fs/src/media.rs @@ -19,7 +19,9 @@ pub(crate) fn media_type_for(path: &Path) -> MediaType { .unwrap_or_default(); match ext.as_str() { - "md" => MediaType::Markdown, + // Markdown + MDX (markdown + JSX, treated as plain markdown — the + // JSX islands are folded into raw passthrough by the md parser). + "md" | "mdx" => MediaType::Markdown, "pdf" => MediaType::Pdf, "png" => MediaType::Image(ImageType::Png), @@ -40,7 +42,8 @@ pub(crate) fn media_type_for(path: &Path) -> MediaType { // p10-1B: Python / TS / JS AST chunkers active. "py" | "pyi" => MediaType::Code("python".into()), - "ts" | "tsx" => MediaType::Code("typescript".into()), + // .mts / .cts are TypeScript ESM / CommonJS variants — same grammar. + "ts" | "tsx" | "mts" | "cts" => MediaType::Code("typescript".into()), "js" | "mjs" | "cjs" | "jsx" => MediaType::Code("javascript".into()), // Empty string (no extension) and any other extension: bucket as @@ -102,6 +105,20 @@ mod tests { assert_eq!(media_type_for(Path::new("a/b.rs")), MediaType::Code("rust".into())); } + #[test] + fn ts_variants_mts_cts() { + // .mts / .cts are TypeScript ESM / CommonJS — same grammar as .ts. + assert_eq!(media_type_for(Path::new("a/b.mts")), MediaType::Code("typescript".into())); + assert_eq!(media_type_for(Path::new("a/b.cts")), MediaType::Code("typescript".into())); + } + + #[test] + fn mdx_routes_to_markdown() { + // MDX is markdown with JSX islands; the md parser folds the JSX + // through as raw passthrough. + assert_eq!(media_type_for(Path::new("docs/page.mdx")), MediaType::Markdown); + } + #[test] fn unknown_and_missing_extension() { assert_eq!(