Compare commits

..

3 Commits

Author SHA1 Message Date
acf8cf3be2 chore: bump version 0.8.3 → 0.9.0
dogfood-discovered routing additions (PR #147) land:
- .mts / .cts → MediaType::Code(typescript)
- .mdx → MediaType::Markdown

minor bump 사유: 사용자 도그푸딩 surface 확장 — 이전에 skip 되던 28+ 파일이
이제 색인됨. design §10.4 dogfooding-ready surface 확장 = minor trigger.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 06:29:27 +00:00
ea5f7b22c8 Merge pull request 'feat(dogfood): route .mts/.cts → typescript + .mdx → markdown' (#147) from feat/dogfood-routing-cts-mts-mdx into main 2026-05-20 06:28:41 +00:00
5497c6e7b5 feat(dogfood): route .mts/.cts to typescript + .mdx to markdown
Dogfooding (PR #142 1B + multi-root: kebab-docs + httpx + zod + lodash)
showed 28 files that were skipped because of their extension but can be
routed to existing extractors:
- .mts (ESM TypeScript) / .cts (CommonJS TypeScript) — same grammar as
  .ts in tree-sitter-typescript 0.23 (LANGUAGE_TYPESCRIPT covers the
  JSX-agnostic variants; LANGUAGE_TSX stays for .tsx only)
- .mdx (Markdown + JSX) — routed as MediaType::Markdown; the md parser
  passes JSX islands through as raw content

Changes:
- crates/kebab-source-fs/src/media.rs: 'mts'|'cts' → Code(typescript),
  'mdx' → Markdown. +2 unit tests.
- crates/kebab-parse-code/src/lang.rs: code_lang_for_path matches mts/cts;
  module_path_for_tsjs strips .mts/.cts as well. Test cases extended.
- crates/kebab-parse-code/src/typescript.rs: doc comment on select_grammar
  refreshed to mention .mts/.cts.
- crates/kebab-parse-code/tests/lang.rs: 2 new assertions.

verify: kebab-source-fs 44 / kebab-parse-code lib 20 + lang 4 all pass; clippy clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-20 06:24:21 +00:00
6 changed files with 51 additions and 31 deletions

46
Cargo.lock generated
View File

@@ -4127,7 +4127,7 @@ dependencies = [
[[package]]
name = "kebab-app"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"base64 0.22.1",
@@ -4172,7 +4172,7 @@ dependencies = [
[[package]]
name = "kebab-chunk"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4187,7 +4187,7 @@ dependencies = [
[[package]]
name = "kebab-cli"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"clap",
@@ -4208,7 +4208,7 @@ dependencies = [
[[package]]
name = "kebab-config"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"dirs 5.0.1",
@@ -4223,7 +4223,7 @@ dependencies = [
[[package]]
name = "kebab-core"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4237,7 +4237,7 @@ dependencies = [
[[package]]
name = "kebab-embed"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4251,7 +4251,7 @@ dependencies = [
[[package]]
name = "kebab-embed-local"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"fastembed",
@@ -4264,7 +4264,7 @@ dependencies = [
[[package]]
name = "kebab-eval"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-app",
@@ -4283,7 +4283,7 @@ dependencies = [
[[package]]
name = "kebab-llm"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-core",
@@ -4292,7 +4292,7 @@ dependencies = [
[[package]]
name = "kebab-llm-local"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-config",
@@ -4309,7 +4309,7 @@ dependencies = [
[[package]]
name = "kebab-mcp"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-app",
@@ -4327,7 +4327,7 @@ dependencies = [
[[package]]
name = "kebab-normalize"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-core",
@@ -4342,7 +4342,7 @@ dependencies = [
[[package]]
name = "kebab-parse-code"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"gix",
@@ -4360,7 +4360,7 @@ dependencies = [
[[package]]
name = "kebab-parse-image"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"ab_glyph",
"anyhow",
@@ -4384,7 +4384,7 @@ dependencies = [
[[package]]
name = "kebab-parse-md"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"kebab-core",
@@ -4401,7 +4401,7 @@ dependencies = [
[[package]]
name = "kebab-parse-pdf"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4414,7 +4414,7 @@ dependencies = [
[[package]]
name = "kebab-parse-types"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"kebab-core",
"serde",
@@ -4422,7 +4422,7 @@ dependencies = [
[[package]]
name = "kebab-rag"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4443,7 +4443,7 @@ dependencies = [
[[package]]
name = "kebab-search"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"globset",
@@ -4462,7 +4462,7 @@ dependencies = [
[[package]]
name = "kebab-source-fs"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4481,7 +4481,7 @@ dependencies = [
[[package]]
name = "kebab-store-sqlite"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"blake3",
@@ -4502,7 +4502,7 @@ dependencies = [
[[package]]
name = "kebab-store-vector"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"arrow",
@@ -4526,7 +4526,7 @@ dependencies = [
[[package]]
name = "kebab-tui"
version = "0.8.3"
version = "0.9.0"
dependencies = [
"anyhow",
"crossterm",

View File

@@ -31,7 +31,7 @@ edition = "2024"
rust-version = "1.85"
license = "MIT OR Apache-2.0"
repository = "https://github.com/altair823/kebab"
version = "0.8.3"
version = "0.9.0"
[workspace.dependencies]
anyhow = "1"

View File

@@ -24,7 +24,7 @@ pub fn code_lang_for_path(path: &Path) -> Option<&'static str> {
match ext.as_str() {
"rs" => Some("rust"),
"py" | "pyi" => Some("python"),
"ts" | "tsx" => Some("typescript"),
"ts" | "tsx" | "mts" | "cts" => Some("typescript"),
"js" | "mjs" | "cjs" | "jsx" => Some("javascript"),
"go" => Some("go"),
"java" => Some("java"),
@@ -82,7 +82,7 @@ pub fn module_path_for_python(workspace_path: &str) -> String {
/// (no slash replacement, no source-root strip). See plan §Task C.
pub fn module_path_for_tsjs(workspace_path: &str) -> String {
let p = workspace_path;
for ext in [".tsx", ".ts", ".jsx", ".mjs", ".cjs", ".js"] {
for ext in [".tsx", ".mts", ".cts", ".ts", ".jsx", ".mjs", ".cjs", ".js"] {
if let Some(stripped) = p.strip_suffix(ext) {
return stripped.to_string();
}
@@ -110,7 +110,7 @@ mod tests {
#[test]
fn module_path_for_tsjs_keeps_slashes_and_strips_ext() {
for ext in ["ts", "tsx", "js", "jsx", "mjs", "cjs"] {
for ext in ["ts", "tsx", "mts", "cts", "js", "jsx", "mjs", "cjs"] {
let p = format!("src/search/retriever/Retriever.{ext}");
assert_eq!(module_path_for_tsjs(&p), "src/search/retriever/Retriever");
}

View File

@@ -173,8 +173,9 @@ impl Extractor for TypescriptAstExtractor {
}
/// Select the tree-sitter grammar based on the workspace path's
/// extension. `.tsx` → TSX grammar; everything else (`.ts`, `.d.ts`,
/// missing extension) → TypeScript grammar.
/// extension. `.tsx` → TSX grammar; everything else (`.ts`, `.mts`,
/// `.cts`, `.d.ts`, missing extension) → TypeScript grammar (the JSX-
/// agnostic variants all share one grammar in tree-sitter-typescript 0.23).
fn select_grammar(workspace_path: &str) -> tree_sitter::Language {
if workspace_path.ends_with(".tsx") {
tree_sitter_typescript::LANGUAGE_TSX.into()

View File

@@ -9,6 +9,8 @@ fn known_extensions_map_to_canonical_identifiers() {
("foo.pyi", Some("python")),
("foo.ts", Some("typescript")),
("foo.tsx", Some("typescript")),
("foo.mts", Some("typescript")), // ESM TS — same grammar
("foo.cts", Some("typescript")), // CommonJS TS — same grammar
("foo.js", Some("javascript")),
("foo.mjs", Some("javascript")),
("foo.cjs", Some("javascript")),

View File

@@ -19,7 +19,9 @@ pub(crate) fn media_type_for(path: &Path) -> MediaType {
.unwrap_or_default();
match ext.as_str() {
"md" => MediaType::Markdown,
// Markdown + MDX (markdown + JSX, treated as plain markdown — the
// JSX islands are folded into raw passthrough by the md parser).
"md" | "mdx" => MediaType::Markdown,
"pdf" => MediaType::Pdf,
"png" => MediaType::Image(ImageType::Png),
@@ -40,7 +42,8 @@ pub(crate) fn media_type_for(path: &Path) -> MediaType {
// p10-1B: Python / TS / JS AST chunkers active.
"py" | "pyi" => MediaType::Code("python".into()),
"ts" | "tsx" => MediaType::Code("typescript".into()),
// .mts / .cts are TypeScript ESM / CommonJS variants — same grammar.
"ts" | "tsx" | "mts" | "cts" => MediaType::Code("typescript".into()),
"js" | "mjs" | "cjs" | "jsx" => MediaType::Code("javascript".into()),
// Empty string (no extension) and any other extension: bucket as
@@ -102,6 +105,20 @@ mod tests {
assert_eq!(media_type_for(Path::new("a/b.rs")), MediaType::Code("rust".into()));
}
#[test]
fn ts_variants_mts_cts() {
    // Both the ESM (`.mts`) and CommonJS (`.cts`) TypeScript extensions must
    // be classified as TypeScript code.
    for file in ["a/b.mts", "a/b.cts"] {
        let detected = media_type_for(Path::new(file));
        assert_eq!(detected, MediaType::Code("typescript".into()));
    }
}
#[test]
fn mdx_routes_to_markdown() {
    // An `.mdx` path must be classified as the Markdown media type.
    let mdx_path = Path::new("docs/page.mdx");
    let detected = media_type_for(mdx_path);
    assert_eq!(detected, MediaType::Markdown);
}
#[test]
fn unknown_and_missing_extension() {
assert_eq!(