feat(p10-1b): activate TypeScript in ingest_one_code_asset dispatch

Replaces TS bail! arms with TypescriptAstExtractor + CodeTsAstV1Chunker.
Adds typescript_file_ingests_and_searches_as_code_citation integration test —
asserts citation.lang=typescript, symbol=src/Foo.Foo.bar, code_lang=typescript.
JS arms remain bail!() (Task L).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-20 00:59:41 +00:00
parent 20feb3133e
commit acb61b6830
2 changed files with 94 additions and 10 deletions

View File

@@ -39,7 +39,7 @@ use std::sync::Arc;
use anyhow::{Context, anyhow};
use serde::{Deserialize, Serialize};
use kebab_chunk::{CodePythonAstV1Chunker, CodeRustAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
use kebab_chunk::{CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
use kebab_core::{
Answer, Block, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
@@ -50,7 +50,7 @@ use kebab_core::{
use kebab_llm_local::OllamaLanguageModel;
use kebab_normalize::build_canonical_document;
use kebab_parse_image::{ImageExtractor, OllamaVisionOcr, apply_caption, apply_ocr};
use kebab_parse_code::{PythonAstExtractor, RustAstExtractor};
use kebab_parse_code::{PythonAstExtractor, RustAstExtractor, TypescriptAstExtractor};
use kebab_parse_pdf::PdfTextExtractor;
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kebab_source_fs::FsSourceConnector;
@@ -1677,20 +1677,21 @@ fn ingest_one_code_asset(
}
};
// p10-1b Task D/G: parser_version per-lang.
// p10-1b Task D/G/J: parser_version per-lang.
let parser_version = match code_lang {
"rust" => ParserVersion(kebab_parse_code::RUST_PARSER_VERSION.to_string()),
"python" => ParserVersion(kebab_parse_code::PYTHON_PARSER_VERSION.to_string()),
"typescript" => anyhow::bail!("typescript ingest not yet wired (p10-1b Task J)"),
"typescript" => ParserVersion(kebab_parse_code::TS_PARSER_VERSION.to_string()),
"javascript" => anyhow::bail!("javascript ingest not yet wired (p10-1b Task L)"),
other => anyhow::bail!("unsupported code_lang: {other}"),
};
// p10-1b Task D/G: chunker_version per-lang (TS/JS are unreachable here;
// they bail above and get real chunkers in Tasks J/L).
// p10-1b Task D/G/J: chunker_version per-lang (JS is unreachable here;
// it bails above and gets a real chunker in Task L).
let chunker_version = match code_lang {
"rust" => CodeRustAstV1Chunker.chunker_version(),
"python" => CodePythonAstV1Chunker.chunker_version(),
"rust" => CodeRustAstV1Chunker.chunker_version(),
"python" => CodePythonAstV1Chunker.chunker_version(),
"typescript" => CodeTsAstV1Chunker.chunker_version(),
other => anyhow::bail!("unreachable chunker_version: {other}"),
};
@@ -1715,7 +1716,7 @@ fn ingest_one_code_asset(
config: &extract_config,
};
// p10-1b Task D/G: extractor per-lang.
// p10-1b Task D/G/J: extractor per-lang.
let mut canonical = match code_lang {
"rust" => RustAstExtractor::new()
.extract(&ctx, &bytes)
@@ -1723,10 +1724,13 @@ fn ingest_one_code_asset(
"python" => PythonAstExtractor::new()
.extract(&ctx, &bytes)
.context("kb-parse-code::PythonAstExtractor::extract (code:python)")?,
"typescript" => TypescriptAstExtractor::new()
.extract(&ctx, &bytes)
.context("kb-parse-code::TypescriptAstExtractor::extract (code:typescript)")?,
other => anyhow::bail!("unreachable (extract): {other}"),
};
// p10-1b Task D/G: chunker per-lang.
// p10-1b Task D/G/J: chunker per-lang.
let chunks = match code_lang {
"rust" => CodeRustAstV1Chunker
.chunk(&canonical, chunk_policy)
@@ -1734,6 +1738,9 @@ fn ingest_one_code_asset(
"python" => CodePythonAstV1Chunker
.chunk(&canonical, chunk_policy)
.context("kb-chunk::CodePythonAstV1Chunker::chunk (code:python)")?,
"typescript" => CodeTsAstV1Chunker
.chunk(&canonical, chunk_policy)
.context("kb-chunk::CodeTsAstV1Chunker::chunk (code:typescript)")?,
other => anyhow::bail!("unreachable (chunk): {other}"),
};

View File

@@ -236,6 +236,83 @@ fn python_file_ingests_and_searches_as_code_citation() {
);
}
/// p10-1b Task J: a `.ts` file in a sub-directory is ingested and the
/// resulting `Citation::Code` hit must carry `lang="typescript"`,
/// `symbol="src/Foo.Foo.bar"`, and `line_start >= 1`.
/// The sub-directory (`src/`) ensures `module_path_for_tsjs` produces
/// a non-empty prefix so the fully-qualified symbol assertion exercises
/// the prefix wiring end-to-end.
#[test]
fn typescript_file_ingests_and_searches_as_code_citation() {
let env = TestEnv::lexical_only();
let src_dir = env.workspace_root.join("src");
std::fs::create_dir_all(&src_dir).unwrap();
std::fs::write(
src_dir.join("Foo.ts"),
"export class Foo {\n bar(): number { return 42; }\n}\n",
)
.unwrap();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
assert!(report.new >= 1, "ts file ingested: {report:?}");
let items = report.items.as_ref().expect("items present");
let ts_item = items
.iter()
.find(|i| i.doc_path.0.ends_with("Foo.ts"))
.expect("Foo.ts item");
assert_eq!(
ts_item.parser_version.as_ref().map(|p| p.0.as_str()),
Some("code-typescript-v1"),
"parser_version must be code-typescript-v1"
);
assert_eq!(
ts_item.chunker_version.as_ref().map(|c| c.0.as_str()),
Some("code-ts-ast-v1"),
"chunker_version must be code-ts-ast-v1"
);
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("bar"))
.expect("search must succeed");
let h = hits
.iter()
.find(|h| matches!(&h.citation, Citation::Code { .. }))
.expect("at least one Citation::Code hit for 'bar'");
match &h.citation {
Citation::Code {
lang,
symbol,
line_start,
..
} => {
assert_eq!(
lang.as_deref(),
Some("typescript"),
"citation.lang must be 'typescript'"
);
assert_eq!(
symbol.as_deref(),
Some("src/Foo.Foo.bar"),
"citation.symbol must be 'src/Foo.Foo.bar'"
);
assert!(*line_start >= 1, "line_start must be >=1");
}
_ => unreachable!(),
}
assert_eq!(
h.code_lang.as_deref(),
Some("typescript"),
"SearchHit.code_lang must be 'typescript'"
);
}
/// Re-ingesting the same `.rs` file without changes must report
/// `Unchanged` (incremental-skip path exercised).
#[test]