feat(p10-1c-jk): activate Kotlin in ingest_one_code_asset dispatch

Replaces Kotlin bail! arms with KotlinAstExtractor + CodeKotlinAstV1Chunker.
Adds kotlin_file_ingests_and_searches_as_code_citation integration test —
asserts citation.lang=kotlin, symbol=com.foo.Foo.bar, code_lang=kotlin.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-20 10:54:55 +00:00
parent 30e03c7a12
commit ff1bedbef5
2 changed files with 81 additions and 6 deletions

View File

@@ -39,7 +39,7 @@ use std::sync::Arc;
use anyhow::{Context, anyhow};
use serde::{Deserialize, Serialize};
use kebab_chunk::{CodeGoAstV1Chunker, CodeJavaAstV1Chunker, CodeJsAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
use kebab_chunk::{CodeGoAstV1Chunker, CodeJavaAstV1Chunker, CodeJsAstV1Chunker, CodeKotlinAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
use kebab_core::{
Answer, Block, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
@@ -50,7 +50,7 @@ use kebab_core::{
use kebab_llm_local::OllamaLanguageModel;
use kebab_normalize::build_canonical_document;
use kebab_parse_image::{ImageExtractor, OllamaVisionOcr, apply_caption, apply_ocr};
use kebab_parse_code::{GoAstExtractor, JavaAstExtractor, JavascriptAstExtractor, PythonAstExtractor, RustAstExtractor, TypescriptAstExtractor};
use kebab_parse_code::{GoAstExtractor, JavaAstExtractor, JavascriptAstExtractor, KotlinAstExtractor, PythonAstExtractor, RustAstExtractor, TypescriptAstExtractor};
use kebab_parse_pdf::PdfTextExtractor;
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kebab_source_fs::FsSourceConnector;
@@ -1830,7 +1830,7 @@ fn ingest_one_code_asset(
"javascript" => ParserVersion(kebab_parse_code::JS_PARSER_VERSION.to_string()),
"go" => ParserVersion(kebab_parse_code::GO_PARSER_VERSION.to_string()),
"java" => ParserVersion(kebab_parse_code::JAVA_PARSER_VERSION.to_string()),
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
"kotlin" => ParserVersion(kebab_parse_code::KOTLIN_PARSER_VERSION.to_string()),
other => anyhow::bail!("unsupported code_lang: {other}"),
};
@@ -1842,7 +1842,7 @@ fn ingest_one_code_asset(
"javascript" => CodeJsAstV1Chunker.chunker_version(),
"go" => CodeGoAstV1Chunker.chunker_version(),
"java" => CodeJavaAstV1Chunker.chunker_version(),
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
"kotlin" => CodeKotlinAstV1Chunker.chunker_version(),
other => anyhow::bail!("unreachable chunker_version: {other}"),
};
@@ -1887,7 +1887,9 @@ fn ingest_one_code_asset(
"java" => JavaAstExtractor::new()
.extract(&ctx, &bytes)
.context("kb-parse-code::JavaAstExtractor::extract (code:java)")?,
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
"kotlin" => KotlinAstExtractor::new()
.extract(&ctx, &bytes)
.context("kb-parse-code::KotlinAstExtractor::extract (code:kotlin)")?,
other => anyhow::bail!("unreachable (extract): {other}"),
};
@@ -1911,7 +1913,9 @@ fn ingest_one_code_asset(
"java" => CodeJavaAstV1Chunker
.chunk(&canonical, chunk_policy)
.context("kb-chunk::CodeJavaAstV1Chunker::chunk (code:java)")?,
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
"kotlin" => CodeKotlinAstV1Chunker
.chunk(&canonical, chunk_policy)
.context("kb-chunk::CodeKotlinAstV1Chunker::chunk (code:kotlin)")?,
other => anyhow::bail!("unreachable (chunk): {other}"),
};

View File

@@ -532,6 +532,77 @@ fn java_file_ingests_and_searches_as_code_citation() {
);
}
/// p10-1c-jk Task I: end-to-end ingest + lexical search for a Kotlin source.
///
/// Writes `com/foo/Foo.kt` (declaring `package com.foo`) under the workspace
/// root, ingests it, and checks that:
/// - the ingest report has no errors and lists the file with
///   `parser_version = "code-kotlin-v1"` and
///   `chunker_version = "code-kotlin-ast-v1"`;
/// - searching for `bar` yields a `Citation::Code` hit with `lang = "kotlin"`,
///   `symbol = "com.foo.Foo.bar"` and `line_start >= 1`;
/// - the same hit reports `code_lang = "kotlin"`.
///
/// The file is placed in a `com/foo/` sub-directory matching its package so
/// that the fully-qualified symbol assertion covers the package-prefix path.
#[test]
fn kotlin_file_ingests_and_searches_as_code_citation() {
    let env = TestEnv::lexical_only();

    // Lay out the fixture: <workspace>/com/foo/Foo.kt
    let package_dir = env.workspace_root.join("com").join("foo");
    std::fs::create_dir_all(&package_dir).unwrap();
    let kotlin_source = "package com.foo\n\nclass Foo {\n fun bar(): String = \"x\"\n}\n";
    std::fs::write(package_dir.join("Foo.kt"), kotlin_source).unwrap();

    // Ingest the workspace and confirm the run was clean.
    let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
        .expect("ingest must succeed");
    assert_eq!(report.errors, 0);
    assert!(report.new >= 1);

    // Locate the report entry for the Kotlin file and check its versions.
    let report_items = report.items.as_ref().expect("items present");
    let kt_item = report_items
        .iter()
        .find(|item| item.doc_path.0.ends_with("Foo.kt"))
        .expect("Foo.kt item present");

    let parser_version = kt_item.parser_version.as_ref().map(|p| p.0.as_str());
    assert_eq!(
        parser_version,
        Some("code-kotlin-v1"),
        "parser_version must be code-kotlin-v1"
    );

    let chunker_version = kt_item.chunker_version.as_ref().map(|c| c.0.as_str());
    assert_eq!(
        chunker_version,
        Some("code-kotlin-ast-v1"),
        "chunker_version must be code-kotlin-ast-v1"
    );

    // Search for the method name and take the first code citation.
    let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("bar"))
        .expect("search must succeed");
    let code_hit = hits
        .iter()
        .find(|candidate| matches!(&candidate.citation, kebab_core::Citation::Code { .. }))
        .expect("Citation::Code hit");

    // The `find` above only accepts `Citation::Code`, so the else arm cannot run.
    let kebab_core::Citation::Code {
        lang,
        symbol,
        line_start,
        ..
    } = &code_hit.citation
    else {
        unreachable!()
    };

    assert_eq!(lang.as_deref(), Some("kotlin"), "citation.lang must be 'kotlin'");
    assert_eq!(
        symbol.as_deref(),
        Some("com.foo.Foo.bar"),
        "citation.symbol must be 'com.foo.Foo.bar'"
    );
    assert!(*line_start >= 1, "line_start must be >=1");

    assert_eq!(
        code_hit.code_lang.as_deref(),
        Some("kotlin"),
        "SearchHit.code_lang must be 'kotlin'"
    );
}
/// Re-ingesting the same `.rs` file without changes must report
/// `Unchanged` (incremental-skip path exercised).
#[test]