feat(p10-1c-jk): activate Kotlin in ingest_one_code_asset dispatch
Replaces the Kotlin `bail!` arms in `ingest_one_code_asset` with KotlinAstExtractor + CodeKotlinAstV1Chunker. Adds the kotlin_file_ingests_and_searches_as_code_citation integration test — asserts parser_version=code-kotlin-v1, chunker_version=code-kotlin-ast-v1, citation.lang=kotlin, symbol=com.foo.Foo.bar, code_lang=kotlin. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -39,7 +39,7 @@ use std::sync::Arc;
|
||||
use anyhow::{Context, anyhow};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use kebab_chunk::{CodeGoAstV1Chunker, CodeJavaAstV1Chunker, CodeJsAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
|
||||
use kebab_chunk::{CodeGoAstV1Chunker, CodeJavaAstV1Chunker, CodeJsAstV1Chunker, CodeKotlinAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
|
||||
use kebab_core::{
|
||||
Answer, Block, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
|
||||
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
|
||||
@@ -50,7 +50,7 @@ use kebab_core::{
|
||||
use kebab_llm_local::OllamaLanguageModel;
|
||||
use kebab_normalize::build_canonical_document;
|
||||
use kebab_parse_image::{ImageExtractor, OllamaVisionOcr, apply_caption, apply_ocr};
|
||||
use kebab_parse_code::{GoAstExtractor, JavaAstExtractor, JavascriptAstExtractor, PythonAstExtractor, RustAstExtractor, TypescriptAstExtractor};
|
||||
use kebab_parse_code::{GoAstExtractor, JavaAstExtractor, JavascriptAstExtractor, KotlinAstExtractor, PythonAstExtractor, RustAstExtractor, TypescriptAstExtractor};
|
||||
use kebab_parse_pdf::PdfTextExtractor;
|
||||
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_source_fs::FsSourceConnector;
|
||||
@@ -1830,7 +1830,7 @@ fn ingest_one_code_asset(
|
||||
"javascript" => ParserVersion(kebab_parse_code::JS_PARSER_VERSION.to_string()),
|
||||
"go" => ParserVersion(kebab_parse_code::GO_PARSER_VERSION.to_string()),
|
||||
"java" => ParserVersion(kebab_parse_code::JAVA_PARSER_VERSION.to_string()),
|
||||
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
|
||||
"kotlin" => ParserVersion(kebab_parse_code::KOTLIN_PARSER_VERSION.to_string()),
|
||||
other => anyhow::bail!("unsupported code_lang: {other}"),
|
||||
};
|
||||
|
||||
@@ -1842,7 +1842,7 @@ fn ingest_one_code_asset(
|
||||
"javascript" => CodeJsAstV1Chunker.chunker_version(),
|
||||
"go" => CodeGoAstV1Chunker.chunker_version(),
|
||||
"java" => CodeJavaAstV1Chunker.chunker_version(),
|
||||
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
|
||||
"kotlin" => CodeKotlinAstV1Chunker.chunker_version(),
|
||||
other => anyhow::bail!("unreachable chunker_version: {other}"),
|
||||
};
|
||||
|
||||
@@ -1887,7 +1887,9 @@ fn ingest_one_code_asset(
|
||||
"java" => JavaAstExtractor::new()
|
||||
.extract(&ctx, &bytes)
|
||||
.context("kb-parse-code::JavaAstExtractor::extract (code:java)")?,
|
||||
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
|
||||
"kotlin" => KotlinAstExtractor::new()
|
||||
.extract(&ctx, &bytes)
|
||||
.context("kb-parse-code::KotlinAstExtractor::extract (code:kotlin)")?,
|
||||
other => anyhow::bail!("unreachable (extract): {other}"),
|
||||
};
|
||||
|
||||
@@ -1911,7 +1913,9 @@ fn ingest_one_code_asset(
|
||||
"java" => CodeJavaAstV1Chunker
|
||||
.chunk(&canonical, chunk_policy)
|
||||
.context("kb-chunk::CodeJavaAstV1Chunker::chunk (code:java)")?,
|
||||
"kotlin" => anyhow::bail!("kotlin ingest not yet wired (p10-1c-jk Task I)"),
|
||||
"kotlin" => CodeKotlinAstV1Chunker
|
||||
.chunk(&canonical, chunk_policy)
|
||||
.context("kb-chunk::CodeKotlinAstV1Chunker::chunk (code:kotlin)")?,
|
||||
other => anyhow::bail!("unreachable (chunk): {other}"),
|
||||
};
|
||||
|
||||
|
||||
@@ -532,6 +532,77 @@ fn java_file_ingests_and_searches_as_code_citation() {
|
||||
);
|
||||
}
|
||||
|
||||
/// p10-1c-jk Task I: end-to-end check that a Kotlin source file is ingested
/// as code and can be found again through lexical search.
///
/// Writes `com/foo/Foo.kt` (declaring `package com.foo`) into the workspace,
/// runs an ingest, and verifies that:
/// - the ingest reports zero errors and at least one new item,
/// - the `Foo.kt` report item carries `parser_version = "code-kotlin-v1"` and
///   `chunker_version = "code-kotlin-ast-v1"`,
/// - a search for `bar` yields a `Citation::Code` hit with `lang = "kotlin"`,
///   `symbol = "com.foo.Foo.bar"`, `line_start >= 1`, and
///   `SearchHit.code_lang = "kotlin"`.
///
/// The file lives in a `com/foo/` sub-directory so that the fully-qualified
/// symbol assertion exercises the Kotlin package-prefix path end-to-end.
#[test]
fn kotlin_file_ingests_and_searches_as_code_citation() {
    let env = TestEnv::lexical_only();

    // Fixture layout: <workspace>/com/foo/Foo.kt.
    let package_dir = env.workspace_root.join("com").join("foo");
    std::fs::create_dir_all(&package_dir).unwrap();
    let kotlin_path = package_dir.join("Foo.kt");
    let kotlin_source = "package com.foo\n\nclass Foo {\n fun bar(): String = \"x\"\n}\n";
    std::fs::write(kotlin_path, kotlin_source).unwrap();

    // Ingest the workspace and check the top-level counters.
    let ingest_report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
        .expect("ingest must succeed");
    assert_eq!(ingest_report.errors, 0);
    assert!(ingest_report.new >= 1);

    // The per-item report for Foo.kt must name the Kotlin parser and chunker.
    let items = ingest_report.items.as_ref().expect("items present");
    let foo_item = items
        .iter()
        .find(|item| item.doc_path.0.ends_with("Foo.kt"))
        .expect("Foo.kt item present");
    let parser_version = foo_item.parser_version.as_ref().map(|v| v.0.as_str());
    assert_eq!(
        parser_version,
        Some("code-kotlin-v1"),
        "parser_version must be code-kotlin-v1"
    );
    let chunker_version = foo_item.chunker_version.as_ref().map(|v| v.0.as_str());
    assert_eq!(
        chunker_version,
        Some("code-kotlin-ast-v1"),
        "chunker_version must be code-kotlin-ast-v1"
    );

    // Search for the method name and take the first code citation returned.
    let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("bar"))
        .expect("search must succeed");
    let code_hit = hits
        .iter()
        .find(|hit| matches!(&hit.citation, kebab_core::Citation::Code { .. }))
        .expect("Citation::Code hit");

    if let kebab_core::Citation::Code {
        lang,
        symbol,
        line_start,
        ..
    } = &code_hit.citation
    {
        assert_eq!(lang.as_deref(), Some("kotlin"), "citation.lang must be 'kotlin'");
        assert_eq!(
            symbol.as_deref(),
            Some("com.foo.Foo.bar"),
            "citation.symbol must be 'com.foo.Foo.bar'"
        );
        assert!(*line_start >= 1, "line_start must be >=1");
    } else {
        // The `find` above only accepts `Citation::Code` hits.
        unreachable!();
    }

    assert_eq!(
        code_hit.code_lang.as_deref(),
        Some("kotlin"),
        "SearchHit.code_lang must be 'kotlin'"
    );
}
|
||||
|
||||
/// Re-ingesting the same `.rs` file without changes must report
|
||||
/// `Unchanged` (incremental-skip path exercised).
|
||||
#[test]
|
||||
|
||||
Reference in New Issue
Block a user