feat(p10-1c-go): activate Go in ingest_one_code_asset dispatch

Replaces Go bail! arms with GoAstExtractor + CodeGoAstV1Chunker. Adds
go_file_ingests_and_searches_as_code_citation integration test — asserts
citation.lang=go, symbol=chunk.ParseDoc, code_lang=go.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-20 09:13:47 +00:00
parent f1a4f67e12
commit c19aa006d0
2 changed files with 81 additions and 6 deletions

View File

@@ -39,7 +39,7 @@ use std::sync::Arc;
use anyhow::{Context, anyhow};
use serde::{Deserialize, Serialize};
use kebab_chunk::{CodeJsAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
use kebab_chunk::{CodeGoAstV1Chunker, CodeJsAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker};
use kebab_core::{
Answer, Block, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
@@ -50,7 +50,7 @@ use kebab_core::{
use kebab_llm_local::OllamaLanguageModel;
use kebab_normalize::build_canonical_document;
use kebab_parse_image::{ImageExtractor, OllamaVisionOcr, apply_caption, apply_ocr};
use kebab_parse_code::{JavascriptAstExtractor, PythonAstExtractor, RustAstExtractor, TypescriptAstExtractor};
use kebab_parse_code::{GoAstExtractor, JavascriptAstExtractor, PythonAstExtractor, RustAstExtractor, TypescriptAstExtractor};
use kebab_parse_pdf::PdfTextExtractor;
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kebab_source_fs::FsSourceConnector;
@@ -1827,7 +1827,7 @@ fn ingest_one_code_asset(
"python" => ParserVersion(kebab_parse_code::PYTHON_PARSER_VERSION.to_string()),
"typescript" => ParserVersion(kebab_parse_code::TS_PARSER_VERSION.to_string()),
"javascript" => ParserVersion(kebab_parse_code::JS_PARSER_VERSION.to_string()),
"go" => anyhow::bail!("go ingest not yet wired (p10-1c-go Task F)"),
"go" => ParserVersion(kebab_parse_code::GO_PARSER_VERSION.to_string()),
other => anyhow::bail!("unsupported code_lang: {other}"),
};
@@ -1837,7 +1837,7 @@ fn ingest_one_code_asset(
"python" => CodePythonAstV1Chunker.chunker_version(),
"typescript" => CodeTsAstV1Chunker.chunker_version(),
"javascript" => CodeJsAstV1Chunker.chunker_version(),
"go" => anyhow::bail!("go ingest not yet wired (p10-1c-go Task F)"),
"go" => CodeGoAstV1Chunker.chunker_version(),
other => anyhow::bail!("unreachable chunker_version: {other}"),
};
@@ -1876,7 +1876,9 @@ fn ingest_one_code_asset(
"javascript" => JavascriptAstExtractor::new()
.extract(&ctx, &bytes)
.context("kb-parse-code::JavascriptAstExtractor::extract (code:javascript)")?,
"go" => anyhow::bail!("go ingest not yet wired (p10-1c-go Task F)"),
"go" => GoAstExtractor::new()
.extract(&ctx, &bytes)
.context("kb-parse-code::GoAstExtractor::extract (code:go)")?,
other => anyhow::bail!("unreachable (extract): {other}"),
};
@@ -1894,7 +1896,9 @@ fn ingest_one_code_asset(
"javascript" => CodeJsAstV1Chunker
.chunk(&canonical, chunk_policy)
.context("kb-chunk::CodeJsAstV1Chunker::chunk (code:javascript)")?,
"go" => anyhow::bail!("go ingest not yet wired (p10-1c-go Task F)"),
"go" => CodeGoAstV1Chunker
.chunk(&canonical, chunk_policy)
.context("kb-chunk::CodeGoAstV1Chunker::chunk (code:go)")?,
other => anyhow::bail!("unreachable (chunk): {other}"),
};

View File

@@ -390,6 +390,77 @@ fn javascript_file_ingests_and_searches_as_code_citation() {
);
}
/// p10-1c-go Task F: end-to-end ingest + lexical search for a Go source file.
///
/// Writes `chunk/ast.go` (package `chunk`, a single function `ParseDoc`) into
/// the test workspace, runs ingest, and checks that:
/// - the ingest report has zero errors and at least one new item;
/// - the `ast.go` report item carries parser version `code-go-v1` and
///   chunker version `code-go-ast-v1`;
/// - a lexical search for `ParseDoc` yields a `Citation::Code` hit with
///   `lang="go"`, `symbol="chunk.ParseDoc"`, and `line_start >= 1`;
/// - that same hit reports `code_lang="go"`.
///
/// The file is placed in a sub-directory (`chunk/`) so that the Go
/// package-prefix wiring produces a non-empty module prefix, which the
/// fully-qualified symbol assertion then exercises end-to-end.
#[test]
fn go_file_ingests_and_searches_as_code_citation() {
    // Fixture: minimal Go package with one exported function.
    const GO_SOURCE: &str =
        "package chunk\n\nfunc ParseDoc(input string) string {\n return input\n}\n";

    let env = TestEnv::lexical_only();

    // Lay the fixture down under `<workspace>/chunk/ast.go`.
    let go_pkg = env.workspace_root.join("chunk");
    std::fs::create_dir_all(&go_pkg).unwrap();
    std::fs::write(go_pkg.join("ast.go"), GO_SOURCE).unwrap();

    // Ingest the workspace and sanity-check the aggregate counters first.
    let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
        .expect("ingest must succeed");
    assert_eq!(report.errors, 0);
    assert!(report.new >= 1);

    // Locate the per-file report entry for the Go fixture.
    let items = report.items.as_ref().expect("items present");
    let ast_entry = items
        .iter()
        .find(|entry| entry.doc_path.0.ends_with("ast.go"))
        .expect("ast.go item present");

    let parser_version = ast_entry.parser_version.as_ref().map(|v| v.0.as_str());
    assert_eq!(
        parser_version,
        Some("code-go-v1"),
        "parser_version must be code-go-v1"
    );

    let chunker_version = ast_entry.chunker_version.as_ref().map(|v| v.0.as_str());
    assert_eq!(
        chunker_version,
        Some("code-go-ast-v1"),
        "chunker_version must be code-go-ast-v1"
    );

    // Search lexically and pick the first hit that is a code citation.
    let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("ParseDoc"))
        .expect("search must succeed");
    let code_hit = hits
        .iter()
        .find(|hit| matches!(&hit.citation, kebab_core::Citation::Code { .. }))
        .expect("Citation::Code hit");

    // The `find` above already filtered on the variant, so the `else` arm
    // cannot be taken.
    let kebab_core::Citation::Code {
        lang,
        symbol,
        line_start,
        ..
    } = &code_hit.citation
    else {
        unreachable!()
    };
    assert_eq!(lang.as_deref(), Some("go"), "citation.lang must be 'go'");
    assert_eq!(
        symbol.as_deref(),
        Some("chunk.ParseDoc"),
        "citation.symbol must be 'chunk.ParseDoc'"
    );
    assert!(*line_start >= 1, "line_start must be >=1");

    assert_eq!(
        code_hit.code_lang.as_deref(),
        Some("go"),
        "SearchHit.code_lang must be 'go'"
    );
}
/// Re-ingesting the same `.rs` file without changes must report
/// `Unchanged` (incremental-skip path exercised).
#[test]