From 88d7fbc182bdd0aea669b3e97b064cbd6736143a Mon Sep 17 00:00:00 2001 From: altair823 Date: Thu, 21 May 2026 11:28:41 +0000 Subject: [PATCH] feat(p10-3): activate shell direct routing through Tier 3 chunker Extends ingest_one_code_asset's allowlist + 4-arm match (parser_version / chunker_version / extract / chunks) to admit code_lang "shell" and route it to CodeTextParagraphV1Chunker. parser_version "none-v1" + synthesize_tier2_document reused. Tier 1/2 fallback wrapper lands in the next commit. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/lib.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 585b47f..4269163 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -39,7 +39,7 @@ use std::sync::Arc; use anyhow::{Context, anyhow}; use serde::{Deserialize, Serialize}; -use kebab_chunk::{CodeGoAstV1Chunker, CodeJavaAstV1Chunker, CodeJsAstV1Chunker, CodeKotlinAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTsAstV1Chunker, DockerfileFileV1Chunker, K8sManifestResourceV1Chunker, ManifestFileV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker}; +use kebab_chunk::{CodeGoAstV1Chunker, CodeJavaAstV1Chunker, CodeJsAstV1Chunker, CodeKotlinAstV1Chunker, CodePythonAstV1Chunker, CodeRustAstV1Chunker, CodeTextParagraphV1Chunker, CodeTsAstV1Chunker, DockerfileFileV1Chunker, K8sManifestResourceV1Chunker, ManifestFileV1Chunker, MdHeadingV1Chunker, PdfPageV1Chunker}; use kebab_core::{ Answer, Block, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker, DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput, @@ -948,11 +948,12 @@ fn ingest_one_asset( force_reingest, ); } - // p10-1A-2 / 1B: code ingest dispatch. p10-2: Tier 2 langs added. + // p10-1A-2 / 1B: code ingest dispatch. p10-2: Tier 2 langs added. p10-3: shell added. MediaType::Code(lang) if matches!(lang.as_str(), "rust" | "python" | "typescript" | "javascript" | "go" | "java" | "kotlin" - | "yaml" | "dockerfile" | "toml" | "json" | "xml" | "groovy" | "go-mod") => + | "yaml" | "dockerfile" | "toml" | "json" | "xml" | "groovy" | "go-mod" + | "shell") => { return ingest_one_code_asset( app, @@ -1835,6 +1836,8 @@ fn ingest_one_code_asset( // p10-2: Tier 2 has no parse step — sentinel "none-v1". "yaml" | "dockerfile" | "toml" | "json" | "xml" | "groovy" | "go-mod" => ParserVersion("none-v1".to_string()), + // p10-3: shell direct routes to Tier 3 (no parse step). + "shell" => ParserVersion("none-v1".to_string()), other => anyhow::bail!("unsupported code_lang: {other}"), }; @@ -1852,6 +1855,8 @@ fn ingest_one_code_asset( "dockerfile" => DockerfileFileV1Chunker.chunker_version(), "toml" | "json" | "xml" | "groovy" | "go-mod" => ManifestFileV1Chunker.chunker_version(), + // p10-3: + "shell" => CodeTextParagraphV1Chunker.chunker_version(), other => anyhow::bail!("unreachable chunker_version: {other}"), }; @@ -1903,6 +1908,8 @@ fn ingest_one_code_asset( "yaml" | "dockerfile" | "toml" | "json" | "xml" | "groovy" | "go-mod" => { synthesize_tier2_document(asset, &bytes, code_lang, &parser_version)? } + // p10-3: shell reuses the same synthesizer. + "shell" => synthesize_tier2_document(asset, &bytes, "shell", &parser_version)?, other => anyhow::bail!("unreachable (extract): {other}"), }; @@ -1940,6 +1947,10 @@ fn ingest_one_code_asset( => ManifestFileV1Chunker .chunk(&canonical, chunk_policy) .context("kb-chunk::ManifestFileV1Chunker::chunk")?, + // p10-3: + "shell" => CodeTextParagraphV1Chunker + .chunk(&canonical, chunk_policy) + .context("kb-chunk::CodeTextParagraphV1Chunker::chunk (code:shell)")?, other => anyhow::bail!("unreachable (chunk): {other}"), };