diff --git a/crates/kebab-app/src/bulk.rs b/crates/kebab-app/src/bulk.rs index 50676b4..36be6c4 100644 --- a/crates/kebab-app/src/bulk.rs +++ b/crates/kebab-app/src/bulk.rs @@ -197,6 +197,8 @@ fn parse_one(raw: &Value) -> Result<(SearchQuery, SearchOpts), String> { media, ingested_after, doc_id, + repo: vec![], + code_lang: vec![], }; let opts = SearchOpts { diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index db257ff..3ca6e63 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -828,6 +828,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> { media: media_norm, ingested_after: ingested_after_parsed, doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())), + repo: vec![], + code_lang: vec![], }; let q = kebab_core::SearchQuery { diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs index 137370c..eaf8470 100644 --- a/crates/kebab-core/src/search.rs +++ b/crates/kebab-core/src/search.rs @@ -61,6 +61,14 @@ pub struct SearchFilters { /// p9-fb-36: restrict hits to a single document. None = no filter. #[serde(default)] pub doc_id: Option, + /// p10-1A-1: filter by `metadata.repo`. Empty = no filter; multi-value = OR. + #[serde(default)] + pub repo: Vec, + /// p10-1A-1: filter by `metadata.code_lang`. Empty = no filter; multi-value = OR. + /// Identifiers are lowercase canonical names (`rust`, `python`, `typescript`, ...). + /// Unknown values produce empty hits (consistent with `media` policy). + #[serde(default)] + pub code_lang: Vec, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] @@ -89,6 +97,15 @@ pub struct SearchHit { /// 옛 wire (fb-38 미만) 부재 시 `Rrf` default — hybrid 가 기본 mode. #[serde(default)] pub score_kind: ScoreKind, + /// p10-1A-1: optional. Filled when the source file lives in a git repo + /// (`.git/` walk-up). null for markdown / pdf / image hits and for code + /// hits ingested via `kebab ingest-file` outside a repo boundary. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub repo: Option, + /// p10-1A-1: optional. Programming language identifier (lowercase). Set for + /// every code/manifest/k8s chunk; null for markdown / pdf / image hits. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub code_lang: Option, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] @@ -101,6 +118,19 @@ pub struct RetrievalDetail { pub vector_rank: Option, } +impl Default for RetrievalDetail { + fn default() -> Self { + Self { + method: SearchMode::Hybrid, + fusion_score: 0.0, + lexical_score: None, + vector_score: None, + lexical_rank: None, + vector_rank: None, + } + } +} + /// Filter for `kb-app::list_docs` (§7.2 DocumentStore::list_documents). #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] pub struct DocFilter { @@ -257,6 +287,8 @@ mod tests { indexed_at: datetime!(2026-05-09 12:00:00 UTC), stale: true, score_kind: ScoreKind::Rrf, + repo: None, + code_lang: None, }; let v = serde_json::to_value(&hit).unwrap(); assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z"); @@ -429,4 +461,74 @@ mod tests { assert!(v["response"].is_null()); assert_eq!(v["error"]["code"], "config_invalid"); } + + #[test] + fn search_hit_repo_and_code_lang_are_optional_and_omit_when_none() { + let hit = SearchHit { + rank: 1, + chunk_id: ChunkId("c1".into()), + doc_id: DocumentId("d1".into()), + doc_path: WorkspacePath("a.md".into()), + heading_path: vec![], + section_label: None, + snippet: "".into(), + citation: Citation::Line { + path: WorkspacePath("a.md".into()), + start: 1, + end: 2, + section: None, + }, + retrieval: RetrievalDetail::default(), + index_version: IndexVersion("v1".into()), + embedding_model: None, + chunker_version: ChunkerVersion("md-heading-v1".into()), + indexed_at: time::OffsetDateTime::UNIX_EPOCH, + stale: false, + score_kind: ScoreKind::Rrf, + repo: None, + code_lang: None, + }; + let v = serde_json::to_value(&hit).unwrap(); + assert!(v.get("repo").is_none(), "repo should be omitted when None"); + assert!(v.get("code_lang").is_none(), "code_lang should be omitted when None"); + } + + #[test] + fn search_hit_repo_and_code_lang_present_when_some() { + let hit = SearchHit { + rank: 1, + chunk_id: ChunkId("c1".into()), + doc_id: DocumentId("d1".into()), + doc_path: WorkspacePath("a.rs".into()), + heading_path: vec![], + section_label: None, + snippet: "".into(), + citation: Citation::Code { + path: WorkspacePath("a.rs".into()), + line_start: 1, + line_end: 2, + symbol: None, + lang: Some("rust".into()), + }, + retrieval: RetrievalDetail::default(), + index_version: IndexVersion("v1".into()), + embedding_model: None, + chunker_version: ChunkerVersion("code-rust-ast-v1".into()), + indexed_at: time::OffsetDateTime::UNIX_EPOCH, + stale: false, + score_kind: ScoreKind::Rrf, + repo: Some("kebab".into()), + code_lang: Some("rust".into()), + }; + let v = serde_json::to_value(&hit).unwrap(); + assert_eq!(v["repo"], "kebab"); + assert_eq!(v["code_lang"], "rust"); + } + + #[test] + fn search_filters_repo_and_code_lang_default_to_empty_vec() { + let f = SearchFilters::default(); + assert!(f.repo.is_empty()); + assert!(f.code_lang.is_empty()); + } } diff --git a/crates/kebab-eval/src/metrics.rs b/crates/kebab-eval/src/metrics.rs index f138845..6a80ed0 100644 --- a/crates/kebab-eval/src/metrics.rs +++ b/crates/kebab-eval/src/metrics.rs @@ -338,7 +338,8 @@ pub(crate) fn aggregate_from_rows( | Citation::Page { path, .. } | Citation::Region { path, .. } | Citation::Caption { path, .. } - | Citation::Time { path, .. } => !path.0.is_empty(), + | Citation::Time { path, .. } + | Citation::Code { path, .. } => !path.0.is_empty(), }); if covered { citation_num += 1; @@ -472,6 +473,8 @@ mod tests { indexed_at: OffsetDateTime::UNIX_EPOCH, stale: false, score_kind: kebab_core::ScoreKind::Rrf, + repo: None, + code_lang: None, } } diff --git a/crates/kebab-eval/tests/metrics_and_compare.rs b/crates/kebab-eval/tests/metrics_and_compare.rs index 7cd7355..17b6e56 100644 --- a/crates/kebab-eval/tests/metrics_and_compare.rs +++ b/crates/kebab-eval/tests/metrics_and_compare.rs @@ -87,6 +87,8 @@ fn hit(rank: u32, chunk_id: &str, doc_id: &str) -> SearchHit { indexed_at: OffsetDateTime::UNIX_EPOCH, stale: false, score_kind: kebab_core::ScoreKind::Rrf, + repo: None, + code_lang: None, } } diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs index 722dbdd..2586294 100644 --- a/crates/kebab-mcp/src/tools/search.rs +++ b/crates/kebab-mcp/src/tools/search.rs @@ -110,6 +110,8 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { media, ingested_after, doc_id: input.doc_id.clone().map(kebab_core::DocumentId), + repo: vec![], + code_lang: vec![], }; let query = kebab_core::SearchQuery { diff --git a/crates/kebab-search/src/hybrid.rs b/crates/kebab-search/src/hybrid.rs index 6d9286b..3378f51 100644 --- a/crates/kebab-search/src/hybrid.rs +++ b/crates/kebab-search/src/hybrid.rs @@ -509,6 +509,8 @@ mod tests { indexed_at: time::OffsetDateTime::UNIX_EPOCH, stale: false, score_kind: kebab_core::ScoreKind::Rrf, + repo: None, + code_lang: None, } } @@ -760,6 +762,8 @@ mod tests { indexed_at: time::OffsetDateTime::UNIX_EPOCH, stale: false, score_kind: kebab_core::ScoreKind::Rrf, + repo: None, + code_lang: None, } } diff --git a/crates/kebab-search/src/lexical.rs b/crates/kebab-search/src/lexical.rs index 9d83b8f..43b4d26 100644 --- a/crates/kebab-search/src/lexical.rs +++ b/crates/kebab-search/src/lexical.rs @@ -470,6 +470,8 @@ fn build_hit( // in `RagPipeline::ask` against the configured threshold. stale: false, score_kind: ScoreKind::Bm25, + repo: None, + code_lang: None, }) } diff --git a/crates/kebab-search/src/vector.rs b/crates/kebab-search/src/vector.rs index 47eda97..3975c2e 100644 --- a/crates/kebab-search/src/vector.rs +++ b/crates/kebab-search/src/vector.rs @@ -327,6 +327,8 @@ fn build_hit( // in `RagPipeline::ask` against the configured threshold. stale: false, score_kind: ScoreKind::Cosine, + repo: None, + code_lang: None, }) }