From fa4eeb5a87fc3196ed0d2e2671155b064f69a236 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Fri, 15 May 2026 15:04:23 +0900
Subject: [PATCH] feat(p10-1a-1): add SearchHit.repo / code_lang +
SearchFilters.repo / code_lang
Wire two new optional fields onto SearchHit (serde default; omitted from
serialized output when None via skip_serializing_if = "Option::is_none")
and two Vec filter fields onto SearchFilters (serde default; empty = no filter).
Add a manual Default impl for RetrievalDetail (uses SearchMode::Hybrid as the
sentinel method). Patch all downstream SearchHit / SearchFilters literal
constructors with repo: None / code_lang: None / vec![] as appropriate.
Also add the Citation::Code arm to the Citation match in kebab-eval metrics.
---
crates/kebab-app/src/bulk.rs | 2 +
crates/kebab-cli/src/main.rs | 2 +
crates/kebab-core/src/search.rs | 102 ++++++++++++++++++
crates/kebab-eval/src/metrics.rs | 5 +-
.../kebab-eval/tests/metrics_and_compare.rs | 2 +
crates/kebab-mcp/src/tools/search.rs | 2 +
crates/kebab-search/src/hybrid.rs | 4 +
crates/kebab-search/src/lexical.rs | 2 +
crates/kebab-search/src/vector.rs | 2 +
9 files changed, 122 insertions(+), 1 deletion(-)
diff --git a/crates/kebab-app/src/bulk.rs b/crates/kebab-app/src/bulk.rs
index 50676b4..36be6c4 100644
--- a/crates/kebab-app/src/bulk.rs
+++ b/crates/kebab-app/src/bulk.rs
@@ -197,6 +197,8 @@ fn parse_one(raw: &Value) -> Result<(SearchQuery, SearchOpts), String> {
media,
ingested_after,
doc_id,
+ repo: vec![],
+ code_lang: vec![],
};
let opts = SearchOpts {
diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index db257ff..3ca6e63 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -828,6 +828,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
media: media_norm,
ingested_after: ingested_after_parsed,
doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())),
+ repo: vec![],
+ code_lang: vec![],
};
let q = kebab_core::SearchQuery {
diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs
index 137370c..eaf8470 100644
--- a/crates/kebab-core/src/search.rs
+++ b/crates/kebab-core/src/search.rs
@@ -61,6 +61,14 @@ pub struct SearchFilters {
/// p9-fb-36: restrict hits to a single document. None = no filter.
#[serde(default)]
pub doc_id: Option,
+ /// p10-1A-1: filter by `metadata.repo`. Empty = no filter; multi-value = OR.
+ #[serde(default)]
+ pub repo: Vec,
+ /// p10-1A-1: filter by `metadata.code_lang`. Empty = no filter; multi-value = OR.
+ /// Identifiers are lowercase canonical names (`rust`, `python`, `typescript`, ...).
+ /// Unknown values produce empty hits (consistent with `media` policy).
+ #[serde(default)]
+ pub code_lang: Vec,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -89,6 +97,15 @@ pub struct SearchHit {
/// 옛 wire (fb-38 미만) 부재 시 `Rrf` default — hybrid 가 기본 mode.
#[serde(default)]
pub score_kind: ScoreKind,
+ /// p10-1A-1: optional. Filled when the source file lives in a git repo
+ /// (`.git/` walk-up). null for markdown / pdf / image hits and for code
+ /// hits ingested via `kebab ingest-file` outside a repo boundary.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub repo: Option,
+ /// p10-1A-1: optional. Programming language identifier (lowercase). Set for
+ /// every code/manifest/k8s chunk; null for markdown / pdf / image hits.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub code_lang: Option,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -101,6 +118,19 @@ pub struct RetrievalDetail {
pub vector_rank: Option,
}
+impl Default for RetrievalDetail { // Manual impl rather than a derive. NOTE(review): presumably because SearchMode has no Default — confirm.
+ fn default() -> Self { // Builds a placeholder detail: hybrid method, zero fusion score, no per-backend data.
+ Self {
+ method: SearchMode::Hybrid, // sentinel mode chosen for the default value
+ fusion_score: 0.0, // default fusion score is zero
+ lexical_score: None, // lexical/vector scores and ranks all default to None
+ vector_score: None,
+ lexical_rank: None,
+ vector_rank: None,
+ }
+ }
+}
+
/// Filter for `kb-app::list_docs` (§7.2 DocumentStore::list_documents).
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
pub struct DocFilter {
@@ -257,6 +287,8 @@ mod tests {
indexed_at: datetime!(2026-05-09 12:00:00 UTC),
stale: true,
score_kind: ScoreKind::Rrf,
+ repo: None,
+ code_lang: None,
};
let v = serde_json::to_value(&hit).unwrap();
assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z");
@@ -429,4 +461,74 @@ mod tests {
assert!(v["response"].is_null());
assert_eq!(v["error"]["code"], "config_invalid");
}
+
+ #[test]
+ fn search_hit_repo_and_code_lang_are_optional_and_omit_when_none() { // A hit with repo/code_lang = None must serialize without those keys.
+ let hit = SearchHit {
+ rank: 1,
+ chunk_id: ChunkId("c1".into()),
+ doc_id: DocumentId("d1".into()),
+ doc_path: WorkspacePath("a.md".into()),
+ heading_path: vec![],
+ section_label: None,
+ snippet: "".into(),
+ citation: Citation::Line { // markdown-style line citation on a.md
+ path: WorkspacePath("a.md".into()),
+ start: 1,
+ end: 2,
+ section: None,
+ },
+ retrieval: RetrievalDetail::default(), // placeholder; retrieval detail is not asserted here
+ index_version: IndexVersion("v1".into()),
+ embedding_model: None,
+ chunker_version: ChunkerVersion("md-heading-v1".into()),
+ indexed_at: time::OffsetDateTime::UNIX_EPOCH,
+ stale: false,
+ score_kind: ScoreKind::Rrf,
+ repo: None, // field under test
+ code_lang: None, // field under test
+ };
+ let v = serde_json::to_value(&hit).unwrap();
+ assert!(v.get("repo").is_none(), "repo should be omitted when None"); // key must be absent; a present JSON null would fail this check
+ assert!(v.get("code_lang").is_none(), "code_lang should be omitted when None"); // same absence check for code_lang
+ }
+
+ #[test]
+ fn search_hit_repo_and_code_lang_present_when_some() { // A hit with repo/code_lang = Some(..) must serialize both as JSON strings.
+ let hit = SearchHit {
+ rank: 1,
+ chunk_id: ChunkId("c1".into()),
+ doc_id: DocumentId("d1".into()),
+ doc_path: WorkspacePath("a.rs".into()),
+ heading_path: vec![],
+ section_label: None,
+ snippet: "".into(),
+ citation: Citation::Code { // code citation on a.rs, matching the code-hit scenario
+ path: WorkspacePath("a.rs".into()),
+ line_start: 1,
+ line_end: 2,
+ symbol: None,
+ lang: Some("rust".into()),
+ },
+ retrieval: RetrievalDetail::default(), // placeholder; retrieval detail is not asserted here
+ index_version: IndexVersion("v1".into()),
+ embedding_model: None,
+ chunker_version: ChunkerVersion("code-rust-ast-v1".into()),
+ indexed_at: time::OffsetDateTime::UNIX_EPOCH,
+ stale: false,
+ score_kind: ScoreKind::Rrf,
+ repo: Some("kebab".into()), // field under test
+ code_lang: Some("rust".into()), // field under test
+ };
+ let v = serde_json::to_value(&hit).unwrap();
+ assert_eq!(v["repo"], "kebab"); // emitted as a plain string value
+ assert_eq!(v["code_lang"], "rust"); // emitted as a plain string value
+ }
+
+ #[test]
+ fn search_filters_repo_and_code_lang_default_to_empty_vec() { // Default filters must carry empty repo / code_lang lists.
+ let f = SearchFilters::default();
+ assert!(f.repo.is_empty()); // empty means "no filter" per the SearchFilters field docs
+ assert!(f.code_lang.is_empty()); // same for code_lang
+ }
}
diff --git a/crates/kebab-eval/src/metrics.rs b/crates/kebab-eval/src/metrics.rs
index f138845..6a80ed0 100644
--- a/crates/kebab-eval/src/metrics.rs
+++ b/crates/kebab-eval/src/metrics.rs
@@ -338,7 +338,8 @@ pub(crate) fn aggregate_from_rows(
| Citation::Page { path, .. }
| Citation::Region { path, .. }
| Citation::Caption { path, .. }
- | Citation::Time { path, .. } => !path.0.is_empty(),
+ | Citation::Time { path, .. }
+ | Citation::Code { path, .. } => !path.0.is_empty(),
});
if covered {
citation_num += 1;
@@ -472,6 +473,8 @@ mod tests {
indexed_at: OffsetDateTime::UNIX_EPOCH,
stale: false,
score_kind: kebab_core::ScoreKind::Rrf,
+ repo: None,
+ code_lang: None,
}
}
diff --git a/crates/kebab-eval/tests/metrics_and_compare.rs b/crates/kebab-eval/tests/metrics_and_compare.rs
index 7cd7355..17b6e56 100644
--- a/crates/kebab-eval/tests/metrics_and_compare.rs
+++ b/crates/kebab-eval/tests/metrics_and_compare.rs
@@ -87,6 +87,8 @@ fn hit(rank: u32, chunk_id: &str, doc_id: &str) -> SearchHit {
indexed_at: OffsetDateTime::UNIX_EPOCH,
stale: false,
score_kind: kebab_core::ScoreKind::Rrf,
+ repo: None,
+ code_lang: None,
}
}
diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs
index 722dbdd..2586294 100644
--- a/crates/kebab-mcp/src/tools/search.rs
+++ b/crates/kebab-mcp/src/tools/search.rs
@@ -110,6 +110,8 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
media,
ingested_after,
doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
+ repo: vec![],
+ code_lang: vec![],
};
let query = kebab_core::SearchQuery {
diff --git a/crates/kebab-search/src/hybrid.rs b/crates/kebab-search/src/hybrid.rs
index 6d9286b..3378f51 100644
--- a/crates/kebab-search/src/hybrid.rs
+++ b/crates/kebab-search/src/hybrid.rs
@@ -509,6 +509,8 @@ mod tests {
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
stale: false,
score_kind: kebab_core::ScoreKind::Rrf,
+ repo: None,
+ code_lang: None,
}
}
@@ -760,6 +762,8 @@ mod tests {
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
stale: false,
score_kind: kebab_core::ScoreKind::Rrf,
+ repo: None,
+ code_lang: None,
}
}
diff --git a/crates/kebab-search/src/lexical.rs b/crates/kebab-search/src/lexical.rs
index 9d83b8f..43b4d26 100644
--- a/crates/kebab-search/src/lexical.rs
+++ b/crates/kebab-search/src/lexical.rs
@@ -470,6 +470,8 @@ fn build_hit(
// in `RagPipeline::ask` against the configured threshold.
stale: false,
score_kind: ScoreKind::Bm25,
+ repo: None,
+ code_lang: None,
})
}
diff --git a/crates/kebab-search/src/vector.rs b/crates/kebab-search/src/vector.rs
index 47eda97..3975c2e 100644
--- a/crates/kebab-search/src/vector.rs
+++ b/crates/kebab-search/src/vector.rs
@@ -327,6 +327,8 @@ fn build_hit(
// in `RagPipeline::ask` against the configured threshold.
stale: false,
score_kind: ScoreKind::Cosine,
+ repo: None,
+ code_lang: None,
})
}