feat(p10-1a-1): add SearchHit.repo / code_lang + SearchFilters.repo / code_lang
Wire two new optional fields (`repo`, `code_lang`) onto SearchHit, each with `#[serde(default, skip_serializing_if = "Option::is_none")]`, and two `Vec<String>` filter fields of the same names onto SearchFilters, each with `#[serde(default)]`. Add a manual `impl Default for RetrievalDetail` (uses `SearchMode::Hybrid` as the sentinel `method`). Patch all downstream SearchHit / SearchFilters literal constructors with `repo: None` / `code_lang: None` or `repo: vec![]` / `code_lang: vec![]` as appropriate. Also adds the `Citation::Code` arm to the citation match in the kebab-eval metrics.
This commit is contained in:
@@ -197,6 +197,8 @@ fn parse_one(raw: &Value) -> Result<(SearchQuery, SearchOpts), String> {
|
||||
media,
|
||||
ingested_after,
|
||||
doc_id,
|
||||
repo: vec![],
|
||||
code_lang: vec![],
|
||||
};
|
||||
|
||||
let opts = SearchOpts {
|
||||
|
||||
@@ -828,6 +828,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
media: media_norm,
|
||||
ingested_after: ingested_after_parsed,
|
||||
doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())),
|
||||
repo: vec![],
|
||||
code_lang: vec![],
|
||||
};
|
||||
|
||||
let q = kebab_core::SearchQuery {
|
||||
|
||||
@@ -61,6 +61,14 @@ pub struct SearchFilters {
|
||||
/// p9-fb-36: restrict hits to a single document. None = no filter.
|
||||
#[serde(default)]
|
||||
pub doc_id: Option<DocumentId>,
|
||||
/// p10-1A-1: filter by `metadata.repo`. Empty = no filter; multi-value = OR.
|
||||
#[serde(default)]
|
||||
pub repo: Vec<String>,
|
||||
/// p10-1A-1: filter by `metadata.code_lang`. Empty = no filter; multi-value = OR.
|
||||
/// Identifiers are lowercase canonical names (`rust`, `python`, `typescript`, ...).
|
||||
/// Unknown values produce empty hits (consistent with `media` policy).
|
||||
#[serde(default)]
|
||||
pub code_lang: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
@@ -89,6 +97,15 @@ pub struct SearchHit {
|
||||
/// When absent on the older wire format (before fb-38), defaults to `Rrf` — hybrid is the default mode.
|
||||
#[serde(default)]
|
||||
pub score_kind: ScoreKind,
|
||||
/// p10-1A-1: optional. Filled when the source file lives in a git repo
|
||||
/// (`.git/` walk-up). null for markdown / pdf / image hits and for code
|
||||
/// hits ingested via `kebab ingest-file` outside a repo boundary.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub repo: Option<String>,
|
||||
/// p10-1A-1: optional. Programming language identifier (lowercase). Set for
|
||||
/// every code/manifest/k8s chunk; null for markdown / pdf / image hits.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub code_lang: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
@@ -101,6 +118,19 @@ pub struct RetrievalDetail {
|
||||
pub vector_rank: Option<u32>,
|
||||
}
|
||||
|
||||
impl Default for RetrievalDetail {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
method: SearchMode::Hybrid,
|
||||
fusion_score: 0.0,
|
||||
lexical_score: None,
|
||||
vector_score: None,
|
||||
lexical_rank: None,
|
||||
vector_rank: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Filter for `kb-app::list_docs` (§7.2 DocumentStore::list_documents).
|
||||
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
|
||||
pub struct DocFilter {
|
||||
@@ -257,6 +287,8 @@ mod tests {
|
||||
indexed_at: datetime!(2026-05-09 12:00:00 UTC),
|
||||
stale: true,
|
||||
score_kind: ScoreKind::Rrf,
|
||||
repo: None,
|
||||
code_lang: None,
|
||||
};
|
||||
let v = serde_json::to_value(&hit).unwrap();
|
||||
assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z");
|
||||
@@ -429,4 +461,74 @@ mod tests {
|
||||
assert!(v["response"].is_null());
|
||||
assert_eq!(v["error"]["code"], "config_invalid");
|
||||
}
|
||||
|
||||
/// A hit whose `repo` and `code_lang` are both `None` must serialize
/// without either key appearing in the JSON object.
#[test]
fn search_hit_repo_and_code_lang_are_optional_and_omit_when_none() {
    // Line citation pointing at a markdown file.
    let line_citation = Citation::Line {
        path: WorkspacePath("a.md".into()),
        start: 1,
        end: 2,
        section: None,
    };

    let markdown_hit = SearchHit {
        rank: 1,
        chunk_id: ChunkId("c1".into()),
        doc_id: DocumentId("d1".into()),
        doc_path: WorkspacePath("a.md".into()),
        heading_path: vec![],
        section_label: None,
        snippet: "".into(),
        citation: line_citation,
        retrieval: RetrievalDetail::default(),
        index_version: IndexVersion("v1".into()),
        embedding_model: None,
        chunker_version: ChunkerVersion("md-heading-v1".into()),
        indexed_at: time::OffsetDateTime::UNIX_EPOCH,
        stale: false,
        score_kind: ScoreKind::Rrf,
        // The two fields under test: both absent.
        repo: None,
        code_lang: None,
    };

    let json = serde_json::to_value(&markdown_hit).unwrap();

    let repo_entry = json.get("repo");
    assert!(repo_entry.is_none(), "repo should be omitted when None");

    let code_lang_entry = json.get("code_lang");
    assert!(
        code_lang_entry.is_none(),
        "code_lang should be omitted when None"
    );
}
|
||||
|
||||
/// A hit carrying `Some` values for `repo` and `code_lang` must expose both
/// as plain JSON strings under those keys.
#[test]
fn search_hit_repo_and_code_lang_present_when_some() {
    // Code citation pointing at a Rust source file.
    let code_citation = Citation::Code {
        path: WorkspacePath("a.rs".into()),
        line_start: 1,
        line_end: 2,
        symbol: None,
        lang: Some("rust".into()),
    };

    let code_hit = SearchHit {
        rank: 1,
        chunk_id: ChunkId("c1".into()),
        doc_id: DocumentId("d1".into()),
        doc_path: WorkspacePath("a.rs".into()),
        heading_path: vec![],
        section_label: None,
        snippet: "".into(),
        citation: code_citation,
        retrieval: RetrievalDetail::default(),
        index_version: IndexVersion("v1".into()),
        embedding_model: None,
        chunker_version: ChunkerVersion("code-rust-ast-v1".into()),
        indexed_at: time::OffsetDateTime::UNIX_EPOCH,
        stale: false,
        score_kind: ScoreKind::Rrf,
        // The two fields under test: both populated.
        repo: Some("kebab".into()),
        code_lang: Some("rust".into()),
    };

    let json = serde_json::to_value(&code_hit).unwrap();

    assert_eq!(json["repo"].as_str(), Some("kebab"));
    assert_eq!(json["code_lang"].as_str(), Some("rust"));
}
|
||||
|
||||
/// `SearchFilters::default()` must leave both the `repo` and `code_lang`
/// filter lists empty.
#[test]
fn search_filters_repo_and_code_lang_default_to_empty_vec() {
    let filters = SearchFilters::default();
    let no_values: Vec<String> = Vec::new();

    assert_eq!(filters.repo, no_values);
    assert_eq!(filters.code_lang, no_values);
}
|
||||
}
|
||||
|
||||
@@ -338,7 +338,8 @@ pub(crate) fn aggregate_from_rows(
|
||||
| Citation::Page { path, .. }
|
||||
| Citation::Region { path, .. }
|
||||
| Citation::Caption { path, .. }
|
||||
| Citation::Time { path, .. } => !path.0.is_empty(),
|
||||
| Citation::Time { path, .. }
|
||||
| Citation::Code { path, .. } => !path.0.is_empty(),
|
||||
});
|
||||
if covered {
|
||||
citation_num += 1;
|
||||
@@ -472,6 +473,8 @@ mod tests {
|
||||
indexed_at: OffsetDateTime::UNIX_EPOCH,
|
||||
stale: false,
|
||||
score_kind: kebab_core::ScoreKind::Rrf,
|
||||
repo: None,
|
||||
code_lang: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -87,6 +87,8 @@ fn hit(rank: u32, chunk_id: &str, doc_id: &str) -> SearchHit {
|
||||
indexed_at: OffsetDateTime::UNIX_EPOCH,
|
||||
stale: false,
|
||||
score_kind: kebab_core::ScoreKind::Rrf,
|
||||
repo: None,
|
||||
code_lang: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -110,6 +110,8 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
|
||||
media,
|
||||
ingested_after,
|
||||
doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
|
||||
repo: vec![],
|
||||
code_lang: vec![],
|
||||
};
|
||||
|
||||
let query = kebab_core::SearchQuery {
|
||||
|
||||
@@ -509,6 +509,8 @@ mod tests {
|
||||
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
||||
stale: false,
|
||||
score_kind: kebab_core::ScoreKind::Rrf,
|
||||
repo: None,
|
||||
code_lang: None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -760,6 +762,8 @@ mod tests {
|
||||
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
||||
stale: false,
|
||||
score_kind: kebab_core::ScoreKind::Rrf,
|
||||
repo: None,
|
||||
code_lang: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -470,6 +470,8 @@ fn build_hit(
|
||||
// in `RagPipeline::ask` against the configured threshold.
|
||||
stale: false,
|
||||
score_kind: ScoreKind::Bm25,
|
||||
repo: None,
|
||||
code_lang: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -327,6 +327,8 @@ fn build_hit(
|
||||
// in `RagPipeline::ask` against the configured threshold.
|
||||
stale: false,
|
||||
score_kind: ScoreKind::Cosine,
|
||||
repo: None,
|
||||
code_lang: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user