feat(p10-1a-1): add SearchHit.repo / code_lang + SearchFilters.repo / code_lang
Wire two new optional fields (`repo`, `code_lang`) onto SearchHit (`skip_serializing_if = "Option::is_none"`, so they are omitted when None) and two Vec<String> filter fields (`repo`, `code_lang`) onto SearchFilters (`#[serde(default)]`). Add a manual `impl Default for RetrievalDetail` (uses SearchMode::Hybrid as sentinel). Patch all downstream SearchHit / SearchFilters literal constructors with `repo: None` / `code_lang: None` / `vec![]` as appropriate. Also covers the Citation::Code arm in the kebab-eval metrics match.
This commit is contained in:
@@ -197,6 +197,8 @@ fn parse_one(raw: &Value) -> Result<(SearchQuery, SearchOpts), String> {
|
|||||||
media,
|
media,
|
||||||
ingested_after,
|
ingested_after,
|
||||||
doc_id,
|
doc_id,
|
||||||
|
repo: vec![],
|
||||||
|
code_lang: vec![],
|
||||||
};
|
};
|
||||||
|
|
||||||
let opts = SearchOpts {
|
let opts = SearchOpts {
|
||||||
|
|||||||
@@ -828,6 +828,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
|||||||
media: media_norm,
|
media: media_norm,
|
||||||
ingested_after: ingested_after_parsed,
|
ingested_after: ingested_after_parsed,
|
||||||
doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())),
|
doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())),
|
||||||
|
repo: vec![],
|
||||||
|
code_lang: vec![],
|
||||||
};
|
};
|
||||||
|
|
||||||
let q = kebab_core::SearchQuery {
|
let q = kebab_core::SearchQuery {
|
||||||
|
|||||||
@@ -61,6 +61,14 @@ pub struct SearchFilters {
|
|||||||
/// p9-fb-36: restrict hits to a single document. None = no filter.
|
/// p9-fb-36: restrict hits to a single document. None = no filter.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub doc_id: Option<DocumentId>,
|
pub doc_id: Option<DocumentId>,
|
||||||
|
/// p10-1A-1: filter by `metadata.repo`. Empty = no filter; multi-value = OR.
|
||||||
|
#[serde(default)]
|
||||||
|
pub repo: Vec<String>,
|
||||||
|
/// p10-1A-1: filter by `metadata.code_lang`. Empty = no filter; multi-value = OR.
|
||||||
|
/// Identifiers are lowercase canonical names (`rust`, `python`, `typescript`, ...).
|
||||||
|
/// Unknown values produce empty hits (consistent with `media` policy).
|
||||||
|
#[serde(default)]
|
||||||
|
pub code_lang: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
@@ -89,6 +97,15 @@ pub struct SearchHit {
|
|||||||
/// 옛 wire (fb-38 미만) 부재 시 `Rrf` default — hybrid 가 기본 mode.
|
/// 옛 wire (fb-38 미만) 부재 시 `Rrf` default — hybrid 가 기본 mode.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub score_kind: ScoreKind,
|
pub score_kind: ScoreKind,
|
||||||
|
/// p10-1A-1: optional. Filled when the source file lives in a git repo
|
||||||
|
/// (`.git/` walk-up). null for markdown / pdf / image hits and for code
|
||||||
|
/// hits ingested via `kebab ingest-file` outside a repo boundary.
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub repo: Option<String>,
|
||||||
|
/// p10-1A-1: optional. Programming language identifier (lowercase). Set for
|
||||||
|
/// every code/manifest/k8s chunk; null for markdown / pdf / image hits.
|
||||||
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||||
|
pub code_lang: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||||
@@ -101,6 +118,19 @@ pub struct RetrievalDetail {
|
|||||||
pub vector_rank: Option<u32>,
|
pub vector_rank: Option<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Default for RetrievalDetail { // Hand-written `Default`: a placeholder value with every optional score/rank unset.
|
||||||
|
fn default() -> Self { // Returns the placeholder `RetrievalDetail` described field by field below.
|
||||||
|
Self {
|
||||||
|
method: SearchMode::Hybrid, // Hybrid is the mode picked for the default value.
|
||||||
|
fusion_score: 0.0, // Default fused score is zero.
|
||||||
|
lexical_score: None, // Lexical score left unset.
|
||||||
|
vector_score: None, // Vector score left unset.
|
||||||
|
lexical_rank: None, // Lexical rank left unset.
|
||||||
|
vector_rank: None, // Vector rank left unset.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Filter for `kb-app::list_docs` (§7.2 DocumentStore::list_documents).
|
/// Filter for `kb-app::list_docs` (§7.2 DocumentStore::list_documents).
|
||||||
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct DocFilter {
|
pub struct DocFilter {
|
||||||
@@ -257,6 +287,8 @@ mod tests {
|
|||||||
indexed_at: datetime!(2026-05-09 12:00:00 UTC),
|
indexed_at: datetime!(2026-05-09 12:00:00 UTC),
|
||||||
stale: true,
|
stale: true,
|
||||||
score_kind: ScoreKind::Rrf,
|
score_kind: ScoreKind::Rrf,
|
||||||
|
repo: None,
|
||||||
|
code_lang: None,
|
||||||
};
|
};
|
||||||
let v = serde_json::to_value(&hit).unwrap();
|
let v = serde_json::to_value(&hit).unwrap();
|
||||||
assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z");
|
assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z");
|
||||||
@@ -429,4 +461,74 @@ mod tests {
|
|||||||
assert!(v["response"].is_null());
|
assert!(v["response"].is_null());
|
||||||
assert_eq!(v["error"]["code"], "config_invalid");
|
assert_eq!(v["error"]["code"], "config_invalid");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn search_hit_repo_and_code_lang_are_optional_and_omit_when_none() { // A hit with `repo` / `code_lang` set to None must serialize without those keys.
|
||||||
|
let hit = SearchHit { // Minimal markdown-style hit; only the last two fields matter for this test.
|
||||||
|
rank: 1,
|
||||||
|
chunk_id: ChunkId("c1".into()),
|
||||||
|
doc_id: DocumentId("d1".into()),
|
||||||
|
doc_path: WorkspacePath("a.md".into()),
|
||||||
|
heading_path: vec![],
|
||||||
|
section_label: None,
|
||||||
|
snippet: "".into(),
|
||||||
|
citation: Citation::Line {
|
||||||
|
path: WorkspacePath("a.md".into()),
|
||||||
|
start: 1,
|
||||||
|
end: 2,
|
||||||
|
section: None,
|
||||||
|
},
|
||||||
|
retrieval: RetrievalDetail::default(), // Placeholder retrieval detail; its contents are not asserted here.
|
||||||
|
index_version: IndexVersion("v1".into()),
|
||||||
|
embedding_model: None,
|
||||||
|
chunker_version: ChunkerVersion("md-heading-v1".into()),
|
||||||
|
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
||||||
|
stale: false,
|
||||||
|
score_kind: ScoreKind::Rrf,
|
||||||
|
repo: None, // Field under test: absent repo.
|
||||||
|
code_lang: None, // Field under test: absent language.
|
||||||
|
};
|
||||||
|
let v = serde_json::to_value(&hit).unwrap(); // Serialize to a JSON value so key presence can be inspected.
|
||||||
|
assert!(v.get("repo").is_none(), "repo should be omitted when None"); // Key must be missing entirely, not present as null.
|
||||||
|
assert!(v.get("code_lang").is_none(), "code_lang should be omitted when None"); // Same omission rule for `code_lang`.
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn search_hit_repo_and_code_lang_present_when_some() { // A hit with `repo` / `code_lang` set to Some must serialize both as plain strings.
|
||||||
|
let hit = SearchHit { // Code-style hit: `.rs` path with a `Citation::Code` citation.
|
||||||
|
rank: 1,
|
||||||
|
chunk_id: ChunkId("c1".into()),
|
||||||
|
doc_id: DocumentId("d1".into()),
|
||||||
|
doc_path: WorkspacePath("a.rs".into()),
|
||||||
|
heading_path: vec![],
|
||||||
|
section_label: None,
|
||||||
|
snippet: "".into(),
|
||||||
|
citation: Citation::Code {
|
||||||
|
path: WorkspacePath("a.rs".into()),
|
||||||
|
line_start: 1,
|
||||||
|
line_end: 2,
|
||||||
|
symbol: None,
|
||||||
|
lang: Some("rust".into()),
|
||||||
|
},
|
||||||
|
retrieval: RetrievalDetail::default(), // Placeholder retrieval detail; its contents are not asserted here.
|
||||||
|
index_version: IndexVersion("v1".into()),
|
||||||
|
embedding_model: None,
|
||||||
|
chunker_version: ChunkerVersion("code-rust-ast-v1".into()),
|
||||||
|
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
||||||
|
stale: false,
|
||||||
|
score_kind: ScoreKind::Rrf,
|
||||||
|
repo: Some("kebab".into()), // Field under test: populated repo.
|
||||||
|
code_lang: Some("rust".into()), // Field under test: populated language.
|
||||||
|
};
|
||||||
|
let v = serde_json::to_value(&hit).unwrap(); // Serialize to a JSON value so the emitted fields can be compared.
|
||||||
|
assert_eq!(v["repo"], "kebab"); // Emitted as a bare JSON string.
|
||||||
|
assert_eq!(v["code_lang"], "rust"); // Emitted as a bare JSON string.
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn search_filters_repo_and_code_lang_default_to_empty_vec() { // `SearchFilters::default()` must leave both new filter lists empty.
|
||||||
|
let f = SearchFilters::default();
|
||||||
|
assert!(f.repo.is_empty()); // Default `repo` filter list is empty.
|
||||||
|
assert!(f.code_lang.is_empty()); // Default `code_lang` filter list is empty.
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -338,7 +338,8 @@ pub(crate) fn aggregate_from_rows(
|
|||||||
| Citation::Page { path, .. }
|
| Citation::Page { path, .. }
|
||||||
| Citation::Region { path, .. }
|
| Citation::Region { path, .. }
|
||||||
| Citation::Caption { path, .. }
|
| Citation::Caption { path, .. }
|
||||||
| Citation::Time { path, .. } => !path.0.is_empty(),
|
| Citation::Time { path, .. }
|
||||||
|
| Citation::Code { path, .. } => !path.0.is_empty(),
|
||||||
});
|
});
|
||||||
if covered {
|
if covered {
|
||||||
citation_num += 1;
|
citation_num += 1;
|
||||||
@@ -472,6 +473,8 @@ mod tests {
|
|||||||
indexed_at: OffsetDateTime::UNIX_EPOCH,
|
indexed_at: OffsetDateTime::UNIX_EPOCH,
|
||||||
stale: false,
|
stale: false,
|
||||||
score_kind: kebab_core::ScoreKind::Rrf,
|
score_kind: kebab_core::ScoreKind::Rrf,
|
||||||
|
repo: None,
|
||||||
|
code_lang: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -87,6 +87,8 @@ fn hit(rank: u32, chunk_id: &str, doc_id: &str) -> SearchHit {
|
|||||||
indexed_at: OffsetDateTime::UNIX_EPOCH,
|
indexed_at: OffsetDateTime::UNIX_EPOCH,
|
||||||
stale: false,
|
stale: false,
|
||||||
score_kind: kebab_core::ScoreKind::Rrf,
|
score_kind: kebab_core::ScoreKind::Rrf,
|
||||||
|
repo: None,
|
||||||
|
code_lang: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -110,6 +110,8 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
|
|||||||
media,
|
media,
|
||||||
ingested_after,
|
ingested_after,
|
||||||
doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
|
doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
|
||||||
|
repo: vec![],
|
||||||
|
code_lang: vec![],
|
||||||
};
|
};
|
||||||
|
|
||||||
let query = kebab_core::SearchQuery {
|
let query = kebab_core::SearchQuery {
|
||||||
|
|||||||
@@ -509,6 +509,8 @@ mod tests {
|
|||||||
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
||||||
stale: false,
|
stale: false,
|
||||||
score_kind: kebab_core::ScoreKind::Rrf,
|
score_kind: kebab_core::ScoreKind::Rrf,
|
||||||
|
repo: None,
|
||||||
|
code_lang: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -760,6 +762,8 @@ mod tests {
|
|||||||
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
indexed_at: time::OffsetDateTime::UNIX_EPOCH,
|
||||||
stale: false,
|
stale: false,
|
||||||
score_kind: kebab_core::ScoreKind::Rrf,
|
score_kind: kebab_core::ScoreKind::Rrf,
|
||||||
|
repo: None,
|
||||||
|
code_lang: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -470,6 +470,8 @@ fn build_hit(
|
|||||||
// in `RagPipeline::ask` against the configured threshold.
|
// in `RagPipeline::ask` against the configured threshold.
|
||||||
stale: false,
|
stale: false,
|
||||||
score_kind: ScoreKind::Bm25,
|
score_kind: ScoreKind::Bm25,
|
||||||
|
repo: None,
|
||||||
|
code_lang: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -327,6 +327,8 @@ fn build_hit(
|
|||||||
// in `RagPipeline::ask` against the configured threshold.
|
// in `RagPipeline::ask` against the configured threshold.
|
||||||
stale: false,
|
stale: false,
|
||||||
score_kind: ScoreKind::Cosine,
|
score_kind: ScoreKind::Cosine,
|
||||||
|
repo: None,
|
||||||
|
code_lang: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user