From 1e943f21dc8782fa0a776646fac925a18f737853 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 12:17:04 +0900
Subject: [PATCH] feat(core): SearchTrace + IndexBytes types + SearchOpts.trace
 (fb-37)
Co-Authored-By: Claude Sonnet 4.6
---
crates/kebab-core/src/search.rs | 98 +++++++++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs
index 5e5cd31..bb66be9 100644
--- a/crates/kebab-core/src/search.rs
+++ b/crates/kebab-core/src/search.rs
@@ -124,6 +124,57 @@ pub struct SearchOpts {
pub snippet_chars: Option,
/// Opaque base64 cursor from a previous response. None = first page.
pub cursor: Option,
+ /// p9-fb-37: when true, capture pipeline trace (cache bypassed,
+ /// lex / vec pre-fusion lists + timing populated on the response).
+ #[serde(default)]
+ pub trace: bool,
+}
+
+/// p9-fb-37: search retrieval pipeline trace. Populated only when
+/// `SearchOpts.trace = true`; `None` on the wrapping `SearchResponse`
+/// otherwise. `lexical` / `vector` are pre-fusion candidate lists
+/// (each retriever's full output for the fanout query). `rrf_inputs`
+/// is the union (chunk_id) used by RRF, with each side's rank
+/// captured. `timing` is wall-clock per stage.
+///
+/// The derived `Default` yields empty lists and an all-zero `timing`.
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchTrace {
+ /// Pre-fusion candidates produced by the lexical retriever.
+ pub lexical: Vec,
+ /// Pre-fusion candidates produced by the vector retriever.
+ pub vector: Vec,
+ /// Inputs handed to RRF: the union of both candidate lists, with
+ /// each side's rank recorded per entry.
+ pub rrf_inputs: Vec,
+ /// Wall-clock duration of each pipeline stage.
+ pub timing: TraceTiming,
+}
+
+/// p9-fb-37: a single retrieved chunk as recorded in one of the
+/// pre-fusion candidate lists of a `SearchTrace`.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceCandidate {
+ /// Identifier of the retrieved chunk.
+ pub chunk_id: ChunkId,
+ /// Identifier of a document; presumably the one the chunk belongs
+ /// to — TODO confirm against the retriever.
+ pub doc_id: DocumentId,
+ /// Workspace path associated with the candidate; presumably the
+ /// path of the owning document — TODO confirm.
+ pub doc_path: WorkspacePath,
+ /// Position of this candidate in its retriever's list.
+ /// NOTE(review): the unit test uses `1` for the only entry, which
+ /// suggests 1-based ranks — confirm.
+ pub rank: u32,
+ /// Score reported for this candidate. NOTE(review): scale and
+ /// ordering (higher vs. lower is better) are not visible here.
+ pub score: f32,
+}
+
+/// p9-fb-37: one entry of `SearchTrace.rrf_inputs` — a chunk together
+/// with the rank it held on each retriever side and its fused score.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceFusionInput {
+ /// Identifier of the chunk being fused.
+ pub chunk_id: ChunkId,
+ /// Rank on the lexical side. Presumably `None` when the chunk was
+ /// not returned by the lexical retriever — verify against caller.
+ pub lexical_rank: Option,
+ /// Rank on the vector side. Presumably `None` when the chunk was
+ /// not returned by the vector retriever — verify against caller.
+ pub vector_rank: Option,
+ /// Score assigned to this chunk by the fusion step.
+ pub fusion_score: f32,
+}
+
+/// p9-fb-37: per-stage wall-clock timings carried by `SearchTrace`.
+/// All fields default to zero. The `_ms` suffix indicates
+/// milliseconds.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TraceTiming {
+ /// Time attributed to the lexical retrieval stage.
+ pub lexical_ms: u64,
+ /// Time attributed to the vector retrieval stage.
+ pub vector_ms: u64,
+ /// Time attributed to the fusion stage.
+ pub fusion_ms: u64,
+ /// Overall time for the traced search. NOTE(review): not
+ /// necessarily the sum of the stages above (the unit test uses
+ /// 12 + 0 + 1 vs. 14) — confirm what else is included.
+ pub total_ms: u64,
+}
+
+/// p9-fb-37: on-disk index size breakdown. Mirrored on the
+/// wire `schema.v1.stats.index_bytes` block.
+///
+/// The derived `Default` reports zero for both fields.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct IndexBytes {
+ /// Size attributed to the SQLite store; presumably in bytes, per
+ /// the type name — TODO confirm.
+ pub sqlite: u64,
+ /// Size attributed to the LanceDB store; presumably in bytes, per
+ /// the type name — TODO confirm.
+ pub lancedb: u64,
}
#[cfg(test)]
@@ -193,4 +244,51 @@ mod tests {
assert!(old.ingested_after.is_none());
assert!(old.doc_id.is_none());
}
+
+ /// A populated `SearchTrace` must survive a trip through
+ /// `serde_json::Value` and back unchanged, and expose the expected
+ /// JSON field names along the way.
+ #[test]
+ fn search_trace_serde_roundtrip() {
+ // Fixture: one lexical hit, no vector hits, and the matching
+ // fusion input that only carries a lexical rank.
+ let t = SearchTrace {
+ lexical: vec![TraceCandidate {
+ chunk_id: ChunkId("c1".into()),
+ doc_id: DocumentId("d1".into()),
+ doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+ rank: 1,
+ score: 0.42,
+ }],
+ vector: vec![],
+ rrf_inputs: vec![TraceFusionInput {
+ chunk_id: ChunkId("c1".into()),
+ lexical_rank: Some(1),
+ vector_rank: None,
+ fusion_score: 0.0234,
+ }],
+ timing: TraceTiming {
+ lexical_ms: 12,
+ vector_ms: 0,
+ fusion_ms: 1,
+ total_ms: 14,
+ },
+ };
+ // Spot-check the serialized shape before deserializing.
+ let v = serde_json::to_value(&t).unwrap();
+ assert_eq!(v["timing"]["lexical_ms"], 12);
+ // The JSON number is read back as f64; narrow it to f32 so the
+ // comparison is made against the same f32 value that was stored.
+ assert_eq!(
+ v["lexical"][0]["score"].as_f64().unwrap() as f32,
+ 0.42_f32
+ );
+ // Full round trip: the deserialized value must equal the fixture.
+ let back: SearchTrace = serde_json::from_value(v).unwrap();
+ assert_eq!(back, t);
+ }
+
+ /// `IndexBytes::default()` must report zero for both size fields.
+ #[test]
+ fn index_bytes_default_is_zero() {
+ let IndexBytes { sqlite, lancedb } = IndexBytes::default();
+ assert_eq!(sqlite, 0);
+ assert_eq!(lancedb, 0);
+ }
+
+ /// Tracing is opt-in: a default `SearchOpts` leaves `trace` off.
+ #[test]
+ fn search_opts_trace_default_false() {
+ let trace = SearchOpts::default().trace;
+ assert!(!trace);
+ }
}