From 8faad2f40731361f201e8565b29ff635861794ba Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sat, 9 May 2026 01:17:54 +0900
Subject: [PATCH] feat(search/vector): populate SearchHit.indexed_at (fb-32)
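hydrate_chunks now also selects d.updated_at through its existing
documents JOIN, and build_hit parses it as RFC3339 into
SearchHit.indexed_at. The hybrid fusion path is unchanged (it passes
SearchHit through with its fields preserved).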
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-search/src/vector.rs | 20 ++++++++++---
crates/kebab-search/tests/hybrid.rs | 45 +++++++++++++++++++++++++++++
2 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/crates/kebab-search/src/vector.rs b/crates/kebab-search/src/vector.rs
index f4d5f8b..bcd6d04 100644
--- a/crates/kebab-search/src/vector.rs
+++ b/crates/kebab-search/src/vector.rs
@@ -197,6 +197,8 @@ struct ChunkMeta {
chunker_version: String,
doc_id: String,
workspace_path: String,
+ /// p9-fb-32: documents.updated_at (RFC3339).
+ updated_at: String,
}
fn hydrate_chunks(
@@ -222,7 +224,7 @@ fn hydrate_chunks(
"SELECT \
c.chunk_id, c.text, c.heading_path_json, c.section_label, \
c.source_spans_json, c.chunker_version, \
- c.doc_id, d.workspace_path \
+ c.doc_id, d.workspace_path, d.updated_at \
FROM chunks c \
JOIN documents d ON d.doc_id = c.doc_id \
WHERE c.chunk_id IN ({placeholders})"
@@ -249,6 +251,7 @@ fn hydrate_chunks(
chunker_version: row.get(5)?,
doc_id: row.get(6)?,
workspace_path: row.get(7)?,
+ updated_at: row.get(8)?,
},
))
},
@@ -287,6 +290,16 @@ fn build_hit(
);
let snippet = trim_snippet(&meta.text, snippet_chars);
+ // p9-fb-32: documents.updated_at is stored as RFC3339 TEXT (V001
+ // migration; written by put_document via OffsetDateTime::now_utc).
+ // Mirrors the lexical retriever; see lexical::build_hit for the
+ // shared rationale on incremental-ingest skip semantics.
+ let indexed_at = time::OffsetDateTime::parse(
+ &meta.updated_at,
+ &time::format_description::well_known::Rfc3339,
+ )
+ .context("kb-search vector: parse documents.updated_at as RFC3339")?;
+
let score = hit.score;
Ok(SearchHit {
rank,
@@ -308,9 +321,8 @@ fn build_hit(
index_version: index_version.clone(),
embedding_model: Some(model_id.clone()),
chunker_version: ChunkerVersion(meta.chunker_version.clone()),
- // p9-fb-32: Task 5 will hydrate from documents.updated_at; this
- // stub keeps the lib compiling after Task 1 added the field.
- indexed_at: time::OffsetDateTime::UNIX_EPOCH,
+ indexed_at,
+ // Placeholder — App layer overwrites against config threshold (Task 6).
stale: false,
})
}
diff --git a/crates/kebab-search/tests/hybrid.rs b/crates/kebab-search/tests/hybrid.rs
index fcda0c5..13f945d 100644
--- a/crates/kebab-search/tests/hybrid.rs
+++ b/crates/kebab-search/tests/hybrid.rs
@@ -18,6 +18,7 @@ use kebab_core::{
Retriever, SearchFilters, SearchHit, SearchMode, SearchQuery,
};
use kebab_search::{FusionPolicy, HybridRetriever};
+use rusqlite::params;
use serde_json::json;
fn build_hybrid(env: &HybridEnv) -> HybridRetriever {
@@ -211,3 +212,47 @@ fn hybrid_snapshot_run_1() {
);
}
}
+
+#[test]
+#[ignore = "requires AVX-capable hardware (LanceDB)"]
+fn vector_hit_carries_indexed_at() {
+ // p9-fb-32: VectorRetriever must populate SearchHit.indexed_at from
+ // documents.updated_at, which hydrate_chunks now selects through its
+ // existing documents JOIN (mirrors the lexical retriever — Task 5).
+ use time::OffsetDateTime;
+ use time::format_description::well_known::Rfc3339;
+
+ require_avx_or_panic();
+ let env = HybridEnv::new();
+ let _ids = seed_disjoint_corpus(&env);
+
+ // `seed_chunk` hardcodes updated_at='1970-01-01T00:00:00Z'; bump every
+ // document to wall-clock now so the ±60s check below is meaningful.
+ // NOTE(review): the UPDATE runs on `read_conn()` — confirm it is writable.
+ let now = OffsetDateTime::now_utc();
+ let now_rfc = now.format(&Rfc3339).expect("format now as rfc3339");
+ {
+ let conn = env.sqlite.read_conn();
+ conn.execute(
+ "UPDATE documents SET updated_at = ?",
+ params![now_rfc],
+ )
+ .expect("bump documents.updated_at");
+ }
+
+ let r = env.vector_retriever();
+ let hits = r
+ .search(&SearchQuery {
+ text: "rust".to_string(),
+ mode: SearchMode::Vector,
+ k: 5,
+ filters: SearchFilters::default(),
+ })
+ .expect("vector search");
+ let hit = hits.first().expect("at least one vector hit");
+ let now2 = OffsetDateTime::now_utc();
+ let delta = (now2 - hit.indexed_at).whole_seconds().abs();
+ assert!(delta < 60, "indexed_at within ±60s of now, got {delta}s");
+ // stale is a placeholder set by the retriever; the App layer overwrites.
+ assert!(!hit.stale, "vector retriever must default stale=false");
+}