feat(search/lexical): populate SearchHit.indexed_at (fb-32)

Populate indexed_at by JOINing documents.updated_at. The retriever
defaults stale to false; the App facade post-processes it against
the config threshold.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
th-kim0823
2026-05-09 01:10:20 +09:00
parent 922849cd95
commit f4ce6652b2
7 changed files with 103 additions and 3 deletions

View File

@@ -16,6 +16,7 @@
"Snap"
],
"index_version": "v1.0",
"indexed_at": "2024-01-01T00:00:00Z",
"rank": 1,
"retrieval": {
"fusion_score": 1.4490997273242101e-6,
@@ -26,7 +27,8 @@
"vector_score": null
},
"section_label": "Snap",
"snippet": "alpha alpha"
"snippet": "alpha alpha",
"stale": false
},
{
"chunk_id": "c1000000000000000000000000000000",
@@ -45,6 +47,7 @@
"Snap"
],
"index_version": "v1.0",
"indexed_at": "2024-01-01T00:00:00Z",
"rank": 2,
"retrieval": {
"fusion_score": 9.641424867368187e-7,
@@ -55,6 +58,7 @@
"vector_score": null
},
"section_label": "Snap",
"snippet": "alpha bravo charlie"
"snippet": "alpha bravo charlie",
"stale": false
}
]

View File

@@ -612,6 +612,73 @@ fn lexical_index_version_is_returned_unchanged() {
assert_eq!(r.index_version().0, "custom-label-1");
}
#[test]
fn search_hit_carries_indexed_at_from_documents_updated_at() {
    // p9-fb-32: SearchHit.indexed_at must be populated from
    // documents.updated_at via the JOIN.
    //
    // The seeded timestamps are fixed and pairwise distinct so that every
    // plausible wrong source fails the equality assertion below:
    //   * wall-clock "now" at search time,
    //   * documents.created_at,
    //   * the '2024-01-01...' value that the `insert_document` helper
    //     hard-codes for updated_at (which is why the document row is
    //     inserted by hand here) and that assets.discovered_at uses.
    // Whole-second values are used so the RFC3339 text carries no
    // fractional part that could be lost on the way back.
    use time::OffsetDateTime;
    use time::format_description::well_known::Rfc3339;

    let created_at = OffsetDateTime::from_unix_timestamp(1_600_000_000)
        .expect("valid unix timestamp for created_at");
    let updated_at = OffsetDateTime::from_unix_timestamp(1_700_000_000)
        .expect("valid unix timestamp for updated_at");
    let created_rfc = created_at
        .format(&Rfc3339)
        .expect("format created_at as rfc3339");
    let updated_rfc = updated_at
        .format(&Rfc3339)
        .expect("format updated_at as rfc3339");

    let env = Env::new();
    let conn = env.raw_conn();
    let doc_id = id32("d");
    let asset_id = format!("{:0>32}", "d");

    // The document row references an asset, so seed that first.
    conn.execute(
        "INSERT OR IGNORE INTO assets (
            asset_id, source_uri, workspace_path, media_type, byte_len,
            checksum, storage_kind, storage_path, discovered_at
        ) VALUES (?, 'file:///x', 'a.md', '\"markdown\"', 0,
            'd0', 'reference', '/x', '2024-01-01T00:00:00Z')",
        rusqlite::params![asset_id],
    )
    .expect("insert asset");

    // created_at and updated_at deliberately differ; only updated_at is
    // expected to surface as SearchHit.indexed_at.
    conn.execute(
        "INSERT INTO documents (
            doc_id, asset_id, workspace_path, title, lang,
            source_type, trust_level, parser_version,
            doc_version, schema_version, metadata_json,
            provenance_json, created_at, updated_at
        ) VALUES (?, ?, 'a.md', 'T', 'en', 'markdown', 'primary', 'pv1', 1, 1,
            '{}', '{\"events\":[]}',
            ?, ?)",
        rusqlite::params![doc_id, asset_id, created_rfc, updated_rfc],
    )
    .expect("insert document");

    insert_chunk(
        &conn,
        &id32("c1"),
        &doc_id,
        "body about apples",
        &["T"],
        None,
        r#"[{"kind":"line","start":1,"end":1}]"#,
        "v1",
    );
    // Release the raw connection before the retriever is created.
    drop(conn);

    let r = env.retriever();
    let hits = r
        .search(&SearchQuery {
            text: "apples".to_string(),
            mode: SearchMode::Lexical,
            k: 5,
            filters: SearchFilters::default(),
        })
        .expect("search");
    let hit = hits.first().expect("at least one hit");

    // Exact equality: a tolerance window around "now" would also accept a
    // retriever that stamps hits with the current time.
    assert_eq!(
        hit.indexed_at, updated_at,
        "indexed_at must equal the seeded documents.updated_at"
    );
    // stale is a placeholder set by the retriever; the App layer overwrites.
    assert!(!hit.stale, "lexical retriever must default stale=false");
}
#[test]
fn lexical_snapshot_run_1() {
// Pinned snapshot. A small, deterministic corpus; the JSON shape of