From 6ac7fea7b96a5f7ba6834c9c7e49cb9667fb1e5d Mon Sep 17 00:00:00 2001 From: altair823 Date: Sun, 24 May 2026 11:54:25 +0000 Subject: [PATCH] feat(v0.17.0/A5): trigram-aware build_match_string + SearchResponse.hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-A 본체. plan Task A4 Step 1c + A5. - lexical.rs::build_match_string 재설계: whole-phrase + token-AND OR-combined, 3자 미만 토큰 drop, 후보 없음 시 None (빈 MATCH 회피). raw single-quote mode 유지. - SearchResponse.hint additive — empty result + trimmed < 3 chars + non-raw 케이스에 short_query_hint helper 가 set. - CLI 'kebab search' 가 [hint] stderr 한 줄 (text mode). - TUI SearchState.short_query_hint + poll_worker stale-aware set + fire_search/mark_input_changed reset + dynamic_status 표시. - docs/wire-schema/v1/search_response.schema.json hint additive. - 신규 unit tests (lexical 9 PASS, 기존 2 expectation 갱신) + 통합 회귀 (search_korean: multi_token + mixed, 3 PASS) + BM25 snapshot regen (trigram token stream). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/app.rs | 35 +++++ crates/kebab-app/src/bulk.rs | 5 + crates/kebab-app/src/lib.rs | 2 +- crates/kebab-app/tests/search_korean.rs | 85 +++++++++++ crates/kebab-cli/src/main.rs | 9 ++ crates/kebab-cli/src/wire.rs | 11 ++ crates/kebab-search/src/lexical.rs | 136 ++++++++++++++---- .../tests/fixtures/search/lexical/run-1.json | 8 +- crates/kebab-tui/src/app.rs | 7 + crates/kebab-tui/src/run.rs | 14 ++ crates/kebab-tui/src/search.rs | 40 +++++- .../v1/search_response.schema.json | 4 + 12 files changed, 317 insertions(+), 39 deletions(-) diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs index dab36b2..0b07e0f 100644 --- a/crates/kebab-app/src/app.rs +++ b/crates/kebab-app/src/app.rs @@ -73,6 +73,37 @@ pub struct SearchResponse { /// p9-fb-37: present when caller passed `SearchOpts.trace = true`. /// Consumers that ignore trace should leave this `None`. 
 pub trace: Option, + /// v0.17.0 A5 Step 4b: human / agent-readable advisory string set + /// when the empty hit list is likely due to a query shorter than the + /// FTS5 trigram tokenizer's 3-char minimum. `None` otherwise. CLI + /// surfaces it on stderr (text mode); MCP / `--json` consumers + /// surface it however they prefer. See + /// `docs/superpowers/specs/2026-05-22-korean-trigram-tokenizer-design.md` + /// §3.3. + pub hint: Option<String>, +} + +/// v0.17.0 A5 Step 4b: decide whether to attach a "3자 이상 키워드 권장" +/// hint to a `SearchResponse`. Fires only when the result set is empty +/// *and* the trimmed query is shorter than the trigram tokenizer can +/// resolve. Raw FTS5 mode (`'...'`) opts out — the user explicitly +/// invoked FTS5 syntax. Identical condition powers the CLI stderr line +/// and (separately) the TUI status bar. +pub fn short_query_hint(query_text: &str, hits_empty: bool) -> Option<String> { + if !hits_empty { + return None; + } + let trimmed = query_text.trim(); + let bytes = trimmed.as_bytes(); + // Raw single-quote mode: user opted into FTS5 syntax, no advisory. + if bytes.len() >= 2 && bytes[0] == b'\'' && bytes[bytes.len() - 1] == b'\'' { + return None; + } + if trimmed.chars().count() < 3 { + Some("3자 이상 키워드 권장 (trigram tokenizer 제약)".to_string()) + } else { + None + } } /// Facade state — see module docs for lifetime rules. @@ -418,11 +449,13 @@ impl App { // Trace path skips the budget loop. Caller will inspect // `hits.len()` and `trace.timing` rather than paginate. 
+ let hint = short_query_hint(&query.text, hits.is_empty()); return Ok(SearchResponse { hits, next_cursor: None, truncated: false, trace: Some(trace), + hint, }); } @@ -505,11 +538,13 @@ impl App { None }; + let hint = short_query_hint(&query.text, hits.is_empty()); Ok(SearchResponse { hits, next_cursor, truncated, trace: None, + hint, }) } diff --git a/crates/kebab-app/src/bulk.rs b/crates/kebab-app/src/bulk.rs index 36be6c4..6ba14bf 100644 --- a/crates/kebab-app/src/bulk.rs +++ b/crates/kebab-app/src/bulk.rs @@ -96,6 +96,11 @@ fn serialize_search_response(r: &SearchResponse) -> Value { None => Value::Null, }; map.insert("trace".to_string(), trace_v); + // v0.17.0 A5 Step 4b: only emit `hint` when set — matches + // the CLI wire wrapper's additive emit pattern. + if let Some(hint) = &r.hint { + map.insert("hint".to_string(), Value::String(hint.clone())); + } } v } diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 37013e3..19b77e5 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -69,7 +69,7 @@ pub mod reset; pub mod schema; mod staleness; -pub use app::{App, SearchResponse}; +pub use app::{App, SearchResponse, short_query_hint}; pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown}; pub use reset::{ResetReport, ResetScope, enumerate_orphans}; pub use error_wire::{ERROR_V1_ID, ErrorV1, StructuredError, classify}; diff --git a/crates/kebab-app/tests/search_korean.rs b/crates/kebab-app/tests/search_korean.rs index 625b2d5..eaff918 100644 --- a/crates/kebab-app/tests/search_korean.rs +++ b/crates/kebab-app/tests/search_korean.rs @@ -46,3 +46,88 @@ fn korean_lexical_query_returns_korean_document() { hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>() ); } + +/// A4 Step 1c — multi-token Korean query (`해시 충돌`) must hit when +/// the lexical builder routes it through a whole-phrase MATCH candidate. 
+/// +/// Regression guard for the A5 `build_match_string` redesign: the +/// pre-A5 builder emitted `"해시" "충돌"` AND, but FTS5 trigram tokenizer +/// has no 2-char terms so each side was 0-hit. A5 adds a whole- +/// phrase candidate (`"해시 충돌"`), used alone when every token is too +/// short for the token AND, restoring hits for the dominant Korean usage pattern. +#[test] +fn lexical_multi_token_korean_query_hits() { + let env = TestEnv::lexical_only(); + + // Copy the synthetic Korean fixture (introduced in A4 Step 0) into + // the test workspace. The fixture contains the exact phrase + // "해시 충돌" multiple times. + let dest = env.workspace_root.join("hash-table.md"); + let src = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("fixtures") + .join("search") + .join("korean") + .join("hash-table.md"); + std::fs::copy(&src, &dest).expect("copy korean fixture"); + + kebab_app::ingest_with_config(env.config.clone(), env.scope(), true) + .expect("ingest must succeed"); + + let hits = kebab_app::search_with_config( + env.config.clone(), + common::lexical_query("해시 충돌"), + ) + .expect("search must succeed"); + + assert!( + !hits.is_empty(), + "multi-token Korean query '해시 충돌' must hit the hash-table fixture; got {:?}", + hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>() + ); + let any_hash_table = hits.iter().any(|h| h.doc_path.0.contains("hash-table")); + assert!( + any_hash_table, + "expected at least one hit on the hash-table fixture, got: {:?}", + hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>() + ); +} + +/// A4 Step 1c — mixed Korean+English multi-token query (`Rust 충돌은`). +/// Both tokens are ≥3 chars, so the redesigned builder (A5) emits +/// `("Rust 충돌은") OR ("Rust" "충돌은")` (implicit AND). With trigram tokenizer +/// each side has substring coverage in the document, so the AND branch +/// alone is enough. Expected: FAIL pre-A5, PASS post-A5. 
+#[test] +fn lexical_mixed_korean_english_multi_token_query_hits() { + let env = TestEnv::lexical_only(); + let doc_path = env.workspace_root.join("rust-hash.md"); + std::fs::write( + &doc_path, + "# Rust 해시 테이블\n\nRust 의 std::collections::HashMap 에서 \ + 해시 충돌은 SipHash 로 완화한다.\n", + ) + .expect("write rust-hash fixture"); + + kebab_app::ingest_with_config(env.config.clone(), env.scope(), true) + .expect("ingest must succeed"); + + let hits = kebab_app::search_with_config( + env.config.clone(), + common::lexical_query("Rust 충돌은"), + ) + .expect("search must succeed"); + + assert!( + !hits.is_empty(), + "mixed Korean+English multi-token query 'Rust 충돌은' must hit the rust-hash fixture; got {:?}", + hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>() + ); + let any_rust_hash = hits.iter().any(|h| h.doc_path.0.contains("rust-hash")); + assert!( + any_rust_hash, + "expected at least one hit on the rust-hash fixture, got: {:?}", + hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>() + ); +} diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index fd32cc8..2c2db0a 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -933,6 +933,15 @@ fn run(cli: &Cli) -> anyhow::Result<()> { let next = resp.next_cursor.as_deref().unwrap_or("(none)"); eprintln!("[truncated; use --cursor {next} for the next page]"); } + // v0.17.0 A5 Step 4: short-query advisory. `resp.hint` + // is `Some` only when the result list is empty and the + // trimmed query is shorter than the trigram tokenizer + // can resolve (raw FTS5 mode opts out). stderr so it + // doesn't pollute the stdout hit list. `--json` skips + // this branch entirely; the field rides the wire. 
+ if let Some(hint) = &resp.hint { + eprintln!("[hint] {hint}"); + } if *trace { if let Some(t) = &resp.trace { eprintln!(); diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs index 01951c9..cf5293f 100644 --- a/crates/kebab-cli/src/wire.rs +++ b/crates/kebab-cli/src/wire.rs @@ -92,6 +92,14 @@ pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value { map.insert("trace".to_string(), trace_v); } } + // v0.17.0 A5 Step 4b: emit `hint` only when set. Keeps responses + // that don't carry a hint backward-compatible with v0 consumers + // that don't know the field. + if let Some(hint) = &r.hint { + if let Value::Object(ref mut map) = v { + map.insert("hint".to_string(), Value::String(hint.clone())); + } + } tag_object(v, "search_response.v1") } @@ -292,6 +300,7 @@ mod tests { next_cursor: Some("opaque-cursor-abc".to_string()), truncated: true, trace: None, + hint: None, }; let v = wire_search_response(&r); assert_eq!(schema_of(&v), Some("search_response.v1")); @@ -405,6 +414,7 @@ mod tests { }], timing: TraceTiming { lexical_ms: 5, vector_ms: 0, fusion_ms: 1, total_ms: 7 }, }), + hint: None, }; let v = wire_search_response(&r); assert_eq!(schema_of(&v), Some("search_response.v1")); @@ -420,6 +430,7 @@ mod tests { next_cursor: None, truncated: false, trace: None, + hint: None, }; let v = wire_search_response(&r); assert!(v.get("trace").is_none(), "trace field absent when None"); diff --git a/crates/kebab-search/src/lexical.rs b/crates/kebab-search/src/lexical.rs index 67c21d5..f5c9080 100644 --- a/crates/kebab-search/src/lexical.rs +++ b/crates/kebab-search/src/lexical.rs @@ -162,18 +162,35 @@ impl Retriever for LexicalRetriever { /// Translate a user-typed query into an FTS5 match string. /// -/// Rules (from the task spec): +/// v0.17.0 — trigram-aware redesign (see design §5.5 + plan +/// `docs/superpowers/plans/2026-05-22-korean-trigram-tokenizer.md` +/// Task A5). 
The FTS5 tokenizer is `trigram` so any term shorter than +/// three Unicode chars has no index entry and would zero out an AND +/// branch. Korean compounds typically split into 2-char eojeols (e.g. +/// `해시 충돌`), so a naive token AND drops the dominant usage pattern. /// -/// - The query is wrapped in a single pair of `'...'` → strip the quotes -/// and pass the inner text through verbatim. The user has explicitly -/// opted into FTS5 syntax (e.g. `'rust AND cargo'`, `'foo*'`). +/// Rules: /// -/// - Otherwise: split on whitespace, escape every token by wrapping it -/// in `"..."` (FTS5 string literal), with any inner `"` doubled. Join -/// with spaces — FTS5 default operator is implicit AND. +/// - Raw mode (unchanged): the query is wrapped in a single pair of +/// `'...'` → strip the quotes and pass the inner text through verbatim. +/// The user has explicitly opted into FTS5 syntax (e.g. +/// `'rust AND cargo'`, `'foo*'`). /// -/// - An empty / whitespace-only token list → return `None` (caller -/// short-circuits to `Ok(vec![])`). +/// - Otherwise build up to two MATCH candidates: +/// 1. **whole-phrase**: the entire trimmed input wrapped as one FTS5 +/// string literal, *only* if it has ≥3 Unicode chars. FTS5 treats +/// a quoted string with spaces as a phrase match. +/// 2. **token AND**: whitespace-split tokens, kept only when each has +/// ≥3 Unicode chars (shorter ones are dropped — they would zero +/// out the AND under trigram). +/// +/// - Combine: `(whole) OR (token_and)` when both exist *and differ*; +/// either alone when only one exists; `None` when neither exists +/// (caller short-circuits to `Ok(vec![])`, avoiding an FTS5 syntax +/// error from an empty MATCH). +/// +/// - A single-token long query (`러스트`, `foo`) yields `whole == token_and` +/// → return the bare quoted form so the OR doesn't duplicate. 
 fn build_match_string(text: &str) -> Option<String> { let trimmed = text.trim(); if trimmed.is_empty() { @@ -186,14 +203,27 @@ fn build_match_string(text: &str) -> Option<String> { } return Some(inner_trim.to_string()); } - let tokens: Vec<String> = trimmed - .split_whitespace() - .map(escape_fts5_token) - .collect(); - if tokens.is_empty() { - None - } else { - Some(tokens.join(" ")) + + const MIN_TRIGRAM_CHARS: usize = 3; + + let whole_candidate: Option<String> = (trimmed.chars().count() >= MIN_TRIGRAM_CHARS) + .then(|| escape_fts5_token(trimmed)); + + let token_and_candidate: Option<String> = { + let toks: Vec<String> = trimmed + .split_whitespace() + .filter(|t| t.chars().count() >= MIN_TRIGRAM_CHARS) + .map(escape_fts5_token) + .collect(); + (!toks.is_empty()).then(|| toks.join(" ")) + }; + + match (whole_candidate, token_and_candidate) { + (None, None) => None, + (Some(w), None) => Some(w), + (None, Some(a)) => Some(a), + (Some(w), Some(a)) if w == a => Some(w), + (Some(w), Some(a)) => Some(format!("({w}) OR ({a})")), } } @@ -555,30 +585,31 @@ mod tests { } #[test] - fn build_match_string_default_is_quoted_and_anded() { + fn build_match_string_default_emits_or_of_phrase_and_and() { + // Two long tokens: both whole-phrase and token-AND candidates + // exist and differ, so the builder combines them with OR. let s = build_match_string("rust cargo").unwrap(); - // Two tokens, each quoted, joined by a space (implicit AND). - assert_eq!(s, r#""rust" "cargo""#); + assert_eq!(s, r#"("rust cargo") OR ("rust" "cargo")"#); } #[test] fn build_match_string_escapes_special_chars() { // `*`, `(`, `)`, `:`, `^`, `"` should all be wrapped inside // FTS5 string-literal quotes so they're treated as literal - // text rather than FTS5 operators. + // text rather than FTS5 operators. Every token is ≥3 chars, + // so both the whole-phrase and token-AND candidates exist. 
let s = build_match_string(r#"foo* (bar) baz:qux ^head he"llo"#).unwrap(); assert_eq!( s, - r#""foo*" "(bar)" "baz:qux" "^head" "he""llo""# + r#"("foo* (bar) baz:qux ^head he""llo") OR ("foo*" "(bar)" "baz:qux" "^head" "he""llo")"# ); // The doubled `""` is FTS5's way of embedding a literal quote - // inside a string literal. + // inside a string literal. Appears in both whole-phrase and + // token-AND halves. assert!(s.contains(r#"he""llo"#)); - // Sanity: every special character lives between matching `"` - // delimiters — there is no bare-token (unquoted) span anywhere. - // We check this by confirming the string starts and ends with `"` - // and the count of unescaped `"` is even (each token is wrapped). - assert!(s.starts_with('"') && s.ends_with('"')); + // Sanity: the combined expression is `(...) OR (...)` so it + // starts with `(` and ends with `)`. + assert!(s.starts_with('(') && s.ends_with(')')); } #[test] @@ -588,6 +619,55 @@ mod tests { assert_eq!(s, "foo OR bar*"); } + // ── v0.17.0 trigram-aware redesign coverage ────────────────────────── + + /// 2-char Korean query (`충돌`) yields neither a whole-phrase nor a + /// token-AND candidate → `None`. Caller short-circuits to an empty + /// hit list rather than executing an FTS5 syntax error on `""` MATCH. + #[test] + fn build_match_string_short_korean_returns_none() { + assert!(build_match_string("충돌").is_none()); + assert!(build_match_string("키").is_none()); + assert!(build_match_string(" 충돌 ").is_none()); + } + + /// `해시 충돌` — both tokens are 2 chars (dropped from the AND), but + /// the whole-phrase candidate (`"해시 충돌"`, 5 chars total) survives. + /// This is the dominant Korean usage pattern targeted by A5. + #[test] + fn build_match_string_whole_phrase_only_when_all_tokens_short() { + let s = build_match_string("해시 충돌").unwrap(); + assert_eq!(s, r#""해시 충돌""#); + } + + /// Single long token: whole-phrase and token-AND candidates collapse + /// to the same string. 
The builder returns the bare quoted form so + /// the MATCH expression doesn't carry a redundant `(x) OR (x)`. + #[test] + fn build_match_string_single_long_token_no_duplicate_or() { + assert_eq!(build_match_string("러스트").unwrap(), r#""러스트""#); + assert_eq!(build_match_string("rust").unwrap(), r#""rust""#); + } + + /// Mixed Korean+English multi-token query where every token is ≥3 + /// chars: both candidates exist and differ, OR-combined. + #[test] + fn build_match_string_mixed_lang_emits_or_of_phrase_and_and() { + let s = build_match_string("Rust 충돌은").unwrap(); + assert_eq!(s, r#"("Rust 충돌은") OR ("Rust" "충돌은")"#); + } + + /// One ≥3 token + one <3 token: short token is dropped from the + /// AND, leaving a single long token there; whole-phrase exists + /// independently. Both candidates differ → OR-combined. + #[test] + fn build_match_string_drops_short_token_in_and_keeps_whole() { + // "키" (1 char) dropped from AND; "해시테이블" (5 chars) kept. + // Whole phrase "키 해시테이블" (7 chars) keeps the short token. + let s = build_match_string("키 해시테이블").unwrap(); + assert_eq!(s, r#"("키 해시테이블") OR ("해시테이블")"#); + } + #[test] fn normalize_bm25_top_score_in_unit_interval() { // A "perfect" hit is bm25 = -1.0 → normalized 0.5. 
diff --git a/crates/kebab-search/tests/fixtures/search/lexical/run-1.json b/crates/kebab-search/tests/fixtures/search/lexical/run-1.json index d6ae0dc..c16a495 100644 --- a/crates/kebab-search/tests/fixtures/search/lexical/run-1.json +++ b/crates/kebab-search/tests/fixtures/search/lexical/run-1.json @@ -19,9 +19,9 @@ "indexed_at": "2024-01-01T00:00:00Z", "rank": 1, "retrieval": { - "fusion_score": 1.4490997273242101e-6, + "fusion_score": 1.4615362715630908e-6, "lexical_rank": 1, - "lexical_score": 1.4490997273242101e-6, + "lexical_score": 1.4615362715630908e-6, "method": "lexical", "vector_rank": null, "vector_score": null @@ -51,9 +51,9 @@ "indexed_at": "2024-01-01T00:00:00Z", "rank": 2, "retrieval": { - "fusion_score": 9.641424867368187e-7, + "fusion_score": 9.207039965986041e-7, "lexical_rank": 2, - "lexical_score": 9.641424867368187e-7, + "lexical_score": 9.207039965986041e-7, "method": "lexical", "vector_rank": null, "vector_score": null diff --git a/crates/kebab-tui/src/app.rs b/crates/kebab-tui/src/app.rs index a87f8c8..a5be019 100644 --- a/crates/kebab-tui/src/app.rs +++ b/crates/kebab-tui/src/app.rs @@ -153,6 +153,12 @@ pub struct SearchState { /// `Ctrl-L`); the previous draft kept one for "symmetry" but /// it was dead code. pub worker_rx: Option>, + /// v0.17.0 A5 Step 5: advisory text shown when the last completed + /// search returned no hits and the (trimmed) query is shorter than + /// the FTS5 trigram tokenizer's 3-char minimum. `None` whenever + /// the input changes (so a stale hint never overlaps a fresh + /// typing session) or the next search returns ≥1 hit. + pub short_query_hint: Option, } /// p9-fb-08: payload posted by the search worker on completion. 
@@ -179,6 +185,7 @@ impl Default for SearchState { preview: None, generation: 0, worker_rx: None, + short_query_hint: None, } } } diff --git a/crates/kebab-tui/src/run.rs b/crates/kebab-tui/src/run.rs index fb24b22..cceca0f 100644 --- a/crates/kebab-tui/src/run.rs +++ b/crates/kebab-tui/src/run.rs @@ -393,6 +393,20 @@ fn dynamic_status(app: &App) -> String { if app.search.as_ref().map(|s| s.searching).unwrap_or(false) { return "searching…".to_string(); } + // v0.17.0 A5 Step 5: short-query advisory has higher priority than + // the idle slot but lower than active operations (streaming / + // searching / ingest progress) — the user should always see what + // is happening *now* before reading guidance about the last + // empty result. Slot only fires while focused on Search. + if app.focus == Pane::Search { + if let Some(hint) = app + .search + .as_ref() + .and_then(|s| s.short_query_hint.as_deref()) + { + return hint.to_string(); + } + } if let Some(state) = app.ingest_state.as_ref() { return crate::ingest_progress::status_line(state); } diff --git a/crates/kebab-tui/src/search.rs b/crates/kebab-tui/src/search.rs index 13c9f43..1e6bf75 100644 --- a/crates/kebab-tui/src/search.rs +++ b/crates/kebab-tui/src/search.rs @@ -333,7 +333,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { s.mode = cycle_mode(s.mode); // Force re-search at the new mode if there's a query. 
if !s.input.as_str().trim().is_empty() { - s.input_dirty_at = Some(time::OffsetDateTime::now_utc()); + mark_input_changed(s); } KeyOutcome::Continue } @@ -360,7 +360,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { (KeyCode::Backspace, _) => { if !s.input.is_empty() { s.input.pop_char(); - s.input_dirty_at = Some(time::OffsetDateTime::now_utc()); + mark_input_changed(s); } KeyOutcome::Continue } @@ -388,7 +388,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { } (KeyCode::Delete, _) => { if s.input.delete_after().is_some() { - s.input_dirty_at = Some(time::OffsetDateTime::now_utc()); + mark_input_changed(s); } KeyOutcome::Continue } @@ -402,7 +402,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { s.preview = None; } else { s.input.push_char('j'); - s.input_dirty_at = Some(time::OffsetDateTime::now_utc()); + mark_input_changed(s); } KeyOutcome::Continue } @@ -412,7 +412,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { s.preview = None; } else { s.input.push_char('k'); - s.input_dirty_at = Some(time::OffsetDateTime::now_utc()); + mark_input_changed(s); } KeyOutcome::Continue } @@ -426,7 +426,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { // bindings (and don't currently match any Search // command, so they're a safe fall-through to Continue). s.input.push_char(c); - s.input_dirty_at = Some(time::OffsetDateTime::now_utc()); + mark_input_changed(s); KeyOutcome::Continue } // Normal mode + un-handled Char → no-op (no typing in @@ -435,6 +435,16 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { } } +/// v0.17.0 A5 Step 5: every input-mutation site in `handle_key_search` +/// funnels through this helper so the debounce stamp and the +/// short-query advisory stay in sync. Reset is eager — the stale +/// advisory from the previous result set must not visually overlap +/// with a fresh typing session. 
+fn mark_input_changed(s: &mut crate::app::SearchState) { + s.input_dirty_at = Some(time::OffsetDateTime::now_utc()); + s.short_query_hint = None; +} + fn cycle_mode(m: SearchMode) -> SearchMode { match m { SearchMode::Lexical => SearchMode::Vector, @@ -603,6 +613,11 @@ pub(crate) fn fire_search(state: &mut App) -> anyhow::Result<()> { s.generation = s.generation.wrapping_add(1); s.searching = true; s.input_dirty_at = None; + // v0.17.0 A5 Step 5: hint belongs to the *prior* result set — + // a fresh worker spawn invalidates it so the status bar + // doesn't keep showing the old advisory while the new + // query is in flight. + s.short_query_hint = None; let q_text = s.input.as_str().to_string(); s.last_query = Some((q_text.clone(), s.mode)); (q_text, s.mode, s.generation) @@ -676,6 +691,18 @@ pub fn poll_worker(state: &mut App) { s.searching = false; match result { Ok(hits) => { + // v0.17.0 A5 Step 5: stale-aware short-query hint. + // The worker carries no copy of the query text; + // we ground the advisory on `s.last_query` which + // was snapshotted at `fire_search` time and (by + // the generation guard above) still matches what + // the user submitted for *this* result set. If + // input has drifted since spawn, the gen-check + // already returned early. 
+ let q_text = + s.last_query.as_ref().map(|(t, _)| t.as_str()).unwrap_or(""); + s.short_query_hint = + kebab_app::short_query_hint(q_text, hits.is_empty()); s.hits = hits; s.selected_hit = 0; s.preview = None; @@ -683,6 +710,7 @@ pub fn poll_worker(state: &mut App) { Err(e) => { s.hits.clear(); s.selected_hit = 0; + s.short_query_hint = None; state.error_overlay = Some(crate::error_popup::ErrorOverlay::from_anyhow(&e)); } diff --git a/docs/wire-schema/v1/search_response.schema.json b/docs/wire-schema/v1/search_response.schema.json index ca89792..2a23523 100644 --- a/docs/wire-schema/v1/search_response.schema.json +++ b/docs/wire-schema/v1/search_response.schema.json @@ -29,6 +29,10 @@ } } } + }, + "hint": { + "type": "string", + "description": "v0.17.0 A5 Step 4b: advisory string set when the empty hit list is likely due to a query shorter than the FTS5 trigram tokenizer's 3-char minimum. Field is omitted when no advisory applies. Raw FTS5 mode ('...') opts out. MCP / agent consumers should surface this so users understand the empty result rather than retrying the same short query." } } }