feat(v0.17.0/A5): trigram-aware build_match_string + SearchResponse.hint

PR-A 본체. plan Task A4 Step 1c + A5.

- lexical.rs::build_match_string 재설계: whole-phrase + token-AND OR-combined, 3자 미만 토큰 drop, 후보 없음 시 None (빈 MATCH 회피). raw single-quote mode 유지.
- SearchResponse.hint additive — empty result + trimmed < 3 chars + non-raw 케이스에 short_query_hint helper 가 set.
- CLI 'kebab search' 가 [hint] stderr 한 줄 (text mode).
- TUI SearchState.short_query_hint + poll_worker stale-aware set + fire_search/mark_input_changed reset + dynamic_status 표시.
- docs/wire-schema/v1/search_response.schema.json hint additive.
- 신규 unit tests (lexical 9 PASS, 기존 2 expectation 갱신) + 통합 회귀 (search_korean: multi_token + mixed, 3 PASS) + BM25 snapshot regen (trigram token stream).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 11:54:25 +00:00
parent fe123c0c6d
commit 6ac7fea7b9
12 changed files with 317 additions and 39 deletions
--- a/crates/kebab-app/src/app.rs
+++ b/crates/kebab-app/src/app.rs
@@ -73,6 +73,37 @@ pub struct SearchResponse {
    /// p9-fb-37: present when caller passed `SearchOpts.trace = true`.
    /// Consumers that ignore trace should leave this `None`.
    pub trace: Option<kebab_core::SearchTrace>,
+    /// v0.17.0 A5 Step 4b: human / agent-readable advisory string set
+    /// when the empty hit list is likely due to a query shorter than the
+    /// FTS5 trigram tokenizer's 3-char minimum. `None` otherwise. CLI
+    /// surfaces it on stderr (text mode); MCP / `--json` consumers
+    /// surface it however they prefer. See
+    /// `docs/superpowers/specs/2026-05-22-korean-trigram-tokenizer-design.md`
+    /// §3.3.
+    pub hint: Option<String>,
+}
+
+/// v0.17.0 A5 Step 4b: decide whether to attach a "3자 이상 키워드 권장"
+/// hint to a `SearchResponse`. Fires only when the result set is empty
+/// *and* the trimmed query is shorter than the trigram tokenizer can
+/// resolve. Raw FTS5 mode (`'...'`) opts out — the user explicitly
+/// invoked FTS5 syntax. Identical condition powers the CLI stderr line
+/// and (separately) the TUI status bar.
+pub fn short_query_hint(query_text: &str, hits_empty: bool) -> Option<String> {
+    if !hits_empty {
+        return None;
+    }
+    let trimmed = query_text.trim();
+    let bytes = trimmed.as_bytes();
+    // Raw single-quote mode: user opted into FTS5 syntax, no advisory.
+    if bytes.len() >= 2 && bytes[0] == b'\'' && bytes[bytes.len() - 1] == b'\'' {
+        return None;
+    }
+    if trimmed.chars().count() < 3 {
+        Some("3자 이상 키워드 권장 (trigram tokenizer 제약)".to_string())
+    } else {
+        None
+    }
 }

 /// Facade state — see module docs for lifetime rules.
@@ -418,11 +449,13 @@ impl App {

            // Trace path skips the budget loop. Caller will inspect
            // `hits.len()` and `trace.timing` rather than paginate.
+            let hint = short_query_hint(&query.text, hits.is_empty());
            return Ok(SearchResponse {
                hits,
                next_cursor: None,
                truncated: false,
                trace: Some(trace),
+                hint,
            });
        }

@@ -505,11 +538,13 @@ impl App {
            None
        };

+        let hint = short_query_hint(&query.text, hits.is_empty());
        Ok(SearchResponse {
            hits,
            next_cursor,
            truncated,
            trace: None,
+            hint,
        })
    }

--- a/crates/kebab-app/src/bulk.rs
+++ b/crates/kebab-app/src/bulk.rs
@@ -96,6 +96,11 @@ fn serialize_search_response(r: &SearchResponse) -> Value {
            None => Value::Null,
        };
        map.insert("trace".to_string(), trace_v);
+        // v0.17.0 A5 Step 4b: only emit `hint` when set — matches
+        // the CLI wire wrapper's additive emit pattern.
+        if let Some(hint) = &r.hint {
+            map.insert("hint".to_string(), Value::String(hint.clone()));
+        }
    }
    v
 }
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -69,7 +69,7 @@ pub mod reset;
 pub mod schema;
 mod staleness;

-pub use app::{App, SearchResponse};
+pub use app::{App, SearchResponse, short_query_hint};
 pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
 pub use reset::{ResetReport, ResetScope, enumerate_orphans};
 pub use error_wire::{ERROR_V1_ID, ErrorV1, StructuredError, classify};
--- a/crates/kebab-app/tests/search_korean.rs
+++ b/crates/kebab-app/tests/search_korean.rs
@@ -46,3 +46,88 @@ fn korean_lexical_query_returns_korean_document() {
        hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>()
    );
 }
+
+/// A4 Step 1c — multi-token Korean query (`해시 충돌`) must hit when
+/// the lexical builder routes it through a whole-phrase MATCH candidate.
+///
+/// Regression guard for A5 (`build_match_string` redesign): a plain
+/// per-token AND (`"해시" "충돌"`) cannot hit because the FTS5 trigram
+/// tokenizer has no 2-char terms. For this query A5 drops both 2-char
+/// tokens from the AND and emits only the whole-phrase candidate
+/// (`"해시 충돌"`), restoring hits for the dominant Korean usage pattern.
+#[test]
+fn lexical_multi_token_korean_query_hits() {
+    let env = TestEnv::lexical_only();
+
+    // Copy the synthetic Korean fixture (introduced in A4 Step 0) into
+    // the test workspace. The fixture contains the exact phrase
+    // "해시 충돌" multiple times.
+    let dest = env.workspace_root.join("hash-table.md");
+    let src = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+        .join("..")
+        .join("..")
+        .join("fixtures")
+        .join("search")
+        .join("korean")
+        .join("hash-table.md");
+    std::fs::copy(&src, &dest).expect("copy korean fixture");
+
+    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
+        .expect("ingest must succeed");
+
+    let hits = kebab_app::search_with_config(
+        env.config.clone(),
+        common::lexical_query("해시 충돌"),
+    )
+    .expect("search must succeed");
+
+    assert!(
+        !hits.is_empty(),
+        "multi-token Korean query '해시 충돌' must hit the hash-table fixture; got {:?}",
+        hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>()
+    );
+    let any_hash_table = hits.iter().any(|h| h.doc_path.0.contains("hash-table"));
+    assert!(
+        any_hash_table,
+        "expected at least one hit on the hash-table fixture, got: {:?}",
+        hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>()
+    );
+}
+
+/// A4 Step 1c — mixed Korean+English multi-token query (`Rust 충돌은`).
+/// Both tokens are ≥3 chars, so the redesigned builder (A5) emits
+/// `("Rust 충돌은") OR ("Rust" "충돌은")` (FTS5 implicit AND). Under the
+/// trigram tokenizer each token has substring coverage in the document,
+/// so the AND branch alone is enough. Regression guard for A5.
+#[test]
+fn lexical_mixed_korean_english_multi_token_query_hits() {
+    let env = TestEnv::lexical_only();
+    let doc_path = env.workspace_root.join("rust-hash.md");
+    std::fs::write(
+        &doc_path,
+        "# Rust 해시 테이블\n\nRust 의 std::collections::HashMap 에서 \
+         해시 충돌은 SipHash 로 완화한다.\n",
+    )
+    .expect("write rust-hash fixture");
+
+    kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
+        .expect("ingest must succeed");
+
+    let hits = kebab_app::search_with_config(
+        env.config.clone(),
+        common::lexical_query("Rust 충돌은"),
+    )
+    .expect("search must succeed");
+
+    assert!(
+        !hits.is_empty(),
+        "mixed Korean+English multi-token query 'Rust 충돌은' must hit the rust-hash fixture; got {:?}",
+        hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>()
+    );
+    let any_rust_hash = hits.iter().any(|h| h.doc_path.0.contains("rust-hash"));
+    assert!(
+        any_rust_hash,
+        "expected at least one hit on the rust-hash fixture, got: {:?}",
+        hits.iter().map(|h| &h.doc_path.0).collect::<Vec<_>>()
+    );
+}
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -933,6 +933,15 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
                    let next = resp.next_cursor.as_deref().unwrap_or("(none)");
                    eprintln!("[truncated; use --cursor {next} for the next page]");
                }
+                // v0.17.0 A5 Step 4: short-query advisory. `resp.hint`
+                // is `Some` only when the result list is empty and the
+                // trimmed query is shorter than the trigram tokenizer
+                // can resolve (raw FTS5 mode opts out). stderr so it
+                // doesn't pollute the stdout hit list. `--json` skips
+                // this branch entirely; the field rides the wire.
+                if let Some(hint) = &resp.hint {
+                    eprintln!("[hint] {hint}");
+                }
                if *trace {
                    if let Some(t) = &resp.trace {
                        eprintln!();
--- a/crates/kebab-cli/src/wire.rs
+++ b/crates/kebab-cli/src/wire.rs
@@ -92,6 +92,14 @@ pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value {
            map.insert("trace".to_string(), trace_v);
        }
    }
+    // v0.17.0 A5 Step 4b: emit `hint` only when set. Keeps responses
+    // that don't carry a hint backward-compatible with v0 consumers
+    // that don't know the field.
+    if let Some(hint) = &r.hint {
+        if let Value::Object(ref mut map) = v {
+            map.insert("hint".to_string(), Value::String(hint.clone()));
+        }
+    }
    tag_object(v, "search_response.v1")
 }

@@ -292,6 +300,7 @@ mod tests {
            next_cursor: Some("opaque-cursor-abc".to_string()),
            truncated: true,
            trace: None,
+            hint: None,
        };
        let v = wire_search_response(&r);
        assert_eq!(schema_of(&v), Some("search_response.v1"));
@@ -405,6 +414,7 @@ mod tests {
                }],
                timing: TraceTiming { lexical_ms: 5, vector_ms: 0, fusion_ms: 1, total_ms: 7 },
            }),
+            hint: None,
        };
        let v = wire_search_response(&r);
        assert_eq!(schema_of(&v), Some("search_response.v1"));
@@ -420,6 +430,7 @@ mod tests {
            next_cursor: None,
            truncated: false,
            trace: None,
+            hint: None,
        };
        let v = wire_search_response(&r);
        assert!(v.get("trace").is_none(), "trace field absent when None");
--- a/crates/kebab-search/src/lexical.rs
+++ b/crates/kebab-search/src/lexical.rs
@@ -162,18 +162,35 @@ impl Retriever for LexicalRetriever {

 /// Translate a user-typed query into an FTS5 match string.
 ///
-/// Rules (from the task spec):
+/// v0.17.0 — trigram-aware redesign (see design §5.5 + plan
+/// `docs/superpowers/plans/2026-05-22-korean-trigram-tokenizer.md`
+/// Task A5). The FTS5 tokenizer is `trigram` so any term shorter than
+/// three Unicode chars has no index entry and would zero out an AND
+/// branch. Korean compounds typically split into 2-char eojeols (e.g.
+/// `해시 충돌`), so a naive token AND drops the dominant usage pattern.
 ///
-/// - The query is wrapped in a single pair of `'...'` → strip the quotes
-///   and pass the inner text through verbatim. The user has explicitly
-///   opted into FTS5 syntax (e.g. `'rust AND cargo'`, `'foo*'`).
+/// Rules:
 ///
-/// - Otherwise: split on whitespace, escape every token by wrapping it
-///   in `"..."` (FTS5 string literal), with any inner `"` doubled. Join
-///   with spaces — FTS5 default operator is implicit AND.
+/// - Raw mode (unchanged): the query is wrapped in a single pair of
+///   `'...'` → strip the quotes and pass the inner text through verbatim.
+///   The user has explicitly opted into FTS5 syntax (e.g.
+///   `'rust AND cargo'`, `'foo*'`).
 ///
-/// - An empty / whitespace-only token list → return `None` (caller
-///   short-circuits to `Ok(vec![])`).
+/// - Otherwise build up to two MATCH candidates:
+///   1. **whole-phrase**: the entire trimmed input wrapped as one FTS5
+///      string literal, *only* if it has ≥3 Unicode chars. FTS5 treats
+///      a quoted string with spaces as a phrase match.
+///   2. **token AND**: whitespace-split tokens, kept only when each has
+///      ≥3 Unicode chars (shorter ones are dropped — they would zero
+///      out the AND under trigram).
+///
+/// - Combine: `(whole) OR (token_and)` when both exist *and differ*;
+///   either alone when only one exists; `None` when neither exists
+///   (caller short-circuits to `Ok(vec![])`, avoiding an FTS5 syntax
+///   error from an empty MATCH).
+///
+/// - A single-token long query (`러스트`, `foo`) yields `whole == token_and`
+///   → return the bare quoted form so the OR doesn't duplicate.
 fn build_match_string(text: &str) -> Option<String> {
    let trimmed = text.trim();
    if trimmed.is_empty() {
@@ -186,14 +203,27 @@ fn build_match_string(text: &str) -> Option<String> {
        }
        return Some(inner_trim.to_string());
    }
-    let tokens: Vec<String> = trimmed
-        .split_whitespace()
-        .map(escape_fts5_token)
-        .collect();
-    if tokens.is_empty() {
-        None
-    } else {
-        Some(tokens.join(" "))
+
+    const MIN_TRIGRAM_CHARS: usize = 3;
+
+    let whole_candidate: Option<String> = (trimmed.chars().count() >= MIN_TRIGRAM_CHARS)
+        .then(|| escape_fts5_token(trimmed));
+
+    let token_and_candidate: Option<String> = {
+        let toks: Vec<String> = trimmed
+            .split_whitespace()
+            .filter(|t| t.chars().count() >= MIN_TRIGRAM_CHARS)
+            .map(escape_fts5_token)
+            .collect();
+        (!toks.is_empty()).then(|| toks.join(" "))
+    };
+
+    match (whole_candidate, token_and_candidate) {
+        (None, None) => None,
+        (Some(w), None) => Some(w),
+        (None, Some(a)) => Some(a),
+        (Some(w), Some(a)) if w == a => Some(w),
+        (Some(w), Some(a)) => Some(format!("({w}) OR ({a})")),
    }
 }

@@ -555,30 +585,31 @@ mod tests {
    }

    #[test]
-    fn build_match_string_default_is_quoted_and_anded() {
+    fn build_match_string_default_emits_or_of_phrase_and_and() {
+        // Two long tokens: both whole-phrase and token-AND candidates
+        // exist and differ, so the builder combines them with OR.
        let s = build_match_string("rust cargo").unwrap();
-        // Two tokens, each quoted, joined by a space (implicit AND).
-        assert_eq!(s, r#""rust" "cargo""#);
+        assert_eq!(s, r#"("rust cargo") OR ("rust" "cargo")"#);
    }

    #[test]
    fn build_match_string_escapes_special_chars() {
        // `*`, `(`, `)`, `:`, `^`, `"` should all be wrapped inside
        // FTS5 string-literal quotes so they're treated as literal
-        // text rather than FTS5 operators.
+        // text rather than FTS5 operators. Every token is ≥3 chars,
+        // so both the whole-phrase and token-AND candidates exist.
        let s = build_match_string(r#"foo* (bar) baz:qux ^head he"llo"#).unwrap();
        assert_eq!(
            s,
-            r#""foo*" "(bar)" "baz:qux" "^head" "he""llo""#
+            r#"("foo* (bar) baz:qux ^head he""llo") OR ("foo*" "(bar)" "baz:qux" "^head" "he""llo")"#
        );
        // The doubled `""` is FTS5's way of embedding a literal quote
-        // inside a string literal.
+        // inside a string literal. Appears in both whole-phrase and
+        // token-AND halves.
        assert!(s.contains(r#"he""llo"#));
-        // Sanity: every special character lives between matching `"`
-        // delimiters — there is no bare-token (unquoted) span anywhere.
-        // We check this by confirming the string starts and ends with `"`
-        // and the count of unescaped `"` is even (each token is wrapped).
-        assert!(s.starts_with('"') && s.ends_with('"'));
+        // Sanity: the combined expression is `(...) OR (...)` so it
+        // starts with `(` and ends with `)`.
+        assert!(s.starts_with('(') && s.ends_with(')'));
    }

    #[test]
@@ -588,6 +619,55 @@ mod tests {
        assert_eq!(s, "foo OR bar*");
    }

+    // ── v0.17.0 trigram-aware redesign coverage ──────────────────────────
+
+    /// 2-char Korean query (`충돌`) yields neither a whole-phrase nor a
+    /// token-AND candidate → `None`. Caller short-circuits to an empty
+    /// hit list rather than executing an FTS5 syntax error on `""` MATCH.
+    #[test]
+    fn build_match_string_short_korean_returns_none() {
+        assert!(build_match_string("충돌").is_none());
+        assert!(build_match_string("키").is_none());
+        assert!(build_match_string(" 충돌 ").is_none());
+    }
+
+    /// `해시 충돌` — both tokens are 2 chars (dropped from the AND), but
+    /// the whole-phrase candidate (`"해시 충돌"`, 5 chars total) survives.
+    /// This is the dominant Korean usage pattern targeted by A5.
+    #[test]
+    fn build_match_string_whole_phrase_only_when_all_tokens_short() {
+        let s = build_match_string("해시 충돌").unwrap();
+        assert_eq!(s, r#""해시 충돌""#);
+    }
+
+    /// Single long token: whole-phrase and token-AND candidates collapse
+    /// to the same string. The builder returns the bare quoted form so
+    /// the MATCH expression doesn't carry a redundant `(x) OR (x)`.
+    #[test]
+    fn build_match_string_single_long_token_no_duplicate_or() {
+        assert_eq!(build_match_string("러스트").unwrap(), r#""러스트""#);
+        assert_eq!(build_match_string("rust").unwrap(), r#""rust""#);
+    }
+
+    /// Mixed Korean+English multi-token query where every token is ≥3
+    /// chars: both candidates exist and differ, OR-combined.
+    #[test]
+    fn build_match_string_mixed_lang_emits_or_of_phrase_and_and() {
+        let s = build_match_string("Rust 충돌은").unwrap();
+        assert_eq!(s, r#"("Rust 충돌은") OR ("Rust" "충돌은")"#);
+    }
+
+    /// One ≥3 token + one <3 token: short token is dropped from the
+    /// AND, leaving a single long token there; whole-phrase exists
+    /// independently. Both candidates differ → OR-combined.
+    #[test]
+    fn build_match_string_drops_short_token_in_and_keeps_whole() {
+        // "키" (1 char) dropped from AND; "해시테이블" (5 chars) kept.
+        // Whole phrase "키 해시테이블" (7 chars) keeps the short token.
+        let s = build_match_string("키 해시테이블").unwrap();
+        assert_eq!(s, r#"("키 해시테이블") OR ("해시테이블")"#);
+    }
+
    #[test]
    fn normalize_bm25_top_score_in_unit_interval() {
        // A "perfect" hit is bm25 = -1.0 → normalized 0.5.
--- a/crates/kebab-search/tests/fixtures/search/lexical/run-1.json
+++ b/crates/kebab-search/tests/fixtures/search/lexical/run-1.json
@@ -19,9 +19,9 @@
    "indexed_at": "2024-01-01T00:00:00Z",
    "rank": 1,
    "retrieval": {
-      "fusion_score": 1.4490997273242101e-6,
+      "fusion_score": 1.4615362715630908e-6,
      "lexical_rank": 1,
-      "lexical_score": 1.4490997273242101e-6,
+      "lexical_score": 1.4615362715630908e-6,
      "method": "lexical",
      "vector_rank": null,
      "vector_score": null
@@ -51,9 +51,9 @@
    "indexed_at": "2024-01-01T00:00:00Z",
    "rank": 2,
    "retrieval": {
-      "fusion_score": 9.641424867368187e-7,
+      "fusion_score": 9.207039965986041e-7,
      "lexical_rank": 2,
-      "lexical_score": 9.641424867368187e-7,
+      "lexical_score": 9.207039965986041e-7,
      "method": "lexical",
      "vector_rank": null,
      "vector_score": null
--- a/crates/kebab-tui/src/app.rs
+++ b/crates/kebab-tui/src/app.rs
@@ -153,6 +153,12 @@ pub struct SearchState {
    /// `Ctrl-L`); the previous draft kept one for "symmetry" but
    /// it was dead code.
    pub worker_rx: Option<std::sync::mpsc::Receiver<SearchWorkerMessage>>,
+    /// v0.17.0 A5 Step 5: advisory text shown when the last completed
+    /// search returned no hits and the (trimmed) query is shorter than
+    /// the FTS5 trigram tokenizer's 3-char minimum. `None` whenever
+    /// the input changes (so a stale hint never overlaps a fresh
+    /// typing session) or the next search returns ≥1 hit.
+    pub short_query_hint: Option<String>,
 }

 /// p9-fb-08: payload posted by the search worker on completion.
@@ -179,6 +185,7 @@ impl Default for SearchState {
            preview: None,
            generation: 0,
            worker_rx: None,
+            short_query_hint: None,
        }
    }
 }
--- a/crates/kebab-tui/src/run.rs
+++ b/crates/kebab-tui/src/run.rs
@@ -393,6 +393,20 @@ fn dynamic_status(app: &App) -> String {
    if app.search.as_ref().map(|s| s.searching).unwrap_or(false) {
        return "searching…".to_string();
    }
+    // v0.17.0 A5 Step 5: short-query advisory. Checked after the
+    // `searching…` early return above, so an in-flight search always
+    // wins, but *before* the ingest status line below — while Search
+    // is focused the hint hides ingest progress. NOTE(review): confirm
+    // that order is intended. Slot only fires while focused on Search.
+    if app.focus == Pane::Search {
+        if let Some(hint) = app
+            .search
+            .as_ref()
+            .and_then(|s| s.short_query_hint.as_deref())
+        {
+            return hint.to_string();
+        }
+    }
    if let Some(state) = app.ingest_state.as_ref() {
        return crate::ingest_progress::status_line(state);
    }
--- a/crates/kebab-tui/src/search.rs
+++ b/crates/kebab-tui/src/search.rs
@@ -333,7 +333,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
            s.mode = cycle_mode(s.mode);
            // Force re-search at the new mode if there's a query.
            if !s.input.as_str().trim().is_empty() {
-                s.input_dirty_at = Some(time::OffsetDateTime::now_utc());
+                mark_input_changed(s);
            }
            KeyOutcome::Continue
        }
@@ -360,7 +360,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
        (KeyCode::Backspace, _) => {
            if !s.input.is_empty() {
                s.input.pop_char();
-                s.input_dirty_at = Some(time::OffsetDateTime::now_utc());
+                mark_input_changed(s);
            }
            KeyOutcome::Continue
        }
@@ -388,7 +388,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
        }
        (KeyCode::Delete, _) => {
            if s.input.delete_after().is_some() {
-                s.input_dirty_at = Some(time::OffsetDateTime::now_utc());
+                mark_input_changed(s);
            }
            KeyOutcome::Continue
        }
@@ -402,7 +402,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
                s.preview = None;
            } else {
                s.input.push_char('j');
-                s.input_dirty_at = Some(time::OffsetDateTime::now_utc());
+                mark_input_changed(s);
            }
            KeyOutcome::Continue
        }
@@ -412,7 +412,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
                s.preview = None;
            } else {
                s.input.push_char('k');
-                s.input_dirty_at = Some(time::OffsetDateTime::now_utc());
+                mark_input_changed(s);
            }
            KeyOutcome::Continue
        }
@@ -426,7 +426,7 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
            // bindings (and don't currently match any Search
            // command, so they're a safe fall-through to Continue).
            s.input.push_char(c);
-            s.input_dirty_at = Some(time::OffsetDateTime::now_utc());
+            mark_input_changed(s);
            KeyOutcome::Continue
        }
        // Normal mode + un-handled Char → no-op (no typing in
@@ -435,6 +435,16 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
    }
 }

+/// v0.17.0 A5 Step 5: every input-mutation site in `handle_key_search`
+/// funnels through this helper so the debounce stamp and the
+/// short-query advisory stay in sync. Reset is eager — the stale
+/// advisory from the previous result set must not visually overlap
+/// with a fresh typing session.
+fn mark_input_changed(s: &mut crate::app::SearchState) {
+    s.input_dirty_at = Some(time::OffsetDateTime::now_utc());
+    s.short_query_hint = None;
+}
+
 fn cycle_mode(m: SearchMode) -> SearchMode {
    match m {
        SearchMode::Lexical => SearchMode::Vector,
@@ -603,6 +613,11 @@ pub(crate) fn fire_search(state: &mut App) -> anyhow::Result<()> {
        s.generation = s.generation.wrapping_add(1);
        s.searching = true;
        s.input_dirty_at = None;
+        // v0.17.0 A5 Step 5: hint belongs to the *prior* result set —
+        // a fresh worker spawn invalidates it so the status bar
+        // doesn't keep showing the old advisory while the new
+        // query is in flight.
+        s.short_query_hint = None;
        let q_text = s.input.as_str().to_string();
        s.last_query = Some((q_text.clone(), s.mode));
        (q_text, s.mode, s.generation)
@@ -676,6 +691,18 @@ pub fn poll_worker(state: &mut App) {
            s.searching = false;
            match result {
                Ok(hits) => {
+                    // v0.17.0 A5 Step 5: stale-aware short-query hint.
+                    // The worker carries no copy of the query text;
+                    // we ground the advisory on `s.last_query` which
+                    // was snapshotted at `fire_search` time and (by
+                    // the generation guard above) still matches what
+                    // the user submitted for *this* result set. If
+                    // input has drifted since spawn, the gen-check
+                    // already returned early.
+                    let q_text =
+                        s.last_query.as_ref().map(|(t, _)| t.as_str()).unwrap_or("");
+                    s.short_query_hint =
+                        kebab_app::short_query_hint(q_text, hits.is_empty());
                    s.hits = hits;
                    s.selected_hit = 0;
                    s.preview = None;
@@ -683,6 +710,7 @@ pub fn poll_worker(state: &mut App) {
                Err(e) => {
                    s.hits.clear();
                    s.selected_hit = 0;
+                    s.short_query_hint = None;
                    state.error_overlay =
                        Some(crate::error_popup::ErrorOverlay::from_anyhow(&e));
                }
--- a/docs/wire-schema/v1/search_response.schema.json
+++ b/docs/wire-schema/v1/search_response.schema.json
@@ -29,6 +29,10 @@
          }
        }
      }
+    },
+    "hint": {
+      "type": "string",
+      "description": "v0.17.0 A5 Step 4b: advisory string set when the empty hit list is likely due to a query shorter than the FTS5 trigram tokenizer's 3-char minimum. Field is omitted when no advisory applies. Raw FTS5 mode ('...') opts out. MCP / agent consumers should surface this so users understand the empty result rather than retrying the same short query."
    }
  }
 }