From 5f6b2fa259fa37a277e1121b2a691c2657d025ce Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 12:05:31 +0900
Subject: [PATCH 01/13] spec(fb-37): trace + stats design
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- search --trace boolean flag, additive optional `trace` field on search_response.v1
- HybridRetriever search_with_trace returns (hits, SearchTrace) — lex/vec/rrf_inputs + per-stage timing
- cache bypass when --trace (debug intent)
- schema.v1.stats extended with media_breakdown / lang_breakdown / index_bytes / stale_doc_count
- TUI search pane `t` keystroke opens TracePopup
- additive minor wire — no schema bump
Co-Authored-By: Claude Opus 4.7 (1M context)
---
...6-05-10-p9-fb-37-trace-and-stats-design.md | 360 ++++++++++++++++++
1 file changed, 360 insertions(+)
create mode 100644 docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md
diff --git a/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md b/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md
new file mode 100644
index 0000000..edb2f87
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md
@@ -0,0 +1,360 @@
+---
+title: "p9-fb-37 — Trace + stats design"
+phase: P9
+component: kebab-core + kebab-search + kebab-store-sqlite + kebab-app + kebab-cli + kebab-mcp + kebab-tui
+task_id: p9-fb-37
+status: design
+target_version: 0.5.0
+contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+contract_sections: [§4 search, §7 RAG, §10 UX]
+date: 2026-05-10
+---
+
+# p9-fb-37 — Trace + stats
+
+## Goal
+
+retrieval pipeline 가시성 + KB 건강 surface. 두 axes:
+
+- **Trace**: `kebab search Q --trace` — `search_response.v1` 에 optional `trace` 필드 (lexical/vector pre-fusion lists + RRF inputs + per-stage timing). agent / 사용자가 "왜 이 결과가 나왔는지" 진단.
+- **Stats**: `kebab schema --json` 의 기존 `stats` 객체에 4 필드 추가 (media/lang breakdown + index disk bytes + stale doc count). KB 건강 한 눈에.
+
+둘 다 wire schema additive minor — 기존 consumer 무영향. trace 는 opt-in (cost 0 when off), stats 는 항상 채움 (저렴한 GROUP BY).
+
+## Behavior contract
+
+### CLI flag
+
+```
+kebab search [--trace] [--json] [기존 flags ...]
+kebab schema [--json]
+```
+
+`--trace` boolean, default false. 활성 시:
+- HybridRetriever 가 lexical / vector 각 단계 출력 + per-stage timing 캡처.
+- search cache **bypass 강제** (debug intent — cache hit timing 무의미).
+- `--json` 면 `search_response.v1.trace` 채움.
+- non-`--json` 면 hits 출력 후 `Trace:` section pretty-print (lex/vec 카운트 + timing + top 3 hit per stage).
+
+`kebab schema --json` 의 `stats` 4 필드 항상 출력 (no flag).
+
+### Wire shape
+
+**`search_response.v1`** (additive minor — schema bump 없음):
+
+```jsonc
+{
+ "schema_version": "search_response.v1",
+ "hits": [/* search_hit.v1 */],
+ "next_cursor": null,
+ "truncated": false,
+ "trace": { // OPTIONAL — present iff --trace
+ "lexical": [
+ {"chunk_id":"c1","doc_id":"d1","doc_path":"a.md","rank":1,"score":0.42}, ...
+ ],
+ "vector": [
+ {"chunk_id":"c2","doc_id":"d2","doc_path":"b.md","rank":1,"score":0.81}, ...
+ ],
+ "rrf_inputs": [
+ {"chunk_id":"c1","lexical_rank":2,"vector_rank":3,"fusion_score":0.0234}, ...
+ ],
+ "timing": {"lexical_ms":12,"vector_ms":45,"fusion_ms":1,"total_ms":58}
+ }
+}
+```
+
+`#[serde(default, skip_serializing_if = "Option::is_none")]` — `--trace` 없으면 `trace` 키 자체 부재.
+
+**`schema.v1.stats`** (additive minor — schema bump 없음):
+
+```jsonc
+"stats": {
+ "doc_count": 50,
+ "chunk_count": 200,
+ "asset_count": 50,
+ "last_ingest_at": "2026-05-10T12:34:56Z",
+ // fb-37 신규
+ "media_breakdown": {"markdown":12,"pdf":3,"image":5,"audio":0,"other":0},
+ "lang_breakdown": {"en":10,"ko":5,"null":5},
+ "index_bytes": {"sqlite":12345678,"lancedb":23456789},
+ "stale_doc_count": 2
+}
+```
+
+- `media_breakdown`: `MEDIA_KINDS` (markdown/pdf/image/audio/other) 5 키 항상 채움 (0 포함). `assets.media_type` JSON 의 dual shape (text vs object) 는 fb-36 과 동일한 CASE WHEN 패턴.
+- `lang_breakdown`: 비어있을 수 있음 (corpus 비면 `{}`). NULL lang 은 `"null"` 문자열 키.
+- `index_bytes.sqlite` = `*.sqlite` + `*.sqlite-wal` + `*.sqlite-shm` 합. `lancedb` = 디렉터리 recursive 합 (없으면 0).
+- `stale_doc_count` = `documents.updated_at < (now - threshold_days)` count. `config.search.stale_threshold_days = 0` 이면 항상 0 (fb-32 의미).
+
+### Edge cases
+
+| 상황 | 동작 |
+|------|------|
+| `--trace --mode lexical` | `vector: []`, `vector_ms: 0`. rrf_inputs 모두 `vector_rank: null` |
+| `--trace --mode vector` | 대칭 |
+| `--trace` cache 가 hit 가능 query | cache bypass 강제, fresh run |
+| 빈 corpus | hits=[], trace lex/vec=[], timing 정상 (모두 작은 값) |
+| index_bytes lancedb 디렉터리 부재 | 0 |
+| sqlite WAL/SHM aux 파일 부재 | 메인 `.sqlite` 만 합산 |
+| stale_doc_count threshold=0 | 0 (fb-32) |
+| cursor pagination + `--trace` | 첫 호출 trace, next_cursor 따라 재호출 trace 부재 (재요청 필요) |
+| `--trace` non-`--json` mode | hits + trace 텍스트 출력 (lex/vec count, timing, top 3 per stage) |
+
+### MCP `SearchInput` 확장
+
+```rust
+pub struct SearchInput {
+ pub query: String,
+ pub mode: Option,
+ pub k: Option,
+ pub max_tokens: Option, // fb-34
+ pub snippet_chars: Option, // fb-34
+ pub cursor: Option, // fb-34
+ pub tags: Option>, // fb-36
+ pub lang: Option, // fb-36
+ pub path_glob: Option, // fb-36
+ pub trust_min: Option, // fb-36
+ pub media: Option>, // fb-36
+ pub ingested_after: Option, // fb-36
+ pub doc_id: Option, // fb-36
+ // fb-37
+ pub trace: Option<bool>,
+}
+```
+
+`Some(true)` = trace ON, `Some(false)` / `None` = OFF. 출력은 wire 와 동일 (trace 필드 mirror).
+
+### TUI Search pane
+
+- 결과 표시 중 (`SearchPane.results` 비어있지 않음) `t` keybind → `TracePopup` 모달.
+- TUI 가 `kebab_app::search_with_trace_with_config` 재호출 (현재 query, k, mode, filters 전부).
+- popup: 단일 scroll list (lex section / vec section / rrf section 헤더로 구분), `Esc` 닫기, `j/k` 또는 ↑↓ scroll.
+- 기존 inspect pane 무수정.
+
+## Allowed / forbidden dependencies
+
+- `kebab-core`: 신규 dep 없음. domain types 추가만.
+- `kebab-store-sqlite`: 신규 dep 없음. rusqlite + std::fs 만.
+- `kebab-search`: 신규 dep 없음. std::time::Instant 사용.
+- `kebab-app`: 신규 dep 없음. facade 확장.
+- `kebab-cli`: 신규 dep 없음. clap flag 추가.
+- `kebab-mcp`: 신규 dep 없음. SearchInput 확장.
+- `kebab-tui`: 신규 dep 없음. ratatui popup widget.
+
+`kebab-core` 의 다른 `kebab-*` 의존 금지 룰 그대로. UI 크레이트는 facade 만.
+
+## Public surface delta
+
+### kebab-core (`search.rs`)
+
+```rust
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct SearchTrace {
+ pub lexical: Vec<TraceCandidate>,
+ pub vector: Vec<TraceCandidate>,
+ pub rrf_inputs: Vec<TraceFusionInput>,
+ pub timing: TraceTiming,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceCandidate {
+ pub chunk_id: ChunkId,
+ pub doc_id: DocumentId,
+ pub doc_path: WorkspacePath,
+ pub rank: u32,
+ pub score: f32,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceFusionInput {
+ pub chunk_id: ChunkId,
+ pub lexical_rank: Option<u32>,
+ pub vector_rank: Option<u32>,
+ pub fusion_score: f32,
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TraceTiming {
+ pub lexical_ms: u64,
+ pub vector_ms: u64,
+ pub fusion_ms: u64,
+ pub total_ms: u64,
+}
+```
+
+`IndexStats` 확장 (`stats.rs` 또는 위치 동일):
+
+```rust
+pub struct IndexStats {
+ // 기존
+ pub doc_count: u64,
+ pub chunk_count: u64,
+ pub asset_count: u64,
+ pub last_ingest_at: Option<String>,
+ // fb-37
+ #[serde(default)]
+ pub media_breakdown: BTreeMap<String, u64>,
+ #[serde(default)]
+ pub lang_breakdown: BTreeMap<String, u64>,
+ #[serde(default)]
+ pub index_bytes: IndexBytes,
+ #[serde(default)]
+ pub stale_doc_count: u64,
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct IndexBytes {
+ pub sqlite: u64,
+ pub lancedb: u64,
+}
+```
+
+`#[serde(default)]` — 옛 JSON 누락 시 zero-valued 으로 deserialize (backwards-compat).
+
+### kebab-store-sqlite (`stats.rs`)
+
+```rust
+pub fn breakdowns(conn: &rusqlite::Connection, threshold_days: u64)
+ -> rusqlite::Result<(BTreeMap<String, u64>, BTreeMap<String, u64>, u64)>;
+
+pub fn index_bytes(data_dir: &Path) -> std::io::Result<IndexBytes>;
+```
+
+기존 stats helper 가 이 두 함수 호출해 `IndexStats` 채움. 신규 query:
+- media: `SELECT CASE WHEN json_type(media_type)='text' THEN json_extract(media_type,'$') ELSE (SELECT key FROM json_each(media_type) LIMIT 1) END AS kind, COUNT(DISTINCT d.doc_id) FROM documents d JOIN assets a ON a.asset_id=d.asset_id GROUP BY kind`
+- lang: `SELECT COALESCE(lang,'null') AS l, COUNT(*) FROM documents GROUP BY l`
+- stale: `SELECT COUNT(*) FROM documents WHERE updated_at < ?` (threshold_days > 0 일 때만; 0 면 0 반환).
+
+### kebab-search (`hybrid.rs`)
+
+```rust
+impl HybridRetriever {
+ pub fn search_with_trace(&self, query: &SearchQuery)
+ -> Result<(Vec, SearchTrace)>;
+}
+```
+
+기존 `Retriever::search` 무변경. `search_with_trace` 는 hybrid 전용 (lexical/vector mode 도 한 쪽만 채워 동일 type 반환). 내부:
+1. `Instant::now()` 기록, lex retriever 호출, lex_ms 측정.
+2. 같은 패턴 vec.
+3. fuse — fusion_ms 측정.
+4. trace 빌드: lex/vec 전체 list → TraceCandidate 매핑. rrf_inputs = lex ∪ vec union (chunk_id 기준), 각 entry 의 lexical_rank/vector_rank/fusion_score 캡처. fusion 결과 ranking 과 동일.
+5. total_ms = 처음~끝.
+
+### kebab-app (`app.rs`)
+
+```rust
+#[doc(hidden)]
+pub fn search_with_trace_with_config(
+ cfg: kebab_config::Config,
+ query: &str,
+ opts: SearchOpts, // 기존 + trace: bool
+) -> Result<(SearchResponse, Option<SearchTrace>)>;
+```
+
+`opts.trace = true` 시:
+- cache bypass (`no_cache = true` 강제).
+- `HybridRetriever::search_with_trace` 호출.
+- `SearchResponse` 빌드 + trace 별도 반환 (caller 가 wire 합성).
+
+기존 `search_with_config` 무변경 (zero-overhead path).
+
+### kebab-cli (`Cmd::Search`)
+
+```rust
+Cmd::Search {
+ // 기존 + fb-34 + fb-36
+ query, k, mode, explain, no_cache,
+ max_tokens, snippet_chars, cursor,
+ tag, lang, path_glob, trust_min, media, ingested_after, doc_id,
+ // fb-37
+ #[arg(long)] trace: bool,
+}
+```
+
+dispatch:
+- `trace == false` → 기존 `search_with_config` 경로.
+- `trace == true` → `search_with_trace_with_config` 호출, wire 합성 시 `search_response.v1` JSON 에 `trace` 필드 inject.
+
+non-`--json` 출력:
+- `--trace` 면 hits 후 `\nTrace:\n lexical (N hits, Xms): top3...\n vector (M hits, Yms): top3...\n rrf (Zms): top3...\n total: Wms`.
+
+### kebab-mcp (`tools/search.rs`)
+
+`SearchInput.trace: Option<bool>` 추가. dispatch 시 `Some(true)` 이면 위 `_with_trace` 호출. 출력 JSON 에 trace 합성 (wire 와 동일).
+
+### kebab-tui (`search.rs` + `trace_popup.rs` 신규)
+
+- `App` 에 `trace_popup: Option` 필드.
+- search pane key handler `t` → `kebab_app::search_with_trace_with_config` (현재 query/opts) 호출 → popup state 채움.
+- `trace_popup.rs`: ratatui Paragraph 또는 List 로 lex/vec/rrf 3 section, scroll, `Esc` 닫기.
+- cheatsheet 에 `t = trace` 한 줄 추가.
+
+## Test plan
+
+| kind | description |
+|------|-------------|
+| unit (kebab-core) | `SearchTrace` serde roundtrip — 모든 필드 |
+| unit (kebab-core) | `IndexStats` 신규 4 필드 default — 비어있는 map / 0 bytes / 0 stale |
+| unit (kebab-store-sqlite) | `breakdowns`: 3 docs (md/md/pdf, en/en/null) → media `{markdown:2,pdf:1,image:0,audio:0,other:0}` (5키 패딩 적용), lang `{en:2,null:1}` |
+| unit (kebab-store-sqlite) | `index_bytes`: temp dir 내 sqlite 파일 + 빈 lancedb dir → sqlite>0, lancedb=0 |
+| unit (kebab-store-sqlite) | `breakdowns` stale_doc_count: threshold 7 day, 8일 전 doc 1 + 어제 doc 2 → 1 |
+| unit (kebab-store-sqlite) | `breakdowns` threshold=0 → stale_doc_count=0 |
+| unit (kebab-search/hybrid) | `search_with_trace`: trace 의 lex/vec list == 각 retriever 단독 호출 결과 |
+| unit (kebab-search/hybrid) | timing 모두 정의됨, total ≥ lex+vec+fusion 의 sum (sequential 가정) |
+| unit (kebab-search/hybrid) | mode=lexical → vector=[], vector_ms=0, rrf_inputs.vector_rank 모두 None |
+| 통합 (kebab-cli) | `kebab search Q --trace --json` → trace 키 존재, lexical/vector/rrf_inputs/timing 모두 valid shape |
+| 통합 (kebab-cli) | `kebab search Q --json` (no --trace) → trace 키 부재 |
+| 통합 (kebab-cli) | `kebab schema --json` → media_breakdown 5 키, lang_breakdown 가능 키, index_bytes 두 필드, stale_doc_count 모두 존재 |
+| 통합 (kebab-cli) | 빈 corpus `kebab schema --json` → media_breakdown 5키 모두 0, lang_breakdown {} |
+| 통합 (kebab-cli) | `kebab search Q --trace` (non-json) → stdout 에 `Trace:` section, lex/vec count + timing 표시 |
+| 통합 (kebab-mcp) | search input `trace:true` → 응답 JSON 에 trace 필드 |
+| 통합 (kebab-mcp) | search input `trace` 미지정 → 응답 trace 부재 |
+| TUI (kebab-tui) | search pane 결과 있는 상태에서 `t` 키 → popup 열림 (state transitions) |
+| TUI (kebab-tui) | popup 열린 상태 `Esc` → popup 닫힘 |
+
+`media_breakdown` 5키 패딩 책임: `kebab-store-sqlite::breakdowns` 가 SQL GROUP BY 결과를 받아 `MEDIA_KINDS` 순회해 누락 키 0 으로 채움.
+
+## Implementation steps (high-level)
+
+1. `kebab-core`: SearchTrace + 3 sibling struct + IndexStats 4 필드 + 단위 테스트.
+2. `kebab-store-sqlite::stats`: breakdowns + index_bytes 헬퍼 + 단위 테스트.
+3. `kebab-store-sqlite::stats`: 기존 IndexStats 빌더가 신규 4 필드 채우도록.
+4. `kebab-search::hybrid`: `search_with_trace` 구현 + 단위 테스트.
+5. `kebab-app`: `search_with_trace_with_config` facade + cache bypass.
+6. `kebab-cli::Cmd::Search`: `--trace` flag + dispatch + JSON wire 합성 + non-JSON pretty-print.
+7. `kebab-cli` 통합 테스트.
+8. `kebab-mcp::tools::search`: SearchInput.trace + dispatch + 통합 테스트.
+9. `kebab-tui::search` + `trace_popup`: `t` keybind + popup widget + cheatsheet.
+10. README + SMOKE + INDEX/spec status flip + SKILL.
+
+## Risks / notes
+
+- **timing 정확도**: 현재 hybrid sequential. 추후 병렬화 시 `total_ms = max(lex,vec) + fusion` 으로 재정의 — 그 시점 schema doc note 갱신.
+- **lancedb dir walk cost**: 큰 corpus 에서 O(file count) IO. 도그푸딩 corpus 작아 무시. 큰 corpus 만나면 cache 또는 lazy 도입 검토.
+- **`media_breakdown` JSON shape**: fb-36 과 동일한 CASE WHEN 패턴 재사용 — `MediaType` serde 의 dual shape (text variant vs tuple variant) 처리.
+- **lang null 키**: ASCII string `"null"` 사용. ISO 639 코드는 2~3자 이므로 4자인 `"null"` 은 어떤 코드와도 충돌 X.
+- **cache bypass when --trace**: agent 가 인지해야 (SKILL/README 명시). 안 그러면 trace timing 이 cache hit 의 sub-ms 보고할 위험.
+- **wire backwards-compat**: `trace` 필드 optional + skip_serializing_if. `IndexStats` 신규 필드 `#[serde(default)]` 로 새 reader 가 신규 필드 없는 옛 JSON 을 deserialize 가능. 옛 reader 가 새 응답을 읽는 방향은 unknown field 무시 (`deny_unknown_fields` 미사용) 전제 — 구현 시 확인.
+- **TUI popup**: 별도 `t` 키. 충돌 검사 — 현재 search pane keybinds 확인 (i=inspect, /=focus, j/k=move, n=next, p=prev). `t` 미사용.
+
+## Out of scope
+
+- per-stage filter 적용 전/후 카운트 (filter-debug 별도 작업).
+- search 단계 병렬화 (sequential 유지).
+- lance 테이블 별 / column 별 index_bytes (단일 sum).
+- stats 시계열 (corpus_revision history).
+- `--trace-level` verbosity (single boolean).
+- TUI inspect pane 안 trace 통합 (search popup 으로 격리).
+- `kebab stats` 별도 명령 (schema 통합 결정).
+- `--explain` flag deprecation 알림 (현재 search dead, 무영향 — 별도 cleanup task).
+
+## Documentation updates (implementation PR 동시)
+
+- `README.md`: `kebab search` row 의 flag 표기에 `--trace` 추가, `kebab schema` row 에 신규 stats 한 줄 언급.
+- `docs/SMOKE.md`: `--trace` walkthrough + `kebab schema --json` 출력 sample.
+- `tasks/p9/p9-fb-37-trace-and-stats.md`: `status: open → completed`, design/plan 링크 추가.
+- `tasks/INDEX.md`: fb-37 행 ✅.
+- `integrations/claude-code/kebab/SKILL.md`: `mcp__kebab__search` `trace` 입력 + 출력 trace shape 명시. `kebab schema` 신규 stats 필드 mention.
+- `docs/wire-schema/v1/search_response.schema.json`: `trace` optional 필드 추가.
+- `docs/wire-schema/v1/schema.schema.json`: `stats` 4 신규 필드 추가.
From fb31befef19b8726dd836627a4864521480300bf Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 12:14:26 +0900
Subject: [PATCH 02/13] plan(fb-37): trace + stats implementation plan
10 tasks: kebab-core types, store breakdowns/index_bytes helpers,
extended CountSummary + Stats wire mirror, HybridRetriever
search_with_trace, App SearchResponse.trace threading, CLI --trace
flag, integration tests, MCP SearchInput.trace, TUI TracePopup,
docs (wire schema + README + SMOKE + INDEX + SKILL).
Co-Authored-By: Claude Opus 4.7 (1M context)
---
.../2026-05-10-p9-fb-37-trace-and-stats.md | 2036 +++++++++++++++++
1 file changed, 2036 insertions(+)
create mode 100644 docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md
diff --git a/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md b/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md
new file mode 100644
index 0000000..7ec475e
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md
@@ -0,0 +1,2036 @@
+# fb-37 Trace + Stats Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Surface retrieval pipeline trace (`kebab search Q --trace`) and richer KB stats (`kebab schema --json`) for agent / user debugging.
+
+**Architecture:** Two additive surfaces. Trace = optional `trace` field on `search_response.v1` populated when `SearchOpts.trace = true`; HybridRetriever exposes a parallel `search_with_trace` method capturing pre-fusion lex/vec lists + per-stage timing. Stats = four new fields (`media_breakdown` / `lang_breakdown` / `index_bytes` / `stale_doc_count`) on existing `schema.v1.stats` populated unconditionally by new SQLite GROUP BY + fs::metadata helpers. TUI search pane gains `t` keystroke that re-runs the query with trace and opens a popup.
+
+**Tech Stack:** Rust 2024, rusqlite (SQLite WHERE / GROUP BY / json_type / json_extract / json_each), std::time::Instant, std::fs, serde, ratatui.
+
+**Spec:** `docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md`
+
+---
+
+## File map
+
+**Create:**
+- `crates/kebab-search/src/trace.rs` — trace timing + capture helpers (kept separate from `hybrid.rs` so `hybrid.rs` stays focused)
+- `crates/kebab-store-sqlite/src/stats_ext.rs` — `breakdowns()` + `index_bytes()` helpers
+- `crates/kebab-tui/src/trace_popup.rs` — TUI popup widget + state
+- `crates/kebab-cli/tests/wire_search_trace.rs` — `--trace` integration tests
+- `crates/kebab-cli/tests/wire_schema_breakdowns.rs` — `kebab schema` extended stats integration tests
+- `crates/kebab-mcp/tests/tools_call_search_trace.rs` — MCP search trace integration test
+
+**Modify:**
+- `crates/kebab-core/src/search.rs` — add `SearchTrace` / `TraceCandidate` / `TraceFusionInput` / `TraceTiming` + `IndexBytes` types; extend `SearchOpts` with `trace: bool`
+- `crates/kebab-store-sqlite/src/store.rs` — extend `CountSummary` with new fields, populate via new helpers
+- `crates/kebab-app/src/schema.rs` — extend `Stats` mirror with new fields, wire collect_stats
+- `crates/kebab-app/src/app.rs` — extend `SearchResponse` with `trace: Option<SearchTrace>`, thread trace through `App::search_with_opts`
+- `crates/kebab-search/src/hybrid.rs` — add `HybridRetriever::search_with_trace`
+- `crates/kebab-cli/src/main.rs` — add `--trace` flag to `Cmd::Search`, dispatch + non-JSON pretty-print
+- `crates/kebab-cli/src/wire.rs` — extend `wire_search_response` to serialize `trace` field when present
+- `crates/kebab-mcp/src/tools/search.rs` — add `trace: Option<bool>` to `SearchInput`, dispatch through
+- `crates/kebab-tui/src/search.rs` — add `t` keystroke handler invoking trace + opening popup
+- `crates/kebab-tui/src/app.rs` — store `trace_popup: Option`
+- `crates/kebab-tui/src/cheatsheet.rs` — add `t = trace` line
+- `crates/kebab-tui/src/lib.rs` — register `trace_popup` module
+- `docs/wire-schema/v1/search_response.schema.json` — declare optional `trace` field
+- `docs/wire-schema/v1/schema.schema.json` — declare new stats fields
+- `README.md`, `docs/SMOKE.md`, `tasks/p9/p9-fb-37-trace-and-stats.md`, `tasks/INDEX.md`, `integrations/claude-code/kebab/SKILL.md`
+
+---
+
+## Task 1: Trace + IndexBytes domain types in kebab-core
+
+**Files:**
+- Modify: `crates/kebab-core/src/search.rs`
+
+- [ ] **Step 1: Write failing test for SearchTrace serde roundtrip**
+
+Append to `crates/kebab-core/src/search.rs` `mod tests`:
+```rust
+#[test]
+fn search_trace_serde_roundtrip() {
+ let t = SearchTrace {
+ lexical: vec![TraceCandidate {
+ chunk_id: ChunkId("c1".into()),
+ doc_id: DocumentId("d1".into()),
+ doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+ rank: 1,
+ score: 0.42,
+ }],
+ vector: vec![],
+ rrf_inputs: vec![TraceFusionInput {
+ chunk_id: ChunkId("c1".into()),
+ lexical_rank: Some(1),
+ vector_rank: None,
+ fusion_score: 0.0234,
+ }],
+ timing: TraceTiming {
+ lexical_ms: 12,
+ vector_ms: 0,
+ fusion_ms: 1,
+ total_ms: 14,
+ },
+ };
+ let v = serde_json::to_value(&t).unwrap();
+ assert_eq!(v["timing"]["lexical_ms"], 12);
+ assert_eq!(v["lexical"][0]["score"], 0.42);
+ let back: SearchTrace = serde_json::from_value(v).unwrap();
+ assert_eq!(back, t);
+}
+
+#[test]
+fn index_bytes_default_is_zero() {
+ let b = IndexBytes::default();
+ assert_eq!(b.sqlite, 0);
+ assert_eq!(b.lancedb, 0);
+}
+
+#[test]
+fn search_opts_trace_default_false() {
+ let opts = SearchOpts::default();
+ assert!(!opts.trace);
+}
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+```bash
+cargo test -p kebab-core --lib
+```
+Expected: compile errors — `SearchTrace`, `TraceCandidate`, `TraceFusionInput`, `TraceTiming`, `IndexBytes` not defined; `SearchOpts.trace` field missing.
+
+- [ ] **Step 3: Add types**
+
+Append to `crates/kebab-core/src/search.rs` (after existing `SearchOpts`):
+
+```rust
+/// p9-fb-37: search retrieval pipeline trace. Populated only when
+/// `SearchOpts.trace = true`; `None` on the wrapping `SearchResponse`
+/// otherwise. `lexical` / `vector` are pre-fusion candidate lists
+/// (each retriever's full output for the fanout query). `rrf_inputs`
+/// is the union (chunk_id) used by RRF, with each side's rank
+/// captured. `timing` is wall-clock per stage.
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchTrace {
+ pub lexical: Vec<TraceCandidate>,
+ pub vector: Vec<TraceCandidate>,
+ pub rrf_inputs: Vec<TraceFusionInput>,
+ pub timing: TraceTiming,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceCandidate {
+ pub chunk_id: ChunkId,
+ pub doc_id: DocumentId,
+ pub doc_path: WorkspacePath,
+ pub rank: u32,
+ pub score: f32,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceFusionInput {
+ pub chunk_id: ChunkId,
+ pub lexical_rank: Option<u32>,
+ pub vector_rank: Option<u32>,
+ pub fusion_score: f32,
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TraceTiming {
+ pub lexical_ms: u64,
+ pub vector_ms: u64,
+ pub fusion_ms: u64,
+ pub total_ms: u64,
+}
+
+/// p9-fb-37: on-disk index size breakdown. Mirrored on the
+/// wire `schema.v1.stats.index_bytes` block.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct IndexBytes {
+ pub sqlite: u64,
+ pub lancedb: u64,
+}
+```
+
+Extend `SearchOpts` (replace the existing struct definition):
+
+```rust
+/// p9-fb-34: caller-supplied output budget knobs for `App::search_with_opts`.
+/// All `None` = no enforcement (existing behavior).
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchOpts {
+ /// chars/4 approximation of wire JSON token cost. None = no cap.
+ pub max_tokens: Option,
+ /// Per-hit snippet character cap. None = use config default.
+ pub snippet_chars: Option,
+ /// Opaque base64 cursor from a previous response. None = first page.
+ pub cursor: Option,
+ /// p9-fb-37: when true, capture pipeline trace (cache bypassed,
+ /// lex / vec pre-fusion lists + timing populated on the response).
+ #[serde(default)]
+ pub trace: bool,
+}
+```
+
+- [ ] **Step 4: Run tests to verify they pass**
+
+```bash
+cargo test -p kebab-core --lib
+```
+Expected: all 3 new tests pass; existing tests unaffected.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add crates/kebab-core/src/search.rs
+git commit -m "feat(core): SearchTrace + IndexBytes types + SearchOpts.trace (fb-37)"
+```
+
+---
+
+## Task 2: SQLite breakdowns helper
+
+**Files:**
+- Create: `crates/kebab-store-sqlite/src/stats_ext.rs`
+- Modify: `crates/kebab-store-sqlite/src/lib.rs` (register module)
+
+- [ ] **Step 1: Write failing tests**
+
+Create `crates/kebab-store-sqlite/src/stats_ext.rs`:
+
+```rust
+//! p9-fb-37: extended stats helpers — per-media / per-lang doc counts,
+//! stale doc count, on-disk index byte sums.
+
+use std::collections::BTreeMap;
+use std::path::Path;
+
+use kebab_core::{IndexBytes, MEDIA_KINDS};
+use rusqlite::Connection;
+
+/// Returns `(media_breakdown, lang_breakdown, stale_doc_count)`.
+///
+/// `media_breakdown` always contains all 5 `MEDIA_KINDS` (zero-padded).
+/// `lang_breakdown` only contains observed languages; NULL lang is
+/// keyed as the literal string `"null"`. `stale_doc_count` is 0 when
+/// `threshold_days == 0` (mirrors fb-32 staleness disable semantics).
+pub fn breakdowns(
+ conn: &Connection,
+ threshold_days: u64,
+) -> rusqlite::Result<(BTreeMap<String, u64>, BTreeMap<String, u64>, u64)> {
+ // media: dual JSON shape — text variant ("markdown") vs object
+ // variant ({"image":{"format":"png"}}). Same CASE WHEN as fb-36.
+ let mut media: BTreeMap<String, u64> = MEDIA_KINDS
+ .iter()
+ .map(|k| ((*k).to_string(), 0u64))
+ .collect();
+ let mut stmt = conn.prepare(
+ "SELECT \
+ CASE \
+ WHEN json_type(a.media_type) = 'text' \
+ THEN json_extract(a.media_type, '$') \
+ ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+ END AS kind, \
+ COUNT(DISTINCT d.doc_id) \
+ FROM documents d JOIN assets a ON a.asset_id = d.asset_id \
+ GROUP BY kind",
+ )?;
+ let rows = stmt.query_map([], |r| {
+ Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))
+ })?;
+ for row in rows {
+ let (kind, n) = row?;
+ media.insert(kind, n);
+ }
+
+ let mut lang: BTreeMap<String, u64> = BTreeMap::new();
+ let mut stmt = conn.prepare(
+ "SELECT COALESCE(lang, 'null') AS l, COUNT(*) \
+ FROM documents GROUP BY l",
+ )?;
+ let rows = stmt.query_map([], |r| {
+ Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))
+ })?;
+ for row in rows {
+ let (l, n) = row?;
+ lang.insert(l, n);
+ }
+
+ let stale: u64 = if threshold_days == 0 {
+ 0
+ } else {
+ let secs = (threshold_days as i64) * 86_400;
+ let cutoff = time::OffsetDateTime::now_utc()
+ - time::Duration::seconds(secs);
+ let cutoff_str = cutoff
+ .format(&time::format_description::well_known::Rfc3339)
+ .expect("RFC3339 format");
+ conn.query_row(
+ "SELECT COUNT(*) FROM documents WHERE updated_at < ?",
+ [cutoff_str],
+ |r| r.get(0),
+ )?
+ };
+
+ Ok((media, lang, stale))
+}
+
+/// Sum on-disk bytes of the SQLite database (main + WAL + SHM) and
+/// the LanceDB directory tree. Missing files / dir = 0.
+pub fn index_bytes(data_dir: &Path) -> std::io::Result<IndexBytes> {
+ fn file_size_or_zero(p: &Path) -> u64 {
+ std::fs::metadata(p).map(|m| m.len()).unwrap_or(0)
+ }
+ fn dir_walk_sum(p: &Path) -> std::io::Result<u64> {
+ if !p.exists() {
+ return Ok(0);
+ }
+ let mut total = 0u64;
+ for entry in std::fs::read_dir(p)? {
+ let entry = entry?;
+ let ty = entry.file_type()?;
+ if ty.is_dir() {
+ total += dir_walk_sum(&entry.path())?;
+ } else if ty.is_file() {
+ total += entry.metadata()?.len();
+ }
+ }
+ Ok(total)
+ }
+
+ let sqlite_main = data_dir.join("kebab.sqlite");
+ let sqlite_wal = data_dir.join("kebab.sqlite-wal");
+ let sqlite_shm = data_dir.join("kebab.sqlite-shm");
+ let sqlite = file_size_or_zero(&sqlite_main)
+ + file_size_or_zero(&sqlite_wal)
+ + file_size_or_zero(&sqlite_shm);
+ let lancedb = dir_walk_sum(&data_dir.join("lancedb"))?;
+ Ok(IndexBytes { sqlite, lancedb })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn open_fresh() -> (tempfile::TempDir, crate::SqliteStore) {
+ let dir = tempfile::tempdir().unwrap();
+ let mut cfg = kebab_config::Config::defaults();
+ cfg.storage.data_dir = dir.path().to_string_lossy().into_owned();
+ let store = crate::SqliteStore::open(&cfg).unwrap();
+ store.run_migrations().unwrap();
+ (dir, store)
+ }
+
+ #[test]
+ fn breakdowns_empty_corpus() {
+ let (_dir, store) = open_fresh();
+ let conn = store.read_conn();
+ let (media, lang, stale) = breakdowns(&conn, 0).unwrap();
+ // 5 keys all zero, lang map empty, stale 0.
+ assert_eq!(media.len(), 5);
+ for k in MEDIA_KINDS {
+ assert_eq!(media.get(*k), Some(&0u64));
+ }
+ assert!(lang.is_empty());
+ assert_eq!(stale, 0);
+ }
+
+ #[test]
+ fn index_bytes_includes_sqlite_main() {
+ let (dir, _store) = open_fresh();
+ let b = index_bytes(dir.path()).unwrap();
+ assert!(b.sqlite > 0, "main sqlite file should exist after migrations");
+ assert_eq!(b.lancedb, 0);
+ }
+
+ #[test]
+ fn index_bytes_lancedb_dir_walk() {
+ let dir = tempfile::tempdir().unwrap();
+ let lance = dir.path().join("lancedb");
+ std::fs::create_dir_all(lance.join("vectors.lance")).unwrap();
+ std::fs::write(
+ lance.join("vectors.lance").join("data.bin"),
+ vec![0u8; 1024],
+ )
+ .unwrap();
+ let b = index_bytes(dir.path()).unwrap();
+ assert_eq!(b.lancedb, 1024);
+ }
+}
+```
+
+Modify `crates/kebab-store-sqlite/src/lib.rs`. Find the existing `pub mod` declarations and add:
+
+```rust
+pub mod stats_ext;
+```
+
+- [ ] **Step 2: Run tests to verify they pass**
+
+```bash
+cargo test -p kebab-store-sqlite stats_ext
+```
+Expected: Step 1 ships the implementation together with its tests, so there is no red phase in this task. If the build fails (for example because `MEDIA_KINDS` or `IndexBytes` is not importable from the `kebab-core` crate root), fix the import or re-export first.
+
+After resolving any compile issue, re-run:
+```bash
+cargo test -p kebab-store-sqlite stats_ext
+```
+Expected: 3 new tests pass.
+
+- [ ] **Step 3: Commit**
+
+```bash
+git add crates/kebab-store-sqlite/src/stats_ext.rs crates/kebab-store-sqlite/src/lib.rs
+git commit -m "feat(store): breakdowns + index_bytes helpers (fb-37)"
+```
+
+---
+
+## Task 3: Extend CountSummary + wire to schema.v1.stats
+
+**Files:**
+- Modify: `crates/kebab-store-sqlite/src/store.rs`
+- Modify: `crates/kebab-app/src/schema.rs`
+
+- [ ] **Step 1: Write failing test in kebab-app**
+
+Append to `crates/kebab-app/src/schema.rs` `mod tests` section (or create one if absent — check around line 200+):
+
+```rust
+#[cfg(test)]
+mod tests_stats_ext {
+ use super::*;
+
+ #[test]
+ fn stats_includes_breakdowns_and_bytes_on_fresh_corpus() {
+ let dir = tempfile::tempdir().unwrap();
+ let mut cfg = kebab_config::Config::defaults();
+ cfg.storage.data_dir = dir.path().to_string_lossy().into_owned();
+ // Bring up migrations so the sqlite file is created.
+ let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap();
+ store.run_migrations().unwrap();
+ drop(store);
+
+ let s = schema_with_config(&cfg).unwrap();
+ // 5 keys padded.
+ assert_eq!(s.stats.media_breakdown.len(), 5);
+ assert_eq!(s.stats.media_breakdown.get("markdown"), Some(&0));
+ assert_eq!(s.stats.media_breakdown.get("pdf"), Some(&0));
+ // lang map empty on empty corpus.
+ assert!(s.stats.lang_breakdown.is_empty());
+ // sqlite bytes positive after migrations, lancedb 0.
+ assert!(s.stats.index_bytes.sqlite > 0);
+ assert_eq!(s.stats.index_bytes.lancedb, 0);
+ assert_eq!(s.stats.stale_doc_count, 0);
+ }
+}
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+```bash
+cargo test -p kebab-app stats_includes_breakdowns_and_bytes_on_fresh_corpus
+```
+Expected: compile error — `Stats` lacks `media_breakdown`, `lang_breakdown`, `index_bytes`, `stale_doc_count`.
+
+- [ ] **Step 3: Extend `CountSummary`**
+
+Modify `crates/kebab-store-sqlite/src/store.rs`. Find `pub struct CountSummary` (~line 595-606) and replace with:
+
+```rust
+#[derive(Debug, Clone)]
+pub struct CountSummary {
+ pub doc_count: u64,
+ pub chunk_count: u64,
+ pub asset_count: u64,
+ /// ISO-8601 timestamp of the most-recently updated document row, or
+ /// `None` when the store is empty.
+ pub last_ingest_at: Option<String>,
+ /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded).
+ pub media_breakdown: std::collections::BTreeMap<String, u64>,
+ /// p9-fb-37: per-language doc count, NULL keyed as `"null"`.
+ pub lang_breakdown: std::collections::BTreeMap<String, u64>,
+ /// p9-fb-37: docs whose `updated_at < now - threshold_days`. 0 when threshold=0.
+ pub stale_doc_count: u64,
+}
+```
+
+Modify `count_summary` body (around line 615-650) to populate new fields. Replace the body of `pub fn count_summary(&self) -> anyhow::Result<CountSummary>`:
+
+```rust
+pub fn count_summary(&self) -> anyhow::Result<CountSummary> {
+ use anyhow::Context;
+ use rusqlite::OptionalExtension;
+
+ let conn = self.read_conn();
+
+ let doc_count: u64 = conn
+ .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
+ .context("count documents")?;
+ let chunk_count: u64 = conn
+ .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
+ .context("count chunks")?;
+ let asset_count: u64 = conn
+ .query_row("SELECT COUNT(*) FROM assets", [], |r| r.get(0))
+ .context("count assets")?;
+ let last_ingest_at: Option = conn
+ .query_row("SELECT MAX(updated_at) FROM documents", [], |r| r.get(0))
+ .optional()
+ .context("max updated_at")?
+ .flatten();
+
+ // p9-fb-37: this default path passes a threshold of 0, which
+ // disables stale counting (matches fb-32 disable semantics) and
+ // keeps existing callers unchanged. Callers that need a real
+ // stale_doc_count must use count_summary_with_threshold.
+ let (media_breakdown, lang_breakdown, stale_doc_count) =
+ crate::stats_ext::breakdowns(&conn, 0).context("breakdowns")?;
+
+ Ok(CountSummary {
+ doc_count,
+ chunk_count,
+ asset_count,
+ last_ingest_at,
+ media_breakdown,
+ lang_breakdown,
+ stale_doc_count,
+ })
+}
+
+/// p9-fb-37: variant that honors `config.search.stale_threshold_days`.
+/// Callers who need a meaningful `stale_doc_count` (e.g. `kebab schema`)
+/// pass the configured threshold; the older `count_summary` returns 0.
+pub fn count_summary_with_threshold(
+ &self,
+ threshold_days: u64,
+) -> anyhow::Result {
+ use anyhow::Context;
+ let mut s = self.count_summary()?;
+ let conn = self.read_conn();
+ let (m, l, stale) = crate::stats_ext::breakdowns(&conn, threshold_days)
+ .context("breakdowns_with_threshold")?;
+ s.media_breakdown = m;
+ s.lang_breakdown = l;
+ s.stale_doc_count = stale;
+ Ok(s)
+}
+```
+
+Update existing `count_summary_zero_on_fresh_store` test (~line 678) to assert new fields:
+
+```rust
+#[test]
+fn count_summary_zero_on_fresh_store() {
+ let (_dir, store) = open_fresh_store();
+ let s = store.count_summary().unwrap();
+ assert_eq!(s.doc_count, 0);
+ assert_eq!(s.chunk_count, 0);
+ assert_eq!(s.asset_count, 0);
+ assert!(s.last_ingest_at.is_none());
+ assert_eq!(s.media_breakdown.len(), 5);
+ assert!(s.lang_breakdown.is_empty());
+ assert_eq!(s.stale_doc_count, 0);
+}
+```
+
+- [ ] **Step 4: Extend `Stats` mirror in kebab-app::schema**
+
+Modify `crates/kebab-app/src/schema.rs`. Replace `pub struct Stats`:
+
+```rust
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Stats {
+ pub doc_count: u64,
+ pub chunk_count: u64,
+ pub asset_count: u64,
+ pub last_ingest_at: Option,
+ /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded).
+ #[serde(default)]
+ pub media_breakdown: std::collections::BTreeMap,
+ /// p9-fb-37: per-language doc count, NULL keyed as `"null"`.
+ #[serde(default)]
+ pub lang_breakdown: std::collections::BTreeMap,
+ /// p9-fb-37: on-disk byte sums.
+ #[serde(default)]
+ pub index_bytes: kebab_core::IndexBytes,
+ /// p9-fb-37: docs whose `updated_at` exceeds the staleness threshold.
+ #[serde(default)]
+ pub stale_doc_count: u64,
+}
+```
+
+Replace `collect_stats` body:
+
+```rust
+fn collect_stats(
+ cfg: &Config,
+ store: &kebab_store_sqlite::SqliteStore,
+) -> anyhow::Result {
+ let counts = store
+ .count_summary_with_threshold(cfg.search.stale_threshold_days as u64)?;
+ let data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
+ let index_bytes = kebab_store_sqlite::stats_ext::index_bytes(&data_dir)
+ .map_err(|e| anyhow::anyhow!("index_bytes: {e}"))?;
+ Ok(Stats {
+ doc_count: counts.doc_count,
+ chunk_count: counts.chunk_count,
+ asset_count: counts.asset_count,
+ last_ingest_at: counts.last_ingest_at,
+ media_breakdown: counts.media_breakdown,
+ lang_breakdown: counts.lang_breakdown,
+ index_bytes,
+ stale_doc_count: counts.stale_doc_count,
+ })
+}
+```
+
+Update the call site `let stats = collect_stats(&store)?;` (~line 88) to:
+
+```rust
+let stats = collect_stats(cfg, &store)?;
+```
+
+- [ ] **Step 5: Run tests to verify they pass**
+
+```bash
+cargo test -p kebab-store-sqlite count_summary
+cargo test -p kebab-app stats_includes_breakdowns_and_bytes_on_fresh_corpus
+```
+Expected: both pass.
+
+- [ ] **Step 6: Verify config field type**
+
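+`cfg.search.stale_threshold_days` must exist as an integer field. Check `crates/kebab-config/src/lib.rs` for `Search.stale_threshold_days`. If its type is not what `collect_stats` assumes (e.g. it is `u32` or already `u64`), adjust the `as u64` cast accordingly. If Step 5 failed to compile on this field, do this check first.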
+
+```bash
+grep -n "stale_threshold_days" crates/kebab-config/src/lib.rs
+```
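+Expected: a line showing the field type. If it is already `u64`, drop the cast (clippy's `unnecessary_cast` lint would otherwise fail the `-D warnings` gate in Step 7); if it is `u32`, keep `as u64`.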
+
+- [ ] **Step 7: Run full clippy + workspace tests**
+
+```bash
+cargo clippy -p kebab-core -p kebab-store-sqlite -p kebab-app --all-targets -- -D warnings
+cargo test -p kebab-core -p kebab-store-sqlite -p kebab-app
+```
+Expected: clippy clean, all tests pass.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add crates/kebab-store-sqlite/src/store.rs crates/kebab-app/src/schema.rs
+git commit -m "feat(stats): media/lang/bytes/stale fields on schema.v1.stats (fb-37)"
+```
+
+---
+
+## Task 4: HybridRetriever search_with_trace
+
+**Files:**
+- Create: `crates/kebab-search/src/trace.rs`
+- Modify: `crates/kebab-search/src/hybrid.rs`
+- Modify: `crates/kebab-search/src/lib.rs`
+
+- [ ] **Step 1: Write failing test in hybrid.rs**
+
+Append to `crates/kebab-search/src/hybrid.rs` `mod tests`:
+
+```rust
+#[test]
+fn search_with_trace_returns_lex_and_vec_lists() {
+ use kebab_core::{ChunkId, DocumentId, IndexVersion, ChunkerVersion,
+ RetrievalDetail, SearchHit, SearchMode, SearchQuery,
+ WorkspacePath, Citation};
+ use std::sync::Arc;
+
+ fn mk_hit(rank: u32, chunk: &str, score: f32, mode: SearchMode) -> SearchHit {
+ SearchHit {
+ rank,
+ chunk_id: ChunkId(chunk.into()),
+ doc_id: DocumentId(format!("d-{chunk}")),
+ doc_path: WorkspacePath::new(format!("{chunk}.md")).unwrap(),
+ heading_path: vec![],
+ section_label: None,
+ snippet: chunk.into(),
+ citation: Citation::Line {
+ path: WorkspacePath::new(format!("{chunk}.md")).unwrap(),
+ start: 1,
+ end: 1,
+ section: None,
+ },
+ retrieval: RetrievalDetail {
+ method: mode,
+ fusion_score: score,
+ lexical_score: if mode == SearchMode::Lexical { Some(score) } else { None },
+ vector_score: if mode == SearchMode::Vector { Some(score) } else { None },
+ lexical_rank: if mode == SearchMode::Lexical { Some(rank) } else { None },
+ vector_rank: if mode == SearchMode::Vector { Some(rank) } else { None },
+ },
+ index_version: IndexVersion("v1".into()),
+ embedding_model: None,
+ chunker_version: ChunkerVersion("c1".into()),
+ indexed_at: time::OffsetDateTime::UNIX_EPOCH,
+ stale: false,
+ }
+ }
+
+ // Stub retrievers from existing test patterns in this file (see
+ // `MockRetriever` near line 363 if present, otherwise inline).
+ struct Stub { hits: Vec, mode: SearchMode }
+ impl Retriever for Stub {
+ fn search(&self, _q: &SearchQuery) -> anyhow::Result> {
+ Ok(self.hits.clone())
+ }
+ fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) }
+ }
+
+ let lex = Arc::new(Stub {
+ hits: vec![
+ mk_hit(1, "c1", 0.9, SearchMode::Lexical),
+ mk_hit(2, "c2", 0.5, SearchMode::Lexical),
+ ],
+ mode: SearchMode::Lexical,
+ });
+ let vec_r = Arc::new(Stub {
+ hits: vec![
+ mk_hit(1, "c2", 0.8, SearchMode::Vector),
+ mk_hit(2, "c3", 0.6, SearchMode::Vector),
+ ],
+ mode: SearchMode::Vector,
+ });
+ let hybrid = HybridRetriever::with_policy(
+ lex.clone(),
+ vec_r.clone(),
+ FusionPolicy::Rrf { k: 60 },
+ 2,
+ );
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Hybrid,
+ k: 2,
+ filters: Default::default(),
+ };
+ let (hits, trace) = hybrid.search_with_trace(&q).unwrap();
+ assert!(!hits.is_empty());
+ assert_eq!(trace.lexical.len(), 2);
+ assert_eq!(trace.vector.len(), 2);
+ // Union: c1, c2, c3 → 3 entries.
+ assert_eq!(trace.rrf_inputs.len(), 3);
+ // Smoke check only: timing fields are unsigned, so there is no
+ // meaningful lower bound to assert; just confirm the field exists.
+ let _ = trace.timing.lexical_ms;
+}
+
+#[test]
+fn search_with_trace_lexical_mode_empty_vector() {
+ use kebab_core::{ChunkId, DocumentId, IndexVersion, ChunkerVersion,
+ RetrievalDetail, SearchHit, SearchMode, SearchQuery,
+ WorkspacePath, Citation};
+ use std::sync::Arc;
+ struct EmptyR(SearchMode);
+ impl Retriever for EmptyR {
+ fn search(&self, _q: &SearchQuery) -> anyhow::Result> {
+ Ok(vec![])
+ }
+ fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) }
+ }
+ let lex = Arc::new(EmptyR(SearchMode::Lexical));
+ let vec_r = Arc::new(EmptyR(SearchMode::Vector));
+ let hybrid = HybridRetriever::with_policy(lex, vec_r, FusionPolicy::Rrf { k: 60 }, 2);
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Lexical,
+ k: 2,
+ filters: Default::default(),
+ };
+ let (_hits, trace) = hybrid.search_with_trace(&q).unwrap();
+ assert!(trace.vector.is_empty());
+ assert_eq!(trace.timing.vector_ms, 0);
+}
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+```bash
+cargo test -p kebab-search hybrid::tests::search_with_trace
+```
+Expected: compile error — `search_with_trace` undefined.
+
+- [ ] **Step 3: Add `trace.rs` helper module**
+
+Create `crates/kebab-search/src/trace.rs`:
+
+```rust
+//! p9-fb-37: trace capture helpers for `HybridRetriever::search_with_trace`.
+
+use std::collections::BTreeMap;
+
+use kebab_core::{
+ SearchHit, SearchTrace, TraceCandidate, TraceFusionInput, TraceTiming,
+};
+
+/// Build a `TraceCandidate` list from a `SearchHit` list. `score_kind`
+/// selects which per-side score (lexical or vector) fills `score`; a
+/// hit without that score falls back to `0.0`.
+pub fn candidates_from_hits(hits: &[SearchHit], score_kind: ScoreKind) -> Vec {
+ hits.iter()
+ .map(|h| TraceCandidate {
+ chunk_id: h.chunk_id.clone(),
+ doc_id: h.doc_id.clone(),
+ doc_path: h.doc_path.clone(),
+ rank: h.rank,
+ score: match score_kind {
+ ScoreKind::Lexical => h.retrieval.lexical_score.unwrap_or(0.0),
+ ScoreKind::Vector => h.retrieval.vector_score.unwrap_or(0.0),
+ },
+ })
+ .collect()
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum ScoreKind {
+ Lexical,
+ Vector,
+}
+
+/// Build the union of (chunk_id) across lex and vec hit lists, with
+/// each side's rank captured. `fusion_score` is filled by the caller
+/// (RRF computes it during fusion, this helper just pre-builds the
+/// rank table — caller overwrites fusion_score in a second pass).
+pub fn build_fusion_input_skeleton(
+ lex: &[SearchHit],
+ vec: &[SearchHit],
+) -> Vec {
+ let mut by_chunk: BTreeMap = BTreeMap::new();
+ for h in lex {
+ by_chunk
+ .entry(h.chunk_id.0.clone())
+ .or_insert(TraceFusionInput {
+ chunk_id: h.chunk_id.clone(),
+ lexical_rank: None,
+ vector_rank: None,
+ fusion_score: 0.0,
+ })
+ .lexical_rank = Some(h.rank);
+ }
+ for h in vec {
+ by_chunk
+ .entry(h.chunk_id.0.clone())
+ .or_insert(TraceFusionInput {
+ chunk_id: h.chunk_id.clone(),
+ lexical_rank: None,
+ vector_rank: None,
+ fusion_score: 0.0,
+ })
+ .vector_rank = Some(h.rank);
+ }
+ by_chunk.into_values().collect()
+}
+
+/// Container the hybrid retriever fills during a traced run.
+#[derive(Default)]
+pub struct TraceBuilder {
+ pub lexical: Vec,
+ pub vector: Vec,
+ pub rrf_inputs: Vec,
+ pub timing: TraceTiming,
+}
+
+impl TraceBuilder {
+ pub fn into_trace(self) -> SearchTrace {
+ SearchTrace {
+ lexical: self.lexical,
+ vector: self.vector,
+ rrf_inputs: self.rrf_inputs,
+ timing: self.timing,
+ }
+ }
+}
+```
+
+Modify `crates/kebab-search/src/lib.rs`. Add module declaration:
+
+```rust
+mod trace;
+```
+
+- [ ] **Step 4: Add `search_with_trace` on HybridRetriever**
+
+Modify `crates/kebab-search/src/hybrid.rs`. Add at the top (under existing `use` lines):
+
+```rust
+use crate::trace::{build_fusion_input_skeleton, candidates_from_hits, ScoreKind, TraceBuilder};
+use kebab_core::SearchTrace;
+use std::time::Instant;
+```
+
+Add a method to `impl HybridRetriever` (place after `fn fuse`):
+
+```rust
+/// p9-fb-37: parallel to `Retriever::search` but additionally returns
+/// a trace of pre-fusion lex/vec lists, RRF inputs (union with each
+/// side's rank), and per-stage timing. Same fan-out logic as `fuse`,
+/// just instrumented.
+pub fn search_with_trace(
+ &self,
+ query: &SearchQuery,
+) -> anyhow::Result<(Vec, SearchTrace)> {
+ let start_total = Instant::now();
+ let target_k = if query.k == 0 { self.default_k } else { query.k };
+ let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER);
+ let fanout_query = SearchQuery {
+ k: fanout_k,
+ ..query.clone()
+ };
+
+ let mut tb = TraceBuilder::default();
+
+ let (lex_hits, vec_hits): (Vec, Vec) = match query.mode {
+ SearchMode::Lexical => {
+ let t0 = Instant::now();
+ let lh = self.lexical.search(&fanout_query)?;
+ tb.timing.lexical_ms = t0.elapsed().as_millis() as u64;
+ (lh, Vec::new())
+ }
+ SearchMode::Vector => {
+ let t0 = Instant::now();
+ let vh = self.vector.search(&fanout_query)?;
+ tb.timing.vector_ms = t0.elapsed().as_millis() as u64;
+ (Vec::new(), vh)
+ }
+ SearchMode::Hybrid => {
+ let t0 = Instant::now();
+ let lh = self.lexical.search(&fanout_query)?;
+ tb.timing.lexical_ms = t0.elapsed().as_millis() as u64;
+ let t1 = Instant::now();
+ let vh = self.vector.search(&fanout_query)?;
+ tb.timing.vector_ms = t1.elapsed().as_millis() as u64;
+ (lh, vh)
+ }
+ };
+
+ tb.lexical = candidates_from_hits(&lex_hits, ScoreKind::Lexical);
+ tb.vector = candidates_from_hits(&vec_hits, ScoreKind::Vector);
+ tb.rrf_inputs = build_fusion_input_skeleton(&lex_hits, &vec_hits);
+
+ let t_fusion = Instant::now();
+ let final_hits = match query.mode {
+ SearchMode::Lexical => {
+ let mut h = lex_hits.clone();
+ h.truncate(target_k);
+ h
+ }
+ SearchMode::Vector => {
+ let mut h = vec_hits.clone();
+ h.truncate(target_k);
+ h
+ }
+ SearchMode::Hybrid => self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)?,
+ };
+ tb.timing.fusion_ms = t_fusion.elapsed().as_millis() as u64;
+
+ // Backfill fusion_score onto the rrf_inputs union for each chunk
+ // present in the final fused list.
+ let score_by_chunk: std::collections::HashMap = final_hits
+ .iter()
+ .map(|h| (h.chunk_id.0.clone(), h.retrieval.fusion_score))
+ .collect();
+ for entry in &mut tb.rrf_inputs {
+ if let Some(s) = score_by_chunk.get(&entry.chunk_id.0) {
+ entry.fusion_score = *s;
+ }
+ }
+
+ tb.timing.total_ms = start_total.elapsed().as_millis() as u64;
+ Ok((final_hits, tb.into_trace()))
+}
+```
+
+`fuse_with_inputs` is needed — extract from existing `fuse` so both `Retriever::search` (hybrid mode) and `search_with_trace` reuse the same RRF body without re-querying retrievers.
+
+Refactoring recipe:
+1. Read existing `fn fuse` (at line ~145). Note the body issues two `.search()` calls then builds `lex_index` / `vec_index` via `.into_iter()`.
+2. Split into two functions. `fn fuse` keeps the two `.search()` calls, then delegates the rest. `fn fuse_with_inputs` takes the already-resolved hit slices.
+3. Inside `fuse_with_inputs`: replace `let lex_index: HashMap<...> = lex_hits.into_iter().map(...).collect();` with `let lex_index: HashMap<...> = lex_hits.iter().cloned().map(...).collect();` (mirror for vec_index). All other RRF logic stays identical.
+
+```rust
+fn fuse(&self, query: &SearchQuery) -> Result> {
+ let target_k = if query.k == 0 { self.default_k } else { query.k };
+ let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER);
+ let fanout_query = SearchQuery {
+ k: fanout_k,
+ ..query.clone()
+ };
+ let lex_hits = self.lexical.search(&fanout_query)?;
+ let vec_hits = self.vector.search(&fanout_query)?;
+ self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)
+}
+
+fn fuse_with_inputs(
+ &self,
+ lex_hits: &[SearchHit],
+ vec_hits: &[SearchHit],
+ target_k: usize,
+) -> Result> {
+ tracing::debug!(
+ lex = lex_hits.len(),
+ vec = vec_hits.len(),
+ target_k,
+ "kb-search hybrid: pre-fusion candidate counts"
+ );
+ // PASTE the rest of the original `fn fuse` body here. Two changes:
+ // - replace `lex_hits.into_iter()` with `lex_hits.iter().cloned()`
+ // - replace `vec_hits.into_iter()` with `vec_hits.iter().cloned()`
+ // Everything else (RRF score formula, sort, truncate to target_k,
+ // tie-breaking, `Ok(...)` return) is verbatim preserved.
+}
+```
+
+Verify with `cargo test -p kebab-search` — existing hybrid tests must still pass (they exercise the `Retriever::search` → `fuse` path).
+
+- [ ] **Step 5: Run tests**
+
+```bash
+cargo test -p kebab-search
+```
+Expected: existing hybrid tests still pass + 2 new search_with_trace tests pass.
+
+- [ ] **Step 6: Clippy gate**
+
+```bash
+cargo clippy -p kebab-search --all-targets -- -D warnings
+```
+Expected: clean.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add crates/kebab-search/src/trace.rs crates/kebab-search/src/hybrid.rs crates/kebab-search/src/lib.rs
+git commit -m "feat(search): HybridRetriever::search_with_trace (fb-37)"
+```
+
+---
+
+## Task 5: SearchResponse trace field + App::search_with_opts threading
+
+**Files:**
+- Modify: `crates/kebab-app/src/app.rs`
+
+- [ ] **Step 1: Write failing test**
+
+Append to `crates/kebab-app/src/app.rs` tests module (find existing `#[cfg(test)] mod tests` near bottom; if absent, add one at file end):
+
+```rust
+#[cfg(test)]
+mod tests_trace {
+ use super::*;
+ use kebab_core::{SearchOpts, SearchQuery, SearchMode};
+
+ fn open_app_with_temp_dir() -> (tempfile::TempDir, App) {
+ let dir = tempfile::tempdir().unwrap();
+ let mut cfg = kebab_config::Config::defaults();
+ cfg.storage.data_dir = dir.path().to_string_lossy().into_owned();
+ // Ensure DB exists.
+ let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap();
+ store.run_migrations().unwrap();
+ drop(store);
+ let app = App::open_with_config(cfg).unwrap();
+ (dir, app)
+ }
+
+ #[test]
+ fn search_response_trace_none_when_opts_trace_false() {
+ let (_dir, app) = open_app_with_temp_dir();
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Lexical,
+ k: 1,
+ filters: Default::default(),
+ };
+ let resp = app.search_with_opts(q, SearchOpts::default()).unwrap();
+ assert!(resp.trace.is_none());
+ }
+
+ #[test]
+ fn search_response_trace_some_when_opts_trace_true() {
+ let (_dir, app) = open_app_with_temp_dir();
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Lexical,
+ k: 1,
+ filters: Default::default(),
+ };
+ let opts = SearchOpts { trace: true, ..Default::default() };
+ let resp = app.search_with_opts(q, opts).unwrap();
+ assert!(resp.trace.is_some(), "trace populated when opts.trace=true");
+ }
+}
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+```bash
+cargo test -p kebab-app tests_trace
+```
+Expected: compile errors — `SearchResponse.trace` field absent.
+
+- [ ] **Step 3: Extend `SearchResponse`**
+
+In `crates/kebab-app/src/app.rs`, replace `pub struct SearchResponse` (~line 69):
+
+```rust
+#[derive(Clone, Debug)]
+pub struct SearchResponse {
+ pub hits: Vec,
+ pub next_cursor: Option,
+ pub truncated: bool,
+ /// p9-fb-37: present when caller passed `SearchOpts.trace = true`.
+ /// Consumers that ignore trace should leave this `None`.
+ pub trace: Option,
+}
+```
+
+- [ ] **Step 4: Thread through `App::search_with_opts`**
+
+In `crates/kebab-app/src/app.rs`, modify `pub fn search_with_opts` (~line 306) to honor `opts.trace`. Find the current `let mut all_hits = self.search(fetch_query)?;` line and replace surrounding logic:
+
+```rust
+let trace = if opts.trace {
+ // Build a trace-capable retriever directly. Re-use construction
+ // from the cached search path but bypass cache (debug intent).
+ let retriever = self.build_retriever()?;
+ let traced = retriever
+ .as_any()
+ .downcast_ref::()
+ .map(|h| h.search_with_trace(&fetch_query));
+ if let Some(Ok((hits, t))) = traced {
+ let mut all_hits = hits;
+ let drop_n = offset.min(all_hits.len());
+ all_hits.drain(..drop_n);
+ let final_hits: Vec = all_hits.into_iter().take(k_effective).collect();
+ return Ok(self.build_response(final_hits, k_effective, &opts, snippet_chars, Some(t)));
+ }
+ None
+} else {
+ None
+};
+
+let mut all_hits = self.search(fetch_query)?;
+// ... existing code ...
+```
+
+Engineer note: this is a sketch — review actual `App::search_with_opts` body before editing; the `build_retriever` / `as_any` / `build_response` helpers may not exist verbatim. The minimal change required is:
+1. When `opts.trace = true`, call `search_with_trace` on the hybrid retriever (constructed the same way `App::search_uncached` does).
+2. Bypass the search cache entirely.
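+3. Plug the resulting `SearchTrace` into `SearchResponse.trace`, and propagate `search_with_trace` errors with `?` — the sketch's `if let Some(Ok(..))` silently drops an `Err` and falls through to the untraced path, returning `trace: None` even though tracing was requested.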
+
+Use the existing `App::search_uncached` (line ~243) as the model — duplicate that path with `search_with_trace` and wrap the result. Look for: `let retriever = ... HybridRetriever::new(&self.config, lex, vec);`. Call `retriever.search_with_trace(&query)` instead of `retriever.search(&query)` when tracing.
+
+If the retriever is only available as `Arc<dyn Retriever>` (and `search_with_trace` is not on the trait), construct a concretely typed `HybridRetriever` locally in the `if opts.trace` branch. Example pattern:
+
+```rust
+// inside fn search_with_opts:
+if opts.trace {
+ use kebab_search::HybridRetriever;
+ let lex = self.build_lexical_retriever()?;
+ let vec = self.build_vector_retriever()?;
+ let retriever = HybridRetriever::new(&self.config, lex, vec);
+ let (hits, trace) = retriever.search_with_trace(&fetch_query)?;
+ // skip cache, run budget loop on hits, attach trace to response
+ return Ok(self.finalize_response(hits, k_effective, offset, &opts, snippet_chars, Some(trace)));
+}
+```
+
+The exact helpers (`build_lexical_retriever`, `finalize_response`) are placeholder names; find the existing equivalents or extract them during implementation. Goal: the trace path bypasses the cache and returns `Some(trace)`; the non-trace path is unchanged and returns `None`.
+
+Also update every other `SearchResponse { ... }` constructor in `app.rs` and `lib.rs` to include `trace: None`. Search for `SearchResponse {` to find all sites.
+
+```bash
+grep -n "SearchResponse {" crates/kebab-app/src/app.rs crates/kebab-app/src/lib.rs
+```
+
+- [ ] **Step 5: Run tests**
+
+```bash
+cargo test -p kebab-app tests_trace
+cargo test -p kebab-app
+```
+Expected: 2 new trace tests pass; existing app tests unaffected.
+
+- [ ] **Step 6: Workspace clippy**
+
+```bash
+cargo clippy -p kebab-app --all-targets -- -D warnings
+```
+Expected: clean.
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add crates/kebab-app/src/app.rs
+git commit -m "feat(app): SearchResponse.trace + opts.trace threading (fb-37)"
+```
+
+---
+
+## Task 6: CLI --trace flag + JSON wire + non-JSON pretty print
+
+**Files:**
+- Modify: `crates/kebab-cli/src/main.rs`
+- Modify: `crates/kebab-cli/src/wire.rs`
+
+- [ ] **Step 1: Write failing test for wire serialization**
+
+Append to `crates/kebab-cli/src/wire.rs` `mod tests`:
+
+```rust
+#[test]
+fn search_response_with_trace_serializes_trace_field() {
+ use kebab_core::{SearchTrace, TraceCandidate, TraceFusionInput,
+ TraceTiming, ChunkId, DocumentId, WorkspacePath};
+ let r = kebab_app::SearchResponse {
+ hits: vec![],
+ next_cursor: None,
+ truncated: false,
+ trace: Some(SearchTrace {
+ lexical: vec![TraceCandidate {
+ chunk_id: ChunkId("c1".into()),
+ doc_id: DocumentId("d1".into()),
+ doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+ rank: 1,
+ score: 0.42,
+ }],
+ vector: vec![],
+ rrf_inputs: vec![TraceFusionInput {
+ chunk_id: ChunkId("c1".into()),
+ lexical_rank: Some(1),
+ vector_rank: None,
+ fusion_score: 0.0,
+ }],
+ timing: TraceTiming { lexical_ms: 5, vector_ms: 0, fusion_ms: 1, total_ms: 7 },
+ }),
+ };
+ let v = wire_search_response(&r);
+ assert_eq!(v["schema_version"], "search_response.v1");
+ assert!(v["trace"].is_object());
+ assert_eq!(v["trace"]["timing"]["lexical_ms"], 5);
+ assert_eq!(v["trace"]["lexical"][0]["chunk_id"], "c1");
+}
+
+#[test]
+fn search_response_without_trace_omits_field() {
+ let r = kebab_app::SearchResponse {
+ hits: vec![],
+ next_cursor: None,
+ truncated: false,
+ trace: None,
+ };
+ let v = wire_search_response(&r);
+ assert!(v.get("trace").is_none(), "trace field absent when None");
+}
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+```bash
+cargo test -p kebab-cli wire::tests::search_response_with_trace_serializes_trace_field
+```
+Expected: the test compiles (Task 5 already added `SearchResponse.trace`) but fails on `assert!(v["trace"].is_object())` — `wire_search_response` does not emit the `trace` field yet.
+
+- [ ] **Step 3: Update `wire_search_response`**
+
+Modify `crates/kebab-cli/src/wire.rs` `wire_search_response`:
+
+```rust
+pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value {
+ let mut v = serde_json::json!({
+ "hits": r.hits.iter().map(wire_search_hit).collect::>(),
+ "next_cursor": r.next_cursor,
+ "truncated": r.truncated,
+ });
+ if let Some(trace) = &r.trace {
+ let trace_v = serde_json::to_value(trace).expect("SearchTrace serializes");
+ if let Value::Object(ref mut map) = v {
+ map.insert("trace".to_string(), trace_v);
+ }
+ }
+ tag_object(v, "search_response.v1")
+}
+```
+
+- [ ] **Step 4: Add `--trace` clap flag**
+
+Modify `crates/kebab-cli/src/main.rs`. Find `Cmd::Search { ... }` definition (~line 95-150). Add at the end of its field list (after `doc_id`):
+
+```rust
+ /// p9-fb-37: emit pre-fusion lexical / vector / RRF candidate
+ /// lists + per-stage timing in the response. Bypasses cache
+ /// (debug intent — fresh run guaranteed).
+ #[arg(long)]
+ trace: bool,
+```
+
+Find the `Cmd::Search` dispatch arm (~line 656). Add `trace,` to the destructure pattern (after `doc_id,`). Then find where `SearchOpts` is constructed (search for `SearchOpts {` inside the search arm, ~line 745) and add `trace: *trace,`. Example:
+
+```rust
+let opts = kebab_core::SearchOpts {
+ max_tokens: *max_tokens,
+ snippet_chars: *snippet_chars,
+ cursor: cursor.clone(),
+ trace: *trace,
+};
+```
+
+- [ ] **Step 5: Add non-JSON pretty-print**
+
+Find the search dispatch's non-JSON branch (the `else` of `if cli.json`, ~line 750-780). After hits are printed, add:
+
+```rust
+if *trace {
+ if let Some(t) = &resp.trace {
+ eprintln!();
+ eprintln!("Trace:");
+ eprintln!(" lexical ({} hits, {}ms):", t.lexical.len(), t.timing.lexical_ms);
+ for c in t.lexical.iter().take(3) {
+ eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0);
+ }
+ eprintln!(" vector ({} hits, {}ms):", t.vector.len(), t.timing.vector_ms);
+ for c in t.vector.iter().take(3) {
+ eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0);
+ }
+ eprintln!(" fusion ({} inputs, {}ms)", t.rrf_inputs.len(), t.timing.fusion_ms);
+ eprintln!(" total: {}ms", t.timing.total_ms);
+ }
+}
+```
+
+- [ ] **Step 6: Run tests**
+
+```bash
+cargo test -p kebab-cli wire::tests
+cargo test -p kebab-cli
+```
+Expected: 2 new wire tests pass; existing cli tests unaffected.
+
+- [ ] **Step 7: Clippy**
+
+```bash
+cargo clippy -p kebab-cli --all-targets -- -D warnings
+```
+Expected: clean.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add crates/kebab-cli/src/main.rs crates/kebab-cli/src/wire.rs
+git commit -m "feat(cli): kebab search --trace flag + wire trace + pretty print (fb-37)"
+```
+
+---
+
+## Task 7: CLI integration tests for --trace and stats breakdowns
+
+**Files:**
+- Create: `crates/kebab-cli/tests/wire_search_trace.rs`
+- Create: `crates/kebab-cli/tests/wire_schema_breakdowns.rs`
+
+- [ ] **Step 1: Write failing integration tests for --trace**
+
+Create `crates/kebab-cli/tests/wire_search_trace.rs`. Use the same fixture pattern as existing `crates/kebab-cli/tests/wire_search_filters.rs` (read it first to mirror temp-dir + ingest setup):
+
+```rust
+//! p9-fb-37: integration tests for `kebab search --trace --json`.
+
+use std::process::Command;
+
+mod common;
+use common::{cargo_bin, ingest_fixture, temp_kebab_root};
+
+#[test]
+fn search_trace_json_includes_trace_block() {
+ let (_root, cfg_path) = temp_kebab_root();
+ ingest_fixture(&cfg_path, "doc1.md", "# Title\n\nrust async hello\n");
+
+ let out = Command::new(cargo_bin())
+ .args([
+ "--config", cfg_path.to_str().unwrap(),
+ "search", "rust", "--trace", "--json",
+ ])
+ .output()
+ .expect("run");
+ assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr));
+ let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap();
+ assert_eq!(v["schema_version"], "search_response.v1");
+ assert!(v["trace"].is_object(), "trace block present");
+ assert!(v["trace"]["timing"].is_object());
+ assert!(v["trace"]["timing"]["total_ms"].is_number());
+ assert!(v["trace"]["lexical"].is_array());
+ assert!(v["trace"]["vector"].is_array());
+ assert!(v["trace"]["rrf_inputs"].is_array());
+}
+
+#[test]
+fn search_without_trace_omits_trace_field() {
+ let (_root, cfg_path) = temp_kebab_root();
+ ingest_fixture(&cfg_path, "doc1.md", "# Title\n\nrust async hello\n");
+
+ let out = Command::new(cargo_bin())
+ .args([
+ "--config", cfg_path.to_str().unwrap(),
+ "search", "rust", "--json",
+ ])
+ .output()
+ .expect("run");
+ assert!(out.status.success());
+ let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap();
+ assert!(v.get("trace").is_none(), "trace field absent when --trace not passed");
+}
+
+#[test]
+fn search_trace_lexical_mode_empty_vector_list() {
+ let (_root, cfg_path) = temp_kebab_root();
+ ingest_fixture(&cfg_path, "doc1.md", "# Title\n\nrust async hello\n");
+
+ let out = Command::new(cargo_bin())
+ .args([
+ "--config", cfg_path.to_str().unwrap(),
+ "search", "rust", "--trace", "--mode", "lexical", "--json",
+ ])
+ .output()
+ .expect("run");
+ assert!(out.status.success());
+ let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap();
+ assert_eq!(v["trace"]["vector"].as_array().unwrap().len(), 0);
+ assert_eq!(v["trace"]["timing"]["vector_ms"], 0);
+}
+```
+
+- [ ] **Step 2: Write failing integration tests for stats**
+
+Create `crates/kebab-cli/tests/wire_schema_breakdowns.rs`:
+
+```rust
+//! p9-fb-37: integration tests for `kebab schema --json` extended stats.
+
+use std::process::Command;
+
+mod common;
+use common::{cargo_bin, ingest_fixture, temp_kebab_root};
+
+#[test]
+fn schema_stats_includes_breakdowns_on_fresh_corpus() {
+ let (_root, cfg_path) = temp_kebab_root();
+ // Fresh init — no docs. We need migrations to have run; the
+ // first search/ingest call brings them up. Run an empty schema
+ // query on a freshly-init'd config:
+ Command::new(cargo_bin())
+ .args(["--config", cfg_path.to_str().unwrap(), "init"])
+ .output()
+ .expect("init");
+
+ let out = Command::new(cargo_bin())
+ .args(["--config", cfg_path.to_str().unwrap(), "schema", "--json"])
+ .output()
+ .expect("run");
+ assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr));
+ let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap();
+ let stats = &v["stats"];
+ // 5 keys padded.
+ let m = stats["media_breakdown"].as_object().unwrap();
+ assert_eq!(m.len(), 5);
+ for k in &["markdown", "pdf", "image", "audio", "other"] {
+ assert_eq!(m[*k], 0);
+ }
+ // lang_breakdown empty {}.
+ assert_eq!(stats["lang_breakdown"].as_object().unwrap().len(), 0);
+ // index_bytes shape.
+ assert!(stats["index_bytes"]["sqlite"].is_number());
+ assert!(stats["index_bytes"]["lancedb"].is_number());
+ assert_eq!(stats["stale_doc_count"], 0);
+}
+
+#[test]
+fn schema_stats_breakdowns_after_ingest() {
+ let (_root, cfg_path) = temp_kebab_root();
+ ingest_fixture(&cfg_path, "a.md", "---\nlang: en\n---\nhello\n");
+ ingest_fixture(&cfg_path, "b.md", "---\nlang: ko\n---\n안녕\n");
+
+ let out = Command::new(cargo_bin())
+ .args(["--config", cfg_path.to_str().unwrap(), "schema", "--json"])
+ .output()
+ .expect("run");
+ assert!(out.status.success());
+ let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap();
+ let stats = &v["stats"];
+ assert_eq!(stats["media_breakdown"]["markdown"], 2);
+ assert_eq!(stats["lang_breakdown"]["en"], 1);
+ assert_eq!(stats["lang_breakdown"]["ko"], 1);
+ assert!(stats["index_bytes"]["sqlite"].as_u64().unwrap() > 0);
+}
+```
+
+- [ ] **Step 3: Verify or create `tests/common/mod.rs`**
+
+Check existing tests for shared `common` module:
+```bash
+ls crates/kebab-cli/tests/
+cat crates/kebab-cli/tests/common/mod.rs 2>/dev/null
+```
+
+If `common` module exists with `cargo_bin`, `ingest_fixture`, `temp_kebab_root`, reuse. If not, mirror functions from `wire_search_filters.rs` (the fb-36 integration test) — copy its fixture helpers to `crates/kebab-cli/tests/common/mod.rs` and reference via `mod common`.
+
+- [ ] **Step 4: Run integration tests**
+
+```bash
+cargo test -p kebab-cli --test wire_search_trace
+cargo test -p kebab-cli --test wire_schema_breakdowns
+```
+Expected: all tests pass.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add crates/kebab-cli/tests/wire_search_trace.rs crates/kebab-cli/tests/wire_schema_breakdowns.rs crates/kebab-cli/tests/common/mod.rs
+git commit -m "test(cli): integration tests for --trace + schema breakdowns (fb-37)"
+```
+
+---
+
+## Task 8: MCP SearchInput trace + integration test
+
+**Files:**
+- Modify: `crates/kebab-mcp/src/tools/search.rs`
+- Create: `crates/kebab-mcp/tests/tools_call_search_trace.rs`
+
+- [ ] **Step 1: Write failing integration test**
+
+Create `crates/kebab-mcp/tests/tools_call_search_trace.rs`. Mirror existing `tools_call_search.rs` fixture pattern (read it first):
+
+```rust
+//! p9-fb-37: MCP search trace input/output integration.
+
+use serde_json::json;
+
+mod common;
+use common::call_tool_with_temp_corpus;
+
+#[test]
+fn search_with_trace_true_returns_trace_field() {
+ let v = call_tool_with_temp_corpus(
+ "kebab__search",
+ json!({"query": "rust", "trace": true}),
+ );
+ assert!(v["trace"].is_object(), "trace field present when trace:true");
+ assert!(v["trace"]["timing"]["total_ms"].is_number());
+}
+
+#[test]
+fn search_without_trace_omits_field() {
+ let v = call_tool_with_temp_corpus(
+ "kebab__search",
+ json!({"query": "rust"}),
+ );
+ assert!(v.get("trace").is_none(), "trace absent when not requested");
+}
+
+#[test]
+fn search_with_trace_false_omits_field() {
+ let v = call_tool_with_temp_corpus(
+ "kebab__search",
+ json!({"query": "rust", "trace": false}),
+ );
+ assert!(v.get("trace").is_none());
+}
+```
+
+If `tests/common/mod.rs` lacks `call_tool_with_temp_corpus`, derive from existing test fixtures. Pattern: spin up `kebab_mcp::Server`, send tools/call request, return result `serde_json::Value`.
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+```bash
+cargo test -p kebab-mcp --test tools_call_search_trace
+```
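+Expected: failure — at minimum `search_with_trace_true_returns_trace_field`, because `SearchInput` has no `trace` field yet and the response carries no `trace` block. The tests only pass JSON, so the missing struct field is a runtime failure, not a compile error.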
+
+- [ ] **Step 3: Add `trace` to `SearchInput`**
+
+Modify `crates/kebab-mcp/src/tools/search.rs`. Find `pub struct SearchInput` (~line 30-50). Add at end:
+
+```rust
+ /// p9-fb-37: when true, capture pipeline trace and include in
+ /// response. Bypasses cache. Default false.
+ #[serde(default)]
+ pub trace: Option<bool>,
+```
+
+- [ ] **Step 4: Wire `trace` into dispatch**
+
+Find the dispatch body where `SearchOpts` is constructed (~line 90-130). Add:
+
+```rust
+let opts = kebab_core::SearchOpts {
+ max_tokens: input.max_tokens,
+ snippet_chars: input.snippet_chars,
+ cursor: input.cursor.clone(),
+ trace: input.trace.unwrap_or(false),
+};
+```
+
+(The existing struct construction may not include `cursor` etc — adapt to what's actually present, just add `trace:` line.)
+
+The output JSON should already pick up `trace` because the wire helper inherits from the same `SearchResponse` shape. Verify by searching for how the MCP tool serializes its response — check whether it uses `kebab_cli::wire::wire_search_response` or its own builder.
+
+```bash
+grep -n "wire_search_response\|search_response.v1\|SearchResponse" crates/kebab-mcp/src/tools/search.rs
+```
+
+If MCP uses its own builder, mirror the trace-injection pattern from Task 6 Step 3.
+
+- [ ] **Step 5: Run tests**
+
+```bash
+cargo test -p kebab-mcp --test tools_call_search_trace
+```
+Expected: all 3 pass.
+
+- [ ] **Step 6: Clippy**
+
+```bash
+cargo clippy -p kebab-mcp --all-targets -- -D warnings
+```
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add crates/kebab-mcp/src/tools/search.rs crates/kebab-mcp/tests/tools_call_search_trace.rs
+git commit -m "feat(mcp): kebab__search trace input + output mirror (fb-37)"
+```
+
+---
+
+## Task 9: TUI search pane `t` keystroke + TracePopup
+
+**Files:**
+- Create: `crates/kebab-tui/src/trace_popup.rs`
+- Modify: `crates/kebab-tui/src/lib.rs`
+- Modify: `crates/kebab-tui/src/app.rs`
+- Modify: `crates/kebab-tui/src/search.rs`
+- Modify: `crates/kebab-tui/src/cheatsheet.rs`
+
+- [ ] **Step 1: Create `trace_popup.rs`**
+
+```rust
+//! p9-fb-37: TUI trace popup. Opens from Search pane via `t` key
+//! when results are visible. Re-runs the current query with
+//! `SearchOpts.trace = true` and displays the lex / vec / rrf union
+//! + per-stage timing as a single scroll list.
+
+use crossterm::event::{KeyCode, KeyEvent};
+use kebab_core::SearchTrace;
+use ratatui::Frame;
+use ratatui::layout::Rect;
+use ratatui::style::{Modifier, Style};
+use ratatui::text::{Line, Span};
+use ratatui::widgets::{Block, Borders, Paragraph, Wrap};
+
+#[derive(Debug, Clone)]
+pub struct TracePopupState {
+ pub trace: SearchTrace,
+ pub scroll: u16,
+}
+
+impl TracePopupState {
+ pub fn new(trace: SearchTrace) -> Self {
+ Self { trace, scroll: 0 }
+ }
+}
+
+pub fn render_trace_popup(f: &mut Frame, area: Rect, state: &TracePopupState) {
+ let mut lines: Vec<Line> = Vec::new();
+ let bold = Style::default().add_modifier(Modifier::BOLD);
+
+ lines.push(Line::from(Span::styled(
+ format!(
+ "Lexical ({} hits, {} ms)",
+ state.trace.lexical.len(),
+ state.trace.timing.lexical_ms,
+ ),
+ bold,
+ )));
+ for c in &state.trace.lexical {
+ lines.push(Line::from(format!(
+ " #{:>2} score={:.4} chunk={}",
+ c.rank, c.score, c.chunk_id.0
+ )));
+ }
+ lines.push(Line::from(""));
+ lines.push(Line::from(Span::styled(
+ format!(
+ "Vector ({} hits, {} ms)",
+ state.trace.vector.len(),
+ state.trace.timing.vector_ms,
+ ),
+ bold,
+ )));
+ for c in &state.trace.vector {
+ lines.push(Line::from(format!(
+ " #{:>2} score={:.4} chunk={}",
+ c.rank, c.score, c.chunk_id.0
+ )));
+ }
+ lines.push(Line::from(""));
+ lines.push(Line::from(Span::styled(
+ format!(
+ "RRF inputs ({} entries, {} ms fusion)",
+ state.trace.rrf_inputs.len(),
+ state.trace.timing.fusion_ms,
+ ),
+ bold,
+ )));
+ for e in &state.trace.rrf_inputs {
+ lines.push(Line::from(format!(
+ " chunk={} lex={:?} vec={:?} fusion={:.4}",
+ e.chunk_id.0, e.lexical_rank, e.vector_rank, e.fusion_score
+ )));
+ }
+ lines.push(Line::from(""));
+ lines.push(Line::from(Span::styled(
+ format!("Total: {} ms", state.trace.timing.total_ms),
+ bold,
+ )));
+
+ let block = Block::default()
+ .title("Trace — Esc to close, j/k or ↑↓ to scroll")
+ .borders(Borders::ALL);
+ let p = Paragraph::new(lines)
+ .block(block)
+ .scroll((state.scroll, 0))
+ .wrap(Wrap { trim: false });
+ f.render_widget(p, area);
+}
+
+/// Handle keys while popup is open. Returns true if the popup should
+/// close.
+pub fn handle_key_trace_popup(state: &mut TracePopupState, key: KeyEvent) -> bool {
+ match key.code {
+ KeyCode::Esc => true,
+ KeyCode::Char('j') | KeyCode::Down => {
+ state.scroll = state.scroll.saturating_add(1);
+ false
+ }
+ KeyCode::Char('k') | KeyCode::Up => {
+ state.scroll = state.scroll.saturating_sub(1);
+ false
+ }
+ _ => false,
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crossterm::event::KeyModifiers;
+ use kebab_core::TraceTiming;
+
+ fn dummy_state() -> TracePopupState {
+ TracePopupState::new(SearchTrace {
+ lexical: vec![],
+ vector: vec![],
+ rrf_inputs: vec![],
+ timing: TraceTiming::default(),
+ })
+ }
+
+ #[test]
+ fn esc_closes() {
+ let mut s = dummy_state();
+ assert!(handle_key_trace_popup(
+ &mut s,
+ KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE),
+ ));
+ }
+
+ #[test]
+ fn j_scrolls_down() {
+ let mut s = dummy_state();
+ assert!(!handle_key_trace_popup(
+ &mut s,
+ KeyEvent::new(KeyCode::Char('j'), KeyModifiers::NONE),
+ ));
+ assert_eq!(s.scroll, 1);
+ }
+}
+```
+
+- [ ] **Step 2: Register module + state**
+
+Modify `crates/kebab-tui/src/lib.rs`:
+```rust
+pub mod trace_popup;
+```
+
+Modify `crates/kebab-tui/src/app.rs`. Find `pub struct App` (~line 1-100). Add field:
+```rust
+ /// p9-fb-37: trace popup state, `Some` while open.
+ pub trace_popup: Option<crate::trace_popup::TracePopupState>,
+```
+
+Initialize in `App::new` / `App::default` to `None`.
+
+- [ ] **Step 3: Wire `t` keystroke in search pane**
+
+Modify `crates/kebab-tui/src/search.rs` `pub fn handle_key_search` (~line 196). Add a key arm in the match block before existing arms:
+
+```rust
+ (KeyCode::Char('t'), KeyModifiers::NONE)
+ if !state.results.is_empty() && state.trace_popup.is_none() =>
+ {
+ // Re-run current query with trace enabled.
+ let cfg = match kebab_config::Config::load(state.config_path.as_deref()) {
+ Ok(c) => c,
+ Err(_) => return KeyOutcome::Consumed,
+ };
+ let q = kebab_core::SearchQuery {
+ text: state.query.clone(),
+ mode: state.mode,
+ k: state.k,
+ filters: state.filters.clone(),
+ };
+ let opts = kebab_core::SearchOpts {
+ trace: true,
+ ..Default::default()
+ };
+ if let Ok(resp) = kebab_app::search_with_opts_with_config(cfg, q, opts) {
+ if let Some(t) = resp.trace {
+ state.trace_popup = Some(crate::trace_popup::TracePopupState::new(t));
+ }
+ }
+ KeyOutcome::Consumed
+ }
+```
+
+Engineer note: field names (`state.results`, `state.query`, `state.mode`, `state.k`, `state.filters`, `state.config_path`) must match actual `App` struct. Inspect `kebab-tui/src/app.rs` and adapt — if some are absent (e.g. `config_path`), fall back to `kebab_config::Config::load(None)`.
+
+- [ ] **Step 4: Render popup + handle popup keys in main loop**
+
+Find the main render loop (in `crates/kebab-tui/src/run.rs` or `app.rs`) — wherever `render_search` / `render_inspect` are conditionally called. Add a render check: if `state.trace_popup.is_some()`, draw the popup overlay. Pattern:
+
+```rust
+if let Some(popup) = &state.trace_popup {
+ let popup_area = centered_rect(80, 80, frame.area());
+ crate::trace_popup::render_trace_popup(frame, popup_area, popup);
+}
+```
+
+`centered_rect` helper may already exist (commonly in `app.rs` or `terminal.rs`). If not, define it inline:
+
+```rust
+fn centered_rect(percent_x: u16, percent_y: u16, r: Rect) -> Rect {
+ let popup_layout = Layout::default()
+ .direction(Direction::Vertical)
+ .constraints([
+ Constraint::Percentage((100 - percent_y) / 2),
+ Constraint::Percentage(percent_y),
+ Constraint::Percentage((100 - percent_y) / 2),
+ ])
+ .split(r);
+ Layout::default()
+ .direction(Direction::Horizontal)
+ .constraints([
+ Constraint::Percentage((100 - percent_x) / 2),
+ Constraint::Percentage(percent_x),
+ Constraint::Percentage((100 - percent_x) / 2),
+ ])
+ .split(popup_layout[1])[1]
+}
+```
+
+In key dispatch, intercept popup keys first:
+
+```rust
+if let Some(popup) = state.trace_popup.as_mut() {
+ if crate::trace_popup::handle_key_trace_popup(popup, key) {
+ state.trace_popup = None;
+ }
+ return KeyOutcome::Consumed;
+}
+```
+
+Place before the per-pane key dispatch.
+
+- [ ] **Step 5: Update cheatsheet**
+
+Modify `crates/kebab-tui/src/cheatsheet.rs`. Find the search pane keybind list (search for "Search" header or `i = inspect`). Add:
+
+```rust
+ "t = trace",
+```
+
+(Exact insertion depends on cheatsheet's data structure — array of strings, struct rows, etc. Adapt.)
+
+- [ ] **Step 6: Run TUI tests**
+
+```bash
+cargo test -p kebab-tui
+```
+Expected: 2 new trace_popup tests pass; existing TUI tests unaffected.
+
+- [ ] **Step 7: Clippy**
+
+```bash
+cargo clippy -p kebab-tui --all-targets -- -D warnings
+```
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add crates/kebab-tui/src/trace_popup.rs crates/kebab-tui/src/lib.rs \
+ crates/kebab-tui/src/app.rs crates/kebab-tui/src/search.rs \
+ crates/kebab-tui/src/cheatsheet.rs crates/kebab-tui/src/run.rs
+git commit -m "feat(tui): search pane t-key opens TracePopup (fb-37)"
+```
+
+---
+
+## Task 10: Wire schema docs + README + SMOKE + INDEX + SKILL + status flip
+
+**Files:**
+- Modify: `docs/wire-schema/v1/search_response.schema.json`
+- Modify: `docs/wire-schema/v1/schema.schema.json`
+- Modify: `README.md`
+- Modify: `docs/SMOKE.md`
+- Modify: `tasks/p9/p9-fb-37-trace-and-stats.md`
+- Modify: `tasks/INDEX.md`
+- Modify: `integrations/claude-code/kebab/SKILL.md`
+
+- [ ] **Step 1: Update `search_response.schema.json`**
+
+Add `trace` to `properties` (NOT to `required`):
+
+```json
+"trace": {
+ "type": "object",
+ "description": "p9-fb-37: present iff caller passed --trace / SearchOpts.trace=true. Lex/vec pre-fusion lists + RRF union + per-stage timing.",
+ "required": ["lexical", "vector", "rrf_inputs", "timing"],
+ "properties": {
+ "lexical": { "type": "array", "items": { "type": "object" } },
+ "vector": { "type": "array", "items": { "type": "object" } },
+ "rrf_inputs":{ "type": "array", "items": { "type": "object" } },
+ "timing": {
+ "type": "object",
+ "required": ["lexical_ms", "vector_ms", "fusion_ms", "total_ms"],
+ "properties": {
+ "lexical_ms": { "type": "integer", "minimum": 0 },
+ "vector_ms": { "type": "integer", "minimum": 0 },
+ "fusion_ms": { "type": "integer", "minimum": 0 },
+ "total_ms": { "type": "integer", "minimum": 0 }
+ }
+ }
+ }
+}
+```
+
+- [ ] **Step 2: Update `schema.schema.json`**
+
+In `properties.stats.properties`, add the four new fields:
+
+```json
+"media_breakdown": {
+ "type": "object",
+ "description": "p9-fb-37: per-media-kind doc count. 5 keys (markdown/pdf/image/audio/other), zero-padded.",
+ "additionalProperties": { "type": "integer", "minimum": 0 }
+},
+"lang_breakdown": {
+ "type": "object",
+ "description": "p9-fb-37: per-language doc count. NULL lang keyed as the literal string 'null'. Map may be empty on empty corpus.",
+ "additionalProperties": { "type": "integer", "minimum": 0 }
+},
+"index_bytes": {
+ "type": "object",
+ "description": "p9-fb-37: on-disk byte sums.",
+ "required": ["sqlite", "lancedb"],
+ "properties": {
+ "sqlite": { "type": "integer", "minimum": 0 },
+ "lancedb": { "type": "integer", "minimum": 0 }
+ }
+},
+"stale_doc_count": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "p9-fb-37: docs whose updated_at is older than now minus config.search.stale_threshold_days. 0 when threshold=0."
+}
+```
+
+- [ ] **Step 3: Update `README.md`**
+
+Find the `kebab search` row in the command table. Add `--trace` to its flag list. Find the `kebab schema` row — extend its description with one phrase like "+ media/lang/bytes/stale breakdowns (fb-37)".
+
+- [ ] **Step 4: Update `docs/SMOKE.md`**
+
+Add a new section after the fb-36 walkthrough:
+
+```markdown
+### Trace + stats (fb-37)
+
+Re-run a search with `--trace` to see per-stage candidate lists + timing:
+
+```bash
+kebab --config /tmp/kebab-smoke/config.toml search "rust async" --trace --json | jq .trace
+```
+
+Inspect the corpus health surface:
+
+```bash
+kebab --config /tmp/kebab-smoke/config.toml schema --json | jq .stats
+```
+
+Look for: `media_breakdown` (5 keys), `lang_breakdown`, `index_bytes`, `stale_doc_count`.
+```
+
+- [ ] **Step 5: Update `tasks/p9/p9-fb-37-trace-and-stats.md`**
+
+Flip the frontmatter `status: open` → `status: completed`. Add at the top (after the existing skeleton banner) a "Design + plan" links block:
+
+```markdown
+- Design: [`docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md`](../../docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md)
+- Plan: [`docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md`](../../docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md)
+```
+
+- [ ] **Step 6: Update `tasks/INDEX.md`**
+
+Find the fb-37 row. Flip the status column to ✅.
+
+- [ ] **Step 7: Update `integrations/claude-code/kebab/SKILL.md`**
+
+Find the `mcp__kebab__search` input shape block. Append a `trace: null` field. Add a sentence under the search inputs bullet list noting that `trace: true` returns a `trace` block on the response with pre-fusion lex/vec lists + per-stage timing, and that trace bypasses the search cache. Also update the schema bullet list to mention the new stats sub-fields.
+
+- [ ] **Step 8: Run full workspace tests + clippy**
+
+```bash
+cargo test --workspace --no-fail-fast -j 1
+cargo clippy --workspace --all-targets -- -D warnings
+```
+Expected: all green.
+
+- [ ] **Step 9: Commit**
+
+```bash
+git add docs/ README.md tasks/p9/p9-fb-37-trace-and-stats.md tasks/INDEX.md integrations/claude-code/kebab/SKILL.md
+git commit -m "docs(fb-37): wire schema + README + SMOKE + INDEX + SKILL"
+```
+
+---
+
+## Final verification checklist
+
+- [ ] `cargo test --workspace --no-fail-fast -j 1` green
+- [ ] `cargo clippy --workspace --all-targets -- -D warnings` clean
+- [ ] Manual smoke against `/tmp/kebab-smoke`:
+ - [ ] `kebab search Q --trace --json | jq .trace` shows lex/vec/rrf/timing
+ - [ ] `kebab search Q --json` does NOT include `trace`
+ - [ ] `kebab schema --json | jq .stats` shows 4 new fields
+- [ ] README, SMOKE, SKILL, INDEX, spec status all updated
From 1e943f21dc8782fa0a776646fac925a18f737853 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 12:17:04 +0900
Subject: [PATCH 03/13] feat(core): SearchTrace + IndexBytes types +
SearchOpts.trace (fb-37)
Co-Authored-By: Claude Sonnet 4.6
---
crates/kebab-core/src/search.rs | 98 +++++++++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs
index 5e5cd31..bb66be9 100644
--- a/crates/kebab-core/src/search.rs
+++ b/crates/kebab-core/src/search.rs
@@ -124,6 +124,57 @@ pub struct SearchOpts {
pub snippet_chars: Option,
/// Opaque base64 cursor from a previous response. None = first page.
pub cursor: Option,
+ /// p9-fb-37: when true, capture pipeline trace (cache bypassed,
+ /// lex / vec pre-fusion lists + timing populated on the response).
+ #[serde(default)]
+ pub trace: bool,
+}
+
+/// p9-fb-37: search retrieval pipeline trace. Populated only when
+/// `SearchOpts.trace = true`; `None` on the wrapping `SearchResponse`
+/// otherwise. `lexical` / `vector` are pre-fusion candidate lists
+/// (each retriever's full output for the fanout query). `rrf_inputs`
+/// is the union (chunk_id) used by RRF, with each side's rank
+/// captured. `timing` is wall-clock per stage.
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchTrace {
+ pub lexical: Vec<TraceCandidate>,
+ pub vector: Vec<TraceCandidate>,
+ pub rrf_inputs: Vec<TraceFusionInput>,
+ pub timing: TraceTiming,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceCandidate {
+ pub chunk_id: ChunkId,
+ pub doc_id: DocumentId,
+ pub doc_path: WorkspacePath,
+ pub rank: u32,
+ pub score: f32,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceFusionInput {
+ pub chunk_id: ChunkId,
+ pub lexical_rank: Option<u32>,
+ pub vector_rank: Option<u32>,
+ pub fusion_score: f32,
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TraceTiming {
+ pub lexical_ms: u64,
+ pub vector_ms: u64,
+ pub fusion_ms: u64,
+ pub total_ms: u64,
+}
+
+/// p9-fb-37: on-disk index size breakdown. Mirrored on the
+/// wire `schema.v1.stats.index_bytes` block.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct IndexBytes {
+ pub sqlite: u64,
+ pub lancedb: u64,
}
#[cfg(test)]
@@ -193,4 +244,51 @@ mod tests {
assert!(old.ingested_after.is_none());
assert!(old.doc_id.is_none());
}
+
+ #[test]
+ fn search_trace_serde_roundtrip() {
+ let t = SearchTrace {
+ lexical: vec![TraceCandidate {
+ chunk_id: ChunkId("c1".into()),
+ doc_id: DocumentId("d1".into()),
+ doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+ rank: 1,
+ score: 0.42,
+ }],
+ vector: vec![],
+ rrf_inputs: vec![TraceFusionInput {
+ chunk_id: ChunkId("c1".into()),
+ lexical_rank: Some(1),
+ vector_rank: None,
+ fusion_score: 0.0234,
+ }],
+ timing: TraceTiming {
+ lexical_ms: 12,
+ vector_ms: 0,
+ fusion_ms: 1,
+ total_ms: 14,
+ },
+ };
+ let v = serde_json::to_value(&t).unwrap();
+ assert_eq!(v["timing"]["lexical_ms"], 12);
+ assert_eq!(
+ v["lexical"][0]["score"].as_f64().unwrap() as f32,
+ 0.42_f32
+ );
+ let back: SearchTrace = serde_json::from_value(v).unwrap();
+ assert_eq!(back, t);
+ }
+
+ #[test]
+ fn index_bytes_default_is_zero() {
+ let b = IndexBytes::default();
+ assert_eq!(b.sqlite, 0);
+ assert_eq!(b.lancedb, 0);
+ }
+
+ #[test]
+ fn search_opts_trace_default_false() {
+ let opts = SearchOpts::default();
+ assert!(!opts.trace);
+ }
}
From 69c6e23432f350a540b006c9d5e8761ecdafb365 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 12:21:45 +0900
Subject: [PATCH 04/13] feat(store): breakdowns + index_bytes helpers (fb-37)
Co-Authored-By: Claude Sonnet 4.6
---
crates/kebab-core/src/lib.rs | 5 +-
crates/kebab-store-sqlite/src/lib.rs | 1 +
crates/kebab-store-sqlite/src/stats_ext.rs | 168 +++++++++++++++++++++
3 files changed, 172 insertions(+), 2 deletions(-)
create mode 100644 crates/kebab-store-sqlite/src/stats_ext.rs
diff --git a/crates/kebab-core/src/lib.rs b/crates/kebab-core/src/lib.rs
index 7bbb01b..1cee095 100644
--- a/crates/kebab-core/src/lib.rs
+++ b/crates/kebab-core/src/lib.rs
@@ -51,8 +51,9 @@ pub use metadata::{
TrustLevel,
};
pub use search::{
- DocFilter, DocSummary, RetrievalDetail, SearchFilters, SearchHit,
- SearchMode, SearchOpts, SearchQuery,
+ DocFilter, DocSummary, IndexBytes, MEDIA_KINDS, RetrievalDetail, SearchFilters, SearchHit,
+ SearchMode, SearchOpts, SearchQuery, SearchTrace, TraceCandidate, TraceFusionInput,
+ TraceTiming,
};
pub use answer::{
Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
diff --git a/crates/kebab-store-sqlite/src/lib.rs b/crates/kebab-store-sqlite/src/lib.rs
index f68872b..89c0fa3 100644
--- a/crates/kebab-store-sqlite/src/lib.rs
+++ b/crates/kebab-store-sqlite/src/lib.rs
@@ -28,6 +28,7 @@ mod fts;
mod jobs;
mod schema;
mod store;
+pub mod stats_ext;
pub use embeddings::EmbeddingRecordRow;
pub use error::StoreError;
diff --git a/crates/kebab-store-sqlite/src/stats_ext.rs b/crates/kebab-store-sqlite/src/stats_ext.rs
new file mode 100644
index 0000000..e6df4e2
--- /dev/null
+++ b/crates/kebab-store-sqlite/src/stats_ext.rs
@@ -0,0 +1,168 @@
+//! p9-fb-37: extended stats helpers — per-media / per-lang doc counts,
+//! stale doc count, on-disk index byte sums.
+
+use std::collections::BTreeMap;
+use std::path::Path;
+
+use kebab_core::{IndexBytes, MEDIA_KINDS};
+use rusqlite::Connection;
+
+/// p9-fb-37: result of [`breakdowns`] — per-media counts, per-language counts, and the stale-document count (separate queries on one connection).
+#[derive(Debug, Clone, Default)]
+pub struct Breakdowns {
+ pub media: BTreeMap<String, u64>,
+ pub lang: BTreeMap<String, u64>,
+ pub stale_doc_count: u64,
+}
+
+/// `media` always contains all 5 `MEDIA_KINDS` (zero-padded).
+/// `lang` only contains observed languages; NULL lang is
+/// keyed as the literal string `"null"`. `stale_doc_count` is 0 when
+/// `threshold_days == 0` (mirrors fb-32 staleness disable semantics).
+pub fn breakdowns(
+ conn: &Connection,
+ threshold_days: u64,
+) -> rusqlite::Result<Breakdowns> {
+ // media: dual JSON shape — text variant ("markdown") vs object
+ // variant ({"image":{"format":"png"}}). Same CASE WHEN as fb-36.
+ let mut media: BTreeMap<String, u64> = MEDIA_KINDS
+ .iter()
+ .map(|k| ((*k).to_string(), 0u64))
+ .collect();
+ let mut stmt = conn.prepare(
+ "SELECT \
+ CASE \
+ WHEN json_type(a.media_type) = 'text' \
+ THEN json_extract(a.media_type, '$') \
+ ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+ END AS kind, \
+ COUNT(DISTINCT d.doc_id) \
+ FROM documents d JOIN assets a ON a.asset_id = d.asset_id \
+ GROUP BY kind",
+ )?;
+ let rows = stmt.query_map([], |r| {
+ Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))
+ })?;
+ for row in rows {
+ let (kind, n) = row?;
+ media.insert(kind, n);
+ }
+
+ let mut lang: BTreeMap<String, u64> = BTreeMap::new();
+ let mut stmt = conn.prepare(
+ "SELECT COALESCE(lang, 'null') AS l, COUNT(*) \
+ FROM documents GROUP BY l",
+ )?;
+ let rows = stmt.query_map([], |r| {
+ Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))
+ })?;
+ for row in rows {
+ let (l, n) = row?;
+ lang.insert(l, n);
+ }
+
+ let stale_doc_count: u64 = if threshold_days == 0 {
+ 0
+ } else {
+ let secs = (threshold_days as i64) * 86_400;
+ let cutoff = time::OffsetDateTime::now_utc()
+ - time::Duration::seconds(secs);
+ let cutoff_str = cutoff
+ .format(&time::format_description::well_known::Rfc3339)
+ .expect("RFC3339 format");
+ conn.query_row(
+ "SELECT COUNT(*) FROM documents WHERE updated_at < ?",
+ [cutoff_str],
+ |r| r.get(0),
+ )?
+ };
+
+ Ok(Breakdowns {
+ media,
+ lang,
+ stale_doc_count,
+ })
+}
+
+/// Sum on-disk bytes of the SQLite database (main + WAL + SHM) and
+/// the LanceDB directory tree. Missing files / dir = 0.
+pub fn index_bytes(data_dir: &Path) -> std::io::Result<IndexBytes> {
+ fn file_size_or_zero(p: &Path) -> u64 {
+ std::fs::metadata(p).map(|m| m.len()).unwrap_or(0)
+ }
+ fn dir_walk_sum(p: &Path) -> std::io::Result<u64> {
+ if !p.exists() {
+ return Ok(0);
+ }
+ let mut total = 0u64;
+ for entry in std::fs::read_dir(p)? {
+ let entry = entry?;
+ let ty = entry.file_type()?;
+ if ty.is_dir() {
+ total += dir_walk_sum(&entry.path())?;
+ } else if ty.is_file() {
+ total += entry.metadata()?.len();
+ }
+ }
+ Ok(total)
+ }
+
+ let sqlite_main = data_dir.join("kebab.sqlite");
+ let sqlite_wal = data_dir.join("kebab.sqlite-wal");
+ let sqlite_shm = data_dir.join("kebab.sqlite-shm");
+ let sqlite = file_size_or_zero(&sqlite_main)
+ + file_size_or_zero(&sqlite_wal)
+ + file_size_or_zero(&sqlite_shm);
+ let lancedb = dir_walk_sum(&data_dir.join("lancedb"))?;
+ Ok(IndexBytes { sqlite, lancedb })
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ fn open_fresh() -> (tempfile::TempDir, crate::SqliteStore) {
+ let dir = tempfile::tempdir().unwrap();
+ let mut cfg = kebab_config::Config::defaults();
+ cfg.storage.data_dir = dir.path().to_string_lossy().into_owned();
+ let store = crate::SqliteStore::open(&cfg).unwrap();
+ store.run_migrations().unwrap();
+ (dir, store)
+ }
+
+ #[test]
+ fn breakdowns_empty_corpus() {
+ let (_dir, store) = open_fresh();
+ let conn = store.read_conn();
+ let b = breakdowns(&conn, 0).unwrap();
+ // 5 keys all zero, lang map empty, stale 0.
+ assert_eq!(b.media.len(), 5);
+ for k in MEDIA_KINDS {
+ assert_eq!(b.media.get(*k), Some(&0u64));
+ }
+ assert!(b.lang.is_empty());
+ assert_eq!(b.stale_doc_count, 0);
+ }
+
+ #[test]
+ fn index_bytes_includes_sqlite_main() {
+ let (dir, _store) = open_fresh();
+ let b = index_bytes(dir.path()).unwrap();
+ assert!(b.sqlite > 0, "main sqlite file should exist after migrations");
+ assert_eq!(b.lancedb, 0);
+ }
+
+ #[test]
+ fn index_bytes_lancedb_dir_walk() {
+ let dir = tempfile::tempdir().unwrap();
+ let lance = dir.path().join("lancedb");
+ std::fs::create_dir_all(lance.join("vectors.lance")).unwrap();
+ std::fs::write(
+ lance.join("vectors.lance").join("data.bin"),
+ vec![0u8; 1024],
+ )
+ .unwrap();
+ let b = index_bytes(dir.path()).unwrap();
+ assert_eq!(b.lancedb, 1024);
+ }
+}
From 231d80e82d32aae2254539e7bc7ec557e5a675de Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 12:30:19 +0900
Subject: [PATCH 05/13] feat(stats): media/lang/bytes/stale fields on
schema.v1.stats (fb-37)
Extends CountSummary with media_breakdown, lang_breakdown, stale_doc_count
fields populated via stats_ext::breakdowns(). Adds count_summary_with_threshold
for callers that need real stale counts. Mirrors all new fields onto the
wire-bound Stats struct in kebab-app::schema with #[serde(default)] for
backwards-compat. Also fixes search_budget_integration.rs for the trace field
added to SearchOpts in Task 1.
Co-Authored-By: Claude Sonnet 4.6
---
crates/kebab-app/src/schema.rs | 57 ++++++++++++++++++-
.../tests/search_budget_integration.rs | 4 ++
crates/kebab-store-sqlite/src/store.rs | 48 ++++++++++++----
3 files changed, 96 insertions(+), 13 deletions(-)
diff --git a/crates/kebab-app/src/schema.rs b/crates/kebab-app/src/schema.rs
index 603b212..46841fb 100644
--- a/crates/kebab-app/src/schema.rs
+++ b/crates/kebab-app/src/schema.rs
@@ -50,6 +50,18 @@ pub struct Stats {
pub chunk_count: u64,
pub asset_count: u64,
pub last_ingest_at: Option,
+ /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded).
+ #[serde(default)]
+ pub media_breakdown: std::collections::BTreeMap<String, u64>,
+ /// p9-fb-37: per-language doc count, NULL keyed as `"null"`.
+ #[serde(default)]
+ pub lang_breakdown: std::collections::BTreeMap<String, u64>,
+ /// p9-fb-37: on-disk byte sums.
+ #[serde(default)]
+ pub index_bytes: kebab_core::IndexBytes,
+ /// p9-fb-37: docs whose `updated_at` is older than the staleness threshold (0 when the threshold is 0).
+ #[serde(default)]
+ pub stale_doc_count: u64,
}
const KEBAB_VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -85,7 +97,7 @@ const WIRE_SCHEMAS: &[&str] = &[
#[doc(hidden)]
pub fn schema_with_config(cfg: &Config) -> anyhow::Result {
let store = open_store_for_stats(cfg)?;
- let stats = collect_stats(&store)?;
+ let stats = collect_stats(cfg, &store)?;
let models = collect_models(cfg, &store);
Ok(SchemaV1 {
schema_version: SCHEMA_V1_ID.to_string(),
@@ -124,13 +136,24 @@ fn open_store_for_stats(cfg: &Config) -> anyhow::Result anyhow::Result {
- let counts = store.count_summary()?;
+fn collect_stats(
+ cfg: &Config,
+ store: &kebab_store_sqlite::SqliteStore,
+) -> anyhow::Result<Stats> {
+ let counts = store
+ .count_summary_with_threshold(cfg.search.stale_threshold_days as u64)?;
+ let data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
+ let index_bytes = kebab_store_sqlite::stats_ext::index_bytes(&data_dir)
+ .map_err(|e| anyhow::anyhow!("index_bytes: {e}"))?;
Ok(Stats {
doc_count: counts.doc_count,
chunk_count: counts.chunk_count,
asset_count: counts.asset_count,
last_ingest_at: counts.last_ingest_at,
+ media_breakdown: counts.media_breakdown,
+ lang_breakdown: counts.lang_breakdown,
+ index_bytes,
+ stale_doc_count: counts.stale_doc_count,
})
}
@@ -150,3 +173,31 @@ fn collect_models(cfg: &Config, store: &kebab_store_sqlite::SqliteStore) -> Mode
corpus_revision: store.corpus_revision(),
}
}
+
+#[cfg(test)]
+mod tests_stats_ext {
+ use super::*;
+
+ #[test]
+ fn stats_includes_breakdowns_and_bytes_on_fresh_corpus() {
+ let dir = tempfile::tempdir().unwrap();
+ let mut cfg = kebab_config::Config::defaults();
+ cfg.storage.data_dir = dir.path().to_string_lossy().into_owned();
+ // Bring up migrations so the sqlite file is created.
+ let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap();
+ store.run_migrations().unwrap();
+ drop(store);
+
+ let s = schema_with_config(&cfg).unwrap();
+ // 5 keys padded.
+ assert_eq!(s.stats.media_breakdown.len(), 5);
+ assert_eq!(s.stats.media_breakdown.get("markdown"), Some(&0));
+ assert_eq!(s.stats.media_breakdown.get("pdf"), Some(&0));
+ // lang map empty on empty corpus.
+ assert!(s.stats.lang_breakdown.is_empty());
+ // sqlite bytes positive after migrations, lancedb 0.
+ assert!(s.stats.index_bytes.sqlite > 0);
+ assert_eq!(s.stats.index_bytes.lancedb, 0);
+ assert_eq!(s.stats.stale_doc_count, 0);
+ }
+}
diff --git a/crates/kebab-app/tests/search_budget_integration.rs b/crates/kebab-app/tests/search_budget_integration.rs
index 42ad346..c309b69 100644
--- a/crates/kebab-app/tests/search_budget_integration.rs
+++ b/crates/kebab-app/tests/search_budget_integration.rs
@@ -47,6 +47,7 @@ fn budget_truncates_snippets_when_below_threshold() {
max_tokens: Some(50),
snippet_chars: None,
cursor: None,
+ trace: false,
},
)
.unwrap();
@@ -78,6 +79,7 @@ fn cursor_paginates_to_next_page() {
max_tokens: None,
snippet_chars: None,
cursor: Some(cursor),
+ trace: false,
},
)
.unwrap();
@@ -114,6 +116,7 @@ fn cursor_rejected_after_corpus_revision_bump() {
max_tokens: None,
snippet_chars: None,
cursor: Some(c),
+ trace: false,
},
);
let err = result.unwrap_err();
@@ -147,6 +150,7 @@ fn max_tokens_zero_returns_one_hit_truncated() {
max_tokens: Some(0),
snippet_chars: None,
cursor: None,
+ trace: false,
},
)
.unwrap();
diff --git a/crates/kebab-store-sqlite/src/store.rs b/crates/kebab-store-sqlite/src/store.rs
index 13691b3..57e16da 100644
--- a/crates/kebab-store-sqlite/src/store.rs
+++ b/crates/kebab-store-sqlite/src/store.rs
@@ -604,6 +604,12 @@ pub struct CountSummary {
/// ISO-8601 timestamp of the most-recently updated document row, or
/// `None` when the store is empty.
pub last_ingest_at: Option,
+ /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded).
+ pub media_breakdown: std::collections::BTreeMap,
+ /// p9-fb-37: per-language doc count, NULL keyed as `"null"`.
+ pub lang_breakdown: std::collections::BTreeMap,
+ /// p9-fb-37: docs whose `updated_at < now - threshold_days`. 0 when threshold=0.
+ pub stale_doc_count: u64,
}
impl SqliteStore {
@@ -611,39 +617,58 @@ impl SqliteStore {
/// most-recent `documents.updated_at` timestamp.
///
/// Uses `read_conn()` (no mutations) — mirrors the pattern used by
- /// [`Self::corpus_revision`].
- pub fn count_summary(&self) -> anyhow::Result {
+ /// [`Self::corpus_revision`]. Shared helper: row counts plus breakdowns for the given threshold.
+ fn count_summary_inner(&self, threshold_days: u64) -> anyhow::Result {
+ use anyhow::Context;
+ use rusqlite::OptionalExtension;
+
let conn = self.read_conn();
let doc_count: u64 = conn
.query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
.context("count documents")?;
-
let chunk_count: u64 = conn
.query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
.context("count chunks")?;
-
let asset_count: u64 = conn
.query_row("SELECT COUNT(*) FROM assets", [], |r| r.get(0))
.context("count assets")?;
-
let last_ingest_at: Option = conn
- .query_row(
- "SELECT MAX(updated_at) FROM documents",
- [],
- |r| r.get(0),
- )
+ .query_row("SELECT MAX(updated_at) FROM documents", [], |r| r.get(0))
.optional()
.context("max updated_at")?
.flatten();
+ let bd = crate::stats_ext::breakdowns(&conn, threshold_days).context("breakdowns")?;
+
Ok(CountSummary {
doc_count,
chunk_count,
asset_count,
last_ingest_at,
+ media_breakdown: bd.media,
+ lang_breakdown: bd.lang,
+ stale_doc_count: bd.stale_doc_count,
})
}
+
+ /// Counts and breakdowns computed with a threshold of 0 days.
+ pub fn count_summary(&self) -> anyhow::Result {
+ // p9-fb-37: default uses threshold_days=0 (matches fb-32 disable
+ // semantics). Callers that need real stale_doc_count call
+ // count_summary_with_threshold.
+ self.count_summary_inner(0)
+ }
+
+ /// p9-fb-37: variant that honors `config.search.stale_threshold_days`.
+ /// Callers who need a meaningful `stale_doc_count` (e.g. `kebab schema`)
+ /// pass the configured threshold; the older `count_summary` returns 0.
+ pub fn count_summary_with_threshold(
+ &self,
+ threshold_days: u64,
+ ) -> anyhow::Result {
+ self.count_summary_inner(threshold_days)
+ }
}
/// Apply the design §5 / task-spec pragmas. Called once per connection.
@@ -681,6 +706,9 @@ mod tests {
assert_eq!(s.chunk_count, 0);
assert_eq!(s.asset_count, 0);
assert!(s.last_ingest_at.is_none());
+ assert_eq!(s.media_breakdown.len(), 5);
+ assert!(s.lang_breakdown.is_empty());
+ assert_eq!(s.stale_doc_count, 0);
}
}
From 6a067e3ab1988319b91f821e4124efb7c41557e9 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 12:38:53 +0900
Subject: [PATCH 06/13] feat(search): HybridRetriever::search_with_trace
(fb-37)
---
crates/kebab-search/src/hybrid.rs | 203 ++++++++++++++++++++++++++++--
crates/kebab-search/src/lib.rs | 1 +
crates/kebab-search/src/trace.rs | 85 +++++++++++++
3 files changed, 280 insertions(+), 9 deletions(-)
create mode 100644 crates/kebab-search/src/trace.rs
diff --git a/crates/kebab-search/src/hybrid.rs b/crates/kebab-search/src/hybrid.rs
index 37cd629..58b6678 100644
--- a/crates/kebab-search/src/hybrid.rs
+++ b/crates/kebab-search/src/hybrid.rs
@@ -18,12 +18,15 @@
use std::collections::HashMap;
use std::sync::Arc;
+use std::time::Instant;
use anyhow::Result;
use kebab_core::{
- IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
+ IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery, SearchTrace,
};
+use crate::trace::{build_fusion_input_skeleton, candidates_from_hits, ScoreKind, TraceBuilder};
+
/// Default `k_rrf` if `kb-config::SearchCfg::rrf_k` is misconfigured.
/// Matches §6.4's documented default (60).
const DEFAULT_K_RRF: u32 = 60;
@@ -145,20 +148,22 @@ impl Retriever for HybridRetriever {
impl HybridRetriever {
fn fuse(&self, query: &SearchQuery) -> Result> {
let target_k = if query.k == 0 { self.default_k } else { query.k };
-
- // Fanout: ask each retriever for `target_k * MULTIPLIER` so
- // the disjoint set of candidates is wide enough. The two
- // per-side queries are identical (same text, k, mode, filters);
- // only the dispatch differs, so we share one `SearchQuery`.
let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER);
let lex_query = SearchQuery {
k: fanout_k,
..query.clone()
};
-
let lex_hits = self.lexical.search(&lex_query)?;
let vec_hits = self.vector.search(&lex_query)?;
+ self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)
+ }
+ fn fuse_with_inputs(
+ &self,
+ lex_hits: &[SearchHit],
+ vec_hits: &[SearchHit],
+ target_k: usize,
+ ) -> Result> {
tracing::debug!(
lex = lex_hits.len(),
vec = vec_hits.len(),
@@ -171,11 +176,13 @@ impl HybridRetriever {
// already 1-based by both LexicalRetriever and VectorRetriever
// (and any well-behaved Retriever should mirror).
let lex_index: HashMap = lex_hits
- .into_iter()
+ .iter()
+ .cloned()
.map(|h| (h.chunk_id.0.clone(), (h.rank, h)))
.collect();
let vec_index: HashMap = vec_hits
- .into_iter()
+ .iter()
+ .cloned()
.map(|h| (h.chunk_id.0.clone(), (h.rank, h)))
.collect();
@@ -312,6 +319,81 @@ impl HybridRetriever {
tracing::debug!(rows = hits.len(), "kb-search hybrid: search done");
Ok(hits)
}
+
+ /// p9-fb-37: parallel to `Retriever::search` but additionally returns
+ /// a trace of pre-fusion lex/vec lists, RRF inputs (union with each
+ /// side's rank), and per-stage timing.
+ pub fn search_with_trace(
+ &self,
+ query: &SearchQuery,
+ ) -> anyhow::Result<(Vec, SearchTrace)> {
+ let start_total = Instant::now();
+ let target_k = if query.k == 0 { self.default_k } else { query.k };
+ let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER);
+ let fanout_query = SearchQuery {
+ k: fanout_k,
+ ..query.clone()
+ };
+
+ let mut tb = TraceBuilder::default();
+
+ let (lex_hits, vec_hits): (Vec, Vec) = match query.mode {
+ SearchMode::Lexical => {
+ let t0 = Instant::now();
+ let lh = self.lexical.search(&fanout_query)?;
+ tb.timing.lexical_ms = t0.elapsed().as_millis() as u64;
+ (lh, Vec::new())
+ }
+ SearchMode::Vector => {
+ let t0 = Instant::now();
+ let vh = self.vector.search(&fanout_query)?;
+ tb.timing.vector_ms = t0.elapsed().as_millis() as u64;
+ (Vec::new(), vh)
+ }
+ SearchMode::Hybrid => {
+ let t0 = Instant::now();
+ let lh = self.lexical.search(&fanout_query)?;
+ tb.timing.lexical_ms = t0.elapsed().as_millis() as u64;
+ let t1 = Instant::now();
+ let vh = self.vector.search(&fanout_query)?;
+ tb.timing.vector_ms = t1.elapsed().as_millis() as u64;
+ (lh, vh)
+ }
+ };
+
+ tb.lexical = candidates_from_hits(&lex_hits, ScoreKind::Lexical);
+ tb.vector = candidates_from_hits(&vec_hits, ScoreKind::Vector);
+ tb.rrf_inputs = build_fusion_input_skeleton(&lex_hits, &vec_hits);
+
+ let t_fusion = Instant::now();
+ let final_hits = match query.mode {
+ SearchMode::Lexical => {
+ let mut h = lex_hits.clone();
+ h.truncate(target_k);
+ h
+ }
+ SearchMode::Vector => {
+ let mut h = vec_hits.clone();
+ h.truncate(target_k);
+ h
+ }
+ SearchMode::Hybrid => self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)?,
+ };
+ tb.timing.fusion_ms = t_fusion.elapsed().as_millis() as u64;
+
+ let score_by_chunk: std::collections::HashMap = final_hits
+ .iter()
+ .map(|h| (h.chunk_id.0.clone(), h.retrieval.fusion_score))
+ .collect();
+ for entry in &mut tb.rrf_inputs {
+ if let Some(s) = score_by_chunk.get(&entry.chunk_id.0) {
+ entry.fusion_score = *s;
+ }
+ }
+
+ tb.timing.total_ms = start_total.elapsed().as_millis() as u64;
+ Ok((final_hits, tb.into_trace()))
+ }
}
/// Parse the `hybrid_fusion` config string into a [`FusionPolicy`].
@@ -633,4 +715,107 @@ mod tests {
let FusionPolicy::Rrf { k_rrf } = parse_fusion("rrf", 0);
assert_eq!(k_rrf, DEFAULT_K_RRF);
}
+
+ #[test]
+ fn search_with_trace_returns_lex_and_vec_lists() {
+ use kebab_core::{ChunkId, DocumentId, IndexVersion, ChunkerVersion,
+ RetrievalDetail, SearchHit, SearchMode, SearchQuery,
+ WorkspacePath, Citation};
+ use std::sync::Arc;
+
+ fn mk_hit(rank: u32, chunk: &str, score: f32, mode: SearchMode) -> SearchHit {
+ SearchHit {
+ rank,
+ chunk_id: ChunkId(chunk.into()),
+ doc_id: DocumentId(format!("d-{chunk}")),
+ doc_path: WorkspacePath::new(format!("{chunk}.md")).unwrap(),
+ heading_path: vec![],
+ section_label: None,
+ snippet: chunk.into(),
+ citation: Citation::Line {
+ path: WorkspacePath::new(format!("{chunk}.md")).unwrap(),
+ start: 1,
+ end: 1,
+ section: None,
+ },
+ retrieval: RetrievalDetail {
+ method: mode,
+ fusion_score: score,
+ lexical_score: if mode == SearchMode::Lexical { Some(score) } else { None },
+ vector_score: if mode == SearchMode::Vector { Some(score) } else { None },
+ lexical_rank: if mode == SearchMode::Lexical { Some(rank) } else { None },
+ vector_rank: if mode == SearchMode::Vector { Some(rank) } else { None },
+ },
+ index_version: IndexVersion("v1".into()),
+ embedding_model: None,
+ chunker_version: ChunkerVersion("c1".into()),
+ indexed_at: time::OffsetDateTime::UNIX_EPOCH,
+ stale: false,
+ }
+ }
+
+ struct Stub { hits: Vec }
+ impl Retriever for Stub {
+ fn search(&self, _q: &SearchQuery) -> anyhow::Result> {
+ Ok(self.hits.clone())
+ }
+ fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) }
+ }
+
+ let lex = Arc::new(Stub {
+ hits: vec![
+ mk_hit(1, "c1", 0.9, SearchMode::Lexical),
+ mk_hit(2, "c2", 0.5, SearchMode::Lexical),
+ ],
+ });
+ let vec_r = Arc::new(Stub {
+ hits: vec![
+ mk_hit(1, "c2", 0.8, SearchMode::Vector),
+ mk_hit(2, "c3", 0.6, SearchMode::Vector),
+ ],
+ });
+ let hybrid = HybridRetriever::with_policy(
+ lex.clone(),
+ vec_r.clone(),
+ FusionPolicy::Rrf { k_rrf: 60 },
+ 2,
+ );
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Hybrid,
+ k: 2,
+ filters: Default::default(),
+ };
+ let (hits, trace) = hybrid.search_with_trace(&q).unwrap();
+ assert!(!hits.is_empty());
+ assert_eq!(trace.lexical.len(), 2);
+ assert_eq!(trace.vector.len(), 2);
+ // Union: c1, c2, c3 → 3 entries.
+ assert_eq!(trace.rrf_inputs.len(), 3);
+ }
+
+ #[test]
+ fn search_with_trace_lexical_mode_empty_vector() {
+ use kebab_core::{IndexVersion, SearchMode, SearchQuery};
+ use std::sync::Arc;
+ struct EmptyR;
+ impl Retriever for EmptyR {
+ fn search(&self, _q: &SearchQuery) -> anyhow::Result> {
+ Ok(vec![])
+ }
+ fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) }
+ }
+ let lex = Arc::new(EmptyR);
+ let vec_r = Arc::new(EmptyR);
+ let hybrid = HybridRetriever::with_policy(lex, vec_r, FusionPolicy::Rrf { k_rrf: 60 }, 2);
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Lexical,
+ k: 2,
+ filters: Default::default(),
+ };
+ let (_hits, trace) = hybrid.search_with_trace(&q).unwrap();
+ assert!(trace.vector.is_empty());
+ assert_eq!(trace.timing.vector_ms, 0);
+ }
}
diff --git a/crates/kebab-search/src/lib.rs b/crates/kebab-search/src/lib.rs
index 47f832d..fef87f3 100644
--- a/crates/kebab-search/src/lib.rs
+++ b/crates/kebab-search/src/lib.rs
@@ -19,6 +19,7 @@
mod citation_helper;
mod hybrid;
mod lexical;
+mod trace;
mod vector;
pub use hybrid::{FusionPolicy, HybridRetriever};
diff --git a/crates/kebab-search/src/trace.rs b/crates/kebab-search/src/trace.rs
new file mode 100644
index 0000000..5ddbf9c
--- /dev/null
+++ b/crates/kebab-search/src/trace.rs
@@ -0,0 +1,85 @@
+//! p9-fb-37: trace capture helpers for `HybridRetriever::search_with_trace`.
+
+use std::collections::BTreeMap;
+
+use kebab_core::{
+ SearchHit, SearchTrace, TraceCandidate, TraceFusionInput, TraceTiming,
+};
+
+/// Build one `TraceCandidate` per `SearchHit`. The score field carries
+/// the lexical or vector score selected by `score_kind` (0.0 when the hit
+/// lacks that score) — caller states which retriever's hit list this is.
+pub fn candidates_from_hits(hits: &[SearchHit], score_kind: ScoreKind) -> Vec {
+ hits.iter()
+ .map(|h| TraceCandidate {
+ chunk_id: h.chunk_id.clone(),
+ doc_id: h.doc_id.clone(),
+ doc_path: h.doc_path.clone(),
+ rank: h.rank,
+ score: match score_kind {
+ ScoreKind::Lexical => h.retrieval.lexical_score.unwrap_or(0.0),
+ ScoreKind::Vector => h.retrieval.vector_score.unwrap_or(0.0),
+ },
+ })
+ .collect()
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum ScoreKind {
+ Lexical,
+ Vector,
+}
+
+/// Build the union of (chunk_id) across lex and vec hit lists, with
+/// each side's rank captured. `fusion_score` is filled by the caller
+/// (RRF computes it during fusion, this helper just pre-builds the
+/// rank table — caller overwrites fusion_score in a second pass).
+pub fn build_fusion_input_skeleton(
+ lex: &[SearchHit],
+ vec: &[SearchHit],
+) -> Vec {
+ let mut by_chunk: BTreeMap = BTreeMap::new();
+ for h in lex {
+ by_chunk
+ .entry(h.chunk_id.0.clone())
+ .or_insert(TraceFusionInput {
+ chunk_id: h.chunk_id.clone(),
+ lexical_rank: None,
+ vector_rank: None,
+ fusion_score: 0.0,
+ })
+ .lexical_rank = Some(h.rank);
+ }
+ for h in vec {
+ by_chunk
+ .entry(h.chunk_id.0.clone())
+ .or_insert(TraceFusionInput {
+ chunk_id: h.chunk_id.clone(),
+ lexical_rank: None,
+ vector_rank: None,
+ fusion_score: 0.0,
+ })
+ .vector_rank = Some(h.rank);
+ }
+ by_chunk.into_values().collect()
+}
+
+/// Container the hybrid retriever fills during a traced run.
+#[derive(Default)]
+pub struct TraceBuilder {
+ pub lexical: Vec,
+ pub vector: Vec,
+ pub rrf_inputs: Vec,
+ pub timing: TraceTiming,
+}
+
+impl TraceBuilder {
+ pub fn into_trace(self) -> SearchTrace {
+ SearchTrace {
+ lexical: self.lexical,
+ vector: self.vector,
+ rrf_inputs: self.rrf_inputs,
+ timing: self.timing,
+ }
+ }
+}
From 69037c313a9352c3884263ef3dbc8bb90df5d94f Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 13:01:18 +0900
Subject: [PATCH 07/13] feat(app): SearchResponse.trace + opts.trace threading
(fb-37)
Adds the `trace: Option` field to `SearchResponse` and
threads `SearchOpts.trace` through `App::search_with_opts`. When the
caller sets `opts.trace = true` the path bypasses the LRU search cache
and runs through `HybridRetriever::search_with_trace`, which dispatches
all 3 SearchModes internally; this means `--trace` requires embeddings
(same constraint as `--mode hybrid`). The non-trace path keeps its
exact prior behavior with `trace: None` stamped on the response.
Picked up Task 1 / Task 3 follow-ups in the same commit so the
workspace compiles: SearchOpts struct-literals in kebab-cli/main.rs +
kebab-mcp/tools/search.rs default the new `trace` field to false, and
the schema-wrapper test in kebab-cli/wire.rs fills the new
media_breakdown / lang_breakdown / index_bytes / stale_doc_count fields
on Stats with `Default::default()`.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/app.rs | 131 +++++++++++++++++++++++++++
crates/kebab-cli/src/main.rs | 1 +
crates/kebab-cli/src/wire.rs | 5 +
crates/kebab-mcp/src/tools/search.rs | 1 +
4 files changed, 138 insertions(+)
diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs
index 3e0c53d..7895459 100644
--- a/crates/kebab-app/src/app.rs
+++ b/crates/kebab-app/src/app.rs
@@ -70,6 +70,9 @@ pub struct SearchResponse {
pub hits: Vec,
pub next_cursor: Option,
pub truncated: bool,
+ /// p9-fb-37: `Some` when the caller passed `SearchOpts.trace = true`;
+ /// code that builds a response without tracing sets this to `None`.
+ pub trace: Option,
}
/// Facade state — see module docs for lifetime rules.
@@ -341,6 +344,65 @@ impl App {
k: fetch_k,
..query.clone()
};
+
+ // p9-fb-37: when --trace is requested, bypass the LRU cache and
+ // run through `HybridRetriever::search_with_trace`, which
+ // dispatches by mode internally. This requires embeddings (same
+ // as `--mode hybrid`); `require_embeddings()` surfaces the
+ // existing "switch to --mode lexical" error otherwise.
+ if opts.trace {
+ let lex = Arc::new(LexicalRetriever::with_settings(
+ self.sqlite.clone(),
+ lexical_index_version(&self.config),
+ self.config.search.snippet_chars,
+ )) as Arc;
+ let (emb, vec_store) = self.require_embeddings()?;
+ let vec_iv = vector_index_version(emb.as_ref());
+ let vec_dyn: Arc = vec_store;
+ let emb_dyn: Arc = emb;
+ let vec_retr = Arc::new(VectorRetriever::with_settings(
+ vec_dyn,
+ emb_dyn,
+ self.sqlite.clone(),
+ vec_iv,
+ self.config.search.snippet_chars,
+ )) as Arc;
+ let hybrid = HybridRetriever::new(&self.config, lex, vec_retr);
+ let (mut traced_hits, trace) = hybrid.search_with_trace(&fetch_query)?;
+
+ // Stamp staleness — same as search_uncached.
+ let now = time::OffsetDateTime::now_utc();
+ crate::staleness::mark_stale_in_place(
+ &mut traced_hits,
+ now,
+ self.config.search.stale_threshold_days,
+ );
+
+ // Apply offset + k_effective truncation (mirrors non-trace path).
+ let drop_n = offset.min(traced_hits.len());
+ traced_hits.drain(..drop_n);
+ let mut hits: Vec =
+ traced_hits.into_iter().take(k_effective).collect();
+
+ // Snippet truncation if opts.snippet_chars set (mirror non-trace path).
+ if opts.snippet_chars.is_some() {
+ for h in hits.iter_mut() {
+ if h.snippet.chars().count() > snippet_chars {
+ h.snippet = trim_to_chars(&h.snippet, snippet_chars);
+ }
+ }
+ }
+
+ // Trace path skips the budget loop. Caller will inspect
+ // `hits.len()` and `trace.timing` rather than paginate.
+ return Ok(SearchResponse {
+ hits,
+ next_cursor: None,
+ truncated: false,
+ trace: Some(trace),
+ });
+ }
+
let mut all_hits = self.search(fetch_query)?;
// Skip offset.
@@ -421,6 +483,7 @@ impl App {
hits,
next_cursor,
truncated,
+ trace: None,
})
}
@@ -847,3 +910,71 @@ mod tests {
assert_ne!(a, d, "different session_id → different hash");
}
}
+
+#[cfg(test)]
+mod tests_trace {
+ use super::*;
+ use kebab_core::{SearchMode, SearchOpts, SearchQuery};
+
+ fn open_app_with_temp_dir() -> (tempfile::TempDir, App) {
+ let dir = tempfile::tempdir().unwrap();
+ let mut cfg = kebab_config::Config::defaults();
+ cfg.storage.data_dir = dir.path().to_string_lossy().into_owned();
+ // Bring up migrations.
+ let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap();
+ store.run_migrations().unwrap();
+ drop(store);
+ let app = App::open_with_config(cfg).unwrap();
+ (dir, app)
+ }
+
+ #[test]
+ fn search_response_trace_none_when_opts_trace_false() {
+ let (_dir, app) = open_app_with_temp_dir();
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Lexical,
+ k: 1,
+ filters: Default::default(),
+ };
+ let resp = app.search_with_opts(q, SearchOpts::default()).unwrap();
+ assert!(resp.trace.is_none());
+ }
+
+ #[test]
+ fn search_response_trace_some_when_opts_trace_true_lexical_mode() {
+ // Lexical mode doesn't require embeddings — the trace path
+ // builds HybridRetriever which holds both retrievers, but
+ // for SearchMode::Lexical only the lexical side is invoked.
+ // require_embeddings will fail if no embedding provider is
+ // configured. Default Config has provider = "none" so this
+ // test will fail unless we tolerate that. Skip the assertion
+ // if the call returns the embedding-disabled error.
+ let (_dir, app) = open_app_with_temp_dir();
+ let q = SearchQuery {
+ text: "x".into(),
+ mode: SearchMode::Lexical,
+ k: 1,
+ filters: Default::default(),
+ };
+ let opts = SearchOpts {
+ trace: true,
+ ..Default::default()
+ };
+ match app.search_with_opts(q, opts) {
+ Ok(resp) => {
+ assert!(resp.trace.is_some(), "trace populated when opts.trace=true");
+ }
+ Err(e) => {
+ // Acceptable in test environment without embeddings —
+ // verify the error is the expected embedding-disabled
+ // shape, not an unrelated panic.
+ let msg = format!("{e:#}");
+ assert!(
+ msg.contains("embedding") || msg.contains("--mode lexical"),
+ "unexpected error: {msg}"
+ );
+ }
+ }
+ }
+}
diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index 7e41d8a..21ee509 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -732,6 +732,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
max_tokens: *max_tokens,
snippet_chars: *snippet_chars,
cursor: cursor.clone(),
+ trace: false,
};
// p9-fb-34: budget-aware path. --no-cache still bypasses the
// App-level LRU; wire wrapper applies regardless.
diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs
index 178fa22..504288d 100644
--- a/crates/kebab-cli/src/wire.rs
+++ b/crates/kebab-cli/src/wire.rs
@@ -264,6 +264,7 @@ mod tests {
hits: vec![],
next_cursor: Some("opaque-cursor-abc".to_string()),
truncated: true,
+ trace: None,
};
let v = wire_search_response(&r);
assert_eq!(schema_of(&v), Some("search_response.v1"));
@@ -303,6 +304,10 @@ mod tests {
stats: Stats {
doc_count: 1, chunk_count: 2, asset_count: 1,
last_ingest_at: None,
+ media_breakdown: Default::default(),
+ lang_breakdown: Default::default(),
+ index_bytes: Default::default(),
+ stale_doc_count: 0,
},
};
let v = wire_schema(&schema);
diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs
index 74af6e9..167cb61 100644
--- a/crates/kebab-mcp/src/tools/search.rs
+++ b/crates/kebab-mcp/src/tools/search.rs
@@ -118,6 +118,7 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
max_tokens: input.max_tokens,
snippet_chars: input.snippet_chars,
cursor: input.cursor,
+ trace: false,
};
let cfg_clone = (*state.config).clone();
match kebab_app::search_with_opts_with_config(cfg_clone, query, opts) {
From 72c227af239acaae623fe23f22095209c66bcb55 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 13:08:48 +0900
Subject: [PATCH 08/13] feat(cli): kebab search --trace flag + wire trace +
pretty print (fb-37)
Co-Authored-By: Claude Sonnet 4.6
---
crates/kebab-cli/src/main.rs | 26 +++++++++++++++++-
crates/kebab-cli/src/wire.rs | 53 +++++++++++++++++++++++++++++++++++-
2 files changed, 77 insertions(+), 2 deletions(-)
diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index 21ee509..305397c 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -163,6 +163,13 @@ enum Cmd {
/// p9-fb-36: filter to a single doc by id.
#[arg(long)]
doc_id: Option,
+
+ /// p9-fb-37: emit pre-fusion lexical / vector / RRF candidate
+ /// lists + per-stage timing in the response. Bypasses cache
+ /// (debug intent — fresh run guaranteed). Requires embeddings
+ /// to be enabled.
+ #[arg(long)]
+ trace: bool,
},
/// Retrieval-augmented question answering.
@@ -669,6 +676,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
media,
ingested_after,
doc_id,
+ trace,
} => {
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
@@ -732,7 +740,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
max_tokens: *max_tokens,
snippet_chars: *snippet_chars,
cursor: cursor.clone(),
- trace: false,
+ trace: *trace,
};
// p9-fb-34: budget-aware path. --no-cache still bypasses the
// App-level LRU; wire wrapper applies regardless.
@@ -790,6 +798,22 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
let next = resp.next_cursor.as_deref().unwrap_or("(none)");
eprintln!("[truncated; use --cursor {next} for the next page]");
}
+ if *trace {
+ if let Some(t) = &resp.trace {
+ eprintln!();
+ eprintln!("Trace:");
+ eprintln!(" lexical ({} hits, {}ms):", t.lexical.len(), t.timing.lexical_ms);
+ for c in t.lexical.iter().take(3) {
+ eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0);
+ }
+ eprintln!(" vector ({} hits, {}ms):", t.vector.len(), t.timing.vector_ms);
+ for c in t.vector.iter().take(3) {
+ eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0);
+ }
+ eprintln!(" fusion ({} inputs, {}ms)", t.rrf_inputs.len(), t.timing.fusion_ms);
+ eprintln!(" total: {}ms", t.timing.total_ms);
+ }
+ }
}
Ok(())
}
diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs
index 504288d..29ab7aa 100644
--- a/crates/kebab-cli/src/wire.rs
+++ b/crates/kebab-cli/src/wire.rs
@@ -81,11 +81,17 @@ pub fn wire_search_hit(h: &SearchHit) -> Value {
/// array (`wire_search_hits`) — see HOTFIXES / fb-34 for the
/// breaking shape change.
pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value {
- let v = serde_json::json!({
+ let mut v = serde_json::json!({
"hits": r.hits.iter().map(wire_search_hit).collect::>(),
"next_cursor": r.next_cursor,
"truncated": r.truncated,
});
+ if let Some(trace) = &r.trace {
+ let trace_v = serde_json::to_value(trace).expect("SearchTrace serializes");
+ if let Value::Object(ref mut map) = v {
+ map.insert("trace".to_string(), trace_v);
+ }
+ }
tag_object(v, "search_response.v1")
}
@@ -348,4 +354,49 @@ mod tests {
assert_eq!(paths.len(), 1);
assert_eq!(paths[0].as_str(), Some("/tmp/x"));
}
+
+ #[test]
+ fn search_response_with_trace_serializes_trace_field() {
+ use kebab_core::{SearchTrace, TraceCandidate, TraceFusionInput,
+ TraceTiming, ChunkId, DocumentId, WorkspacePath};
+ let r = kebab_app::SearchResponse {
+ hits: vec![],
+ next_cursor: None,
+ truncated: false,
+ trace: Some(SearchTrace {
+ lexical: vec![TraceCandidate {
+ chunk_id: ChunkId("c1".into()),
+ doc_id: DocumentId("d1".into()),
+ doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+ rank: 1,
+ score: 0.42,
+ }],
+ vector: vec![],
+ rrf_inputs: vec![TraceFusionInput {
+ chunk_id: ChunkId("c1".into()),
+ lexical_rank: Some(1),
+ vector_rank: None,
+ fusion_score: 0.0,
+ }],
+ timing: TraceTiming { lexical_ms: 5, vector_ms: 0, fusion_ms: 1, total_ms: 7 },
+ }),
+ };
+ let v = wire_search_response(&r);
+ assert_eq!(schema_of(&v), Some("search_response.v1"));
+ assert!(v["trace"].is_object());
+ assert_eq!(v["trace"]["timing"]["lexical_ms"], 5);
+ assert_eq!(v["trace"]["lexical"][0]["chunk_id"], "c1");
+ }
+
+ #[test]
+ fn search_response_without_trace_omits_field() {
+ let r = kebab_app::SearchResponse {
+ hits: vec![],
+ next_cursor: None,
+ truncated: false,
+ trace: None,
+ };
+ let v = wire_search_response(&r);
+ assert!(v.get("trace").is_none(), "trace field absent when None");
+ }
}
From f7e2072d6693151683e7e67f42a32bf445dccf0a Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 13:21:33 +0900
Subject: [PATCH 09/13] test(cli): integration tests for --trace + schema
breakdowns (fb-37)
Also fixes App::search_with_opts trace branch to use NoopRetriever
for SearchMode::Lexical, removing the embeddings requirement when
the user only wants lexical-mode trace.
---
crates/kebab-app/src/app.rs | 86 +++++++++++--------
.../kebab-cli/tests/wire_schema_breakdowns.rs | 57 ++++++++++++
crates/kebab-cli/tests/wire_search_trace.rs | 58 +++++++++++++
3 files changed, 166 insertions(+), 35 deletions(-)
create mode 100644 crates/kebab-cli/tests/wire_schema_breakdowns.rs
create mode 100644 crates/kebab-cli/tests/wire_search_trace.rs
diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs
index 7895459..a3d2c07 100644
--- a/crates/kebab-app/src/app.rs
+++ b/crates/kebab-app/src/app.rs
@@ -347,26 +347,36 @@ impl App {
// p9-fb-37: when --trace is requested, bypass the LRU cache and
// run through `HybridRetriever::search_with_trace`, which
- // dispatches by mode internally. This requires embeddings (same
- // as `--mode hybrid`); `require_embeddings()` surfaces the
- // existing "switch to --mode lexical" error otherwise.
+ // dispatches by mode internally. Vector / hybrid modes require
+ // embeddings (same as `--mode hybrid`); lexical mode skips
+ // embedder construction via `NoopRetriever` so lexical-only
+ // workspaces (provider = "none") can use `--trace` without
+ // surfacing the "switch to --mode lexical" error.
if opts.trace {
let lex = Arc::new(LexicalRetriever::with_settings(
self.sqlite.clone(),
lexical_index_version(&self.config),
self.config.search.snippet_chars,
)) as Arc;
- let (emb, vec_store) = self.require_embeddings()?;
- let vec_iv = vector_index_version(emb.as_ref());
- let vec_dyn: Arc = vec_store;
- let emb_dyn: Arc = emb;
- let vec_retr = Arc::new(VectorRetriever::with_settings(
- vec_dyn,
- emb_dyn,
- self.sqlite.clone(),
- vec_iv,
- self.config.search.snippet_chars,
- )) as Arc;
+ let vec_retr: Arc = if matches!(query.mode, SearchMode::Lexical) {
+ // `HybridRetriever::search_with_trace` never invokes the
+ // vector retriever for `SearchMode::Lexical` (Task 4).
+ // A no-op stand-in lets us avoid the ~470 MB embedder
+ // load when the user only asked for lexical trace.
+ Arc::new(NoopRetriever)
+ } else {
+ let (emb, vec_store) = self.require_embeddings()?;
+ let vec_iv = vector_index_version(emb.as_ref());
+ let vec_dyn: Arc = vec_store;
+ let emb_dyn: Arc = emb;
+ Arc::new(VectorRetriever::with_settings(
+ vec_dyn,
+ emb_dyn,
+ self.sqlite.clone(),
+ vec_iv,
+ self.config.search.snippet_chars,
+ )) as Arc
+ };
let hybrid = HybridRetriever::new(&self.config, lex, vec_retr);
let (mut traced_hits, trace) = hybrid.search_with_trace(&fetch_query)?;
@@ -800,6 +810,24 @@ fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion {
IndexVersion(format!("lex:{}", config.chunking.chunker_version))
}
+/// p9-fb-37: stand-in for the vector retriever in the trace path when
+/// `query.mode == SearchMode::Lexical`. `HybridRetriever::search_with_trace`'s
+/// Lexical branch never calls `vector.search()`, so returning an empty
+/// hit list here is safe and lets lexical-only workspaces (embedding
+/// `provider = "none"`) use `--trace` without paying the ~470 MB
+/// embedder load.
+struct NoopRetriever;
+
+impl Retriever for NoopRetriever {
+ fn search(&self, _q: &kebab_core::SearchQuery) -> anyhow::Result> {
+ Ok(Vec::new())
+ }
+
+ fn index_version(&self) -> kebab_core::IndexVersion {
+ kebab_core::IndexVersion("noop:trace".into())
+ }
+}
+
/// Compose a stable `IndexVersion` for the vector retriever. Tracks
/// `(embedding_model, embedding_version, dimensions)` so a model swap
/// flags drift via the existing index_version mismatch warning in
@@ -944,12 +972,11 @@ mod tests_trace {
#[test]
fn search_response_trace_some_when_opts_trace_true_lexical_mode() {
// Lexical mode doesn't require embeddings — the trace path
- // builds HybridRetriever which holds both retrievers, but
- // for SearchMode::Lexical only the lexical side is invoked.
- // require_embeddings will fail if no embedding provider is
- // configured. Default Config has provider = "none" so this
- // test will fail unless we tolerate that. Skip the assertion
- // if the call returns the embedding-disabled error.
+ // builds HybridRetriever with a `NoopRetriever` stand-in for
+ // the vector side, since `HybridRetriever::search_with_trace`'s
+ // Lexical branch never invokes `vector.search()`. Default
+ // Config has embedding `provider = "none"`, and lexical-mode
+ // trace must succeed under that config (no embedder load).
let (_dir, app) = open_app_with_temp_dir();
let q = SearchQuery {
text: "x".into(),
@@ -961,20 +988,9 @@ mod tests_trace {
trace: true,
..Default::default()
};
- match app.search_with_opts(q, opts) {
- Ok(resp) => {
- assert!(resp.trace.is_some(), "trace populated when opts.trace=true");
- }
- Err(e) => {
- // Acceptable in test environment without embeddings —
- // verify the error is the expected embedding-disabled
- // shape, not an unrelated panic.
- let msg = format!("{e:#}");
- assert!(
- msg.contains("embedding") || msg.contains("--mode lexical"),
- "unexpected error: {msg}"
- );
- }
- }
+ let resp = app
+ .search_with_opts(q, opts)
+ .expect("lexical-mode trace must succeed without embeddings");
+ assert!(resp.trace.is_some(), "trace populated when opts.trace=true");
}
}
diff --git a/crates/kebab-cli/tests/wire_schema_breakdowns.rs b/crates/kebab-cli/tests/wire_schema_breakdowns.rs
new file mode 100644
index 0000000..5696cd2
--- /dev/null
+++ b/crates/kebab-cli/tests/wire_schema_breakdowns.rs
@@ -0,0 +1,57 @@
+//! p9-fb-37: integration tests for `kebab schema --json` extended stats.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+use std::process::Command;
+
+fn run_schema(cfg: &std::path::Path) -> Value {
+ let bin = env!("CARGO_BIN_EXE_kebab");
+ let out = Command::new(bin)
+ .args(["--config", cfg.to_str().unwrap(), "schema", "--json"])
+ .output()
+ .expect("run kebab schema");
+ assert!(
+ out.status.success(),
+ "schema failed: stderr={}",
+ String::from_utf8_lossy(&out.stderr)
+ );
+ serde_json::from_slice(&out.stdout).expect("valid JSON")
+}
+
+#[test]
+fn schema_stats_includes_breakdowns_on_fresh_corpus() {
+ let dir = tempfile::tempdir().unwrap();
+ let (cfg, workspace, _data) = common::write_config(dir.path(), 0);
+ // Run a no-op ingest to bring up migrations + create the SQLite file.
+ fs::write(workspace.join("placeholder.md"), "# placeholder\n").unwrap();
+ common::ingest(&cfg, &workspace);
+
+ let v = run_schema(&cfg);
+ let stats = &v["stats"];
+ let m = stats["media_breakdown"].as_object().unwrap();
+ assert_eq!(m.len(), 5, "5 media keys padded");
+ for k in &["markdown", "pdf", "image", "audio", "other"] {
+ assert!(m[*k].is_number(), "media[{k}] is integer");
+ }
+ assert!(stats["lang_breakdown"].is_object());
+ assert!(stats["index_bytes"]["sqlite"].is_number());
+ assert!(stats["index_bytes"]["lancedb"].is_number());
+ assert!(stats["stale_doc_count"].is_number());
+}
+
+#[test]
+fn schema_stats_breakdowns_after_ingest() {
+ let dir = tempfile::tempdir().unwrap();
+ let (cfg, workspace, _data) = common::write_config(dir.path(), 0);
+ fs::write(workspace.join("a.md"), "---\nlang: en\n---\nhello\n").unwrap();
+ fs::write(workspace.join("b.md"), "---\nlang: ko\n---\n안녕\n").unwrap();
+ common::ingest(&cfg, &workspace);
+
+ let v = run_schema(&cfg);
+ let stats = &v["stats"];
+ assert_eq!(stats["media_breakdown"]["markdown"], 2);
+ assert!(stats["lang_breakdown"].is_object());
+ assert!(stats["index_bytes"]["sqlite"].as_u64().unwrap() > 0);
+}
diff --git a/crates/kebab-cli/tests/wire_search_trace.rs b/crates/kebab-cli/tests/wire_search_trace.rs
new file mode 100644
index 0000000..4b8daff
--- /dev/null
+++ b/crates/kebab-cli/tests/wire_search_trace.rs
@@ -0,0 +1,58 @@
+//! p9-fb-37: integration tests for `kebab search --trace --json`.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+
+#[test]
+fn search_trace_json_includes_trace_block() {
+ let dir = tempfile::tempdir().unwrap();
+ let (cfg, workspace, _data) = common::write_config(dir.path(), 0);
+ fs::write(workspace.join("doc1.md"), "# Title\n\nrust async hello\n").unwrap();
+ common::ingest(&cfg, &workspace);
+
+ let (stdout, _stderr) = common::run_search_with_args(
+ &cfg,
+ &["--mode", "lexical", "--trace", "--json", "rust"],
+ );
+ let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON");
+ assert_eq!(v["schema_version"], "search_response.v1");
+ assert!(v["trace"].is_object(), "trace block present");
+ assert!(v["trace"]["timing"].is_object());
+ assert!(v["trace"]["timing"]["total_ms"].is_number());
+ assert!(v["trace"]["lexical"].is_array());
+ assert!(v["trace"]["vector"].is_array());
+ assert!(v["trace"]["rrf_inputs"].is_array());
+}
+
+#[test]
+fn search_without_trace_omits_trace_field() {
+ let dir = tempfile::tempdir().unwrap();
+ let (cfg, workspace, _data) = common::write_config(dir.path(), 0);
+ fs::write(workspace.join("doc1.md"), "# Title\n\nrust async hello\n").unwrap();
+ common::ingest(&cfg, &workspace);
+
+ let (stdout, _stderr) = common::run_search_with_args(
+ &cfg,
+ &["--mode", "lexical", "--json", "rust"],
+ );
+ let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON");
+ assert!(v.get("trace").is_none(), "trace field absent without --trace");
+}
+
+#[test]
+fn search_trace_lexical_mode_vector_list_empty() {
+ let dir = tempfile::tempdir().unwrap();
+ let (cfg, workspace, _data) = common::write_config(dir.path(), 0);
+ fs::write(workspace.join("doc1.md"), "# Title\n\nrust async hello\n").unwrap();
+ common::ingest(&cfg, &workspace);
+
+ let (stdout, _stderr) = common::run_search_with_args(
+ &cfg,
+ &["--mode", "lexical", "--trace", "--json", "rust"],
+ );
+ let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON");
+ assert_eq!(v["trace"]["vector"].as_array().unwrap().len(), 0);
+ assert_eq!(v["trace"]["timing"]["vector_ms"], 0);
+}
From 653e432a3050a2bf6c749f8a541a8f2fbb8e2aa5 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 13:32:30 +0900
Subject: [PATCH 10/13] feat(mcp): kebab__search trace input + output mirror
(fb-37)
Co-Authored-By: Claude Sonnet 4.6
---
crates/kebab-mcp/src/tools/search.rs | 15 ++-
crates/kebab-mcp/tests/tools_call_fetch.rs | 1 +
crates/kebab-mcp/tests/tools_call_search.rs | 4 +
.../tests/tools_call_search_trace.rs | 104 ++++++++++++++++++
4 files changed, 122 insertions(+), 2 deletions(-)
create mode 100644 crates/kebab-mcp/tests/tools_call_search_trace.rs
diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs
index 167cb61..722dbdd 100644
--- a/crates/kebab-mcp/src/tools/search.rs
+++ b/crates/kebab-mcp/src/tools/search.rs
@@ -47,6 +47,10 @@ pub struct SearchInput {
pub ingested_after: Option,
/// p9-fb-36: filter to a single doc.
pub doc_id: Option,
+ /// p9-fb-37: when true, include a `trace` field on the response
+ /// with pre-fusion lexical/vector candidate lists + per-stage timing.
+ /// Bypasses cache (debug intent — fresh run guaranteed). Default false.
+ pub trace: Option<bool>,
}
pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
@@ -118,7 +122,7 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
max_tokens: input.max_tokens,
snippet_chars: input.snippet_chars,
cursor: input.cursor,
- trace: false,
+ trace: input.trace.unwrap_or(false),
};
let cfg_clone = (*state.config).clone();
match kebab_app::search_with_opts_with_config(cfg_clone, query, opts) {
@@ -139,12 +143,19 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
v
})
.collect();
- let envelope = serde_json::json!({
+ let mut envelope = serde_json::json!({
"schema_version": "search_response.v1",
"hits": tagged,
"next_cursor": resp.next_cursor,
"truncated": resp.truncated,
});
+ if let Some(trace) = &resp.trace {
+ let trace_v =
+ serde_json::to_value(trace).unwrap_or(serde_json::Value::Null);
+ if let serde_json::Value::Object(ref mut map) = envelope {
+ map.insert("trace".to_string(), trace_v);
+ }
+ }
match serde_json::to_string(&envelope) {
Ok(json) => to_tool_success(json),
Err(e) => to_tool_error(&anyhow::anyhow!(e)),
diff --git a/crates/kebab-mcp/tests/tools_call_fetch.rs b/crates/kebab-mcp/tests/tools_call_fetch.rs
index 8da70a7..821db4d 100644
--- a/crates/kebab-mcp/tests/tools_call_fetch.rs
+++ b/crates/kebab-mcp/tests/tools_call_fetch.rs
@@ -69,6 +69,7 @@ async fn fetch_tool_chunk_returns_fetch_result_v1() {
media: None,
ingested_after: None,
doc_id: None,
+ trace: None,
},
);
let search_text = match &search_result.content.first().unwrap().raw {
diff --git a/crates/kebab-mcp/tests/tools_call_search.rs b/crates/kebab-mcp/tests/tools_call_search.rs
index 58a32d8..58456f7 100644
--- a/crates/kebab-mcp/tests/tools_call_search.rs
+++ b/crates/kebab-mcp/tests/tools_call_search.rs
@@ -65,6 +65,7 @@ async fn search_tool_returns_search_response_v1() {
media: None,
ingested_after: None,
doc_id: None,
+ trace: None,
},
);
@@ -166,6 +167,7 @@ async fn search_with_doc_id_filter_returns_only_target() {
media: None,
ingested_after: None,
doc_id: None,
+ trace: None,
},
);
assert!(
@@ -204,6 +206,7 @@ async fn search_with_doc_id_filter_returns_only_target() {
media: None,
ingested_after: None,
doc_id: Some(target_doc_id.clone()),
+ trace: None,
},
);
assert!(
@@ -260,6 +263,7 @@ async fn search_with_invalid_ingested_after_returns_invalid_input() {
media: None,
ingested_after: Some("garbage".to_string()),
doc_id: None,
+ trace: None,
},
);
diff --git a/crates/kebab-mcp/tests/tools_call_search_trace.rs b/crates/kebab-mcp/tests/tools_call_search_trace.rs
new file mode 100644
index 0000000..1cb07cd
--- /dev/null
+++ b/crates/kebab-mcp/tests/tools_call_search_trace.rs
@@ -0,0 +1,104 @@
+//! p9-fb-37: integration test for `mcp__kebab__search` trace input/output.
+
+use std::fs;
+
+use kebab_config::Config;
+use kebab_core::SourceScope;
+use kebab_mcp::{KebabAppState, KebabHandler};
+use rmcp::model::RawContent;
+
+fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path) -> Config {
+ let mut cfg = Config::defaults();
+ cfg.storage.data_dir = data_dir.to_string_lossy().into_owned();
+ cfg.storage.model_dir = data_dir.join("models").to_string_lossy().into_owned();
+ cfg.workspace.root = workspace_root.to_string_lossy().into_owned();
+ cfg.workspace.exclude.clear();
+ cfg.models.embedding.provider = "none".to_string();
+ cfg.models.embedding.dimensions = 0;
+ cfg
+}
+
+fn setup() -> (tempfile::TempDir, KebabHandler) {
+ let dir = tempfile::tempdir().unwrap();
+ let data_dir = dir.path().join("data");
+ let workspace_root = dir.path().join("notes");
+ fs::create_dir_all(&data_dir).unwrap();
+ fs::create_dir_all(&workspace_root).unwrap();
+ let config = minimal_config(&data_dir, &workspace_root);
+ fs::write(
+ workspace_root.join("a.md"),
+ "# Alpha\n\nThis document mentions kebab and bread.",
+ )
+ .unwrap();
+ let scope = SourceScope {
+ root: workspace_root.clone(),
+ include: vec![],
+ exclude: vec![],
+ };
+ let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap();
+ let state = KebabAppState::new(config, None);
+ let handler = KebabHandler::new(state);
+ (dir, handler)
+}
+
+fn make_input(trace: Option<bool>) -> kebab_mcp::tools::search::SearchInput {
+ kebab_mcp::tools::search::SearchInput {
+ query: "kebab".to_string(),
+ mode: Some("lexical".to_string()),
+ k: Some(5),
+ max_tokens: None,
+ snippet_chars: None,
+ cursor: None,
+ tags: None,
+ lang: None,
+ path_glob: None,
+ trust_min: None,
+ media: None,
+ ingested_after: None,
+ doc_id: None,
+ trace,
+ }
+}
+
+fn extract_json(result: &rmcp::model::CallToolResult) -> serde_json::Value {
+ assert!(
+ !result.is_error.unwrap_or(false),
+ "expected isError=false, got {result:?}"
+ );
+ let content = result.content.first().expect("at least one content item");
+ let text = match &content.raw {
+ RawContent::Text(t) => &t.text,
+ other => panic!("expected Text content, got {other:?}"),
+ };
+ serde_json::from_str(text).expect("valid JSON")
+}
+
+#[tokio::test]
+async fn search_with_trace_true_returns_trace_field() {
+ let (_dir, handler) = setup();
+ let result = kebab_mcp::tools::search::handle(handler.state(), make_input(Some(true)));
+ let v = extract_json(&result);
+ assert_eq!(v["schema_version"], "search_response.v1");
+ assert!(v["trace"].is_object(), "trace field present when trace:true");
+ assert!(v["trace"]["timing"]["total_ms"].is_number());
+ assert!(v["trace"]["lexical"].is_array());
+ assert!(v["trace"]["vector"].is_array());
+ assert!(v["trace"]["rrf_inputs"].is_array());
+}
+
+#[tokio::test]
+async fn search_without_trace_omits_trace_field() {
+ let (_dir, handler) = setup();
+ let result = kebab_mcp::tools::search::handle(handler.state(), make_input(None));
+ let v = extract_json(&result);
+ assert_eq!(v["schema_version"], "search_response.v1");
+ assert!(v.get("trace").is_none(), "trace absent when None");
+}
+
+#[tokio::test]
+async fn search_with_trace_false_omits_trace_field() {
+ let (_dir, handler) = setup();
+ let result = kebab_mcp::tools::search::handle(handler.state(), make_input(Some(false)));
+ let v = extract_json(&result);
+ assert!(v.get("trace").is_none(), "trace absent when false");
+}
From 5687cbc0e28e5a1e559fe8cacdca866e021d49a2 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 13:39:11 +0900
Subject: [PATCH 11/13] feat(tui): search pane t-key opens TracePopup (fb-37)
---
crates/kebab-tui/src/app.rs | 3 +
crates/kebab-tui/src/cheatsheet.rs | 1 +
crates/kebab-tui/src/lib.rs | 1 +
crates/kebab-tui/src/run.rs | 43 +++++++++
crates/kebab-tui/src/search.rs | 43 +++++++++
crates/kebab-tui/src/trace_popup.rs | 139 ++++++++++++++++++++++++++++
6 files changed, 230 insertions(+)
create mode 100644 crates/kebab-tui/src/trace_popup.rs
diff --git a/crates/kebab-tui/src/app.rs b/crates/kebab-tui/src/app.rs
index 1d53d0c..a87f8c8 100644
--- a/crates/kebab-tui/src/app.rs
+++ b/crates/kebab-tui/src/app.rs
@@ -387,6 +387,8 @@ pub struct App {
pub ask: Option,
/// Populated by p9-4.
pub inspect: Option,
+ /// p9-fb-37: trace popup state, `Some` while open.
+ pub trace_popup: Option<crate::trace_popup::TracePopupState>,
/// Populated by p9-fb-03 when the user kicks off an in-shell
/// ingest (Library `r`). Cleared by the run loop a few seconds
/// after the run reaches a terminal event.
@@ -461,6 +463,7 @@ impl App {
search: None,
ask: None,
inspect: None,
+ trace_popup: None,
ingest_state: None,
error_overlay: None,
should_quit: false,
diff --git a/crates/kebab-tui/src/cheatsheet.rs b/crates/kebab-tui/src/cheatsheet.rs
index 1af1751..f490ff9 100644
--- a/crates/kebab-tui/src/cheatsheet.rs
+++ b/crates/kebab-tui/src/cheatsheet.rs
@@ -80,6 +80,7 @@ pub fn render_cheatsheet(f: &mut Frame, area: Rect, app: &App) {
("Delete", "remove char at cursor"),
("g", "open hit's citation in $EDITOR (Normal)"),
("o", "inspect selected hit's chunk (Normal — was `i` pre-fb-21)"),
+ ("t", "open retrieval trace popup (Normal — p9-fb-37)"),
("i", "Normal → Insert (toggle back to typing)"),
("Esc", "back to Library"),
]);
diff --git a/crates/kebab-tui/src/lib.rs b/crates/kebab-tui/src/lib.rs
index d61c6f2..1457c1e 100644
--- a/crates/kebab-tui/src/lib.rs
+++ b/crates/kebab-tui/src/lib.rs
@@ -27,6 +27,7 @@ mod run;
mod search;
mod terminal;
mod theme;
+pub mod trace_popup;
pub use input::{InputBuffer, display_width, place_cursor_x, truncate_to_display_width};
pub use theme::{Palette, Role, Theme};
diff --git a/crates/kebab-tui/src/run.rs b/crates/kebab-tui/src/run.rs
index cc2db24..fb24b22 100644
--- a/crates/kebab-tui/src/run.rs
+++ b/crates/kebab-tui/src/run.rs
@@ -130,6 +130,21 @@ pub(crate) fn run_loop(app: &mut App) -> Result<()> {
if event::poll(POLL_INTERVAL)? {
match event::read()? {
Event::Key(key) if key.kind == KeyEventKind::Press => {
+ // p9-fb-37: trace popup eats keys while open.
+ // Sits ahead of cheatsheet + mode + pane dispatch
+ // so Esc / j / k / arrows route to the popup
+ // instead of leaking through to the search pane.
+ if app.trace_popup.is_some() {
+ let close = if let Some(popup) = app.trace_popup.as_mut() {
+ crate::trace_popup::handle_key_trace_popup(popup, key)
+ } else {
+ false
+ };
+ if close {
+ app.trace_popup = None;
+ }
+ continue;
+ }
// p9-fb-13: cheatsheet popup toggle takes
// precedence over both mode + pane dispatch.
// F1 toggles open/close. While visible, Esc
@@ -255,6 +270,12 @@ fn render_root(f: &mut Frame, app: &App) {
}
render_status_bar(f, outer[2], app);
render_key_hints(f, outer[3], app);
+ // p9-fb-37: trace popup overlays on top of pane content but
+ // below the error overlay (errors are higher-priority modal).
+ if let Some(popup) = &app.trace_popup {
+ let popup_area = centered_rect(80, 80, f.area());
+ crate::trace_popup::render_trace_popup(f, popup_area, popup);
+ }
if let Some(err) = &app.error_overlay {
render_error_overlay(f, f.area(), err, &app.theme);
}
@@ -263,6 +284,28 @@ fn render_root(f: &mut Frame, app: &App) {
}
}
+/// p9-fb-37: centered sub-rect helper for the trace popup. Returns a rect of
+/// `percent_x` × `percent_y` percent of `r`, centered; margins saturate at 0.
+fn centered_rect(percent_x: u16, percent_y: u16, r: ratatui::layout::Rect) -> ratatui::layout::Rect {
+    use ratatui::layout::{Constraint, Direction, Layout};
+    let popup_layout = Layout::default()
+        .direction(Direction::Vertical)
+        .constraints([
+            Constraint::Percentage(100u16.saturating_sub(percent_y) / 2),
+            Constraint::Percentage(percent_y),
+            Constraint::Percentage(100u16.saturating_sub(percent_y) / 2),
+        ])
+        .split(r);
+    Layout::default()
+        .direction(Direction::Horizontal)
+        .constraints([
+            Constraint::Percentage(100u16.saturating_sub(percent_x) / 2),
+            Constraint::Percentage(percent_x),
+            Constraint::Percentage(100u16.saturating_sub(percent_x) / 2),
+        ])
+        .split(popup_layout[1])[1]
+}
+
fn render_header(f: &mut Frame, area: Rect, app: &App) {
let pane_label = match app.focus {
Pane::Library => "Library",
diff --git a/crates/kebab-tui/src/search.rs b/crates/kebab-tui/src/search.rs
index cd1fb99..9166fe3 100644
--- a/crates/kebab-tui/src/search.rs
+++ b/crates/kebab-tui/src/search.rs
@@ -209,6 +209,49 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
// pre-fb-12 SHIFT/none heuristic).
let is_normal = state.mode == crate::app::Mode::Normal;
+ // p9-fb-37: `t` opens the trace popup. Re-runs the last submitted
+ // query (text + mode) with `SearchOpts.trace = true`, the configured
+ // default `k`, and default filters. Setting `opts.trace` is what makes
+ // `search_with_opts_with_config` bypass the LRU cache.
+ if is_normal
+ && matches!(
+ (key.code, key.modifiers),
+ (KeyCode::Char('t'), KeyModifiers::NONE)
+ )
+ {
+ let (last_query, has_results) = {
+ let s = state.search.as_ref().unwrap();
+ (s.last_query.clone(), !s.hits.is_empty())
+ };
+ if !has_results {
+ return KeyOutcome::Continue;
+ }
+ if let Some((q_text, q_mode)) = last_query {
+ let q = kebab_core::SearchQuery {
+ text: q_text,
+ mode: q_mode,
+ k: state.config.search.default_k,
+ filters: kebab_core::SearchFilters::default(),
+ };
+ let opts = kebab_core::SearchOpts {
+ trace: true,
+ ..Default::default()
+ };
+ match kebab_app::search_with_opts_with_config(state.config.clone(), q, opts) {
+ Ok(resp) => {
+ if let Some(t) = resp.trace {
+ state.trace_popup = Some(crate::trace_popup::TracePopupState::new(t));
+ }
+ }
+ Err(_) => {
+ // Silent failure — trace is debug-only; user
+ // can still see search hits without it.
+ }
+ }
+ }
+ return KeyOutcome::Continue;
+ }
+
// p9-fb-21: chunk-inspect rebound from `i` to `o` (vim "open").
// The `i` key is now the universal Normal→Insert toggle (handled
// in `mode_intercept`), so it cannot also mean "inspect chunk"
diff --git a/crates/kebab-tui/src/trace_popup.rs b/crates/kebab-tui/src/trace_popup.rs
new file mode 100644
index 0000000..5374936
--- /dev/null
+++ b/crates/kebab-tui/src/trace_popup.rs
@@ -0,0 +1,139 @@
+//! p9-fb-37: TUI trace popup. Opened from the Search pane via the `t` key
+//! when results are visible. The Search pane re-runs the last submitted
+//! query with `SearchOpts.trace = true`; this module renders the lexical,
+//! vector, and RRF lists plus per-stage timing as one scrollable view.
+
+use crossterm::event::{KeyCode, KeyEvent};
+use kebab_core::SearchTrace;
+use ratatui::Frame;
+use ratatui::layout::Rect;
+use ratatui::style::{Modifier, Style};
+use ratatui::text::{Line, Span};
+use ratatui::widgets::{Block, Borders, Paragraph, Wrap};
+
+#[derive(Debug, Clone)]
+pub struct TracePopupState {
+ pub trace: SearchTrace,
+ pub scroll: u16,
+}
+
+impl TracePopupState {
+ pub fn new(trace: SearchTrace) -> Self {
+ Self { trace, scroll: 0 }
+ }
+}
+
+pub fn render_trace_popup(f: &mut Frame, area: Rect, state: &TracePopupState) {
+ let mut lines: Vec<Line> = Vec::new();
+ let bold = Style::default().add_modifier(Modifier::BOLD);
+
+ lines.push(Line::from(Span::styled(
+ format!(
+ "Lexical ({} hits, {} ms)",
+ state.trace.lexical.len(),
+ state.trace.timing.lexical_ms,
+ ),
+ bold,
+ )));
+ for c in &state.trace.lexical {
+ lines.push(Line::from(format!(
+ " #{:>2} score={:.4} chunk={}",
+ c.rank, c.score, c.chunk_id.0
+ )));
+ }
+ lines.push(Line::from(""));
+ lines.push(Line::from(Span::styled(
+ format!(
+ "Vector ({} hits, {} ms)",
+ state.trace.vector.len(),
+ state.trace.timing.vector_ms,
+ ),
+ bold,
+ )));
+ for c in &state.trace.vector {
+ lines.push(Line::from(format!(
+ " #{:>2} score={:.4} chunk={}",
+ c.rank, c.score, c.chunk_id.0
+ )));
+ }
+ lines.push(Line::from(""));
+ lines.push(Line::from(Span::styled(
+ format!(
+ "RRF inputs ({} entries, {} ms fusion)",
+ state.trace.rrf_inputs.len(),
+ state.trace.timing.fusion_ms,
+ ),
+ bold,
+ )));
+ for e in &state.trace.rrf_inputs {
+ lines.push(Line::from(format!(
+ " chunk={} lex={:?} vec={:?} fusion={:.4}",
+ e.chunk_id.0, e.lexical_rank, e.vector_rank, e.fusion_score
+ )));
+ }
+ lines.push(Line::from(""));
+ lines.push(Line::from(Span::styled(
+ format!("Total: {} ms", state.trace.timing.total_ms),
+ bold,
+ )));
+
+ let block = Block::default()
+ .title("Trace — Esc to close, j/k or ↑↓ to scroll")
+ .borders(Borders::ALL);
+ let p = Paragraph::new(lines)
+ .block(block)
+ .scroll((state.scroll, 0))
+ .wrap(Wrap { trim: false });
+ f.render_widget(p, area);
+}
+
+/// Handle keys while popup is open. Returns true if the popup should close.
+pub fn handle_key_trace_popup(state: &mut TracePopupState, key: KeyEvent) -> bool {
+ match key.code {
+ KeyCode::Esc => true,
+ KeyCode::Char('j') | KeyCode::Down => {
+ state.scroll = state.scroll.saturating_add(1);
+ false
+ }
+ KeyCode::Char('k') | KeyCode::Up => {
+ state.scroll = state.scroll.saturating_sub(1);
+ false
+ }
+ _ => false,
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use crossterm::event::KeyModifiers;
+ use kebab_core::TraceTiming;
+
+ fn dummy_state() -> TracePopupState {
+ TracePopupState::new(SearchTrace {
+ lexical: vec![],
+ vector: vec![],
+ rrf_inputs: vec![],
+ timing: TraceTiming::default(),
+ })
+ }
+
+ #[test]
+ fn esc_closes() {
+ let mut s = dummy_state();
+ assert!(handle_key_trace_popup(
+ &mut s,
+ KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE),
+ ));
+ }
+
+ #[test]
+ fn j_scrolls_down() {
+ let mut s = dummy_state();
+ assert!(!handle_key_trace_popup(
+ &mut s,
+ KeyEvent::new(KeyCode::Char('j'), KeyModifiers::NONE),
+ ));
+ assert_eq!(s.scroll, 1);
+ }
+}
From a40593590b9277d345e049ae472b7210314d2107 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 14:04:54 +0900
Subject: [PATCH 12/13] docs(fb-37): wire schema + README + SMOKE + INDEX +
SKILL
---
README.md | 4 ++--
crates/kebab-cli/src/main.rs | 3 ++-
docs/SMOKE.md | 16 +++++++++++++
docs/wire-schema/v1/schema.schema.json | 24 +++++++++++++++++++
.../v1/search_response.schema.json | 22 ++++++++++++++++-
integrations/claude-code/kebab/SKILL.md | 5 ++--
tasks/INDEX.md | 2 +-
tasks/p9/p9-fb-37-trace-and-stats.md | 7 ++++--
8 files changed, 74 insertions(+), 9 deletions(-)
diff --git a/README.md b/README.md
index 7697391..5dd9ef7 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ kebab doctor
|------|------|
| `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 |
| `kebab ingest []` | Markdown / 이미지 / PDF 색인 (idempotent). TTY 에서는 stderr 진행 바, non-TTY (CI / pipe) 는 stderr 한 줄씩, `--json` 은 stdout 에 `ingest_progress.v1` 라인 streaming 후 마지막에 `ingest_report.v1`. Ctrl-C 한 번이면 현재 asset 마무리 후 abort (부분 commit 보존, idempotent re-run), 두 번째 Ctrl-C 는 hard exit. Markdown title 이 frontmatter 에 없어도 첫 H1 → H2 → 첫 paragraph 80 자 → 파일명 순으로 자동 채움 (parser_version `md-frontmatter-v2`) — 기존 색인된 doc 도 다음 ingest 에서 새 title 로 갱신. **Incremental** (p9-fb-23): 두 번째 이후의 ingest 는 변하지 않은 doc (blake3 + parser/chunker/embedder version 모두 동일) 의 parse/chunk/embed/vector upsert 를 자동 스킵. final summary 에 `N unchanged` 카운트 표시. `--force-reingest` 로 skip 무시 강제 재처리. **지원 형식** (extractor 자동 결정 — config 에 명시 불가): Markdown (`.md`), 이미지 (`.png` / `.jpg` / `.jpeg`, OCR + caption), PDF (`.pdf`). 다른 확장자는 자동 skip — `IngestItem.warnings` 에 사유 (`"unsupported media type: .docx"` 등), `IngestReport.skipped_by_extension` 에 카운트 분류, CLI / TUI summary 에 breakdown 표시. |
-| `kebab search --mode {lexical,vector,hybrid} "" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor ] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` 는 반복 가능 flag (`--tag rust --tag async`) 로 OR 매칭, `--media` 는 `,` 구분 다중 값 OR 매칭, 나머지 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). `--ingested-after` 는 RFC3339 UTC — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 `markdown` alias 로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). |
+| `kebab search --mode {lexical,vector,hybrid} "" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor ] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID] [--trace]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` 는 반복 가능 flag (`--tag rust --tag async`) 로 OR 매칭, `--media` 는 `,` 구분 다중 값 OR 매칭, 나머지 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). `--ingested-after` 는 RFC3339 UTC — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 `markdown` alias 로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). **`--trace` (p9-fb-37)** — `search_response.v1.trace` 에 lexical / vector pre-fusion 후보 + RRF union + per-stage timing (`lexical_ms` / `vector_ms` / `fusion_ms` / `total_ms`) 노출. trace 요청은 캐시 우회 (`--no-cache` 없이도 항상 cold). |
| `kebab list docs` | 색인된 문서 목록 |
| `kebab inspect doc ` / `kebab inspect chunk ` | raw record 보기 |
| `kebab fetch chunk [--context N]` / `kebab fetch doc [--max-tokens N]` / `kebab fetch span [--max-tokens N]` | (p9-fb-35) verbatim text fetch from indexed corpus. wire = `fetch_result.v1` (kind discriminator). chunk: target + ±N ordinal-context chunks. doc: full normalized markdown. span: 1-based line range (PDF/audio rejected as `error.v1.code = span_not_supported`). chars/4 budget on doc/span. |
@@ -80,7 +80,7 @@ kebab doctor
| `kebab tui` | Ratatui 셸 (Library + Search + Ask + Inspect 패널, desktop 진행 중). Library 에서 `r` 키로 background ingest 시작 — 화면 하단 status bar 가 진행 표시, 완료/abort 시 final 라인 잠시 유지 후 자동 hide. ingest 진행 중 `Esc` / `Ctrl-C` 가 cancel signal (그 외에는 quit). vim-style mode (header 우측 `-- NORMAL --` / `-- INSERT --`) — Library/Inspect 는 자동 NORMAL, Search/Ask 는 자동 INSERT. `i` 로 Normal→Insert (모든 pane — p9-fb-21), `Esc` 로 Insert→Normal 어디서나. mode-authoritative dispatch — Search 의 `j/k/o/g`, Ask 의 `e/j/k` 는 NORMAL 모드에서만 명령으로 동작, INSERT 에서는 입력 문자로 typing. (Search 의 chunk inspect 키는 `i`→`o` 로 rebind — `i` 가 universal Insert toggle.) **`F1` 로 cheatsheet popup** (현재 pane 의 키 매핑 + global 토글 표) — `Esc` / `F1` 로 닫기. Search 패널은 200ms debounce 후 background worker 가 검색 — 키 입력으로 UI freeze 안 됨, 사용자가 계속 타이핑하면 stale 결과 자동 폐기 (generation counter). Ask 패널은 multi-turn — 같은 conversation 안에서 Q1/A1, Q2/A2 transcript 누적, 다음 질문이 이전 턴을 history 로 받아 답변. 답변 본문은 markdown 렌더 (bold/italic/inline code/heading/list/code fence/table/blockquote, raw `**bold**` 가 실제 굵게 표시). `Ctrl-L` 로 새 conversation 시작. Search 의 `g` 키가 `$EDITOR` (기본 `vi`) 로 hit 의 citation 위치 열기 — 종료 후 TUI 화면이 자동으로 깨끗이 redraw. CLI `kebab ask` 는 raw markdown 그대로 (terminal 호환성 위해). Library 의 doc-list 가 한글 / 일본어 / 중국어 (CJK) 제목을 wide-char 정확한 column width 로 truncate — 한글 제목이 한 줄을 넘기지 않음 (CJK 1 자 = 2 col). Search/Ask/Filter 입력의 cursor 가 wide char 위에서 column 단위로 정렬 — 한글 입력 시 caret 이 글자 옆에 정확히 놓임. `← / →` 로 입력 문자열 중간 cursor 이동 (한글 한 글자 = 2 column 이라도 한 번에 이동), `Home / End` 로 양 끝 점프, `Delete` 로 cursor 위치 char 삭제 — 모든 input pane (Ask / Search / Library filter overlay) 동일 (p9-fb-22). Ask 트랜스크립트는 새 답변이 viewport 아래로 누적될 때 자동으로 tail 을 따라감 (auto-scroll); `j` / `k` 로 위로 스크롤하면 freeze, `Shift-G` 로 다시 bottom + auto-tail 재개. 화면 하단 hint line 은 한국어 동사구로 (`"위로"` / `"아래로"` / `"필터"` / `"타이핑 검색어"` / `"Esc 로 NORMAL 모드"` / `"i 입력모드"` 등) + 현재 (pane, mode) 조합에 맞춰 자동 분기, **첫 fragment 가 항상 `F1 도움말`** (cheatsheet 발견성 보장). 
모든 모드에서 항상 떠 있는 상태바 — `kebab v │ │ docs │ ` (state: streaming/searching/indexing/idle, ingest 진행 중에는 progress 가 같은 자리에 흡수됨). Ask 진입 시 conversation id 8 자 prefix 도 함께 표시. Ask 트랜스크립트와 Inspect 양쪽에서 `PgUp / PgDn` 으로 10 줄씩 페이지 스크롤. Library 의 doc list 위에는 `TITLE / TAGS / UPDATED / CHUNKS` 컬럼 헤더 행 표시 (display-width 정렬, Hangul / CJK 안전). |
| `kebab reset [--all / --data-only / --vector-only / --config-only] [--yes]` | XDG 데이터 wipe. **Irreversible.** TTY 면 confirm prompt, 아니면 `--yes` 필수. `--vector-only` 는 SQLite `embedding_records` 도 함께 truncate (orphan 방지) |
| `kebab eval run / compare` | golden query 회귀 측정 |
-| `kebab schema [--json]` | introspection — wire schemas / capabilities / models / stats 한 번에. `--json` 은 `schema.v1` wire; 사람 모드는 서식 출력. |
+| `kebab schema [--json]` | introspection — wire schemas / capabilities / models / stats 한 번에. `--json` 은 `schema.v1` wire; 사람 모드는 서식 출력. **stats 에 (p9-fb-37) `media_breakdown` (5 keys: markdown / pdf / image / audio / other) + `lang_breakdown` (BCP-47 코드, NULL 은 literal `"null"`) + `index_bytes` (sqlite + lancedb on-disk 합계) + `stale_doc_count` (`config.search.stale_threshold_days` 초과 doc 수) 추가.** |
| `kebab ingest-file ` | 단일 파일 ingest (workspace 외부 가능). 바이트는 `/_external/.` 로 copy. `.kebabignore` 매치 시 stderr warn 후 진행 (explicit ingest 가 bypass intent). |
| `kebab ingest-stdin --title [--source-uri ]` | stdin 의 markdown 본문 ingest. frontmatter (title + source_uri) 자동 prepend. v1 markdown only. |
| `kebab mcp` | MCP (Model Context Protocol) stdio server. agent host (Claude Code / Cursor / OpenAI Agents) 가 spawn 하여 tool 호출 (`search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`). `--config` honor. |
diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index 305397c..fa11508 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -167,7 +167,8 @@ enum Cmd {
/// p9-fb-37: emit pre-fusion lexical / vector / RRF candidate
/// lists + per-stage timing in the response. Bypasses cache
/// (debug intent — fresh run guaranteed). Requires embeddings
- /// to be enabled.
+ /// when `--mode hybrid` or `--mode vector`; lexical mode runs
+ /// without embeddings via a no-op vector stub.
#[arg(long)]
trace: bool,
},
diff --git a/docs/SMOKE.md b/docs/SMOKE.md
index 9a68800..3121076 100644
--- a/docs/SMOKE.md
+++ b/docs/SMOKE.md
@@ -206,6 +206,22 @@ kebab search "rust" --doc-id "" --tag rust --json
Bad `--ingested-after` → `error.v1.code = config_invalid`, exit 2.
Unknown `--media` value → silently empty (no error).
+### Trace + stats (fb-37)
+
+Re-run a search with `--trace` to see per-stage candidate lists + timing (add `--mode lexical` if embeddings are disabled):
+
+```bash
+kebab --config /tmp/kebab-smoke/config.toml search "rust async" --trace --json | jq .trace
+```
+
+Inspect the corpus health surface:
+
+```bash
+kebab --config /tmp/kebab-smoke/config.toml schema --json | jq .stats
+```
+
+Look for: `media_breakdown` (5 keys), `lang_breakdown`, `index_bytes`, `stale_doc_count`.
+
## P6-4 이미지 ingestion 옵션
`config.toml` 에 다음 절을 추가하면 `kebab ingest` 가 `**/*.png` / `**/*.jpg` 등 이미지 자산도 함께 색인합니다 (텍스트만 색인하려면 생략):
diff --git a/docs/wire-schema/v1/schema.schema.json b/docs/wire-schema/v1/schema.schema.json
index f168ff4..0866134 100644
--- a/docs/wire-schema/v1/schema.schema.json
+++ b/docs/wire-schema/v1/schema.schema.json
@@ -54,6 +54,30 @@
{ "type": "string", "format": "date-time" },
{ "type": "null" }
]
+ },
+ "media_breakdown": {
+ "type": "object",
+ "description": "p9-fb-37: per-media-kind doc count. Always 5 keys (markdown/pdf/image/audio/other); kinds with no docs are reported as 0.",
+ "additionalProperties": { "type": "integer", "minimum": 0 }
+ },
+ "lang_breakdown": {
+ "type": "object",
+ "description": "p9-fb-37: per-language doc count. NULL lang keyed as the literal string 'null'. Map may be empty on empty corpus.",
+ "additionalProperties": { "type": "integer", "minimum": 0 }
+ },
+ "index_bytes": {
+ "type": "object",
+ "description": "p9-fb-37: on-disk byte sums.",
+ "required": ["sqlite", "lancedb"],
+ "properties": {
+ "sqlite": { "type": "integer", "minimum": 0 },
+ "lancedb": { "type": "integer", "minimum": 0 }
+ }
+ },
+ "stale_doc_count": {
+ "type": "integer",
+ "minimum": 0,
+ "description": "p9-fb-37: number of docs whose updated_at is more than config.search.stale_threshold_days days old. 0 when threshold=0."
}
}
}
diff --git a/docs/wire-schema/v1/search_response.schema.json b/docs/wire-schema/v1/search_response.schema.json
index 20e6eb8..ca89792 100644
--- a/docs/wire-schema/v1/search_response.schema.json
+++ b/docs/wire-schema/v1/search_response.schema.json
@@ -9,6 +9,26 @@
"schema_version": { "const": "search_response.v1" },
"hits": { "type": "array", "description": "search_hit.v1[]" },
"next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." },
- "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." }
+ "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." },
+ "trace": {
+ "type": "object",
+ "description": "p9-fb-37: present iff caller passed --trace / SearchOpts.trace=true. Lex/vec pre-fusion lists + RRF union + per-stage timing.",
+ "required": ["lexical", "vector", "rrf_inputs", "timing"],
+ "properties": {
+ "lexical": { "type": "array", "items": { "type": "object" } },
+ "vector": { "type": "array", "items": { "type": "object" } },
+ "rrf_inputs":{ "type": "array", "items": { "type": "object" } },
+ "timing": {
+ "type": "object",
+ "required": ["lexical_ms", "vector_ms", "fusion_ms", "total_ms"],
+ "properties": {
+ "lexical_ms": { "type": "integer", "minimum": 0 },
+ "vector_ms": { "type": "integer", "minimum": 0 },
+ "fusion_ms": { "type": "integer", "minimum": 0 },
+ "total_ms": { "type": "integer", "minimum": 0 }
+ }
+ }
+ }
+ }
}
}
diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md
index fea4e2e..f3571af 100644
--- a/integrations/claude-code/kebab/SKILL.md
+++ b/integrations/claude-code/kebab/SKILL.md
@@ -48,7 +48,7 @@ Use when the user wants to **find** a doc, or when you (the model) need raw chun
Input:
```json
-{ "query": "", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null, "tags": null, "lang": null, "path_glob": null, "trust_min": null, "media": null, "ingested_after": null, "doc_id": null }
+{ "query": "", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null, "tags": null, "lang": null, "path_glob": null, "trust_min": null, "media": null, "ingested_after": null, "doc_id": null, "trace": null }
```
- `mode = "hybrid"` is the default-correct choice. Use `"vector"` for semantic-only ("docs about X concept"), `"lexical"` for exact strings ("the literal flag `--foo-bar`").
@@ -57,6 +57,7 @@ Input:
- Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`.
- Cite back to the user as `doc_path § heading_path[-1]` so they can open the source.
- When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one.
+- **`trace: true` (p9-fb-37)** — debug aid. Response carries an extra `trace` block: `lexical[]` + `vector[]` (pre-fusion candidates), `rrf_inputs[]` (RRF union before final cut), and `timing` (`lexical_ms`, `vector_ms`, `fusion_ms`, `total_ms`). Trace bypasses the search cache (always cold). Use sparingly — it bloats the wire response and is for diagnosing "why did this hit / not hit", not normal retrieval.
### `mcp__kebab__ask` — when you need the answer
@@ -133,7 +134,7 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across
Before using streaming or multi-turn features, probe what this binary supports — call `mcp__kebab__schema` (or CLI `kebab schema --json`):
-Returns `schema.v1`: `wire.schemas` (supported wire ids), `capabilities` (bool flags — e.g. `streaming_ask`, `rag_multi_turn`), `models` (version cascade 6-axis), `stats` (doc/chunk/asset count + last_ingest_at). Gate streaming / session flows on `capabilities.streaming_ask` / `capabilities.rag_multi_turn` being `true`. Cheap call (no LLM), once per session.
+Returns `schema.v1`: `wire.schemas` (supported wire ids), `capabilities` (bool flags — e.g. `streaming_ask`, `rag_multi_turn`), `models` (version cascade 6-axis), `stats` (doc/chunk/asset count + last_ingest_at, plus p9-fb-37 health surface: `media_breakdown` per-kind doc counts (5 zero-padded keys: markdown / pdf / image / audio / other), `lang_breakdown` per BCP-47 lang (NULL keyed as the literal string `"null"`), `index_bytes.{sqlite,lancedb}` on-disk byte sums, `stale_doc_count` for docs older than `config.search.stale_threshold_days`). Gate streaming / session flows on `capabilities.streaming_ask` / `capabilities.rag_multi_turn` being `true`. Cheap call (no LLM), once per session.
## Quick health check
diff --git a/tasks/INDEX.md b/tasks/INDEX.md
index db35a0b..803acbc 100644
--- a/tasks/INDEX.md
+++ b/tasks/INDEX.md
@@ -125,7 +125,7 @@ P0~P5 는 직렬. P6~P9 는 P5 이후 병렬 가능.
- [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
- [p9-fb-35 verbatim fetch](p9/p9-fb-35-verbatim-fetch.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
- [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ✅ 머지 (2026-05-10)
- - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ⏳ 미구현, brainstorm 필요 (depends_on 27)
+ - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ✅ 머지 (2026-05-10)
### 🎯 0.5.0 — RAG quality (cascade 동반: V00X + reindex)
- [p9-fb-38 score semantics](p9/p9-fb-38-score-semantics.md) — ⏳ 미구현, brainstorm 필요
diff --git a/tasks/p9/p9-fb-37-trace-and-stats.md b/tasks/p9/p9-fb-37-trace-and-stats.md
index e881ce9..4ed057e 100644
--- a/tasks/p9/p9-fb-37-trace-and-stats.md
+++ b/tasks/p9/p9-fb-37-trace-and-stats.md
@@ -3,7 +3,7 @@ phase: P9
component: kebab-cli + kebab-search + kebab-rag
task_id: p9-fb-37
title: "Trace (--trace) + stats — pipeline 가시성"
-status: open
+status: completed
target_version: 0.4.0
depends_on: [p9-fb-27]
unblocks: []
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent / 사용자가 "왜
# p9-fb-37 — Trace + stats
-> ⏳ **백로그 only — 미구현 (Nice-to-have).** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. trace 의 verbosity level / wire shape / stats 의 별도 명령 vs schema 통합 brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태.
+>
+> - Design: [`docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md`](../../docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md)
+> - Plan: [`docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md`](../../docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md)
## 증상 / 동기
From 6a33d08aea09c515dccfc9411a2f8dfaa9836166 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sun, 10 May 2026 16:26:34 +0900
Subject: [PATCH 13/13] fix(fb-37): address PR #129 round 1 review
- doc TraceFusionInput.fusion_score semantics (single-mode vs hybrid)
- comment why total_ms vs stage sum can drift (millis truncation)
- TODO marker on TUI trace popup filter passthrough
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-core/src/search.rs | 3 +++
crates/kebab-search/src/hybrid.rs | 4 ++++
crates/kebab-tui/src/search.rs | 2 ++
3 files changed, 9 insertions(+)
diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs
index bb66be9..38e41ad 100644
--- a/crates/kebab-core/src/search.rs
+++ b/crates/kebab-core/src/search.rs
@@ -158,6 +158,9 @@ pub struct TraceFusionInput {
pub chunk_id: ChunkId,
pub lexical_rank: Option,
pub vector_rank: Option,
+ /// Hybrid mode: normalized RRF score in `[0, 1]`.
+ /// Lexical / Vector mode: equals the underlying retriever's score
+ /// (no fusion ran). 0.0 for chunks dropped past `target_k`.
pub fusion_score: f32,
}
diff --git a/crates/kebab-search/src/hybrid.rs b/crates/kebab-search/src/hybrid.rs
index 58b6678..7f415a9 100644
--- a/crates/kebab-search/src/hybrid.rs
+++ b/crates/kebab-search/src/hybrid.rs
@@ -391,6 +391,10 @@ impl HybridRetriever {
}
}
+ // total_ms is wall-clock from start; per-stage `lexical_ms` /
+ // `vector_ms` / `fusion_ms` each truncate to whole millis via
+ // `as_millis() as u64`, so their sum can drift below total
+ // (sub-ms losses) — DO NOT assert `total_ms == sum(stages)`.
tb.timing.total_ms = start_total.elapsed().as_millis() as u64;
Ok((final_hits, tb.into_trace()))
}
diff --git a/crates/kebab-tui/src/search.rs b/crates/kebab-tui/src/search.rs
index 9166fe3..13c9f43 100644
--- a/crates/kebab-tui/src/search.rs
+++ b/crates/kebab-tui/src/search.rs
@@ -227,6 +227,8 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
return KeyOutcome::Continue;
}
if let Some((q_text, q_mode)) = last_query {
+ // TODO: thread filters when TUI gains a filter UI (currently
+ // mirrors fire_search which also passes default filters).
let q = kebab_core::SearchQuery {
text: q_text,
mode: q_mode,