From 5f6b2fa259fa37a277e1121b2a691c2657d025ce Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 12:05:31 +0900 Subject: [PATCH 01/13] spec(fb-37): trace + stats design MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - search --trace boolean flag, additive optional `trace` field on search_response.v1 - HybridRetriever search_with_trace returns (hits, SearchTrace) — lex/vec/rrf_inputs + per-stage timing - cache bypass when --trace (debug intent) - schema.v1.stats extended with media_breakdown / lang_breakdown / index_bytes / stale_doc_count - TUI search pane `t` keystroke opens TracePopup - additive minor wire — no schema bump Co-Authored-By: Claude Opus 4.7 (1M context) --- ...6-05-10-p9-fb-37-trace-and-stats-design.md | 360 ++++++++++++++++++ 1 file changed, 360 insertions(+) create mode 100644 docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md diff --git a/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md b/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md new file mode 100644 index 0000000..edb2f87 --- /dev/null +++ b/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md @@ -0,0 +1,360 @@ +--- +title: "p9-fb-37 — Trace + stats design" +phase: P9 +component: kebab-core + kebab-search + kebab-store-sqlite + kebab-app + kebab-cli + kebab-mcp + kebab-tui +task_id: p9-fb-37 +status: design +target_version: 0.5.0 +contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md +contract_sections: [§4 search, §7 RAG, §10 UX] +date: 2026-05-10 +--- + +# p9-fb-37 — Trace + stats + +## Goal + +retrieval pipeline 가시성 + KB 건강 surface. 두 axes: + +- **Trace**: `kebab search Q --trace` — `search_response.v1` 에 optional `trace` 필드 (lexical/vector pre-fusion lists + RRF inputs + per-stage timing). agent / 사용자가 "왜 이 결과가 나왔는지" 진단. 
+- **Stats**: `kebab schema --json` 의 기존 `stats` 객체에 4 필드 추가 (media/lang breakdown + index disk bytes + stale doc count). KB 건강 한 눈에. + +둘 다 wire schema additive minor — 기존 consumer 무영향. trace 는 opt-in (cost 0 when off), stats 는 항상 채움 (저렴한 GROUP BY). + +## Behavior contract + +### CLI flag + +``` +kebab search [--trace] [--json] [기존 flags ...] +kebab schema [--json] +``` + +`--trace` boolean, default false. 활성 시: +- HybridRetriever 가 lexical / vector 각 단계 출력 + per-stage timing 캡처. +- search cache **bypass 강제** (debug intent — cache hit timing 무의미). +- `--json` 면 `search_response.v1.trace` 채움. +- non-`--json` 면 hits 출력 후 `Trace:` section pretty-print (lex/vec 카운트 + timing + top 3 hit per stage). + +`kebab schema --json` 의 `stats` 4 필드 항상 출력 (no flag). + +### Wire shape + +**`search_response.v1`** (additive minor — schema bump 없음): + +```jsonc +{ + "schema_version": "search_response.v1", + "hits": [/* search_hit.v1 */], + "next_cursor": null, + "truncated": false, + "trace": { // OPTIONAL — present iff --trace + "lexical": [ + {"chunk_id":"c1","doc_id":"d1","doc_path":"a.md","rank":1,"score":0.42}, ... + ], + "vector": [ + {"chunk_id":"c2","doc_id":"d2","doc_path":"b.md","rank":1,"score":0.81}, ... + ], + "rrf_inputs": [ + {"chunk_id":"c1","lexical_rank":2,"vector_rank":3,"fusion_score":0.0234}, ... + ], + "timing": {"lexical_ms":12,"vector_ms":45,"fusion_ms":1,"total_ms":58} + } +} +``` + +`#[serde(default, skip_serializing_if = "Option::is_none")]` — `--trace` 없으면 `trace` 키 자체 부재. 
+ +**`schema.v1.stats`** (additive minor — schema bump 없음): + +```jsonc +"stats": { + "doc_count": 50, + "chunk_count": 200, + "asset_count": 50, + "last_ingest_at": "2026-05-10T12:34:56Z", + // fb-37 신규 + "media_breakdown": {"markdown":12,"pdf":3,"image":5,"audio":0,"other":0}, + "lang_breakdown": {"en":10,"ko":5,"null":5}, + "index_bytes": {"sqlite":12345678,"lancedb":23456789}, + "stale_doc_count": 2 +} +``` + +- `media_breakdown`: `MEDIA_KINDS` (markdown/pdf/image/audio/other) 5 키 항상 채움 (0 포함). `assets.media_type` JSON 의 dual shape (text vs object) 는 fb-36 과 동일한 CASE WHEN 패턴. +- `lang_breakdown`: 비어있을 수 있음 (corpus 비면 `{}`). NULL lang 은 `"null"` 문자열 키. +- `index_bytes.sqlite` = `*.sqlite` + `*.sqlite-wal` + `*.sqlite-shm` 합. `lancedb` = 디렉터리 recursive 합 (없으면 0). +- `stale_doc_count` = `documents.updated_at < (now - threshold_days)` count. `config.search.stale_threshold_days = 0` 이면 항상 0 (fb-32 의미). + +### Edge cases + +| 상황 | 동작 | +|------|------| +| `--trace --mode lexical` | `vector: []`, `vector_ms: 0`. 
rrf_inputs 모두 `vector_rank: null` | +| `--trace --mode vector` | 대칭 | +| `--trace` cache 가 hit 가능 query | cache bypass 강제, fresh run | +| 빈 corpus | hits=[], trace lex/vec=[], timing 정상 (모두 작은 값) | +| index_bytes lancedb 디렉터리 부재 | 0 | +| sqlite WAL/SHM aux 파일 부재 | 메인 `.sqlite` 만 합산 | +| stale_doc_count threshold=0 | 0 (fb-32) | +| cursor pagination + `--trace` | 첫 호출 trace, next_cursor 따라 재호출 trace 부재 (재요청 필요) | +| `--trace` non-`--json` mode | hits + trace 텍스트 출력 (lex/vec count, timing, top 3 per stage) | + +### MCP `SearchInput` 확장 + +```rust +pub struct SearchInput { + pub query: String, + pub mode: Option, + pub k: Option, + pub max_tokens: Option, // fb-34 + pub snippet_chars: Option, // fb-34 + pub cursor: Option, // fb-34 + pub tags: Option>, // fb-36 + pub lang: Option, // fb-36 + pub path_glob: Option, // fb-36 + pub trust_min: Option, // fb-36 + pub media: Option>, // fb-36 + pub ingested_after: Option, // fb-36 + pub doc_id: Option, // fb-36 + // fb-37 + pub trace: Option, +} +``` + +`Some(true)` = trace ON, `Some(false)` / `None` = OFF. 출력은 wire 와 동일 (trace 필드 mirror). + +### TUI Search pane + +- 결과 표시 중 (`SearchPane.results` 비어있지 않음) `t` keybind → `TracePopup` 모달. +- TUI 가 `kebab_app::search_with_trace_with_config` 재호출 (현재 query, k, mode, filters 전부). +- popup: 단일 scroll list (lex section / vec section / rrf section 헤더로 구분), `Esc` 닫기, `j/k` 또는 ↑↓ scroll. +- 기존 inspect pane 무수정. + +## Allowed / forbidden dependencies + +- `kebab-core`: 신규 dep 없음. domain types 추가만. +- `kebab-store-sqlite`: 신규 dep 없음. rusqlite + std::fs 만. +- `kebab-search`: 신규 dep 없음. std::time::Instant 사용. +- `kebab-app`: 신규 dep 없음. facade 확장. +- `kebab-cli`: 신규 dep 없음. clap flag 추가. +- `kebab-mcp`: 신규 dep 없음. SearchInput 확장. +- `kebab-tui`: 신규 dep 없음. ratatui popup widget. + +`kebab-core` 의 다른 `kebab-*` 의존 금지 룰 그대로. UI 크레이트는 facade 만. 
+ +## Public surface delta + +### kebab-core (`search.rs`) + +```rust +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct SearchTrace { + pub lexical: Vec, + pub vector: Vec, + pub rrf_inputs: Vec, + pub timing: TraceTiming, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TraceCandidate { + pub chunk_id: ChunkId, + pub doc_id: DocumentId, + pub doc_path: WorkspacePath, + pub rank: u32, + pub score: f32, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TraceFusionInput { + pub chunk_id: ChunkId, + pub lexical_rank: Option, + pub vector_rank: Option, + pub fusion_score: f32, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct TraceTiming { + pub lexical_ms: u64, + pub vector_ms: u64, + pub fusion_ms: u64, + pub total_ms: u64, +} +``` + +`IndexStats` 확장 (`stats.rs` 또는 위치 동일): + +```rust +pub struct IndexStats { + // 기존 + pub doc_count: u64, + pub chunk_count: u64, + pub asset_count: u64, + pub last_ingest_at: Option, + // fb-37 + #[serde(default)] + pub media_breakdown: BTreeMap, + #[serde(default)] + pub lang_breakdown: BTreeMap, + #[serde(default)] + pub index_bytes: IndexBytes, + #[serde(default)] + pub stale_doc_count: u64, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct IndexBytes { + pub sqlite: u64, + pub lancedb: u64, +} +``` + +`#[serde(default)]` — 옛 JSON 누락 시 zero-valued 으로 deserialize (backwards-compat). + +### kebab-store-sqlite (`stats.rs`) + +```rust +pub fn breakdowns(conn: &rusqlite::Connection, threshold_days: u64) + -> rusqlite::Result<(BTreeMap, BTreeMap, u64)>; + +pub fn index_bytes(data_dir: &Path) -> std::io::Result; +``` + +기존 stats helper 가 이 두 함수 호출해 `IndexStats` 채움. 
신규 query: +- media: `SELECT CASE WHEN json_type(media_type)='text' THEN json_extract(media_type,'$') ELSE (SELECT key FROM json_each(media_type) LIMIT 1) END AS kind, COUNT(DISTINCT d.doc_id) FROM documents d JOIN assets a ON a.asset_id=d.asset_id GROUP BY kind` +- lang: `SELECT COALESCE(lang,'null') AS l, COUNT(*) FROM documents GROUP BY l` +- stale: `SELECT COUNT(*) FROM documents WHERE updated_at < ?` (threshold_days > 0 일 때만; 0 면 0 반환). + +### kebab-search (`hybrid.rs`) + +```rust +impl HybridRetriever { + pub fn search_with_trace(&self, query: &SearchQuery) + -> Result<(Vec, SearchTrace)>; +} +``` + +기존 `Retriever::search` 무변경. `search_with_trace` 는 hybrid 전용 (lexical/vector mode 도 한 쪽만 채워 동일 type 반환). 내부: +1. `Instant::now()` 기록, lex retriever 호출, lex_ms 측정. +2. 같은 패턴 vec. +3. fuse — fusion_ms 측정. +4. trace 빌드: lex/vec 전체 list → TraceCandidate 매핑. rrf_inputs = lex ∪ vec union (chunk_id 기준), 각 entry 의 lexical_rank/vector_rank/fusion_score 캡처. fusion 결과 ranking 과 동일. +5. total_ms = 처음~끝. + +### kebab-app (`app.rs`) + +```rust +#[doc(hidden)] +pub fn search_with_trace_with_config( + cfg: kebab_config::Config, + query: &str, + opts: SearchOpts, // 기존 + trace: bool +) -> Result<(SearchResponse, Option)>; +``` + +`trace == true` 시: +- cache bypass (`no_cache = true` 강제). +- `HybridRetriever::search_with_trace` 호출. +- `SearchResponse` 빌드 + trace 별도 반환 (caller 가 wire 합성). + +기존 `search_with_config` 무변경 (zero-overhead path). + +### kebab-cli (`Cmd::Search`) + +```rust +Cmd::Search { + // 기존 + fb-34 + fb-36 + query, k, mode, explain, no_cache, + max_tokens, snippet_chars, cursor, + tag, lang, path_glob, trust_min, media, ingested_after, doc_id, + // fb-37 + #[arg(long)] trace: bool, +} +``` + +dispatch: +- `trace == false` → 기존 `search_with_config` 경로. +- `trace == true` → `search_with_trace_with_config` 호출, wire 합성 시 `search_response.v1` JSON 에 `trace` 필드 inject. 
+ +non-`--json` 출력: +- `--trace` 면 hits 후 `\nTrace:\n lexical (N hits, Xms): top3...\n vector (M hits, Yms): top3...\n rrf (Zms): top3...\n total: Wms`. + +### kebab-mcp (`tools/search.rs`) + +`SearchInput.trace: Option` 추가. dispatch 시 `Some(true)` 이면 위 `_with_trace` 호출. 출력 JSON 에 trace 합성 (wire 와 동일). + +### kebab-tui (`search.rs` + `trace_popup.rs` 신규) + +- `App` 에 `trace_popup: Option` 필드. +- search pane key handler `t` → `kebab_app::search_with_trace_with_config` (현재 query/opts) 호출 → popup state 채움. +- `trace_popup.rs`: ratatui Paragraph 또는 List 로 lex/vec/rrf 3 section, scroll, `Esc` 닫기. +- cheatsheet 에 `t = trace` 한 줄 추가. + +## Test plan + +| kind | description | +|------|-------------| +| unit (kebab-core) | `SearchTrace` serde roundtrip — 모든 필드 | +| unit (kebab-core) | `IndexStats` 신규 4 필드 default — 비어있는 map / 0 bytes / 0 stale | +| unit (kebab-store-sqlite) | `breakdowns`: 3 docs (md/md/pdf, en/en/null) → media `{markdown:2,pdf:1,image:0,audio:0,other:0}` (5키 패딩 적용), lang `{en:2,null:1}` | +| unit (kebab-store-sqlite) | `index_bytes`: temp dir 내 sqlite 파일 + 빈 lancedb dir → sqlite>0, lancedb=0 | +| unit (kebab-store-sqlite) | `breakdowns` stale_doc_count: threshold 7 day, 8일 전 doc 1 + 어제 doc 2 → 1 | +| unit (kebab-store-sqlite) | `breakdowns` threshold=0 → stale_doc_count=0 | +| unit (kebab-search/hybrid) | `search_with_trace`: lex/vec list 가 단일 retriever 호출 결과 == | +| unit (kebab-search/hybrid) | timing 모두 정의됨, total ≥ lex+vec+fusion 의 sum (sequential 가정) | +| unit (kebab-search/hybrid) | mode=lexical → vector=[], vector_ms=0, rrf_inputs.vector_rank 모두 None | +| 통합 (kebab-cli) | `kebab search Q --trace --json` → trace 키 존재, lexical/vector/rrf_inputs/timing 모두 valid shape | +| 통합 (kebab-cli) | `kebab search Q --json` (no --trace) → trace 키 부재 | +| 통합 (kebab-cli) | `kebab schema --json` → media_breakdown 5 키, lang_breakdown 가능 키, index_bytes 두 필드, stale_doc_count 모두 존재 | +| 통합 (kebab-cli) | 빈 corpus `kebab schema --json` → media_breakdown 5키 모두 0, 
lang_breakdown {} | +| 통합 (kebab-cli) | `kebab search Q --trace` (non-json) → stdout 에 `Trace:` section, lex/vec count + timing 표시 | +| 통합 (kebab-mcp) | search input `trace:true` → 응답 JSON 에 trace 필드 | +| 통합 (kebab-mcp) | search input `trace` 미지정 → 응답 trace 부재 | +| TUI (kebab-tui) | search pane 결과 있는 상태에서 `t` 키 → popup 열림 (state transitions) | +| TUI (kebab-tui) | popup 열린 상태 `Esc` → popup 닫힘 | + +`media_breakdown` 5키 패딩 책임: `kebab-store-sqlite::breakdowns` 가 SQL GROUP BY 결과를 받아 `MEDIA_KINDS` 순회해 누락 키 0 으로 채움. + +## Implementation steps (high-level) + +1. `kebab-core`: SearchTrace + 3 sibling struct + IndexStats 4 필드 + 단위 테스트. +2. `kebab-store-sqlite::stats`: breakdowns + index_bytes 헬퍼 + 단위 테스트. +3. `kebab-store-sqlite::stats`: 기존 IndexStats 빌더가 신규 4 필드 채우도록. +4. `kebab-search::hybrid`: `search_with_trace` 구현 + 단위 테스트. +5. `kebab-app`: `search_with_trace_with_config` facade + cache bypass. +6. `kebab-cli::Cmd::Search`: `--trace` flag + dispatch + JSON wire 합성 + non-JSON pretty-print. +7. `kebab-cli` 통합 테스트. +8. `kebab-mcp::tools::search`: SearchInput.trace + dispatch + 통합 테스트. +9. `kebab-tui::search` + `trace_popup`: `t` keybind + popup widget + cheatsheet. +10. README + SMOKE + INDEX/spec status flip + SKILL. + +## Risks / notes + +- **timing 정확도**: 현재 hybrid sequential. 추후 병렬화 시 `total_ms = max(lex,vec) + fusion` 으로 재정의 — 그 시점 schema doc note 갱신. +- **lancedb dir walk cost**: 큰 corpus 에서 O(file count) IO. 도그푸딩 corpus 작아 무시. 큰 corpus 만나면 cache 또는 lazy 도입 검토. +- **`media_breakdown` JSON shape**: fb-36 과 동일한 CASE WHEN 패턴 재사용 — `MediaType` serde 의 dual shape (text variant vs tuple variant) 처리. +- **lang null 키**: ASCII string `"null"` 사용. ISO 639 어떤 코드와도 충돌 X (ISO 639 코드는 2~3자, `"null"` 은 4자). +- **cache bypass when --trace**: agent 가 인지해야 (SKILL/README 명시). 안 그러면 trace timing 이 cache hit 의 sub-ms 보고할 위험. +- **wire backwards-compat**: `trace` 필드 optional + skip_serializing_if. `IndexStats` 신규 필드 `#[serde(default)]` 로 새 reader 가 옛 응답 (신규 필드 누락) deserialize 가능. +- **TUI popup**: 별도 `t` 키. 
충돌 검사 — 현재 search pane keybinds 확인 (i=inspect, /=focus, j/k=move, n=next, p=prev). `t` 미사용. + +## Out of scope + +- per-stage filter 적용 전/후 카운트 (filter-debug 별도 작업). +- search 단계 병렬화 (sequential 유지). +- lance 테이블 별 / column 별 index_bytes (단일 sum). +- stats 시계열 (corpus_revision history). +- `--trace-level` verbosity (single boolean). +- TUI inspect pane 안 trace 통합 (search popup 으로 격리). +- `kebab stats` 별도 명령 (schema 통합 결정). +- `--explain` flag deprecation 알림 (현재 search dead, 무영향 — 별도 cleanup task). + +## Documentation updates (implementation PR 동시) + +- `README.md`: `kebab search` row 의 flag 표기에 `--trace` 추가, `kebab schema` row 에 신규 stats 한 줄 언급. +- `docs/SMOKE.md`: `--trace` walkthrough + `kebab schema --json` 출력 sample. +- `tasks/p9/p9-fb-37-trace-and-stats.md`: `status: open → completed`, design/plan 링크 추가. +- `tasks/INDEX.md`: fb-37 행 ✅. +- `integrations/claude-code/kebab/SKILL.md`: `mcp__kebab__search` `trace` 입력 + 출력 trace shape 명시. `kebab schema` 신규 stats 필드 mention. +- `docs/wire-schema/v1/search_response.schema.json`: `trace` optional 필드 추가. +- `docs/wire-schema/v1/schema.schema.json`: `stats` 4 신규 필드 추가. -- 2.49.1 From fb31befef19b8726dd836627a4864521480300bf Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 12:14:26 +0900 Subject: [PATCH 02/13] plan(fb-37): trace + stats implementation plan 10 tasks: kebab-core types, store breakdowns/index_bytes helpers, extended CountSummary + Stats wire mirror, HybridRetriever search_with_trace, App SearchResponse.trace threading, CLI --trace flag, integration tests, MCP SearchInput.trace, TUI TracePopup, docs (wire schema + README + SMOKE + INDEX + SKILL). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../2026-05-10-p9-fb-37-trace-and-stats.md | 2036 +++++++++++++++++ 1 file changed, 2036 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md diff --git a/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md b/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md new file mode 100644 index 0000000..7ec475e --- /dev/null +++ b/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md @@ -0,0 +1,2036 @@ +# fb-37 Trace + Stats Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Surface retrieval pipeline trace (`kebab search Q --trace`) and richer KB stats (`kebab schema --json`) for agent / user debugging. + +**Architecture:** Two additive surfaces. Trace = optional `trace` field on `search_response.v1` populated when `SearchOpts.trace = true`; HybridRetriever exposes a parallel `search_with_trace` method capturing pre-fusion lex/vec lists + per-stage timing. Stats = four new fields (`media_breakdown` / `lang_breakdown` / `index_bytes` / `stale_doc_count`) on existing `schema.v1.stats` populated unconditionally by new SQLite GROUP BY + fs::metadata helpers. TUI search pane gains `t` keystroke that re-runs the query with trace and opens a popup. + +**Tech Stack:** Rust 2024, rusqlite (SQLite WHERE / GROUP BY / json_type / json_extract / json_each), std::time::Instant, std::fs, serde, ratatui. 
+ +**Spec:** `docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md` + +--- + +## File map + +**Create:** +- `crates/kebab-search/src/trace.rs` — trace timing + capture helpers (kept separate from `hybrid.rs` so `hybrid.rs` stays focused) +- `crates/kebab-store-sqlite/src/stats_ext.rs` — `breakdowns()` + `index_bytes()` helpers +- `crates/kebab-tui/src/trace_popup.rs` — TUI popup widget + state +- `crates/kebab-cli/tests/wire_search_trace.rs` — `--trace` integration tests +- `crates/kebab-cli/tests/wire_schema_breakdowns.rs` — `kebab schema` extended stats integration tests +- `crates/kebab-mcp/tests/tools_call_search_trace.rs` — MCP search trace integration test + +**Modify:** +- `crates/kebab-core/src/search.rs` — add `SearchTrace` / `TraceCandidate` / `TraceFusionInput` / `TraceTiming` + `IndexBytes` types; extend `SearchOpts` with `trace: bool` +- `crates/kebab-store-sqlite/src/store.rs` — extend `CountSummary` with new fields, populate via new helpers +- `crates/kebab-app/src/schema.rs` — extend `Stats` mirror with new fields, wire collect_stats +- `crates/kebab-app/src/app.rs` — extend `SearchResponse` with `trace: Option`, thread trace through `App::search_with_opts` +- `crates/kebab-search/src/hybrid.rs` — add `HybridRetriever::search_with_trace` +- `crates/kebab-cli/src/main.rs` — add `--trace` flag to `Cmd::Search`, dispatch + non-JSON pretty-print +- `crates/kebab-cli/src/wire.rs` — extend `wire_search_response` to serialize `trace` field when present +- `crates/kebab-mcp/src/tools/search.rs` — add `trace: Option` to `SearchInput`, dispatch through +- `crates/kebab-tui/src/search.rs` — add `t` keystroke handler invoking trace + opening popup +- `crates/kebab-tui/src/app.rs` — store `trace_popup: Option` +- `crates/kebab-tui/src/cheatsheet.rs` — add `t = trace` line +- `crates/kebab-tui/src/lib.rs` — register `trace_popup` module +- `docs/wire-schema/v1/search_response.schema.json` — declare optional `trace` field +- 
`docs/wire-schema/v1/schema.schema.json` — declare new stats fields +- `README.md`, `docs/SMOKE.md`, `tasks/p9/p9-fb-37-trace-and-stats.md`, `tasks/INDEX.md`, `integrations/claude-code/kebab/SKILL.md` + +--- + +## Task 1: Trace + IndexBytes domain types in kebab-core + +**Files:** +- Modify: `crates/kebab-core/src/search.rs` + +- [ ] **Step 1: Write failing test for SearchTrace serde roundtrip** + +Append to `crates/kebab-core/src/search.rs` `mod tests`: +```rust +#[test] +fn search_trace_serde_roundtrip() { + let t = SearchTrace { + lexical: vec![TraceCandidate { + chunk_id: ChunkId("c1".into()), + doc_id: DocumentId("d1".into()), + doc_path: WorkspacePath::new("a.md".into()).unwrap(), + rank: 1, + score: 0.42, + }], + vector: vec![], + rrf_inputs: vec![TraceFusionInput { + chunk_id: ChunkId("c1".into()), + lexical_rank: Some(1), + vector_rank: None, + fusion_score: 0.0234, + }], + timing: TraceTiming { + lexical_ms: 12, + vector_ms: 0, + fusion_ms: 1, + total_ms: 14, + }, + }; + let v = serde_json::to_value(&t).unwrap(); + assert_eq!(v["timing"]["lexical_ms"], 12); + assert_eq!(v["lexical"][0]["score"], 0.42_f32); + let back: SearchTrace = serde_json::from_value(v).unwrap(); + assert_eq!(back, t); +} + +#[test] +fn index_bytes_default_is_zero() { + let b = IndexBytes::default(); + assert_eq!(b.sqlite, 0); + assert_eq!(b.lancedb, 0); +} + +#[test] +fn search_opts_trace_default_false() { + let opts = SearchOpts::default(); + assert!(!opts.trace); +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cargo test -p kebab-core --lib +``` +Expected: compile errors — `SearchTrace`, `TraceCandidate`, `TraceFusionInput`, `TraceTiming`, `IndexBytes` not defined; `SearchOpts.trace` field missing. + +- [ ] **Step 3: Add types** + +Append to `crates/kebab-core/src/search.rs` (after existing `SearchOpts`): + +```rust +/// p9-fb-37: search retrieval pipeline trace. 
Populated only when +/// `SearchOpts.trace = true`; `None` on the wrapping `SearchResponse` +/// otherwise. `lexical` / `vector` are pre-fusion candidate lists +/// (each retriever's full output for the fanout query). `rrf_inputs` +/// is the union (chunk_id) used by RRF, with each side's rank +/// captured. `timing` is wall-clock per stage. +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +pub struct SearchTrace { + pub lexical: Vec, + pub vector: Vec, + pub rrf_inputs: Vec, + pub timing: TraceTiming, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TraceCandidate { + pub chunk_id: ChunkId, + pub doc_id: DocumentId, + pub doc_path: WorkspacePath, + pub rank: u32, + pub score: f32, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TraceFusionInput { + pub chunk_id: ChunkId, + pub lexical_rank: Option, + pub vector_rank: Option, + pub fusion_score: f32, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct TraceTiming { + pub lexical_ms: u64, + pub vector_ms: u64, + pub fusion_ms: u64, + pub total_ms: u64, +} + +/// p9-fb-37: on-disk index size breakdown. Mirrored on the +/// wire `schema.v1.stats.index_bytes` block. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct IndexBytes { + pub sqlite: u64, + pub lancedb: u64, +} +``` + +Extend `SearchOpts` (replace the existing struct definition): + +```rust +/// p9-fb-34: caller-supplied output budget knobs for `App::search_with_opts`. +/// All `None` = no enforcement (existing behavior). +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +pub struct SearchOpts { + /// chars/4 approximation of wire JSON token cost. None = no cap. + pub max_tokens: Option, + /// Per-hit snippet character cap. None = use config default. + pub snippet_chars: Option, + /// Opaque base64 cursor from a previous response. None = first page. 
+ pub cursor: Option, + /// p9-fb-37: when true, capture pipeline trace (cache bypassed, + /// lex / vec pre-fusion lists + timing populated on the response). + #[serde(default)] + pub trace: bool, +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +```bash +cargo test -p kebab-core --lib +``` +Expected: all 3 new tests pass; existing tests unaffected. + +- [ ] **Step 5: Commit** + +```bash +git add crates/kebab-core/src/search.rs +git commit -m "feat(core): SearchTrace + IndexBytes types + SearchOpts.trace (fb-37)" +``` + +--- + +## Task 2: SQLite breakdowns helper + +**Files:** +- Create: `crates/kebab-store-sqlite/src/stats_ext.rs` +- Modify: `crates/kebab-store-sqlite/src/lib.rs` (register module) + +- [ ] **Step 1: Write failing tests** + +Create `crates/kebab-store-sqlite/src/stats_ext.rs`: + +```rust +//! p9-fb-37: extended stats helpers — per-media / per-lang doc counts, +//! stale doc count, on-disk index byte sums. + +use std::collections::BTreeMap; +use std::path::Path; + +use kebab_core::{IndexBytes, MEDIA_KINDS}; +use rusqlite::Connection; + +/// Returns `(media_breakdown, lang_breakdown, stale_doc_count)`. +/// +/// `media_breakdown` always contains all 5 `MEDIA_KINDS` (zero-padded). +/// `lang_breakdown` only contains observed languages; NULL lang is +/// keyed as the literal string `"null"`. `stale_doc_count` is 0 when +/// `threshold_days == 0` (mirrors fb-32 staleness disable semantics). +pub fn breakdowns( + conn: &Connection, + threshold_days: u64, +) -> rusqlite::Result<(BTreeMap, BTreeMap, u64)> { + // media: dual JSON shape — text variant ("markdown") vs object + // variant ({"image":{"format":"png"}}). Same CASE WHEN as fb-36. 
+ let mut media: BTreeMap = MEDIA_KINDS + .iter() + .map(|k| ((*k).to_string(), 0u64)) + .collect(); + let mut stmt = conn.prepare( + "SELECT \ + CASE \ + WHEN json_type(a.media_type) = 'text' \ + THEN json_extract(a.media_type, '$') \ + ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \ + END AS kind, \ + COUNT(DISTINCT d.doc_id) \ + FROM documents d JOIN assets a ON a.asset_id = d.asset_id \ + GROUP BY kind", + )?; + let rows = stmt.query_map([], |r| { + Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?)) + })?; + for row in rows { + let (kind, n) = row?; + media.insert(kind, n); + } + + let mut lang: BTreeMap = BTreeMap::new(); + let mut stmt = conn.prepare( + "SELECT COALESCE(lang, 'null') AS l, COUNT(*) \ + FROM documents GROUP BY l", + )?; + let rows = stmt.query_map([], |r| { + Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?)) + })?; + for row in rows { + let (l, n) = row?; + lang.insert(l, n); + } + + let stale: u64 = if threshold_days == 0 { + 0 + } else { + let secs = (threshold_days as i64) * 86_400; + let cutoff = time::OffsetDateTime::now_utc() + - time::Duration::seconds(secs); + let cutoff_str = cutoff + .format(&time::format_description::well_known::Rfc3339) + .expect("RFC3339 format"); + conn.query_row( + "SELECT COUNT(*) FROM documents WHERE updated_at < ?", + [cutoff_str], + |r| r.get(0), + )? + }; + + Ok((media, lang, stale)) +} + +/// Sum on-disk bytes of the SQLite database (main + WAL + SHM) and +/// the LanceDB directory tree. Missing files / dir = 0. +pub fn index_bytes(data_dir: &Path) -> std::io::Result { + fn file_size_or_zero(p: &Path) -> u64 { + std::fs::metadata(p).map(|m| m.len()).unwrap_or(0) + } + fn dir_walk_sum(p: &Path) -> std::io::Result { + if !p.exists() { + return Ok(0); + } + let mut total = 0u64; + for entry in std::fs::read_dir(p)? 
{ + let entry = entry?; + let ty = entry.file_type()?; + if ty.is_dir() { + total += dir_walk_sum(&entry.path())?; + } else if ty.is_file() { + total += entry.metadata()?.len(); + } + } + Ok(total) + } + + let sqlite_main = data_dir.join("kebab.sqlite"); + let sqlite_wal = data_dir.join("kebab.sqlite-wal"); + let sqlite_shm = data_dir.join("kebab.sqlite-shm"); + let sqlite = file_size_or_zero(&sqlite_main) + + file_size_or_zero(&sqlite_wal) + + file_size_or_zero(&sqlite_shm); + let lancedb = dir_walk_sum(&data_dir.join("lancedb"))?; + Ok(IndexBytes { sqlite, lancedb }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn open_fresh() -> (tempfile::TempDir, crate::SqliteStore) { + let dir = tempfile::tempdir().unwrap(); + let mut cfg = kebab_config::Config::defaults(); + cfg.storage.data_dir = dir.path().to_string_lossy().into_owned(); + let store = crate::SqliteStore::open(&cfg).unwrap(); + store.run_migrations().unwrap(); + (dir, store) + } + + #[test] + fn breakdowns_empty_corpus() { + let (_dir, store) = open_fresh(); + let conn = store.read_conn(); + let (media, lang, stale) = breakdowns(&conn, 0).unwrap(); + // 5 keys all zero, lang map empty, stale 0. 
+ assert_eq!(media.len(), 5); + for k in MEDIA_KINDS { + assert_eq!(media.get(*k), Some(&0u64)); + } + assert!(lang.is_empty()); + assert_eq!(stale, 0); + } + + #[test] + fn index_bytes_includes_sqlite_main() { + let (dir, _store) = open_fresh(); + let b = index_bytes(dir.path()).unwrap(); + assert!(b.sqlite > 0, "main sqlite file should exist after migrations"); + assert_eq!(b.lancedb, 0); + } + + #[test] + fn index_bytes_lancedb_dir_walk() { + let dir = tempfile::tempdir().unwrap(); + let lance = dir.path().join("lancedb"); + std::fs::create_dir_all(lance.join("vectors.lance")).unwrap(); + std::fs::write( + lance.join("vectors.lance").join("data.bin"), + vec![0u8; 1024], + ) + .unwrap(); + let b = index_bytes(dir.path()).unwrap(); + assert_eq!(b.lancedb, 1024); + } +} +``` + +Modify `crates/kebab-store-sqlite/src/lib.rs`. Find the existing `pub mod` declarations and add: + +```rust +pub mod stats_ext; +``` + +- [ ] **Step 2: Run tests to verify they pass** + +```bash +cargo test -p kebab-store-sqlite stats_ext +``` +Expected: 3 new tests pass. Step 1 already contains the implementation alongside its tests, so this task has no failing-first run. + +If the build fails instead (e.g. `MEDIA_KINDS` is not exported from `kebab-core`), resolve the compile issue, then run again: +```bash +cargo test -p kebab-store-sqlite stats_ext +``` +Expected: 3 new tests pass. 
+ +- [ ] **Step 3: Commit** + +```bash +git add crates/kebab-store-sqlite/src/stats_ext.rs crates/kebab-store-sqlite/src/lib.rs +git commit -m "feat(store): breakdowns + index_bytes helpers (fb-37)" +``` + +--- + +## Task 3: Extend CountSummary + wire to schema.v1.stats + +**Files:** +- Modify: `crates/kebab-store-sqlite/src/store.rs` +- Modify: `crates/kebab-app/src/schema.rs` + +- [ ] **Step 1: Write failing test in kebab-app** + +Append to `crates/kebab-app/src/schema.rs` `mod tests` section (or create one if absent — check around line 200+): + +```rust +#[cfg(test)] +mod tests_stats_ext { + use super::*; + + #[test] + fn stats_includes_breakdowns_and_bytes_on_fresh_corpus() { + let dir = tempfile::tempdir().unwrap(); + let mut cfg = kebab_config::Config::defaults(); + cfg.storage.data_dir = dir.path().to_string_lossy().into_owned(); + // Bring up migrations so the sqlite file is created. + let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap(); + store.run_migrations().unwrap(); + drop(store); + + let s = schema_with_config(&cfg).unwrap(); + // 5 keys padded. + assert_eq!(s.stats.media_breakdown.len(), 5); + assert_eq!(s.stats.media_breakdown.get("markdown"), Some(&0)); + assert_eq!(s.stats.media_breakdown.get("pdf"), Some(&0)); + // lang map empty on empty corpus. + assert!(s.stats.lang_breakdown.is_empty()); + // sqlite bytes positive after migrations, lancedb 0. + assert!(s.stats.index_bytes.sqlite > 0); + assert_eq!(s.stats.index_bytes.lancedb, 0); + assert_eq!(s.stats.stale_doc_count, 0); + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cargo test -p kebab-app stats_includes_breakdowns_and_bytes_on_fresh_corpus +``` +Expected: compile error — `Stats` lacks `media_breakdown`, `lang_breakdown`, `index_bytes`, `stale_doc_count`. + +- [ ] **Step 3: Extend `CountSummary`** + +Modify `crates/kebab-store-sqlite/src/store.rs`. 
Find `pub struct CountSummary` (~line 595-606) and replace with: + +```rust +#[derive(Debug, Clone)] +pub struct CountSummary { + pub doc_count: u64, + pub chunk_count: u64, + pub asset_count: u64, + /// ISO-8601 timestamp of the most-recently updated document row, or + /// `None` when the store is empty. + pub last_ingest_at: Option, + /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded). + pub media_breakdown: std::collections::BTreeMap, + /// p9-fb-37: per-language doc count, NULL keyed as `"null"`. + pub lang_breakdown: std::collections::BTreeMap, + /// p9-fb-37: docs whose `updated_at < now - threshold_days`. 0 when threshold=0. + pub stale_doc_count: u64, +} +``` + +Modify `count_summary` body (around line 615-650) to populate new fields. Replace the body of `pub fn count_summary(&self) -> anyhow::Result`: + +```rust +pub fn count_summary(&self) -> anyhow::Result { + use anyhow::Context; + use rusqlite::OptionalExtension; + + let conn = self.read_conn(); + + let doc_count: u64 = conn + .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)) + .context("count documents")?; + let chunk_count: u64 = conn + .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0)) + .context("count chunks")?; + let asset_count: u64 = conn + .query_row("SELECT COUNT(*) FROM assets", [], |r| r.get(0)) + .context("count assets")?; + let last_ingest_at: Option = conn + .query_row("SELECT MAX(updated_at) FROM documents", [], |r| r.get(0)) + .optional() + .context("max updated_at")? + .flatten(); + + // p9-fb-37: pull threshold from config-defaults via a sentinel — + // CountSummary callers that want correct stale_doc_count must + // pass through count_summary_with_threshold. Default path uses 0 + // (matches fb-32 disable semantics) for backwards compat. 
+ let (media_breakdown, lang_breakdown, stale_doc_count) = + crate::stats_ext::breakdowns(&conn, 0).context("breakdowns")?; + + Ok(CountSummary { + doc_count, + chunk_count, + asset_count, + last_ingest_at, + media_breakdown, + lang_breakdown, + stale_doc_count, + }) +} + +/// p9-fb-37: variant that honors `config.search.stale_threshold_days`. +/// Callers who need a meaningful `stale_doc_count` (e.g. `kebab schema`) +/// pass the configured threshold; the older `count_summary` returns 0. +pub fn count_summary_with_threshold( + &self, + threshold_days: u64, +) -> anyhow::Result { + use anyhow::Context; + let mut s = self.count_summary()?; + let conn = self.read_conn(); + let (m, l, stale) = crate::stats_ext::breakdowns(&conn, threshold_days) + .context("breakdowns_with_threshold")?; + s.media_breakdown = m; + s.lang_breakdown = l; + s.stale_doc_count = stale; + Ok(s) +} +``` + +Update existing `count_summary_zero_on_fresh_store` test (~line 678) to assert new fields: + +```rust +#[test] +fn count_summary_zero_on_fresh_store() { + let (_dir, store) = open_fresh_store(); + let s = store.count_summary().unwrap(); + assert_eq!(s.doc_count, 0); + assert_eq!(s.chunk_count, 0); + assert_eq!(s.asset_count, 0); + assert!(s.last_ingest_at.is_none()); + assert_eq!(s.media_breakdown.len(), 5); + assert!(s.lang_breakdown.is_empty()); + assert_eq!(s.stale_doc_count, 0); +} +``` + +- [ ] **Step 4: Extend `Stats` mirror in kebab-app::schema** + +Modify `crates/kebab-app/src/schema.rs`. Replace `pub struct Stats`: + +```rust +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Stats { + pub doc_count: u64, + pub chunk_count: u64, + pub asset_count: u64, + pub last_ingest_at: Option, + /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded). + #[serde(default)] + pub media_breakdown: std::collections::BTreeMap, + /// p9-fb-37: per-language doc count, NULL keyed as `"null"`. 
+ #[serde(default)] + pub lang_breakdown: std::collections::BTreeMap, + /// p9-fb-37: on-disk byte sums. + #[serde(default)] + pub index_bytes: kebab_core::IndexBytes, + /// p9-fb-37: docs whose `updated_at` exceeds the staleness threshold. + #[serde(default)] + pub stale_doc_count: u64, +} +``` + +Replace `collect_stats` body: + +```rust +fn collect_stats( + cfg: &Config, + store: &kebab_store_sqlite::SqliteStore, +) -> anyhow::Result { + let counts = store + .count_summary_with_threshold(cfg.search.stale_threshold_days as u64)?; + let data_dir = kebab_config::expand_path(&cfg.storage.data_dir, ""); + let index_bytes = kebab_store_sqlite::stats_ext::index_bytes(&data_dir) + .map_err(|e| anyhow::anyhow!("index_bytes: {e}"))?; + Ok(Stats { + doc_count: counts.doc_count, + chunk_count: counts.chunk_count, + asset_count: counts.asset_count, + last_ingest_at: counts.last_ingest_at, + media_breakdown: counts.media_breakdown, + lang_breakdown: counts.lang_breakdown, + index_bytes, + stale_doc_count: counts.stale_doc_count, + }) +} +``` + +Update the call site `let stats = collect_stats(&store)?;` (~line 88) to: + +```rust +let stats = collect_stats(cfg, &store)?; +``` + +- [ ] **Step 5: Run tests to verify they pass** + +```bash +cargo test -p kebab-store-sqlite count_summary +cargo test -p kebab-app stats_includes_breakdowns_and_bytes_on_fresh_corpus +``` +Expected: both pass. + +- [ ] **Step 6: Verify config field type** + +`cfg.search.stale_threshold_days` must exist as integer. Check `crates/kebab-config/src/lib.rs` for `Search.stale_threshold_days`. If type mismatch (e.g. it's `u32`), adjust `as u64` cast accordingly. + +```bash +grep -n "stale_threshold_days" crates/kebab-config/src/lib.rs +``` +Expected: line with the field type. If it's already `u64` drop the cast; if `u32` keep `as u64`. 
+ +- [ ] **Step 7: Run full clippy + workspace tests** + +```bash +cargo clippy -p kebab-core -p kebab-store-sqlite -p kebab-app --all-targets -- -D warnings +cargo test -p kebab-core -p kebab-store-sqlite -p kebab-app +``` +Expected: clippy clean, all tests pass. + +- [ ] **Step 8: Commit** + +```bash +git add crates/kebab-store-sqlite/src/store.rs crates/kebab-app/src/schema.rs +git commit -m "feat(stats): media/lang/bytes/stale fields on schema.v1.stats (fb-37)" +``` + +--- + +## Task 4: HybridRetriever search_with_trace + +**Files:** +- Create: `crates/kebab-search/src/trace.rs` +- Modify: `crates/kebab-search/src/hybrid.rs` +- Modify: `crates/kebab-search/src/lib.rs` + +- [ ] **Step 1: Write failing test in hybrid.rs** + +Append to `crates/kebab-search/src/hybrid.rs` `mod tests`: + +```rust +#[test] +fn search_with_trace_returns_lex_and_vec_lists() { + use kebab_core::{ChunkId, DocumentId, IndexVersion, ChunkerVersion, + RetrievalDetail, SearchHit, SearchMode, SearchQuery, + WorkspacePath, Citation}; + use std::sync::Arc; + + fn mk_hit(rank: u32, chunk: &str, score: f32, mode: SearchMode) -> SearchHit { + SearchHit { + rank, + chunk_id: ChunkId(chunk.into()), + doc_id: DocumentId(format!("d-{chunk}")), + doc_path: WorkspacePath::new(format!("{chunk}.md")).unwrap(), + heading_path: vec![], + section_label: None, + snippet: chunk.into(), + citation: Citation::Line { + path: WorkspacePath::new(format!("{chunk}.md")).unwrap(), + start: 1, + end: 1, + section: None, + }, + retrieval: RetrievalDetail { + method: mode, + fusion_score: score, + lexical_score: if mode == SearchMode::Lexical { Some(score) } else { None }, + vector_score: if mode == SearchMode::Vector { Some(score) } else { None }, + lexical_rank: if mode == SearchMode::Lexical { Some(rank) } else { None }, + vector_rank: if mode == SearchMode::Vector { Some(rank) } else { None }, + }, + index_version: IndexVersion("v1".into()), + embedding_model: None, + chunker_version: ChunkerVersion("c1".into()), + 
indexed_at: time::OffsetDateTime::UNIX_EPOCH, + stale: false, + } + } + + // Stub retrievers from existing test patterns in this file (see + // `MockRetriever` near line 363 if present, otherwise inline). + struct Stub { hits: Vec, mode: SearchMode } + impl Retriever for Stub { + fn search(&self, _q: &SearchQuery) -> anyhow::Result> { + Ok(self.hits.clone()) + } + fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) } + } + + let lex = Arc::new(Stub { + hits: vec![ + mk_hit(1, "c1", 0.9, SearchMode::Lexical), + mk_hit(2, "c2", 0.5, SearchMode::Lexical), + ], + mode: SearchMode::Lexical, + }); + let vec_r = Arc::new(Stub { + hits: vec![ + mk_hit(1, "c2", 0.8, SearchMode::Vector), + mk_hit(2, "c3", 0.6, SearchMode::Vector), + ], + mode: SearchMode::Vector, + }); + let hybrid = HybridRetriever::with_policy( + lex.clone(), + vec_r.clone(), + FusionPolicy::Rrf { k: 60 }, + 2, + ); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Hybrid, + k: 2, + filters: Default::default(), + }; + let (hits, trace) = hybrid.search_with_trace(&q).unwrap(); + assert!(!hits.is_empty()); + assert_eq!(trace.lexical.len(), 2); + assert_eq!(trace.vector.len(), 2); + // Union: c1, c2, c3 → 3 entries. + assert_eq!(trace.rrf_inputs.len(), 3); + // Sanity: timing populated (any field >= 0 trivially; just check + // the type was set, not a Default::default()). 
+ let _ = trace.timing.lexical_ms; +} + +#[test] +fn search_with_trace_lexical_mode_empty_vector() { + use kebab_core::{ChunkId, DocumentId, IndexVersion, ChunkerVersion, + RetrievalDetail, SearchHit, SearchMode, SearchQuery, + WorkspacePath, Citation}; + use std::sync::Arc; + struct EmptyR(SearchMode); + impl Retriever for EmptyR { + fn search(&self, _q: &SearchQuery) -> anyhow::Result> { + Ok(vec![]) + } + fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) } + } + let lex = Arc::new(EmptyR(SearchMode::Lexical)); + let vec_r = Arc::new(EmptyR(SearchMode::Vector)); + let hybrid = HybridRetriever::with_policy(lex, vec_r, FusionPolicy::Rrf { k: 60 }, 2); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Lexical, + k: 2, + filters: Default::default(), + }; + let (_hits, trace) = hybrid.search_with_trace(&q).unwrap(); + assert!(trace.vector.is_empty()); + assert_eq!(trace.timing.vector_ms, 0); +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cargo test -p kebab-search hybrid::tests::search_with_trace +``` +Expected: compile error — `search_with_trace` undefined. + +- [ ] **Step 3: Add `trace.rs` helper module** + +Create `crates/kebab-search/src/trace.rs`: + +```rust +//! p9-fb-37: trace capture helpers for `HybridRetriever::search_with_trace`. + +use std::collections::BTreeMap; + +use kebab_core::{ + SearchHit, SearchTrace, TraceCandidate, TraceFusionInput, TraceTiming, +}; + +/// Build a `TraceCandidate` from a `SearchHit`. The score field reflects +/// each side's score (lexical / vector / fusion) — caller selects which +/// retriever's hit list this is. 
+pub fn candidates_from_hits(hits: &[SearchHit], score_kind: ScoreKind) -> Vec<TraceCandidate> { + hits.iter() + .map(|h| TraceCandidate { + chunk_id: h.chunk_id.clone(), + doc_id: h.doc_id.clone(), + doc_path: h.doc_path.clone(), + rank: h.rank, + score: match score_kind { + ScoreKind::Lexical => h.retrieval.lexical_score.unwrap_or(0.0), + ScoreKind::Vector => h.retrieval.vector_score.unwrap_or(0.0), + }, + }) + .collect() +} + +#[derive(Clone, Copy, Debug)] +pub enum ScoreKind { + Lexical, + Vector, +} + +/// Build the union of (chunk_id) across lex and vec hit lists, with +/// each side's rank captured. `fusion_score` is filled by the caller +/// (RRF computes it during fusion, this helper just pre-builds the +/// rank table — caller overwrites fusion_score in a second pass). +pub fn build_fusion_input_skeleton( + lex: &[SearchHit], + vec: &[SearchHit], +) -> Vec<TraceFusionInput> { + let mut by_chunk: BTreeMap<String, TraceFusionInput> = BTreeMap::new(); + for h in lex { + by_chunk + .entry(h.chunk_id.0.clone()) + .or_insert(TraceFusionInput { + chunk_id: h.chunk_id.clone(), + lexical_rank: None, + vector_rank: None, + fusion_score: 0.0, + }) + .lexical_rank = Some(h.rank); + } + for h in vec { + by_chunk + .entry(h.chunk_id.0.clone()) + .or_insert(TraceFusionInput { + chunk_id: h.chunk_id.clone(), + lexical_rank: None, + vector_rank: None, + fusion_score: 0.0, + }) + .vector_rank = Some(h.rank); + } + by_chunk.into_values().collect() +} + +/// Container the hybrid retriever fills during a traced run. +#[derive(Default)] +pub struct TraceBuilder { + pub lexical: Vec<TraceCandidate>, + pub vector: Vec<TraceCandidate>, + pub rrf_inputs: Vec<TraceFusionInput>, + pub timing: TraceTiming, +} + +impl TraceBuilder { + pub fn into_trace(self) -> SearchTrace { + SearchTrace { + lexical: self.lexical, + vector: self.vector, + rrf_inputs: self.rrf_inputs, + timing: self.timing, + } + } +} +``` + +Modify `crates/kebab-search/src/lib.rs`. 
Add module declaration: + +```rust +mod trace; +``` + +- [ ] **Step 4: Add `search_with_trace` on HybridRetriever** + +Modify `crates/kebab-search/src/hybrid.rs`. Add at the top (under existing `use` lines): + +```rust +use crate::trace::{build_fusion_input_skeleton, candidates_from_hits, ScoreKind, TraceBuilder}; +use kebab_core::SearchTrace; +use std::time::Instant; +``` + +Add a method to `impl HybridRetriever` (place after `fn fuse`): + +```rust +/// p9-fb-37: parallel to `Retriever::search` but additionally returns +/// a trace of pre-fusion lex/vec lists, RRF inputs (union with each +/// side's rank), and per-stage timing. Same fan-out logic as `fuse`, +/// just instrumented. +pub fn search_with_trace( + &self, + query: &SearchQuery, +) -> anyhow::Result<(Vec, SearchTrace)> { + let start_total = Instant::now(); + let target_k = if query.k == 0 { self.default_k } else { query.k }; + let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER); + let fanout_query = SearchQuery { + k: fanout_k, + ..query.clone() + }; + + let mut tb = TraceBuilder::default(); + + let (lex_hits, vec_hits): (Vec, Vec) = match query.mode { + SearchMode::Lexical => { + let t0 = Instant::now(); + let lh = self.lexical.search(&fanout_query)?; + tb.timing.lexical_ms = t0.elapsed().as_millis() as u64; + (lh, Vec::new()) + } + SearchMode::Vector => { + let t0 = Instant::now(); + let vh = self.vector.search(&fanout_query)?; + tb.timing.vector_ms = t0.elapsed().as_millis() as u64; + (Vec::new(), vh) + } + SearchMode::Hybrid => { + let t0 = Instant::now(); + let lh = self.lexical.search(&fanout_query)?; + tb.timing.lexical_ms = t0.elapsed().as_millis() as u64; + let t1 = Instant::now(); + let vh = self.vector.search(&fanout_query)?; + tb.timing.vector_ms = t1.elapsed().as_millis() as u64; + (lh, vh) + } + }; + + tb.lexical = candidates_from_hits(&lex_hits, ScoreKind::Lexical); + tb.vector = candidates_from_hits(&vec_hits, ScoreKind::Vector); + tb.rrf_inputs = 
build_fusion_input_skeleton(&lex_hits, &vec_hits); + + let t_fusion = Instant::now(); + let final_hits = match query.mode { + SearchMode::Lexical => { + let mut h = lex_hits.clone(); + h.truncate(target_k); + h + } + SearchMode::Vector => { + let mut h = vec_hits.clone(); + h.truncate(target_k); + h + } + SearchMode::Hybrid => self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)?, + }; + tb.timing.fusion_ms = t_fusion.elapsed().as_millis() as u64; + + // Backfill fusion_score onto the rrf_inputs union for each chunk + // present in the final fused list. + let score_by_chunk: std::collections::HashMap = final_hits + .iter() + .map(|h| (h.chunk_id.0.clone(), h.retrieval.fusion_score)) + .collect(); + for entry in &mut tb.rrf_inputs { + if let Some(s) = score_by_chunk.get(&entry.chunk_id.0) { + entry.fusion_score = *s; + } + } + + tb.timing.total_ms = start_total.elapsed().as_millis() as u64; + Ok((final_hits, tb.into_trace())) +} +``` + +`fuse_with_inputs` is needed — extract from existing `fuse` so both `Retriever::search` (hybrid mode) and `search_with_trace` reuse the same RRF body without re-querying retrievers. + +Refactoring recipe: +1. Read existing `fn fuse` (at line ~145). Note the body issues two `.search()` calls then builds `lex_index` / `vec_index` via `.into_iter()`. +2. Split into two functions. `fn fuse` keeps the two `.search()` calls, then delegates the rest. `fn fuse_with_inputs` takes the already-resolved hit slices. +3. Inside `fuse_with_inputs`: replace `let lex_index: HashMap<...> = lex_hits.into_iter().map(...).collect();` with `let lex_index: HashMap<...> = lex_hits.iter().cloned().map(...).collect();` (mirror for vec_index). All other RRF logic stays identical. 
+ +```rust +fn fuse(&self, query: &SearchQuery) -> Result> { + let target_k = if query.k == 0 { self.default_k } else { query.k }; + let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER); + let fanout_query = SearchQuery { + k: fanout_k, + ..query.clone() + }; + let lex_hits = self.lexical.search(&fanout_query)?; + let vec_hits = self.vector.search(&fanout_query)?; + self.fuse_with_inputs(&lex_hits, &vec_hits, target_k) +} + +fn fuse_with_inputs( + &self, + lex_hits: &[SearchHit], + vec_hits: &[SearchHit], + target_k: usize, +) -> Result> { + tracing::debug!( + lex = lex_hits.len(), + vec = vec_hits.len(), + target_k, + "kb-search hybrid: pre-fusion candidate counts" + ); + // PASTE the rest of the original `fn fuse` body here. Two changes: + // - replace `lex_hits.into_iter()` with `lex_hits.iter().cloned()` + // - replace `vec_hits.into_iter()` with `vec_hits.iter().cloned()` + // Everything else (RRF score formula, sort, truncate to target_k, + // tie-breaking, `Ok(...)` return) is verbatim preserved. +} +``` + +Verify with `cargo test -p kebab-search` — existing hybrid tests must still pass (they exercise the `Retriever::search` → `fuse` path). + +- [ ] **Step 5: Run tests** + +```bash +cargo test -p kebab-search +``` +Expected: existing hybrid tests still pass + 2 new search_with_trace tests pass. + +- [ ] **Step 6: Clippy gate** + +```bash +cargo clippy -p kebab-search --all-targets -- -D warnings +``` +Expected: clean. 
+ +- [ ] **Step 7: Commit** + +```bash +git add crates/kebab-search/src/trace.rs crates/kebab-search/src/hybrid.rs crates/kebab-search/src/lib.rs +git commit -m "feat(search): HybridRetriever::search_with_trace (fb-37)" +``` + +--- + +## Task 5: SearchResponse trace field + App::search_with_opts threading + +**Files:** +- Modify: `crates/kebab-app/src/app.rs` + +- [ ] **Step 1: Write failing test** + +Append to `crates/kebab-app/src/app.rs` tests module (find existing `#[cfg(test)] mod tests` near bottom; if absent, add one at file end): + +```rust +#[cfg(test)] +mod tests_trace { + use super::*; + use kebab_core::{SearchOpts, SearchQuery, SearchMode}; + + fn open_app_with_temp_dir() -> (tempfile::TempDir, App) { + let dir = tempfile::tempdir().unwrap(); + let mut cfg = kebab_config::Config::defaults(); + cfg.storage.data_dir = dir.path().to_string_lossy().into_owned(); + // Ensure DB exists. + let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap(); + store.run_migrations().unwrap(); + drop(store); + let app = App::open_with_config(cfg).unwrap(); + (dir, app) + } + + #[test] + fn search_response_trace_none_when_opts_trace_false() { + let (_dir, app) = open_app_with_temp_dir(); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Lexical, + k: 1, + filters: Default::default(), + }; + let resp = app.search_with_opts(q, SearchOpts::default()).unwrap(); + assert!(resp.trace.is_none()); + } + + #[test] + fn search_response_trace_some_when_opts_trace_true() { + let (_dir, app) = open_app_with_temp_dir(); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Lexical, + k: 1, + filters: Default::default(), + }; + let opts = SearchOpts { trace: true, ..Default::default() }; + let resp = app.search_with_opts(q, opts).unwrap(); + assert!(resp.trace.is_some(), "trace populated when opts.trace=true"); + } +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cargo test -p kebab-app tests_trace +``` +Expected: compile errors — 
`SearchResponse.trace` field absent. + +- [ ] **Step 3: Extend `SearchResponse`** + +In `crates/kebab-app/src/app.rs`, replace `pub struct SearchResponse` (~line 69): + +```rust +#[derive(Clone, Debug)] +pub struct SearchResponse { + pub hits: Vec, + pub next_cursor: Option, + pub truncated: bool, + /// p9-fb-37: present when caller passed `SearchOpts.trace = true`. + /// Consumers that ignore trace should leave this `None`. + pub trace: Option, +} +``` + +- [ ] **Step 4: Thread through `App::search_with_opts`** + +In `crates/kebab-app/src/app.rs`, modify `pub fn search_with_opts` (~line 306) to honor `opts.trace`. Find the current `let mut all_hits = self.search(fetch_query)?;` line and replace surrounding logic: + +```rust +let trace = if opts.trace { + // Build a trace-capable retriever directly. Re-use construction + // from the cached search path but bypass cache (debug intent). + let retriever = self.build_retriever()?; + let traced = retriever + .as_any() + .downcast_ref::() + .map(|h| h.search_with_trace(&fetch_query)); + if let Some(Ok((hits, t))) = traced { + let mut all_hits = hits; + let drop_n = offset.min(all_hits.len()); + all_hits.drain(..drop_n); + let final_hits: Vec = all_hits.into_iter().take(k_effective).collect(); + return Ok(self.build_response(final_hits, k_effective, &opts, snippet_chars, Some(t))); + } + None +} else { + None +}; + +let mut all_hits = self.search(fetch_query)?; +// ... existing code ... +``` + +Engineer note: this is a sketch — review actual `App::search_with_opts` body before editing; the `build_retriever` / `as_any` / `build_response` helpers may not exist verbatim. The minimal change required is: +1. When `opts.trace = true`, call `search_with_trace` on the hybrid retriever (constructed the same way `App::search_uncached` does). +2. Bypass the search cache entirely. +3. Plug the resulting `SearchTrace` into `SearchResponse.trace`. 
+ +Use the existing `App::search_uncached` (line ~243) as the model — duplicate that path with `search_with_trace` and wrap the result. Look for: `let retriever = ... HybridRetriever::new(&self.config, lex, vec);`. Call `retriever.search_with_trace(&query)` instead of `retriever.search(&query)` when tracing. + +If the retriever is constructed only as `Arc` (and `search_with_trace` is not on the trait), add a concrete-typed local construction in the `if opts.trace` branch. Example pattern: + +```rust +// inside fn search_with_opts: +if opts.trace { + use kebab_search::HybridRetriever; + let lex = self.build_lexical_retriever()?; + let vec = self.build_vector_retriever()?; + let retriever = HybridRetriever::new(&self.config, lex, vec); + let (hits, trace) = retriever.search_with_trace(&fetch_query)?; + // skip cache, run budget loop on hits, attach trace to response + return Ok(self.finalize_response(hits, k_effective, offset, &opts, snippet_chars, Some(trace))); +} +``` + +The exact helpers (`build_lexical_retriever`, `finalize_response`) are method names you'll either find or extract during implementation. Goal: trace path bypasses cache and returns `Some(trace)`; non-trace path unchanged returns `None`. + +Also update every other `SearchResponse { ... }` constructor in `app.rs` and `lib.rs` to include `trace: None`. Search for `SearchResponse {` to find all sites. + +```bash +grep -n "SearchResponse {" crates/kebab-app/src/app.rs crates/kebab-app/src/lib.rs +``` + +- [ ] **Step 5: Run tests** + +```bash +cargo test -p kebab-app tests_trace +cargo test -p kebab-app +``` +Expected: 2 new trace tests pass; existing app tests unaffected. + +- [ ] **Step 6: Workspace clippy** + +```bash +cargo clippy -p kebab-app --all-targets -- -D warnings +``` +Expected: clean. 
+ +- [ ] **Step 7: Commit** + +```bash +git add crates/kebab-app/src/app.rs +git commit -m "feat(app): SearchResponse.trace + opts.trace threading (fb-37)" +``` + +--- + +## Task 6: CLI --trace flag + JSON wire + non-JSON pretty print + +**Files:** +- Modify: `crates/kebab-cli/src/main.rs` +- Modify: `crates/kebab-cli/src/wire.rs` + +- [ ] **Step 1: Write failing test for wire serialization** + +Append to `crates/kebab-cli/src/wire.rs` `mod tests`: + +```rust +#[test] +fn search_response_with_trace_serializes_trace_field() { + use kebab_core::{SearchTrace, TraceCandidate, TraceFusionInput, + TraceTiming, ChunkId, DocumentId, WorkspacePath}; + let r = kebab_app::SearchResponse { + hits: vec![], + next_cursor: None, + truncated: false, + trace: Some(SearchTrace { + lexical: vec![TraceCandidate { + chunk_id: ChunkId("c1".into()), + doc_id: DocumentId("d1".into()), + doc_path: WorkspacePath::new("a.md".into()).unwrap(), + rank: 1, + score: 0.42, + }], + vector: vec![], + rrf_inputs: vec![TraceFusionInput { + chunk_id: ChunkId("c1".into()), + lexical_rank: Some(1), + vector_rank: None, + fusion_score: 0.0, + }], + timing: TraceTiming { lexical_ms: 5, vector_ms: 0, fusion_ms: 1, total_ms: 7 }, + }), + }; + let v = wire_search_response(&r); + assert_eq!(v["schema_version"], "search_response.v1"); + assert!(v["trace"].is_object()); + assert_eq!(v["trace"]["timing"]["lexical_ms"], 5); + assert_eq!(v["trace"]["lexical"][0]["chunk_id"], "c1"); +} + +#[test] +fn search_response_without_trace_omits_field() { + let r = kebab_app::SearchResponse { + hits: vec![], + next_cursor: None, + truncated: false, + trace: None, + }; + let v = wire_search_response(&r); + assert!(v.get("trace").is_none(), "trace field absent when None"); +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +```bash +cargo test -p kebab-cli wire::tests::search_response_with_trace_serializes_trace_field +``` +Expected: assertion failure on `v["trace"].is_object()` — the test compiles because Task 5 already added `SearchResponse.trace`, but `wire_search_response` does not emit the field yet. 
+ +- [ ] **Step 3: Update `wire_search_response`** + +Modify `crates/kebab-cli/src/wire.rs` `wire_search_response`: + +```rust +pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value { + let mut v = serde_json::json!({ + "hits": r.hits.iter().map(wire_search_hit).collect::<Vec<_>>(), + "next_cursor": r.next_cursor, + "truncated": r.truncated, + }); + if let Some(trace) = &r.trace { + let trace_v = serde_json::to_value(trace).expect("SearchTrace serializes"); + if let Value::Object(ref mut map) = v { + map.insert("trace".to_string(), trace_v); + } + } + tag_object(v, "search_response.v1") +} +``` + +- [ ] **Step 4: Add `--trace` clap flag** + +Modify `crates/kebab-cli/src/main.rs`. Find `Cmd::Search { ... }` definition (~line 95-150). Add at the end of its field list (after `doc_id`): + +```rust + /// p9-fb-37: emit pre-fusion lexical / vector / RRF candidate + /// lists + per-stage timing in the response. Bypasses cache + /// (debug intent — fresh run guaranteed). + #[arg(long)] + trace: bool, +``` + +Find the `Cmd::Search` dispatch arm (~line 656). Add `trace,` to the destructure pattern (after `doc_id,`). Find where `SearchOpts` is constructed (~look for `SearchOpts {` inside the search arm, ~line 745) and add `trace: *trace,`. Example: + +```rust +let opts = kebab_core::SearchOpts { + max_tokens: *max_tokens, + snippet_chars: *snippet_chars, + cursor: cursor.clone(), + trace: *trace, +}; +``` + +- [ ] **Step 5: Add non-JSON pretty-print** + +Find the search dispatch's non-JSON branch (the `else` of `if cli.json`, ~line 750-780). 
After hits are printed, add: + +```rust +if *trace { + if let Some(t) = &resp.trace { + eprintln!(); + eprintln!("Trace:"); + eprintln!(" lexical ({} hits, {}ms):", t.lexical.len(), t.timing.lexical_ms); + for c in t.lexical.iter().take(3) { + eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0); + } + eprintln!(" vector ({} hits, {}ms):", t.vector.len(), t.timing.vector_ms); + for c in t.vector.iter().take(3) { + eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0); + } + eprintln!(" fusion ({} inputs, {}ms)", t.rrf_inputs.len(), t.timing.fusion_ms); + eprintln!(" total: {}ms", t.timing.total_ms); + } +} +``` + +- [ ] **Step 6: Run tests** + +```bash +cargo test -p kebab-cli wire::tests +cargo test -p kebab-cli +``` +Expected: 2 new wire tests pass; existing cli tests unaffected. + +- [ ] **Step 7: Clippy** + +```bash +cargo clippy -p kebab-cli --all-targets -- -D warnings +``` +Expected: clean. + +- [ ] **Step 8: Commit** + +```bash +git add crates/kebab-cli/src/main.rs crates/kebab-cli/src/wire.rs +git commit -m "feat(cli): kebab search --trace flag + wire trace + pretty print (fb-37)" +``` + +--- + +## Task 7: CLI integration tests for --trace and stats breakdowns + +**Files:** +- Create: `crates/kebab-cli/tests/wire_search_trace.rs` +- Create: `crates/kebab-cli/tests/wire_schema_breakdowns.rs` + +- [ ] **Step 1: Write failing integration tests for --trace** + +Create `crates/kebab-cli/tests/wire_search_trace.rs`. Use the same fixture pattern as existing `crates/kebab-cli/tests/wire_search_filters.rs` (read it first to mirror temp-dir + ingest setup): + +```rust +//! p9-fb-37: integration tests for `kebab search --trace --json`. 
+ +use std::process::Command; + +mod common; +use common::{cargo_bin, ingest_fixture, temp_kebab_root}; + +#[test] +fn search_trace_json_includes_trace_block() { + let (_root, cfg_path) = temp_kebab_root(); + ingest_fixture(&cfg_path, "doc1.md", "# Title\n\nrust async hello\n"); + + let out = Command::new(cargo_bin()) + .args([ + "--config", cfg_path.to_str().unwrap(), + "search", "rust", "--trace", "--json", + ]) + .output() + .expect("run"); + assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr)); + let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap(); + assert_eq!(v["schema_version"], "search_response.v1"); + assert!(v["trace"].is_object(), "trace block present"); + assert!(v["trace"]["timing"].is_object()); + assert!(v["trace"]["timing"]["total_ms"].is_number()); + assert!(v["trace"]["lexical"].is_array()); + assert!(v["trace"]["vector"].is_array()); + assert!(v["trace"]["rrf_inputs"].is_array()); +} + +#[test] +fn search_without_trace_omits_trace_field() { + let (_root, cfg_path) = temp_kebab_root(); + ingest_fixture(&cfg_path, "doc1.md", "# Title\n\nrust async hello\n"); + + let out = Command::new(cargo_bin()) + .args([ + "--config", cfg_path.to_str().unwrap(), + "search", "rust", "--json", + ]) + .output() + .expect("run"); + assert!(out.status.success()); + let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap(); + assert!(v.get("trace").is_none(), "trace field absent when --trace not passed"); +} + +#[test] +fn search_trace_lexical_mode_empty_vector_list() { + let (_root, cfg_path) = temp_kebab_root(); + ingest_fixture(&cfg_path, "doc1.md", "# Title\n\nrust async hello\n"); + + let out = Command::new(cargo_bin()) + .args([ + "--config", cfg_path.to_str().unwrap(), + "search", "rust", "--trace", "--mode", "lexical", "--json", + ]) + .output() + .expect("run"); + assert!(out.status.success()); + let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap(); + 
assert_eq!(v["trace"]["vector"].as_array().unwrap().len(), 0); + assert_eq!(v["trace"]["timing"]["vector_ms"], 0); +} +``` + +- [ ] **Step 2: Write failing integration tests for stats** + +Create `crates/kebab-cli/tests/wire_schema_breakdowns.rs`: + +```rust +//! p9-fb-37: integration tests for `kebab schema --json` extended stats. + +use std::process::Command; + +mod common; +use common::{cargo_bin, ingest_fixture, temp_kebab_root}; + +#[test] +fn schema_stats_includes_breakdowns_on_fresh_corpus() { + let (_root, cfg_path) = temp_kebab_root(); + // Fresh init — no docs. We need migrations to have run; the + // first search/ingest call brings them up. Run an empty schema + // query on a freshly-init'd config: + Command::new(cargo_bin()) + .args(["--config", cfg_path.to_str().unwrap(), "init"]) + .output() + .expect("init"); + + let out = Command::new(cargo_bin()) + .args(["--config", cfg_path.to_str().unwrap(), "schema", "--json"]) + .output() + .expect("run"); + assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr)); + let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap(); + let stats = &v["stats"]; + // 5 keys padded. + let m = stats["media_breakdown"].as_object().unwrap(); + assert_eq!(m.len(), 5); + for k in &["markdown", "pdf", "image", "audio", "other"] { + assert_eq!(m[*k], 0); + } + // lang_breakdown empty {}. + assert_eq!(stats["lang_breakdown"].as_object().unwrap().len(), 0); + // index_bytes shape. 
+ assert!(stats["index_bytes"]["sqlite"].is_number()); + assert!(stats["index_bytes"]["lancedb"].is_number()); + assert_eq!(stats["stale_doc_count"], 0); +} + +#[test] +fn schema_stats_breakdowns_after_ingest() { + let (_root, cfg_path) = temp_kebab_root(); + ingest_fixture(&cfg_path, "a.md", "---\nlang: en\n---\nhello\n"); + ingest_fixture(&cfg_path, "b.md", "---\nlang: ko\n---\n안녕\n"); + + let out = Command::new(cargo_bin()) + .args(["--config", cfg_path.to_str().unwrap(), "schema", "--json"]) + .output() + .expect("run"); + assert!(out.status.success()); + let v: serde_json::Value = serde_json::from_slice(&out.stdout).unwrap(); + let stats = &v["stats"]; + assert_eq!(stats["media_breakdown"]["markdown"], 2); + assert_eq!(stats["lang_breakdown"]["en"], 1); + assert_eq!(stats["lang_breakdown"]["ko"], 1); + assert!(stats["index_bytes"]["sqlite"].as_u64().unwrap() > 0); +} +``` + +- [ ] **Step 3: Verify or create `tests/common/mod.rs`** + +Check existing tests for shared `common` module: +```bash +ls crates/kebab-cli/tests/ +cat crates/kebab-cli/tests/common/mod.rs 2>/dev/null +``` + +If `common` module exists with `cargo_bin`, `ingest_fixture`, `temp_kebab_root`, reuse. If not, mirror functions from `wire_search_filters.rs` (the fb-36 integration test) — copy its fixture helpers to `crates/kebab-cli/tests/common/mod.rs` and reference via `mod common`. + +- [ ] **Step 4: Run integration tests** + +```bash +cargo test -p kebab-cli --test wire_search_trace +cargo test -p kebab-cli --test wire_schema_breakdowns +``` +Expected: all tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add crates/kebab-cli/tests/wire_search_trace.rs crates/kebab-cli/tests/wire_schema_breakdowns.rs crates/kebab-cli/tests/common/mod.rs +git commit -m "test(cli): integration tests for --trace + schema breakdowns (fb-37)" +``` + +--- + +## Task 8: MCP SearchInput trace + integration test + +**Files:** +- Modify: `crates/kebab-mcp/src/tools/search.rs` +- Create: `crates/kebab-mcp/tests/tools_call_search_trace.rs` + +- [ ] **Step 1: Write failing integration test** + +Create `crates/kebab-mcp/tests/tools_call_search_trace.rs`. Mirror existing `tools_call_search.rs` fixture pattern (read it first): + +```rust +//! p9-fb-37: MCP search trace input/output integration. + +use serde_json::json; + +mod common; +use common::call_tool_with_temp_corpus; + +#[test] +fn search_with_trace_true_returns_trace_field() { + let v = call_tool_with_temp_corpus( + "kebab__search", + json!({"query": "rust", "trace": true}), + ); + assert!(v["trace"].is_object(), "trace field present when trace:true"); + assert!(v["trace"]["timing"]["total_ms"].is_number()); +} + +#[test] +fn search_without_trace_omits_field() { + let v = call_tool_with_temp_corpus( + "kebab__search", + json!({"query": "rust"}), + ); + assert!(v.get("trace").is_none(), "trace absent when not requested"); +} + +#[test] +fn search_with_trace_false_omits_field() { + let v = call_tool_with_temp_corpus( + "kebab__search", + json!({"query": "rust", "trace": false}), + ); + assert!(v.get("trace").is_none()); +} +``` + +If `tests/common/mod.rs` lacks `call_tool_with_temp_corpus`, derive from existing test fixtures. Pattern: spin up `kebab_mcp::Server`, send tools/call request, return result `serde_json::Value`. + +- [ ] **Step 2: Run tests to verify they fail** + +```bash +cargo test -p kebab-mcp --test tools_call_search_trace +``` +Expected: `search_with_trace_true_returns_trace_field` fails at runtime — `SearchInput` has no `trace` field yet, so the response carries no `trace` object. (The test only sends JSON, so the missing field cannot cause a compile error; the two omit-field tests may already pass.) 
+ +- [ ] **Step 3: Add `trace` to `SearchInput`** + +Modify `crates/kebab-mcp/src/tools/search.rs`. Find `pub struct SearchInput` (~line 30-50). Add at end: + +```rust + /// p9-fb-37: when true, capture pipeline trace and include in + /// response. Bypasses cache. Default false. + #[serde(default)] + pub trace: Option<bool>, +``` + +- [ ] **Step 4: Wire `trace` into dispatch** + +Find the dispatch body where `SearchOpts` is constructed (~line 90-130). Add: + +```rust +let opts = kebab_core::SearchOpts { + max_tokens: input.max_tokens, + snippet_chars: input.snippet_chars, + cursor: input.cursor.clone(), + trace: input.trace.unwrap_or(false), +}; +``` + +(The existing struct construction may not include `cursor` etc — adapt to what's actually present, just add the `trace:` line.) + +The output JSON should already pick up `trace` because the wire helper inherits from the same `SearchResponse` shape. Verify by searching for how the MCP tool serializes its response — check whether it uses `kebab_cli::wire::wire_search_response` or its own builder. + +```bash +grep -n "wire_search_response\|search_response.v1\|SearchResponse" crates/kebab-mcp/src/tools/search.rs +``` + +If MCP uses its own builder, mirror the trace-injection pattern from Task 6 Step 3. + +- [ ] **Step 5: Run tests** + +```bash +cargo test -p kebab-mcp --test tools_call_search_trace +``` +Expected: all 3 pass. 
+ +- [ ] **Step 6: Clippy** + +```bash +cargo clippy -p kebab-mcp --all-targets -- -D warnings +``` + +- [ ] **Step 7: Commit** + +```bash +git add crates/kebab-mcp/src/tools/search.rs crates/kebab-mcp/tests/tools_call_search_trace.rs +git commit -m "feat(mcp): kebab__search trace input + output mirror (fb-37)" +``` + +--- + +## Task 9: TUI search pane `t` keystroke + TracePopup + +**Files:** +- Create: `crates/kebab-tui/src/trace_popup.rs` +- Modify: `crates/kebab-tui/src/lib.rs` +- Modify: `crates/kebab-tui/src/app.rs` +- Modify: `crates/kebab-tui/src/search.rs` +- Modify: `crates/kebab-tui/src/cheatsheet.rs` + +- [ ] **Step 1: Create `trace_popup.rs`** + +```rust +//! p9-fb-37: TUI trace popup. Opens from Search pane via `t` key +//! when results are visible. Re-runs the current query with +//! `SearchOpts.trace = true` and displays the lex / vec / rrf union +//! + per-stage timing as a single scroll list. + +use crossterm::event::{KeyCode, KeyEvent}; +use kebab_core::SearchTrace; +use ratatui::Frame; +use ratatui::layout::Rect; +use ratatui::style::{Modifier, Style}; +use ratatui::text::{Line, Span}; +use ratatui::widgets::{Block, Borders, Paragraph, Wrap}; + +#[derive(Debug, Clone)] +pub struct TracePopupState { + pub trace: SearchTrace, + pub scroll: u16, +} + +impl TracePopupState { + pub fn new(trace: SearchTrace) -> Self { + Self { trace, scroll: 0 } + } +} + +pub fn render_trace_popup(f: &mut Frame, area: Rect, state: &TracePopupState) { + let mut lines: Vec = Vec::new(); + let bold = Style::default().add_modifier(Modifier::BOLD); + + lines.push(Line::from(Span::styled( + format!( + "Lexical ({} hits, {} ms)", + state.trace.lexical.len(), + state.trace.timing.lexical_ms, + ), + bold, + ))); + for c in &state.trace.lexical { + lines.push(Line::from(format!( + " #{:>2} score={:.4} chunk={}", + c.rank, c.score, c.chunk_id.0 + ))); + } + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + format!( + "Vector ({} hits, {} ms)", + 
state.trace.vector.len(), + state.trace.timing.vector_ms, + ), + bold, + ))); + for c in &state.trace.vector { + lines.push(Line::from(format!( + " #{:>2} score={:.4} chunk={}", + c.rank, c.score, c.chunk_id.0 + ))); + } + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + format!( + "RRF inputs ({} entries, {} ms fusion)", + state.trace.rrf_inputs.len(), + state.trace.timing.fusion_ms, + ), + bold, + ))); + for e in &state.trace.rrf_inputs { + lines.push(Line::from(format!( + " chunk={} lex={:?} vec={:?} fusion={:.4}", + e.chunk_id.0, e.lexical_rank, e.vector_rank, e.fusion_score + ))); + } + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + format!("Total: {} ms", state.trace.timing.total_ms), + bold, + ))); + + let block = Block::default() + .title("Trace — Esc to close, j/k or ↑↓ to scroll") + .borders(Borders::ALL); + let p = Paragraph::new(lines) + .block(block) + .scroll((state.scroll, 0)) + .wrap(Wrap { trim: false }); + f.render_widget(p, area); +} + +/// Handle keys while popup is open. Returns true if the popup should +/// close. 
+pub fn handle_key_trace_popup(state: &mut TracePopupState, key: KeyEvent) -> bool { + match key.code { + KeyCode::Esc => true, + KeyCode::Char('j') | KeyCode::Down => { + state.scroll = state.scroll.saturating_add(1); + false + } + KeyCode::Char('k') | KeyCode::Up => { + state.scroll = state.scroll.saturating_sub(1); + false + } + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crossterm::event::KeyModifiers; + use kebab_core::TraceTiming; + + fn dummy_state() -> TracePopupState { + TracePopupState::new(SearchTrace { + lexical: vec![], + vector: vec![], + rrf_inputs: vec![], + timing: TraceTiming::default(), + }) + } + + #[test] + fn esc_closes() { + let mut s = dummy_state(); + assert!(handle_key_trace_popup( + &mut s, + KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE), + )); + } + + #[test] + fn j_scrolls_down() { + let mut s = dummy_state(); + assert!(!handle_key_trace_popup( + &mut s, + KeyEvent::new(KeyCode::Char('j'), KeyModifiers::NONE), + )); + assert_eq!(s.scroll, 1); + } +} +``` + +- [ ] **Step 2: Register module + state** + +Modify `crates/kebab-tui/src/lib.rs`: +```rust +pub mod trace_popup; +``` + +Modify `crates/kebab-tui/src/app.rs`. Find `pub struct App` (~line 1-100). Add field: +```rust + /// p9-fb-37: trace popup state, `Some` while open. + pub trace_popup: Option, +``` + +Initialize in `App::new` / `App::default` to `None`. + +- [ ] **Step 3: Wire `t` keystroke in search pane** + +Modify `crates/kebab-tui/src/search.rs` `pub fn handle_key_search` (~line 196). Add a key arm in the match block before existing arms: + +```rust + (KeyCode::Char('t'), KeyModifiers::NONE) + if !state.results.is_empty() && state.trace_popup.is_none() => + { + // Re-run current query with trace enabled. 
+ let cfg = match kebab_config::Config::load(state.config_path.as_deref()) { + Ok(c) => c, + Err(_) => return KeyOutcome::Consumed, + }; + let q = kebab_core::SearchQuery { + text: state.query.clone(), + mode: state.mode, + k: state.k, + filters: state.filters.clone(), + }; + let opts = kebab_core::SearchOpts { + trace: true, + ..Default::default() + }; + if let Ok(resp) = kebab_app::search_with_opts_with_config(cfg, q, opts) { + if let Some(t) = resp.trace { + state.trace_popup = Some(crate::trace_popup::TracePopupState::new(t)); + } + } + KeyOutcome::Consumed + } +``` + +Engineer note: field names (`state.results`, `state.query`, `state.mode`, `state.k`, `state.filters`, `state.config_path`) must match actual `App` struct. Inspect `kebab-tui/src/app.rs` and adapt — if some are absent (e.g. `config_path`), fall back to `kebab_config::Config::load(None)`. + +- [ ] **Step 4: Render popup + handle popup keys in main loop** + +Find the main render loop (in `crates/kebab-tui/src/run.rs` or `app.rs`) — wherever `render_search` / `render_inspect` are conditionally called. Add a render check: if `state.trace_popup.is_some()`, draw the popup overlay. Pattern: + +```rust +if let Some(popup) = &state.trace_popup { + let popup_area = centered_rect(80, 80, frame.area()); + crate::trace_popup::render_trace_popup(frame, popup_area, popup); +} +``` + +`centered_rect` helper may already exist (commonly in `app.rs` or `terminal.rs`). 
If not, define it inline: + +```rust +fn centered_rect(percent_x: u16, percent_y: u16, r: Rect) -> Rect { + let popup_layout = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Percentage((100 - percent_y) / 2), + Constraint::Percentage(percent_y), + Constraint::Percentage((100 - percent_y) / 2), + ]) + .split(r); + Layout::default() + .direction(Direction::Horizontal) + .constraints([ + Constraint::Percentage((100 - percent_x) / 2), + Constraint::Percentage(percent_x), + Constraint::Percentage((100 - percent_x) / 2), + ]) + .split(popup_layout[1])[1] +} +``` + +In key dispatch, intercept popup keys first: + +```rust +if let Some(popup) = state.trace_popup.as_mut() { + if crate::trace_popup::handle_key_trace_popup(popup, key) { + state.trace_popup = None; + } + return KeyOutcome::Consumed; +} +``` + +Place before the per-pane key dispatch. + +- [ ] **Step 5: Update cheatsheet** + +Modify `crates/kebab-tui/src/cheatsheet.rs`. Find the search pane keybind list (search for "Search" header or `i = inspect`). Add: + +```rust + "t = trace", +``` + +(Exact insertion depends on cheatsheet's data structure — array of strings, struct rows, etc. Adapt.) + +- [ ] **Step 6: Run TUI tests** + +```bash +cargo test -p kebab-tui +``` +Expected: 2 new trace_popup tests pass; existing TUI tests unaffected. 
+ +- [ ] **Step 7: Clippy** + +```bash +cargo clippy -p kebab-tui --all-targets -- -D warnings +``` + +- [ ] **Step 8: Commit** + +```bash +git add crates/kebab-tui/src/trace_popup.rs crates/kebab-tui/src/lib.rs \ + crates/kebab-tui/src/app.rs crates/kebab-tui/src/search.rs \ + crates/kebab-tui/src/cheatsheet.rs crates/kebab-tui/src/run.rs +git commit -m "feat(tui): search pane t-key opens TracePopup (fb-37)" +``` + +--- + +## Task 10: Wire schema docs + README + SMOKE + INDEX + SKILL + status flip + +**Files:** +- Modify: `docs/wire-schema/v1/search_response.schema.json` +- Modify: `docs/wire-schema/v1/schema.schema.json` +- Modify: `README.md` +- Modify: `docs/SMOKE.md` +- Modify: `tasks/p9/p9-fb-37-trace-and-stats.md` +- Modify: `tasks/INDEX.md` +- Modify: `integrations/claude-code/kebab/SKILL.md` + +- [ ] **Step 1: Update `search_response.schema.json`** + +Add `trace` to `properties` (NOT to `required`): + +```json +"trace": { + "type": "object", + "description": "p9-fb-37: present iff caller passed --trace / SearchOpts.trace=true. Lex/vec pre-fusion lists + RRF union + per-stage timing.", + "required": ["lexical", "vector", "rrf_inputs", "timing"], + "properties": { + "lexical": { "type": "array", "items": { "type": "object" } }, + "vector": { "type": "array", "items": { "type": "object" } }, + "rrf_inputs":{ "type": "array", "items": { "type": "object" } }, + "timing": { + "type": "object", + "required": ["lexical_ms", "vector_ms", "fusion_ms", "total_ms"], + "properties": { + "lexical_ms": { "type": "integer", "minimum": 0 }, + "vector_ms": { "type": "integer", "minimum": 0 }, + "fusion_ms": { "type": "integer", "minimum": 0 }, + "total_ms": { "type": "integer", "minimum": 0 } + } + } + } +} +``` + +- [ ] **Step 2: Update `schema.schema.json`** + +In `properties.stats.properties`, add the four new fields: + +```json +"media_breakdown": { + "type": "object", + "description": "p9-fb-37: per-media-kind doc count. 
5 keys (markdown/pdf/image/audio/other), zero-padded.", + "additionalProperties": { "type": "integer", "minimum": 0 } +}, +"lang_breakdown": { + "type": "object", + "description": "p9-fb-37: per-language doc count. NULL lang keyed as the literal string 'null'. Map may be empty on empty corpus.", + "additionalProperties": { "type": "integer", "minimum": 0 } +}, +"index_bytes": { + "type": "object", + "description": "p9-fb-37: on-disk byte sums.", + "required": ["sqlite", "lancedb"], + "properties": { + "sqlite": { "type": "integer", "minimum": 0 }, + "lancedb": { "type": "integer", "minimum": 0 } + } +}, +"stale_doc_count": { + "type": "integer", + "minimum": 0, + "description": "p9-fb-37: docs whose updated_at is older than config.search.stale_threshold_days days. 0 when threshold=0." +} +``` + +- [ ] **Step 3: Update `README.md`** + +Find the `kebab search` row in the command table. Add `--trace` to its flag list. Find the `kebab schema` row — extend its description with one phrase like "+ media/lang/bytes/stale breakdowns (fb-37)". + +- [ ] **Step 4: Update `docs/SMOKE.md`** + +Add a new section after the fb-36 walkthrough: + +````markdown +### Trace + stats (fb-37) + +Re-run a search with `--trace` to see per-stage candidate lists + timing: + +```bash +kebab --config /tmp/kebab-smoke/config.toml search "rust async" --trace --json | jq .trace +``` + +Inspect the corpus health surface: + +```bash +kebab --config /tmp/kebab-smoke/config.toml schema --json | jq .stats +``` + +Look for: `media_breakdown` (5 keys), `lang_breakdown`, `index_bytes`, `stale_doc_count`. +```` + +- [ ] **Step 5: Update `tasks/p9/p9-fb-37-trace-and-stats.md`** + +Flip the frontmatter `status: open` → `status: completed`. 
Add at the top (after the existing skeleton banner) a "Design + plan" links block: + +```markdown +- Design: [`docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md`](../../docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md) +- Plan: [`docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md`](../../docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md) +``` + +- [ ] **Step 6: Update `tasks/INDEX.md`** + +Find the fb-37 row. Flip the status column to ✅. + +- [ ] **Step 7: Update `integrations/claude-code/kebab/SKILL.md`** + +Find the `mcp__kebab__search` input shape block. Append a `trace: null` field. Add a sentence under the search inputs bullet list noting that `trace: true` returns a `trace` block on the response with pre-fusion lex/vec lists + per-stage timing, and that trace bypasses the search cache. Also update the schema bullet list to mention the new stats sub-fields. + +- [ ] **Step 8: Run full workspace tests + clippy** + +```bash +cargo test --workspace --no-fail-fast -j 1 +cargo clippy --workspace --all-targets -- -D warnings +``` +Expected: all green. 
+ +- [ ] **Step 9: Commit** + +```bash +git add docs/ README.md tasks/p9/p9-fb-37-trace-and-stats.md tasks/INDEX.md integrations/claude-code/kebab/SKILL.md +git commit -m "docs(fb-37): wire schema + README + SMOKE + INDEX + SKILL" +``` + +--- + +## Final verification checklist + +- [ ] `cargo test --workspace --no-fail-fast -j 1` green +- [ ] `cargo clippy --workspace --all-targets -- -D warnings` clean +- [ ] Manual smoke against `/tmp/kebab-smoke`: + - [ ] `kebab search Q --trace --json | jq .trace` shows lex/vec/rrf/timing + - [ ] `kebab search Q --json` does NOT include `trace` + - [ ] `kebab schema --json | jq .stats` shows 4 new fields +- [ ] README, SMOKE, SKILL, INDEX, spec status all updated -- 2.49.1 From 1e943f21dc8782fa0a776646fac925a18f737853 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 12:17:04 +0900 Subject: [PATCH 03/13] feat(core): SearchTrace + IndexBytes types + SearchOpts.trace (fb-37) Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-core/src/search.rs | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs index 5e5cd31..bb66be9 100644 --- a/crates/kebab-core/src/search.rs +++ b/crates/kebab-core/src/search.rs @@ -124,6 +124,57 @@ pub struct SearchOpts { pub snippet_chars: Option, /// Opaque base64 cursor from a previous response. None = first page. pub cursor: Option, + /// p9-fb-37: when true, capture pipeline trace (cache bypassed, + /// lex / vec pre-fusion lists + timing populated on the response). + #[serde(default)] + pub trace: bool, +} + +/// p9-fb-37: search retrieval pipeline trace. Populated only when +/// `SearchOpts.trace = true`; `None` on the wrapping `SearchResponse` +/// otherwise. `lexical` / `vector` are pre-fusion candidate lists +/// (each retriever's full output for the fanout query). `rrf_inputs` +/// is the union (chunk_id) used by RRF, with each side's rank +/// captured. 
`timing` is wall-clock per stage. +#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +pub struct SearchTrace { + pub lexical: Vec, + pub vector: Vec, + pub rrf_inputs: Vec, + pub timing: TraceTiming, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TraceCandidate { + pub chunk_id: ChunkId, + pub doc_id: DocumentId, + pub doc_path: WorkspacePath, + pub rank: u32, + pub score: f32, +} + +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct TraceFusionInput { + pub chunk_id: ChunkId, + pub lexical_rank: Option, + pub vector_rank: Option, + pub fusion_score: f32, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct TraceTiming { + pub lexical_ms: u64, + pub vector_ms: u64, + pub fusion_ms: u64, + pub total_ms: u64, +} + +/// p9-fb-37: on-disk index size breakdown. Mirrored on the +/// wire `schema.v1.stats.index_bytes` block. +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +pub struct IndexBytes { + pub sqlite: u64, + pub lancedb: u64, } #[cfg(test)] @@ -193,4 +244,51 @@ mod tests { assert!(old.ingested_after.is_none()); assert!(old.doc_id.is_none()); } + + #[test] + fn search_trace_serde_roundtrip() { + let t = SearchTrace { + lexical: vec![TraceCandidate { + chunk_id: ChunkId("c1".into()), + doc_id: DocumentId("d1".into()), + doc_path: WorkspacePath::new("a.md".into()).unwrap(), + rank: 1, + score: 0.42, + }], + vector: vec![], + rrf_inputs: vec![TraceFusionInput { + chunk_id: ChunkId("c1".into()), + lexical_rank: Some(1), + vector_rank: None, + fusion_score: 0.0234, + }], + timing: TraceTiming { + lexical_ms: 12, + vector_ms: 0, + fusion_ms: 1, + total_ms: 14, + }, + }; + let v = serde_json::to_value(&t).unwrap(); + assert_eq!(v["timing"]["lexical_ms"], 12); + assert_eq!( + v["lexical"][0]["score"].as_f64().unwrap() as f32, + 0.42_f32 + ); + let back: SearchTrace = serde_json::from_value(v).unwrap(); + assert_eq!(back, t); + } + + 
#[test] + fn index_bytes_default_is_zero() { + let b = IndexBytes::default(); + assert_eq!(b.sqlite, 0); + assert_eq!(b.lancedb, 0); + } + + #[test] + fn search_opts_trace_default_false() { + let opts = SearchOpts::default(); + assert!(!opts.trace); + } } -- 2.49.1 From 69c6e23432f350a540b006c9d5e8761ecdafb365 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 12:21:45 +0900 Subject: [PATCH 04/13] feat(store): breakdowns + index_bytes helpers (fb-37) Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-core/src/lib.rs | 5 +- crates/kebab-store-sqlite/src/lib.rs | 1 + crates/kebab-store-sqlite/src/stats_ext.rs | 168 +++++++++++++++++++++ 3 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 crates/kebab-store-sqlite/src/stats_ext.rs diff --git a/crates/kebab-core/src/lib.rs b/crates/kebab-core/src/lib.rs index 7bbb01b..1cee095 100644 --- a/crates/kebab-core/src/lib.rs +++ b/crates/kebab-core/src/lib.rs @@ -51,8 +51,9 @@ pub use metadata::{ TrustLevel, }; pub use search::{ - DocFilter, DocSummary, RetrievalDetail, SearchFilters, SearchHit, - SearchMode, SearchOpts, SearchQuery, + DocFilter, DocSummary, IndexBytes, MEDIA_KINDS, RetrievalDetail, SearchFilters, SearchHit, + SearchMode, SearchOpts, SearchQuery, SearchTrace, TraceCandidate, TraceFusionInput, + TraceTiming, }; pub use answer::{ Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage, diff --git a/crates/kebab-store-sqlite/src/lib.rs b/crates/kebab-store-sqlite/src/lib.rs index f68872b..89c0fa3 100644 --- a/crates/kebab-store-sqlite/src/lib.rs +++ b/crates/kebab-store-sqlite/src/lib.rs @@ -28,6 +28,7 @@ mod fts; mod jobs; mod schema; mod store; +pub mod stats_ext; pub use embeddings::EmbeddingRecordRow; pub use error::StoreError; diff --git a/crates/kebab-store-sqlite/src/stats_ext.rs b/crates/kebab-store-sqlite/src/stats_ext.rs new file mode 100644 index 0000000..e6df4e2 --- /dev/null +++ b/crates/kebab-store-sqlite/src/stats_ext.rs @@ -0,0 +1,168 @@ 
+//! p9-fb-37: extended stats helpers — per-media / per-lang doc counts, +//! stale doc count, on-disk index byte sums. + +use std::collections::BTreeMap; +use std::path::Path; + +use kebab_core::{IndexBytes, MEDIA_KINDS}; +use rusqlite::Connection; + +/// p9-fb-37: result of [`breakdowns`] — three independent counts collected in one pass. +#[derive(Debug, Clone, Default)] +pub struct Breakdowns { + pub media: BTreeMap, + pub lang: BTreeMap, + pub stale_doc_count: u64, +} + +/// `media` always contains all 5 `MEDIA_KINDS` (zero-padded). +/// `lang` only contains observed languages; NULL lang is +/// keyed as the literal string `"null"`. `stale_doc_count` is 0 when +/// `threshold_days == 0` (mirrors fb-32 staleness disable semantics). +pub fn breakdowns( + conn: &Connection, + threshold_days: u64, +) -> rusqlite::Result { + // media: dual JSON shape — text variant ("markdown") vs object + // variant ({"image":{"format":"png"}}). Same CASE WHEN as fb-36. + let mut media: BTreeMap = MEDIA_KINDS + .iter() + .map(|k| ((*k).to_string(), 0u64)) + .collect(); + let mut stmt = conn.prepare( + "SELECT \ + CASE \ + WHEN json_type(a.media_type) = 'text' \ + THEN json_extract(a.media_type, '$') \ + ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \ + END AS kind, \ + COUNT(DISTINCT d.doc_id) \ + FROM documents d JOIN assets a ON a.asset_id = d.asset_id \ + GROUP BY kind", + )?; + let rows = stmt.query_map([], |r| { + Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?)) + })?; + for row in rows { + let (kind, n) = row?; + media.insert(kind, n); + } + + let mut lang: BTreeMap = BTreeMap::new(); + let mut stmt = conn.prepare( + "SELECT COALESCE(lang, 'null') AS l, COUNT(*) \ + FROM documents GROUP BY l", + )?; + let rows = stmt.query_map([], |r| { + Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?)) + })?; + for row in rows { + let (l, n) = row?; + lang.insert(l, n); + } + + let stale_doc_count: u64 = if threshold_days == 0 { + 0 + } else { + let secs = (threshold_days as i64) 
* 86_400; + let cutoff = time::OffsetDateTime::now_utc() + - time::Duration::seconds(secs); + let cutoff_str = cutoff + .format(&time::format_description::well_known::Rfc3339) + .expect("RFC3339 format"); + conn.query_row( + "SELECT COUNT(*) FROM documents WHERE updated_at < ?", + [cutoff_str], + |r| r.get(0), + )? + }; + + Ok(Breakdowns { + media, + lang, + stale_doc_count, + }) +} + +/// Sum on-disk bytes of the SQLite database (main + WAL + SHM) and +/// the LanceDB directory tree. Missing files / dir = 0. +pub fn index_bytes(data_dir: &Path) -> std::io::Result { + fn file_size_or_zero(p: &Path) -> u64 { + std::fs::metadata(p).map(|m| m.len()).unwrap_or(0) + } + fn dir_walk_sum(p: &Path) -> std::io::Result { + if !p.exists() { + return Ok(0); + } + let mut total = 0u64; + for entry in std::fs::read_dir(p)? { + let entry = entry?; + let ty = entry.file_type()?; + if ty.is_dir() { + total += dir_walk_sum(&entry.path())?; + } else if ty.is_file() { + total += entry.metadata()?.len(); + } + } + Ok(total) + } + + let sqlite_main = data_dir.join("kebab.sqlite"); + let sqlite_wal = data_dir.join("kebab.sqlite-wal"); + let sqlite_shm = data_dir.join("kebab.sqlite-shm"); + let sqlite = file_size_or_zero(&sqlite_main) + + file_size_or_zero(&sqlite_wal) + + file_size_or_zero(&sqlite_shm); + let lancedb = dir_walk_sum(&data_dir.join("lancedb"))?; + Ok(IndexBytes { sqlite, lancedb }) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn open_fresh() -> (tempfile::TempDir, crate::SqliteStore) { + let dir = tempfile::tempdir().unwrap(); + let mut cfg = kebab_config::Config::defaults(); + cfg.storage.data_dir = dir.path().to_string_lossy().into_owned(); + let store = crate::SqliteStore::open(&cfg).unwrap(); + store.run_migrations().unwrap(); + (dir, store) + } + + #[test] + fn breakdowns_empty_corpus() { + let (_dir, store) = open_fresh(); + let conn = store.read_conn(); + let b = breakdowns(&conn, 0).unwrap(); + // 5 keys all zero, lang map empty, stale 0. 
+ assert_eq!(b.media.len(), 5); + for k in MEDIA_KINDS { + assert_eq!(b.media.get(*k), Some(&0u64)); + } + assert!(b.lang.is_empty()); + assert_eq!(b.stale_doc_count, 0); + } + + #[test] + fn index_bytes_includes_sqlite_main() { + let (dir, _store) = open_fresh(); + let b = index_bytes(dir.path()).unwrap(); + assert!(b.sqlite > 0, "main sqlite file should exist after migrations"); + assert_eq!(b.lancedb, 0); + } + + #[test] + fn index_bytes_lancedb_dir_walk() { + let dir = tempfile::tempdir().unwrap(); + let lance = dir.path().join("lancedb"); + std::fs::create_dir_all(lance.join("vectors.lance")).unwrap(); + std::fs::write( + lance.join("vectors.lance").join("data.bin"), + vec![0u8; 1024], + ) + .unwrap(); + let b = index_bytes(dir.path()).unwrap(); + assert_eq!(b.lancedb, 1024); + } +} -- 2.49.1 From 231d80e82d32aae2254539e7bc7ec557e5a675de Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 12:30:19 +0900 Subject: [PATCH 05/13] feat(stats): media/lang/bytes/stale fields on schema.v1.stats (fb-37) Extends CountSummary with media_breakdown, lang_breakdown, stale_doc_count fields populated via stats_ext::breakdowns(). Adds count_summary_with_threshold for callers that need real stale counts. Mirrors all new fields onto the wire-bound Stats struct in kebab-app::schema with #[serde(default)] for backwards-compat. Also fixes search_budget_integration.rs for the trace field added to SearchOpts in Task 1. 
Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-app/src/schema.rs | 57 ++++++++++++++++++- .../tests/search_budget_integration.rs | 4 ++ crates/kebab-store-sqlite/src/store.rs | 48 ++++++++++++---- 3 files changed, 96 insertions(+), 13 deletions(-) diff --git a/crates/kebab-app/src/schema.rs b/crates/kebab-app/src/schema.rs index 603b212..46841fb 100644 --- a/crates/kebab-app/src/schema.rs +++ b/crates/kebab-app/src/schema.rs @@ -50,6 +50,18 @@ pub struct Stats { pub chunk_count: u64, pub asset_count: u64, pub last_ingest_at: Option, + /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded). + #[serde(default)] + pub media_breakdown: std::collections::BTreeMap, + /// p9-fb-37: per-language doc count, NULL keyed as `"null"`. + #[serde(default)] + pub lang_breakdown: std::collections::BTreeMap, + /// p9-fb-37: on-disk byte sums. + #[serde(default)] + pub index_bytes: kebab_core::IndexBytes, + /// p9-fb-37: docs whose `updated_at` exceeds the staleness threshold. + #[serde(default)] + pub stale_doc_count: u64, } const KEBAB_VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -85,7 +97,7 @@ const WIRE_SCHEMAS: &[&str] = &[ #[doc(hidden)] pub fn schema_with_config(cfg: &Config) -> anyhow::Result { let store = open_store_for_stats(cfg)?; - let stats = collect_stats(&store)?; + let stats = collect_stats(cfg, &store)?; let models = collect_models(cfg, &store); Ok(SchemaV1 { schema_version: SCHEMA_V1_ID.to_string(), @@ -124,13 +136,24 @@ fn open_store_for_stats(cfg: &Config) -> anyhow::Result anyhow::Result { - let counts = store.count_summary()?; +fn collect_stats( + cfg: &Config, + store: &kebab_store_sqlite::SqliteStore, +) -> anyhow::Result { + let counts = store + .count_summary_with_threshold(cfg.search.stale_threshold_days as u64)?; + let data_dir = kebab_config::expand_path(&cfg.storage.data_dir, ""); + let index_bytes = kebab_store_sqlite::stats_ext::index_bytes(&data_dir) + .map_err(|e| anyhow::anyhow!("index_bytes: {e}"))?; Ok(Stats { doc_count: 
counts.doc_count, chunk_count: counts.chunk_count, asset_count: counts.asset_count, last_ingest_at: counts.last_ingest_at, + media_breakdown: counts.media_breakdown, + lang_breakdown: counts.lang_breakdown, + index_bytes, + stale_doc_count: counts.stale_doc_count, }) } @@ -150,3 +173,31 @@ fn collect_models(cfg: &Config, store: &kebab_store_sqlite::SqliteStore) -> Mode corpus_revision: store.corpus_revision(), } } + +#[cfg(test)] +mod tests_stats_ext { + use super::*; + + #[test] + fn stats_includes_breakdowns_and_bytes_on_fresh_corpus() { + let dir = tempfile::tempdir().unwrap(); + let mut cfg = kebab_config::Config::defaults(); + cfg.storage.data_dir = dir.path().to_string_lossy().into_owned(); + // Bring up migrations so the sqlite file is created. + let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap(); + store.run_migrations().unwrap(); + drop(store); + + let s = schema_with_config(&cfg).unwrap(); + // 5 keys padded. + assert_eq!(s.stats.media_breakdown.len(), 5); + assert_eq!(s.stats.media_breakdown.get("markdown"), Some(&0)); + assert_eq!(s.stats.media_breakdown.get("pdf"), Some(&0)); + // lang map empty on empty corpus. + assert!(s.stats.lang_breakdown.is_empty()); + // sqlite bytes positive after migrations, lancedb 0. 
+ assert!(s.stats.index_bytes.sqlite > 0); + assert_eq!(s.stats.index_bytes.lancedb, 0); + assert_eq!(s.stats.stale_doc_count, 0); + } +} diff --git a/crates/kebab-app/tests/search_budget_integration.rs b/crates/kebab-app/tests/search_budget_integration.rs index 42ad346..c309b69 100644 --- a/crates/kebab-app/tests/search_budget_integration.rs +++ b/crates/kebab-app/tests/search_budget_integration.rs @@ -47,6 +47,7 @@ fn budget_truncates_snippets_when_below_threshold() { max_tokens: Some(50), snippet_chars: None, cursor: None, + trace: false, }, ) .unwrap(); @@ -78,6 +79,7 @@ fn cursor_paginates_to_next_page() { max_tokens: None, snippet_chars: None, cursor: Some(cursor), + trace: false, }, ) .unwrap(); @@ -114,6 +116,7 @@ fn cursor_rejected_after_corpus_revision_bump() { max_tokens: None, snippet_chars: None, cursor: Some(c), + trace: false, }, ); let err = result.unwrap_err(); @@ -147,6 +150,7 @@ fn max_tokens_zero_returns_one_hit_truncated() { max_tokens: Some(0), snippet_chars: None, cursor: None, + trace: false, }, ) .unwrap(); diff --git a/crates/kebab-store-sqlite/src/store.rs b/crates/kebab-store-sqlite/src/store.rs index 13691b3..57e16da 100644 --- a/crates/kebab-store-sqlite/src/store.rs +++ b/crates/kebab-store-sqlite/src/store.rs @@ -604,6 +604,12 @@ pub struct CountSummary { /// ISO-8601 timestamp of the most-recently updated document row, or /// `None` when the store is empty. pub last_ingest_at: Option, + /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded). + pub media_breakdown: std::collections::BTreeMap, + /// p9-fb-37: per-language doc count, NULL keyed as `"null"`. + pub lang_breakdown: std::collections::BTreeMap, + /// p9-fb-37: docs whose `updated_at < now - threshold_days`. 0 when threshold=0. + pub stale_doc_count: u64, } impl SqliteStore { @@ -611,39 +617,58 @@ impl SqliteStore { /// most-recent `documents.updated_at` timestamp. /// /// Uses `read_conn()` (no mutations) — mirrors the pattern used by - /// [`Self::corpus_revision`]. 
- pub fn count_summary(&self) -> anyhow::Result { + /// Shared helper: counts and breakdowns in a single pass with given threshold. + fn count_summary_inner(&self, threshold_days: u64) -> anyhow::Result { + use anyhow::Context; + use rusqlite::OptionalExtension; + let conn = self.read_conn(); let doc_count: u64 = conn .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0)) .context("count documents")?; - let chunk_count: u64 = conn .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0)) .context("count chunks")?; - let asset_count: u64 = conn .query_row("SELECT COUNT(*) FROM assets", [], |r| r.get(0)) .context("count assets")?; - let last_ingest_at: Option = conn - .query_row( - "SELECT MAX(updated_at) FROM documents", - [], - |r| r.get(0), - ) + .query_row("SELECT MAX(updated_at) FROM documents", [], |r| r.get(0)) .optional() .context("max updated_at")? .flatten(); + let bd = crate::stats_ext::breakdowns(&conn, threshold_days).context("breakdowns")?; + Ok(CountSummary { doc_count, chunk_count, asset_count, last_ingest_at, + media_breakdown: bd.media, + lang_breakdown: bd.lang, + stale_doc_count: bd.stale_doc_count, }) } + + /// [`Self::corpus_revision`]. + pub fn count_summary(&self) -> anyhow::Result { + // p9-fb-37: default uses threshold_days=0 (matches fb-32 disable + // semantics). Callers that need real stale_doc_count call + // count_summary_with_threshold. + self.count_summary_inner(0) + } + + /// p9-fb-37: variant that honors `config.search.stale_threshold_days`. + /// Callers who need a meaningful `stale_doc_count` (e.g. `kebab schema`) + /// pass the configured threshold; the older `count_summary` returns 0. + pub fn count_summary_with_threshold( + &self, + threshold_days: u64, + ) -> anyhow::Result { + self.count_summary_inner(threshold_days) + } } /// Apply the design §5 / task-spec pragmas. Called once per connection. 
@@ -681,6 +706,9 @@ mod tests { assert_eq!(s.chunk_count, 0); assert_eq!(s.asset_count, 0); assert!(s.last_ingest_at.is_none()); + assert_eq!(s.media_breakdown.len(), 5); + assert!(s.lang_breakdown.is_empty()); + assert_eq!(s.stale_doc_count, 0); } } -- 2.49.1 From 6a067e3ab1988319b91f821e4124efb7c41557e9 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 12:38:53 +0900 Subject: [PATCH 06/13] feat(search): HybridRetriever::search_with_trace (fb-37) --- crates/kebab-search/src/hybrid.rs | 203 ++++++++++++++++++++++++++++-- crates/kebab-search/src/lib.rs | 1 + crates/kebab-search/src/trace.rs | 85 +++++++++++++ 3 files changed, 280 insertions(+), 9 deletions(-) create mode 100644 crates/kebab-search/src/trace.rs diff --git a/crates/kebab-search/src/hybrid.rs b/crates/kebab-search/src/hybrid.rs index 37cd629..58b6678 100644 --- a/crates/kebab-search/src/hybrid.rs +++ b/crates/kebab-search/src/hybrid.rs @@ -18,12 +18,15 @@ use std::collections::HashMap; use std::sync::Arc; +use std::time::Instant; use anyhow::Result; use kebab_core::{ - IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery, + IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery, SearchTrace, }; +use crate::trace::{build_fusion_input_skeleton, candidates_from_hits, ScoreKind, TraceBuilder}; + /// Default `k_rrf` if `kb-config::SearchCfg::rrf_k` is misconfigured. /// Matches §6.4's documented default (60). const DEFAULT_K_RRF: u32 = 60; @@ -145,20 +148,22 @@ impl Retriever for HybridRetriever { impl HybridRetriever { fn fuse(&self, query: &SearchQuery) -> Result> { let target_k = if query.k == 0 { self.default_k } else { query.k }; - - // Fanout: ask each retriever for `target_k * MULTIPLIER` so - // the disjoint set of candidates is wide enough. The two - // per-side queries are identical (same text, k, mode, filters); - // only the dispatch differs, so we share one `SearchQuery`. 
let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER); let lex_query = SearchQuery { k: fanout_k, ..query.clone() }; - let lex_hits = self.lexical.search(&lex_query)?; let vec_hits = self.vector.search(&lex_query)?; + self.fuse_with_inputs(&lex_hits, &vec_hits, target_k) + } + fn fuse_with_inputs( + &self, + lex_hits: &[SearchHit], + vec_hits: &[SearchHit], + target_k: usize, + ) -> Result> { tracing::debug!( lex = lex_hits.len(), vec = vec_hits.len(), @@ -171,11 +176,13 @@ impl HybridRetriever { // already 1-based by both LexicalRetriever and VectorRetriever // (and any well-behaved Retriever should mirror). let lex_index: HashMap = lex_hits - .into_iter() + .iter() + .cloned() .map(|h| (h.chunk_id.0.clone(), (h.rank, h))) .collect(); let vec_index: HashMap = vec_hits - .into_iter() + .iter() + .cloned() .map(|h| (h.chunk_id.0.clone(), (h.rank, h))) .collect(); @@ -312,6 +319,81 @@ impl HybridRetriever { tracing::debug!(rows = hits.len(), "kb-search hybrid: search done"); Ok(hits) } + + /// p9-fb-37: parallel to `Retriever::search` but additionally returns + /// a trace of pre-fusion lex/vec lists, RRF inputs (union with each + /// side's rank), and per-stage timing. 
+ pub fn search_with_trace( + &self, + query: &SearchQuery, + ) -> anyhow::Result<(Vec, SearchTrace)> { + let start_total = Instant::now(); + let target_k = if query.k == 0 { self.default_k } else { query.k }; + let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER); + let fanout_query = SearchQuery { + k: fanout_k, + ..query.clone() + }; + + let mut tb = TraceBuilder::default(); + + let (lex_hits, vec_hits): (Vec, Vec) = match query.mode { + SearchMode::Lexical => { + let t0 = Instant::now(); + let lh = self.lexical.search(&fanout_query)?; + tb.timing.lexical_ms = t0.elapsed().as_millis() as u64; + (lh, Vec::new()) + } + SearchMode::Vector => { + let t0 = Instant::now(); + let vh = self.vector.search(&fanout_query)?; + tb.timing.vector_ms = t0.elapsed().as_millis() as u64; + (Vec::new(), vh) + } + SearchMode::Hybrid => { + let t0 = Instant::now(); + let lh = self.lexical.search(&fanout_query)?; + tb.timing.lexical_ms = t0.elapsed().as_millis() as u64; + let t1 = Instant::now(); + let vh = self.vector.search(&fanout_query)?; + tb.timing.vector_ms = t1.elapsed().as_millis() as u64; + (lh, vh) + } + }; + + tb.lexical = candidates_from_hits(&lex_hits, ScoreKind::Lexical); + tb.vector = candidates_from_hits(&vec_hits, ScoreKind::Vector); + tb.rrf_inputs = build_fusion_input_skeleton(&lex_hits, &vec_hits); + + let t_fusion = Instant::now(); + let final_hits = match query.mode { + SearchMode::Lexical => { + let mut h = lex_hits.clone(); + h.truncate(target_k); + h + } + SearchMode::Vector => { + let mut h = vec_hits.clone(); + h.truncate(target_k); + h + } + SearchMode::Hybrid => self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)?, + }; + tb.timing.fusion_ms = t_fusion.elapsed().as_millis() as u64; + + let score_by_chunk: std::collections::HashMap = final_hits + .iter() + .map(|h| (h.chunk_id.0.clone(), h.retrieval.fusion_score)) + .collect(); + for entry in &mut tb.rrf_inputs { + if let Some(s) = score_by_chunk.get(&entry.chunk_id.0) { + entry.fusion_score 
= *s; + } + } + + tb.timing.total_ms = start_total.elapsed().as_millis() as u64; + Ok((final_hits, tb.into_trace())) + } } /// Parse the `hybrid_fusion` config string into a [`FusionPolicy`]. @@ -633,4 +715,107 @@ mod tests { let FusionPolicy::Rrf { k_rrf } = parse_fusion("rrf", 0); assert_eq!(k_rrf, DEFAULT_K_RRF); } + + #[test] + fn search_with_trace_returns_lex_and_vec_lists() { + use kebab_core::{ChunkId, DocumentId, IndexVersion, ChunkerVersion, + RetrievalDetail, SearchHit, SearchMode, SearchQuery, + WorkspacePath, Citation}; + use std::sync::Arc; + + fn mk_hit(rank: u32, chunk: &str, score: f32, mode: SearchMode) -> SearchHit { + SearchHit { + rank, + chunk_id: ChunkId(chunk.into()), + doc_id: DocumentId(format!("d-{chunk}")), + doc_path: WorkspacePath::new(format!("{chunk}.md")).unwrap(), + heading_path: vec![], + section_label: None, + snippet: chunk.into(), + citation: Citation::Line { + path: WorkspacePath::new(format!("{chunk}.md")).unwrap(), + start: 1, + end: 1, + section: None, + }, + retrieval: RetrievalDetail { + method: mode, + fusion_score: score, + lexical_score: if mode == SearchMode::Lexical { Some(score) } else { None }, + vector_score: if mode == SearchMode::Vector { Some(score) } else { None }, + lexical_rank: if mode == SearchMode::Lexical { Some(rank) } else { None }, + vector_rank: if mode == SearchMode::Vector { Some(rank) } else { None }, + }, + index_version: IndexVersion("v1".into()), + embedding_model: None, + chunker_version: ChunkerVersion("c1".into()), + indexed_at: time::OffsetDateTime::UNIX_EPOCH, + stale: false, + } + } + + struct Stub { hits: Vec } + impl Retriever for Stub { + fn search(&self, _q: &SearchQuery) -> anyhow::Result> { + Ok(self.hits.clone()) + } + fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) } + } + + let lex = Arc::new(Stub { + hits: vec![ + mk_hit(1, "c1", 0.9, SearchMode::Lexical), + mk_hit(2, "c2", 0.5, SearchMode::Lexical), + ], + }); + let vec_r = Arc::new(Stub { + hits: vec![ + 
mk_hit(1, "c2", 0.8, SearchMode::Vector), + mk_hit(2, "c3", 0.6, SearchMode::Vector), + ], + }); + let hybrid = HybridRetriever::with_policy( + lex.clone(), + vec_r.clone(), + FusionPolicy::Rrf { k_rrf: 60 }, + 2, + ); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Hybrid, + k: 2, + filters: Default::default(), + }; + let (hits, trace) = hybrid.search_with_trace(&q).unwrap(); + assert!(!hits.is_empty()); + assert_eq!(trace.lexical.len(), 2); + assert_eq!(trace.vector.len(), 2); + // Union: c1, c2, c3 → 3 entries. + assert_eq!(trace.rrf_inputs.len(), 3); + } + + #[test] + fn search_with_trace_lexical_mode_empty_vector() { + use kebab_core::{IndexVersion, SearchMode, SearchQuery}; + use std::sync::Arc; + struct EmptyR; + impl Retriever for EmptyR { + fn search(&self, _q: &SearchQuery) -> anyhow::Result> { + Ok(vec![]) + } + fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) } + } + let lex = Arc::new(EmptyR); + let vec_r = Arc::new(EmptyR); + let hybrid = HybridRetriever::with_policy(lex, vec_r, FusionPolicy::Rrf { k_rrf: 60 }, 2); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Lexical, + k: 2, + filters: Default::default(), + }; + let (_hits, trace) = hybrid.search_with_trace(&q).unwrap(); + assert!(trace.vector.is_empty()); + assert_eq!(trace.timing.vector_ms, 0); + } } diff --git a/crates/kebab-search/src/lib.rs b/crates/kebab-search/src/lib.rs index 47f832d..fef87f3 100644 --- a/crates/kebab-search/src/lib.rs +++ b/crates/kebab-search/src/lib.rs @@ -19,6 +19,7 @@ mod citation_helper; mod hybrid; mod lexical; +mod trace; mod vector; pub use hybrid::{FusionPolicy, HybridRetriever}; diff --git a/crates/kebab-search/src/trace.rs b/crates/kebab-search/src/trace.rs new file mode 100644 index 0000000..5ddbf9c --- /dev/null +++ b/crates/kebab-search/src/trace.rs @@ -0,0 +1,85 @@ +//! p9-fb-37: trace capture helpers for `HybridRetriever::search_with_trace`. 
+ +use std::collections::BTreeMap; + +use kebab_core::{ + SearchHit, SearchTrace, TraceCandidate, TraceFusionInput, TraceTiming, +}; + +/// Build a `TraceCandidate` from a `SearchHit`. The score field reflects +/// each side's score (lexical / vector / fusion) — caller selects which +/// retriever's hit list this is. +pub fn candidates_from_hits(hits: &[SearchHit], score_kind: ScoreKind) -> Vec { + hits.iter() + .map(|h| TraceCandidate { + chunk_id: h.chunk_id.clone(), + doc_id: h.doc_id.clone(), + doc_path: h.doc_path.clone(), + rank: h.rank, + score: match score_kind { + ScoreKind::Lexical => h.retrieval.lexical_score.unwrap_or(0.0), + ScoreKind::Vector => h.retrieval.vector_score.unwrap_or(0.0), + }, + }) + .collect() +} + +#[derive(Clone, Copy, Debug)] +pub enum ScoreKind { + Lexical, + Vector, +} + +/// Build the union of (chunk_id) across lex and vec hit lists, with +/// each side's rank captured. `fusion_score` is filled by the caller +/// (RRF computes it during fusion, this helper just pre-builds the +/// rank table — caller overwrites fusion_score in a second pass). +pub fn build_fusion_input_skeleton( + lex: &[SearchHit], + vec: &[SearchHit], +) -> Vec { + let mut by_chunk: BTreeMap = BTreeMap::new(); + for h in lex { + by_chunk + .entry(h.chunk_id.0.clone()) + .or_insert(TraceFusionInput { + chunk_id: h.chunk_id.clone(), + lexical_rank: None, + vector_rank: None, + fusion_score: 0.0, + }) + .lexical_rank = Some(h.rank); + } + for h in vec { + by_chunk + .entry(h.chunk_id.0.clone()) + .or_insert(TraceFusionInput { + chunk_id: h.chunk_id.clone(), + lexical_rank: None, + vector_rank: None, + fusion_score: 0.0, + }) + .vector_rank = Some(h.rank); + } + by_chunk.into_values().collect() +} + +/// Container the hybrid retriever fills during a traced run. 
+#[derive(Default)] +pub struct TraceBuilder { + pub lexical: Vec, + pub vector: Vec, + pub rrf_inputs: Vec, + pub timing: TraceTiming, +} + +impl TraceBuilder { + pub fn into_trace(self) -> SearchTrace { + SearchTrace { + lexical: self.lexical, + vector: self.vector, + rrf_inputs: self.rrf_inputs, + timing: self.timing, + } + } +} -- 2.49.1 From 69037c313a9352c3884263ef3dbc8bb90df5d94f Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 13:01:18 +0900 Subject: [PATCH 07/13] feat(app): SearchResponse.trace + opts.trace threading (fb-37) Adds the `trace: Option` field to `SearchResponse` and threads `SearchOpts.trace` through `App::search_with_opts`. When the caller sets `opts.trace = true` the path bypasses the LRU search cache and runs through `HybridRetriever::search_with_trace`, which dispatches all 3 SearchModes internally; this means `--trace` requires embeddings (same constraint as `--mode hybrid`). The non-trace path keeps its exact prior behavior with `trace: None` stamped on the response. Picked up Task 1 / Task 3 follow-ups in the same commit so the workspace compiles: SearchOpts struct-literals in kebab-cli/main.rs + kebab-mcp/tools/search.rs default the new `trace` field to false, and the schema-wrapper test in kebab-cli/wire.rs fills the new media_breakdown / lang_breakdown / index_bytes / stale_doc_count fields on Stats with `Default::default()`. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/app.rs | 131 +++++++++++++++++++++++++++ crates/kebab-cli/src/main.rs | 1 + crates/kebab-cli/src/wire.rs | 5 + crates/kebab-mcp/src/tools/search.rs | 1 + 4 files changed, 138 insertions(+) diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs index 3e0c53d..7895459 100644 --- a/crates/kebab-app/src/app.rs +++ b/crates/kebab-app/src/app.rs @@ -70,6 +70,9 @@ pub struct SearchResponse { pub hits: Vec, pub next_cursor: Option, pub truncated: bool, + /// p9-fb-37: present when caller passed `SearchOpts.trace = true`. + /// Consumers that ignore trace should leave this `None`. + pub trace: Option, } /// Facade state — see module docs for lifetime rules. @@ -341,6 +344,65 @@ impl App { k: fetch_k, ..query.clone() }; + + // p9-fb-37: when --trace is requested, bypass the LRU cache and + // run through `HybridRetriever::search_with_trace`, which + // dispatches by mode internally. This requires embeddings (same + // as `--mode hybrid`); `require_embeddings()` surfaces the + // existing "switch to --mode lexical" error otherwise. + if opts.trace { + let lex = Arc::new(LexicalRetriever::with_settings( + self.sqlite.clone(), + lexical_index_version(&self.config), + self.config.search.snippet_chars, + )) as Arc; + let (emb, vec_store) = self.require_embeddings()?; + let vec_iv = vector_index_version(emb.as_ref()); + let vec_dyn: Arc = vec_store; + let emb_dyn: Arc = emb; + let vec_retr = Arc::new(VectorRetriever::with_settings( + vec_dyn, + emb_dyn, + self.sqlite.clone(), + vec_iv, + self.config.search.snippet_chars, + )) as Arc; + let hybrid = HybridRetriever::new(&self.config, lex, vec_retr); + let (mut traced_hits, trace) = hybrid.search_with_trace(&fetch_query)?; + + // Stamp staleness — same as search_uncached. 
+ let now = time::OffsetDateTime::now_utc(); + crate::staleness::mark_stale_in_place( + &mut traced_hits, + now, + self.config.search.stale_threshold_days, + ); + + // Apply offset + k_effective truncation (mirrors non-trace path). + let drop_n = offset.min(traced_hits.len()); + traced_hits.drain(..drop_n); + let mut hits: Vec = + traced_hits.into_iter().take(k_effective).collect(); + + // Snippet truncation if opts.snippet_chars set (mirror non-trace path). + if opts.snippet_chars.is_some() { + for h in hits.iter_mut() { + if h.snippet.chars().count() > snippet_chars { + h.snippet = trim_to_chars(&h.snippet, snippet_chars); + } + } + } + + // Trace path skips the budget loop. Caller will inspect + // `hits.len()` and `trace.timing` rather than paginate. + return Ok(SearchResponse { + hits, + next_cursor: None, + truncated: false, + trace: Some(trace), + }); + } + let mut all_hits = self.search(fetch_query)?; // Skip offset. @@ -421,6 +483,7 @@ impl App { hits, next_cursor, truncated, + trace: None, }) } @@ -847,3 +910,71 @@ mod tests { assert_ne!(a, d, "different session_id → different hash"); } } + +#[cfg(test)] +mod tests_trace { + use super::*; + use kebab_core::{SearchMode, SearchOpts, SearchQuery}; + + fn open_app_with_temp_dir() -> (tempfile::TempDir, App) { + let dir = tempfile::tempdir().unwrap(); + let mut cfg = kebab_config::Config::defaults(); + cfg.storage.data_dir = dir.path().to_string_lossy().into_owned(); + // Bring up migrations. 
+ let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap(); + store.run_migrations().unwrap(); + drop(store); + let app = App::open_with_config(cfg).unwrap(); + (dir, app) + } + + #[test] + fn search_response_trace_none_when_opts_trace_false() { + let (_dir, app) = open_app_with_temp_dir(); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Lexical, + k: 1, + filters: Default::default(), + }; + let resp = app.search_with_opts(q, SearchOpts::default()).unwrap(); + assert!(resp.trace.is_none()); + } + + #[test] + fn search_response_trace_some_when_opts_trace_true_lexical_mode() { + // Lexical mode doesn't require embeddings — the trace path + // builds HybridRetriever which holds both retrievers, but + // for SearchMode::Lexical only the lexical side is invoked. + // require_embeddings will fail if no embedding provider is + // configured. Default Config has provider = "none" so this + // test will fail unless we tolerate that. Skip the assertion + // if the call returns the embedding-disabled error. + let (_dir, app) = open_app_with_temp_dir(); + let q = SearchQuery { + text: "x".into(), + mode: SearchMode::Lexical, + k: 1, + filters: Default::default(), + }; + let opts = SearchOpts { + trace: true, + ..Default::default() + }; + match app.search_with_opts(q, opts) { + Ok(resp) => { + assert!(resp.trace.is_some(), "trace populated when opts.trace=true"); + } + Err(e) => { + // Acceptable in test environment without embeddings — + // verify the error is the expected embedding-disabled + // shape, not an unrelated panic. 
+ let msg = format!("{e:#}"); + assert!( + msg.contains("embedding") || msg.contains("--mode lexical"), + "unexpected error: {msg}" + ); + } + } + } +} diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 7e41d8a..21ee509 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -732,6 +732,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { max_tokens: *max_tokens, snippet_chars: *snippet_chars, cursor: cursor.clone(), + trace: false, }; // p9-fb-34: budget-aware path. --no-cache still bypasses the // App-level LRU; wire wrapper applies regardless. diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs index 178fa22..504288d 100644 --- a/crates/kebab-cli/src/wire.rs +++ b/crates/kebab-cli/src/wire.rs @@ -264,6 +264,7 @@ mod tests { hits: vec![], next_cursor: Some("opaque-cursor-abc".to_string()), truncated: true, + trace: None, }; let v = wire_search_response(&r); assert_eq!(schema_of(&v), Some("search_response.v1")); @@ -303,6 +304,10 @@ mod tests { stats: Stats { doc_count: 1, chunk_count: 2, asset_count: 1, last_ingest_at: None, + media_breakdown: Default::default(), + lang_breakdown: Default::default(), + index_bytes: Default::default(), + stale_doc_count: 0, }, }; let v = wire_schema(&schema); diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs index 74af6e9..167cb61 100644 --- a/crates/kebab-mcp/src/tools/search.rs +++ b/crates/kebab-mcp/src/tools/search.rs @@ -118,6 +118,7 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { max_tokens: input.max_tokens, snippet_chars: input.snippet_chars, cursor: input.cursor, + trace: false, }; let cfg_clone = (*state.config).clone(); match kebab_app::search_with_opts_with_config(cfg_clone, query, opts) { -- 2.49.1 From 72c227af239acaae623fe23f22095209c66bcb55 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 13:08:48 +0900 Subject: [PATCH 08/13] feat(cli): kebab search --trace flag + 
wire trace + pretty print (fb-37) Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-cli/src/main.rs | 26 +++++++++++++++++- crates/kebab-cli/src/wire.rs | 53 +++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 21ee509..305397c 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -163,6 +163,13 @@ enum Cmd { /// p9-fb-36: filter to a single doc by id. #[arg(long)] doc_id: Option, + + /// p9-fb-37: emit pre-fusion lexical / vector / RRF candidate + /// lists + per-stage timing in the response. Bypasses cache + /// (debug intent — fresh run guaranteed). Requires embeddings + /// to be enabled. + #[arg(long)] + trace: bool, }, /// Retrieval-augmented question answering. @@ -669,6 +676,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { media, ingested_after, doc_id, + trace, } => { let cfg = kebab_config::Config::load(cli.config.as_deref())?; @@ -732,7 +740,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { max_tokens: *max_tokens, snippet_chars: *snippet_chars, cursor: cursor.clone(), - trace: false, + trace: *trace, }; // p9-fb-34: budget-aware path. --no-cache still bypasses the // App-level LRU; wire wrapper applies regardless. 
@@ -790,6 +798,22 @@ fn run(cli: &Cli) -> anyhow::Result<()> { let next = resp.next_cursor.as_deref().unwrap_or("(none)"); eprintln!("[truncated; use --cursor {next} for the next page]"); } + if *trace { + if let Some(t) = &resp.trace { + eprintln!(); + eprintln!("Trace:"); + eprintln!(" lexical ({} hits, {}ms):", t.lexical.len(), t.timing.lexical_ms); + for c in t.lexical.iter().take(3) { + eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0); + } + eprintln!(" vector ({} hits, {}ms):", t.vector.len(), t.timing.vector_ms); + for c in t.vector.iter().take(3) { + eprintln!(" rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0); + } + eprintln!(" fusion ({} inputs, {}ms)", t.rrf_inputs.len(), t.timing.fusion_ms); + eprintln!(" total: {}ms", t.timing.total_ms); + } + } } Ok(()) } diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs index 504288d..29ab7aa 100644 --- a/crates/kebab-cli/src/wire.rs +++ b/crates/kebab-cli/src/wire.rs @@ -81,11 +81,17 @@ pub fn wire_search_hit(h: &SearchHit) -> Value { /// array (`wire_search_hits`) — see HOTFIXES / fb-34 for the /// breaking shape change. 
pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value { - let v = serde_json::json!({ + let mut v = serde_json::json!({ "hits": r.hits.iter().map(wire_search_hit).collect::>(), "next_cursor": r.next_cursor, "truncated": r.truncated, }); + if let Some(trace) = &r.trace { + let trace_v = serde_json::to_value(trace).expect("SearchTrace serializes"); + if let Value::Object(ref mut map) = v { + map.insert("trace".to_string(), trace_v); + } + } tag_object(v, "search_response.v1") } @@ -348,4 +354,49 @@ mod tests { assert_eq!(paths.len(), 1); assert_eq!(paths[0].as_str(), Some("/tmp/x")); } + + #[test] + fn search_response_with_trace_serializes_trace_field() { + use kebab_core::{SearchTrace, TraceCandidate, TraceFusionInput, + TraceTiming, ChunkId, DocumentId, WorkspacePath}; + let r = kebab_app::SearchResponse { + hits: vec![], + next_cursor: None, + truncated: false, + trace: Some(SearchTrace { + lexical: vec![TraceCandidate { + chunk_id: ChunkId("c1".into()), + doc_id: DocumentId("d1".into()), + doc_path: WorkspacePath::new("a.md".into()).unwrap(), + rank: 1, + score: 0.42, + }], + vector: vec![], + rrf_inputs: vec![TraceFusionInput { + chunk_id: ChunkId("c1".into()), + lexical_rank: Some(1), + vector_rank: None, + fusion_score: 0.0, + }], + timing: TraceTiming { lexical_ms: 5, vector_ms: 0, fusion_ms: 1, total_ms: 7 }, + }), + }; + let v = wire_search_response(&r); + assert_eq!(schema_of(&v), Some("search_response.v1")); + assert!(v["trace"].is_object()); + assert_eq!(v["trace"]["timing"]["lexical_ms"], 5); + assert_eq!(v["trace"]["lexical"][0]["chunk_id"], "c1"); + } + + #[test] + fn search_response_without_trace_omits_field() { + let r = kebab_app::SearchResponse { + hits: vec![], + next_cursor: None, + truncated: false, + trace: None, + }; + let v = wire_search_response(&r); + assert!(v.get("trace").is_none(), "trace field absent when None"); + } } -- 2.49.1 From f7e2072d6693151683e7e67f42a32bf445dccf0a Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: 
Sun, 10 May 2026 13:21:33 +0900 Subject: [PATCH 09/13] test(cli): integration tests for --trace + schema breakdowns (fb-37) Also fixes App::search_with_opts trace branch to use NoopRetriever for SearchMode::Lexical, removing the embeddings requirement when the user only wants lexical-mode trace. --- crates/kebab-app/src/app.rs | 86 +++++++++++-------- .../kebab-cli/tests/wire_schema_breakdowns.rs | 57 ++++++++++++ crates/kebab-cli/tests/wire_search_trace.rs | 58 +++++++++++++ 3 files changed, 166 insertions(+), 35 deletions(-) create mode 100644 crates/kebab-cli/tests/wire_schema_breakdowns.rs create mode 100644 crates/kebab-cli/tests/wire_search_trace.rs diff --git a/crates/kebab-app/src/app.rs b/crates/kebab-app/src/app.rs index 7895459..a3d2c07 100644 --- a/crates/kebab-app/src/app.rs +++ b/crates/kebab-app/src/app.rs @@ -347,26 +347,36 @@ impl App { // p9-fb-37: when --trace is requested, bypass the LRU cache and // run through `HybridRetriever::search_with_trace`, which - // dispatches by mode internally. This requires embeddings (same - // as `--mode hybrid`); `require_embeddings()` surfaces the - // existing "switch to --mode lexical" error otherwise. + // dispatches by mode internally. Vector / hybrid modes require + // embeddings (same as `--mode hybrid`); lexical mode skips + // embedder construction via `NoopRetriever` so lexical-only + // workspaces (provider = "none") can use `--trace` without + // surfacing the "switch to --mode lexical" error. 
if opts.trace { let lex = Arc::new(LexicalRetriever::with_settings( self.sqlite.clone(), lexical_index_version(&self.config), self.config.search.snippet_chars, )) as Arc; - let (emb, vec_store) = self.require_embeddings()?; - let vec_iv = vector_index_version(emb.as_ref()); - let vec_dyn: Arc = vec_store; - let emb_dyn: Arc = emb; - let vec_retr = Arc::new(VectorRetriever::with_settings( - vec_dyn, - emb_dyn, - self.sqlite.clone(), - vec_iv, - self.config.search.snippet_chars, - )) as Arc; + let vec_retr: Arc = if matches!(query.mode, SearchMode::Lexical) { + // `HybridRetriever::search_with_trace` never invokes the + // vector retriever for `SearchMode::Lexical` (Task 4). + // A no-op stand-in lets us avoid the ~470 MB embedder + // load when the user only asked for lexical trace. + Arc::new(NoopRetriever) + } else { + let (emb, vec_store) = self.require_embeddings()?; + let vec_iv = vector_index_version(emb.as_ref()); + let vec_dyn: Arc = vec_store; + let emb_dyn: Arc = emb; + Arc::new(VectorRetriever::with_settings( + vec_dyn, + emb_dyn, + self.sqlite.clone(), + vec_iv, + self.config.search.snippet_chars, + )) as Arc + }; let hybrid = HybridRetriever::new(&self.config, lex, vec_retr); let (mut traced_hits, trace) = hybrid.search_with_trace(&fetch_query)?; @@ -800,6 +810,24 @@ fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion { IndexVersion(format!("lex:{}", config.chunking.chunker_version)) } +/// p9-fb-37: stand-in for the vector retriever in the trace path when +/// `query.mode == SearchMode::Lexical`. `HybridRetriever::search_with_trace`'s +/// Lexical branch never calls `vector.search()`, so returning an empty +/// hit list here is safe and lets lexical-only workspaces (embedding +/// `provider = "none"`) use `--trace` without paying the ~470 MB +/// embedder load. 
+struct NoopRetriever; + +impl Retriever for NoopRetriever { + fn search(&self, _q: &kebab_core::SearchQuery) -> anyhow::Result> { + Ok(Vec::new()) + } + + fn index_version(&self) -> kebab_core::IndexVersion { + kebab_core::IndexVersion("noop:trace".into()) + } +} + /// Compose a stable `IndexVersion` for the vector retriever. Tracks /// `(embedding_model, embedding_version, dimensions)` so a model swap /// flags drift via the existing index_version mismatch warning in @@ -944,12 +972,11 @@ mod tests_trace { #[test] fn search_response_trace_some_when_opts_trace_true_lexical_mode() { // Lexical mode doesn't require embeddings — the trace path - // builds HybridRetriever which holds both retrievers, but - // for SearchMode::Lexical only the lexical side is invoked. - // require_embeddings will fail if no embedding provider is - // configured. Default Config has provider = "none" so this - // test will fail unless we tolerate that. Skip the assertion - // if the call returns the embedding-disabled error. + // builds HybridRetriever with a `NoopRetriever` stand-in for + // the vector side, since `HybridRetriever::search_with_trace`'s + // Lexical branch never invokes `vector.search()`. Default + // Config has embedding `provider = "none"`, and lexical-mode + // trace must succeed under that config (no embedder load). let (_dir, app) = open_app_with_temp_dir(); let q = SearchQuery { text: "x".into(), @@ -961,20 +988,9 @@ mod tests_trace { trace: true, ..Default::default() }; - match app.search_with_opts(q, opts) { - Ok(resp) => { - assert!(resp.trace.is_some(), "trace populated when opts.trace=true"); - } - Err(e) => { - // Acceptable in test environment without embeddings — - // verify the error is the expected embedding-disabled - // shape, not an unrelated panic. 
- let msg = format!("{e:#}"); - assert!( - msg.contains("embedding") || msg.contains("--mode lexical"), - "unexpected error: {msg}" - ); - } - } + let resp = app + .search_with_opts(q, opts) + .expect("lexical-mode trace must succeed without embeddings"); + assert!(resp.trace.is_some(), "trace populated when opts.trace=true"); } } diff --git a/crates/kebab-cli/tests/wire_schema_breakdowns.rs b/crates/kebab-cli/tests/wire_schema_breakdowns.rs new file mode 100644 index 0000000..5696cd2 --- /dev/null +++ b/crates/kebab-cli/tests/wire_schema_breakdowns.rs @@ -0,0 +1,57 @@ +//! p9-fb-37: integration tests for `kebab schema --json` extended stats. + +mod common; + +use serde_json::Value; +use std::fs; +use std::process::Command; + +fn run_schema(cfg: &std::path::Path) -> Value { + let bin = env!("CARGO_BIN_EXE_kebab"); + let out = Command::new(bin) + .args(["--config", cfg.to_str().unwrap(), "schema", "--json"]) + .output() + .expect("run kebab schema"); + assert!( + out.status.success(), + "schema failed: stderr={}", + String::from_utf8_lossy(&out.stderr) + ); + serde_json::from_slice(&out.stdout).expect("valid JSON") +} + +#[test] +fn schema_stats_includes_breakdowns_on_fresh_corpus() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 0); + // Run a no-op ingest to bring up migrations + create the SQLite file. 
+ fs::write(workspace.join("placeholder.md"), "# placeholder\n").unwrap(); + common::ingest(&cfg, &workspace); + + let v = run_schema(&cfg); + let stats = &v["stats"]; + let m = stats["media_breakdown"].as_object().unwrap(); + assert_eq!(m.len(), 5, "5 media keys padded"); + for k in &["markdown", "pdf", "image", "audio", "other"] { + assert!(m[*k].is_number(), "media[{k}] is integer"); + } + assert!(stats["lang_breakdown"].is_object()); + assert!(stats["index_bytes"]["sqlite"].is_number()); + assert!(stats["index_bytes"]["lancedb"].is_number()); + assert!(stats["stale_doc_count"].is_number()); +} + +#[test] +fn schema_stats_breakdowns_after_ingest() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 0); + fs::write(workspace.join("a.md"), "---\nlang: en\n---\nhello\n").unwrap(); + fs::write(workspace.join("b.md"), "---\nlang: ko\n---\n안녕\n").unwrap(); + common::ingest(&cfg, &workspace); + + let v = run_schema(&cfg); + let stats = &v["stats"]; + assert_eq!(stats["media_breakdown"]["markdown"], 2); + assert!(stats["lang_breakdown"].is_object()); + assert!(stats["index_bytes"]["sqlite"].as_u64().unwrap() > 0); +} diff --git a/crates/kebab-cli/tests/wire_search_trace.rs b/crates/kebab-cli/tests/wire_search_trace.rs new file mode 100644 index 0000000..4b8daff --- /dev/null +++ b/crates/kebab-cli/tests/wire_search_trace.rs @@ -0,0 +1,58 @@ +//! p9-fb-37: integration tests for `kebab search --trace --json`. 
+ +mod common; + +use serde_json::Value; +use std::fs; + +#[test] +fn search_trace_json_includes_trace_block() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 0); + fs::write(workspace.join("doc1.md"), "# Title\n\nrust async hello\n").unwrap(); + common::ingest(&cfg, &workspace); + + let (stdout, _stderr) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--trace", "--json", "rust"], + ); + let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON"); + assert_eq!(v["schema_version"], "search_response.v1"); + assert!(v["trace"].is_object(), "trace block present"); + assert!(v["trace"]["timing"].is_object()); + assert!(v["trace"]["timing"]["total_ms"].is_number()); + assert!(v["trace"]["lexical"].is_array()); + assert!(v["trace"]["vector"].is_array()); + assert!(v["trace"]["rrf_inputs"].is_array()); +} + +#[test] +fn search_without_trace_omits_trace_field() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 0); + fs::write(workspace.join("doc1.md"), "# Title\n\nrust async hello\n").unwrap(); + common::ingest(&cfg, &workspace); + + let (stdout, _stderr) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--json", "rust"], + ); + let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON"); + assert!(v.get("trace").is_none(), "trace field absent without --trace"); +} + +#[test] +fn search_trace_lexical_mode_vector_list_empty() { + let dir = tempfile::tempdir().unwrap(); + let (cfg, workspace, _data) = common::write_config(dir.path(), 0); + fs::write(workspace.join("doc1.md"), "# Title\n\nrust async hello\n").unwrap(); + common::ingest(&cfg, &workspace); + + let (stdout, _stderr) = common::run_search_with_args( + &cfg, + &["--mode", "lexical", "--trace", "--json", "rust"], + ); + let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON"); + 
assert_eq!(v["trace"]["vector"].as_array().unwrap().len(), 0); + assert_eq!(v["trace"]["timing"]["vector_ms"], 0); +} -- 2.49.1 From 653e432a3050a2bf6c749f8a541a8f2fbb8e2aa5 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 13:32:30 +0900 Subject: [PATCH 10/13] feat(mcp): kebab__search trace input + output mirror (fb-37) Co-Authored-By: Claude Sonnet 4.6 --- crates/kebab-mcp/src/tools/search.rs | 15 ++- crates/kebab-mcp/tests/tools_call_fetch.rs | 1 + crates/kebab-mcp/tests/tools_call_search.rs | 4 + .../tests/tools_call_search_trace.rs | 104 ++++++++++++++++++ 4 files changed, 122 insertions(+), 2 deletions(-) create mode 100644 crates/kebab-mcp/tests/tools_call_search_trace.rs diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs index 167cb61..722dbdd 100644 --- a/crates/kebab-mcp/src/tools/search.rs +++ b/crates/kebab-mcp/src/tools/search.rs @@ -47,6 +47,10 @@ pub struct SearchInput { pub ingested_after: Option, /// p9-fb-36: filter to a single doc. pub doc_id: Option, + /// p9-fb-37: when true, include a `trace` field on the response + /// with pre-fusion lexical/vector candidate lists + per-stage timing. + /// Bypasses cache (debug intent — fresh run guaranteed). Default false. 
+ pub trace: Option, } pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { @@ -118,7 +122,7 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { max_tokens: input.max_tokens, snippet_chars: input.snippet_chars, cursor: input.cursor, - trace: false, + trace: input.trace.unwrap_or(false), }; let cfg_clone = (*state.config).clone(); match kebab_app::search_with_opts_with_config(cfg_clone, query, opts) { @@ -139,12 +143,19 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult { v }) .collect(); - let envelope = serde_json::json!({ + let mut envelope = serde_json::json!({ "schema_version": "search_response.v1", "hits": tagged, "next_cursor": resp.next_cursor, "truncated": resp.truncated, }); + if let Some(trace) = &resp.trace { + let trace_v = + serde_json::to_value(trace).unwrap_or(serde_json::Value::Null); + if let serde_json::Value::Object(ref mut map) = envelope { + map.insert("trace".to_string(), trace_v); + } + } match serde_json::to_string(&envelope) { Ok(json) => to_tool_success(json), Err(e) => to_tool_error(&anyhow::anyhow!(e)), diff --git a/crates/kebab-mcp/tests/tools_call_fetch.rs b/crates/kebab-mcp/tests/tools_call_fetch.rs index 8da70a7..821db4d 100644 --- a/crates/kebab-mcp/tests/tools_call_fetch.rs +++ b/crates/kebab-mcp/tests/tools_call_fetch.rs @@ -69,6 +69,7 @@ async fn fetch_tool_chunk_returns_fetch_result_v1() { media: None, ingested_after: None, doc_id: None, + trace: None, }, ); let search_text = match &search_result.content.first().unwrap().raw { diff --git a/crates/kebab-mcp/tests/tools_call_search.rs b/crates/kebab-mcp/tests/tools_call_search.rs index 58a32d8..58456f7 100644 --- a/crates/kebab-mcp/tests/tools_call_search.rs +++ b/crates/kebab-mcp/tests/tools_call_search.rs @@ -65,6 +65,7 @@ async fn search_tool_returns_search_response_v1() { media: None, ingested_after: None, doc_id: None, + trace: None, }, ); @@ -166,6 +167,7 @@ async fn 
search_with_doc_id_filter_returns_only_target() { media: None, ingested_after: None, doc_id: None, + trace: None, }, ); assert!( @@ -204,6 +206,7 @@ async fn search_with_doc_id_filter_returns_only_target() { media: None, ingested_after: None, doc_id: Some(target_doc_id.clone()), + trace: None, }, ); assert!( @@ -260,6 +263,7 @@ async fn search_with_invalid_ingested_after_returns_invalid_input() { media: None, ingested_after: Some("garbage".to_string()), doc_id: None, + trace: None, }, ); diff --git a/crates/kebab-mcp/tests/tools_call_search_trace.rs b/crates/kebab-mcp/tests/tools_call_search_trace.rs new file mode 100644 index 0000000..1cb07cd --- /dev/null +++ b/crates/kebab-mcp/tests/tools_call_search_trace.rs @@ -0,0 +1,104 @@ +//! p9-fb-37: integration test for `mcp__kebab__search` trace input/output. + +use std::fs; + +use kebab_config::Config; +use kebab_core::SourceScope; +use kebab_mcp::{KebabAppState, KebabHandler}; +use rmcp::model::RawContent; + +fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path) -> Config { + let mut cfg = Config::defaults(); + cfg.storage.data_dir = data_dir.to_string_lossy().into_owned(); + cfg.storage.model_dir = data_dir.join("models").to_string_lossy().into_owned(); + cfg.workspace.root = workspace_root.to_string_lossy().into_owned(); + cfg.workspace.exclude.clear(); + cfg.models.embedding.provider = "none".to_string(); + cfg.models.embedding.dimensions = 0; + cfg +} + +fn setup() -> (tempfile::TempDir, KebabHandler) { + let dir = tempfile::tempdir().unwrap(); + let data_dir = dir.path().join("data"); + let workspace_root = dir.path().join("notes"); + fs::create_dir_all(&data_dir).unwrap(); + fs::create_dir_all(&workspace_root).unwrap(); + let config = minimal_config(&data_dir, &workspace_root); + fs::write( + workspace_root.join("a.md"), + "# Alpha\n\nThis document mentions kebab and bread.", + ) + .unwrap(); + let scope = SourceScope { + root: workspace_root.clone(), + include: vec![], + exclude: 
vec![], + }; + let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap(); + let state = KebabAppState::new(config, None); + let handler = KebabHandler::new(state); + (dir, handler) +} + +fn make_input(trace: Option) -> kebab_mcp::tools::search::SearchInput { + kebab_mcp::tools::search::SearchInput { + query: "kebab".to_string(), + mode: Some("lexical".to_string()), + k: Some(5), + max_tokens: None, + snippet_chars: None, + cursor: None, + tags: None, + lang: None, + path_glob: None, + trust_min: None, + media: None, + ingested_after: None, + doc_id: None, + trace, + } +} + +fn extract_json(result: &rmcp::model::CallToolResult) -> serde_json::Value { + assert!( + !result.is_error.unwrap_or(false), + "expected isError=false, got {result:?}" + ); + let content = result.content.first().expect("at least one content item"); + let text = match &content.raw { + RawContent::Text(t) => &t.text, + other => panic!("expected Text content, got {other:?}"), + }; + serde_json::from_str(text).expect("valid JSON") +} + +#[tokio::test] +async fn search_with_trace_true_returns_trace_field() { + let (_dir, handler) = setup(); + let result = kebab_mcp::tools::search::handle(handler.state(), make_input(Some(true))); + let v = extract_json(&result); + assert_eq!(v["schema_version"], "search_response.v1"); + assert!(v["trace"].is_object(), "trace field present when trace:true"); + assert!(v["trace"]["timing"]["total_ms"].is_number()); + assert!(v["trace"]["lexical"].is_array()); + assert!(v["trace"]["vector"].is_array()); + assert!(v["trace"]["rrf_inputs"].is_array()); +} + +#[tokio::test] +async fn search_without_trace_omits_trace_field() { + let (_dir, handler) = setup(); + let result = kebab_mcp::tools::search::handle(handler.state(), make_input(None)); + let v = extract_json(&result); + assert_eq!(v["schema_version"], "search_response.v1"); + assert!(v.get("trace").is_none(), "trace absent when None"); +} + +#[tokio::test] +async fn 
search_with_trace_false_omits_trace_field() { + let (_dir, handler) = setup(); + let result = kebab_mcp::tools::search::handle(handler.state(), make_input(Some(false))); + let v = extract_json(&result); + assert!(v.get("trace").is_none(), "trace absent when false"); +} -- 2.49.1 From 5687cbc0e28e5a1e559fe8cacdca866e021d49a2 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 13:39:11 +0900 Subject: [PATCH 11/13] feat(tui): search pane t-key opens TracePopup (fb-37) --- crates/kebab-tui/src/app.rs | 3 + crates/kebab-tui/src/cheatsheet.rs | 1 + crates/kebab-tui/src/lib.rs | 1 + crates/kebab-tui/src/run.rs | 43 +++++++++ crates/kebab-tui/src/search.rs | 43 +++++++++ crates/kebab-tui/src/trace_popup.rs | 139 ++++++++++++++++++++++++++++ 6 files changed, 230 insertions(+) create mode 100644 crates/kebab-tui/src/trace_popup.rs diff --git a/crates/kebab-tui/src/app.rs b/crates/kebab-tui/src/app.rs index 1d53d0c..a87f8c8 100644 --- a/crates/kebab-tui/src/app.rs +++ b/crates/kebab-tui/src/app.rs @@ -387,6 +387,8 @@ pub struct App { pub ask: Option, /// Populated by p9-4. pub inspect: Option, + /// p9-fb-37: trace popup state, `Some` while open. + pub trace_popup: Option, /// Populated by p9-fb-03 when the user kicks off an in-shell /// ingest (Library `r`). Cleared by the run loop a few seconds /// after the run reaches a terminal event. 
@@ -461,6 +463,7 @@ impl App { search: None, ask: None, inspect: None, + trace_popup: None, ingest_state: None, error_overlay: None, should_quit: false, diff --git a/crates/kebab-tui/src/cheatsheet.rs b/crates/kebab-tui/src/cheatsheet.rs index 1af1751..f490ff9 100644 --- a/crates/kebab-tui/src/cheatsheet.rs +++ b/crates/kebab-tui/src/cheatsheet.rs @@ -80,6 +80,7 @@ pub fn render_cheatsheet(f: &mut Frame, area: Rect, app: &App) { ("Delete", "remove char at cursor"), ("g", "open hit's citation in $EDITOR (Normal)"), ("o", "inspect selected hit's chunk (Normal — was `i` pre-fb-21)"), + ("t", "open retrieval trace popup (Normal — p9-fb-37)"), ("i", "Normal → Insert (toggle back to typing)"), ("Esc", "back to Library"), ]); diff --git a/crates/kebab-tui/src/lib.rs b/crates/kebab-tui/src/lib.rs index d61c6f2..1457c1e 100644 --- a/crates/kebab-tui/src/lib.rs +++ b/crates/kebab-tui/src/lib.rs @@ -27,6 +27,7 @@ mod run; mod search; mod terminal; mod theme; +pub mod trace_popup; pub use input::{InputBuffer, display_width, place_cursor_x, truncate_to_display_width}; pub use theme::{Palette, Role, Theme}; diff --git a/crates/kebab-tui/src/run.rs b/crates/kebab-tui/src/run.rs index cc2db24..fb24b22 100644 --- a/crates/kebab-tui/src/run.rs +++ b/crates/kebab-tui/src/run.rs @@ -130,6 +130,21 @@ pub(crate) fn run_loop(app: &mut App) -> Result<()> { if event::poll(POLL_INTERVAL)? { match event::read()? { Event::Key(key) if key.kind == KeyEventKind::Press => { + // p9-fb-37: trace popup eats keys while open. + // Sits ahead of cheatsheet + mode + pane dispatch + // so Esc / j / k / arrows route to the popup + // instead of leaking through to the search pane. + if app.trace_popup.is_some() { + let close = if let Some(popup) = app.trace_popup.as_mut() { + crate::trace_popup::handle_key_trace_popup(popup, key) + } else { + false + }; + if close { + app.trace_popup = None; + } + continue; + } // p9-fb-13: cheatsheet popup toggle takes // precedence over both mode + pane dispatch. 
// F1 toggles open/close. While visible, Esc @@ -255,6 +270,12 @@ fn render_root(f: &mut Frame, app: &App) { } render_status_bar(f, outer[2], app); render_key_hints(f, outer[3], app); + // p9-fb-37: trace popup overlays on top of pane content but + // below the error overlay (errors are higher-priority modal). + if let Some(popup) = &app.trace_popup { + let popup_area = centered_rect(80, 80, f.area()); + crate::trace_popup::render_trace_popup(f, popup_area, popup); + } if let Some(err) = &app.error_overlay { render_error_overlay(f, f.area(), err, &app.theme); } @@ -263,6 +284,28 @@ fn render_root(f: &mut Frame, app: &App) { } } +/// p9-fb-37: centered sub-rect helper for the trace popup. Returns +/// a rect of `percent_x` × `percent_y` percent of `r`, centered. +fn centered_rect(percent_x: u16, percent_y: u16, r: ratatui::layout::Rect) -> ratatui::layout::Rect { + use ratatui::layout::{Constraint, Direction, Layout}; + let popup_layout = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Percentage((100 - percent_y) / 2), + Constraint::Percentage(percent_y), + Constraint::Percentage((100 - percent_y) / 2), + ]) + .split(r); + Layout::default() + .direction(Direction::Horizontal) + .constraints([ + Constraint::Percentage((100 - percent_x) / 2), + Constraint::Percentage(percent_x), + Constraint::Percentage((100 - percent_x) / 2), + ]) + .split(popup_layout[1])[1] +} + fn render_header(f: &mut Frame, area: Rect, app: &App) { let pane_label = match app.focus { Pane::Library => "Library", diff --git a/crates/kebab-tui/src/search.rs b/crates/kebab-tui/src/search.rs index cd1fb99..9166fe3 100644 --- a/crates/kebab-tui/src/search.rs +++ b/crates/kebab-tui/src/search.rs @@ -209,6 +209,49 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { // pre-fb-12 SHIFT/none heuristic). let is_normal = state.mode == crate::app::Mode::Normal; + // p9-fb-37: `t` opens the trace popup. 
Re-runs the last submitted + // query with SearchOpts.trace = true. Bypasses cache by going + // through `search_with_opts_with_config` (Task 5 wires opts.trace + // to skip the LRU cache). + if is_normal + && matches!( + (key.code, key.modifiers), + (KeyCode::Char('t'), KeyModifiers::NONE) + ) + { + let (last_query, has_results) = { + let s = state.search.as_ref().unwrap(); + (s.last_query.clone(), !s.hits.is_empty()) + }; + if !has_results { + return KeyOutcome::Continue; + } + if let Some((q_text, q_mode)) = last_query { + let q = kebab_core::SearchQuery { + text: q_text, + mode: q_mode, + k: state.config.search.default_k, + filters: kebab_core::SearchFilters::default(), + }; + let opts = kebab_core::SearchOpts { + trace: true, + ..Default::default() + }; + match kebab_app::search_with_opts_with_config(state.config.clone(), q, opts) { + Ok(resp) => { + if let Some(t) = resp.trace { + state.trace_popup = Some(crate::trace_popup::TracePopupState::new(t)); + } + } + Err(_) => { + // Silent failure — trace is debug-only; user + // can still see search hits without it. + } + } + } + return KeyOutcome::Continue; + } + // p9-fb-21: chunk-inspect rebound from `i` to `o` (vim "open"). // The `i` key is now the universal Normal→Insert toggle (handled // in `mode_intercept`), so it cannot also mean "inspect chunk" diff --git a/crates/kebab-tui/src/trace_popup.rs b/crates/kebab-tui/src/trace_popup.rs new file mode 100644 index 0000000..5374936 --- /dev/null +++ b/crates/kebab-tui/src/trace_popup.rs @@ -0,0 +1,139 @@ +//! p9-fb-37: TUI trace popup. Opens from Search pane via `t` key +//! when results are visible. Re-runs the current query with +//! `SearchOpts.trace = true` and displays the lex / vec / rrf union +//! + per-stage timing as a single scroll list. 
+ +use crossterm::event::{KeyCode, KeyEvent}; +use kebab_core::SearchTrace; +use ratatui::Frame; +use ratatui::layout::Rect; +use ratatui::style::{Modifier, Style}; +use ratatui::text::{Line, Span}; +use ratatui::widgets::{Block, Borders, Paragraph, Wrap}; + +#[derive(Debug, Clone)] +pub struct TracePopupState { + pub trace: SearchTrace, + pub scroll: u16, +} + +impl TracePopupState { + pub fn new(trace: SearchTrace) -> Self { + Self { trace, scroll: 0 } + } +} + +pub fn render_trace_popup(f: &mut Frame, area: Rect, state: &TracePopupState) { + let mut lines: Vec = Vec::new(); + let bold = Style::default().add_modifier(Modifier::BOLD); + + lines.push(Line::from(Span::styled( + format!( + "Lexical ({} hits, {} ms)", + state.trace.lexical.len(), + state.trace.timing.lexical_ms, + ), + bold, + ))); + for c in &state.trace.lexical { + lines.push(Line::from(format!( + " #{:>2} score={:.4} chunk={}", + c.rank, c.score, c.chunk_id.0 + ))); + } + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + format!( + "Vector ({} hits, {} ms)", + state.trace.vector.len(), + state.trace.timing.vector_ms, + ), + bold, + ))); + for c in &state.trace.vector { + lines.push(Line::from(format!( + " #{:>2} score={:.4} chunk={}", + c.rank, c.score, c.chunk_id.0 + ))); + } + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + format!( + "RRF inputs ({} entries, {} ms fusion)", + state.trace.rrf_inputs.len(), + state.trace.timing.fusion_ms, + ), + bold, + ))); + for e in &state.trace.rrf_inputs { + lines.push(Line::from(format!( + " chunk={} lex={:?} vec={:?} fusion={:.4}", + e.chunk_id.0, e.lexical_rank, e.vector_rank, e.fusion_score + ))); + } + lines.push(Line::from("")); + lines.push(Line::from(Span::styled( + format!("Total: {} ms", state.trace.timing.total_ms), + bold, + ))); + + let block = Block::default() + .title("Trace — Esc to close, j/k or ↑↓ to scroll") + .borders(Borders::ALL); + let p = Paragraph::new(lines) + .block(block) + 
.scroll((state.scroll, 0)) + .wrap(Wrap { trim: false }); + f.render_widget(p, area); +} + +/// Handle keys while popup is open. Returns true if the popup should close. +pub fn handle_key_trace_popup(state: &mut TracePopupState, key: KeyEvent) -> bool { + match key.code { + KeyCode::Esc => true, + KeyCode::Char('j') | KeyCode::Down => { + state.scroll = state.scroll.saturating_add(1); + false + } + KeyCode::Char('k') | KeyCode::Up => { + state.scroll = state.scroll.saturating_sub(1); + false + } + _ => false, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crossterm::event::KeyModifiers; + use kebab_core::TraceTiming; + + fn dummy_state() -> TracePopupState { + TracePopupState::new(SearchTrace { + lexical: vec![], + vector: vec![], + rrf_inputs: vec![], + timing: TraceTiming::default(), + }) + } + + #[test] + fn esc_closes() { + let mut s = dummy_state(); + assert!(handle_key_trace_popup( + &mut s, + KeyEvent::new(KeyCode::Esc, KeyModifiers::NONE), + )); + } + + #[test] + fn j_scrolls_down() { + let mut s = dummy_state(); + assert!(!handle_key_trace_popup( + &mut s, + KeyEvent::new(KeyCode::Char('j'), KeyModifiers::NONE), + )); + assert_eq!(s.scroll, 1); + } +} -- 2.49.1 From a40593590b9277d345e049ae472b7210314d2107 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 14:04:54 +0900 Subject: [PATCH 12/13] docs(fb-37): wire schema + README + SMOKE + INDEX + SKILL --- README.md | 4 ++-- crates/kebab-cli/src/main.rs | 3 ++- docs/SMOKE.md | 16 +++++++++++++ docs/wire-schema/v1/schema.schema.json | 24 +++++++++++++++++++ .../v1/search_response.schema.json | 22 ++++++++++++++++- integrations/claude-code/kebab/SKILL.md | 5 ++-- tasks/INDEX.md | 2 +- tasks/p9/p9-fb-37-trace-and-stats.md | 7 ++++-- 8 files changed, 74 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 7697391..5dd9ef7 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ kebab doctor |------|------| | `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 | | `kebab 
ingest []` | Markdown / 이미지 / PDF 색인 (idempotent). TTY 에서는 stderr 진행 바, non-TTY (CI / pipe) 는 stderr 한 줄씩, `--json` 은 stdout 에 `ingest_progress.v1` 라인 streaming 후 마지막에 `ingest_report.v1`. Ctrl-C 한 번이면 현재 asset 마무리 후 abort (부분 commit 보존, idempotent re-run), 두 번째 Ctrl-C 는 hard exit. Markdown title 이 frontmatter 에 없어도 첫 H1 → H2 → 첫 paragraph 80 자 → 파일명 순으로 자동 채움 (parser_version `md-frontmatter-v2`) — 기존 색인된 doc 도 다음 ingest 에서 새 title 로 갱신. **Incremental** (p9-fb-23): 두 번째 이후의 ingest 는 변하지 않은 doc (blake3 + parser/chunker/embedder version 모두 동일) 의 parse/chunk/embed/vector upsert 를 자동 스킵. final summary 에 `N unchanged` 카운트 표시. `--force-reingest` 로 skip 무시 강제 재처리. **지원 형식** (extractor 자동 결정 — config 에 명시 불가): Markdown (`.md`), 이미지 (`.png` / `.jpg` / `.jpeg`, OCR + caption), PDF (`.pdf`). 다른 확장자는 자동 skip — `IngestItem.warnings` 에 사유 (`"unsupported media type: .docx"` 등), `IngestReport.skipped_by_extension` 에 카운트 분류, CLI / TUI summary 에 breakdown 표시. | -| `kebab search --mode {lexical,vector,hybrid} "" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor ] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` 는 반복 가능 flag (`--tag rust --tag async`) 로 OR 매칭, `--media` 는 `,` 구분 다중 값 OR 매칭, 나머지 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). 
`--ingested-after` 는 RFC3339 UTC — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 `markdown` alias 로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). | +| `kebab search --mode {lexical,vector,hybrid} "" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor ] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID] [--trace]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` 는 반복 가능 flag (`--tag rust --tag async`) 로 OR 매칭, `--media` 는 `,` 구분 다중 값 OR 매칭, 나머지 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). `--ingested-after` 는 RFC3339 UTC — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 `markdown` alias 로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). **`--trace` (p9-fb-37)** — `search_response.v1.trace` 에 lexical / vector pre-fusion 후보 + RRF union + per-stage timing (`lexical_ms` / `vector_ms` / `fusion_ms` / `total_ms`) 노출. trace 요청은 캐시 우회 (`--no-cache` 없이도 항상 cold). | | `kebab list docs` | 색인된 문서 목록 | | `kebab inspect doc ` / `kebab inspect chunk ` | raw record 보기 | | `kebab fetch chunk [--context N]` / `kebab fetch doc [--max-tokens N]` / `kebab fetch span [--max-tokens N]` | (p9-fb-35) verbatim text fetch from indexed corpus. wire = `fetch_result.v1` (kind discriminator). chunk: target + ±N ordinal-context chunks. doc: full normalized markdown. 
span: 1-based line range (PDF/audio rejected as `error.v1.code = span_not_supported`). chars/4 budget on doc/span. | @@ -80,7 +80,7 @@ kebab doctor | `kebab tui` | Ratatui 셸 (Library + Search + Ask + Inspect 패널, desktop 진행 중). Library 에서 `r` 키로 background ingest 시작 — 화면 하단 status bar 가 진행 표시, 완료/abort 시 final 라인 잠시 유지 후 자동 hide. ingest 진행 중 `Esc` / `Ctrl-C` 가 cancel signal (그 외에는 quit). vim-style mode (header 우측 `-- NORMAL --` / `-- INSERT --`) — Library/Inspect 는 자동 NORMAL, Search/Ask 는 자동 INSERT. `i` 로 Normal→Insert (모든 pane — p9-fb-21), `Esc` 로 Insert→Normal 어디서나. mode-authoritative dispatch — Search 의 `j/k/o/g`, Ask 의 `e/j/k` 는 NORMAL 모드에서만 명령으로 동작, INSERT 에서는 입력 문자로 typing. (Search 의 chunk inspect 키는 `i`→`o` 로 rebind — `i` 가 universal Insert toggle.) **`F1` 로 cheatsheet popup** (현재 pane 의 키 매핑 + global 토글 표) — `Esc` / `F1` 로 닫기. Search 패널은 200ms debounce 후 background worker 가 검색 — 키 입력으로 UI freeze 안 됨, 사용자가 계속 타이핑하면 stale 결과 자동 폐기 (generation counter). Ask 패널은 multi-turn — 같은 conversation 안에서 Q1/A1, Q2/A2 transcript 누적, 다음 질문이 이전 턴을 history 로 받아 답변. 답변 본문은 markdown 렌더 (bold/italic/inline code/heading/list/code fence/table/blockquote, raw `**bold**` 가 실제 굵게 표시). `Ctrl-L` 로 새 conversation 시작. Search 의 `g` 키가 `$EDITOR` (기본 `vi`) 로 hit 의 citation 위치 열기 — 종료 후 TUI 화면이 자동으로 깨끗이 redraw. CLI `kebab ask` 는 raw markdown 그대로 (terminal 호환성 위해). Library 의 doc-list 가 한글 / 일본어 / 중국어 (CJK) 제목을 wide-char 정확한 column width 로 truncate — 한글 제목이 한 줄을 넘기지 않음 (CJK 1 자 = 2 col). Search/Ask/Filter 입력의 cursor 가 wide char 위에서 column 단위로 정렬 — 한글 입력 시 caret 이 글자 옆에 정확히 놓임. `← / →` 로 입력 문자열 중간 cursor 이동 (한글 한 글자 = 2 column 이라도 한 번에 이동), `Home / End` 로 양 끝 점프, `Delete` 로 cursor 위치 char 삭제 — 모든 input pane (Ask / Search / Library filter overlay) 동일 (p9-fb-22). Ask 트랜스크립트는 새 답변이 viewport 아래로 누적될 때 자동으로 tail 을 따라감 (auto-scroll); `j` / `k` 로 위로 스크롤하면 freeze, `Shift-G` 로 다시 bottom + auto-tail 재개. 
화면 하단 hint line 은 한국어 동사구로 (`"위로"` / `"아래로"` / `"필터"` / `"타이핑 검색어"` / `"Esc 로 NORMAL 모드"` / `"i 입력모드"` 등) + 현재 (pane, mode) 조합에 맞춰 자동 분기, **첫 fragment 가 항상 `F1 도움말`** (cheatsheet 발견성 보장). 모든 모드에서 항상 떠 있는 상태바 — `kebab v docs │ ` (state: streaming/searching/indexing/idle, ingest 진행 중에는 progress 가 같은 자리에 흡수됨). Ask 진입 시 conversation id 8 자 prefix 도 함께 표시. Ask 트랜스크립트와 Inspect 양쪽에서 `PgUp / PgDn` 으로 10 줄씩 페이지 스크롤. Library 의 doc list 위에는 `TITLE / TAGS / UPDATED / CHUNKS` 컬럼 헤더 행 표시 (display-width 정렬, Hangul / CJK 안전). | | `kebab reset [--all / --data-only / --vector-only / --config-only] [--yes]` | XDG 데이터 wipe. **Irreversible.** TTY 면 confirm prompt, 아니면 `--yes` 필수. `--vector-only` 는 SQLite `embedding_records` 도 함께 truncate (orphan 방지) | | `kebab eval run / compare` | golden query 회귀 측정 | -| `kebab schema [--json]` | introspection — wire schemas / capabilities / models / stats 한 번에. `--json` 은 `schema.v1` wire; 사람 모드는 서식 출력. | +| `kebab schema [--json]` | introspection — wire schemas / capabilities / models / stats 한 번에. `--json` 은 `schema.v1` wire; 사람 모드는 서식 출력. **stats 에 (p9-fb-37) `media_breakdown` (5 keys: markdown / pdf / image / audio / other) + `lang_breakdown` (BCP-47 코드, NULL 은 literal `"null"`) + `index_bytes` (sqlite + lancedb on-disk 합계) + `stale_doc_count` (`config.search.stale_threshold_days` 초과 doc 수) 추가.** | | `kebab ingest-file ` | 단일 파일 ingest (workspace 외부 가능). 바이트는 `/_external/.` 로 copy. `.kebabignore` 매치 시 stderr warn 후 진행 (explicit ingest 가 bypass intent). | | `kebab ingest-stdin --title [--source-uri ]` | stdin 의 markdown 본문 ingest. frontmatter (title + source_uri) 자동 prepend. v1 markdown only. | | `kebab mcp` | MCP (Model Context Protocol) stdio server. agent host (Claude Code / Cursor / OpenAI Agents) 가 spawn 하여 tool 호출 (`search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`). `--config` honor. 
| diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 305397c..fa11508 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -167,7 +167,8 @@ enum Cmd { /// p9-fb-37: emit pre-fusion lexical / vector / RRF candidate /// lists + per-stage timing in the response. Bypasses cache /// (debug intent — fresh run guaranteed). Requires embeddings - /// to be enabled. + /// when `--mode hybrid` or `--mode vector`; lexical mode runs + /// without embeddings via a no-op vector stub. #[arg(long)] trace: bool, }, diff --git a/docs/SMOKE.md b/docs/SMOKE.md index 9a68800..3121076 100644 --- a/docs/SMOKE.md +++ b/docs/SMOKE.md @@ -206,6 +206,22 @@ kebab search "rust" --doc-id "" --tag rust --json Bad `--ingested-after` → `error.v1.code = config_invalid`, exit 2. Unknown `--media` value → silently empty (no error). +### Trace + stats (fb-37) + +Re-run a search with `--trace` to see per-stage candidate lists + timing: + +```bash +kebab --config /tmp/kebab-smoke/config.toml search "rust async" --trace --json | jq .trace +``` + +Inspect the corpus health surface: + +```bash +kebab --config /tmp/kebab-smoke/config.toml schema --json | jq .stats +``` + +Look for: `media_breakdown` (5 keys), `lang_breakdown`, `index_bytes`, `stale_doc_count`. + ## P6-4 이미지 ingestion 옵션 `config.toml` 에 다음 절을 추가하면 `kebab ingest` 가 `**/*.png` / `**/*.jpg` 등 이미지 자산도 함께 색인합니다 (텍스트만 색인하려면 생략): diff --git a/docs/wire-schema/v1/schema.schema.json b/docs/wire-schema/v1/schema.schema.json index f168ff4..0866134 100644 --- a/docs/wire-schema/v1/schema.schema.json +++ b/docs/wire-schema/v1/schema.schema.json @@ -54,6 +54,30 @@ { "type": "string", "format": "date-time" }, { "type": "null" } ] + }, + "media_breakdown": { + "type": "object", + "description": "p9-fb-37: per-media-kind doc count. 
5 keys (markdown/pdf/image/audio/other), zero-padded.", + "additionalProperties": { "type": "integer", "minimum": 0 } + }, + "lang_breakdown": { + "type": "object", + "description": "p9-fb-37: per-language doc count. NULL lang keyed as the literal string 'null'. Map may be empty on empty corpus.", + "additionalProperties": { "type": "integer", "minimum": 0 } + }, + "index_bytes": { + "type": "object", + "description": "p9-fb-37: on-disk byte sums.", + "required": ["sqlite", "lancedb"], + "properties": { + "sqlite": { "type": "integer", "minimum": 0 }, + "lancedb": { "type": "integer", "minimum": 0 } + } + }, + "stale_doc_count": { + "type": "integer", + "minimum": 0, + "description": "p9-fb-37: docs whose updated_at exceeds config.search.stale_threshold_days. 0 when threshold=0." } } } diff --git a/docs/wire-schema/v1/search_response.schema.json b/docs/wire-schema/v1/search_response.schema.json index 20e6eb8..ca89792 100644 --- a/docs/wire-schema/v1/search_response.schema.json +++ b/docs/wire-schema/v1/search_response.schema.json @@ -9,6 +9,26 @@ "schema_version": { "const": "search_response.v1" }, "hits": { "type": "array", "description": "search_hit.v1[]" }, "next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." }, - "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." } + "truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." }, + "trace": { + "type": "object", + "description": "p9-fb-37: present iff caller passed --trace / SearchOpts.trace=true. 
Lex/vec pre-fusion lists + RRF union + per-stage timing.", + "required": ["lexical", "vector", "rrf_inputs", "timing"], + "properties": { + "lexical": { "type": "array", "items": { "type": "object" } }, + "vector": { "type": "array", "items": { "type": "object" } }, + "rrf_inputs":{ "type": "array", "items": { "type": "object" } }, + "timing": { + "type": "object", + "required": ["lexical_ms", "vector_ms", "fusion_ms", "total_ms"], + "properties": { + "lexical_ms": { "type": "integer", "minimum": 0 }, + "vector_ms": { "type": "integer", "minimum": 0 }, + "fusion_ms": { "type": "integer", "minimum": 0 }, + "total_ms": { "type": "integer", "minimum": 0 } + } + } + } + } } } diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md index fea4e2e..f3571af 100644 --- a/integrations/claude-code/kebab/SKILL.md +++ b/integrations/claude-code/kebab/SKILL.md @@ -48,7 +48,7 @@ Use when the user wants to **find** a doc, or when you (the model) need raw chun Input: ```json -{ "query": "", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null, "tags": null, "lang": null, "path_glob": null, "trust_min": null, "media": null, "ingested_after": null, "doc_id": null } +{ "query": "", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null, "tags": null, "lang": null, "path_glob": null, "trust_min": null, "media": null, "ingested_after": null, "doc_id": null, "trace": null } ``` - `mode = "hybrid"` is the default-correct choice. Use `"vector"` for semantic-only ("docs about X concept"), `"lexical"` for exact strings ("the literal flag `--foo-bar`"). @@ -57,6 +57,7 @@ Input: - Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`. 
- Cite back to the user as `doc_path § heading_path[-1]` so they can open the source. - When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one. +- **`trace: true` (p9-fb-37)** — debug aid. Response carries an extra `trace` block: `lexical[]` + `vector[]` (pre-fusion candidates), `rrf_inputs[]` (RRF union before final cut), and `timing` (`lexical_ms`, `vector_ms`, `fusion_ms`, `total_ms`). Trace bypasses the search cache (always cold). Use sparingly — it bloats the wire response and is for diagnosing "why did this hit / not hit", not normal retrieval. ### `mcp__kebab__ask` — when you need the answer @@ -133,7 +134,7 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across Before using streaming or multi-turn features, probe what this binary supports — call `mcp__kebab__schema` (or CLI `kebab schema --json`): -Returns `schema.v1`: `wire.schemas` (supported wire ids), `capabilities` (bool flags — e.g. `streaming_ask`, `rag_multi_turn`), `models` (version cascade 6-axis), `stats` (doc/chunk/asset count + last_ingest_at). Gate streaming / session flows on `capabilities.streaming_ask` / `capabilities.rag_multi_turn` being `true`. Cheap call (no LLM), once per session. +Returns `schema.v1`: `wire.schemas` (supported wire ids), `capabilities` (bool flags — e.g. 
`streaming_ask`, `rag_multi_turn`), `models` (version cascade 6-axis), `stats` (doc/chunk/asset count + last_ingest_at, plus p9-fb-37 health surface: `media_breakdown` per-kind doc counts (5 zero-padded keys: markdown / pdf / image / audio / other), `lang_breakdown` per BCP-47 lang (NULL keyed as the literal string `"null"`), `index_bytes.{sqlite,lancedb}` on-disk byte sums, `stale_doc_count` for docs older than `config.search.stale_threshold_days`). Gate streaming / session flows on `capabilities.streaming_ask` / `capabilities.rag_multi_turn` being `true`. Cheap call (no LLM), once per session. ## Quick health check diff --git a/tasks/INDEX.md b/tasks/INDEX.md index db35a0b..803acbc 100644 --- a/tasks/INDEX.md +++ b/tasks/INDEX.md @@ -125,7 +125,7 @@ P0~P5 는 직렬. P6~P9 는 P5 이후 병렬 가능. - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09) - [p9-fb-35 verbatim fetch](p9/p9-fb-35-verbatim-fetch.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09) - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ✅ 머지 (2026-05-10) - - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ⏳ 미구현, brainstorm 필요 (depends_on 27) + - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ✅ 머지 (2026-05-10) ### 🎯 0.5.0 — RAG quality (cascade 동반: V00X + reindex) - [p9-fb-38 score semantics](p9/p9-fb-38-score-semantics.md) — ⏳ 미구현, brainstorm 필요 diff --git a/tasks/p9/p9-fb-37-trace-and-stats.md b/tasks/p9/p9-fb-37-trace-and-stats.md index e881ce9..4ed057e 100644 --- a/tasks/p9/p9-fb-37-trace-and-stats.md +++ b/tasks/p9/p9-fb-37-trace-and-stats.md @@ -3,7 +3,7 @@ phase: P9 component: kebab-cli + kebab-search + kebab-rag task_id: p9-fb-37 title: "Trace (--trace) + stats — pipeline 가시성" -status: open +status: completed target_version: 0.4.0 depends_on: [p9-fb-27] unblocks: [] @@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent / 사용자가 "왜 # p9-fb-37 — Trace + stats -> ⏳ **백로그 only — 미구현 (Nice-to-have).** 본 spec 은 도그푸딩 피드백 
skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. trace 의 verbosity level / wire shape / stats 의 별도 명령 vs schema 통합 brainstorm 후 확정. +> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. +> +> - Design: [`docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md`](../../docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md) +> - Plan: [`docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md`](../../docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md) ## 증상 / 동기 -- 2.49.1 From 6a33d08aea09c515dccfc9411a2f8dfaa9836166 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 16:26:34 +0900 Subject: [PATCH 13/13] fix(fb-37): address PR #129 round 1 review - doc TraceFusionInput.fusion_score semantics (single-mode vs hybrid) - comment why total_ms vs stage sum can drift (millis truncation) - TODO marker on TUI trace popup filter passthrough Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-core/src/search.rs | 3 +++ crates/kebab-search/src/hybrid.rs | 4 ++++ crates/kebab-tui/src/search.rs | 2 ++ 3 files changed, 9 insertions(+) diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs index bb66be9..38e41ad 100644 --- a/crates/kebab-core/src/search.rs +++ b/crates/kebab-core/src/search.rs @@ -158,6 +158,9 @@ pub struct TraceFusionInput { pub chunk_id: ChunkId, pub lexical_rank: Option, pub vector_rank: Option, + /// Hybrid mode: normalized RRF score in `[0, 1]`. + /// Lexical / Vector mode: equals the underlying retriever's score + /// (no fusion ran). 0.0 for chunks dropped past `target_k`. 
pub fusion_score: f32, } diff --git a/crates/kebab-search/src/hybrid.rs b/crates/kebab-search/src/hybrid.rs index 58b6678..7f415a9 100644 --- a/crates/kebab-search/src/hybrid.rs +++ b/crates/kebab-search/src/hybrid.rs @@ -391,6 +391,10 @@ impl HybridRetriever { } } + // total_ms is wall-clock from start; per-stage `lexical_ms` / + // `vector_ms` / `fusion_ms` each truncate to whole millis via + // `as_millis() as u64`, so their sum can drift below total + // (sub-ms losses) — DO NOT assert `total_ms == sum(stages)`. tb.timing.total_ms = start_total.elapsed().as_millis() as u64; Ok((final_hits, tb.into_trace())) } diff --git a/crates/kebab-tui/src/search.rs b/crates/kebab-tui/src/search.rs index 9166fe3..13c9f43 100644 --- a/crates/kebab-tui/src/search.rs +++ b/crates/kebab-tui/src/search.rs @@ -227,6 +227,8 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome { return KeyOutcome::Continue; } if let Some((q_text, q_mode)) = last_query { + // TODO: thread filters when TUI gains a filter UI (currently + // mirrors fire_search which also passes default filters). let q = kebab_core::SearchQuery { text: q_text, mode: q_mode, -- 2.49.1