Merge pull request 'chore: bump version 0.4 → 0.5' (#130) from chore/bump-v0.5.0 into main

Reviewed-on: #130
chore: bump version 0.4 → 0.5
2026-05-10 08:08:06 +00:00 · 2026-05-10 17:04:51 +09:00 · 2026-05-10 07:59:56 +00:00 · 2026-05-10 16:26:34 +09:00 · 2026-05-10 14:13:47 +09:00 · 2026-05-10 13:39:11 +09:00
85 changed files with 15948 additions and 222 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3525,9 +3525,10 @@ dependencies = [

 [[package]]
 name = "kebab-app"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
+ "base64 0.22.1",
 "blake3",
 "dirs 5.0.1",
 "ignore",
@@ -3568,7 +3569,7 @@ dependencies = [

 [[package]]
 name = "kebab-chunk"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "blake3",
@@ -3583,7 +3584,7 @@ dependencies = [

 [[package]]
 name = "kebab-cli"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "clap",
@@ -3604,7 +3605,7 @@ dependencies = [

 [[package]]
 name = "kebab-config"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "dirs 5.0.1",
@@ -3619,7 +3620,7 @@ dependencies = [

 [[package]]
 name = "kebab-core"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "blake3",
@@ -3633,7 +3634,7 @@ dependencies = [

 [[package]]
 name = "kebab-embed"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "blake3",
@@ -3647,7 +3648,7 @@ dependencies = [

 [[package]]
 name = "kebab-embed-local"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "fastembed",
@@ -3660,7 +3661,7 @@ dependencies = [

 [[package]]
 name = "kebab-eval"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "kebab-app",
@@ -3679,7 +3680,7 @@ dependencies = [

 [[package]]
 name = "kebab-llm"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "kebab-core",
@@ -3688,7 +3689,7 @@ dependencies = [

 [[package]]
 name = "kebab-llm-local"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "kebab-config",
@@ -3705,7 +3706,7 @@ dependencies = [

 [[package]]
 name = "kebab-mcp"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "kebab-app",
@@ -3716,13 +3717,14 @@ dependencies = [
 "serde",
 "serde_json",
 "tempfile",
+ "time",
 "tokio",
 "tracing",
 ]

 [[package]]
 name = "kebab-normalize"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "kebab-core",
@@ -3737,7 +3739,7 @@ dependencies = [

 [[package]]
 name = "kebab-parse-image"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "ab_glyph",
 "anyhow",
@@ -3761,7 +3763,7 @@ dependencies = [

 [[package]]
 name = "kebab-parse-md"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "kebab-core",
@@ -3778,7 +3780,7 @@ dependencies = [

 [[package]]
 name = "kebab-parse-pdf"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "blake3",
@@ -3791,7 +3793,7 @@ dependencies = [

 [[package]]
 name = "kebab-parse-types"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "kebab-core",
 "serde",
@@ -3799,7 +3801,7 @@ dependencies = [

 [[package]]
 name = "kebab-rag"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "blake3",
@@ -3820,7 +3822,7 @@ dependencies = [

 [[package]]
 name = "kebab-search"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "globset",
@@ -3839,7 +3841,7 @@ dependencies = [

 [[package]]
 name = "kebab-source-fs"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "blake3",
@@ -3856,7 +3858,7 @@ dependencies = [

 [[package]]
 name = "kebab-store-sqlite"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "blake3",
@@ -3877,7 +3879,7 @@ dependencies = [

 [[package]]
 name = "kebab-store-vector"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "arrow",
@@ -3901,7 +3903,7 @@ dependencies = [

 [[package]]
 name = "kebab-tui"
-version = "0.4.0"
+version = "0.5.0"
 dependencies = [
 "anyhow",
 "crossterm",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,7 +30,7 @@ edition       = "2024"
 rust-version  = "1.85"
 license       = "MIT OR Apache-2.0"
 repository    = "https://github.com/altair823/kebab"
-version       = "0.4.0"
+version       = "0.5.0"

 [workspace.dependencies]
 anyhow       = "1"
@@ -80,6 +80,7 @@ rmcp         = { version = "1.6", default-features = false, features = ["server"
 # a tokio runtime to host its mock server (the runtime adapter crate stays
 # sync via reqwest::blocking — wiremock is dev-only there).
 wiremock     = "0.6"
+base64       = "0.22"

 # Disk-footprint trim for dev / test builds. Codegen, opt-level, and
 # behavior are unchanged — only DWARF debug info is reduced (line
--- a/README.md
+++ b/README.md
@@ -71,15 +71,16 @@ kebab doctor
 |------|------|
 | `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 |
 | `kebab ingest [<path>]` | Markdown / 이미지 / PDF 색인 (idempotent). TTY 에서는 stderr 진행 바, non-TTY (CI / pipe) 는 stderr 한 줄씩, `--json` 은 stdout 에 `ingest_progress.v1` 라인 streaming 후 마지막에 `ingest_report.v1`. Ctrl-C 한 번이면 현재 asset 마무리 후 abort (부분 commit 보존, idempotent re-run), 두 번째 Ctrl-C 는 hard exit. Markdown title 이 frontmatter 에 없어도 첫 H1 → H2 → 첫 paragraph 80 자 → 파일명 순으로 자동 채움 (parser_version `md-frontmatter-v2`) — 기존 색인된 doc 도 다음 ingest 에서 새 title 로 갱신. **Incremental** (p9-fb-23): 두 번째 이후의 ingest 는 변하지 않은 doc (blake3 + parser/chunker/embedder version 모두 동일) 의 parse/chunk/embed/vector upsert 를 자동 스킵. final summary 에 `N unchanged` 카운트 표시. `--force-reingest` 로 skip 무시 강제 재처리. **지원 형식** (extractor 자동 결정 — config 에 명시 불가): Markdown (`.md`), 이미지 (`.png` / `.jpg` / `.jpeg`, OCR + caption), PDF (`.pdf`). 다른 확장자는 자동 skip — `IngestItem.warnings` 에 사유 (`"unsupported media type: .docx"` 등), `IngestReport.skipped_by_extension` 에 카운트 분류, CLI / TUI summary 에 breakdown 표시. |
-| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale |
+| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor <opaque>] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID] [--trace]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` 는 반복 가능 flag (`--tag rust --tag async`) 로 OR 매칭, `--media` 는 `,` 구분 다중 값 OR 매칭, 나머지 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). `--ingested-after` 는 RFC3339 UTC — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 `markdown` alias 로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). **`--trace` (p9-fb-37)** — `search_response.v1.trace` 에 lexical / vector pre-fusion 후보 + RRF union + per-stage timing (`lexical_ms` / `vector_ms` / `fusion_ms` / `total_ms`) 노출. trace 요청은 캐시 우회 (`--no-cache` 없이도 항상 cold). |
 | `kebab list docs` | 색인된 문서 목록 |
 | `kebab inspect doc <id>` / `kebab inspect chunk <id>` | raw record 보기 |
-| `kebab ask "<query>" [--show-citations / --hide-citations] [--session <id>]` | RAG 답변 + 근거 인용. 답변 후 `근거:` block 으로 full path / line range / score 한 줄씩 (default ON — `--hide-citations` 로 끄기, pipe 시 유용). 근거 부족 시 거절. Ollama 필요. `--session <id>` 로 multi-turn — 첫 호출에서 SQLite `chat_sessions` 에 자동 생성, 이후 호출은 prior turns 를 history 로 받아 follow-up. session id 는 사용자 지정 (e.g. `kb-rust-async-2026-05`) — `kebab reset --data-only` 로 모든 session wipe |
+| `kebab fetch chunk <id> [--context N]` / `kebab fetch doc <id> [--max-tokens N]` / `kebab fetch span <doc_id> <ls> <le> [--max-tokens N]` | (p9-fb-35) verbatim text fetch from indexed corpus. wire = `fetch_result.v1` (kind discriminator). chunk: target + ±N ordinal-context chunks. doc: full normalized markdown. span: 1-based line range (PDF/audio rejected as `error.v1.code = span_not_supported`). chars/4 budget on doc/span. |
+| `kebab ask "<query>" [--show-citations / --hide-citations] [--session <id>] [--stream]` | RAG 답변 + 근거 인용. 답변 후 `근거:` block 으로 full path / line range / score 한 줄씩 (default ON — `--hide-citations` 로 끄기, pipe 시 유용). 근거 부족 시 거절. Ollama 필요. `--session <id>` 로 multi-turn — 첫 호출에서 SQLite `chat_sessions` 에 자동 생성, 이후 호출은 prior turns 를 history 로 받아 follow-up. session id 는 사용자 지정 (e.g. `kb-rust-async-2026-05`) — `kebab reset --data-only` 로 모든 session wipe. **`--stream` (p9-fb-33)** 로 ndjson `answer_event.v1` event (retrieval_done → token* → final) 를 stderr 에 흘리고 stdout 마지막 줄에 기존 `answer.v1` — agent 가 token 즉시 소비 가능 |
 | `kebab doctor` | 설정/모델/DB 헬스 체크 |
 | `kebab tui` | Ratatui 셸 (Library + Search + Ask + Inspect 패널, desktop 진행 중). Library 에서 `r` 키로 background ingest 시작 — 화면 하단 status bar 가 진행 표시, 완료/abort 시 final 라인 잠시 유지 후 자동 hide. ingest 진행 중 `Esc` / `Ctrl-C` 가 cancel signal (그 외에는 quit). vim-style mode (header 우측 `-- NORMAL --` / `-- INSERT --`) — Library/Inspect 는 자동 NORMAL, Search/Ask 는 자동 INSERT. `i` 로 Normal→Insert (모든 pane — p9-fb-21), `Esc` 로 Insert→Normal 어디서나. mode-authoritative dispatch — Search 의 `j/k/o/g`, Ask 의 `e/j/k` 는 NORMAL 모드에서만 명령으로 동작, INSERT 에서는 입력 문자로 typing. (Search 의 chunk inspect 키는 `i`→`o` 로 rebind — `i` 가 universal Insert toggle.) **`F1` 로 cheatsheet popup** (현재 pane 의 키 매핑 + global 토글 표) — `Esc` / `F1` 로 닫기. Search 패널은 200ms debounce 후 background worker 가 검색 — 키 입력으로 UI freeze 안 됨, 사용자가 계속 타이핑하면 stale 결과 자동 폐기 (generation counter). Ask 패널은 multi-turn — 같은 conversation 안에서 Q1/A1, Q2/A2 transcript 누적, 다음 질문이 이전 턴을 history 로 받아 답변. 답변 본문은 markdown 렌더 (bold/italic/inline code/heading/list/code fence/table/blockquote, raw `**bold**` 가 실제 굵게 표시). `Ctrl-L` 로 새 conversation 시작. Search 의 `g` 키가 `$EDITOR` (기본 `vi`) 로 hit 의 citation 위치 열기 — 종료 후 TUI 화면이 자동으로 깨끗이 redraw. CLI `kebab ask` 는 raw markdown 그대로 (terminal 호환성 위해). Library 의 doc-list 가 한글 / 일본어 / 중국어 (CJK) 제목을 wide-char 정확한 column width 로 truncate — 한글 제목이 한 줄을 넘기지 않음 (CJK 1 자 = 2 col). Search/Ask/Filter 입력의 cursor 가 wide char 위에서 column 단위로 정렬 — 한글 입력 시 caret 이 글자 옆에 정확히 놓임. `← / →` 로 입력 문자열 중간 cursor 이동 (한글 한 글자 = 2 column 이라도 한 번에 이동), `Home / End` 로 양 끝 점프, `Delete` 로 cursor 위치 char 삭제 — 모든 input pane (Ask / Search / Library filter overlay) 동일 (p9-fb-22). Ask 트랜스크립트는 새 답변이 viewport 아래로 누적될 때 자동으로 tail 을 따라감 (auto-scroll); `j` / `k` 로 위로 스크롤하면 freeze, `Shift-G` 로 다시 bottom + auto-tail 재개. 화면 하단 hint line 은 한국어 동사구로 (`"위로"` / `"아래로"` / `"필터"` / `"타이핑 검색어"` / `"Esc 로 NORMAL 모드"` / `"i 입력모드"` 등) + 현재 (pane, mode) 조합에 맞춰 자동 분기, **첫 fragment 가 항상 `F1 도움말`** (cheatsheet 발견성 보장). 
모든 모드에서 항상 떠 있는 상태바 — `kebab v<version> │ <pane> │ <docs> docs │ <state>` (state: streaming/searching/indexing/idle, ingest 진행 중에는 progress 가 같은 자리에 흡수됨). Ask 진입 시 conversation id 8 자 prefix 도 함께 표시. Ask 트랜스크립트와 Inspect 양쪽에서 `PgUp / PgDn` 으로 10 줄씩 페이지 스크롤. Library 의 doc list 위에는 `TITLE / TAGS / UPDATED / CHUNKS` 컬럼 헤더 행 표시 (display-width 정렬, Hangul / CJK 안전). |
 | `kebab reset [--all / --data-only / --vector-only / --config-only] [--yes]` | XDG 데이터 wipe. **Irreversible.** TTY 면 confirm prompt, 아니면 `--yes` 필수. `--vector-only` 는 SQLite `embedding_records` 도 함께 truncate (orphan 방지) |
 | `kebab eval run / compare` | golden query 회귀 측정 |
-| `kebab schema [--json]` | introspection — wire schemas / capabilities / models / stats 한 번에. `--json` 은 `schema.v1` wire; 사람 모드는 서식 출력. |
+| `kebab schema [--json]` | introspection — wire schemas / capabilities / models / stats 한 번에. `--json` 은 `schema.v1` wire; 사람 모드는 서식 출력. **stats 에 (p9-fb-37) `media_breakdown` (5 keys: markdown / pdf / image / audio / other) + `lang_breakdown` (BCP-47 코드, NULL 은 literal `"null"`) + `index_bytes` (sqlite + lancedb on-disk 합계) + `stale_doc_count` (`config.search.stale_threshold_days` 초과 doc 수) 추가.** |
 | `kebab ingest-file <path>` | 단일 파일 ingest (workspace 외부 가능). 바이트는 `<workspace.root>/_external/<hash12>.<ext>` 로 copy. `.kebabignore` 매치 시 stderr warn 후 진행 (explicit ingest 가 bypass intent). |
 | `kebab ingest-stdin --title <T> [--source-uri <URI>]` | stdin 의 markdown 본문 ingest. frontmatter (title + source_uri) 자동 prepend. v1 markdown only. |
 | `kebab mcp` | MCP (Model Context Protocol) stdio server. agent host (Claude Code / Cursor / OpenAI Agents) 가 spawn 하여 tool 호출 (`search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`). `--config` honor. |
--- a/crates/kebab-app/Cargo.toml
+++ b/crates/kebab-app/Cargo.toml
@@ -52,6 +52,8 @@ unicode-normalization = "0.1"
 # p9-fb-31: GitignoreBuilder for .kebabignore matching in ingest_file_with_config.
 # Same version as kebab-source-fs (0.4) to avoid duplicate dep versions.
 ignore               = "0.4"
+# p9-fb-34: opaque pagination cursor encodes payload as base64.
+base64               = { workspace = true }

 [dev-dependencies]
 rusqlite             = { workspace = true }
--- a/crates/kebab-app/src/app.rs
+++ b/crates/kebab-app/src/app.rs
@@ -41,7 +41,7 @@ use lru::LruCache;

 use kebab_core::{
    Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode,
-    SearchQuery, VectorStore,
+    SearchOpts, SearchQuery, VectorStore,
 };
 use kebab_embed_local::FastembedEmbedder;
 use kebab_llm_local::OllamaLanguageModel;
@@ -50,6 +50,31 @@ use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
 use kebab_store_sqlite::SqliteStore;
 use kebab_store_vector::LanceVectorStore;

+/// p9-fb-34: top-level wrapper around a paginated, budget-limited
+/// search result. Mirrors the wire `search_response.v1` shape.
+///
+/// `next_cursor` is set when the page came back full or the budget
+/// loop modified it (`truncated`), provided `offset + returned > 0`;
+/// the cursor resumes at `offset + returned`, so popped hits stay
+/// fetchable. A full-page cursor is speculative: the next page may be
+/// empty. It is always `None` on the trace path
+/// (`SearchOpts.trace = true`), which skips budgeting and pagination.
+///
+/// `truncated` signals that the budget loop modified the page
+/// (snippet shorten or k pop). Because it also drives `next_cursor`,
+/// the caller may widen `max_tokens` and re-issue the same query,
+/// follow `next_cursor` to advance through more hits,
+/// or both.
+#[derive(Clone, Debug)]
+pub struct SearchResponse {
+    pub hits: Vec<SearchHit>,
+    pub next_cursor: Option<String>,
+    pub truncated: bool,
+    /// p9-fb-37: `Some` only when the caller passed `SearchOpts.trace = true`;
+    /// the non-trace path always returns `None`, so other callers can ignore it.
+    pub trace: Option<kebab_core::SearchTrace>,
+}
+
 /// Facade state — see module docs for lifetime rules.
 ///
 /// The struct is public so long-lived callers (kb-eval, the future P9
@@ -274,6 +299,204 @@ impl App {
        Ok(hits)
    }

+    /// p9-fb-34: budget-aware search facade. Returns hits trimmed to
+    /// `opts.max_tokens` (×4 against the serialized hit size) plus
+    /// pagination metadata. The non-trace path delegates retrieval to
+    /// `App::search`, then applies offset, snippet cap and budget here.
+    ///
+    /// Errors: an undecodable or stale `opts.cursor` surfaces as a
+    /// `StructuredError` carrying `code = "stale_cursor"`.
+    pub fn search_with_opts(
+        &self,
+        query: SearchQuery,
+        opts: SearchOpts,
+    ) -> Result<SearchResponse> {
+        use crate::cursor;
+
+        let corpus_revision = self.sqlite.corpus_revision().to_string();
+        let offset = match opts.cursor.as_ref() {
+            // p9-fb-34: wrap the typed ErrorV1 in StructuredError so
+            // anyhow carries the structured payload all the way to
+            // `classify` — string formatting here would degrade
+            // `code = "stale_cursor"` to `code = "generic"` on the wire.
+            Some(c) => cursor::decode(c, &corpus_revision)
+                .map_err(|e| anyhow::Error::new(crate::error_wire::StructuredError(e)))?,
+            None => 0,
+        };
+
+        let snippet_chars = opts
+            .snippet_chars
+            .unwrap_or(self.config.search.snippet_chars);
+
+        // Fetch enough to satisfy offset + the requested page. The
+        // retriever returns at most `fetch_k` hits — we then drop
+        // `offset` and keep the next `k_effective`. `k = 0` is
+        // treated as "use config default" so a caller passing through
+        // a default-constructed `SearchQuery` still gets useful work
+        // out of the budget facade.
+        let k_effective = if query.k == 0 {
+            self.config.search.default_k
+        } else {
+            query.k
+        };
+        let fetch_k = offset.saturating_add(k_effective);
+        let fetch_query = SearchQuery {
+            k: fetch_k,
+            ..query.clone()
+        };
+
+        // p9-fb-37: when --trace is requested, bypass the LRU cache and
+        // run through `HybridRetriever::search_with_trace`, which
+        // dispatches by mode internally. Vector / hybrid modes require
+        // embeddings (same as `--mode hybrid`); lexical mode skips
+        // embedder construction via `NoopRetriever` so lexical-only
+        // workspaces (provider = "none") can use `--trace` without
+        // surfacing the "switch to --mode lexical" error.
+        if opts.trace {
+            let lex = Arc::new(LexicalRetriever::with_settings(
+                self.sqlite.clone(),
+                lexical_index_version(&self.config),
+                self.config.search.snippet_chars,
+            )) as Arc<dyn Retriever>;
+            let vec_retr: Arc<dyn Retriever> = if matches!(query.mode, SearchMode::Lexical) {
+                // `HybridRetriever::search_with_trace` never invokes the
+                // vector retriever for `SearchMode::Lexical` (Task 4).
+                // A no-op stand-in lets us avoid the ~470 MB embedder
+                // load when the user only asked for lexical trace.
+                Arc::new(NoopRetriever)
+            } else {
+                let (emb, vec_store) = self.require_embeddings()?;
+                let vec_iv = vector_index_version(emb.as_ref());
+                let vec_dyn: Arc<dyn VectorStore + Send + Sync> = vec_store;
+                let emb_dyn: Arc<dyn Embedder> = emb;
+                Arc::new(VectorRetriever::with_settings(
+                    vec_dyn,
+                    emb_dyn,
+                    self.sqlite.clone(),
+                    vec_iv,
+                    self.config.search.snippet_chars,
+                )) as Arc<dyn Retriever>
+            };
+            let hybrid = HybridRetriever::new(&self.config, lex, vec_retr);
+            let (mut traced_hits, trace) = hybrid.search_with_trace(&fetch_query)?;
+
+            // Stamp staleness — same as search_uncached.
+            let now = time::OffsetDateTime::now_utc();
+            crate::staleness::mark_stale_in_place(
+                &mut traced_hits,
+                now,
+                self.config.search.stale_threshold_days,
+            );
+
+            // Apply offset + k_effective truncation (mirrors non-trace path).
+            let drop_n = offset.min(traced_hits.len());
+            traced_hits.drain(..drop_n);
+            let mut hits: Vec<SearchHit> =
+                traced_hits.into_iter().take(k_effective).collect();
+
+            // Snippet truncation if opts.snippet_chars set (mirror non-trace path).
+            if opts.snippet_chars.is_some() {
+                for h in hits.iter_mut() {
+                    if h.snippet.chars().count() > snippet_chars {
+                        h.snippet = trim_to_chars(&h.snippet, snippet_chars);
+                    }
+                }
+            }
+
+            // Trace path skips the budget loop. Caller will inspect
+            // `hits.len()` and `trace.timing` rather than paginate.
+            return Ok(SearchResponse {
+                hits,
+                next_cursor: None,
+                truncated: false,
+                trace: Some(trace),
+            });
+        }
+
+        let mut all_hits = self.search(fetch_query)?;
+
+        // Skip offset.
+        let drop_n = offset.min(all_hits.len());
+        all_hits.drain(..drop_n);
+        let mut hits: Vec<SearchHit> =
+            all_hits.into_iter().take(k_effective).collect();
+
+        // Apply snippet_chars override if shorter than what the
+        // retriever returned (retriever already honored
+        // `config.search.snippet_chars`; this only kicks in when the
+        // caller asked for *less*).
+        if opts.snippet_chars.is_some() {
+            for h in hits.iter_mut() {
+                if h.snippet.chars().count() > snippet_chars {
+                    h.snippet = trim_to_chars(&h.snippet, snippet_chars);
+                }
+            }
+        }
+
+        // Budget loop.
+        let mut truncated = false;
+        if let Some(max_tokens) = opts.max_tokens {
+            let max_chars = max_tokens.saturating_mul(4);
+            // Step 1: shorten snippets progressively to a 60-char floor.
+            const SNIPPET_FLOOR: usize = 60;
+            let mut current_snippet_cap = snippet_chars;
+            while estimate_chars(&hits) > max_chars
+                && current_snippet_cap > SNIPPET_FLOOR
+            {
+                current_snippet_cap =
+                    (current_snippet_cap / 2).max(SNIPPET_FLOOR);
+                for h in hits.iter_mut() {
+                    if h.snippet.chars().count() > current_snippet_cap {
+                        h.snippet =
+                            trim_to_chars(&h.snippet, current_snippet_cap);
+                        truncated = true;
+                    }
+                }
+            }
+            // Step 2: pop hits from the end until we fit, but always
+            // keep ≥ 1.
+            while estimate_chars(&hits) > max_chars && hits.len() > 1 {
+                hits.pop();
+                truncated = true;
+            }
+        }
+
+        // p9-fb-34: emit cursor whenever more hits may be reachable.
+        // Three cases produce a non-null cursor:
+        //   (a) returned == k_effective: retriever filled the page; there
+        //       may be more behind it. Speculative — next call may return
+        //       an empty page if nothing remains.
+        //   (b) truncated by k-pop: returned < k_effective because we
+        //       popped hits to fit the budget. Those popped hits live at
+        //       offset+returned..; next call (with same or wider budget)
+        //       resumes from there.
+        //   (c) truncated by snippet-only shrink: on a full page this
+        //       falls under (a). Cursor lets caller paginate; widening
+        //       --max-tokens lets caller re-fetch fuller snippets at the
+        //       same offset.
+        //
+        // No cursor when returned < k_effective AND `truncated` is false.
+        // NOTE(review): `truncated` is also set by snippet-only shrink, so
+        // a short (final) page with shrunk snippets still gets a cursor.
+        let returned = hits.len();
+        let next_cursor = if returned == k_effective || truncated {
+            if offset.saturating_add(returned) > 0 { // never hand out a cursor that points back at offset 0
+                Some(cursor::encode(offset + returned, &corpus_revision))
+            } else {
+                None
+            }
+        } else {
+            None
+        };
+
+        Ok(SearchResponse {
+            hits,
+            next_cursor,
+            truncated,
+            trace: None,
+        })
+    }
+
    /// Run a RAG `ask` against the configured retriever + LLM. Reuses
    /// the memoized embedder / vector / LLM where applicable.
    pub fn ask(&self, query: &str, opts: AskOpts) -> Result<Answer> {
@@ -587,6 +810,24 @@ fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion {
    IndexVersion(format!("lex:{}", config.chunking.chunker_version))
 }

+/// p9-fb-37: stand-in for the vector retriever in the trace path when
+/// `query.mode == SearchMode::Lexical`. `HybridRetriever::search_with_trace`'s
+/// Lexical branch never calls `vector.search()`, so returning an empty
+/// hit list here is safe and lets lexical-only workspaces (embedding
+/// `provider = "none"`) use `--trace` without paying the ~470 MB
+/// embedder load.
+struct NoopRetriever;
+
+impl Retriever for NoopRetriever {
+    fn search(&self, _q: &kebab_core::SearchQuery) -> anyhow::Result<Vec<kebab_core::SearchHit>> {
+        Ok(Vec::new()) // the query is ignored; the result is always empty and never an error
+    }
+
+    fn index_version(&self) -> kebab_core::IndexVersion {
+        kebab_core::IndexVersion("noop:trace".into()) // fixed sentinel, not derived from any real index
+    }
+}
+
 /// Compose a stable `IndexVersion` for the vector retriever. Tracks
 /// `(embedding_model, embedding_version, dimensions)` so a model swap
 /// flags drift via the existing index_version mismatch warning in
@@ -627,6 +868,34 @@ fn blake3_truncate(input: &str) -> u128 {
    u128::from_be_bytes(buf)
 }

+/// p9-fb-34: trim `s` to at most `n` Unicode scalar chars (not bytes),
+/// returning an owned `String`; input that already fits is copied as-is.
+/// The output buffer is pre-sized for the UTF-8 worst case (4 bytes /
+/// char) so the inner push never re-allocates.
+fn trim_to_chars(s: &str, n: usize) -> String {
+    if s.chars().count() <= n { // NOTE(review): full walk here plus the copy loop below = two passes; `char_indices().nth(n)` + slice would need one
+        return s.to_string();
+    }
+    let mut out = String::with_capacity(n.saturating_mul(4));
+    for (i, c) in s.chars().enumerate() {
+        if i >= n { // stop once exactly `n` chars have been copied
+            break;
+        }
+        out.push(c);
+    }
+    out
+}
+
+/// p9-fb-34: estimate the wire JSON cost of the hit list as the summed
+/// UTF-8 byte length of each serialized hit (`String::len` is bytes, not
+/// chars, so non-ASCII snippets weigh more). A hit that fails to
+/// serialize counts as 0 rather than panicking inside the budget loop.
+fn estimate_chars(hits: &[SearchHit]) -> usize {
+    hits.iter()
+        .map(|h| serde_json::to_string(h).map(|s| s.len()).unwrap_or(0))
+        .sum()
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -669,3 +938,59 @@ mod tests {
        assert_ne!(a, d, "different session_id → different hash");
    }
 }
+
+#[cfg(test)]
+mod tests_trace {
+    use super::*;
+    use kebab_core::{SearchMode, SearchOpts, SearchQuery};
+
+    fn open_app_with_temp_dir() -> (tempfile::TempDir, App) {
+        let dir = tempfile::tempdir().unwrap();
+        let mut cfg = kebab_config::Config::defaults();
+        cfg.storage.data_dir = dir.path().to_string_lossy().into_owned();
+        // Open a store only to run migrations, then drop it before opening the App.
+        let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap();
+        store.run_migrations().unwrap();
+        drop(store);
+        let app = App::open_with_config(cfg).unwrap();
+        (dir, app) // hand the TempDir back so the directory lives as long as the caller keeps it
+    }
+
+    #[test]
+    fn search_response_trace_none_when_opts_trace_false() {
+        let (_dir, app) = open_app_with_temp_dir();
+        let q = SearchQuery {
+            text: "x".into(),
+            mode: SearchMode::Lexical,
+            k: 1,
+            filters: Default::default(),
+        };
+        let resp = app.search_with_opts(q, SearchOpts::default()).unwrap(); // default opts: trace stays off
+        assert!(resp.trace.is_none());
+    }
+
+    #[test]
+    fn search_response_trace_some_when_opts_trace_true_lexical_mode() {
+        // Lexical mode doesn't require embeddings — the trace path
+        // builds HybridRetriever with a `NoopRetriever` stand-in for
+        // the vector side, since `HybridRetriever::search_with_trace`'s
+        // Lexical branch never invokes `vector.search()`. Default
+        // Config has embedding `provider = "none"`, and lexical-mode
+        // trace must succeed under that config (no embedder load).
+        let (_dir, app) = open_app_with_temp_dir();
+        let q = SearchQuery {
+            text: "x".into(),
+            mode: SearchMode::Lexical,
+            k: 1,
+            filters: Default::default(),
+        };
+        let opts = SearchOpts {
+            trace: true,
+            ..Default::default()
+        };
+        let resp = app
+            .search_with_opts(q, opts)
+            .expect("lexical-mode trace must succeed without embeddings");
+        assert!(resp.trace.is_some(), "trace populated when opts.trace=true");
+    }
+}
--- a/crates/kebab-app/src/cursor.rs
+++ b/crates/kebab-app/src/cursor.rs
@@ -0,0 +1,75 @@
+//! p9-fb-34 opaque pagination cursor.
+//!
+//! Format: URL-safe, unpadded base64 of JSON `{offset: usize, corpus_revision: string}`.
+//! Opaque to callers — they MUST NOT decode the contents themselves;
+//! the schema is internal and may change without notice.
+
+use base64::Engine;
+use base64::engine::general_purpose::URL_SAFE_NO_PAD;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+
+use crate::error_wire::ErrorV1;
+
+#[derive(Serialize, Deserialize)]
+struct Payload {
+    offset: usize, // value handed back by `decode`; the caller uses it as the number of hits to skip
+    corpus_revision: String, // revision the cursor was issued against; `decode` rejects a mismatch
+}
+
+/// Encode `(offset, corpus_revision)` as opaque URL-safe, unpadded base64 JSON.
+pub fn encode(offset: usize, corpus_revision: &str) -> String {
+    let payload = Payload {
+        offset,
+        corpus_revision: corpus_revision.to_string(),
+    };
+    let json = serde_json::to_vec(&payload).expect("Payload serializes"); // panics only if serde_json cannot serialize the payload
+    URL_SAFE_NO_PAD.encode(&json)
+}
+
+/// Decode an opaque cursor and return its `offset`, provided it was
+/// issued against `expected_revision`. Bad base64, bad JSON, or a
+/// revision mismatch each return an `ErrorV1` with `code = "stale_cursor"`.
+//
+// p9-fb-34: ErrorV1 is the workspace-wide wire error struct (roughly
+// 200B given its Value + String fields — not measured here). Boxing
+// would force every call site to deref through a Box for no win —
+// the err-path is rare. Single allow at the function level.
+//
+// p9-fb-34 round-1 review: differentiate the three failure modes
+// (base64 / JSON / revision mismatch) with distinct messages — all
+// keep `code = "stale_cursor"` so the agent's branching logic stays
+// the same, but humans reading the message get a precise hint.
+#[allow(clippy::result_large_err)]
+pub fn decode(s: &str, expected_revision: &str) -> Result<usize, ErrorV1> {
+    let bytes = URL_SAFE_NO_PAD.decode(s.as_bytes()).map_err(|_| ErrorV1 {
+        schema_version: "error.v1".to_string(),
+        code: "stale_cursor".to_string(),
+        message: "cursor is not valid base64. Re-issue search to obtain a fresh cursor."
+            .to_string(),
+        details: Value::Null,
+        hint: None,
+    })?;
+    let payload: Payload = serde_json::from_slice(&bytes).map_err(|_| ErrorV1 {
+        schema_version: "error.v1".to_string(),
+        code: "stale_cursor".to_string(),
+        message: "cursor payload is malformed. Re-issue search to obtain a fresh cursor."
+            .to_string(),
+        details: Value::Null,
+        hint: None,
+    })?;
+    if payload.corpus_revision != expected_revision {
+        return Err(ErrorV1 {
+            schema_version: "error.v1".to_string(),
+            code: "stale_cursor".to_string(),
+            message: format!(
+                "cursor was issued against corpus_revision '{}'; current revision is \
+                 '{}'. Re-issue search to obtain a fresh cursor.",
+                payload.corpus_revision, expected_revision
+            ),
+            details: Value::Null,
+            hint: None,
+        });
+    }
+    Ok(payload.offset)
+}
--- a/crates/kebab-app/src/error_wire.rs
+++ b/crates/kebab-app/src/error_wire.rs
@@ -11,6 +11,12 @@ use serde_json::{Value, json};

 use crate::error_signal::{ConfigInvalid, LlmError, NotIndexed};

+// p9-fb-34: `stale_cursor` is constructed directly by `cursor::decode`
+// and surfaced through `StructuredError` (an anyhow-friendly wrapper
+// that carries the typed `ErrorV1` payload without lossy string
+// formatting). `classify` short-circuits on it at the top of the
+// function so the typed `code = "stale_cursor"` reaches the wire.
+
 /// Wire schema id for [`ErrorV1`]. Single source of truth — kebab-cli
 /// + kebab-mcp use this via `kebab_app::ERROR_V1_ID`.
 pub const ERROR_V1_ID: &str = "error.v1";
@@ -24,7 +30,29 @@ pub struct ErrorV1 {
    pub hint: Option<String>,
 }

+/// p9-fb-34: newtype over [`ErrorV1`] that implements
+/// `std::error::Error`, so a structured wire payload can travel inside
+/// an `anyhow::Error` and be recovered by downcast instead of being
+/// flattened into a string. Constructed by the cursor code path
+/// (`cursor::decode` → `App::search_with_opts`); [`classify`] returns
+/// the wrapped payload unchanged when it finds one.
+#[derive(Debug)]
+pub struct StructuredError(pub ErrorV1);
+
+impl std::fmt::Display for StructuredError {
+    /// Renders as `[<code>] <message>`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let ErrorV1 { code, message, .. } = &self.0;
+        write!(f, "[{code}] {message}")
+    }
+}
+
+impl std::error::Error for StructuredError {}
+
 pub fn classify(err: &anyhow::Error, verbose: bool) -> ErrorV1 {
+    // p9-fb-34: structured wrapper short-circuits — preserves the
+    // typed payload that callers (cursor::decode) constructed
+    // instead of falling through to `code = "generic"`.
+    if let Some(s) = err.downcast_ref::<StructuredError>() {
+        return s.0.clone();
+    }
    if let Some(s) = err.downcast_ref::<ConfigInvalid>() {
        return ErrorV1 {
            schema_version: ERROR_V1_ID.to_string(),
@@ -197,4 +225,36 @@ mod tests {
        let v1 = classify(&err, false);
        assert_eq!(v1.code, "io_error");
    }
+
+    #[test]
+    fn stale_cursor_is_not_routed_through_classify() {
+        // p9-fb-34: `stale_cursor` has a single source of truth —
+        // cursor::decode builds it directly. classify must never infer
+        // the code from the text of an anyhow message, so a bare string
+        // that merely mentions "stale_cursor" has to classify as
+        // something else.
+        let bare = anyhow::anyhow!("stale_cursor: rev mismatch");
+        let classified = classify(&bare, false);
+        assert_ne!(
+            classified.code, "stale_cursor",
+            "classify must not produce stale_cursor from bare anyhow string"
+        );
+    }
+
+    #[test]
+    fn stale_cursor_propagates_through_structured_wrapper() {
+        // p9-fb-34: positive side of the contract. A typed ErrorV1
+        // wrapped in `StructuredError` and carried by anyhow must come
+        // back out of classify with its code and message intact.
+        let wrapped = StructuredError(ErrorV1 {
+            schema_version: ERROR_V1_ID.to_string(),
+            code: "stale_cursor".to_string(),
+            message: "test stale cursor".to_string(),
+            details: Value::Null,
+            hint: None,
+        });
+        let carried = anyhow::Error::new(wrapped);
+
+        let ErrorV1 { code, message, .. } = classify(&carried, false);
+        assert_eq!(code, "stale_cursor");
+        assert_eq!(message, "test stale cursor");
+    }
 }
--- a/crates/kebab-app/src/fetch.rs
+++ b/crates/kebab-app/src/fetch.rs
@@ -0,0 +1,447 @@
+//! p9-fb-35 verbatim fetch implementation.
+//!
+//! [`App::fetch`] is the facade entry point. It dispatches on
+//! [`FetchQuery`] variants:
+//!
+//! - `Chunk(id)` — return the chunk row from `chunks.text`, optionally
+//!   with ±N surrounding chunks (`FetchOpts::context`).
+//! - `Doc(id)` — return the entire document re-serialized to markdown.
+//!   (Implemented in Task 4.)
+//! - `Span { doc_id, line_start, line_end }` — return a contiguous line
+//!   slice. (Implemented in Task 5.)
+//!
+//! Errors are surfaced as [`StructuredError`] (anyhow-friendly wrapper
+//! around `ErrorV1`) so the CLI / MCP wire layer's `classify` keeps the
+//! typed `code` (`chunk_not_found` / `doc_not_found` /
+//! `span_not_supported` / `invalid_input`) instead of falling through
+//! to `code = "generic"`.
+
+use anyhow::Result;
+use time::OffsetDateTime;
+
+use kebab_core::{
+    Block, CanonicalDocument, Chunk, ChunkId, DocumentId, DocumentStore, FetchKind, FetchOpts,
+    FetchQuery, FetchResult,
+};
+
+use crate::App;
+use crate::error_wire::{ERROR_V1_ID, ErrorV1, StructuredError};
+use crate::staleness::compute_stale;
+
+impl App {
+    /// p9-fb-35: verbatim fetch facade. Routes each [`FetchQuery`]
+    /// variant to its handler (`fetch_chunk` / `fetch_doc` /
+    /// `fetch_span`) and returns that handler's result unchanged.
+    ///
+    /// Domain failures are raised by the handlers as
+    /// `StructuredError(ErrorV1)` with one of `chunk_not_found` /
+    /// `doc_not_found` / `span_not_supported` / `invalid_input`, so the
+    /// wire-layer classifier preserves the typed code.
+    pub fn fetch(&self, query: FetchQuery, opts: FetchOpts) -> Result<FetchResult> {
+        let app = self;
+        match query {
+            FetchQuery::Span {
+                doc_id,
+                line_start,
+                line_end,
+            } => fetch_span(app, doc_id, line_start, line_end, opts),
+            FetchQuery::Doc(doc_id) => fetch_doc(app, doc_id, opts),
+            FetchQuery::Chunk(chunk_id) => fetch_chunk(app, chunk_id, opts),
+        }
+    }
+}
+
+/// p9-fb-35: chunk-mode fetch. Loads the chunk `id` and its parent
+/// document, optionally gathers up to `opts.context` neighbors on each
+/// side, and reports the parent's `updated_at` as `indexed_at`.
+///
+/// Fails with `chunk_not_found` when the chunk row is missing and with
+/// `doc_not_found` when the chunk's parent document is missing.
+fn fetch_chunk(app: &App, id: ChunkId, opts: FetchOpts) -> Result<FetchResult> {
+    // Shared envelope for the two typed "not found" failures below.
+    let not_found = |code: &str, message: String| {
+        anyhow::Error::new(StructuredError(ErrorV1 {
+            schema_version: ERROR_V1_ID.to_string(),
+            code: code.to_string(),
+            message,
+            details: serde_json::Value::Null,
+            hint: None,
+        }))
+    };
+
+    let target =
+        match <kebab_store_sqlite::SqliteStore as DocumentStore>::get_chunk(&app.sqlite, &id)? {
+            Some(chunk) => chunk,
+            None => {
+                return Err(not_found(
+                    "chunk_not_found",
+                    format!("chunk_id '{}' not found", id.0),
+                ));
+            }
+        };
+
+    let doc_id = target.doc_id.clone();
+    let doc = match <kebab_store_sqlite::SqliteStore as DocumentStore>::get_document(
+        &app.sqlite,
+        &doc_id,
+    )? {
+        Some(doc) => doc,
+        None => {
+            return Err(not_found(
+                "doc_not_found",
+                format!(
+                    "doc_id '{}' (parent of chunk '{}') not found",
+                    doc_id.0, id.0
+                ),
+            ));
+        }
+    };
+
+    // A missing or zero `context` means "target only": skip the
+    // neighbor lookup entirely.
+    let radius = opts.context.unwrap_or(0);
+    let (context_before, context_after) = if radius > 0 {
+        surrounding_chunks(app, &doc_id, &id, radius)?
+    } else {
+        (Vec::new(), Vec::new())
+    };
+
+    let indexed_at = doc_metadata_updated_at(&doc);
+    let stale = compute_stale(
+        indexed_at,
+        OffsetDateTime::now_utc(),
+        app.config.search.stale_threshold_days,
+    );
+
+    Ok(FetchResult {
+        kind: FetchKind::Chunk,
+        doc_id: doc.doc_id.clone(),
+        doc_path: doc.workspace_path.clone(),
+        indexed_at,
+        stale,
+        chunk: Some(target),
+        context_before,
+        context_after,
+        text: None,
+        line_start: None,
+        line_end: None,
+        effective_end: None,
+        truncated: false,
+    })
+}
+
+/// p9-fb-35: doc-mode fetch. Re-serializes the whole document to
+/// markdown and, when `opts.max_tokens` is set, cuts the text to
+/// `max_tokens * 4` characters and flags the result as `truncated`.
+///
+/// Fails with `doc_not_found` when the document row is missing.
+fn fetch_doc(app: &App, id: DocumentId, opts: FetchOpts) -> Result<FetchResult> {
+    let doc = match <kebab_store_sqlite::SqliteStore as DocumentStore>::get_document(
+        &app.sqlite,
+        &id,
+    )? {
+        Some(doc) => doc,
+        None => {
+            return Err(anyhow::Error::new(StructuredError(ErrorV1 {
+                schema_version: ERROR_V1_ID.to_string(),
+                code: "doc_not_found".to_string(),
+                message: format!("doc_id '{}' not found", id.0),
+                details: serde_json::Value::Null,
+                hint: None,
+            })));
+        }
+    };
+
+    // Budget is expressed in tokens; the code converts it at 4 chars per
+    // token (saturating, so a huge budget cannot overflow).
+    let rendered = fmt_canonical_to_markdown(&doc);
+    let char_budget = opts.max_tokens.map(|tokens| tokens.saturating_mul(4));
+    let (text, truncated) = match char_budget {
+        Some(limit) if rendered.chars().count() > limit => {
+            (trim_to_chars(&rendered, limit), true)
+        }
+        _ => (rendered, false),
+    };
+
+    let indexed_at = doc_metadata_updated_at(&doc);
+    let stale = compute_stale(
+        indexed_at,
+        OffsetDateTime::now_utc(),
+        app.config.search.stale_threshold_days,
+    );
+
+    Ok(FetchResult {
+        kind: FetchKind::Doc,
+        doc_id: doc.doc_id.clone(),
+        doc_path: doc.workspace_path.clone(),
+        indexed_at,
+        stale,
+        chunk: None,
+        context_before: Vec::new(),
+        context_after: Vec::new(),
+        text: Some(text),
+        line_start: None,
+        line_end: None,
+        effective_end: None,
+        truncated,
+    })
+}
+
+/// p9-fb-35: trim string to N chars (Unicode-safe). Mirrors fb-34's
+/// helper at `crates/kebab-app/src/app.rs` — kept local to avoid
+/// re-exporting an internal helper.
+///
+/// Returns the first `n` Unicode scalar values of `s`, or all of `s`
+/// when it holds `n` or fewer. The cut always lands on a char boundary.
+fn trim_to_chars(s: &str, n: usize) -> String {
+    // `nth(n)` yields the byte offset of the (n + 1)-th char, which is
+    // exactly where a prefix of `n` chars ends. One pass over the
+    // string, and the copy allocates only the bytes it keeps (no
+    // `n * 4` worst-case reservation).
+    match s.char_indices().nth(n) {
+        Some((cut, _)) => s[..cut].to_string(),
+        None => s.to_string(),
+    }
+}
+
+/// p9-fb-35: span-mode fetch. Returns the 1-based, inclusive line range
+/// `line_start..=line_end` of the document's markdown re-serialization
+/// (lines are counted over `fmt_canonical_to_markdown` output).
+///
+/// Failures, in the order they are checked:
+/// - `doc_not_found` — no document row for `id`.
+/// - `span_not_supported` — the backing asset is PDF or audio.
+/// - `invalid_input` — a zero bound or `line_end < line_start`.
+///
+/// A range that starts past the last line (or an empty document) is not
+/// an error: the result carries empty text and
+/// `effective_end = line_start - 1`.
+fn fetch_span(
+    app: &App,
+    id: DocumentId,
+    line_start: u32,
+    line_end: u32,
+    opts: FetchOpts,
+) -> Result<FetchResult> {
+    // Typed-error envelope shared by every rejection below.
+    let reject = |code: &str, message: String, hint: Option<String>| {
+        anyhow::Error::new(StructuredError(ErrorV1 {
+            schema_version: ERROR_V1_ID.to_string(),
+            code: code.to_string(),
+            message,
+            details: serde_json::Value::Null,
+            hint,
+        }))
+    };
+
+    let doc = match <kebab_store_sqlite::SqliteStore as DocumentStore>::get_document(
+        &app.sqlite,
+        &id,
+    )? {
+        Some(doc) => doc,
+        None => {
+            return Err(reject(
+                "doc_not_found",
+                format!("doc_id '{}' not found", id.0),
+                None,
+            ));
+        }
+    };
+
+    // Reject line-incompatible media types (PDF / audio). `SourceType`
+    // (markdown / note / paper / reference / inbox) is the *user-facing*
+    // category, not the rendering format — the actual byte-level format
+    // lives on the source `RawAsset.media_type`. Look it up via
+    // workspace_path (unique key per asset). A document without an
+    // asset row skips this check.
+    let asset = <kebab_store_sqlite::SqliteStore as DocumentStore>::get_asset_by_workspace_path(
+        &app.sqlite,
+        &doc.workspace_path,
+    )?;
+    if let Some(asset) = asset {
+        let line_incompatible = matches!(
+            asset.media_type,
+            kebab_core::MediaType::Pdf | kebab_core::MediaType::Audio(_)
+        );
+        if line_incompatible {
+            return Err(reject(
+                "span_not_supported",
+                format!(
+                    "doc '{}' has media_type {:?}; line-based span fetch unsupported. \
+                     Use `fetch chunk` or `fetch doc` instead.",
+                    id.0, asset.media_type
+                ),
+                Some("kind = chunk or kind = doc instead".to_string()),
+            ));
+        }
+    }
+
+    // Bounds are 1-based and inclusive; `line_start >= 1` together with
+    // `line_start <= line_end` also rules out `line_end == 0`.
+    let well_formed = line_start >= 1 && line_start <= line_end;
+    if !well_formed {
+        return Err(reject(
+            "invalid_input",
+            format!(
+                "line_start ({line_start}) and line_end ({line_end}) must be 1-based with start <= end"
+            ),
+            None,
+        ));
+    }
+
+    // Result builder used by both exits. It samples the clock when it
+    // is called, i.e. right before returning.
+    let respond = |text: String, effective_end: u32, truncated: bool| {
+        let indexed_at = doc_metadata_updated_at(&doc);
+        let stale = compute_stale(
+            indexed_at,
+            OffsetDateTime::now_utc(),
+            app.config.search.stale_threshold_days,
+        );
+        FetchResult {
+            kind: FetchKind::Span,
+            doc_id: doc.doc_id.clone(),
+            doc_path: doc.workspace_path.clone(),
+            indexed_at,
+            stale,
+            chunk: None,
+            context_before: Vec::new(),
+            context_after: Vec::new(),
+            text: Some(text),
+            line_start: Some(line_start),
+            line_end: Some(line_end),
+            effective_end: Some(effective_end),
+            truncated,
+        }
+    };
+
+    let full = fmt_canonical_to_markdown(&doc);
+    let lines: Vec<&str> = full.lines().collect();
+    let total = lines.len() as u32;
+
+    // p9-fb-35 round-1 review fix: empty / out-of-range request must
+    // not slice. Empty text + `effective_end = line_start - 1` lets the
+    // caller detect "no lines fetched" via
+    // `text.is_empty() && effective_end < line_start`. `truncated`
+    // stays false because a line-range clamp is NOT a budget event.
+    // `saturating_sub` keeps the subtraction from underflowing u32.
+    if total == 0 || line_start > total {
+        return Ok(respond(String::new(), line_start.saturating_sub(1), false));
+    }
+
+    let first = (line_start - 1) as usize;
+    let clamped_end = line_end.min(total);
+    let joined = lines[first..clamped_end as usize].join("\n");
+
+    // `truncated` is reserved for budget-driven truncation only. A
+    // clamp to the last line is signaled via `effective_end < line_end`.
+    // After a budget cut, `effective_end` is recomputed from the number
+    // of lines left in the trimmed text.
+    let char_budget = opts.max_tokens.map(|tokens| tokens.saturating_mul(4));
+    let (text, effective_end, truncated) = match char_budget {
+        Some(limit) if joined.chars().count() > limit => {
+            let cut = trim_to_chars(&joined, limit);
+            let kept = cut.lines().count() as u32;
+            (cut, (line_start - 1) + kept, true)
+        }
+        _ => (joined, clamped_end, false),
+    };
+
+    Ok(respond(text, effective_end, truncated))
+}
+
+/// p9-fb-35: list chunks for a document in ordinal order, return
+/// `(before, after)` slices around the target chunk_id. `n` caps each
+/// side independently — the worst case is `2n` total neighbors when
+/// the target sits in the middle of the doc.
+///
+/// Fails with a typed `chunk_not_found` [`StructuredError`] when the
+/// target id is absent from the document's chunk list, so the
+/// wire-layer classifier keeps the code instead of degrading it to an
+/// untyped anyhow string.
+fn surrounding_chunks(
+    app: &App,
+    doc_id: &DocumentId,
+    target: &ChunkId,
+    n: u32,
+) -> Result<(Vec<Chunk>, Vec<Chunk>)> {
+    let chunks = list_chunks_in_order(app, doc_id)?;
+    let target_idx = chunks
+        .iter()
+        .position(|c| c.chunk_id == *target)
+        .ok_or_else(|| {
+            anyhow::Error::new(StructuredError(ErrorV1 {
+                schema_version: ERROR_V1_ID.to_string(),
+                code: "chunk_not_found".to_string(),
+                message: format!(
+                    "chunk_id '{}' not found in chunk list of doc_id '{}'",
+                    target.0, doc_id.0
+                ),
+                details: serde_json::Value::Null,
+                hint: None,
+            }))
+        })?;
+    let n = n as usize;
+    // Window is [lo, hi): saturating arithmetic clamps it at the start
+    // of the list, `min` clamps it at the end.
+    let lo = target_idx.saturating_sub(n);
+    let hi = target_idx
+        .saturating_add(n)
+        .saturating_add(1)
+        .min(chunks.len());
+    let before: Vec<Chunk> = chunks[lo..target_idx].to_vec();
+    let after: Vec<Chunk> = chunks[target_idx + 1..hi].to_vec();
+    Ok((before, after))
+}
+
+/// p9-fb-35: load every chunk of `doc_id` in the order reported by
+/// `SqliteStore::list_chunk_ids_for_doc`. Chunks have no explicit
+/// ordinal column, so that helper sorts by `(created_at, chunk_id)`,
+/// which matches the chunker's (deterministic) insertion order. The SQL
+/// lives inside `kebab-store-sqlite` to keep the facade crate free of
+/// direct rusqlite usage.
+///
+/// An id whose row lookup returns `None` is skipped rather than
+/// treated as an error.
+fn list_chunks_in_order(app: &App, doc_id: &DocumentId) -> Result<Vec<Chunk>> {
+    let ids = app.sqlite.list_chunk_ids_for_doc(doc_id)?;
+    let mut ordered: Vec<Chunk> = Vec::with_capacity(ids.len());
+    for chunk_id in &ids {
+        let row =
+            <kebab_store_sqlite::SqliteStore as DocumentStore>::get_chunk(&app.sqlite, chunk_id)?;
+        // `Option` is an iterator of zero or one items: `Some` appends
+        // the chunk, `None` appends nothing.
+        ordered.extend(row);
+    }
+    Ok(ordered)
+}
+
+/// The document's `metadata.updated_at`: the timestamp this module
+/// reports as `indexed_at` and feeds to the staleness check.
+fn doc_metadata_updated_at(doc: &CanonicalDocument) -> OffsetDateTime {
+    let metadata = &doc.metadata;
+    metadata.updated_at
+}
+
+/// p9-fb-35: serialize a `CanonicalDocument` back to markdown. Best-
+/// effort round-trip — inline-styled spans (Strong/Emph children)
+/// flatten to plain text via the already-flattened `TextBlock.text`
+/// field. Good enough for an agent reading verbatim context. Used by
+/// doc mode and span mode.
+///
+/// Each block is rendered on its own and the pieces are separated by
+/// one blank line (`"\n\n"`).
+pub(crate) fn fmt_canonical_to_markdown(doc: &CanonicalDocument) -> String {
+    /// Markdown for a single block, with no leading or trailing
+    /// block separator.
+    fn render_block(block: &Block) -> String {
+        match block {
+            Block::Heading(h) => {
+                // Heading depth is clamped to the 1..=6 range.
+                let depth = h.level.clamp(1, 6) as usize;
+                format!("{} {}", "#".repeat(depth), h.text)
+            }
+            Block::Paragraph(t) => {
+                let body: &str = &t.text;
+                body.to_owned()
+            }
+            // Prefix every line with `> ` so block-quote round-trips.
+            Block::Quote(t) => t
+                .text
+                .split('\n')
+                .map(|line| format!("> {line}"))
+                .collect::<Vec<_>>()
+                .join("\n"),
+            // Ordered lists are renumbered from 1; unordered use `- `.
+            Block::List(l) => l
+                .items
+                .iter()
+                .enumerate()
+                .map(|(idx, item)| {
+                    if l.ordered {
+                        format!("{}. {}", idx + 1, item.text)
+                    } else {
+                        format!("- {}", item.text)
+                    }
+                })
+                .collect::<Vec<_>>()
+                .join("\n"),
+            Block::Code(c) => {
+                let mut fenced = String::from("```");
+                if let Some(lang) = &c.lang {
+                    fenced.push_str(lang);
+                }
+                fenced.push('\n');
+                fenced.push_str(&c.code);
+                // Ensure the closing fence starts on its own line.
+                if !c.code.ends_with('\n') {
+                    fenced.push('\n');
+                }
+                fenced.push_str("```");
+                fenced
+            }
+            Block::Table(t) => {
+                // Markdown table separator — N copies of `---|` is
+                // acceptable for a verbatim re-serialization (renderer
+                // tolerates trailing pipe).
+                let header = t.headers.join(" | ");
+                let separator = "---|".repeat(t.headers.len());
+                let mut table_lines = vec![header, separator];
+                table_lines.extend(t.rows.iter().map(|row| row.join(" | ")));
+                table_lines.join("\n")
+            }
+            Block::ImageRef(img) => format!("![{}]({})", img.alt, img.src),
+            // Canonical doc carries the transcript on AudioRefBlock,
+            // but markdown has no native audio embed. Emit a stub
+            // marker so the agent sees something ran here.
+            Block::AudioRef(_a) => "(audio reference)".to_string(),
+        }
+    }
+
+    doc.blocks
+        .iter()
+        .map(render_block)
+        .collect::<Vec<_>>()
+        .join("\n\n")
+}
+
+/// p9-fb-35: free-function entry for CLI / MCP. Opens an [`App`] from
+/// the supplied `config` and runs [`App::fetch`] on it. Follows the
+/// `*_with_config` pattern documented in the kebab-app crate root, so
+/// `kebab-cli` can honor a `--config <path>` flag.
+#[doc(hidden)]
+pub fn fetch_with_config(
+    config: kebab_config::Config,
+    query: FetchQuery,
+    opts: FetchOpts,
+) -> Result<FetchResult> {
+    let app = App::open_with_config(config)?;
+    app.fetch(query, opts)
+}
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -55,20 +55,23 @@ use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
 use kebab_source_fs::FsSourceConnector;

 mod app;
+pub mod cursor;
 pub mod doctor_signal;
 pub mod error_signal;
 pub mod error_wire;
 pub mod external;
+pub mod fetch;
 pub mod ingest_progress;
 pub mod logging;
 pub mod reset;
 pub mod schema;
 mod staleness;

-pub use app::App;
+pub use app::{App, SearchResponse};
 pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
 pub use reset::{ResetReport, ResetScope};
-pub use error_wire::{ERROR_V1_ID, ErrorV1, classify};
+pub use error_wire::{ERROR_V1_ID, ErrorV1, StructuredError, classify};
+pub use fetch::fetch_with_config;
 pub use schema::{Capabilities, Models, SCHEMA_V1_ID, SchemaV1, Stats, WireBlock, schema_with_config};
 pub use staleness::{compute_stale, mark_stale_in_place};

@@ -85,7 +88,7 @@ pub const NO_EXT_SENTINEL: &str = "<no-ext>";
 /// `use kebab_app::AskOpts` keeps working without churn. The struct gained
 /// a `stream_sink` field in P4-3; non-streaming callers (kb-cli today)
 /// pass `stream_sink: None`.
-pub use kebab_rag::AskOpts;
+pub use kebab_rag::{AskOpts, StreamEvent};

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub struct DoctorReport {
@@ -1739,6 +1742,19 @@ pub fn search_uncached_with_config(
    App::open_with_config(config)?.search_uncached(query)
 }

+/// p9-fb-34: budget-aware search free function. Opens an [`App`] from
+/// `config` and forwards to `App::search_with_opts`, threading
+/// `SearchOpts` (max_tokens, snippet_chars, cursor) and returning the
+/// [`SearchResponse`] pagination wrapper. Counterpart of
+/// [`search_with_config`]; surfaced via CLI / MCP.
+#[doc(hidden)]
+pub fn search_with_opts_with_config(
+    config: kebab_config::Config,
+    query: kebab_core::SearchQuery,
+    opts: kebab_core::SearchOpts,
+) -> anyhow::Result<SearchResponse> {
+    let app = App::open_with_config(config)?;
+    app.search_with_opts(query, opts)
+}
+
 // ── ask ──────────────────────────────────────────────────────────────────
 //
 // P4-3 wires `ask` end-to-end. The retriever is built per `opts.mode`;
--- a/crates/kebab-app/src/schema.rs
+++ b/crates/kebab-app/src/schema.rs
@@ -50,6 +50,18 @@ pub struct Stats {
    pub chunk_count: u64,
    pub asset_count: u64,
    pub last_ingest_at: Option<String>,
+    /// p9-fb-37: per-media-kind doc count. Always carries all 5
+    /// media-kind keys; a kind with no documents is present with count 0.
+    #[serde(default)]
+    pub media_breakdown: std::collections::BTreeMap<String, u64>,
+    /// p9-fb-37: per-language doc count, NULL keyed as `"null"`.
+    #[serde(default)]
+    pub lang_breakdown: std::collections::BTreeMap<String, u64>,
+    /// p9-fb-37: on-disk byte sums.
+    #[serde(default)]
+    pub index_bytes: kebab_core::IndexBytes,
+    /// p9-fb-37: docs whose `updated_at` exceeds the staleness threshold.
+    #[serde(default)]
+    pub stale_doc_count: u64,
 }

 const KEBAB_VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -63,6 +75,7 @@ pub const SCHEMA_V1_ID: &str = "schema.v1";
 const WIRE_SCHEMAS: &[&str] = &[
    "answer.v1",
    "search_hit.v1",
+    "search_response.v1",
    "doc_summary.v1",
    "chunk_inspection.v1",
    "doctor.v1",
@@ -84,7 +97,7 @@ const WIRE_SCHEMAS: &[&str] = &[
 #[doc(hidden)]
 pub fn schema_with_config(cfg: &Config) -> anyhow::Result<SchemaV1> {
    let store = open_store_for_stats(cfg)?;
-    let stats = collect_stats(&store)?;
+    let stats = collect_stats(cfg, &store)?;
    let models = collect_models(cfg, &store);
    Ok(SchemaV1 {
        schema_version: SCHEMA_V1_ID.to_string(),
@@ -123,13 +136,24 @@ fn open_store_for_stats(cfg: &Config) -> anyhow::Result<kebab_store_sqlite::Sqli
    kebab_store_sqlite::SqliteStore::open_existing(&db_path)
 }

-fn collect_stats(store: &kebab_store_sqlite::SqliteStore) -> anyhow::Result<Stats> {
-    let counts = store.count_summary()?;
+/// Build the `Stats` section of the schema report from the store's
+/// count summary plus the on-disk index sizes under the data dir.
+///
+/// Returns an error when the count summary fails or when the index
+/// sizes cannot be read (the latter is re-wrapped with an
+/// `index_bytes: ` prefix).
+fn collect_stats(
+    cfg: &Config,
+    store: &kebab_store_sqlite::SqliteStore,
+) -> anyhow::Result<Stats> {
+    // The staleness threshold is passed in days, cast to `u64`.
+    // NOTE(review): assumes `stale_threshold_days` is never negative —
+    // confirm its declared type.
+    let counts = store
+        .count_summary_with_threshold(cfg.search.stale_threshold_days as u64)?;
+    // NOTE(review): the second `expand_path` argument is an empty
+    // string; its meaning is not visible here — confirm against
+    // `kebab_config::expand_path`.
+    let data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
+    let index_bytes = kebab_store_sqlite::stats_ext::index_bytes(&data_dir)
+        .map_err(|e| anyhow::anyhow!("index_bytes: {e}"))?;
    Ok(Stats {
        doc_count: counts.doc_count,
        chunk_count: counts.chunk_count,
        asset_count: counts.asset_count,
        last_ingest_at: counts.last_ingest_at,
+        media_breakdown: counts.media_breakdown,
+        lang_breakdown: counts.lang_breakdown,
+        index_bytes,
+        stale_doc_count: counts.stale_doc_count,
    })
 }

@@ -149,3 +173,31 @@ fn collect_models(cfg: &Config, store: &kebab_store_sqlite::SqliteStore) -> Mode
        corpus_revision: store.corpus_revision(),
    }
 }
+
+#[cfg(test)]
+mod tests_stats_ext {
+    use super::*;
+
+    /// A migrated but empty corpus must still report the p9-fb-37
+    /// stats: five zeroed media kinds, no languages, a non-empty
+    /// sqlite file, no lancedb bytes, and no stale documents.
+    #[test]
+    fn stats_includes_breakdowns_and_bytes_on_fresh_corpus() {
+        let tmp = tempfile::tempdir().unwrap();
+        let mut cfg = kebab_config::Config::defaults();
+        cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned();
+
+        // Bring up migrations so the sqlite file is created; the inner
+        // scope drops the store before the schema call below.
+        {
+            let store = kebab_store_sqlite::SqliteStore::open(&cfg).unwrap();
+            store.run_migrations().unwrap();
+        }
+
+        let stats = schema_with_config(&cfg).unwrap().stats;
+
+        // 5 keys padded.
+        let media = &stats.media_breakdown;
+        assert_eq!(media.len(), 5);
+        assert_eq!(media.get("markdown"), Some(&0));
+        assert_eq!(media.get("pdf"), Some(&0));
+
+        // lang map empty on empty corpus.
+        assert!(stats.lang_breakdown.is_empty());
+
+        // sqlite bytes positive after migrations, lancedb 0.
+        let bytes = &stats.index_bytes;
+        assert!(bytes.sqlite > 0);
+        assert_eq!(bytes.lancedb, 0);
+
+        assert_eq!(stats.stale_doc_count, 0);
+    }
+}
--- a/crates/kebab-app/tests/common/mod.rs
+++ b/crates/kebab-app/tests/common/mod.rs
@@ -79,6 +79,37 @@ impl TestEnv {
            ..Default::default()
        }
    }
+
+    /// p9-fb-34: alias for [`TestEnv::lexical_only`]. Tests added in
+    /// fb-34 call `TestEnv::new()` per the plan; delegating keeps that
+    /// lane AVX-free without touching the existing callers.
+    pub fn new() -> Self {
+        TestEnv::lexical_only()
+    }
+
+    /// p9-fb-34: open a fresh `App` from a clone of this env's config.
+    /// Used by integration tests that call `App::search_with_opts`
+    /// directly; may be called repeatedly to simulate re-opening the
+    /// binary after a corpus revision bump. Panics if the app cannot
+    /// be opened.
+    pub fn app(&self) -> kebab_app::App {
+        let config = self.config.clone();
+        kebab_app::App::open_with_config(config).expect("App::open_with_config")
+    }
+}
+
+/// p9-fb-34: write `content` to `relative_path` inside the env's
+/// workspace (creating parent directories as needed), then run an
+/// ingest via `kebab_app::ingest_with_config` so the document becomes
+/// searchable. Panics on any filesystem or ingest failure.
+pub fn ingest_md(env: &TestEnv, relative_path: &str, content: &str) {
+    let target = env.workspace_root.join(relative_path);
+    match target.parent() {
+        Some(dir) => std::fs::create_dir_all(dir).expect("create parent dirs"),
+        None => {}
+    }
+    std::fs::write(&target, content).expect("write workspace file");
+
+    let outcome = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true);
+    outcome.expect("ingest_with_config");
 }

 /// Test helper: build a `SearchQuery` for lexical mode at k=10. Used
--- a/crates/kebab-app/tests/cursor.rs
+++ b/crates/kebab-app/tests/cursor.rs
@@ -0,0 +1,24 @@
+//! p9-fb-34: cursor encode/decode round-trip + corpus_revision mismatch.
+
+use kebab_app::cursor;
+
+#[test]
+fn cursor_roundtrip_preserves_offset() {
+    // Encode and decode against the same revision: the offset must
+    // survive unchanged.
+    let revision = "rev-abc";
+    let token = cursor::encode(5, revision);
+    let decoded = cursor::decode(&token, revision).unwrap();
+    assert_eq!(decoded, 5);
+}
+
+#[test]
+fn cursor_decode_rejects_mismatched_revision() {
+    let encoded = cursor::encode(7, "rev-old");
+    let err = cursor::decode(&encoded, "rev-new").unwrap_err();
+    assert_eq!(err.code, "stale_cursor");
+    // `decode` formats BOTH the cursor's revision and the current one
+    // into the message. Assert each separately: an `||` check would
+    // still pass if one of them were dropped from the message.
+    assert!(
+        err.message.contains("rev-old"),
+        "message should name the cursor's revision: {}",
+        err.message
+    );
+    assert!(
+        err.message.contains("rev-new"),
+        "message should name the current revision: {}",
+        err.message
+    );
+}
+
+#[test]
+fn cursor_decode_rejects_garbage_input() {
+    // Input that is not a cursor at all is rejected with the same
+    // `stale_cursor` code as a genuinely stale one.
+    let outcome = cursor::decode("not-base64!!!", "any");
+    let rejection = outcome.unwrap_err();
+    assert_eq!(rejection.code, "stale_cursor");
+}
--- a/crates/kebab-app/tests/fetch_integration.rs
+++ b/crates/kebab-app/tests/fetch_integration.rs
@@ -0,0 +1,329 @@
+//! p9-fb-35 App::fetch integration tests.
+
+mod common;
+
+use kebab_app::App;
+use kebab_core::{FetchKind, FetchOpts, FetchQuery};
+
+fn open(env: &common::TestEnv) -> App {
+    common::TestEnv::app(env)
+}
+
+#[test]
+fn fetch_chunk_returns_target_only_when_no_context() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "a.md", "# Title\n\nFirst paragraph.\n\n## Section\n\nSecond.\n");
+    let app = open(&env);
+
+    // Search first: the hit is how this test learns a chunk id.
+    let query = kebab_core::SearchQuery {
+        text: "First".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let target_id = found[0].chunk_id.clone();
+
+    let fetched = app
+        .fetch(FetchQuery::Chunk(target_id), FetchOpts::default())
+        .unwrap();
+    assert_eq!(fetched.kind, FetchKind::Chunk);
+    assert!(fetched.chunk.is_some(), "target chunk populated");
+    assert!(fetched.context_before.is_empty());
+    assert!(fetched.context_after.is_empty());
+    assert!(!fetched.truncated);
+}
+
+#[test]
+fn fetch_chunk_with_context_returns_neighbors() {
+    let env = common::TestEnv::new();
+    let body = "# H1\n\nA1\n\n# H2\n\nA2\n\n# H3\n\nA3\n\n# H4\n\nA4\n\n# H5\n\nA5\n";
+    common::ingest_md(&env, "multi.md", body);
+    let app = env.app();
+
+    let query = kebab_core::SearchQuery {
+        text: "A3".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let target_id = found[0].chunk_id.clone();
+
+    let fetched = app
+        .fetch(
+            FetchQuery::Chunk(target_id),
+            FetchOpts {
+                context: Some(2),
+                max_tokens: None,
+            },
+        )
+        .unwrap();
+    assert_eq!(fetched.kind, FetchKind::Chunk);
+    assert!(fetched.chunk.is_some());
+    let neighbors = fetched.context_before.len() + fetched.context_after.len();
+    assert!(neighbors >= 1, "at least one neighbor expected");
+    assert!(neighbors <= 4, "context capped at +-2 ⇒ max 4 neighbors");
+}
+
+#[test]
+fn fetch_chunk_unknown_id_returns_chunk_not_found() {
+    let env = common::TestEnv::new();
+    let app = env.app();
+    // Nothing was ingested, so this id cannot resolve.
+    let missing = kebab_core::ChunkId("nonexistent-id".to_string());
+    let query = FetchQuery::Chunk(missing);
+    let err = app
+        .fetch(query, FetchOpts::default())
+        .unwrap_err();
+    let msg = err.to_string();
+    assert!(
+        msg.contains("chunk_not_found") || msg.contains("nonexistent-id"),
+        "expected chunk_not_found error, got: {msg}"
+    );
+}
+
+#[test]
+fn fetch_doc_returns_serialized_markdown() {
+    let env = common::TestEnv::new();
+    let body = "# Heading One\n\nFirst paragraph.\n\n## Sub\n\nSecond.\n";
+    common::ingest_md(&env, "doc.md", body);
+    let app = env.app();
+
+    // Take the doc_id from a search hit so the test stays independent of the list_docs API shape.
+    let query = kebab_core::SearchQuery {
+        text: "First".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let target_doc = found[0].doc_id.clone();
+
+    let fetched = app
+        .fetch(FetchQuery::Doc(target_doc), FetchOpts::default())
+        .unwrap();
+    assert_eq!(fetched.kind, FetchKind::Doc);
+    let text = fetched.text.expect("doc text");
+    assert!(text.contains("Heading One"), "doc text contains heading: {text:?}");
+    assert!(text.contains("First paragraph"), "doc text contains body");
+    assert!(!fetched.truncated);
+}
+
+#[test]
+fn fetch_doc_unknown_id_returns_doc_not_found() {
+    let env = common::TestEnv::new();
+    let app = env.app();
+    // Nothing was ingested, so this id cannot resolve.
+    let missing = kebab_core::DocumentId("nonexistent-doc".to_string());
+    let query = FetchQuery::Doc(missing);
+    let err = app
+        .fetch(query, FetchOpts::default())
+        .unwrap_err();
+    assert!(err.to_string().contains("doc_not_found"), "got: {err}");
+}
+
+#[test]
+fn fetch_doc_with_max_tokens_truncates() {
+    let env = common::TestEnv::new();
+    let p = "Lorem ipsum dolor sit amet consectetur adipiscing elit. ".repeat(20);
+    let body = format!("# Big\n\n{p}\n");
+    common::ingest_md(&env, "big.md", &body);
+    let app = env.app();
+    let query = kebab_core::SearchQuery {
+        text: "Lorem".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let target_doc = found[0].doc_id.clone();
+
+    let fetched = app
+        .fetch(
+            FetchQuery::Doc(target_doc),
+            FetchOpts {
+                context: None,
+                max_tokens: Some(20), // ~80 chars
+            },
+        )
+        .unwrap();
+    assert!(fetched.truncated);
+    let text = fetched.text.expect("doc text");
+    assert!(text.chars().count() <= 100, "trimmed text len {}", text.chars().count());
+}
+
+#[test]
+fn fetch_span_returns_line_range() {
+    let env = common::TestEnv::new();
+    // The fixture is a list on purpose: the canonical-to-markdown
+    // roundtrip emits 5 single-line entries joined by `\n`. Paragraphs
+    // would be joined by `\n\n`, and CommonMark soft breaks inside one paragraph
+    // collapse to spaces — see crates/kebab-parse-md/src/blocks.rs `Event::SoftBreak`.
+    let body = "- Line one.\n- Line two.\n- Line three.\n- Line four.\n- Line five.\n";
+    common::ingest_md(&env, "lines.md", body);
+    let app = env.app();
+
+    let query = kebab_core::SearchQuery {
+        text: "Line".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let doc_id = found[0].doc_id.clone();
+
+    let fetched = app
+        .fetch(
+            FetchQuery::Span {
+                doc_id,
+                line_start: 2,
+                line_end: 4,
+            },
+            FetchOpts::default(),
+        )
+        .unwrap();
+    assert_eq!(fetched.kind, FetchKind::Span);
+    let text = fetched.text.expect("span text");
+    let returned_lines = text.lines().count();
+    assert_eq!(returned_lines, 3, "span should be 3 lines: {text:?}");
+    assert_eq!(fetched.line_start, Some(2));
+    assert_eq!(fetched.line_end, Some(4));
+    assert_eq!(fetched.effective_end, Some(4));
+    assert!(!fetched.truncated);
+}
+
+#[test]
+fn fetch_span_clamps_line_end_when_out_of_range() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "short.md", "Line one.\nLine two.\n");
+    let app = env.app();
+    let query = kebab_core::SearchQuery {
+        text: "Line".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let doc_id = found[0].doc_id.clone();
+
+    let fetched = app
+        .fetch(
+            FetchQuery::Span {
+                doc_id,
+                line_start: 1,
+                line_end: 999,
+            },
+            FetchOpts::default(),
+        )
+        .unwrap();
+    let span_text = fetched.text.expect("span text");
+    let returned_lines = span_text.lines().count();
+    assert_eq!(fetched.effective_end, Some(returned_lines as u32));
+    assert!(returned_lines < 999);
+}
+
+#[test]
+fn fetch_span_invalid_input_when_zero_lines() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "a.md", "Line one.\n");
+    let app = env.app();
+    let query = kebab_core::SearchQuery {
+        text: "Line".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let doc_id = found[0].doc_id.clone();
+
+    let err = app
+        .fetch(
+            FetchQuery::Span {
+                doc_id,
+                line_start: 0,
+                line_end: 0,
+            },
+            FetchOpts::default(),
+        )
+        .unwrap_err();
+    assert!(err.to_string().contains("invalid_input"), "got: {err}");
+}
+
+#[test]
+fn fetch_span_line_start_beyond_total_returns_empty_text() {
+    let env = common::TestEnv::new();
+    let body = "- Line one.\n- Line two.\n";
+    common::ingest_md(&env, "two_lines.md", body);
+    let app = env.app();
+    let query = kebab_core::SearchQuery {
+        text: "Line".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let found = app.search(query).unwrap();
+    let doc_id = found[0].doc_id.clone();
+
+    let fetched = app
+        .fetch(
+            FetchQuery::Span {
+                doc_id,
+                line_start: 100,
+                line_end: 200,
+            },
+            FetchOpts::default(),
+        )
+        .unwrap();
+    let span_text = fetched.text.expect("text field");
+    assert!(span_text.is_empty(), "out-of-range request returns empty text");
+    assert!(
+        !fetched.truncated,
+        "out-of-range is NOT truncated (budget-only flag)"
+    );
+}
+
+#[test]
+fn fetch_chunk_context_at_first_chunk_clamps_lower_bound() {
+    let env = common::TestEnv::new();
+    // Multi-chunk markdown so context ±N has neighbors.
+    let body =
+        "# H1\n\nFirst chunk text body.\n\n# H2\n\nSecond chunk.\n\n# H3\n\nThird chunk.\n";
+    common::ingest_md(&env, "boundary.md", body);
+    let app = env.app();
+    let q = kebab_core::SearchQuery {
+        text: "First".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let hits = app.search(q).unwrap();
+    let chunk_id = hits[0].chunk_id.clone();
+
+    let result = app
+        .fetch(
+            FetchQuery::Chunk(chunk_id),
+            FetchOpts {
+                context: Some(2),
+                max_tokens: None,
+            },
+        )
+        .unwrap();
+    // p9-fb-35 R2: doc has 3 chunks, so ±2 must clamp the total
+    // neighbor count to ≤ 2 (the 3 chunks minus the target itself).
+    //
+    // ⚠ Strict "first-chunk → context_before is empty" cannot be
+    // asserted here yet because chunks.ordinal column does not exist
+    // — `list_chunk_ids_for_doc` orders by `(created_at, chunk_id)`
+    // and chunk_id is a blake3 hash, so the "First chunk" content
+    // may land at any hash-order position within the doc. The clamp
+    // logic itself is correct (target_idx ± n → [0..len]); we just
+    // can't pin which chunk is hash-order-first. Tracked as
+    // follow-up: V007 chunks.ordinal migration.
+    let total = result.context_before.len() + result.context_after.len();
+    assert!(
+        total <= 2,
+        "doc with 3 chunks ±2 → at most 2 neighbors (excludes target), got {total}"
+    );
+}
--- a/crates/kebab-app/tests/search_budget_integration.rs
+++ b/crates/kebab-app/tests/search_budget_integration.rs
@@ -0,0 +1,165 @@
+//! p9-fb-34: App::search_with_opts integration tests.
+
+mod common;
+
+use kebab_app::SearchResponse;
+use kebab_core::{SearchFilters, SearchMode, SearchOpts, SearchQuery};
+
+fn lex(text: &str, k: usize) -> SearchQuery {
+    SearchQuery {
+        filters: SearchFilters::default(),
+        k,
+        mode: SearchMode::Lexical,
+        text: String::from(text),
+    }
+}
+
+#[test]
+fn search_with_opts_no_budget_matches_search() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "a.md", "# T\n\napples are red\n");
+    let app = env.app();
+
+    let plain_hits = app.search(lex("apples", 5)).unwrap();
+    let response: SearchResponse = app
+        .search_with_opts(lex("apples", 5), SearchOpts::default())
+        .unwrap();
+
+    assert_eq!(response.hits.len(), plain_hits.len());
+    assert!(!response.truncated);
+    assert!(response.next_cursor.is_none(), "k=5 against 1 doc → no next page");
+}
+
+#[test]
+fn budget_truncates_snippets_when_below_threshold() {
+    let env = common::TestEnv::new();
+    let body: String = "rust ownership is a memory model. ".repeat(10);
+    common::ingest_md(&env, "a.md", &format!("# T\n\n{body}\n"));
+    let app = env.app();
+
+    let baseline = app.search(lex("rust", 5)).unwrap();
+    let baseline_chars: usize = baseline.iter().map(|hit| hit.snippet.chars().count()).sum();
+
+    let response = app
+        .search_with_opts(
+            lex("rust", 5),
+            SearchOpts {
+                max_tokens: Some(50),
+                snippet_chars: None,
+                cursor: None,
+                trace: false,
+            },
+        )
+        .unwrap();
+    let budgeted_chars: usize = response.hits.iter().map(|hit| hit.snippet.chars().count()).sum();
+
+    assert!(response.truncated, "small budget must trip truncation");
+    assert!(budgeted_chars < baseline_chars, "snippet should shrink");
+    assert!(!response.hits.is_empty(), "always retain ≥1 hit");
+}
+
+#[test]
+fn cursor_paginates_to_next_page() {
+    let env = common::TestEnv::new();
+    for i in 0..6 {
+        common::ingest_md(&env, &format!("d{i}.md"), &format!("# T{i}\n\nrust topic {i}\n"));
+    }
+    let app = env.app();
+
+    let first = app
+        .search_with_opts(lex("rust", 2), SearchOpts::default())
+        .unwrap();
+    assert_eq!(first.hits.len(), 2);
+    let resume_token = first.next_cursor.expect("more hits available");
+
+    let second = app
+        .search_with_opts(
+            lex("rust", 2),
+            SearchOpts {
+                max_tokens: None,
+                snippet_chars: None,
+                cursor: Some(resume_token),
+                trace: false,
+            },
+        )
+        .unwrap();
+    assert_eq!(second.hits.len(), 2);
+    let first_ids: std::collections::HashSet<_> =
+        first.hits.iter().map(|hit| hit.chunk_id.0.clone()).collect();
+    let second_ids: std::collections::HashSet<_> =
+        second.hits.iter().map(|hit| hit.chunk_id.0.clone()).collect();
+    assert!(first_ids.is_disjoint(&second_ids), "page 2 must not repeat page 1 hits");
+}
+
+#[test]
+fn cursor_rejected_after_corpus_revision_bump() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "a.md", "# T\n\napples\n");
+    let app = env.app();
+
+    let first = app
+        .search_with_opts(lex("apples", 1), SearchOpts::default())
+        .unwrap();
+    // p9-fb-34 round-1 review: `.expect(...)` (instead of a silent
+    // `if let Some(c) = ...`) makes a fixture regression that stops
+    // cursor emission fail loudly rather than pass vacuously.
+    let old_cursor = first
+        .next_cursor
+        .expect("k=1 page must emit next_cursor — fixture too small if this fails");
+
+    common::ingest_md(&env, "b.md", "# B\n\nbananas\n");
+    let reopened = env.app();
+
+    let outcome = reopened.search_with_opts(
+        lex("apples", 1),
+        SearchOpts {
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: Some(old_cursor),
+            trace: false,
+        },
+    );
+    let err = outcome.unwrap_err();
+    assert!(
+        err.to_string().contains("stale_cursor"),
+        "must surface stale_cursor: {err}"
+    );
+}
+
+#[test]
+fn max_tokens_zero_returns_one_hit_truncated() {
+    // p9-fb-34 round-1 review: pins the documented "≥1 hit floor"
+    // contract. Even `max_tokens=0` (an absurdly tight budget) must
+    // leave one hit in place and flip `truncated: true`. Several
+    // matching docs are seeded on purpose so step 2 of the budget
+    // loop (pop hits to 1) actually fires.
+    let env = common::TestEnv::new();
+    for i in 0..3 {
+        common::ingest_md(
+            &env,
+            &format!("d{i}.md"),
+            &format!("# T{i}\n\napples are red {i}\n"),
+        );
+    }
+    let app = env.app();
+
+    let response = app
+        .search_with_opts(
+            lex("apples", 5),
+            SearchOpts {
+                max_tokens: Some(0),
+                snippet_chars: None,
+                cursor: None,
+                trace: false,
+            },
+        )
+        .unwrap();
+    assert_eq!(response.hits.len(), 1, "max_tokens=0 collapses to 1-hit floor");
+    assert!(response.truncated);
+    // p9-fb-34 R2: the k-pop case still emits a cursor so the popped
+    // hits remain reachable.
+    assert!(
+        response.next_cursor.is_some(),
+        "k-pop truncation must still emit next_cursor; popped hits at offset+returned"
+    );
+}
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -86,6 +86,12 @@ enum Cmd {
        what: InspectWhat,
    },

+    /// p9-fb-35: verbatim chunk / doc / span fetch.
+    Fetch {
+        #[command(subcommand)]
+        what: FetchWhat,
+    },
+
    /// Lexical / vector / hybrid search over chunks.
    Search {
        query: String,
@@ -108,6 +114,63 @@ enum Cmd {
        /// future TUI cache-aware search and for explicit intent.
        #[arg(long)]
        no_cache: bool,
+
+        /// p9-fb-34: cap result wire JSON size at approximately N tokens
+        /// (chars/4 estimate). When set, smaller snippets and fewer hits
+        /// may be returned; check `truncated` in the JSON wire.
+        #[arg(long)]
+        max_tokens: Option<usize>,
+
+        /// p9-fb-34: per-hit snippet character cap, overrides
+        /// `config.search.snippet_chars` for this call only.
+        #[arg(long)]
+        snippet_chars: Option<usize>,
+
+        /// p9-fb-34: opaque cursor from a previous response's
+        /// `next_cursor` to fetch the next page. Mismatched
+        /// `corpus_revision` returns `error.v1.code = stale_cursor`.
+        #[arg(long)]
+        cursor: Option<String>,
+
+        /// p9-fb-36: filter by `metadata.tags`. Repeatable; OR-within (any tag).
+        #[arg(long)]
+        tag: Vec<String>,
+
+        /// p9-fb-36: filter by `documents.lang` (ISO code).
+        #[arg(long)]
+        lang: Option<String>,
+
+        /// p9-fb-36: filter by `documents.workspace_path` glob.
+        #[arg(long)]
+        path_glob: Option<String>,
+
+        /// p9-fb-36: filter by minimum `documents.trust_level`.
+        #[arg(long, value_enum)]
+        trust_min: Option<TrustLevelFlag>,
+
+        /// p9-fb-36: filter by `assets.media_type` kind. Comma-separated.
+        /// Aliases: `md` → `markdown`. Other accepted: `markdown`, `pdf`,
+        /// `image`, `audio`, `other`. Unknown values match nothing.
+        #[arg(long, value_delimiter = ',')]
+        media: Vec<String>,
+
+        /// p9-fb-36: filter to docs whose `updated_at` is >= this RFC3339
+        /// timestamp (UTC). Invalid format → exit 2 with error.v1
+        /// code = config_invalid.
+        #[arg(long)]
+        ingested_after: Option<String>,
+
+        /// p9-fb-36: filter to a single doc by id.
+        #[arg(long)]
+        doc_id: Option<String>,
+
+        /// p9-fb-37: emit pre-fusion lexical / vector / RRF candidate
+        /// lists + per-stage timing in the response. Bypasses cache
+        /// (debug intent — fresh run guaranteed). Requires embeddings
+        /// when `--mode hybrid` or `--mode vector`; lexical mode runs
+        /// without embeddings via a no-op vector stub.
+        #[arg(long)]
+        trace: bool,
    },

    /// Retrieval-augmented question answering.
@@ -153,6 +216,12 @@ enum Cmd {
        /// (e.g. `kebab-rust-async-2026-05`).
        #[arg(long, value_name = "ID")]
        session: Option<String>,
+
+        /// p9-fb-33: emit ndjson `answer_event.v1` events on stderr
+        /// while streaming. Final stdout line is the existing
+        /// `answer.v1`. Off by default to preserve final-only behavior.
+        #[arg(long)]
+        stream: bool,
    },

    /// Wipe XDG data dirs (and optionally the Lance vector store) so the
@@ -238,6 +307,33 @@ enum InspectWhat {
    Chunk { id: String },
 }

+#[derive(Subcommand, Debug)]
+enum FetchWhat {
+    /// Fetch a single chunk verbatim, optionally with surrounding context.
+    Chunk {
+        id: String, // positional; wrapped into `kebab_core::ChunkId` by the `Cmd::Fetch` arm
+        /// p9-fb-35: include ±N chunks before and after the target.
+        #[arg(long)]
+        context: Option<u32>,
+    },
+    /// Fetch the entire normalized markdown text of a document.
+    Doc {
+        id: String, // positional; wrapped into `kebab_core::DocumentId` by the `Cmd::Fetch` arm
+        /// p9-fb-35: chars/4 budget cap.
+        #[arg(long)]
+        max_tokens: Option<usize>,
+    },
+    /// Fetch a 1-based line range of a document. PDF / audio rejected.
+    Span {
+        doc_id: String, // positional; wrapped into `kebab_core::DocumentId` by the `Cmd::Fetch` arm
+        line_start: u32, // positional; first line of the range (1-based)
+        line_end: u32, // positional; last line of the range (1-based)
+        /// p9-fb-35: chars/4 budget cap.
+        #[arg(long)]
+        max_tokens: Option<usize>,
+    },
+}
+
 #[derive(Subcommand, Debug)]
 enum EvalWhat {
    /// Run the golden suite end-to-end and persist `eval_runs` +
@@ -295,6 +391,25 @@ impl From<ModeFlag> for kebab_core::SearchMode {
    }
 }

+/// p9-fb-36: clap value enum for `--trust-min`. Maps to
+/// `kebab_core::TrustLevel` via `From`.
+#[derive(clap::ValueEnum, Clone, Debug)]
+enum TrustLevelFlag {
+    Primary,
+    Secondary,
+    Generated,
+}
+
+impl From<TrustLevelFlag> for kebab_core::TrustLevel {
+    fn from(flag: TrustLevelFlag) -> Self {
+        match flag {
+            TrustLevelFlag::Primary => Self::Primary,
+            TrustLevelFlag::Secondary => Self::Secondary,
+            TrustLevelFlag::Generated => Self::Generated,
+        }
+    }
+}
+
 /// Parse boolean env var accepting "1", "true", "yes", "on" (case-insensitive)
 /// as truthy; "0", "false", "no", "off" as falsy. Used for `KEBAB_READONLY`.
 fn parse_bool_env(s: &str) -> Result<bool, String> {
@@ -503,30 +618,144 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
            }
        },

+        Cmd::Fetch { what } => {
+            let cfg = kebab_config::Config::load(cli.config.as_deref())?;
+            let (query, opts) = match what {
+                FetchWhat::Chunk { id, context } => (
+                    kebab_core::FetchQuery::Chunk(kebab_core::ChunkId(id.clone())),
+                    kebab_core::FetchOpts {
+                        context: *context,
+                        max_tokens: None,
+                    },
+                ),
+                FetchWhat::Doc { id, max_tokens } => (
+                    kebab_core::FetchQuery::Doc(kebab_core::DocumentId(id.clone())),
+                    kebab_core::FetchOpts {
+                        context: None,
+                        max_tokens: *max_tokens,
+                    },
+                ),
+                FetchWhat::Span {
+                    doc_id,
+                    line_start,
+                    line_end,
+                    max_tokens,
+                } => (
+                    kebab_core::FetchQuery::Span {
+                        doc_id: kebab_core::DocumentId(doc_id.clone()),
+                        line_start: *line_start,
+                        line_end: *line_end,
+                    },
+                    kebab_core::FetchOpts {
+                        context: None,
+                        max_tokens: *max_tokens,
+                    },
+                ),
+            };
+            let result = kebab_app::fetch_with_config(cfg, query, opts)?;
+            if cli.json {
+                println!("{}", serde_json::to_string(&wire::wire_fetch_result(&result))?);
+            } else {
+                render_fetch_plain(&result);
+            }
+            Ok(())
+        }
+
        Cmd::Search {
            query,
            k,
            mode,
            explain: _,
            no_cache,
+            max_tokens,
+            snippet_chars,
+            cursor,
+            tag,
+            lang,
+            path_glob,
+            trust_min,
+            media,
+            ingested_after,
+            doc_id,
+            trace,
        } => {
            let cfg = kebab_config::Config::load(cli.config.as_deref())?;
+
+            // p9-fb-36: normalize a --media value: trim (so `md, pdf` works), lowercase, md → markdown.
+            fn normalize_media_alias(s: &str) -> String {
+                match s.trim().to_ascii_lowercase().as_str() {
+                    "md" => "markdown".to_string(),
+                    other => other.to_string(),
+                }
+            }
+            let media_norm: Vec<String> =
+                media.iter().map(|s| normalize_media_alias(s)).collect();
+
+            // p9-fb-36: parse --ingested-after as RFC3339; structured error on failure.
+            let ingested_after_parsed: Option<time::OffsetDateTime> =
+                match ingested_after.as_deref() {
+                    Some(s) => {
+                        match time::OffsetDateTime::parse(
+                            s,
+                            &time::format_description::well_known::Rfc3339,
+                        ) {
+                            Ok(ts) => Some(ts),
+                            Err(e) => {
+                                return Err(anyhow::Error::new(
+                                    kebab_app::StructuredError(kebab_app::ErrorV1 {
+                                        schema_version: kebab_app::ERROR_V1_ID.to_string(),
+                                        code: "config_invalid".to_string(),
+                                        message: format!(
+                                            "--ingested-after: invalid RFC3339 timestamp '{s}': {e}"
+                                        ),
+                                        details: serde_json::Value::Null,
+                                        hint: Some(
+                                            "expected format like 2026-04-01T00:00:00Z".to_string(),
+                                        ),
+                                    }),
+                                ));
+                            }
+                        }
+                    }
+                    None => None,
+                };
+
+            // p9-fb-36: build SearchFilters from the 7 new flags.
+            let filters = kebab_core::SearchFilters {
+                tags_any: tag.clone(),
+                lang: lang.as_ref().map(|s| kebab_core::Lang(s.clone())),
+                path_glob: path_glob.clone(),
+                trust_min: trust_min.clone().map(Into::into),
+                media: media_norm,
+                ingested_after: ingested_after_parsed,
+                doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())),
+            };
+
            let q = kebab_core::SearchQuery {
                text: query.clone(),
                mode: (*mode).into(),
                k: *k,
-                filters: kebab_core::SearchFilters::default(),
+                filters,
            };
-            // p9-fb-19: --no-cache routes to the uncached facade.
-            // Both calls go through the same App; only the cache
-            // lookup/insert is skipped.
-            let hits = if *no_cache {
-                kebab_app::search_uncached_with_config(cfg, q)?
-            } else {
-                kebab_app::search_with_config(cfg, q)?
+            let opts = kebab_core::SearchOpts {
+                max_tokens: *max_tokens,
+                snippet_chars: *snippet_chars,
+                cursor: cursor.clone(),
+                trace: *trace,
            };
+            // p9-fb-34: budget-aware path. --no-cache still bypasses the
+            // App-level LRU; wire wrapper applies regardless.
+            let app = kebab_app::App::open_with_config(cfg)?;
+            if *no_cache {
+                app.clear_search_cache();
+            }
+            let resp = app.search_with_opts(q, opts)?;
+
            if cli.json {
-                println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
+                println!(
+                    "{}",
+                    serde_json::to_string(&wire::wire_search_response(&resp))?
+                );
            } else {
                // p9-fb-32: prefix `[stale]` on the doc_path for hits
                // whose `stale: true`. Yellow on TTY, plain otherwise —
@@ -536,7 +765,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
                // lands on); no new dep.
                use std::io::IsTerminal;
                let color = std::io::stdout().is_terminal();
-                for h in &hits {
+                for h in &resp.hits {
                    // Show 4-digit score so RRF fused scores (bounded
                    // ~0–0.033 for k_rrf=60) don't all collapse to "0.02".
                    // Append heading_path so multiple chunks from the same
@@ -564,6 +793,28 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
                        heading,
                    );
                }
+                // p9-fb-34: hint on stderr (stdout stays clean); `--cursor` only if one was emitted.
+                if let (true, Some(next)) = (resp.truncated, resp.next_cursor.as_deref()) {
+                    eprintln!("[truncated; use --cursor {next} for the next page]");
+                } else if resp.truncated {
+                    eprintln!("[truncated; no next-page cursor available]");
+                }
+                if *trace {
+                    if let Some(t) = &resp.trace {
+                        eprintln!();
+                        eprintln!("Trace:");
+                        eprintln!("  lexical ({} hits, {}ms):", t.lexical.len(), t.timing.lexical_ms);
+                        for c in t.lexical.iter().take(3) {
+                            eprintln!("    rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0);
+                        }
+                        eprintln!("  vector ({} hits, {}ms):", t.vector.len(), t.timing.vector_ms);
+                        for c in t.vector.iter().take(3) {
+                            eprintln!("    rank={} score={:.4} chunk={}", c.rank, c.score, c.chunk_id.0);
+                        }
+                        eprintln!("  fusion ({} inputs, {}ms)", t.rrf_inputs.len(), t.timing.fusion_ms);
+                        eprintln!("  total: {}ms", t.timing.total_ms);
+                    }
+                }
            }
            Ok(())
        }
@@ -578,55 +829,138 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
            show_citations,
            hide_citations,
            session,
+            stream,
        } => {
            let cfg = kebab_config::Config::load(cli.config.as_deref())?;
-            let opts = kebab_app::AskOpts {
-                k: *k,
-                explain: *explain,
-                mode: (*mode).into(),
-                temperature: *temperature,
-                seed: *seed,
-                // CLI ask is non-streaming today (the answer prints all at
-                // once on completion). The TUI ask pane (P9-3) is what
-                // wires up a real `mpsc::Sender` here.
-                stream_sink: None,
-                // p9-fb-18: when `--session` is set, the facade
-                // (`ask_with_session_with_config`) loads prior turns
-                // from SQLite and stuffs them into AskOpts.history
-                // before calling `ask_with_history`. Single-shot path
-                // (no `--session`) keeps the empty defaults.
-                history: Vec::new(),
-                conversation_id: None,
-                turn_index: None,
-            };
-            let ans = match session.as_deref() {
-                Some(sid) => kebab_app::ask_with_session_with_config(cfg, sid, query, opts)?,
-                None => kebab_app::ask_with_config(cfg, query, opts)?,
-            };
-            if cli.json {
-                println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?);
-            } else {
-                println!("{}", ans.answer);
-                // p9-fb-20: print the citation block after the
-                // answer body when --hide-citations is not set
-                // (--show-citations is the default). Skipped on
-                // refusal-with-zero-citations to avoid an empty
-                // `근거:` header.
-                let print_citations = *show_citations && !*hide_citations;
-                if print_citations && !ans.citations.is_empty() {
-                    // p9-fb-32: yellow `[stale]` prefix on TTY (mirrors
-                    // the search renderer's pattern in `Cmd::Search`).
-                    use std::io::IsTerminal;
-                    let color = std::io::stdout().is_terminal();
-                    let mut out = std::io::stdout().lock();
-                    render_ask_plain_citations(&mut out, &ans, color)?;
+            if *stream {
+                // p9-fb-33: streaming branch. Background thread runs
+                // ask_with_config (which calls into the rag pipeline);
+                // main thread drains the receiver and writes
+                // `answer_event.v1` ndjson to stderr. On BrokenPipe
+                // (downstream consumer closed), drop the receiver so
+                // the worker's next `send` returns SendError →
+                // pipeline cancels with LlmStreamAborted. Final stdout
+                // line is the existing `answer.v1` (mirrors
+                // ingest_progress.v1 + ingest_report.v1 split).
+                use std::io::Write;
+                use std::sync::mpsc;
+
+                let (tx, rx) = mpsc::channel::<kebab_app::StreamEvent>();
+                let opts = kebab_app::AskOpts {
+                    k: *k,
+                    explain: *explain,
+                    mode: (*mode).into(),
+                    temperature: *temperature,
+                    seed: *seed,
+                    stream_sink: Some(tx),
+                    history: Vec::new(),
+                    conversation_id: None,
+                    turn_index: None,
+                };
+                let cfg2 = cfg.clone();
+                let q = query.clone();
+                let session2 = session.clone();
+                let handle = std::thread::spawn(
+                    move || -> anyhow::Result<kebab_core::Answer> {
+                        match session2.as_deref() {
+                            Some(sid) => kebab_app::ask_with_session_with_config(
+                                cfg2, sid, &q, opts,
+                            ),
+                            None => kebab_app::ask_with_config(cfg2, &q, opts),
+                        }
+                    },
+                );
+
+                // Drain receiver, write ndjson to stderr until
+                // completion or BrokenPipe.
+                let mut cancelled_pipe = false;
+                {
+                    let mut stderr = std::io::stderr().lock();
+                    for ev in &rx {
+                        let now = time::OffsetDateTime::now_utc();
+                        let v = wire::wire_answer_event(&ev, now);
+                        let line = serde_json::to_string(&v)?;
+                        if let Err(e) = writeln!(stderr, "{line}") {
+                            if e.kind() == std::io::ErrorKind::BrokenPipe {
+                                cancelled_pipe = true;
+                                break;
+                            }
+                            return Err(e.into());
+                        }
+                    }
                }
+                if cancelled_pipe {
+                    // Dropping the receiver signals to the worker —
+                    // the next `send` returns SendError, which the
+                    // pipeline interprets as a cancel.
+                    drop(rx);
+                }
+
+                let result = handle
+                    .join()
+                    .map_err(|_| anyhow::anyhow!("ask worker panicked"))?;
+                let ans = result?;
+
+                // Final stdout line — answer.v1 for backwards
+                // compat. Any stdout write error (e.g. BrokenPipe,
+                // caller already gone) is deliberately ignored.
+                let final_json = serde_json::to_string(&wire::wire_answer(&ans))?;
+                let _ = writeln!(std::io::stdout().lock(), "{final_json}");
+
+                if !ans.grounded {
+                    return Err(RefusalSignal.into());
+                }
+                Ok(())
+            } else {
+                let opts = kebab_app::AskOpts {
+                    k: *k,
+                    explain: *explain,
+                    mode: (*mode).into(),
+                    temperature: *temperature,
+                    seed: *seed,
+                    // CLI ask is non-streaming by default (the answer
+                    // prints all at once on completion). `--stream`
+                    // takes the branch above; the TUI ask pane (P9-3)
+                    // wires up its own `mpsc::Sender`.
+                    stream_sink: None,
+                    // p9-fb-18: when `--session` is set, the facade
+                    // (`ask_with_session_with_config`) loads prior turns
+                    // from SQLite and stuffs them into AskOpts.history
+                    // before calling `ask_with_history`. Single-shot path
+                    // (no `--session`) keeps the empty defaults.
+                    history: Vec::new(),
+                    conversation_id: None,
+                    turn_index: None,
+                };
+                let ans = match session.as_deref() {
+                    Some(sid) => kebab_app::ask_with_session_with_config(cfg, sid, query, opts)?,
+                    None => kebab_app::ask_with_config(cfg, query, opts)?,
+                };
+                if cli.json {
+                    println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?);
+                } else {
+                    println!("{}", ans.answer);
+                    // p9-fb-20: print the citation block after the
+                    // answer body when --hide-citations is not set
+                    // (--show-citations is the default). Skipped on
+                    // refusal-with-zero-citations to avoid an empty
+                    // `근거:` header.
+                    let print_citations = *show_citations && !*hide_citations;
+                    if print_citations && !ans.citations.is_empty() {
+                        // p9-fb-32: yellow `[stale]` prefix on TTY (mirrors
+                        // the search renderer's pattern in `Cmd::Search`).
+                        use std::io::IsTerminal;
+                        let color = std::io::stdout().is_terminal();
+                        let mut out = std::io::stdout().lock();
+                        render_ask_plain_citations(&mut out, &ans, color)?;
+                    }
+                }
+                // Refusal → exit 1.
+                if !ans.grounded {
+                    return Err(RefusalSignal.into());
+                }
+                Ok(())
            }
-            // Refusal → exit 1.
-            if !ans.grounded {
-                return Err(RefusalSignal.into());
-            }
-            Ok(())
        }

        Cmd::Reset {
@@ -989,6 +1323,53 @@ fn confirm_destructive(
    Ok(matches!(s.as_str(), "y" | "yes"))
 }

+/// p9-fb-35: human-friendly plain output for `kebab fetch` (stdout; doc/span truncation hint on stderr).
+fn render_fetch_plain(r: &kebab_core::FetchResult) {
+    println!("# {} ({})", r.doc_path.0, format_kind(r.kind));
+    if r.stale {
+        println!("[stale; indexed_at = {}]", r.indexed_at);
+    }
+    match r.kind {
+        kebab_core::FetchKind::Chunk => {
+            if !r.context_before.is_empty() {
+                println!("\n=== before ===");
+                for c in &r.context_before {
+                    let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or("");
+                    println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text);
+                }
+            }
+            if let Some(c) = &r.chunk {
+                println!("\n=== target ===");
+                let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or("");
+                println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text);
+            }
+            if !r.context_after.is_empty() {
+                println!("\n=== after ===");
+                for c in &r.context_after {
+                    let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or("");
+                    println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text);
+                }
+            }
+        }
+        kebab_core::FetchKind::Doc | kebab_core::FetchKind::Span => {
+            if let Some(text) = &r.text {
+                println!("\n{text}");
+            }
+            if r.truncated {
+                eprintln!("[truncated; widen --max-tokens for fuller text]");
+            }
+        }
+    }
+}
+
+fn format_kind(k: kebab_core::FetchKind) -> &'static str {
+    match k {
+        kebab_core::FetchKind::Chunk => "chunk",
+        kebab_core::FetchKind::Doc => "doc",
+        kebab_core::FetchKind::Span => "span",
+    }
+}
+
 #[cfg(test)]
 mod tests {
    //! p9-fb-32: unit tests for `render_ask_plain_citations`. The
--- a/crates/kebab-cli/src/wire.rs
+++ b/crates/kebab-cli/src/wire.rs
@@ -75,10 +75,24 @@ pub fn wire_search_hit(h: &SearchHit) -> Value {
    tag_object(v, "search_hit.v1")
 }

-/// Wrap a list of [`SearchHit`] values as a JSON array of `search_hit.v1`
-/// objects (one tag per element, per design §2.2).
-pub fn wire_search_hits(hits: &[SearchHit]) -> Value {
-    Value::Array(hits.iter().map(wire_search_hit).collect())
+/// p9-fb-34: tag a `SearchResponse` as `search_response.v1`. Wraps
+/// the `search_hit.v1[]` array under `hits` with `next_cursor` +
+/// `truncated`; a `trace` key is added only when `r.trace` is `Some`.
+/// Replaces the previous bare top-level `search_hit.v1[]` array
+/// (`wire_search_hits`) — breaking shape change, see HOTFIXES / fb-34.
+pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value {
+    let mut v = serde_json::json!({
+        "hits": r.hits.iter().map(wire_search_hit).collect::<Vec<_>>(),
+        "next_cursor": r.next_cursor,
+        "truncated": r.truncated,
+    });
+    if let Some(trace) = &r.trace {
+        let trace_v = serde_json::to_value(trace).expect("SearchTrace serializes");
+        if let Value::Object(ref mut map) = v {
+            map.insert("trace".to_string(), trace_v);
+        }
+    }
+    tag_object(v, "search_response.v1")
 }

 /// Wrap an [`Answer`] as `answer.v1`.
@@ -87,6 +101,25 @@ pub fn wire_answer(a: &Answer) -> Value {
    tag_object(v, "answer.v1")
 }

+/// p9-fb-33: tag a [`StreamEvent`] as `answer_event.v1` ndjson.
+///
+/// The timestamp is added at emit time (caller fills `ts`), since the
+/// pipeline doesn't carry one in the in-process enum — mirrors the
+/// `wire_ingest_progress` pattern (§2 ingest_progress.v1).
+pub fn wire_answer_event(
+    ev: &kebab_app::StreamEvent,
+    ts: time::OffsetDateTime,
+) -> Value {
+    let mut v = serde_json::to_value(ev).expect("StreamEvent serializes");
+    let ts_str = ts
+        .format(&time::format_description::well_known::Rfc3339)
+        .expect("OffsetDateTime formats as RFC3339");
+    if let Value::Object(ref mut map) = v {
+        map.insert("ts".to_string(), Value::String(ts_str));
+    }
+    tag_object(v, "answer_event.v1")
+}
+
 /// Idempotent pass-through for [`DoctorReport`] — the type already carries
 /// `schema_version: "doctor.v1"` (struct-field convention, the one
 /// exception called out in the module doc above). This helper exists so
@@ -162,6 +195,12 @@ pub fn wire_error_v1(e: &kebab_app::ErrorV1) -> Value {
    tag_object(v, "error.v1")
 }

+/// p9-fb-35: tag a [`kebab_core::FetchResult`] as `fetch_result.v1`.
+pub fn wire_fetch_result(r: &kebab_core::FetchResult) -> Value {
+    let v = serde_json::to_value(r).expect("FetchResult serializes");
+    tag_object(v, "fetch_result.v1")
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -215,13 +254,6 @@ mod tests {
        assert_eq!(v.as_array().unwrap().len(), 0);
    }

-    #[test]
-    fn search_hits_wraps_each_element() {
-        let v = wire_search_hits(&[]);
-        assert!(v.is_array());
-        assert_eq!(v.as_array().unwrap().len(), 0);
-    }
-
    #[test]
    fn tag_object_inserts_into_object() {
        let v = Value::Object(serde_json::Map::new());
@@ -229,6 +261,31 @@ mod tests {
        assert_eq!(schema_of(&tagged), Some("x.v1"));
    }

+    #[test]
+    fn search_response_carries_pagination_metadata() {
+        // p9-fb-34: empty-hits SearchResponse round-trips through the
+        // wrapper with its `next_cursor` + `truncated` fields preserved
+        // and the top-level `schema_version` set to `search_response.v1`.
+        let r = kebab_app::SearchResponse {
+            hits: vec![],
+            next_cursor: Some("opaque-cursor-abc".to_string()),
+            truncated: true,
+            trace: None,
+        };
+        let v = wire_search_response(&r);
+        assert_eq!(schema_of(&v), Some("search_response.v1"));
+        assert!(v.get("hits").and_then(|h| h.as_array()).is_some());
+        assert_eq!(
+            v.get("hits").and_then(|h| h.as_array()).unwrap().len(),
+            0
+        );
+        assert_eq!(
+            v.get("next_cursor").and_then(|c| c.as_str()),
+            Some("opaque-cursor-abc")
+        );
+        assert_eq!(v.get("truncated").and_then(|t| t.as_bool()), Some(true));
+    }
+
    #[test]
    fn schema_wrapper_tags_schema_version() {
        use kebab_app::{Capabilities, Models, SchemaV1, Stats, WireBlock};
@@ -253,6 +310,10 @@ mod tests {
            stats: Stats {
                doc_count: 1, chunk_count: 2, asset_count: 1,
                last_ingest_at: None,
+                media_breakdown: Default::default(),
+                lang_breakdown: Default::default(),
+                index_bytes: Default::default(),
+                stale_doc_count: 0,
            },
        };
        let v = wire_schema(&schema);
@@ -293,4 +354,49 @@ mod tests {
        assert_eq!(paths.len(), 1);
        assert_eq!(paths[0].as_str(), Some("/tmp/x"));
    }
+
+    #[test]
+    fn search_response_with_trace_serializes_trace_field() {
+        use kebab_core::{SearchTrace, TraceCandidate, TraceFusionInput,
+                         TraceTiming, ChunkId, DocumentId, WorkspacePath};
+        let r = kebab_app::SearchResponse {
+            hits: vec![],
+            next_cursor: None,
+            truncated: false,
+            trace: Some(SearchTrace {
+                lexical: vec![TraceCandidate {
+                    chunk_id: ChunkId("c1".into()),
+                    doc_id: DocumentId("d1".into()),
+                    doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+                    rank: 1,
+                    score: 0.42,
+                }],
+                vector: vec![],
+                rrf_inputs: vec![TraceFusionInput {
+                    chunk_id: ChunkId("c1".into()),
+                    lexical_rank: Some(1),
+                    vector_rank: None,
+                    fusion_score: 0.0,
+                }],
+                timing: TraceTiming { lexical_ms: 5, vector_ms: 0, fusion_ms: 1, total_ms: 7 },
+            }),
+        };
+        let v = wire_search_response(&r);
+        assert_eq!(schema_of(&v), Some("search_response.v1"));
+        assert!(v["trace"].is_object());
+        assert_eq!(v["trace"]["timing"]["lexical_ms"], 5);
+        assert_eq!(v["trace"]["lexical"][0]["chunk_id"], "c1");
+    }
+
+    #[test]
+    fn search_response_without_trace_omits_field() {
+        let r = kebab_app::SearchResponse {
+            hits: vec![],
+            next_cursor: None,
+            truncated: false,
+            trace: None,
+        };
+        let v = wire_search_response(&r);
+        assert!(v.get("trace").is_none(), "trace field absent when None");
+    }
 }
--- a/crates/kebab-cli/tests/cli_mcp_smoke.rs
+++ b/crates/kebab-cli/tests/cli_mcp_smoke.rs
@@ -66,8 +66,8 @@ fn cli_mcp_initialize_then_tools_list() {
        .expect("tools/list result.tools must be an array");
    assert_eq!(
        tools.len(),
-        6,
-        "expected 6 tools (schema, doctor, search, ask, ingest_file, ingest_stdin), got {}: {list}",
+        7,
+        "expected 7 tools (schema, doctor, search, ask, fetch, ingest_file, ingest_stdin), got {}: {list}",
        tools.len()
    );

--- a/crates/kebab-cli/tests/common/mod.rs
+++ b/crates/kebab-cli/tests/common/mod.rs
@@ -126,6 +126,100 @@ pub fn ingest(cfg: &Path, workspace: &Path) {
    );
 }

+/// p9-fb-34: invoke `kebab search` with arbitrary trailing flags +
+/// query, capture stdout + stderr. Caller is responsible for
+/// supplying `--mode lexical` / `--json` etc. as needed; this helper
+/// stays unopinionated so a single test can exercise both wire shapes
+/// (JSON wrapper + plain stderr hint). Asserts the binary exited 0;
+/// non-zero exits fail the test with stderr included.
+pub fn run_search_with_args(cfg: &Path, args: &[&str]) -> (String, String) {
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let mut cmd = Command::new(bin);
+    cmd.arg("--config").arg(cfg).arg("search");
+    cmd.args(args);
+    let out = cmd.output().expect("kebab search");
+    assert!(
+        out.status.success(),
+        "search failed: args={args:?} stderr={}",
+        String::from_utf8_lossy(&out.stderr)
+    );
+    (
+        String::from_utf8_lossy(&out.stdout).to_string(),
+        String::from_utf8_lossy(&out.stderr).to_string(),
+    )
+}
+
+/// p9-fb-33: invoke `kebab ask --stream --mode lexical <query>` and
+/// capture stdout + stderr. Lexical mode skips embeddings (matches
+/// `wire_ask_stale.rs::run_ask_lexical`). Exit status is NOT checked
+/// here (a refusal exits 1); callers assert on (stdout, stderr).
+pub fn run_ask_stream(cfg: &Path, query: &str) -> (String, String) {
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let out = Command::new(bin)
+        .args([
+            "--config",
+            cfg.to_str().unwrap(),
+            "ask",
+            "--stream",
+            "--mode",
+            "lexical",
+            query,
+        ])
+        .output()
+        .expect("kebab ask --stream");
+    (
+        String::from_utf8_lossy(&out.stdout).to_string(),
+        String::from_utf8_lossy(&out.stderr).to_string(),
+    )
+}
+
+/// p9-fb-33: invoke `kebab --json ask --mode lexical <query>` (no
+/// `--stream`) — used by `wire_ask_stream::non_stream_path_unchanged`
+/// to confirm the non-streaming JSON path still emits a single
+/// `answer.v1` line on stdout. Returns stdout only (mirrors
+/// `wire_ask_stale.rs::run_ask_lexical(json=true)` minus the
+/// `Output` indirection).
+pub fn run_ask_json(cfg: &Path, query: &str) -> String {
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let out = Command::new(bin)
+        .args([
+            "--config",
+            cfg.to_str().unwrap(),
+            "--json",
+            "ask",
+            "--mode",
+            "lexical",
+            query,
+        ])
+        .output()
+        .expect("kebab ask --json");
+    String::from_utf8_lossy(&out.stdout).to_string()
+}
+
+/// p9-fb-35: invoke `kebab --config <cfg> fetch <args…>` and capture
+/// stdout + stderr. `args` is appended after `fetch`, so a caller
+/// wanting JSON puts the global `--json` flag first in the slice,
+/// ahead of the nested subcommand (e.g. `&["--json", "chunk", &id]`).
+/// Asserts the binary exited 0; non-zero exits fail the test with
+/// stderr included — for negative-path tests (unknown chunk_id etc.)
+/// drive the binary directly via `std::process::Command`.
+pub fn run_fetch_with_args(cfg: &Path, args: &[&str]) -> (String, String) {
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let mut cmd = Command::new(bin);
+    cmd.arg("--config").arg(cfg).arg("fetch");
+    cmd.args(args);
+    let out = cmd.output().expect("kebab fetch");
+    assert!(
+        out.status.success(),
+        "fetch failed: args={args:?} stderr={}",
+        String::from_utf8_lossy(&out.stderr)
+    );
+    (
+        String::from_utf8_lossy(&out.stdout).to_string(),
+        String::from_utf8_lossy(&out.stderr).to_string(),
+    )
+}
+
 /// Rewrite `documents.updated_at` for one workspace path to
 /// `now - days_ago` (RFC3339 UTC). Mirrors
 /// `kebab-app/tests/common/mod.rs::backdate_document_updated_at`.
--- a/crates/kebab-cli/tests/wire_ask_stream.rs
+++ b/crates/kebab-cli/tests/wire_ask_stream.rs
@@ -0,0 +1,241 @@
+//! p9-fb-33: CLI streaming surface — stderr ndjson `answer_event.v1`
+//! events while the answer streams; final stdout line is the existing
+//! `answer.v1` (backwards compat with the non-`--stream` path).
+//!
+//! These end-to-end checks exercise `kebab ask --stream`, which
+//! requires a real Ollama on `127.0.0.1:11434` (same constraint as
+//! `wire_ask_stale.rs` + `kebab-app/tests/ask_smoke.rs`). All four
+//! tests are therefore `#[ignore]` by default — run with
+//! `cargo test -p kebab-cli --test wire_ask_stream -- --ignored`
+//! against a live Ollama with `gemma4:e4b` pulled.
+//!
+//! The `BrokenPipe → cancel` test (Task 7 of the fb-33 plan) verifies
+//! that closing the stderr reader propagates SendError through the
+//! pipeline so the child terminates instead of hanging. That's the
+//! main thing the integration test layer can prove that unit tests
+//! can't — pipeline cancel is a cross-process concern.
+//!
+//! Shared TempDir / ingest helpers live in `tests/common/mod.rs`.
+
+mod common;
+
+use std::fs;
+use std::path::Path;
+
+use serde_json::Value;
+
+/// Set `[rag].score_gate` to 0.0 in the test config so the score-gate
+/// refusal path doesn't short-circuit the LLM call. Lexical retrieval
+/// against a one-doc corpus produces tiny fusion scores (well below
+/// the default 0.30 gate); the pipeline would otherwise take the
+/// `refuse_score_gate` early-return — which emits no `Final` event —
+/// and the "last event must be final" assertion would fail. The edit
+/// is a literal `score_gate = 0.30` substring swap; no match = no-op.
+fn relax_score_gate(cfg: &Path) {
+    let body = fs::read_to_string(cfg).expect("read config.toml");
+    let body = body.replace("score_gate = 0.30", "score_gate = 0.0");
+    fs::write(cfg, body).expect("write relaxed config.toml");
+}
+
+#[test]
+#[ignore = "requires real Ollama on 127.0.0.1:11434"]
+fn stream_emits_ndjson_events_on_stderr() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) =
+        common::write_config_with_llm_model(dir.path(), 30, "gemma4:e4b");
+    relax_score_gate(&cfg);
+    fs::write(
+        workspace.join("a.md"),
+        "# T\n\nrust ownership is a memory model.\n",
+    )
+    .unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let (stdout, stderr) = common::run_ask_stream(&cfg, "ownership");
+
+    // stderr: every non-empty line should parse as JSON with
+    // schema_version == "answer_event.v1" and a recognized kind.
+    let mut kinds: Vec<String> = vec![];
+    for line in stderr.lines() {
+        if line.trim().is_empty() {
+            continue;
+        }
+        let v: Value = serde_json::from_str(line)
+            .unwrap_or_else(|e| panic!("non-JSON stderr line: {line:?}: {e}"));
+        assert_eq!(v["schema_version"], "answer_event.v1");
+        let kind = v["kind"].as_str().expect("kind").to_string();
+        assert!(
+            matches!(kind.as_str(), "retrieval_done" | "token" | "final"),
+            "unexpected kind: {kind}"
+        );
+        assert!(v["ts"].is_string(), "ts must be RFC3339 string");
+        kinds.push(kind);
+    }
+
+    // First event must be retrieval_done. Last must be final.
+    // Note: this test only exercises the LLM-running path which always
+    // closes with `final`. score-gate / no-chunks refusal paths emit
+    // only `retrieval_done` and skip `final` — that's why the test uses
+    // `relax_score_gate()` above to force the LLM path. See
+    // `stream_score_gate_refusal_emits_only_retrieval_done` for the
+    // refusal-path coverage.
+    assert_eq!(
+        kinds.first().map(String::as_str),
+        Some("retrieval_done"),
+        "first event must be retrieval_done, all kinds: {kinds:?}"
+    );
+    assert_eq!(
+        kinds.last().map(String::as_str),
+        Some("final"),
+        "last event must be final, all kinds: {kinds:?}"
+    );
+
+    // stdout: last line is answer.v1 (backwards compat with the
+    // non-streaming path — same wire shape, just emitted after the
+    // ndjson event stream rather than instead of it).
+    let final_line = stdout
+        .lines()
+        .last()
+        .expect("stdout has at least one line");
+    let answer: Value =
+        serde_json::from_str(final_line).expect("stdout final line = answer.v1");
+    assert_eq!(answer["schema_version"], "answer.v1");
+}
+
+#[test]
+#[ignore = "requires real Ollama on 127.0.0.1:11434"]
+fn non_stream_path_unchanged() {
+    // Verify that the non-streaming JSON path (no `--stream`) still
+    // emits a single `answer.v1` line on stdout — fb-33 must not
+    // perturb the existing wire surface.
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) =
+        common::write_config_with_llm_model(dir.path(), 30, "gemma4:e4b");
+    relax_score_gate(&cfg);
+    fs::write(
+        workspace.join("a.md"),
+        "# T\n\nrust ownership is a memory model.\n",
+    )
+    .unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let stdout = common::run_ask_json(&cfg, "ownership");
+    let v: Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("expected answer.v1, got {stdout:?}: {e}"));
+    assert_eq!(v["schema_version"], "answer.v1");
+}
+
+// p9-fb-33 (Task 7): BrokenPipe → cancel propagation. Spawn the
+// binary, read the first stderr line (retrieval_done), drop the
+// reader. The child's next stderr write hits BrokenPipe, its main
+// thread drops the receiver, the pipeline's next send returns
+// SendError and the cancel branch fires. The invariant is *liveness*:
+// `child.wait()` must return. It has no timeout, so a regression
+// shows up as a hung test. Exit code is not asserted: refusal is
+// exit 1, but 0 is possible if the LLM finished before the cancel.
+#[test]
+#[ignore = "requires real Ollama on 127.0.0.1:11434 + writes to a closed pipe"]
+fn stream_cancels_when_stderr_closes() {
+    use std::io::{BufRead, BufReader};
+    use std::process::{Command, Stdio};
+
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) =
+        common::write_config_with_llm_model(dir.path(), 30, "gemma4:e4b");
+    relax_score_gate(&cfg);
+    fs::write(
+        workspace.join("a.md"),
+        "# T\n\nrust ownership is a memory model. it tracks lifetimes.\n",
+    )
+    .unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let mut child = Command::new(bin)
+        .args([
+            "--config",
+            cfg.to_str().unwrap(),
+            "ask",
+            "--stream",
+            "--mode",
+            "lexical",
+            "ownership",
+        ])
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .expect("spawn kebab");
+
+    {
+        let stderr = child.stderr.take().expect("stderr piped");
+        let mut reader = BufReader::new(stderr);
+        let mut first = String::new();
+        reader
+            .read_line(&mut first)
+            .expect("read first stderr line");
+        assert!(
+            first.contains("\"kind\":\"retrieval_done\""),
+            "first event must be retrieval_done, got {first:?}"
+        );
+        // Drop the reader → child's stderr write end will see
+        // BrokenPipe on the next write → main thread drops rx →
+        // worker's pipeline.send returns SendError → cancel.
+    }
+
+    let status = child.wait().expect("child completes after cancel");
+    // Don't assert specific exit code — refusal is exit 1, but child
+    // may also exit 0 if the LLM finished before cancel propagated.
+    // The load-bearing assertion is that wait() returned at all.
+    let _ = status;
+}
+
+// p9-fb-33 (PR #124 round 1, item 4): score-gate refusal path —
+// thin doc + unrelated query trips the default 0.30 score gate
+// before the LLM runs. The pipeline emits only `retrieval_done`
+// on stderr (no `token`, no `final`); stdout still carries the
+// canonical `answer.v1` with `grounded=false`.
+#[test]
+#[ignore = "requires real Ollama on 127.0.0.1:11434"]
+fn stream_score_gate_refusal_emits_only_retrieval_done() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) =
+        common::write_config_with_llm_model(dir.path(), 30, "gemma4:e4b");
+    // Intentionally NO relax_score_gate — keep the default 0.30
+    // so the thin-doc + unrelated-query combo trips refusal.
+    fs::write(
+        workspace.join("a.md"),
+        "# Title\n\nrust is a language.\n",
+    )
+    .unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let (stdout, stderr) =
+        common::run_ask_stream(&cfg, "completely unrelated topic about cooking pasta");
+
+    let kinds: Vec<String> = stderr
+        .lines()
+        .filter(|l| !l.trim().is_empty())
+        .filter_map(|l| serde_json::from_str::<Value>(l).ok())
+        .filter_map(|v| v["kind"].as_str().map(String::from))
+        .collect();
+
+    // Refusal path: only retrieval_done, no token, no final.
+    assert!(
+        kinds.iter().all(|k| k == "retrieval_done"),
+        "refusal path must emit only retrieval_done, got {kinds:?}"
+    );
+    assert!(
+        !kinds.is_empty(),
+        "expected at least one retrieval_done event, got empty stderr"
+    );
+
+    // Stdout still has answer.v1 with grounded=false.
+    let final_line = stdout
+        .lines()
+        .last()
+        .expect("stdout has at least one line");
+    let answer: Value =
+        serde_json::from_str(final_line).expect("answer.v1");
+    assert_eq!(answer["schema_version"], "answer.v1");
+    assert_eq!(answer["grounded"], false);
+}
--- a/crates/kebab-cli/tests/wire_fetch.rs
+++ b/crates/kebab-cli/tests/wire_fetch.rs
@@ -0,0 +1,130 @@
+//! p9-fb-35: CLI fetch wire shape + plain output + exit codes.
+//!
+//! Lexical-only — no fastembed / no Ollama. Each test builds its own
+//! TempDir KB via `common::write_config` + `common::ingest` and drives
+//! `kebab fetch` through `common::run_fetch_with_args`. Verifies:
+//!
+//! - `--json fetch chunk <id>` emits the `fetch_result.v1` wrapper
+//!   with `kind = "chunk"` and a populated `chunk` object.
+//! - `--json fetch doc <id> --max-tokens N` flips `truncated: true`
+//!   once the budget binds.
+//! - Unknown `chunk_id` exits non-zero and emits an `error.v1`
+//!   ndjson line on stderr with `code = "chunk_not_found"`.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+
+/// `kebab --json fetch chunk <id>` must print a `fetch_result.v1` object
+/// with `kind = "chunk"`, a populated `chunk`, and `truncated = false`.
+#[test]
+fn fetch_chunk_json_emits_fetch_result_v1() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+    let note = workspace.join("a.md");
+    fs::write(note, "# T\n\napples are red.\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // A lexical search over the single document yields a real chunk id.
+    let search_args = ["--json", "--mode", "lexical", "--k", "1", "apples"];
+    let (search_stdout, _) = common::run_search_with_args(&cfg, &search_args);
+    let search: Value = match serde_json::from_str(search_stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("search not JSON: {search_stdout:?}: {e}"),
+    };
+    let first_hit = &search["hits"][0];
+    let chunk_id = first_hit["chunk_id"]
+        .as_str()
+        .map(str::to_owned)
+        .expect("chunk_id on first hit");
+
+    let fetch_args = ["--json", "chunk", chunk_id.as_str()];
+    let (stdout, _) = common::run_fetch_with_args(&cfg, &fetch_args);
+    let v: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("fetch not JSON: {stdout:?}: {e}"),
+    };
+
+    assert_eq!(v["schema_version"], "fetch_result.v1");
+    assert_eq!(v["kind"], "chunk");
+    let chunk = &v["chunk"];
+    assert!(chunk.is_object(), "target chunk must be populated: {v}");
+    assert_eq!(v["truncated"], false);
+}
+
+/// `kebab --json fetch doc <id> --max-tokens 20` on a document much larger
+/// than the budget must report `kind = "doc"` and `truncated = true`.
+#[test]
+fn fetch_doc_json_with_max_tokens_truncates() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // Heading plus the same sentence repeated 20 times (560 characters).
+    let mut contents = String::from("# Big\n\n");
+    contents.push_str(&"Lorem ipsum dolor sit amet. ".repeat(20));
+    contents.push('\n');
+    fs::write(workspace.join("big.md"), contents).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Resolve the document id through a lexical search.
+    let search_args = ["--json", "--mode", "lexical", "--k", "1", "Lorem"];
+    let (search_stdout, _) = common::run_search_with_args(&cfg, &search_args);
+    let search: Value = match serde_json::from_str(search_stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("search not JSON: {search_stdout:?}: {e}"),
+    };
+    let doc_id = search["hits"][0]["doc_id"]
+        .as_str()
+        .map(str::to_owned)
+        .expect("doc_id on first hit");
+
+    let fetch_args = ["--json", "doc", doc_id.as_str(), "--max-tokens", "20"];
+    let (stdout, _) = common::run_fetch_with_args(&cfg, &fetch_args);
+    let v: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("fetch not JSON: {stdout:?}: {e}"),
+    };
+
+    assert_eq!(v["kind"], "doc");
+    assert_eq!(v["truncated"], true, "20-token cap must trip truncation: {v}");
+}
+
+/// Fetching a chunk id that does not exist must exit non-zero and report
+/// an `error.v1` ndjson line on stderr with `code = "chunk_not_found"`.
+#[test]
+fn fetch_chunk_unknown_id_exits_with_error_v1() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, _workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // The shared helper asserts success, so spawn the binary directly:
+    // this command is expected to fail and we need its stderr. Same
+    // approach as the stale_cursor test in `wire_search_response.rs`.
+    let cfg_str = cfg.to_str().expect("utf8");
+    let out = std::process::Command::new(env!("CARGO_BIN_EXE_kebab"))
+        .arg("--config")
+        .arg(cfg_str)
+        .args(["--json", "fetch", "chunk", "nonexistent"])
+        .output()
+        .expect("kebab fetch");
+
+    assert_ne!(out.status.code(), Some(0), "must exit non-zero");
+
+    // Stderr is ndjson; pick the first line whose schema_version is error.v1.
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    let is_error_v1 = |line: &&str| {
+        serde_json::from_str::<Value>(line).map_or(false, |event| {
+            event.get("schema_version").and_then(Value::as_str) == Some("error.v1")
+        })
+    };
+    let err_line = match stderr.lines().find(is_error_v1) {
+        Some(line) => line,
+        None => panic!("no error.v1 line on stderr: {stderr:?}"),
+    };
+
+    let v: Value = serde_json::from_str(err_line).expect("error.v1 json");
+    assert_eq!(
+        v["code"], "chunk_not_found",
+        "code must be chunk_not_found: {err_line}"
+    );
+}
--- a/crates/kebab-cli/tests/wire_schema_breakdowns.rs
+++ b/crates/kebab-cli/tests/wire_schema_breakdowns.rs
@@ -0,0 +1,57 @@
+//! p9-fb-37: integration tests for `kebab schema --json` extended stats.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+use std::process::Command;
+
+/// Runs `kebab --config <cfg> schema --json` and returns its stdout parsed
+/// as JSON. Panics if the process cannot be spawned, exits unsuccessfully
+/// (stderr is included in the message), or prints invalid JSON.
+fn run_schema(cfg: &std::path::Path) -> Value {
+    let cfg_arg = cfg.to_str().unwrap();
+    let output = Command::new(env!("CARGO_BIN_EXE_kebab"))
+        .arg("--config")
+        .arg(cfg_arg)
+        .arg("schema")
+        .arg("--json")
+        .output()
+        .expect("run kebab schema");
+    if !output.status.success() {
+        panic!(
+            "schema failed: stderr={}",
+            String::from_utf8_lossy(&output.stderr)
+        );
+    }
+    serde_json::from_slice(&output.stdout).expect("valid JSON")
+}
+
+/// On a near-empty corpus, `kebab schema --json` must still expose the
+/// extended stats: a `media_breakdown` object padded to all five media
+/// keys, a `lang_breakdown` object, numeric `index_bytes.{sqlite,lancedb}`
+/// and a numeric `stale_doc_count`.
+#[test]
+fn schema_stats_includes_breakdowns_on_fresh_corpus() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 0);
+    // Ingest a single placeholder document so migrations run and the
+    // SQLite file exists before `schema` is queried.
+    fs::write(workspace.join("placeholder.md"), "# placeholder\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let v = run_schema(&cfg);
+    let stats = &v["stats"];
+    let m = stats["media_breakdown"]
+        .as_object()
+        .unwrap_or_else(|| panic!("stats.media_breakdown must be an object: {v}"));
+    assert_eq!(m.len(), 5, "5 media keys padded");
+    for k in &["markdown", "pdf", "image", "audio", "other"] {
+        // `Map` indexing (`m[k]`) panics on a missing key, which would hide
+        // the assertion message below; look the key up fallibly instead.
+        assert!(
+            m.get(*k).map_or(false, Value::is_number),
+            "media[{k}] is integer"
+        );
+    }
+    assert!(stats["lang_breakdown"].is_object());
+    assert!(stats["index_bytes"]["sqlite"].is_number());
+    assert!(stats["index_bytes"]["lancedb"].is_number());
+    assert!(stats["stale_doc_count"].is_number());
+}
+
+/// After ingesting two markdown documents, the schema stats must count
+/// both under `media_breakdown.markdown`, expose a `lang_breakdown`
+/// object, and report a non-zero SQLite index size.
+#[test]
+fn schema_stats_breakdowns_after_ingest() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 0);
+
+    // Two markdown files whose frontmatter declares different languages.
+    let fixtures = [
+        ("a.md", "---\nlang: en\n---\nhello\n"),
+        ("b.md", "---\nlang: ko\n---\n안녕\n"),
+    ];
+    for (name, contents) in fixtures {
+        fs::write(workspace.join(name), contents).unwrap();
+    }
+    common::ingest(&cfg, &workspace);
+
+    let v = run_schema(&cfg);
+    let stats = &v["stats"];
+    let markdown_docs = &stats["media_breakdown"]["markdown"];
+    assert_eq!(*markdown_docs, 2);
+    assert!(stats["lang_breakdown"].is_object());
+    assert!(stats["index_bytes"]["sqlite"].as_u64().unwrap() > 0);
+}
--- a/crates/kebab-cli/tests/wire_search_filters.rs
+++ b/crates/kebab-cli/tests/wire_search_filters.rs
@@ -0,0 +1,306 @@
+//! p9-fb-36: CLI integration tests for search filter flags.
+//!
+//! Lexical-only — no fastembed / no Ollama. Each test builds its own
+//! TempDir KB via `common::write_config` + `common::ingest` and drives
+//! `kebab search` through `common::run_search_with_args` or direct
+//! `Command` invocations. Verifies:
+//!
+//! - `--doc-id <id>` restricts all returned hits to the target document.
+//! - `--ingested-after <bad>` exits non-zero and emits `error.v1` on
+//!   stderr with `code = "config_invalid"`.
+//! - `--media md` (alias) normalises to `markdown` and matches `.md` docs.
+//! - `--tag <tag>` (repeatable, OR-within) filters by frontmatter tags.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+use std::process::Command;
+
+// ---------------------------------------------------------------------------
+// Test 1: --doc-id restricts hits to a single document
+// ---------------------------------------------------------------------------
+
+/// `--doc-id <id>` must narrow a query that matches two documents down to
+/// hits from the requested document only.
+#[test]
+fn search_with_doc_id_filter_returns_only_target_doc() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // Both documents mention "rust", so the unfiltered query hits each.
+    let fixtures = [
+        ("a.md", "# Alpha\n\nrust ownership rules\n"),
+        ("b.md", "# Beta\n\nrust borrow checker\n"),
+    ];
+    for (name, contents) in fixtures {
+        fs::write(workspace.join(name), contents).unwrap();
+    }
+    common::ingest(&cfg, &workspace);
+
+    // Baseline query without a filter, used to discover a real doc id.
+    let baseline_args = ["--json", "--mode", "lexical", "rust"];
+    let (stdout, _) = common::run_search_with_args(&cfg, &baseline_args);
+    let resp: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON: {stdout:?}: {e}"),
+    };
+    let hits = resp["hits"].as_array().expect("hits array");
+    assert!(
+        hits.len() >= 2,
+        "expected ≥2 hits from two docs before filter: {resp}"
+    );
+
+    // The first hit's document becomes the filter target.
+    let target_doc_id = hits[0]["doc_id"]
+        .as_str()
+        .map(str::to_owned)
+        .expect("doc_id string");
+
+    let filtered_args = [
+        "--json",
+        "--mode",
+        "lexical",
+        "--doc-id",
+        target_doc_id.as_str(),
+        "rust",
+    ];
+    let (stdout2, _) = common::run_search_with_args(&cfg, &filtered_args);
+    let resp2: Value = match serde_json::from_str(stdout2.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON after filter: {stdout2:?}: {e}"),
+    };
+    let filtered_hits = resp2["hits"].as_array().expect("hits array (filtered)");
+
+    assert!(
+        !filtered_hits.is_empty(),
+        "expected at least one hit for the target doc"
+    );
+    filtered_hits.iter().for_each(|hit| {
+        let got = hit["doc_id"].as_str().expect("doc_id string in hit");
+        assert_eq!(
+            got, target_doc_id,
+            "--doc-id filter must restrict all hits to target doc, got {got}"
+        );
+    });
+}
+
+// ---------------------------------------------------------------------------
+// Test 2: --ingested-after with bad RFC3339 → exit non-zero + error.v1
+// ---------------------------------------------------------------------------
+
+/// A malformed `--ingested-after` value must make `kebab search` exit
+/// non-zero and emit an `error.v1` line on stderr with
+/// `code = "config_invalid"`.
+#[test]
+fn search_with_invalid_ingested_after_emits_config_invalid() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+    fs::write(workspace.join("a.md"), "# T\n\nrust stuff\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Spawn the binary directly: the command is expected to fail.
+    let cfg_arg = cfg.to_str().unwrap();
+    let out = Command::new(env!("CARGO_BIN_EXE_kebab"))
+        .arg("--config")
+        .arg(cfg_arg)
+        .args(["--json", "search", "--mode", "lexical"])
+        .args(["--ingested-after", "not-a-date"])
+        .arg("rust")
+        .output()
+        .expect("kebab search --ingested-after bad");
+
+    if out.status.success() {
+        panic!(
+            "expected non-zero exit for invalid --ingested-after, got: status={} stderr={}",
+            out.status,
+            String::from_utf8_lossy(&out.stderr)
+        );
+    }
+
+    // Stderr is ndjson (one event per line); pick the error.v1 event.
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    let is_error_v1 = |line: &&str| {
+        serde_json::from_str::<Value>(line).map_or(false, |event| {
+            event.get("schema_version").and_then(Value::as_str) == Some("error.v1")
+        })
+    };
+    let err_line = match stderr.lines().find(is_error_v1) {
+        Some(line) => line,
+        None => panic!("no error.v1 line on stderr: {stderr:?}"),
+    };
+
+    let v: Value = serde_json::from_str(err_line).expect("error.v1 json");
+    assert_eq!(
+        v["code"], "config_invalid",
+        "code must be config_invalid for bad RFC3339: {err_line}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Test 3: --media md (alias) normalises to markdown and matches .md docs
+// ---------------------------------------------------------------------------
+
+/// `--media md` must behave like `--media markdown`: with only a `.md`
+/// document in the workspace the filtered query still returns hits.
+#[test]
+fn search_with_media_filter_md_alias_normalizes_to_markdown() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // The workspace holds a single markdown file for the alias to match.
+    let notes = workspace.join("notes.md");
+    fs::write(notes, "# Notes\n\nrust async programming\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let args = ["--json", "--mode", "lexical", "--media", "md", "rust"];
+    let (stdout, _) = common::run_search_with_args(&cfg, &args);
+    let resp: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON: {stdout:?}: {e}"),
+    };
+    let hits = resp["hits"].as_array().expect("hits array");
+
+    if hits.is_empty() {
+        panic!("--media md must match the markdown doc; got 0 hits: {resp}");
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Test 4: --tag (repeatable, OR-within) filters by frontmatter tags
+// ---------------------------------------------------------------------------
+
+/// `--tag rust` must keep hits from the document whose frontmatter lists
+/// the `rust` tag and drop the untagged document that matches the same
+/// keyword.
+#[test]
+fn search_with_tag_filter_matches_frontmatter_tags() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // rust_doc.md carries the `rust` tag; other_doc.md has no frontmatter
+    // but shares the keyword, so only the tag filter can exclude it.
+    let fixtures = [
+        (
+            "rust_doc.md",
+            "---\ntags: [rust, systems]\n---\n# Rust\n\nrust ownership\n",
+        ),
+        ("other_doc.md", "# Other\n\nrust programming\n"),
+    ];
+    for (name, contents) in fixtures {
+        fs::write(workspace.join(name), contents).unwrap();
+    }
+    common::ingest(&cfg, &workspace);
+
+    // Sanity check: unfiltered, both documents contribute hits.
+    let plain_args = ["--json", "--mode", "lexical", "rust"];
+    let (unfiltered, _) = common::run_search_with_args(&cfg, &plain_args);
+    let uresp: Value = match serde_json::from_str(unfiltered.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON (unfiltered): {unfiltered:?}: {e}"),
+    };
+    let uhits = uresp["hits"].as_array().expect("unfiltered hits array");
+    assert!(
+        uhits.len() >= 2,
+        "expected ≥2 hits before tag filter: {uresp}"
+    );
+
+    // Filtered by tag: only the tagged document may remain.
+    let tag_args = ["--json", "--mode", "lexical", "--tag", "rust", "rust"];
+    let (filtered, _) = common::run_search_with_args(&cfg, &tag_args);
+    let fresp: Value = match serde_json::from_str(filtered.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON (tag-filtered): {filtered:?}: {e}"),
+    };
+    let fhits = fresp["hits"].as_array().expect("filtered hits array");
+
+    assert!(
+        !fhits.is_empty(),
+        "--tag rust must match the tagged doc; got 0 hits: {fresp}"
+    );
+
+    // A missing or non-string doc_path is treated as "" and fails the check.
+    for path in fhits.iter().map(|hit| hit["doc_path"].as_str().unwrap_or("")) {
+        assert!(
+            path.ends_with("rust_doc.md"),
+            "--tag rust must only return hits from the tagged doc, got path={path}"
+        );
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Test 5: --tag is repeatable (OR-within); two --tag values form an IN-list
+// ---------------------------------------------------------------------------
+
+/// Repeating `--tag` forms an OR: with `--tag rust --tag async` the
+/// rust-tagged and the async-tagged documents must both appear, and the
+/// untagged document must not.
+#[test]
+fn search_with_two_tag_filters_returns_or_within_tags() {
+    // Three docs, all containing the keyword "rust" in the body:
+    //   a.md → tags: [rust]
+    //   b.md → tags: [async]
+    //   c.md → no tags
+    // Expected result for `--tag rust --tag async`: a.md and b.md only.
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    let fixtures = [
+        ("a.md", "---\ntags: [rust]\n---\n# A\n\nrust systems programming\n"),
+        ("b.md", "---\ntags: [async]\n---\n# B\n\nrust async programming\n"),
+        ("c.md", "# C\n\nrust programming\n"),
+    ];
+    for (name, contents) in fixtures {
+        fs::write(workspace.join(name), contents).unwrap();
+    }
+    common::ingest(&cfg, &workspace);
+
+    // Sanity check: unfiltered, every document contributes a hit.
+    let plain_args = ["--json", "--mode", "lexical", "rust"];
+    let (unfiltered, _) = common::run_search_with_args(&cfg, &plain_args);
+    let uresp: Value = match serde_json::from_str(unfiltered.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON (unfiltered): {unfiltered:?}: {e}"),
+    };
+    let uhits = uresp["hits"].as_array().expect("unfiltered hits array");
+    assert!(
+        uhits.len() >= 3,
+        "expected ≥3 hits before tag filter: {uresp}"
+    );
+
+    // Two --tag flags in one invocation.
+    let tag_args = [
+        "--json", "--mode", "lexical", "--tag", "rust", "--tag", "async", "rust",
+    ];
+    let (filtered, _) = common::run_search_with_args(&cfg, &tag_args);
+    let fresp: Value = match serde_json::from_str(filtered.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON (two-tag-filtered): {filtered:?}: {e}"),
+    };
+    let fhits = fresp["hits"].as_array().expect("filtered hits array");
+
+    assert!(
+        !fhits.is_empty(),
+        "--tag rust --tag async must return hits from tagged docs; got 0: {fresp}"
+    );
+
+    // Exclusion: the untagged c.md may not show up.
+    for path in fhits.iter().map(|hit| hit["doc_path"].as_str().unwrap_or("")) {
+        assert!(
+            path.ends_with("a.md") || path.ends_with("b.md"),
+            "--tag rust --tag async must only return a.md or b.md, got path={path}"
+        );
+    }
+
+    // Inclusion: OR semantics means each tagged document is present.
+    let paths: Vec<&str> = fhits
+        .iter()
+        .filter_map(|hit| hit["doc_path"].as_str())
+        .collect();
+    let contains_doc = |suffix: &str| paths.iter().any(|p| p.ends_with(suffix));
+    let has_a = contains_doc("a.md");
+    let has_b = contains_doc("b.md");
+    assert!(has_a, "--tag rust must include a.md (rust-tagged): paths={paths:?}");
+    assert!(has_b, "--tag async must include b.md (async-tagged): paths={paths:?}");
+}
--- a/crates/kebab-cli/tests/wire_search_response.rs
+++ b/crates/kebab-cli/tests/wire_search_response.rs
@@ -0,0 +1,226 @@
+//! p9-fb-34: CLI search wire wrapper + budget controls.
+//!
+//! Lexical-only — no fastembed / no Ollama. Each test builds its own
+//! TempDir KB via `common::write_config` + `common::ingest` and drives
+//! `kebab search` through `common::run_search_with_args`. Verifies:
+//!
+//! - `--json` emits the `search_response.v1` wrapper (hits + cursor +
+//!   truncated).
+//! - `--max-tokens` flips `truncated: true` once the budget binds.
+//! - `--cursor` advances paging (page 2 chunk_ids disjoint from page 1).
+//! - Plain (non-JSON) output prints the `[truncated; ...]` hint to
+//!   stderr (stdout stays the hit list).
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+
+/// `kebab search --json` must wrap its hits in a `search_response.v1`
+/// object carrying `hits` (array), `next_cursor` (null or string) and
+/// `truncated` (bool).
+#[test]
+fn search_json_emits_search_response_v1_wrapper() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+    let note = workspace.join("a.md");
+    fs::write(note, "# T\n\napples are red.\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let args = ["--json", "--mode", "lexical", "apples"];
+    let (stdout, _stderr) = common::run_search_with_args(&cfg, &args);
+    let v: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON: {stdout:?}: {e}"),
+    };
+
+    assert_eq!(v["schema_version"], "search_response.v1");
+    assert!(v["hits"].is_array(), "hits must be array, got {v}");
+
+    let next_cursor = &v["next_cursor"];
+    assert!(
+        next_cursor.is_null() || next_cursor.is_string(),
+        "next_cursor must be null or string, got {}",
+        next_cursor
+    );
+
+    let truncated = &v["truncated"];
+    assert!(
+        truncated.is_boolean(),
+        "truncated must be bool, got {}",
+        truncated
+    );
+}
+
+/// With `--max-tokens 30` and a document well beyond that budget, the
+/// JSON response must report `truncated = true`.
+#[test]
+fn search_json_truncates_with_max_tokens() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // Heading followed by the same sentence ten times.
+    let mut contents = String::from("# T\n\n");
+    contents.push_str(&"rust ownership is a memory model. ".repeat(10));
+    contents.push('\n');
+    fs::write(workspace.join("a.md"), contents).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let args = ["--json", "--mode", "lexical", "--max-tokens", "30", "rust"];
+    let (stdout, _stderr) = common::run_search_with_args(&cfg, &args);
+    let v: Value = match serde_json::from_str(stdout.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("not JSON: {stdout:?}: {e}"),
+    };
+    assert_eq!(v["truncated"], true, "30-token cap must trip truncation: {v}");
+}
+
+/// Paging with `--cursor`: the second page (k = 2) must be non-empty and
+/// share no `chunk_id` with the first page.
+#[test]
+fn search_json_cursor_paginates() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // Six small documents that all match "rust".
+    (0..6).for_each(|i| {
+        let path = workspace.join(format!("d{i}.md"));
+        fs::write(path, format!("# T{i}\n\nrust topic {i}\n")).unwrap();
+    });
+    common::ingest(&cfg, &workspace);
+
+    // Page 1.
+    let first_args = ["--json", "--mode", "lexical", "--k", "2", "rust"];
+    let (page1, _) = common::run_search_with_args(&cfg, &first_args);
+    let v1: Value = match serde_json::from_str(page1.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("page1 not JSON: {page1:?}: {e}"),
+    };
+    let cursor = match v1["next_cursor"].as_str() {
+        Some(token) => token,
+        None => panic!("next_cursor missing on page1: {v1}"),
+    };
+
+    // Page 2, resumed from the cursor handed out by page 1.
+    let second_args = [
+        "--json", "--mode", "lexical", "--k", "2", "--cursor", cursor, "rust",
+    ];
+    let (page2, _) = common::run_search_with_args(&cfg, &second_args);
+    let v2: Value = match serde_json::from_str(page2.trim()) {
+        Ok(parsed) => parsed,
+        Err(e) => panic!("page2 not JSON: {page2:?}: {e}"),
+    };
+
+    // Pull the chunk ids out of a page; `missing_hits` is the panic
+    // message used when the page has no `hits` array.
+    let chunk_ids = |page: &Value, missing_hits: &str| -> Vec<String> {
+        page["hits"]
+            .as_array()
+            .expect(missing_hits)
+            .iter()
+            .map(|hit| hit["chunk_id"].as_str().expect("chunk_id string").to_string())
+            .collect()
+    };
+    let p1_ids = chunk_ids(&v1, "page1 hits array");
+    let p2_ids = chunk_ids(&v2, "page2 hits array");
+
+    assert!(
+        !p2_ids.is_empty(),
+        "page2 must return at least one hit (cursor advanced past page1)"
+    );
+    assert!(
+        !p2_ids.iter().any(|id| p1_ids.contains(id)),
+        "page2 must not repeat page1 chunk_ids: page1={p1_ids:?} page2={p2_ids:?}"
+    );
+}
+
+/// A cursor issued before the corpus changed must be rejected: the CLI
+/// exits non-zero and emits an `error.v1` ndjson line on stderr with
+/// `code = "stale_cursor"`.
+#[test]
+fn search_stale_cursor_returns_error_v1_with_stale_cursor_code() {
+    // p9-fb-34 round-1 review: end-to-end wire contract — when the
+    // corpus_revision bumps between cursor issuance and the cursored
+    // search, `kebab --json search --cursor <stale>` must emit an
+    // `error.v1` ndjson line on stderr with `code = "stale_cursor"`.
+    // Pre-fix this returned `code = "generic"` because
+    // `App::search_with_opts` string-formatted the typed payload into
+    // anyhow, losing the structured wrapper.
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    fs::write(workspace.join("a.md"), "# T\n\napples\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Get a valid cursor first.
+    let (page1_stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--mode", "lexical", "--json", "--k", "1", "apples"],
+    );
+    let v1: Value = serde_json::from_str(page1_stdout.trim()).expect("json");
+    let cursor = v1["next_cursor"]
+        .as_str()
+        .expect("k=1 page must emit next_cursor — fixture too small if this fails")
+        .to_string();
+
+    // Bump corpus_revision by ingesting a second doc.
+    fs::write(workspace.join("b.md"), "# B\n\nbananas\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Use the now-stale cursor. Direct invocation (not via the
+    // success-asserting helper) so we can read stderr on failure.
+    // `--json` is passed once, as the global flag before the subcommand,
+    // matching the other direct invocations in the wire tests.
+    let exe = env!("CARGO_BIN_EXE_kebab");
+    let cfg_str = cfg.to_str().expect("utf8");
+    let out = std::process::Command::new(exe)
+        .args([
+            "--config",
+            cfg_str,
+            "--json",
+            "search",
+            "--mode",
+            "lexical",
+            "--cursor",
+            &cursor,
+            "apples",
+        ])
+        .output()
+        .expect("kebab search --cursor");
+
+    // The stale cursor is an error path: pin the exit status as well as
+    // the stderr payload, like the other error-path wire tests do.
+    assert!(
+        !out.status.success(),
+        "expected non-zero exit for stale cursor, got: status={} stderr={}",
+        out.status,
+        String::from_utf8_lossy(&out.stderr)
+    );
+
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    // Find the error.v1 ndjson line on stderr (one event per line).
+    let err_line = stderr
+        .lines()
+        .find(|l| {
+            serde_json::from_str::<Value>(l).map_or(false, |v| {
+                v.get("schema_version").and_then(Value::as_str) == Some("error.v1")
+            })
+        })
+        .unwrap_or_else(|| panic!("no error.v1 line on stderr: {stderr:?}"));
+
+    let v: Value = serde_json::from_str(err_line).expect("error.v1 json");
+    assert_eq!(
+        v["code"], "stale_cursor",
+        "code must be stale_cursor: {err_line}"
+    );
+}
+
+/// In plain (non-JSON) mode a binding `--max-tokens` budget must surface
+/// as a `[truncated; ...]` hint on stderr.
+#[test]
+fn search_plain_emits_truncated_hint_to_stderr() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 30);
+
+    // Heading followed by the same sentence ten times.
+    let mut contents = String::from("# T\n\n");
+    contents.push_str(&"rust ownership is a memory model. ".repeat(10));
+    contents.push('\n');
+    fs::write(workspace.join("a.md"), contents).unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // No `--json`: the hint is expected on stderr.
+    let args = ["--mode", "lexical", "--max-tokens", "30", "rust"];
+    let (_stdout, stderr) = common::run_search_with_args(&cfg, &args);
+    if !stderr.contains("[truncated;") {
+        panic!("stderr must carry truncated hint: {stderr:?}");
+    }
+}
--- a/crates/kebab-cli/tests/wire_search_stale.rs
+++ b/crates/kebab-cli/tests/wire_search_stale.rs
@@ -45,10 +45,21 @@ fn search_json_includes_indexed_at_and_stale() {

    let out = run_search_lexical(&cfg, "apples", true);
    let stdout = String::from_utf8_lossy(&out.stdout);
-    let arr: serde_json::Value = serde_json::from_str(stdout.trim())
-        .unwrap_or_else(|e| panic!("expected JSON array, got {stdout:?}: {e}"));
-    let arr = arr.as_array().unwrap_or_else(|| panic!("expected array, got {stdout}"));
-    let first = arr.first().unwrap_or_else(|| panic!("expected ≥1 hit, got empty array: {stdout}"));
+    // p9-fb-34: top-level wire is now `search_response.v1` wrapping the
+    // legacy `search_hit.v1[]` under a `hits` field (with pagination +
+    // truncation metadata). Hit shape inside `hits` is unchanged.
+    let resp: serde_json::Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("expected JSON object, got {stdout:?}: {e}"));
+    assert_eq!(
+        resp.get("schema_version").and_then(|v| v.as_str()),
+        Some("search_response.v1"),
+        "expected search_response.v1 wrapper, got {resp}"
+    );
+    let arr = resp
+        .get("hits")
+        .and_then(|h| h.as_array())
+        .unwrap_or_else(|| panic!("expected hits array, got {stdout}"));
+    let first = arr.first().unwrap_or_else(|| panic!("expected ≥1 hit, got empty hits: {stdout}"));
    assert!(
        first.get("indexed_at").is_some(),
        "missing indexed_at in {first}"
--- a/crates/kebab-cli/tests/wire_search_trace.rs
+++ b/crates/kebab-cli/tests/wire_search_trace.rs
@@ -0,0 +1,58 @@
+//! p9-fb-37: integration tests for `kebab search --trace --json`.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+
+/// `kebab search --trace --json` must add a `trace` object holding
+/// `timing` (with a numeric `total_ms`) and the `lexical`, `vector` and
+/// `rrf_inputs` arrays.
+#[test]
+fn search_trace_json_includes_trace_block() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 0);
+    let doc = workspace.join("doc1.md");
+    fs::write(doc, "# Title\n\nrust async hello\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let args = ["--mode", "lexical", "--trace", "--json", "rust"];
+    let (stdout, _stderr) = common::run_search_with_args(&cfg, &args);
+    let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON");
+
+    // Wrapper first, then the shape of the trace block itself.
+    assert_eq!(v["schema_version"], "search_response.v1");
+    assert!(v["trace"].is_object(), "trace block present");
+    assert!(v["trace"]["timing"].is_object());
+    assert!(v["trace"]["timing"]["total_ms"].is_number());
+    assert!(v["trace"]["lexical"].is_array());
+    assert!(v["trace"]["vector"].is_array());
+    assert!(v["trace"]["rrf_inputs"].is_array());
+}
+
+/// Without `--trace` the JSON response must not contain a `trace` key
+/// at all.
+#[test]
+fn search_without_trace_omits_trace_field() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 0);
+    let doc = workspace.join("doc1.md");
+    fs::write(doc, "# Title\n\nrust async hello\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let args = ["--mode", "lexical", "--json", "rust"];
+    let (stdout, _stderr) = common::run_search_with_args(&cfg, &args);
+    let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON");
+
+    let trace = v.get("trace");
+    assert!(trace.is_none(), "trace field absent without --trace");
+}
+
+/// In lexical mode the trace must report an empty `vector` candidate
+/// list and `timing.vector_ms == 0`.
+#[test]
+fn search_trace_lexical_mode_vector_list_empty() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(tmp.path(), 0);
+    let doc = workspace.join("doc1.md");
+    fs::write(doc, "# Title\n\nrust async hello\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let args = ["--mode", "lexical", "--trace", "--json", "rust"];
+    let (stdout, _stderr) = common::run_search_with_args(&cfg, &args);
+    let v: Value = serde_json::from_str(stdout.trim()).expect("valid JSON");
+
+    let trace = &v["trace"];
+    let vector_candidates = trace["vector"].as_array().unwrap();
+    assert_eq!(vector_candidates.len(), 0);
+    assert_eq!(trace["timing"]["vector_ms"], 0);
+}
--- a/crates/kebab-core/src/fetch.rs
+++ b/crates/kebab-core/src/fetch.rs
@@ -0,0 +1,87 @@
+//! p9-fb-35 verbatim fetch domain types.
+//!
+//! Three modes (chunk / doc / span) carried by [`FetchQuery`]; one
+//! response shape ([`FetchResult`]) discriminated by [`FetchKind`].
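+//! The response types ([`FetchResult`], [`FetchKind`]) are `Serialize`
+//! so the CLI / MCP wire layers can hand them straight through
+//! `serde_json::to_value`; the request types ([`FetchQuery`],
+//! [`FetchOpts`]) are plain in-process values with no serde derives.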
+
+use serde::{Deserialize, Serialize};
+use time::OffsetDateTime;
+
+use crate::asset::WorkspacePath;
+use crate::chunk::Chunk;
+use crate::ids::{ChunkId, DocumentId};
+
+/// What to fetch: a single chunk, a whole document, or a line span
+/// inside a document.
+#[derive(Debug, Clone)]
+pub enum FetchQuery {
+    /// Fetch the chunk identified by this id.
+    Chunk(ChunkId),
+    /// Fetch the document identified by this id.
+    Doc(DocumentId),
+    /// Fetch a range of lines from one document.
+    Span {
+        /// Document the span is taken from.
+        doc_id: DocumentId,
+        /// First line of the span.
+        /// NOTE(review): 0- vs 1-based indexing is not visible here — confirm.
+        line_start: u32,
+        /// Last line of the span.
+        /// NOTE(review): inclusive vs exclusive is not visible here — confirm.
+        line_end: u32,
+    },
+}
+
+/// Optional knobs for a fetch. `Default` leaves both unset.
+#[derive(Debug, Clone, Default)]
+pub struct FetchOpts {
+    /// Chunk mode only: number of neighbouring chunks to include on each
+    /// side (±N). `None` means no surrounding context.
+    pub context: Option<u32>,
+    /// Doc / span mode only: token budget, estimated as chars/4.
+    /// `None` means no cap.
+    pub max_tokens: Option<usize>,
+}
+
+/// Discriminator telling which fetch mode produced a [`FetchResult`].
+/// Serialized in snake_case (`"chunk"`, `"doc"`, `"span"`).
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum FetchKind {
+    /// Result of a chunk fetch.
+    Chunk,
+    /// Result of a whole-document fetch.
+    Doc,
+    /// Result of a line-span fetch.
+    Span,
+}
+
+/// Single response shape shared by all fetch modes; `kind` tells which
+/// group of payload fields applies.
+///
+/// `None` options and empty context lists are left out of the serialized
+/// form instead of being emitted as `null` / `[]`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct FetchResult {
+    /// Fetch mode that produced this result.
+    pub kind: FetchKind,
+    /// Id of the document the payload belongs to.
+    pub doc_id: DocumentId,
+    /// Workspace path of that document.
+    pub doc_path: WorkspacePath,
+    /// Serialized as an RFC 3339 timestamp.
+    /// NOTE(review): presumably when the document was last indexed — confirm.
+    #[serde(with = "time::serde::rfc3339")]
+    pub indexed_at: OffsetDateTime,
+    /// NOTE(review): staleness flag; the criterion is not visible here — confirm.
+    pub stale: bool,
+
+    // ---- chunk mode payloads ----
+    /// The requested chunk.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub chunk: Option<Chunk>,
+    /// Chunks preceding the requested one; defaults to empty when the
+    /// field is missing on deserialization.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub context_before: Vec<Chunk>,
+    /// Chunks following the requested one; defaults to empty when the
+    /// field is missing on deserialization.
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub context_after: Vec<Chunk>,
+
+    // ---- doc / span payloads ----
+    /// Fetched text.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<String>,
+    /// First line of the returned text, when reported.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub line_start: Option<u32>,
+    /// Last line of the returned text, when reported.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub line_end: Option<u32>,
+    /// NOTE(review): presumably the last line actually returned when the
+    /// token budget cut the text short — confirm against the producer.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effective_end: Option<u32>,
+    /// Always serialized. NOTE(review): presumably set when a
+    /// `max_tokens` budget cut the payload short — confirm.
+    pub truncated: bool,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// `FetchOpts::default()` must leave both optional knobs unset.
+    #[test]
+    fn fetch_opts_default_is_all_none() {
+        let o = FetchOpts::default();
+        assert!(o.context.is_none());
+        assert!(o.max_tokens.is_none());
+    }
+
+    /// Every `FetchKind` variant (including `Doc`) must serialize to its
+    /// snake_case wire label.
+    #[test]
+    fn fetch_kind_serializes_snake_case() {
+        let cases = [
+            (FetchKind::Chunk, "chunk"),
+            (FetchKind::Doc, "doc"),
+            (FetchKind::Span, "span"),
+        ];
+        for (kind, wire) in cases {
+            let v = serde_json::to_value(kind).unwrap();
+            assert_eq!(
+                v,
+                serde_json::json!(wire),
+                "{kind:?} must serialize as {wire:?}"
+            );
+        }
+    }
+}
--- a/crates/kebab-core/src/lib.rs
+++ b/crates/kebab-core/src/lib.rs
@@ -23,6 +23,7 @@ pub mod vector;
 pub mod errors;
 pub mod traits;
 pub mod normalize;
+pub mod fetch;

 // Re-export the most commonly used items at the crate root, mirroring the
 // public surface listed in the task spec.
@@ -50,8 +51,9 @@ pub use metadata::{
    TrustLevel,
 };
 pub use search::{
-    DocFilter, DocSummary, RetrievalDetail, SearchFilters, SearchHit,
-    SearchMode, SearchQuery,
+    DocFilter, DocSummary, IndexBytes, MEDIA_KINDS, RetrievalDetail, SearchFilters, SearchHit,
+    SearchMode, SearchOpts, SearchQuery, SearchTrace, TraceCandidate, TraceFusionInput,
+    TraceTiming,
 };
 pub use answer::{
    Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
@@ -68,3 +70,4 @@ pub use traits::{
    SourceScope, TokenChunk, VectorStore,
 };
 pub use normalize::{nfc, to_posix};
+pub use fetch::{FetchKind, FetchOpts, FetchQuery, FetchResult};
--- a/crates/kebab-core/src/search.rs
+++ b/crates/kebab-core/src/search.rs
@@ -26,12 +26,30 @@ pub struct SearchQuery {
    pub filters: SearchFilters,
 }

+/// p9-fb-36: canonical kind labels for `SearchFilters.media`. Mirrors
+/// `MediaType` variant tags; CLI / MCP normalize aliases (`md` → `markdown`)
+/// before populating this Vec.
+pub const MEDIA_KINDS: &[&str] = &["markdown", "pdf", "image", "audio", "other"];
+
 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 pub struct SearchFilters {
+    /// Tag filter, OR-within (per the MCP `search` tool's `tags` input,
+    /// which populates this field; an absent input becomes an empty Vec).
    pub tags_any: Vec<String>,
+    /// Language filter (the MCP `search` tool describes it as an ISO code
+    /// matched against `documents.lang`). None = no filter.
    pub lang: Option<Lang>,
+    /// Workspace-path glob filter. None = no filter.
    pub path_glob: Option<String>,
+    /// Minimum trust level. None = no filter.
    pub trust_min: Option<TrustLevel>,
+    /// p9-fb-36: media_type filter — IN-list of `MediaType.kind`
+    /// strings (`"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`).
+    /// Empty Vec = no filter. Match is on the variant tag only;
+    /// e.g. `["image"]` matches `Image(Png)` and `Image(Jpeg)`.
+    #[serde(default)]
+    pub media: Vec<String>,
+    /// p9-fb-36: hits whose source doc's `documents.updated_at` is at
+    /// or after this timestamp. None = no filter. RFC3339 / UTC.
+    #[serde(default, with = "time::serde::rfc3339::option")]
+    pub ingested_after: Option<OffsetDateTime>,
+    /// p9-fb-36: restrict hits to a single document. None = no filter.
+    #[serde(default)]
+    pub doc_id: Option<DocumentId>,
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -96,6 +114,72 @@ pub struct DocSummary {
    pub chunker_version: ChunkerVersion,
 }

+/// p9-fb-34: caller-supplied output budget knobs for `App::search_with_opts`.
+/// The `Default` (every `Option` field `None`, `trace = false`) means no
+/// enforcement and no trace capture (existing behavior).
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchOpts {
+    /// chars/4 approximation of wire JSON token cost. None = no cap.
+    pub max_tokens: Option<usize>,
+    /// Per-hit snippet character cap. None = use config default.
+    pub snippet_chars: Option<usize>,
+    /// Opaque base64 cursor from a previous response. None = first page.
+    pub cursor: Option<String>,
+    /// p9-fb-37: when true, capture pipeline trace (cache bypassed,
+    /// lex / vec pre-fusion lists + timing populated on the response).
+    /// Missing on the wire deserializes to `false` (`serde(default)`).
+    #[serde(default)]
+    pub trace: bool,
+}
+
+/// p9-fb-37: search retrieval pipeline trace. Populated only when
+/// `SearchOpts.trace = true`; `None` on the wrapping `SearchResponse`
+/// otherwise. `lexical` / `vector` are pre-fusion candidate lists
+/// (each retriever's full output for the fanout query). `rrf_inputs`
+/// is the union (chunk_id) used by RRF, with each side's rank
+/// captured. `timing` is wall-clock per stage.
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchTrace {
+    /// Pre-fusion output of the lexical retriever.
+    pub lexical: Vec<TraceCandidate>,
+    /// Pre-fusion output of the vector retriever.
+    pub vector: Vec<TraceCandidate>,
+    /// Union of both candidate lists, keyed by chunk, with per-side ranks.
+    pub rrf_inputs: Vec<TraceFusionInput>,
+    /// Wall-clock timing per pipeline stage.
+    pub timing: TraceTiming,
+}
+
+/// One entry of `SearchTrace.lexical` / `SearchTrace.vector`: a chunk as
+/// returned by a single retriever before fusion.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceCandidate {
+    pub chunk_id: ChunkId,
+    pub doc_id: DocumentId,
+    pub doc_path: WorkspacePath,
+    // NOTE(review): position in the retriever's list; the in-file test
+    // uses `1` for the first entry, so presumably 1-based — confirm.
+    pub rank: u32,
+    // Retriever-specific score (not the fused score).
+    pub score: f32,
+}
+
+/// One entry of `SearchTrace.rrf_inputs`: a chunk together with the rank
+/// it had on each retriever side and its resulting score.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceFusionInput {
+    pub chunk_id: ChunkId,
+    // NOTE(review): presumably `None` when the lexical retriever did not
+    // return this chunk — confirm.
+    pub lexical_rank: Option<u32>,
+    // NOTE(review): presumably `None` when the vector retriever did not
+    // return this chunk — confirm.
+    pub vector_rank: Option<u32>,
+    /// Hybrid mode: normalized RRF score in `[0, 1]`.
+    /// Lexical / Vector mode: equals the underlying retriever's score
+    /// (no fusion ran). 0.0 for chunks dropped past `target_k`.
+    pub fusion_score: f32,
+}
+
+/// Per-stage timing carried on `SearchTrace.timing`. `Default` is all
+/// zeros. NOTE(review): the `_ms` suffix suggests milliseconds — confirm
+/// against the code that measures the stages.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TraceTiming {
+    pub lexical_ms: u64,
+    pub vector_ms: u64,
+    pub fusion_ms: u64,
+    pub total_ms: u64,
+}
+
+/// p9-fb-37: on-disk index size breakdown. Mirrored on the
+/// wire `schema.v1.stats.index_bytes` block. `Default` is zero for
+/// both stores.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct IndexBytes {
+    // Size attributed to the SQLite store.
+    pub sqlite: u64,
+    // Size attributed to the LanceDB store.
+    pub lancedb: u64,
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -135,4 +219,79 @@ mod tests {
        assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z");
        assert_eq!(v["stale"], true);
    }
+
+    #[test]
+    fn search_opts_default_is_all_none() {
+        // Destructure so each budget knob is checked by name.
+        let SearchOpts {
+            max_tokens,
+            snippet_chars,
+            cursor,
+            ..
+        } = SearchOpts::default();
+        assert!(max_tokens.is_none());
+        assert!(snippet_chars.is_none());
+        assert!(cursor.is_none());
+    }
+
+    #[test]
+    fn search_filters_default_includes_new_fb36_fields() {
+        let defaults = SearchFilters::default();
+
+        // Fields introduced by p9-fb-36.
+        assert!(defaults.media.is_empty(), "media default empty");
+        assert!(defaults.ingested_after.is_none(), "ingested_after default None");
+        assert!(defaults.doc_id.is_none(), "doc_id default None");
+
+        // Fields that existed before; their defaults must be untouched.
+        assert!(defaults.tags_any.is_empty());
+        assert!(defaults.lang.is_none());
+        assert!(defaults.path_glob.is_none());
+        assert!(defaults.trust_min.is_none());
+    }
+
+    #[test]
+    fn search_filters_serialize_with_serde_default_compat() {
+        // A payload carrying only the pre-p9-fb-36 keys must still parse,
+        // with the newer fields falling back to their serde defaults.
+        let legacy_json = r#"{"tags_any":[],"lang":null,"path_glob":null,"trust_min":null}"#;
+        let parsed: SearchFilters = serde_json::from_str(legacy_json).unwrap();
+        assert!(parsed.media.is_empty());
+        assert!(parsed.ingested_after.is_none());
+        assert!(parsed.doc_id.is_none());
+    }
+
+    #[test]
+    fn search_trace_serde_roundtrip() {
+        // Build each component on its own so the expected shape is easy to scan.
+        let candidate = TraceCandidate {
+            chunk_id: ChunkId("c1".into()),
+            doc_id: DocumentId("d1".into()),
+            doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+            rank: 1,
+            score: 0.42,
+        };
+        let fusion_input = TraceFusionInput {
+            chunk_id: ChunkId("c1".into()),
+            lexical_rank: Some(1),
+            vector_rank: None,
+            fusion_score: 0.0234,
+        };
+        let timing = TraceTiming {
+            lexical_ms: 12,
+            vector_ms: 0,
+            fusion_ms: 1,
+            total_ms: 14,
+        };
+        let original = SearchTrace {
+            lexical: vec![candidate],
+            vector: Vec::new(),
+            rrf_inputs: vec![fusion_input],
+            timing,
+        };
+
+        // Spot-check the wire form, then require a lossless round-trip.
+        let json = serde_json::to_value(&original).unwrap();
+        assert_eq!(json["timing"]["lexical_ms"], 12);
+        let wire_score = json["lexical"][0]["score"].as_f64().unwrap() as f32;
+        assert_eq!(wire_score, 0.42_f32);
+        let decoded: SearchTrace = serde_json::from_value(json).unwrap();
+        assert_eq!(decoded, original);
+    }
+
+    #[test]
+    fn index_bytes_default_is_zero() {
+        // Exhaustive destructuring: a new field forces this test to be revisited.
+        let IndexBytes { sqlite, lancedb } = IndexBytes::default();
+        assert_eq!(sqlite, 0);
+        assert_eq!(lancedb, 0);
+    }
+
+    #[test]
+    fn search_opts_trace_default_false() {
+        // Tracing is opt-in: the default options must not request it.
+        assert!(!SearchOpts::default().trace);
+    }
 }
--- a/crates/kebab-core/src/traits.rs
+++ b/crates/kebab-core/src/traits.rs
@@ -98,6 +98,11 @@ pub enum FinishReason {
    Stop,
    Length,
    Aborted,
+    /// p9-fb-33: caller-side cancel. The pipeline breaks the LM loop
+    /// when a `Token` send into `AskOpts.stream_sink` returns
+    /// `SendError` (receiver dropped). The persisted answer is
+    /// flagged with `RefusalReason::LlmStreamAborted`.
+    Cancelled,
    Error(String),
 }

--- a/crates/kebab-mcp/Cargo.toml
+++ b/crates/kebab-mcp/Cargo.toml
@@ -19,6 +19,8 @@ tracing     = { workspace = true }
 # /dependencies endpoint — rmcp declares optional schemars = "^1.0").
 schemars    = "1"

+time         = { workspace = true }
+
 kebab-app    = { path = "../kebab-app" }
 kebab-config = { path = "../kebab-config" }
 kebab-core   = { path = "../kebab-core" }
--- a/crates/kebab-mcp/src/lib.rs
+++ b/crates/kebab-mcp/src/lib.rs
@@ -1,6 +1,7 @@
-//! MCP (Model Context Protocol) server over stdio. Exposes 6 tools
-//! (`search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`)
-//! backed by `kebab-app` facade methods. Used by `kebab-cli`'s `Cmd::Mcp` arm.
+//! MCP (Model Context Protocol) server over stdio. Exposes 7 tools
+//! (`search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`
+//! / `fetch`) backed by `kebab-app` facade methods. Used by `kebab-cli`'s
+//! `Cmd::Mcp` arm.
 //!
 //! See spec `docs/superpowers/specs/2026-05-07-p9-fb-30-mcp-server-design.md`.

@@ -61,6 +62,11 @@ pub fn build_tools_vec() -> Vec<Tool> {
            "Ingest markdown content into the knowledge base. v1 markdown only. Frontmatter (title + source_uri) auto-injected.",
            schema_for_type::<tools::ingest_stdin::IngestStdinInput>(),
        ),
+        Tool::new(
+            "fetch",
+            "Verbatim fetch — chunk / doc / span modes. Returns fetch_result.v1 with the indexed text (no LLM rewrite).",
+            schema_for_type::<tools::fetch::FetchInput>(),
+        ),
    ]
 }

@@ -157,6 +163,13 @@ impl ServerHandler for KebabHandler {
                })
                .await
            }
+            "fetch" => {
+                let args = request.arguments.unwrap_or_default();
+                self.spawn_tool(args, |state, input| {
+                    tools::fetch::handle(&state, input)
+                })
+                .await
+            }
            _other => Err(ErrorData::method_not_found::<
                rmcp::model::CallToolRequestMethod,
            >()),
--- a/crates/kebab-mcp/src/tools/fetch.rs
+++ b/crates/kebab-mcp/src/tools/fetch.rs
@@ -0,0 +1,99 @@
+//! p9-fb-35 `fetch` tool — wraps `kebab_app::fetch_with_config`.
+//!
+//! Three modes (chunk / doc / span). Output is `fetch_result.v1`.
+//!
+//! Mirrors the CLI surface (`kebab fetch <kind> ...`): same input shape,
+//! same wire envelope. Missing kind-specific fields produce an `error.v1`
+//! with `code = "invalid_input"`.
+
+use rmcp::model::CallToolResult;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use crate::error::{to_tool_error, to_tool_success};
+use crate::state::KebabAppState;
+
+/// Arguments of the `fetch` tool. `kind` selects the mode; `handle`
+/// rejects the call with `invalid_input` when a field required by that
+/// mode is absent.
+#[derive(Debug, Deserialize, Serialize, JsonSchema)]
+pub struct FetchInput {
+    /// "chunk" | "doc" | "span"
+    pub kind: String,
+    /// Required when kind = "chunk".
+    pub chunk_id: Option<String>,
+    /// Required when kind = "doc" or "span".
+    pub doc_id: Option<String>,
+    /// Required when kind = "span" (1-based, inclusive).
+    pub line_start: Option<u32>,
+    /// Required when kind = "span" (1-based, inclusive).
+    pub line_end: Option<u32>,
+    /// chunk only: ±N surrounding chunks.
+    pub context: Option<u32>,
+    /// doc/span only: chars/4 budget.
+    pub max_tokens: Option<usize>,
+}
+
+/// Runs the `fetch` tool: validates the mode-specific arguments, calls
+/// `kebab_app::fetch_with_config`, and returns the result serialized as
+/// JSON with `schema_version = "fetch_result.v1"` added to the object.
+///
+/// Unknown `kind` values and missing mode-specific fields return an
+/// `invalid_input` tool error; errors from the app layer are passed to
+/// `to_tool_error` unchanged.
+pub fn handle(state: &KebabAppState, input: FetchInput) -> CallToolResult {
+    use kebab_core::{ChunkId, DocumentId, FetchOpts, FetchQuery};
+
+    let FetchInput {
+        kind,
+        chunk_id,
+        doc_id,
+        line_start,
+        line_end,
+        context,
+        max_tokens,
+    } = input;
+
+    // One flat match over (kind, chunk_id, doc_id, line_start, line_end):
+    // each mode has a "complete" arm followed by its "missing field" arm.
+    let query = match (kind.as_str(), chunk_id, doc_id, line_start, line_end) {
+        ("chunk", Some(id), ..) => FetchQuery::Chunk(ChunkId(id)),
+        ("chunk", None, ..) => return invalid_input("kind=chunk requires chunk_id"),
+        ("doc", _, Some(id), ..) => FetchQuery::Doc(DocumentId(id)),
+        ("doc", _, None, ..) => return invalid_input("kind=doc requires doc_id"),
+        ("span", _, Some(id), Some(start), Some(end)) => FetchQuery::Span {
+            doc_id: DocumentId(id),
+            line_start: start,
+            line_end: end,
+        },
+        ("span", ..) => {
+            return invalid_input("kind=span requires doc_id, line_start, line_end");
+        }
+        (other, ..) => {
+            return invalid_input(&format!(
+                "unknown kind '{other}'; expected chunk|doc|span"
+            ));
+        }
+    };
+
+    let opts = FetchOpts { context, max_tokens };
+
+    let fetched = match kebab_app::fetch_with_config((*state.config).clone(), query, opts) {
+        Ok(result) => result,
+        Err(e) => return to_tool_error(&e),
+    };
+
+    // FetchResult has no `schema_version` field of its own, so the tag is
+    // added to the serialized object here.
+    let mut envelope = match serde_json::to_value(&fetched) {
+        Ok(value) => value,
+        Err(e) => {
+            return to_tool_error(&anyhow::anyhow!("FetchResult serialize: {e}"));
+        }
+    };
+    if let Some(fields) = envelope.as_object_mut() {
+        fields.insert(
+            "schema_version".to_string(),
+            serde_json::Value::String("fetch_result.v1".to_string()),
+        );
+    }
+
+    match serde_json::to_string(&envelope) {
+        Ok(json) => to_tool_success(json),
+        Err(e) => to_tool_error(&anyhow::anyhow!(e)),
+    }
+}
+
+/// Builds the tool error returned for malformed `fetch` arguments: a
+/// `StructuredError` wrapping an `ErrorV1` with `code = "invalid_input"`
+/// and `msg` as the message, passed through `to_tool_error`.
+fn invalid_input(msg: &str) -> CallToolResult {
+    // Use the shared schema id constant (as the `search` tool does) rather
+    // than a local string literal, so the two tools cannot drift apart.
+    // NOTE(review): the fetch integration test expects the value "error.v1".
+    use kebab_app::{ErrorV1, StructuredError, ERROR_V1_ID};
+    let err = anyhow::Error::new(StructuredError(ErrorV1 {
+        schema_version: ERROR_V1_ID.to_string(),
+        code: "invalid_input".to_string(),
+        message: msg.to_string(),
+        details: serde_json::Value::Null,
+        hint: None,
+    }));
+    to_tool_error(&err)
+}
--- a/crates/kebab-mcp/src/tools/mod.rs
+++ b/crates/kebab-mcp/src/tools/mod.rs
@@ -6,3 +6,4 @@ pub mod search;
 pub mod ask;
 pub mod ingest_file;
 pub mod ingest_stdin;
+pub mod fetch;
--- a/crates/kebab-mcp/src/tools/search.rs
+++ b/crates/kebab-mcp/src/tools/search.rs
@@ -1,5 +1,8 @@
-//! `search` tool — wraps `kebab_app::search_with_config`.
-//! Input: { query, mode?, k? }. Output: search_hit.v1 array JSON.
+//! `search` tool — wraps `kebab_app::search_with_opts_with_config`.
+//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor?,
+//!          tags?, lang?, path_glob?, trust_min?, media?,
+//!          ingested_after?, doc_id?, trace? }.
+//! Output: search_response.v1 envelope (hits + next_cursor + truncated,
+//! plus `trace` when the response carries one).
 //!
 //! First tool with a non-empty `inputSchema`: `SearchInput` derives
 //! `JsonSchema` and `Tool::new` uses
@@ -9,6 +12,8 @@ use rmcp::model::CallToolResult;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};

+use kebab_app::ERROR_V1_ID;
+
 use crate::error::{to_tool_error, to_tool_success};
 use crate::state::KebabAppState;

@@ -17,38 +22,115 @@ pub struct SearchInput {
    /// User query (free text).
    pub query: String,
    /// Retrieval mode: "hybrid" (default), "lexical", or "vector".
-    #[serde(default = "default_mode")]
-    pub mode: String,
+    pub mode: Option<String>,
    /// Top-K results. Defaults to 10. Clamped to 1–100.
-    #[serde(default = "default_k")]
-    pub k: usize,
-}
-
-fn default_mode() -> String {
-    "hybrid".to_string()
-}
-fn default_k() -> usize {
-    10
+    pub k: Option<usize>,
+    /// p9-fb-34: cap result wire size at ~N tokens (chars/4 estimate).
+    pub max_tokens: Option<usize>,
+    /// p9-fb-34: per-hit snippet character cap.
+    pub snippet_chars: Option<usize>,
+    /// p9-fb-34: opaque cursor from a previous response.
+    pub cursor: Option<String>,
+    /// p9-fb-36: filter by `metadata.tags` (OR-within).
+    pub tags: Option<Vec<String>>,
+    /// p9-fb-36: filter by `documents.lang` (ISO code).
+    pub lang: Option<String>,
+    /// p9-fb-36: filter by `documents.workspace_path` glob.
+    pub path_glob: Option<String>,
+    /// p9-fb-36: filter by minimum `documents.trust_level`.
+    /// Accepts: `"primary"`, `"secondary"`, `"generated"`.
+    pub trust_min: Option<String>,
+    /// p9-fb-36: filter by `assets.media_type` kind. IN-list. Accepts:
+    /// `"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`. Aliases: `md` → `markdown`.
+    pub media: Option<Vec<String>>,
+    /// p9-fb-36: RFC3339 UTC timestamp. Invalid format → invalid_input.
+    pub ingested_after: Option<String>,
+    /// p9-fb-36: filter to a single doc.
+    pub doc_id: Option<String>,
+    /// p9-fb-37: when true, include a `trace` field on the response
+    /// with pre-fusion lexical/vector candidate lists + per-stage timing.
+    /// Bypasses cache (debug intent — fresh run guaranteed). Default false.
+    pub trace: Option<bool>,
 }

 pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
-    let k = input.k.clamp(1, 100);
-    let mode = match input.mode.as_str() {
+    let k = input.k.unwrap_or(10).clamp(1, 100);
+    let mode_str = input.mode.as_deref().unwrap_or("hybrid");
+    let mode = match mode_str {
        "lexical" => kebab_core::SearchMode::Lexical,
        "vector" => kebab_core::SearchMode::Vector,
        _ => kebab_core::SearchMode::Hybrid,
    };
+
+    // p9-fb-36: parse filter inputs, returning invalid_input on bad values.
+    let trust_min = match input.trust_min.as_deref() {
+        Some(s) => match s.to_ascii_lowercase().as_str() {
+            "primary" => Some(kebab_core::TrustLevel::Primary),
+            "secondary" => Some(kebab_core::TrustLevel::Secondary),
+            "generated" => Some(kebab_core::TrustLevel::Generated),
+            other => {
+                return invalid_input(&format!(
+                    "trust_min: unknown level '{other}'; expected primary|secondary|generated"
+                ));
+            }
+        },
+        None => None,
+    };
+
+    let ingested_after = match input.ingested_after.as_deref() {
+        Some(s) => {
+            match time::OffsetDateTime::parse(
+                s,
+                &time::format_description::well_known::Rfc3339,
+            ) {
+                Ok(ts) => Some(ts),
+                Err(e) => {
+                    return invalid_input(&format!(
+                        "ingested_after: invalid RFC3339 '{s}': {e}"
+                    ));
+                }
+            }
+        }
+        None => None,
+    };
+
+    let media: Vec<String> = input
+        .media
+        .clone()
+        .unwrap_or_default()
+        .iter()
+        .map(|s| normalize_media_alias(s))
+        .collect();
+
+    let filters = kebab_core::SearchFilters {
+        tags_any: input.tags.clone().unwrap_or_default(),
+        lang: input.lang.clone().map(kebab_core::Lang),
+        path_glob: input.path_glob.clone(),
+        trust_min,
+        media,
+        ingested_after,
+        doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
+    };
+
    let query = kebab_core::SearchQuery {
        text: input.query,
        mode,
        k,
-        filters: kebab_core::SearchFilters::default(),
+        filters,
    };
-    match kebab_app::search_with_config((*state.config).clone(), query) {
-        Ok(hits) => {
+    let opts = kebab_core::SearchOpts {
+        max_tokens: input.max_tokens,
+        snippet_chars: input.snippet_chars,
+        cursor: input.cursor,
+        trace: input.trace.unwrap_or(false),
+    };
+    let cfg_clone = (*state.config).clone();
+    match kebab_app::search_with_opts_with_config(cfg_clone, query, opts) {
+        Ok(resp) => {
            // SearchHit (kebab-core) does not carry a `schema_version` field,
            // so we tag each element inline before serialising.
-            let tagged: Vec<serde_json::Value> = hits
+            let tagged: Vec<serde_json::Value> = resp
+                .hits
                .iter()
                .map(|h| {
                    let mut v = serde_json::to_value(h).unwrap_or_default();
@@ -61,7 +143,20 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
                    v
                })
                .collect();
-            match serde_json::to_string(&serde_json::Value::Array(tagged)) {
+            let mut envelope = serde_json::json!({
+                "schema_version": "search_response.v1",
+                "hits": tagged,
+                "next_cursor": resp.next_cursor,
+                "truncated": resp.truncated,
+            });
+            if let Some(trace) = &resp.trace {
+                let trace_v =
+                    serde_json::to_value(trace).unwrap_or(serde_json::Value::Null);
+                if let serde_json::Value::Object(ref mut map) = envelope {
+                    map.insert("trace".to_string(), trace_v);
+                }
+            }
+            match serde_json::to_string(&envelope) {
                Ok(json) => to_tool_success(json),
                Err(e) => to_tool_error(&anyhow::anyhow!(e)),
            }
@@ -69,3 +164,22 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
        Err(e) => to_tool_error(&e),
    }
 }
+
+/// Lowercases (ASCII) a media kind and expands the `md` alias to
+/// `markdown`. Any other value is returned lowercased and otherwise
+/// unchanged; no validation against the known kinds happens here.
+fn normalize_media_alias(s: &str) -> String {
+    let lowered = s.to_ascii_lowercase();
+    if lowered == "md" {
+        String::from("markdown")
+    } else {
+        lowered
+    }
+}
+
+/// Builds the tool error returned for malformed `search` arguments: an
+/// `ErrorV1` with `code = "invalid_input"` and `msg` as the message,
+/// wrapped in `StructuredError` and passed through `to_tool_error`.
+fn invalid_input(msg: &str) -> CallToolResult {
+    let payload = kebab_app::ErrorV1 {
+        schema_version: ERROR_V1_ID.to_string(),
+        code: String::from("invalid_input"),
+        message: msg.to_owned(),
+        details: serde_json::Value::Null,
+        hint: None,
+    };
+    let wrapped = anyhow::Error::new(kebab_app::StructuredError(payload));
+    to_tool_error(&wrapped)
+}
--- a/crates/kebab-mcp/tests/tools_call_fetch.rs
+++ b/crates/kebab-mcp/tests/tools_call_fetch.rs
@@ -0,0 +1,223 @@
+//! p9-fb-35: tools/call name=fetch — chunk happy path + invalid_input.
+//!
+//! Mirrors `tools_call_search.rs` setup: a TempDir KB with embedding
+//! provider = "none" (no Ollama / fastembed) and a single ingested
+//! markdown doc. We discover a `chunk_id` via the search tool, call
+//! `fetch` with it, then exercise the missing-arg branch separately.
+
+use std::fs;
+
+use kebab_config::Config;
+use kebab_core::SourceScope;
+use kebab_mcp::{KebabAppState, KebabHandler};
+use rmcp::model::RawContent;
+
+/// Default config redirected to the given data dir and workspace root,
+/// with workspace excludes cleared and the embedding provider set to
+/// `"none"` (zero dimensions), so no Ollama / fastembed is needed.
+fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path) -> Config {
+    // The config stores paths as plain strings; render them lossily.
+    let as_string = |p: &std::path::Path| p.to_string_lossy().into_owned();
+
+    let mut cfg = Config::defaults();
+    cfg.storage.data_dir = as_string(data_dir);
+    cfg.storage.model_dir = as_string(&data_dir.join("models"));
+    cfg.workspace.root = as_string(workspace_root);
+    cfg.workspace.exclude.clear();
+    cfg.models.embedding.provider = "none".to_string();
+    cfg.models.embedding.dimensions = 0;
+    cfg
+}
+
+/// Creates `<tmp>/data` and `<tmp>/notes` and returns the TempDir guard,
+/// a config pointing at them, and the workspace root. The guard must stay
+/// bound for the whole test: dropping it deletes the directories.
+fn temp_kb() -> (tempfile::TempDir, Config, std::path::PathBuf) {
+    let dir = tempfile::tempdir().unwrap();
+    let data_dir = dir.path().join("data");
+    let workspace_root = dir.path().join("notes");
+    fs::create_dir_all(&data_dir).unwrap();
+    fs::create_dir_all(&workspace_root).unwrap();
+    let config = minimal_config(&data_dir, &workspace_root);
+    (dir, config, workspace_root)
+}
+
+/// Parses the first content item of a tool result as JSON. Panics when
+/// there is no content, when it is not text, or when it is not valid JSON.
+fn first_json(result: &rmcp::model::CallToolResult) -> serde_json::Value {
+    let content = result
+        .content
+        .first()
+        .expect("expected at least one content item");
+    match &content.raw {
+        RawContent::Text(t) => serde_json::from_str(&t.text).unwrap(),
+        other => panic!("expected text content, got {other:?}"),
+    }
+}
+
+/// `FetchInput` with the given `kind` / `chunk_id` and every other
+/// argument unset.
+fn fetch_input(kind: &str, chunk_id: Option<String>) -> kebab_mcp::tools::fetch::FetchInput {
+    kebab_mcp::tools::fetch::FetchInput {
+        kind: kind.to_string(),
+        chunk_id,
+        doc_id: None,
+        line_start: None,
+        line_end: None,
+        context: None,
+        max_tokens: None,
+    }
+}
+
+/// Asserts that `result` is an error carrying an `error.v1` envelope with
+/// `code = "invalid_input"`.
+fn assert_invalid_input(result: &rmcp::model::CallToolResult, why: &str) {
+    assert!(result.is_error.unwrap_or(false), "expected isError=true {why}");
+    let v = first_json(result);
+    assert_eq!(
+        v.get("schema_version").and_then(|s| s.as_str()),
+        Some("error.v1"),
+        "must carry error.v1 envelope"
+    );
+    assert_eq!(
+        v.get("code").and_then(|s| s.as_str()),
+        Some("invalid_input"),
+        "code must be invalid_input {why}"
+    );
+}
+
+#[tokio::test]
+async fn fetch_tool_chunk_returns_fetch_result_v1() {
+    let (_dir, config, workspace_root) = temp_kb();
+
+    fs::write(
+        workspace_root.join("a.md"),
+        "# Alpha\n\nThis document mentions kebab and bread.",
+    )
+    .unwrap();
+
+    let scope = SourceScope {
+        root: workspace_root.clone(),
+        include: vec![],
+        exclude: vec![],
+    };
+    let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap();
+
+    let handler = KebabHandler::new(KebabAppState::new(config, None));
+
+    // Discover a chunk_id via the search tool.
+    let search_result = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: Some("lexical".to_string()),
+            k: Some(1),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
+            trace: None,
+        },
+    );
+    let search_v = first_json(&search_result);
+    let chunk_id = search_v["hits"][0]["chunk_id"]
+        .as_str()
+        .expect("chunk_id on first hit")
+        .to_string();
+
+    // Call fetch with kind=chunk.
+    let result = kebab_mcp::tools::fetch::handle(
+        handler.state(),
+        fetch_input("chunk", Some(chunk_id)),
+    );
+
+    assert!(
+        !result.is_error.unwrap_or(false),
+        "expected isError=false, got {:?}",
+        result
+    );
+
+    let v = first_json(&result);
+    assert_eq!(
+        v.get("schema_version").and_then(|s| s.as_str()),
+        Some("fetch_result.v1"),
+        "envelope must carry schema_version=fetch_result.v1"
+    );
+    assert_eq!(
+        v.get("kind").and_then(|s| s.as_str()),
+        Some("chunk"),
+        "kind must be 'chunk'"
+    );
+    assert!(
+        v.get("chunk").is_some_and(|c| c.is_object()),
+        "chunk payload must be populated for kind=chunk"
+    );
+}
+
+#[tokio::test]
+async fn fetch_tool_invalid_kind_returns_invalid_input() {
+    let (_dir, config, _workspace_root) = temp_kb();
+    let handler = KebabHandler::new(KebabAppState::new(config, None));
+
+    let result = kebab_mcp::tools::fetch::handle(handler.state(), fetch_input("garbage", None));
+
+    assert_invalid_input(&result, "for unknown kind");
+}
+
+#[tokio::test]
+async fn fetch_tool_chunk_missing_id_returns_invalid_input() {
+    let (_dir, config, _workspace_root) = temp_kb();
+    let handler = KebabHandler::new(KebabAppState::new(config, None));
+
+    // kind=chunk but no chunk_id — invalid_input.
+    let result = kebab_mcp::tools::fetch::handle(handler.state(), fetch_input("chunk", None));
+
+    assert_invalid_input(&result, "when chunk_id is missing");
+}
--- a/crates/kebab-mcp/tests/tools_call_search.rs
+++ b/crates/kebab-mcp/tests/tools_call_search.rs
@@ -1,4 +1,4 @@
-//! Integration: tools/call name=search — verify response is search_hit.v1 array.
+//! Integration: tools/call name=search — verify response is search_response.v1.

 use std::fs;

@@ -22,7 +22,7 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path)
 }

 #[tokio::test]
-async fn search_tool_returns_search_hits_array() {
+async fn search_tool_returns_search_response_v1() {
    let dir = tempfile::tempdir().unwrap();
    let data_dir = dir.path().join("data");
    let workspace_root = dir.path().join("notes");
@@ -53,8 +53,19 @@ async fn search_tool_returns_search_hits_array() {
        handler.state(),
        kebab_mcp::tools::search::SearchInput {
            query: "kebab".to_string(),
-            mode: "lexical".to_string(),
-            k: 5,
+            mode: Some("lexical".to_string()),
+            k: Some(5),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
+            trace: None,
        },
    );

@@ -75,16 +86,208 @@ async fn search_tool_returns_search_hits_array() {
    };

    let v: serde_json::Value = serde_json::from_str(text).unwrap();
-    let arr = v.as_array().expect("search returns a JSON array");
+    assert_eq!(
+        v.get("schema_version").and_then(|s| s.as_str()),
+        Some("search_response.v1"),
+        "envelope should carry schema_version=search_response.v1"
+    );
+    let hits = v
+        .get("hits")
+        .and_then(|h| h.as_array())
+        .expect("hits must be a JSON array");
    assert!(
-        !arr.is_empty(),
+        !hits.is_empty(),
        "expected at least one hit for 'kebab' in 'a.md'"
    );
    assert_eq!(
-        arr[0]
+        hits[0]
            .get("schema_version")
            .and_then(|s| s.as_str()),
        Some("search_hit.v1"),
        "first hit should carry schema_version=search_hit.v1"
    );
+    // truncated must be present (bool); next_cursor may be null on last page.
+    assert!(
+        v.get("truncated").and_then(|t| t.as_bool()).is_some(),
+        "envelope should carry truncated:bool"
+    );
+    assert!(
+        v.get("next_cursor").is_some(),
+        "envelope should carry next_cursor (possibly null)"
+    );
+}
+
+/// p9-fb-36: a search restricted by `doc_id` returns only hits from that document.
+#[tokio::test]
+async fn search_with_doc_id_filter_returns_only_target() {
+    let dir = tempfile::tempdir().unwrap();
+    let data_dir = dir.path().join("data");
+    let workspace_root = dir.path().join("notes");
+    fs::create_dir_all(&data_dir).unwrap();
+    fs::create_dir_all(&workspace_root).unwrap();
+
+    let config = minimal_config(&data_dir, &workspace_root);
+
+    // Write two markdown documents, both containing the query term.
+    fs::write(
+        workspace_root.join("a.md"),
+        "# Alpha\n\nThis document mentions kebab and flatbread.",
+    )
+    .unwrap();
+    fs::write(
+        workspace_root.join("b.md"),
+        "# Beta\n\nAnother document about kebab wraps and fillings.",
+    )
+    .unwrap();
+
+    let scope = SourceScope {
+        root: workspace_root.clone(),
+        include: vec![],
+        exclude: vec![],
+    };
+    let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap(); // ingest both files before querying
+
+    let state = KebabAppState::new(config, None);
+    let handler = KebabHandler::new(state);
+
+    // First: unfiltered search to discover a doc_id from one of the docs.
+    let unfiltered = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: Some("lexical".to_string()),
+            k: Some(10),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
+            trace: None,
+        },
+    );
+    assert!(
+        !unfiltered.is_error.unwrap_or(false),
+        "unfiltered search failed: {:?}",
+        unfiltered
+    );
+    let unfiltered_text = match &unfiltered.content.first().unwrap().raw {
+        RawContent::Text(t) => t.text.clone(),
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let unfiltered_v: serde_json::Value = serde_json::from_str(&unfiltered_text).unwrap();
+    let hits = unfiltered_v["hits"].as_array().expect("hits must be array");
+    assert!(hits.len() >= 2, "expected hits from both docs"); // NOTE(review): checks the hit count only, not that the hits carry two distinct doc_ids
+
+    // Pick the doc_id of the first hit.
+    let target_doc_id = hits[0]["doc_id"]
+        .as_str()
+        .expect("doc_id on first hit")
+        .to_string();
+
+    // Now search with doc_id filter — all results must belong to that doc.
+    let filtered = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: Some("lexical".to_string()),
+            k: Some(10),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: Some(target_doc_id.clone()), // the only field that differs from the unfiltered query above
+            trace: None,
+        },
+    );
+    assert!(
+        !filtered.is_error.unwrap_or(false),
+        "filtered search failed: {:?}",
+        filtered
+    );
+    let filtered_text = match &filtered.content.first().unwrap().raw {
+        RawContent::Text(t) => t.text.clone(),
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let filtered_v: serde_json::Value = serde_json::from_str(&filtered_text).unwrap();
+    let filtered_hits = filtered_v["hits"].as_array().expect("hits must be array");
+
+    assert!(
+        !filtered_hits.is_empty(), // guards the loop below against passing vacuously on an empty result
+        "expected at least one hit for target doc"
+    );
+    for hit in filtered_hits {
+        assert_eq!(
+            hit["doc_id"].as_str(),
+            Some(target_doc_id.as_str()),
+            "all filtered hits must belong to the target doc"
+        );
+    }
+}
+
+/// p9-fb-36: a non-RFC3339 `ingested_after` value yields an `error.v1` envelope with code `invalid_input`.
+#[tokio::test]
+async fn search_with_invalid_ingested_after_returns_invalid_input() {
+    let dir = tempfile::tempdir().unwrap();
+    let data_dir = dir.path().join("data");
+    let workspace_root = dir.path().join("notes");
+    fs::create_dir_all(&data_dir).unwrap();
+    fs::create_dir_all(&workspace_root).unwrap();
+
+    let config = minimal_config(&data_dir, &workspace_root);
+    let state = KebabAppState::new(config, None); // nothing is written or ingested in this test
+    let handler = KebabHandler::new(state);
+
+    let result = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: None,
+            k: None,
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: Some("garbage".to_string()), // deliberately not an RFC3339 timestamp
+            doc_id: None,
+            trace: None,
+        },
+    );
+
+    assert!(
+        result.is_error.unwrap_or(false), // a missing is_error flag is treated as "not an error", which fails the test
+        "expected isError=true for invalid ingested_after"
+    );
+    let content = result
+        .content
+        .first()
+        .expect("expected at least one content item");
+    let text = match &content.raw {
+        RawContent::Text(t) => &t.text,
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let v: serde_json::Value = serde_json::from_str(text).unwrap(); // the error payload is itself JSON text
+    assert_eq!(
+        v.get("schema_version").and_then(|s| s.as_str()),
+        Some("error.v1"),
+        "must carry error.v1 envelope"
+    );
+    assert_eq!(
+        v.get("code").and_then(|s| s.as_str()),
+        Some("invalid_input"),
+        "code must be invalid_input for bad RFC3339"
+    );
 }
--- a/crates/kebab-mcp/tests/tools_call_search_trace.rs
+++ b/crates/kebab-mcp/tests/tools_call_search_trace.rs
@@ -0,0 +1,104 @@
+//! p9-fb-37: integration test for `mcp__kebab__search` trace input/output.
+
+use std::fs;
+
+use kebab_config::Config;
+use kebab_core::SourceScope;
+use kebab_mcp::{KebabAppState, KebabHandler};
+use rmcp::model::RawContent;
+
+fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path) -> Config { // test config: defaults with storage and workspace pointed at the given dirs
+    let mut cfg = Config::defaults();
+    cfg.storage.data_dir = data_dir.to_string_lossy().into_owned();
+    cfg.storage.model_dir = data_dir.join("models").to_string_lossy().into_owned(); // models go under <data_dir>/models
+    cfg.workspace.root = workspace_root.to_string_lossy().into_owned();
+    cfg.workspace.exclude.clear(); // drop whatever exclude entries the defaults carry
+    cfg.models.embedding.provider = "none".to_string(); // presumably turns embedding off — confirm against kebab-config
+    cfg.models.embedding.dimensions = 0;
+    cfg
+}
+
+fn setup() -> (tempfile::TempDir, KebabHandler) { // shared fixture: temp workspace with one ingested markdown file, plus a handler over it
+    let dir = tempfile::tempdir().unwrap();
+    let data_dir = dir.path().join("data");
+    let workspace_root = dir.path().join("notes");
+    fs::create_dir_all(&data_dir).unwrap();
+    fs::create_dir_all(&workspace_root).unwrap();
+    let config = minimal_config(&data_dir, &workspace_root);
+    fs::write(
+        workspace_root.join("a.md"),
+        "# Alpha\n\nThis document mentions kebab and bread.", // contains "kebab", the term make_input queries for
+    )
+    .unwrap();
+    let scope = SourceScope {
+        root: workspace_root.clone(),
+        include: vec![],
+        exclude: vec![],
+    };
+    let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap(); // ingest report is discarded; only success matters
+    let state = KebabAppState::new(config, None);
+    let handler = KebabHandler::new(state);
+    (dir, handler) // hand the TempDir back so the caller keeps the directory alive (it is removed on drop)
+}
+
+fn make_input(trace: Option<bool>) -> kebab_mcp::tools::search::SearchInput { // lexical k=5 query for "kebab"; only `trace` varies between tests
+    kebab_mcp::tools::search::SearchInput {
+        query: "kebab".to_string(),
+        mode: Some("lexical".to_string()),
+        k: Some(5),
+        max_tokens: None,
+        snippet_chars: None,
+        cursor: None,
+        tags: None,
+        lang: None,
+        path_glob: None,
+        trust_min: None,
+        media: None,
+        ingested_after: None,
+        doc_id: None,
+        trace, // passed through unchanged: None, Some(false) or Some(true)
+    }
+}
+
+fn extract_json(result: &rmcp::model::CallToolResult) -> serde_json::Value { // asserts a non-error result and parses its first text item as JSON
+    assert!(
+        !result.is_error.unwrap_or(false), // an absent is_error flag counts as success
+        "expected isError=false, got {result:?}"
+    );
+    let content = result.content.first().expect("at least one content item");
+    let text = match &content.raw {
+        RawContent::Text(t) => &t.text,
+        other => panic!("expected Text content, got {other:?}"), // any non-text payload fails the test
+    };
+    serde_json::from_str(text).expect("valid JSON")
+}
+
+#[tokio::test]
+async fn search_with_trace_true_returns_trace_field() { // trace:true → response carries a populated `trace` object
+    let (_dir, handler) = setup(); // _dir is held only to keep the temp directory alive
+    let result = kebab_mcp::tools::search::handle(handler.state(), make_input(Some(true)));
+    let v = extract_json(&result);
+    assert_eq!(v["schema_version"], "search_response.v1");
+    assert!(v["trace"].is_object(), "trace field present when trace:true");
+    assert!(v["trace"]["timing"]["total_ms"].is_number());
+    assert!(v["trace"]["lexical"].is_array());
+    assert!(v["trace"]["vector"].is_array()); // expected to be an array even though the query mode is lexical
+    assert!(v["trace"]["rrf_inputs"].is_array());
+}
+
+#[tokio::test]
+async fn search_without_trace_omits_trace_field() { // trace unset → no `trace` key in the response
+    let (_dir, handler) = setup(); // _dir is held only to keep the temp directory alive
+    let result = kebab_mcp::tools::search::handle(handler.state(), make_input(None));
+    let v = extract_json(&result);
+    assert_eq!(v["schema_version"], "search_response.v1");
+    assert!(v.get("trace").is_none(), "trace absent when None"); // the key must be missing entirely; a present null would fail
+}
+
+#[tokio::test]
+async fn search_with_trace_false_omits_trace_field() { // explicit trace:false behaves like an unset trace
+    let (_dir, handler) = setup(); // _dir is held only to keep the temp directory alive
+    let result = kebab_mcp::tools::search::handle(handler.state(), make_input(Some(false)));
+    let v = extract_json(&result);
+    assert!(v.get("trace").is_none(), "trace absent when false"); // the key must be missing entirely; a present null would fail
+}
--- a/crates/kebab-mcp/tests/tools_list.rs
+++ b/crates/kebab-mcp/tests/tools_list.rs
@@ -1,13 +1,13 @@
-//! Integration: `build_tools_vec` returns 6 tools with correct names and
+//! Integration: `build_tools_vec` returns 7 tools with correct names and
 //! inputSchema. Uses the extracted `pub fn build_tools_vec()` helper — no
 //! transport or RequestContext needed.

 use kebab_mcp::build_tools_vec;

 #[test]
-fn tools_list_returns_six_tools() {
+fn tools_list_returns_seven_tools() {
    let tools = build_tools_vec();
-    assert_eq!(tools.len(), 6, "expected exactly 6 tools, got {}", tools.len());
+    assert_eq!(tools.len(), 7, "expected exactly 7 tools, got {}", tools.len());

    let names: Vec<&str> = tools.iter().map(|t| t.name.as_ref()).collect();
    assert!(names.contains(&"schema"), "missing 'schema' tool");
@@ -16,6 +16,7 @@ fn tools_list_returns_six_tools() {
    assert!(names.contains(&"ask"), "missing 'ask' tool");
    assert!(names.contains(&"ingest_file"), "missing 'ingest_file' tool");
    assert!(names.contains(&"ingest_stdin"), "missing 'ingest_stdin' tool");
+    assert!(names.contains(&"fetch"), "missing 'fetch' tool");
 }

 #[test]
--- a/crates/kebab-parse-image/Cargo.toml
+++ b/crates/kebab-parse-image/Cargo.toml
@@ -34,7 +34,7 @@ kamadak-exif = "0.6"
 # rustls-tls) so both crates share the same TLS backend and the
 # transitive tokio runtime is brought in once.
 reqwest      = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
-base64       = "0.22"
+base64       = { workspace = true }

 [dev-dependencies]
 tempfile     = { workspace = true }
@@ -47,7 +47,7 @@ tokio        = { workspace = true, features = ["rt-multi-thread"] }
 # fixture. Only loaded for tests; the production crate doesn't need
 # font rendering.
 ab_glyph     = "0.2"
-base64       = "0.22"
+base64       = { workspace = true }
 # `kebab-llm/mock` exposes `MockLanguageModel` for hermetic caption
 # tests. Real adapters (Ollama) live in `kebab-llm-local`, which is
 # only allowed at the dev-dep level here — the runtime crate stays
--- a/crates/kebab-rag/src/lib.rs
+++ b/crates/kebab-rag/src/lib.rs
@@ -22,4 +22,4 @@ pub use kebab_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReas

 mod pipeline;

-pub use pipeline::{AskOpts, RagPipeline};
+pub use pipeline::{AskOpts, RagPipeline, StreamEvent};
--- a/crates/kebab-rag/src/pipeline.rs
+++ b/crates/kebab-rag/src/pipeline.rs
@@ -12,9 +12,12 @@
 //!    ~4 chars / token, matching the kb-chunk convention).
 //! 4. Render the `rag-v1` prompt (system + user) verbatim per design.
 //! 5. Generate via `LanguageModel::generate_stream`. The token loop runs
-//!    on the calling thread; `opts.stream_sink` (if any) gets each
-//!    token forwarded synchronously and a dropped receiver does not
-//!    abort generation.
+//!    on the calling thread; `opts.stream_sink` (if any) receives
+//!    `StreamEvent::RetrievalDone` once after retrieve + stale-stamp,
+//!    `StreamEvent::Token` per LM chunk, and `StreamEvent::Final` on
+//!    success. A dropped receiver triggers cancel: a `SendError` on
+//!    `Token` breaks the LM loop and records
+//!    `RefusalReason::LlmStreamAborted` in the persisted Answer (p9-fb-33).
 //! 6. Citation extract — STRICT regex `\[#(\d{1,3})\]`, no false
 //!    positives from prose `[1]` / `vec![1]` / Markdown link refs.
 //! 7. Citation validate — every extracted marker must map to a packed
@@ -67,6 +70,42 @@ struct PackedCitation {
 /// prompt section the LLM will see (system + query + packed context).
 type PackedContext = (String, Vec<PackedCitation>, usize);

+/// p9-fb-33: streaming events the pipeline forwards into
+/// [`AskOpts::stream_sink`] when present. Discriminated on `kind`
+/// to match the wire `answer_event.v1` schema. Three variants:
+///
+/// - `RetrievalDone` — emitted once after retrieval + stale-stamp.
+/// - `Token` — emitted per `TokenChunk::Token` from the LM.
+/// - `Final` — emitted once after the full Answer is built (before
+///   persistence). Always the terminal event on the success path.
+///
+/// On caller-side cancel (receiver dropped), the pipeline observes
+/// the `SendError` from the next `Token` send and breaks the LM
+/// loop — see `RagPipeline::ask` cancel branch. In that case
+/// `Final` is NOT emitted (the answer still gets persisted with
+/// `RefusalReason::LlmStreamAborted`).
+#[derive(Clone, Debug, serde::Serialize)]
+#[serde(tag = "kind", rename_all = "snake_case")] // internally tagged: "kind" is "retrieval_done" | "token" | "final"
+// p9-fb-33: clippy flags Final.answer (~320B) as the heavy variant.
+// In practice RetrievalDone.hits (Vec<SearchHit>, k≤10×~1KB each)
+// dominates per-emit cost, but it fires once. Boxing either would
+// force every consumer (TUI, CLI ndjson driver, future MCP) to
+// deref through a Box for marginal win on a short-lived per-ask
+// channel. Keep both unboxed.
+#[allow(clippy::large_enum_variant)]
+pub enum StreamEvent {
+    RetrievalDone {
+        hits: Vec<SearchHit>, // clone of the hit list taken after `stale` has been stamped
+    },
+    Token {
+        delta: String, // text of one `TokenChunk::Token`
+        turn_index: Option<u32>, // copied from `AskOpts::turn_index` on every token
+    },
+    Final {
+        answer: Answer, // clone of the Answer that `ask` goes on to persist
+    },
+}
+
 // ── AskOpts ─────────────────────────────────────────────────────────────────

 /// Caller-supplied knobs for one [`RagPipeline::ask`] invocation.
@@ -92,11 +131,10 @@ pub struct AskOpts {
    pub temperature: Option<f32>,
    /// Override `config.models.llm.seed` for this call.
    pub seed: Option<u64>,
-    /// Optional sink: every `TokenChunk::Token` produced by the LM is
-    /// forwarded synchronously. A dropped receiver does NOT abort the
-    /// pipeline — `SendError` is silently swallowed and generation
-    /// continues so the `Answer` row still gets persisted.
-    pub stream_sink: Option<std::sync::mpsc::Sender<String>>,
+    /// Optional sink: every staged event (`RetrievalDone`, `Token`,
+    /// `Final`) is forwarded synchronously. A dropped receiver
+    /// triggers cancel — see `RagPipeline::ask` for the break path.
+    pub stream_sink: Option<std::sync::mpsc::Sender<StreamEvent>>,
    /// p9-fb-15: prior turns of the same conversation. Empty for
    /// single-shot ask. The pipeline prepends a serialized `[이전
    /// 대화]` block to the user prompt and uses the most-recent
@@ -203,6 +241,16 @@ impl RagPipeline {
        for h in &mut hits {
            h.stale = compute_stale(h.indexed_at, now, stale_threshold_days);
        }
+        // p9-fb-33: emit retrieval_done as soon as the hit list is
+        // ready (post stale-stamp so consumers see the same `stale`
+        // values the App-level wire path emits). Cancel is best-effort
+        // here — if the caller already dropped the receiver we just
+        // skip and let the LLM-loop SendError handle it consistently.
+        if let Some(sink) = &opts.stream_sink {
+            let _ = sink.send(StreamEvent::RetrievalDone {
+                hits: hits.clone(),
+            });
+        }
        let chunks_returned = u32::try_from(hits.len()).unwrap_or(u32::MAX);
        let top_score = hits.first().map(|h| h.retrieval.fusion_score).unwrap_or(0.0);

@@ -301,16 +349,28 @@ impl RagPipeline {
            .llm
            .generate_stream(req)
            .context("kb-rag: llm.generate_stream")?;
+        let mut cancelled = false;
        for item in stream {
            let chunk = item.context("kb-rag: stream item")?;
            match chunk {
                TokenChunk::Token(t) => {
                    acc.push_str(&t);
                    if let Some(sink) = &opts.stream_sink {
-                        // SendError silently dropped — caller cancelled but the
-                        // pipeline still drives generation to completion so the
-                        // `answers` row gets a faithful record.
-                        let _ = sink.send(t);
+                        // p9-fb-33: SendError → caller dropped the
+                        // receiver (probably a closed stdout downstream).
+                        // Stop generation, mark the answer cancelled so
+                        // the persistence path records refusal_reason =
+                        // LlmStreamAborted.
+                        if sink
+                            .send(StreamEvent::Token {
+                                delta: t,
+                                turn_index: opts.turn_index,
+                            })
+                            .is_err()
+                        {
+                            cancelled = true;
+                            break;
+                        }
                    }
                }
                TokenChunk::Done {
@@ -323,6 +383,9 @@ impl RagPipeline {
                }
            }
        }
+        if cancelled {
+            finish_reason = FinishReason::Cancelled;
+        }

        // ── 6. Citation extract ────────────────────────────────────────────
        let extracted: Vec<u32> = extract_markers(&acc);
@@ -347,15 +410,20 @@ impl RagPipeline {
        });
        let trimmed_answer = acc.trim();
        let matched_refusal_phrase = refusal_phrase.is_match(&acc);
-        let grounded = !trimmed_answer.is_empty()
+        let grounded_unaware = !trimmed_answer.is_empty()
            && unknown_markers.is_empty()
            && !extracted.is_empty();
-        let refusal_reason = if grounded {
-            None
+        // p9-fb-33: cancel takes priority over LlmSelfJudge — the
+        // caller bailed mid-stream, so the recorded reason should
+        // reflect that, not "model didn't cite".
+        let (grounded, refusal_reason) = if matches!(finish_reason, FinishReason::Cancelled) {
+            (false, Some(RefusalReason::LlmStreamAborted))
+        } else if grounded_unaware {
+            (true, None)
        } else {
            // Spec §7: empty answer, unknown markers, silent ungrounded,
            // and explicit "근거가 부족" all collapse to LlmSelfJudge.
-            Some(RefusalReason::LlmSelfJudge)
+            (false, Some(RefusalReason::LlmSelfJudge))
        };

        // ── 8. Build Answer ────────────────────────────────────────────────
@@ -433,6 +501,17 @@ impl RagPipeline {
            "kb-rag: ask done"
        );

+        // p9-fb-33: emit Final whenever the stream was not cancelled
+        // (grounded or refused alike). On cancel we skip Final — the
+        // receiver is gone; persistence below still records the partial answer.
+        if !cancelled
+            && let Some(sink) = &opts.stream_sink
+        {
+            let _ = sink.send(StreamEvent::Final {
+                answer: answer.clone(),
+            });
+        }
+
        // ── 9. Persist ─────────────────────────────────────────────────────
        let packed_chunks_json = if opts.explain {
            // Snapshot the packed entries as a portable list of objects so
@@ -997,3 +1076,91 @@ mod compute_stale_mirror_tests {
        assert!(!compute_stale(future, now(), 30));
    }
 }
+
+#[cfg(test)]
+mod stream_event_serde_tests { // pins the JSON shape of each StreamEvent variant
+    use super::*;
+    use kebab_core::{
+        AnswerRetrievalSummary, ChunkId, ChunkerVersion, Citation,
+        DocumentId, IndexVersion, ModelRef, RetrievalDetail, SearchHit, SearchMode,
+        TokenUsage, TraceId,
+    };
+    use kebab_core::asset::WorkspacePath;
+    use kebab_core::versions::PromptTemplateVersion;
+    use time::macros::datetime;
+
+    fn mk_hit() -> SearchHit { // fixture: a single lexical-only hit with fixed ids and timestamp
+        SearchHit {
+            rank: 1,
+            chunk_id: ChunkId("c1".into()),
+            doc_id: DocumentId("d1".into()),
+            doc_path: WorkspacePath::new("a.md".into()).unwrap(),
+            heading_path: vec!["H".into()],
+            section_label: None,
+            snippet: "s".into(),
+            citation: Citation::Line {
+                path: WorkspacePath::new("a.md".into()).unwrap(),
+                start: 1,
+                end: 1,
+                section: None,
+            },
+            retrieval: RetrievalDetail {
+                method: SearchMode::Lexical,
+                fusion_score: 0.5,
+                lexical_score: Some(0.5),
+                vector_score: None, // no vector side for a lexical-only hit
+                lexical_rank: Some(1),
+                vector_rank: None,
+            },
+            index_version: IndexVersion("v1".into()),
+            embedding_model: None,
+            chunker_version: ChunkerVersion("c@1".into()),
+            indexed_at: datetime!(2026-05-09 12:00:00 UTC),
+            stale: false,
+        }
+    }
+
+    #[test]
+    fn stream_event_token_serializes_with_kind_discriminator() {
+        let ev = StreamEvent::Token { delta: "안녕".into(), turn_index: Some(0) };
+        let v = serde_json::to_value(&ev).unwrap();
+        assert_eq!(v["kind"], "token"); // variant name appears in snake_case under "kind"
+        assert_eq!(v["delta"], "안녕"); // non-ASCII text round-trips unchanged
+        assert_eq!(v["turn_index"], 0); // Some(0) serializes as the bare number
+    }
+
+    #[test]
+    fn stream_event_retrieval_done_serializes_hits() {
+        let ev = StreamEvent::RetrievalDone { hits: vec![mk_hit()] };
+        let v = serde_json::to_value(&ev).unwrap();
+        assert_eq!(v["kind"], "retrieval_done");
+        assert_eq!(v["hits"].as_array().unwrap().len(), 1); // only the count is checked, not the hit's fields
+    }
+
+    #[test]
+    fn stream_event_final_serializes_answer() {
+        let answer = Answer {
+            answer: "x".into(),
+            citations: vec![],
+            grounded: true,
+            refusal_reason: None,
+            model: ModelRef { id: "m".into(), provider: "p".into(), dimensions: None },
+            embedding: None,
+            prompt_template_version: PromptTemplateVersion("rag-v1".into()),
+            retrieval: AnswerRetrievalSummary {
+                trace_id: TraceId("t".into()),
+                mode: SearchMode::Hybrid,
+                k: 10, score_gate: 0.3, top_score: 0.5,
+                chunks_returned: 1, chunks_used: 1,
+            },
+            usage: TokenUsage { prompt_tokens: 0, completion_tokens: 0, latency_ms: 0 },
+            created_at: datetime!(2026-05-09 12:00:00 UTC),
+            conversation_id: None,
+            turn_index: None,
+        };
+        let ev = StreamEvent::Final { answer };
+        let v = serde_json::to_value(&ev).unwrap();
+        assert_eq!(v["kind"], "final");
+        assert!(v["answer"].is_object()); // the Answer is nested under "answer" rather than flattened
+    }
+}
--- a/crates/kebab-rag/tests/pipeline.rs
+++ b/crates/kebab-rag/tests/pipeline.rs
@@ -14,7 +14,7 @@ use kebab_core::{
    FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage,
 };
 use kebab_llm::MockLanguageModel;
-use kebab_rag::{AskOpts, RagPipeline, RefusalReason};
+use kebab_rag::{AskOpts, RagPipeline, RefusalReason, StreamEvent};

 /// LM ID used everywhere — kept short so snapshots stay stable.
 const TEST_LM_ID: &str = "mock-lm";
@@ -270,18 +270,32 @@ fn streaming_forwards_tokens_to_sink() {
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new(canned));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

-    let (tx, rx) = std::sync::mpsc::channel::<String>();
+    let (tx, rx) = std::sync::mpsc::channel::<StreamEvent>();
    let mut opts = default_opts();
    opts.stream_sink = Some(tx);
    let _ = pipeline.ask("q", opts).unwrap();
-    let collected: String = rx.into_iter().collect::<Vec<_>>().join("");
+    // p9-fb-33: extract Token deltas from the staged event stream.
+    let collected: String = rx
+        .into_iter()
+        .filter_map(|ev| match ev {
+            StreamEvent::Token { delta, .. } => Some(delta),
+            _ => None,
+        })
+        .collect::<Vec<_>>()
+        .join("");
    assert_eq!(collected, canned);
 }

-// ── 10. dropped receiver does NOT abort generation ────────────────────────
+// ── 10. dropped receiver aborts generation, records LlmStreamAborted ──────
+//
+// p9-fb-33: cancel semantics changed. Pre-fb-33 the pipeline drove
+// the LM loop to completion and silently dropped sends. Now a
+// SendError breaks the loop and stamps `RefusalReason::LlmStreamAborted`
+// onto the persisted row — the partial answer (whatever was buffered
+// before the cancel) still gets written for audit.

 #[test]
-fn dropped_receiver_does_not_abort_generation() {
+fn dropped_receiver_aborts_with_llm_stream_aborted() {
    let env = RagEnv::new();
    let cid = id32("c1");
    let did = id32("d1");
@@ -292,13 +306,17 @@ fn dropped_receiver_does_not_abort_generation() {
    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new(canned));
    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());

-    let (tx, rx) = std::sync::mpsc::channel::<String>();
-    drop(rx); // receiver gone — every send fails silently
+    let (tx, rx) = std::sync::mpsc::channel::<StreamEvent>();
+    drop(rx); // receiver gone — first Token send fails, loop breaks
    let mut opts = default_opts();
    opts.stream_sink = Some(tx);
    let answer = pipeline.ask("q", opts).unwrap();
-    assert_eq!(answer.answer, canned, "generation completes despite dead sink");
-    assert!(answer.grounded);
+    assert!(!answer.grounded, "cancel takes priority over grounded");
+    assert_eq!(
+        answer.refusal_reason,
+        Some(RefusalReason::LlmStreamAborted),
+        "cancel records LlmStreamAborted",
+    );
    assert_eq!(env.count_answers(), 1, "answers row still persisted");
 }

--- a/crates/kebab-rag/tests/streaming_events.rs
+++ b/crates/kebab-rag/tests/streaming_events.rs
@@ -0,0 +1,217 @@
+//! p9-fb-33: pipeline-level streaming behavior — order invariants,
+//! cancel propagation, refusal flagging.
+
+mod common;
+
+use std::sync::Arc;
+use std::sync::atomic::Ordering;
+use std::sync::mpsc;
+
+use common::{MockRetriever, RagEnv, id32, mk_hit};
+use kebab_core::{
+    FinishReason, LanguageModel, RefusalReason, Retriever, SearchMode, TokenChunk, TokenUsage,
+};
+use kebab_llm::MockLanguageModel;
+use kebab_rag::{AskOpts, RagPipeline, StreamEvent};
+
+const TEST_LM_ID: &str = "mock-lm";
+
+/// Minimal LM mirroring `tests/pipeline.rs::CountingLm` so the
+/// streaming-events suite stays self-contained.
+struct CountingLm {
+    inner: MockLanguageModel,
+    calls: std::sync::atomic::AtomicUsize,
+}
+
+impl CountingLm {
+    fn new(canned: &str) -> Self {
+        Self {
+            inner: MockLanguageModel {
+                model_id: TEST_LM_ID.to_string(),
+                provider: "mock".to_string(),
+                context_tokens: 32_768,
+                canned_response: canned.to_string(),
+                canned_finish: FinishReason::Stop,
+                canned_usage: TokenUsage {
+                    prompt_tokens: 10,
+                    completion_tokens: 5,
+                    latency_ms: 7,
+                },
+            },
+            calls: std::sync::atomic::AtomicUsize::new(0),
+        }
+    }
+}
+
+impl LanguageModel for CountingLm {
+    fn model_ref(&self) -> kebab_core::ModelRef {
+        self.inner.model_ref()
+    }
+    fn context_tokens(&self) -> usize {
+        self.inner.context_tokens()
+    }
+    fn generate_stream(
+        &self,
+        req: kebab_core::GenerateRequest,
+    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
+        self.calls.fetch_add(1, Ordering::SeqCst);
+        self.inner.generate_stream(req)
+    }
+}
+
+fn opts_with_sink(tx: mpsc::Sender<StreamEvent>) -> AskOpts {
+    AskOpts {
+        k: 3,
+        explain: false,
+        mode: SearchMode::Lexical,
+        temperature: Some(0.0),
+        seed: Some(0),
+        stream_sink: Some(tx),
+        history: Vec::new(),
+        conversation_id: None,
+        turn_index: None,
+    }
+}
+
+/// Build a pipeline with one seeded chunk + canned LM response so
+/// retrieval lands a single hit and the LM emits at least one token.
+fn env_with_one_hit(canned: &str) -> (RagEnv, RagPipeline) {
+    let env = RagEnv::new();
+    let cid = id32("c1");
+    let did = id32("d1");
+    env.seed_chunk(&cid, &did, "notes/a.md", "apples are red.", &["Intro"]);
+    let hits = vec![mk_hit(1, &cid, &did, "notes/a.md", 0.85, &["Intro"])];
+    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(hits));
+    let lm: Arc<dyn LanguageModel> = Arc::new(CountingLm::new(canned));
+    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());
+    (env, pipeline)
+}
+
+#[test]
+fn ask_emits_retrieval_then_tokens_then_final() {
+    // Event-order invariant: RetrievalDone, then only Tokens, then Final.
+    let (_env, pipeline) = env_with_one_hit("apples are red. [#1]");
+    let (sink, stream) = mpsc::channel::<StreamEvent>();
+    let _ans = pipeline.ask("apples", opts_with_sink(sink)).unwrap();
+    // Drains until the channel closes.
+    let events: Vec<StreamEvent> = stream.iter().collect();
+
+    let head = events.first();
+    assert!(
+        matches!(head, Some(StreamEvent::RetrievalDone { .. })),
+        "first event must be RetrievalDone, got {:?}",
+        head
+    );
+
+    let tail = events.last();
+    assert!(
+        matches!(tail, Some(StreamEvent::Final { .. })),
+        "last event must be Final, got {:?}",
+        tail
+    );
+
+    // Whatever sits strictly between the two bookends has to be a Token.
+    let middle = &events[1..events.len() - 1];
+    if let Some(stray) = middle.iter().find(|ev| !matches!(ev, StreamEvent::Token { .. })) {
+        panic!("middle events must be Token, got {stray:?}");
+    }
+}
+
+#[test]
+fn ask_records_llm_stream_aborted_when_receiver_drops() {
+    let (env, pipeline) = env_with_one_hit("apples are red. [#1]");
+    let (tx, rx) = mpsc::channel::<StreamEvent>();
+    // Drop the receiver immediately so the first Token send fails.
+    drop(rx);
+    let ans = pipeline.ask("apples", opts_with_sink(tx)).unwrap();
+    assert!(!ans.grounded);
+    assert_eq!(ans.refusal_reason, Some(RefusalReason::LlmStreamAborted));
+    // Persistence still happens on cancel — the row is the audit trail.
+    assert_eq!(env.count_answers(), 1, "answers row written on cancel");
+}
+
+/// p9-fb-33 (PR #124 round 1, item 5): language model whose
+/// `generate_stream` waits on a barrier before delegating to `inner`.
+/// `ask_emits_no_final_when_cancelled_mid_stream` uses it to receive
+/// `RetrievalDone`, drop `rx`, and only then let the LM run, so the next
+/// `Token` send fails. The pipeline's cancel branch must avoid emitting
+/// `Final` and record `RefusalReason::LlmStreamAborted`.
+struct BlockingLm {
+    inner: MockLanguageModel, // canned model that produces the actual stream
+    /// Pipeline thread waits on this before yielding any token.
+    /// Test thread releases it after observing `RetrievalDone`.
+    gate: Arc<std::sync::Barrier>,
+}
+
+impl LanguageModel for BlockingLm {
+    fn model_ref(&self) -> kebab_core::ModelRef {
+        self.inner.model_ref()
+    }
+    fn context_tokens(&self) -> usize {
+        self.inner.context_tokens()
+    }
+    fn generate_stream(
+        &self,
+        req: kebab_core::GenerateRequest,
+    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
+        // Block until the test signals — guarantees `RetrievalDone`
+        // arrives at the receiver before any `Token` is queued.
+        self.gate.wait();
+        self.inner.generate_stream(req)
+    }
+}
+
+#[test]
+fn ask_emits_no_final_when_cancelled_mid_stream() {
+    use std::sync::Barrier;
+
+    let env = RagEnv::new();
+    let cid = id32("c1");
+    let did = id32("d1");
+    env.seed_chunk(&cid, &did, "notes/a.md", "apples are red.", &["Intro"]);
+    let hits = vec![mk_hit(1, &cid, &did, "notes/a.md", 0.85, &["Intro"])];
+    let retriever: Arc<dyn Retriever> = Arc::new(MockRetriever::new(hits));
+
+    let gate = Arc::new(Barrier::new(2)); // two parties: this test and the LM call
+    let lm: Arc<dyn LanguageModel> = Arc::new(BlockingLm {
+        inner: MockLanguageModel {
+            model_id: TEST_LM_ID.to_string(),
+            provider: "mock".to_string(),
+            context_tokens: 32_768,
+            canned_response: "apples are red. [#1]".to_string(),
+            canned_finish: FinishReason::Stop,
+            canned_usage: TokenUsage {
+                prompt_tokens: 10,
+                completion_tokens: 5,
+                latency_ms: 7,
+            },
+        },
+        gate: Arc::clone(&gate),
+    });
+    let pipeline = RagPipeline::new(env.config.clone(), retriever, lm, env.sqlite.clone());
+
+    let (tx, rx) = mpsc::channel::<StreamEvent>();
+    let opts = opts_with_sink(tx);
+    let handle = std::thread::spawn(move || pipeline.ask("apples", opts));
+
+    // Receive RetrievalDone first — pipeline emits this before
+    // calling generate_stream (where the LM blocks on the gate).
+    let first = rx.recv().expect("RetrievalDone must arrive");
+    assert!(
+        matches!(first, StreamEvent::RetrievalDone { .. }),
+        "first event must be RetrievalDone, got {first:?}",
+    );
+
+    // Drop rx BEFORE releasing the gate, so that the first Token send after
+    // the LM unblocks gets SendError → cancel branch. NOTE(review): this
+    // wait never returns if ask() stops before calling generate_stream.
+    drop(rx);
+    gate.wait();
+
+    let ans = handle.join().expect("ask thread").unwrap();
+
+    // rx is gone, so Final cannot be observed here; check the recorded refusal.
+    assert!(!ans.grounded);
+    assert_eq!(ans.refusal_reason, Some(RefusalReason::LlmStreamAborted));
+    assert_eq!(env.count_answers(), 1, "answers row written on cancel");
+}
--- a/crates/kebab-search/src/hybrid.rs
+++ b/crates/kebab-search/src/hybrid.rs
@@ -18,12 +18,15 @@

 use std::collections::HashMap;
 use std::sync::Arc;
+use std::time::Instant;

 use anyhow::Result;
 use kebab_core::{
-    IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
+    IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery, SearchTrace,
 };

+use crate::trace::{build_fusion_input_skeleton, candidates_from_hits, ScoreKind, TraceBuilder};
+
 /// Default `k_rrf` if `kb-config::SearchCfg::rrf_k` is misconfigured.
 /// Matches §6.4's documented default (60).
 const DEFAULT_K_RRF: u32 = 60;
@@ -145,20 +148,22 @@ impl Retriever for HybridRetriever {
 impl HybridRetriever {
    fn fuse(&self, query: &SearchQuery) -> Result<Vec<SearchHit>> {
        let target_k = if query.k == 0 { self.default_k } else { query.k };
-
-        // Fanout: ask each retriever for `target_k * MULTIPLIER` so
-        // the disjoint set of candidates is wide enough. The two
-        // per-side queries are identical (same text, k, mode, filters);
-        // only the dispatch differs, so we share one `SearchQuery`.
        let fanout_k = target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER);
        let lex_query = SearchQuery {
            k: fanout_k,
            ..query.clone()
        };
-
        let lex_hits = self.lexical.search(&lex_query)?;
        let vec_hits = self.vector.search(&lex_query)?;
+        self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)
+    }

+    fn fuse_with_inputs(
+        &self,
+        lex_hits: &[SearchHit],
+        vec_hits: &[SearchHit],
+        target_k: usize,
+    ) -> Result<Vec<SearchHit>> {
        tracing::debug!(
            lex = lex_hits.len(),
            vec = vec_hits.len(),
@@ -171,11 +176,13 @@ impl HybridRetriever {
        // already 1-based by both LexicalRetriever and VectorRetriever
        // (and any well-behaved Retriever should mirror).
        let lex_index: HashMap<String, (u32, SearchHit)> = lex_hits
-            .into_iter()
+            .iter()
+            .cloned()
            .map(|h| (h.chunk_id.0.clone(), (h.rank, h)))
            .collect();
        let vec_index: HashMap<String, (u32, SearchHit)> = vec_hits
-            .into_iter()
+            .iter()
+            .cloned()
            .map(|h| (h.chunk_id.0.clone(), (h.rank, h)))
            .collect();

@@ -312,6 +319,85 @@ impl HybridRetriever {
        tracing::debug!(rows = hits.len(), "kb-search hybrid: search done");
        Ok(hits)
    }
+
+    /// p9-fb-37: parallel to `Retriever::search` but additionally returns
+    /// a trace of pre-fusion lex/vec lists, RRF inputs (union with each
+    /// side's rank), and per-stage timing.
+    ///
+    /// `query.k == 0` falls back to `self.default_k`. A side the mode does
+    /// not run contributes an empty list and its timing field is never
+    /// written. RRF inputs absent from the returned hits keep the score
+    /// the skeleton was built with. Retriever/fusion errors propagate.
+    pub fn search_with_trace(
+        &self,
+        query: &SearchQuery,
+    ) -> anyhow::Result<(Vec<SearchHit>, SearchTrace)> {
+        let start_total = Instant::now();
+        let target_k = if query.k == 0 { self.default_k } else { query.k };
+        let fanout_query = SearchQuery {
+            k: target_k.saturating_mul(HYBRID_FANOUT_MULTIPLIER),
+            ..query.clone()
+        };
+
+        let mut tb = TraceBuilder::default();
+
+        let (lex_hits, vec_hits): (Vec<SearchHit>, Vec<SearchHit>) = match query.mode {
+            SearchMode::Lexical => {
+                let t0 = Instant::now();
+                let lh = self.lexical.search(&fanout_query)?;
+                tb.timing.lexical_ms = t0.elapsed().as_millis() as u64;
+                (lh, Vec::new())
+            }
+            SearchMode::Vector => {
+                let t0 = Instant::now();
+                let vh = self.vector.search(&fanout_query)?;
+                tb.timing.vector_ms = t0.elapsed().as_millis() as u64;
+                (Vec::new(), vh)
+            }
+            SearchMode::Hybrid => {
+                let t0 = Instant::now();
+                let lh = self.lexical.search(&fanout_query)?;
+                tb.timing.lexical_ms = t0.elapsed().as_millis() as u64;
+                let t1 = Instant::now();
+                let vh = self.vector.search(&fanout_query)?;
+                tb.timing.vector_ms = t1.elapsed().as_millis() as u64;
+                (lh, vh)
+            }
+        };
+
+        tb.lexical = candidates_from_hits(&lex_hits, ScoreKind::Lexical);
+        tb.vector = candidates_from_hits(&vec_hits, ScoreKind::Vector);
+        tb.rrf_inputs = build_fusion_input_skeleton(&lex_hits, &vec_hits);
+
+        // The trace rows above hold their own copies of what they need, so
+        // the single-side modes move their list into the result instead of
+        // cloning every fanout hit only to truncate the clone.
+        let t_fusion = Instant::now();
+        let final_hits: Vec<SearchHit> = match query.mode {
+            SearchMode::Lexical => lex_hits.into_iter().take(target_k).collect(),
+            SearchMode::Vector => vec_hits.into_iter().take(target_k).collect(),
+            SearchMode::Hybrid => self.fuse_with_inputs(&lex_hits, &vec_hits, target_k)?,
+        };
+        tb.timing.fusion_ms = t_fusion.elapsed().as_millis() as u64;
+
+        // Back-fill scores; keys borrow from `final_hits`, so no id is cloned.
+        let score_by_chunk: HashMap<&str, f32> = final_hits
+            .iter()
+            .map(|h| (h.chunk_id.0.as_str(), h.retrieval.fusion_score))
+            .collect();
+        for entry in &mut tb.rrf_inputs {
+            if let Some(s) = score_by_chunk.get(entry.chunk_id.0.as_str()) {
+                entry.fusion_score = *s;
+            }
+        }
+
+        // total_ms is wall-clock from start; per-stage `lexical_ms` /
+        // `vector_ms` / `fusion_ms` each truncate to whole millis via
+        // `as_millis() as u64`, so their sum can drift below total
+        // (sub-ms losses) — DO NOT assert `total_ms >= sum(stages)`.
+        tb.timing.total_ms = start_total.elapsed().as_millis() as u64;
+        Ok((final_hits, tb.into_trace()))
+    }
 }

 /// Parse the `hybrid_fusion` config string into a [`FusionPolicy`].
@@ -633,4 +719,107 @@ mod tests {
        let FusionPolicy::Rrf { k_rrf } = parse_fusion("rrf", 0);
        assert_eq!(k_rrf, DEFAULT_K_RRF);
    }
+
+    #[test]
+    fn search_with_trace_returns_lex_and_vec_lists() {
+        use kebab_core::{ChunkId, DocumentId, IndexVersion, ChunkerVersion,
+                         RetrievalDetail, SearchHit, SearchMode, SearchQuery,
+                         WorkspacePath, Citation};
+        use std::sync::Arc;
+
+        fn mk_hit(rank: u32, chunk: &str, score: f32, mode: SearchMode) -> SearchHit {
+            SearchHit {
+                rank,
+                chunk_id: ChunkId(chunk.into()),
+                doc_id: DocumentId(format!("d-{chunk}")),
+                doc_path: WorkspacePath::new(format!("{chunk}.md")).unwrap(),
+                heading_path: vec![],
+                section_label: None,
+                snippet: chunk.into(),
+                citation: Citation::Line {
+                    path: WorkspacePath::new(format!("{chunk}.md")).unwrap(),
+                    start: 1,
+                    end: 1,
+                    section: None,
+                },
+                retrieval: RetrievalDetail {
+                    method: mode,
+                    fusion_score: score,
+                    lexical_score: if mode == SearchMode::Lexical { Some(score) } else { None },
+                    vector_score: if mode == SearchMode::Vector { Some(score) } else { None },
+                    lexical_rank: if mode == SearchMode::Lexical { Some(rank) } else { None },
+                    vector_rank: if mode == SearchMode::Vector { Some(rank) } else { None },
+                },
+                index_version: IndexVersion("v1".into()),
+                embedding_model: None,
+                chunker_version: ChunkerVersion("c1".into()),
+                indexed_at: time::OffsetDateTime::UNIX_EPOCH,
+                stale: false,
+            }
+        }
+
+        struct Stub { hits: Vec<SearchHit> }
+        impl Retriever for Stub {
+            fn search(&self, _q: &SearchQuery) -> anyhow::Result<Vec<SearchHit>> {
+                Ok(self.hits.clone())
+            }
+            fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) }
+        }
+
+        let lex = Arc::new(Stub {
+            hits: vec![
+                mk_hit(1, "c1", 0.9, SearchMode::Lexical),
+                mk_hit(2, "c2", 0.5, SearchMode::Lexical),
+            ],
+        });
+        let vec_r = Arc::new(Stub {
+            hits: vec![
+                mk_hit(1, "c2", 0.8, SearchMode::Vector),
+                mk_hit(2, "c3", 0.6, SearchMode::Vector),
+            ],
+        });
+        let hybrid = HybridRetriever::with_policy(
+            lex.clone(),
+            vec_r.clone(),
+            FusionPolicy::Rrf { k_rrf: 60 },
+            2,
+        );
+        let q = SearchQuery {
+            text: "x".into(),
+            mode: SearchMode::Hybrid,
+            k: 2,
+            filters: Default::default(),
+        };
+        let (hits, trace) = hybrid.search_with_trace(&q).unwrap();
+        assert!(!hits.is_empty());
+        assert_eq!(trace.lexical.len(), 2);
+        assert_eq!(trace.vector.len(), 2);
+        // Union: c1, c2, c3 → 3 entries.
+        assert_eq!(trace.rrf_inputs.len(), 3);
+    }
+
+    #[test]
+    fn search_with_trace_lexical_mode_empty_vector() {
+        use kebab_core::{IndexVersion, SearchMode, SearchQuery};
+        use std::sync::Arc;
+        struct EmptyR;
+        impl Retriever for EmptyR {
+            fn search(&self, _q: &SearchQuery) -> anyhow::Result<Vec<kebab_core::SearchHit>> {
+                Ok(vec![])
+            }
+            fn index_version(&self) -> IndexVersion { IndexVersion("v1".into()) }
+        }
+        let lex = Arc::new(EmptyR);
+        let vec_r = Arc::new(EmptyR);
+        let hybrid = HybridRetriever::with_policy(lex, vec_r, FusionPolicy::Rrf { k_rrf: 60 }, 2);
+        let q = SearchQuery {
+            text: "x".into(),
+            mode: SearchMode::Lexical,
+            k: 2,
+            filters: Default::default(),
+        };
+        let (_hits, trace) = hybrid.search_with_trace(&q).unwrap();
+        assert!(trace.vector.is_empty());
+        assert_eq!(trace.timing.vector_ms, 0);
+    }
 }
--- a/crates/kebab-search/src/lexical.rs
+++ b/crates/kebab-search/src/lexical.rs
@@ -319,6 +319,54 @@ fn run_query(
        };
        params.push(Box::new(rank));
    }
+    // p9-fb-36: media_type filter (IN-list).
+    // `assets.media_type` JSON has two shapes:
+    //   - unit variant (Markdown / Pdf): JSON text, e.g. `"markdown"`
+    //   - tuple variant (Image(Png) / Audio(Mp3) / Other(s)): JSON object,
+    //     e.g. `{"image": "png"}`
+    // Extract a unified "kind" string for both shapes via:
+    //   CASE WHEN json_type = 'text' THEN json_extract($)
+    //        ELSE (first object key)
+    //   END IN (?, ...)
+    if !filters.media.is_empty() {
+        let placeholders: Vec<&str> =
+            std::iter::repeat_n("?", filters.media.len()).collect();
+        let placeholders = placeholders.join(",");
+        sql.push_str(&format!(
+            " AND f.doc_id IN (\
+               SELECT d2.doc_id FROM documents d2 \
+               JOIN assets a ON a.asset_id = d2.asset_id \
+               WHERE CASE \
+                 WHEN json_type(a.media_type) = 'text' THEN json_extract(a.media_type, '$') \
+                 ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+               END IN ({placeholders}))"
+        ));
+        for kind in &filters.media {
+            params.push(Box::new(kind.clone()));
+        }
+    }
+
+    // p9-fb-36: ingested_after filter.
+    // `documents.updated_at` is RFC3339 stored as TEXT (always UTC `Z` per
+    // fb-32 ingest path), so lexicographic >= compare is correct — but only
+    // when the filter instant is also formatted as UTC `Z`. With another
+    // offset (e.g. `+09:00`) the digits are local wall-clock time and the
+    // suffix `+` (0x2B) sorts before `Z` (0x5A), so convert to UTC first.
+    if let Some(after) = &filters.ingested_after {
+        let formatted = after
+            .to_offset(time::UtcOffset::UTC)
+            .format(&time::format_description::well_known::Rfc3339)
+            .expect("OffsetDateTime (UTC) formats to RFC3339");
+        sql.push_str(" AND d.updated_at >= ?");
+        params.push(Box::new(formatted));
+    }
+
+    // p9-fb-36: doc_id filter — single-doc scoping.
+    if let Some(id) = &filters.doc_id {
+        sql.push_str(" AND d.doc_id = ?");
+        params.push(Box::new(id.0.clone()));
+    }
+
    // path_glob is intentionally NOT applied here — see module comment
    // on PATH_GLOB_OVERFETCH and the post-filter in `LexicalRetriever::search`.

--- a/crates/kebab-search/src/lib.rs
+++ b/crates/kebab-search/src/lib.rs
@@ -19,6 +19,7 @@
 mod citation_helper;
 mod hybrid;
 mod lexical;
+mod trace;
 mod vector;

 pub use hybrid::{FusionPolicy, HybridRetriever};
--- a/crates/kebab-search/src/trace.rs
+++ b/crates/kebab-search/src/trace.rs
@@ -0,0 +1,85 @@
+//! p9-fb-37: trace capture helpers for `HybridRetriever::search_with_trace`.
+
+use std::collections::BTreeMap;
+
+use kebab_core::{
+    SearchHit, SearchTrace, TraceCandidate, TraceFusionInput, TraceTiming,
+};
+
+/// Convert each `SearchHit` into a `TraceCandidate`, copying ids, path
+/// and rank. `score_kind` names the retriever that produced `hits` and so
+/// which per-side score is reported; a missing score is recorded as 0.0.
+pub fn candidates_from_hits(hits: &[SearchHit], score_kind: ScoreKind) -> Vec<TraceCandidate> {
+    hits.iter()
+        .map(|h| TraceCandidate {
+            chunk_id: h.chunk_id.clone(),
+            doc_id: h.doc_id.clone(),
+            doc_path: h.doc_path.clone(),
+            rank: h.rank,
+            score: match score_kind {
+                ScoreKind::Lexical => h.retrieval.lexical_score.unwrap_or(0.0),
+                ScoreKind::Vector => h.retrieval.vector_score.unwrap_or(0.0),
+            },
+        })
+        .collect()
+}
+
+#[derive(Clone, Copy, Debug)]
+pub enum ScoreKind {
+    Lexical, // report `retrieval.lexical_score`
+    Vector, // report `retrieval.vector_score`
+}
+
+/// Build the union of chunk ids across the lex and vec hit lists, with
+/// each side's rank captured. Rows come back ordered by chunk id; when a
+/// list repeats a chunk id, the last rank seen wins. `fusion_score`
+/// starts at 0.0 and is overwritten by the caller in a second pass.
+pub fn build_fusion_input_skeleton(
+    lex: &[SearchHit],
+    vec: &[SearchHit],
+) -> Vec<TraceFusionInput> {
+    // Blank row for a chunk seen for the first time. It is only built
+    // through `or_insert_with`, so a chunk that already has a row costs
+    // neither a second `chunk_id` clone nor a throwaway struct.
+    fn blank(hit: &SearchHit) -> TraceFusionInput {
+        TraceFusionInput {
+            chunk_id: hit.chunk_id.clone(),
+            lexical_rank: None,
+            vector_rank: None,
+            fusion_score: 0.0,
+        }
+    }
+
+    let mut by_chunk: BTreeMap<String, TraceFusionInput> = BTreeMap::new();
+    for h in lex {
+        let row = by_chunk.entry(h.chunk_id.0.clone()).or_insert_with(|| blank(h));
+        row.lexical_rank = Some(h.rank);
+    }
+    for h in vec {
+        let row = by_chunk.entry(h.chunk_id.0.clone()).or_insert_with(|| blank(h));
+        row.vector_rank = Some(h.rank);
+    }
+
+    // BTreeMap iteration yields the rows sorted by their chunk-id key.
+    by_chunk.into_values().collect()
+}
+
+/// Container the hybrid retriever fills during a traced run.
+#[derive(Default)]
+pub struct TraceBuilder {
+    pub lexical: Vec<TraceCandidate>,
+    pub vector: Vec<TraceCandidate>,
+    pub rrf_inputs: Vec<TraceFusionInput>,
+    pub timing: TraceTiming,
+}
+
+impl TraceBuilder {
+    /// Consume the builder and hand its four collected parts, unchanged,
+    /// to a `SearchTrace`.
+    pub fn into_trace(self) -> SearchTrace {
+        // Destructuring makes the compiler flag any builder field that
+        // is not forwarded.
+        let Self { lexical, vector, rrf_inputs, timing } = self;
+        SearchTrace { lexical, vector, rrf_inputs, timing }
+    }
+}
--- a/crates/kebab-search/tests/common/mod.rs
+++ b/crates/kebab-search/tests/common/mod.rs
@@ -19,7 +19,9 @@ use std::sync::Arc;
 use kebab_config::Config;
 use kebab_core::{
    ChunkId, DocumentId, EmbeddingId, EmbeddingInput, EmbeddingKind,
-    EmbeddingModelId, EmbeddingVersion, IndexVersion, VectorRecord, VectorStore,
+    EmbeddingModelId, EmbeddingVersion, IndexVersion, MediaType,
+    Retriever, SearchFilters, SearchHit, SearchMode, SearchQuery,
+    VectorRecord, VectorStore,
 };
 use kebab_embed::{Embedder, MockEmbedder};
 use kebab_search::{LexicalRetriever, VectorRetriever};
@@ -173,6 +175,93 @@ impl HybridEnv {
        .unwrap();
    }

+    /// High-level helper: seed a doc with the default media type
+    /// (Markdown) and embed its text. Returns the `DocumentId` so
+    /// callers can use it in `doc_id` filter tests.
+    pub fn insert_doc(&self, path: &str, text: &str) -> DocumentId {
+        self.insert_doc_with_media(path, text, MediaType::Markdown)
+    }
+
+    /// High-level helper: seed a doc with an explicit `MediaType`.
+    /// The `media_type` is serialized to JSON (mirrors how
+    /// `DocumentStore::put_document` writes it) and stored in `assets`.
+    pub fn insert_doc_with_media(
+        &self,
+        path: &str,
+        text: &str,
+        media: MediaType,
+    ) -> DocumentId {
+        // Derive deterministic IDs from the path so repeated calls with
+        // the same path are idempotent (INSERT OR IGNORE).
+        let path_hash: String = {
+            use std::collections::hash_map::DefaultHasher;
+            use std::hash::{Hash, Hasher};
+            let mut h = DefaultHasher::new();
+            path.hash(&mut h);
+            format!("{:032x}", h.finish())
+        };
+        let doc_id = format!("d{}", &path_hash[..31]);
+        let chunk_id = format!("c{}", &path_hash[..31]);
+        let asset_id = format!("a{}", &path_hash[..31]);
+
+        let media_json = serde_json::to_string(&media).expect("serialize MediaType");
+        let conn = self.sqlite.read_conn();
+        conn.execute(
+            "INSERT OR IGNORE INTO assets (
+                asset_id, source_uri, workspace_path, media_type, byte_len,
+                checksum, storage_kind, storage_path, discovered_at
+             ) VALUES (?, ?, ?, ?, 0,
+                       'deadbeefdeadbeefdeadbeefdeadbeef',
+                       'reference', ?, '1970-01-01T00:00:00Z')",
+            params![
+                asset_id,
+                format!("file:///{path}"),
+                path,
+                media_json,
+                path,
+            ],
+        )
+        .unwrap();
+        conn.execute(
+            "INSERT OR IGNORE INTO documents (
+                doc_id, asset_id, workspace_path, title, lang, source_type,
+                trust_level, parser_version, doc_version, schema_version,
+                metadata_json, provenance_json, created_at, updated_at
+             ) VALUES (?, ?, ?, NULL, 'en', 'markdown', 'primary', 'v1', 1, 1,
+                       '{}', '{}', '1970-01-01T00:00:00Z', '1970-01-01T00:00:00Z')",
+            params![doc_id, asset_id, path],
+        )
+        .unwrap();
+        let heading_json = "[]";
+        conn.execute(
+            "INSERT OR IGNORE INTO chunks (
+                chunk_id, doc_id, text, heading_path_json, section_label,
+                source_spans_json, token_estimate, chunker_version,
+                policy_hash, block_ids_json, created_at
+             ) VALUES (?, ?, ?, ?, NULL,
+                       '[{\"kind\":\"line\",\"start\":1,\"end\":1}]',
+                       1, 'v1', 'h', '[]', '1970-01-01T00:00:00Z')",
+            params![chunk_id, doc_id, text, heading_json],
+        )
+        .unwrap();
+        drop(conn);
+        self.embed_and_upsert(&chunk_id, &doc_id, text, &[]);
+        DocumentId(doc_id)
+    }
+
+    /// Run a `SearchMode::Vector` query against the seeded corpus and
+    /// return the resulting `Vec<SearchHit>`.
+    pub fn run_vector_search(&self, query: &str, filters: &SearchFilters) -> Vec<SearchHit> {
+        let r = self.vector_retriever();
+        let q = SearchQuery {
+            text: query.to_string(),
+            mode: SearchMode::Vector,
+            k: 10,
+            filters: filters.clone(),
+        };
+        r.search(&q).expect("vector search")
+    }
+
    /// Embed `text` as a Document and upsert it as the embedding for
    /// `chunk_id`. Drives the same code path production uses:
    /// MockEmbedder → VectorRecord → LanceVectorStore::upsert →
--- a/crates/kebab-search/tests/hybrid.rs
+++ b/crates/kebab-search/tests/hybrid.rs
@@ -15,7 +15,7 @@ use common::{
    HybridEnv, id32, require_avx_or_panic, TEST_LEX_INDEX_VERSION, TEST_VEC_INDEX_VERSION,
 };
 use kebab_core::{
-    Retriever, SearchFilters, SearchHit, SearchMode, SearchQuery,
+    MediaType, Retriever, SearchFilters, SearchHit, SearchMode, SearchQuery,
 };
 use kebab_search::{FusionPolicy, HybridRetriever};
 use rusqlite::params;
@@ -213,6 +213,57 @@ fn hybrid_snapshot_run_1() {
    }
 }

+/// p9-fb-36: vector post-filter must pass `media` through `filter_chunks`.
+/// Seeding two docs (markdown + pdf) and filtering for pdf-only must
+/// return only the pdf chunk, proving `LanceVectorStore::search` →
+/// `SqliteStore::filter_chunks` correctly applies the media arm.
+#[test]
+#[ignore = "requires AVX-capable hardware (LanceDB)"]
+fn vector_filter_by_media() {
+    require_avx_or_panic();
+    let env = HybridEnv::new();
+    env.insert_doc_with_media("md1.md", "rust ownership", MediaType::Markdown);
+    env.insert_doc_with_media("doc.pdf", "rust pdf body", MediaType::Pdf);
+
+    let filters = SearchFilters {
+        media: vec!["pdf".to_string()],
+        ..Default::default()
+    };
+    let hits = env.run_vector_search("rust", &filters);
+    assert_eq!(hits.len(), 1, "media filter must keep only pdf chunk");
+    assert!(
+        hits[0].doc_path.0.ends_with(".pdf"),
+        "expected .pdf path, got: {}",
+        hits[0].doc_path.0
+    );
+}
+
+/// p9-fb-36: vector post-filter must pass `doc_id` through `filter_chunks`.
+/// Seeding two docs with shared text, filtering by one doc_id must return
+/// only chunks from that doc.
+#[test]
+#[ignore = "requires AVX-capable hardware (LanceDB)"]
+fn vector_filter_by_doc_id() {
+    require_avx_or_panic();
+    let env = HybridEnv::new();
+    let target = env.insert_doc("a.md", "shared knowledge");
+    env.insert_doc("b.md", "shared knowledge");
+
+    let filters = SearchFilters {
+        doc_id: Some(target.clone()),
+        ..Default::default()
+    };
+    let hits = env.run_vector_search("shared", &filters);
+    assert!(
+        !hits.is_empty(),
+        "doc_id filter must return hits for the target doc"
+    );
+    assert!(
+        hits.iter().all(|h| h.doc_id == target),
+        "all hits must belong to the target doc_id"
+    );
+}
+
 #[test]
 #[ignore = "requires AVX-capable hardware (LanceDB)"]
 fn vector_hit_carries_indexed_at() {
--- a/crates/kebab-search/tests/lexical.rs
+++ b/crates/kebab-search/tests/lexical.rs
@@ -8,11 +8,15 @@
 use std::sync::Arc;

 use kebab_config::Config;
-use kebab_core::{IndexVersion, Lang, Retriever, SearchFilters, SearchMode, SearchQuery, TrustLevel};
+use kebab_core::{
+    DocumentId, IndexVersion, Lang, MediaType, Retriever, SearchFilters, SearchHit, SearchMode,
+    SearchQuery, TrustLevel,
+};
 use kebab_search::LexicalRetriever;
 use kebab_store_sqlite::SqliteStore;
 use rusqlite::Connection;
 use tempfile::TempDir;
+use time::OffsetDateTime;

 // ── Test scaffolding ─────────────────────────────────────────────────────

@@ -679,6 +683,210 @@ fn search_hit_carries_indexed_at_from_documents_updated_at() {
    assert!(!hit.stale, "lexical retriever must default stale=false");
 }

+// ── TestEnv helper for fb-36 filter tests ───────────────────────────────
+
+/// Fixture wrapper used by the fb-36 filter tests: owns an `Env` plus a
+/// counter for generating distinct ids. Kept as its own type rather than
+/// extending `Env`, so the pre-existing tests stay unmodified.
+struct TestEnv {
+    inner: Env, // underlying environment; used for `raw_conn()` and `retriever()`
+    counter: std::cell::Cell<u32>, // bumped by `next_id` through `&self`
+}
+
+impl TestEnv {
+    /// Fresh environment with the id counter starting at zero.
+    fn new() -> Self {
+        let inner = Env::new();
+        let counter = std::cell::Cell::new(0);
+        Self { inner, counter }
+    }
+
+    /// Hand out the next id: the current counter value is appended to
+    /// `prefix` (zero-padded to 4 digits) and the label goes through `id32`.
+    fn next_id(&self, prefix: &str) -> String {
+        // `replace` stores the bumped value and yields the previous one.
+        let seq = self.counter.replace(self.counter.get() + 1);
+        let label = format!("{prefix}{seq:04}");
+        id32(&label)
+    }
+
+    /// Markdown document stamped with the current time; returns its id.
+    fn insert_doc(&self, path: &str, body: &str) -> DocumentId {
+        self.insert_doc_full(path, body, MediaType::Markdown, OffsetDateTime::now_utc())
+    }
+
+    /// Same as `insert_doc`, but the asset row records the given `media`.
+    fn insert_doc_with_media(&self, path: &str, body: &str, media: MediaType) -> DocumentId {
+        let now = OffsetDateTime::now_utc();
+        self.insert_doc_full(path, body, media, now)
+    }
+
+    /// Markdown document whose `documents.updated_at` is chosen by the
+    /// caller; the `ingested_after` filter tests depend on this.
+    fn insert_doc_with_updated_at(
+        &self,
+        path: &str,
+        body: &str,
+        updated_at: OffsetDateTime,
+    ) -> DocumentId {
+        self.insert_doc_full(path, body, MediaType::Markdown, updated_at)
+    }
+
+    /// Write one asset row, one document row and one chunk row holding
+    /// `body` through the raw connection, then return the new document id.
+    fn insert_doc_full(
+        &self,
+        path: &str,
+        body: &str,
+        media: MediaType,
+        updated_at: OffsetDateTime,
+    ) -> DocumentId {
+        use time::format_description::well_known::Rfc3339;
+        let (doc_key, chunk_key) = (self.next_id("doc"), self.next_id("chk"));
+        let asset_key = self.next_id("ast");
+        let media_json = serde_json::to_string(&media).expect("serialize MediaType");
+        let stamp = updated_at.format(&Rfc3339).expect("format updated_at");
+
+        let db = self.inner.raw_conn();
+        db.execute(
+            "INSERT OR IGNORE INTO assets (
+                asset_id, source_uri, workspace_path, media_type, byte_len,
+                checksum, storage_kind, storage_path, discovered_at
+            ) VALUES (?, ?, ?, ?, 0,
+                      'd0', 'reference', ?, '2024-01-01T00:00:00Z')",
+            rusqlite::params![asset_key, format!("file:///{path}"), path, media_json, path],
+        )
+        .expect("insert asset");
+
+        db.execute(
+            "INSERT INTO documents (
+                doc_id, asset_id, workspace_path, title, lang,
+                source_type, trust_level, parser_version,
+                doc_version, schema_version, metadata_json,
+                provenance_json, created_at, updated_at
+            ) VALUES (?, ?, ?, NULL, 'en', 'markdown', 'primary', 'pv1', 1, 1,
+                      '{}', '{\"events\":[]}',
+                      '2024-01-01T00:00:00Z', ?)",
+            rusqlite::params![doc_key, asset_key, path, stamp],
+        )
+        .expect("insert document");
+
+        let heading_json = serde_json::to_string(&Vec::<&str>::new()).unwrap();
+        db.execute(
+            "INSERT INTO chunks (
+                chunk_id, doc_id, text, heading_path_json, section_label,
+                source_spans_json, token_estimate, chunker_version,
+                policy_hash, block_ids_json, created_at
+            ) VALUES (?, ?, ?, ?, NULL,
+                      '[{\"kind\":\"line\",\"start\":1,\"end\":1}]',
+                      1, 'v1', 'h', '[]', '2024-01-01T00:00:00Z')",
+            rusqlite::params![chunk_key, doc_key, body, heading_json],
+        )
+        .expect("insert chunk");
+
+        DocumentId(doc_key)
+    }
+
+    /// Lexical search (`k = 10`) for `query` under `filters`; panics on error.
+    fn run_search(&self, query: &str, filters: &SearchFilters) -> Vec<SearchHit> {
+        let request = SearchQuery {
+            text: query.to_owned(),
+            mode: SearchMode::Lexical,
+            k: 10,
+            filters: filters.clone(),
+        };
+        self.inner.retriever().search(&request).expect("search")
+    }
+}
+
+// ── fb-36 filter tests ───────────────────────────────────────────────────
+
+/// fb-36: a `media = ["pdf"]` filter keeps the PDF doc and drops the markdown one.
+#[test]
+fn lexical_filter_by_media() {
+    let fixture = TestEnv::new();
+    fixture.insert_doc_with_media("md1.md", "rust ownership", MediaType::Markdown);
+    fixture.insert_doc_with_media("doc.pdf", "rust pdf body", MediaType::Pdf);
+    let pdf_only = SearchFilters { media: vec![String::from("pdf")], ..SearchFilters::default() };
+    let found = fixture.run_search("rust", &pdf_only);
+    // Exactly one survivor, and it has to be the PDF.
+    assert_eq!(found.len(), 1, "only pdf doc should match");
+    let path = &found[0].doc_path.0;
+    assert!(path.ends_with(".pdf"), "got: {}", path);
+}
+
+/// fb-36: with a 2025-01-01 cut-off, only the document stamped 2026 is returned.
+#[test]
+fn lexical_filter_by_ingested_after() {
+    use time::macros::datetime;
+    let fixture = TestEnv::new();
+    // Same body text in both docs, so only `updated_at` can tell them apart.
+    let seeds = [
+        ("old.md", datetime!(2020-01-01 00:00:00 UTC)),
+        ("new.md", datetime!(2026-01-01 00:00:00 UTC)),
+    ];
+    for (path, stamp) in seeds {
+        fixture.insert_doc_with_updated_at(path, "ingest test", stamp);
+    }
+    let recent_only = SearchFilters {
+        ingested_after: Some(datetime!(2025-01-01 00:00:00 UTC)),
+        ..Default::default()
+    };
+    let found = fixture.run_search("ingest", &recent_only);
+    assert_eq!(found.len(), 1, "only post-2025 doc matches");
+}
+
+/// fb-36: scoping by `doc_id` yields hits from that document and no other.
+#[test]
+fn lexical_filter_by_doc_id() {
+    let fixture = TestEnv::new();
+    let wanted = fixture.insert_doc("a.md", "shared term");
+    fixture.insert_doc("b.md", "shared term");
+    let scoped = SearchFilters {
+        doc_id: Some(wanted.clone()),
+        ..Default::default()
+    };
+    let found = fixture.run_search("shared", &scoped);
+    assert!(!found.is_empty(), "should get at least one hit for target doc");
+    let check = |hit: &SearchHit| assert_eq!(hit.doc_id, wanted, "all hits must be from target doc");
+    found.iter().for_each(check);
+}
+
+#[test]
+fn lexical_filter_combinator_is_and() {
+    let env = TestEnv::new();
+    let target = env.insert_doc_with_media("a.md", "rust", MediaType::Markdown);
+    env.insert_doc_with_media("b.pdf", "rust", MediaType::Pdf);
+    let mut filters = SearchFilters { doc_id: Some(target.clone()), ..Default::default() };
+    filters.media = vec!["markdown".to_string()];
+    let hits = env.run_search("rust", &filters);
+    assert!(!hits.is_empty(), "target doc should match combined filter");
+    assert!(hits.iter().all(|h| h.doc_id == target));
+    // Contradictory pair: OR would still return hits (a.md by id, b.pdf by kind); AND must not.
+    filters.media = vec!["pdf".to_string()];
+    assert!(env.run_search("rust", &filters).is_empty(), "AND of disjoint filters is empty");
+}
+
+/// fb-36: a media kind nothing is stored under gives zero hits, not an error.
+#[test]
+fn lexical_filter_unknown_media_returns_empty() {
+    let fixture = TestEnv::new();
+    fixture.insert_doc("a.md", "rust");
+    let bogus = SearchFilters {
+        media: vec![String::from("nonexistent_kind")],
+        ..SearchFilters::default()
+    };
+    assert!(fixture.run_search("rust", &bogus).is_empty(), "unknown media → no hits, no error");
+}
+
+/// fb-36: default (empty) filters must not hide a matching document.
+#[test]
+fn lexical_empty_filters_match_default_behavior() {
+    let fixture = TestEnv::new();
+    fixture.insert_doc("a.md", "rust");
+    assert!(!fixture.run_search("rust", &SearchFilters::default()).is_empty());
+}
+
 #[test]
 fn lexical_snapshot_run_1() {
    // Pinned snapshot. A small, deterministic corpus; the JSON shape of
--- a/crates/kebab-store-sqlite/src/documents.rs
+++ b/crates/kebab-store-sqlite/src/documents.rs
@@ -375,6 +375,48 @@ impl kebab_core::DocumentStore for SqliteStore {
    }
 }

+impl SqliteStore {
+    /// p9-fb-35: return every `chunk_id` belonging to `doc_id`, sorted by
+    /// `(created_at, chunk_id)` ascending so repeated calls agree. Consumer:
+    /// `App::fetch chunk --context N`, which looks up ordinal neighbours.
+    ///
+    /// ⚠ Round-1 review caveat — the order is stable but not guaranteed to
+    /// be document order. `chunk_id` is a blake3 hash of
+    /// `(doc_id, chunker_version, …)`, so sorting its hex form says nothing
+    /// about position in the document. Chunks written in one ingest
+    /// transaction share `created_at` to the millisecond, which leaves
+    /// the `chunk_id` tie-break to decide, and the resulting "neighbors"
+    /// may not be adjacent in the source.
+    ///
+    /// The real fix is a `chunks.ordinal` column (V007 migration), or
+    /// sorting by the start offset in `chunks.source_spans_json[0]`;
+    /// tracked as follow-up. Until then `--context` neighbors are
+    /// best-effort: they line up with document position only when hash
+    /// order happens to match insertion order. Large markdown / PDF
+    /// (page-aligned chunks) likely re-orders. See `tasks/HOTFIXES.md` if escalated.
+    pub fn list_chunk_ids_for_doc(
+        &self,
+        doc_id: &kebab_core::DocumentId,
+    ) -> Result<Vec<kebab_core::ChunkId>> {
+        const SQL: &str = "SELECT chunk_id FROM chunks
+                 WHERE doc_id = ?
+                 ORDER BY created_at ASC, chunk_id ASC";
+
+        let conn = self.read_conn();
+        let mut stmt = conn.prepare(SQL).map_err(StoreError::from)?;
+        let rows = stmt
+            .query_map(params![doc_id.0], |row| row.get::<_, String>(0))
+            .map_err(StoreError::from)?;
+
+        let mut ids = Vec::new();
+        for row in rows {
+            // The first failing row aborts the whole listing.
+            ids.push(kebab_core::ChunkId(row.map_err(StoreError::from)?));
+        }
+        Ok(ids)
+    }
+}
+
 // ── Internal row + (de)serialization helpers ─────────────────────────────

 struct DocumentRow {
--- a/crates/kebab-store-sqlite/src/filters.rs
+++ b/crates/kebab-store-sqlite/src/filters.rs
@@ -129,6 +129,51 @@ impl SqliteStore {
            }
        }

+        // p9-fb-36: media_type filter (IN-list).
+        // `assets.media_type` JSON has two shapes:
+        //   - unit variant (Markdown / Pdf / …): JSON text, e.g. `"markdown"`
+        //   - tuple variant (Image(Png) / Audio(Mp3) / Other(s)): JSON object,
+        //     e.g. `{"image": "png"}`
+        // Extract a unified "kind" string for both shapes; mirrors lexical.
+        if !filters.media.is_empty() {
+            let media_ph = std::iter::repeat_n("?", filters.media.len())
+                .collect::<Vec<_>>()
+                .join(",");
+            sql.push_str(&format!(
+                " AND d.doc_id IN (\
+                   SELECT d2.doc_id FROM documents d2 \
+                   JOIN assets a ON a.asset_id = d2.asset_id \
+                   WHERE CASE \
+                     WHEN json_type(a.media_type) = 'text' THEN json_extract(a.media_type, '$') \
+                     ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+                   END IN ({media_ph}))"
+            ));
+            for kind in &filters.media {
+                bind.push(Box::new(kind.clone()));
+            }
+        }
+
+        // p9-fb-36: ingested_after filter.
+        // `documents.updated_at` is RFC3339 TEXT (UTC `Z` per fb-32);
+        // lexicographic >= compare is correct — but only when the filter
+        // instant is also formatted as UTC `Z`. With a non-UTC offset (e.g.
+        // `+09:00`) the wall-clock digits are shifted relative to UTC, so a
+        // string compare gives wrong results. Convert to UTC before formatting.
+        if let Some(after) = &filters.ingested_after {
+            let formatted = after
+                .to_offset(time::UtcOffset::UTC)
+                .format(&time::format_description::well_known::Rfc3339)
+                .expect("OffsetDateTime (UTC) formats to RFC3339");
+            sql.push_str(" AND d.updated_at >= ?");
+            bind.push(Box::new(formatted));
+        }
+
+        // p9-fb-36: doc_id filter — single-doc scoping.
+        if let Some(id) = &filters.doc_id {
+            sql.push_str(" AND d.doc_id = ?");
+            bind.push(Box::new(id.0.clone()));
+        }
+
        // Optional path_glob: applied in Rust on the rows we get back,
        // not in SQL — matching `kb-search::lexical`'s post-filter so
        // the glob semantics are byte-identical between retrievers.
@@ -280,6 +325,89 @@ mod tests {
            .unwrap();
    }

+    /// fb-36 seeding helper, a fuller sibling of `seed_committed`: the caller
+    /// also supplies the raw `media_type` JSON text (e.g. `r#""markdown""#`
+    /// or `r#""pdf""#`) and the `updated_at` RFC3339 string, so the `media`
+    /// and `ingested_after` filters can be exercised without the ingest
+    /// pipeline. Seeds asset, document, tags, one chunk, one committed embedding.
+    #[allow(clippy::too_many_arguments)]
+    fn seed_committed_full(
+        store: &SqliteStore,
+        chunk_id: &str,
+        doc_id: &str,
+        workspace_path: &str,
+        lang: &str,
+        tags: &[&str],
+        trust: &str,
+        media_type_json: &str,
+        updated_at: &str,
+    ) {
+        let asset_key = format!("a{}", &doc_id[..31]);
+        {
+            let db = store.lock_conn();
+            // Asset row: carries the media-type JSON under test.
+            db.execute(
+                "INSERT INTO assets (
+                    asset_id, source_uri, workspace_path, media_type, byte_len,
+                    checksum, storage_kind, storage_path, discovered_at
+                 ) VALUES (?, ?, ?, ?, 0, 'deadbeefdeadbeefdeadbeefdeadbeef',
+                           'reference', ?, '1970-01-01T00:00:00Z')",
+                params![
+                    asset_key,
+                    format!("file://{workspace_path}"),
+                    workspace_path,
+                    media_type_json,
+                    workspace_path,
+                ],
+            )
+            .unwrap();
+            // Document row: carries `lang`, `trust` and `updated_at`.
+            db.execute(
+                "INSERT INTO documents (
+                    doc_id, asset_id, workspace_path, title, lang, source_type,
+                    trust_level, parser_version, doc_version, schema_version,
+                    metadata_json, provenance_json, created_at, updated_at
+                 ) VALUES (?, ?, ?, NULL, ?, 'markdown', ?, 'v1', 1, 1,
+                           '{}', '{}', '1970-01-01T00:00:00Z', ?)",
+                params![doc_id, asset_key, workspace_path, lang, trust, updated_at],
+            )
+            .unwrap();
+            // One `document_tags` row per tag.
+            let tag_sql = "INSERT INTO document_tags (doc_id, tag) VALUES (?, ?)";
+            tags.iter().for_each(|tag| {
+                db.execute(tag_sql, params![doc_id, tag]).unwrap();
+            });
+            // A single chunk with fixed text for the document.
+            db.execute(
+                "INSERT INTO chunks (
+                    chunk_id, doc_id, text, heading_path_json, section_label,
+                    source_spans_json, token_estimate, chunker_version,
+                    policy_hash, block_ids_json, created_at
+                 ) VALUES (?, ?, 'hi', '[]', NULL, '[]', 1, 'v1', 'h', '[]',
+                           '1970-01-01T00:00:00Z')",
+                params![chunk_id, doc_id],
+            )
+            .unwrap();
+        }
+
+        // The embedding record is derived from the chunk id; it is first
+        // stored as pending and then marked committed.
+        let record = EmbeddingRecordRow {
+            embedding_id: format!("e{}", &chunk_id[..31]),
+            chunk_id: chunk_id.to_string(),
+            model_id: "m".to_string(),
+            model_version: "v1".to_string(),
+            dimensions: 4,
+            lance_table: "t".to_string(),
+            created_at: OffsetDateTime::UNIX_EPOCH,
+        };
+
+        let pending = std::slice::from_ref(&record);
+        store.put_embedding_records_pending(pending).unwrap();
+        let committed = std::slice::from_ref(&record.embedding_id);
+        store.mark_embedding_records_committed(committed).unwrap();
+    }
+
    fn cid(s: &str) -> ChunkId {
        ChunkId(s.to_string())
    }
@@ -449,4 +577,147 @@ mod tests {
        let out = store.filter_chunks(&[], &SearchFilters::default()).unwrap();
        assert!(out.is_empty());
    }
+
+    // ── p9-fb-36 new filter arms ─────────────────────────────────────────
+
+    #[test]
+    fn filter_chunks_media_type_keeps_matching_kind() {
+        // Two single-chunk docs, one seeded as markdown and one as pdf.
+        // Filtering for `pdf` must drop the markdown chunk and keep the
+        // pdf one.
+        let dir = TempDir::new().unwrap();
+        let store = open_store(&dir);
+        let (c_md, c_pdf) = (
+            "11111111111111111111111111111111",
+            "22222222222222222222222222222222",
+        );
+        let epoch = "1970-01-01T00:00:00Z";
+
+        let rows = [
+            (c_md, "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1", "notes/a.md", r#""markdown""#),
+            (c_pdf, "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", "notes/b.pdf", r#""pdf""#),
+        ];
+        for (chunk, doc, path, media) in rows {
+            seed_committed_full(&store, chunk, doc, path, "en", &[], "primary", media, epoch);
+        }
+
+        let pdf_only = SearchFilters {
+            media: vec!["pdf".to_string()],
+            ..Default::default()
+        };
+        let candidates = [cid(c_md), cid(c_pdf)];
+        let kept = store.filter_chunks(&candidates, &pdf_only).unwrap();
+        assert_eq!(kept, vec![cid(c_pdf)], "only pdf chunk should survive media filter");
+    }
+
+    #[test]
+    fn filter_chunks_ingested_after_excludes_old_docs() {
+        // One doc stamped 2020, one stamped 2026. With
+        // `ingested_after = 2025-01-01` only the 2026 chunk may remain.
+        let dir = TempDir::new().unwrap();
+        let store = open_store(&dir);
+        let c_old = "11111111111111111111111111111111";
+        let c_new = "22222222222222222222222222222222";
+        let md = r#""markdown""#;
+        let rows = [
+            (c_old, "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1", "old.md", "2020-01-01T00:00:00Z"),
+            (c_new, "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2", "new.md", "2026-01-01T00:00:00Z"),
+        ];
+        for (chunk, doc, path, stamp) in rows {
+            seed_committed_full(&store, chunk, doc, path, "en", &[], "primary", md, stamp);
+        }
+
+        let recent = SearchFilters {
+            ingested_after: Some(time::macros::datetime!(2025-01-01 00:00:00 UTC)),
+            ..Default::default()
+        };
+        let candidates = [cid(c_old), cid(c_new)];
+        let kept = store.filter_chunks(&candidates, &recent).unwrap();
+        assert_eq!(
+            kept,
+            vec![cid(c_new)],
+            "only post-2025 chunk should survive ingested_after filter"
+        );
+    }
+
+    #[test]
+    fn filter_chunks_doc_id_scopes_to_single_doc() {
+        // Two docs with one chunk each; scoping to the first doc's id must
+        // leave only that doc's chunk.
+        let dir = TempDir::new().unwrap();
+        let store = open_store(&dir);
+        let (c_keep, d_keep) = (
+            "11111111111111111111111111111111",
+            "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1",
+        );
+        let (c_drop, d_drop) = (
+            "22222222222222222222222222222222",
+            "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2",
+        );
+        let (md, epoch) = (r#""markdown""#, "1970-01-01T00:00:00Z");
+        seed_committed_full(&store, c_keep, d_keep, "a.md", "en", &[], "primary", md, epoch);
+        seed_committed_full(&store, c_drop, d_drop, "b.md", "en", &[], "primary", md, epoch);
+
+        let scoped = SearchFilters {
+            doc_id: Some(kebab_core::DocumentId(d_keep.to_string())),
+            ..Default::default()
+        };
+        let candidates = [cid(c_keep), cid(c_drop)];
+        let kept = store.filter_chunks(&candidates, &scoped).unwrap();
+        assert_eq!(
+            kept,
+            vec![cid(c_keep)],
+            "doc_id filter must scope to the target doc only"
+        );
+    }
+
+    #[test]
+    fn filter_chunks_ingested_after_non_utc_offset_compares_as_instant() {
+        // Regression guard (PR #127 review): a filter that carries a
+        // non-UTC offset must be compared as an instant, not as raw text.
+        //
+        //   stored doc : 2026-04-01T01:00:00Z
+        //   filter     : 2026-04-01T05:00:00+09:00 (= 2026-03-31T20:00:00Z)
+        //
+        // The doc instant is later than the filter instant, so the doc
+        // has to pass the filter.
+        //
+        // If the filter were formatted with its own offset, the string
+        // compare would see `…T01:00:00Z` vs `…T05:00:00+09:00`, find
+        // `01` < `05`, and wrongly drop the doc.
+        //
+        // Normalised to UTC, the compare is `2026-04-01T01:00:00Z` vs
+        // `2026-03-31T20:00:00Z`: Apr 1 sorts after Mar 31, doc kept.
+        let dir = TempDir::new().unwrap();
+        let store = open_store(&dir);
+        let chunk = "11111111111111111111111111111111";
+        let doc = "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1";
+        let stored_at = "2026-04-01T01:00:00Z";
+        seed_committed_full(
+            &store, chunk, doc,
+            "doc.md", "en", &[], "primary",
+            r#""markdown""#,
+            stored_at,
+        );
+
+        // 05:00 at +09:00 is 20:00 UTC on the previous day, i.e. five
+        // hours before the stored doc.
+        let nine_hours_east = time::OffsetDateTime::parse(
+            "2026-04-01T05:00:00+09:00",
+            &time::format_description::well_known::Rfc3339,
+        )
+        .expect("valid RFC3339 with +09:00 offset");
+
+        let shifted = SearchFilters {
+            ingested_after: Some(nine_hours_east),
+            ..Default::default()
+        };
+        let kept = store.filter_chunks(&[cid(chunk)], &shifted).unwrap();
+
+        assert_eq!(
+            kept,
+            vec![cid(chunk)],
+            "doc ingested at 01:00Z should match filter 05:00+09:00 (== 20:00Z previous day)"
+        );
+    }
 }
--- a/crates/kebab-store-sqlite/src/lib.rs
+++ b/crates/kebab-store-sqlite/src/lib.rs
@@ -28,6 +28,7 @@ mod fts;
 mod jobs;
 mod schema;
 mod store;
+pub mod stats_ext;

 pub use embeddings::EmbeddingRecordRow;
 pub use error::StoreError;
--- a/crates/kebab-store-sqlite/src/stats_ext.rs
+++ b/crates/kebab-store-sqlite/src/stats_ext.rs
@@ -0,0 +1,168 @@
+//! p9-fb-37: extended stats helpers — per-media / per-lang doc counts,
+//! stale doc count, on-disk index byte sums.
+
+use std::collections::BTreeMap;
+use std::path::Path;
+
+use kebab_core::{IndexBytes, MEDIA_KINDS};
+use rusqlite::Connection;
+
+/// p9-fb-37: result of [`breakdowns`] — per-media counts, per-language counts and a stale-document count.
+#[derive(Debug, Clone, Default)]
+pub struct Breakdowns {
+    pub media: BTreeMap<String, u64>, // media kind -> number of documents
+    pub lang: BTreeMap<String, u64>, // language (NULL keyed as "null") -> number of documents
+    pub stale_doc_count: u64, // documents with `updated_at` before the cutoff; 0 when disabled
+}
+
+/// Collect per-media and per-language document counts plus the stale
+/// document count, using three separate queries on `conn`.
+///
+/// `media` starts with every entry of `MEDIA_KINDS` at 0 (zero-padded); a
+/// kind found in the database but absent from that list is added as-is.
+/// `lang` only contains observed languages; NULL lang is keyed as the
+/// literal string `"null"`. `stale_doc_count` is 0 when
+/// `threshold_days == 0` (mirrors fb-32 staleness disable semantics) and
+/// when the threshold is too large to yield a representable cutoff.
+pub fn breakdowns(
+    conn: &Connection,
+    threshold_days: u64,
+) -> rusqlite::Result<Breakdowns> {
+    // media: dual JSON shape — text variant ("markdown") vs single-key
+    // object variant (e.g. {"image": …}, key = kind). Same CASE WHEN as fb-36.
+    let mut media: BTreeMap<String, u64> =
+        MEDIA_KINDS.iter().map(|k| ((*k).to_string(), 0u64)).collect();
+    let mut stmt = conn.prepare(
+        "SELECT \
+           CASE \
+             WHEN json_type(a.media_type) = 'text' \
+               THEN json_extract(a.media_type, '$') \
+             ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+           END AS kind, \
+           COUNT(DISTINCT d.doc_id) \
+         FROM documents d JOIN assets a ON a.asset_id = d.asset_id \
+         GROUP BY kind",
+    )?;
+    let rows = stmt.query_map([], |r| {
+        Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))
+    })?;
+    for row in rows {
+        let (kind, n) = row?;
+        media.insert(kind, n);
+    }
+
+    let mut lang: BTreeMap<String, u64> = BTreeMap::new();
+    let mut stmt = conn.prepare(
+        "SELECT COALESCE(lang, 'null') AS l, COUNT(*) \
+         FROM documents GROUP BY l",
+    )?;
+    let rows = stmt.query_map([], |r| {
+        Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?))
+    })?;
+    for row in rows {
+        let (l, n) = row?;
+        lang.insert(l, n);
+    }
+
+    // Checked arithmetic: a plain `as i64` cast and `*` would wrap or panic for
+    // huge thresholds and could yield a future cutoff that marks every doc stale.
+    let cutoff_str = i64::try_from(threshold_days)
+        .ok()
+        .filter(|days| *days > 0)
+        .and_then(|days| days.checked_mul(86_400))
+        .and_then(|secs| time::OffsetDateTime::now_utc().checked_sub(time::Duration::seconds(secs)))
+        .and_then(|at| at.format(&time::format_description::well_known::Rfc3339).ok());
+    let stale_doc_count: u64 = match cutoff_str {
+        None => 0,
+        Some(cutoff) => conn.query_row(
+            "SELECT COUNT(*) FROM documents WHERE updated_at < ?",
+            [cutoff],
+            |r| r.get(0),
+        )?,
+    };
+
+    Ok(Breakdowns { media, lang, stale_doc_count })
+}
+
+/// Sum on-disk bytes of the SQLite database (main + WAL + SHM, each
+/// best-effort: unreadable = 0) and the LanceDB directory tree. Missing
+/// files / dir = 0, including entries deleted while the walk is running.
+pub fn index_bytes(data_dir: &Path) -> std::io::Result<IndexBytes> {
+    // Recursive byte total of regular files under `p`. "Not found" is handled
+    // where it occurs; an `exists()` pre-check cannot cover later removals.
+    fn dir_walk_sum(p: &Path) -> std::io::Result<u64> {
+        let entries = match std::fs::read_dir(p) {
+            Ok(entries) => entries,
+            Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
+            Err(e) => return Err(e),
+        };
+        let mut total = 0u64;
+        for entry in entries {
+            let entry = entry?;
+            let ty = entry.file_type()?;
+            if ty.is_dir() {
+                total += dir_walk_sum(&entry.path())?;
+            } else if ty.is_file() {
+                total += match entry.metadata() {
+                    Err(e) if e.kind() == std::io::ErrorKind::NotFound => 0,
+                    meta => meta?.len(),
+                };
+            }
+        }
+        Ok(total)
+    }
+    let file = |name: &str| std::fs::metadata(data_dir.join(name)).map(|m| m.len()).unwrap_or(0);
+    let sqlite = file("kebab.sqlite") + file("kebab.sqlite-wal") + file("kebab.sqlite-shm");
+    let lancedb = dir_walk_sum(&data_dir.join("lancedb"))?;
+    Ok(IndexBytes { sqlite, lancedb })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Temp dir plus a migrated store rooted in it; callers keep the dir alive.
+    fn open_fresh() -> (tempfile::TempDir, crate::SqliteStore) {
+        let tmp = tempfile::tempdir().unwrap();
+        let mut config = kebab_config::Config::defaults();
+        config.storage.data_dir = tmp.path().to_string_lossy().into_owned();
+        let store = crate::SqliteStore::open(&config).unwrap();
+        store.run_migrations().unwrap();
+        (tmp, store)
+    }
+
+    /// An empty store reports zero for every known media kind and nothing else.
+    #[test]
+    fn breakdowns_empty_corpus() {
+        let (_tmp, store) = open_fresh();
+        let stats = breakdowns(&store.read_conn(), 0).unwrap();
+        // 5 keys all zero, lang map empty, stale 0.
+        assert_eq!(stats.media.len(), 5);
+        for kind in MEDIA_KINDS.iter() {
+            assert_eq!(stats.media.get(*kind), Some(&0u64));
+        }
+        assert!(stats.lang.is_empty());
+        assert_eq!(stats.stale_doc_count, 0);
+    }
+
+    /// After migrations the main SQLite file exists; no LanceDB dir yet.
+    #[test]
+    fn index_bytes_includes_sqlite_main() {
+        let (tmp, _store) = open_fresh();
+        let sizes = index_bytes(tmp.path()).unwrap();
+        assert!(sizes.sqlite > 0, "main sqlite file should exist after migrations");
+        assert_eq!(sizes.lancedb, 0);
+    }
+
+    /// Files nested below `lancedb/` are included in the LanceDB total.
+    #[test]
+    fn index_bytes_lancedb_dir_walk() {
+        let tmp = tempfile::tempdir().unwrap();
+        // One 1 KiB file nested a level below `lancedb/`.
+        let table_dir = tmp.path().join("lancedb").join("vectors.lance");
+        std::fs::create_dir_all(&table_dir).unwrap();
+        std::fs::write(table_dir.join("data.bin"), vec![0u8; 1024]).unwrap();
+        let sizes = index_bytes(tmp.path()).unwrap();
+        assert_eq!(sizes.lancedb, 1024);
+    }
+}
--- a/crates/kebab-store-sqlite/src/store.rs
+++ b/crates/kebab-store-sqlite/src/store.rs
@@ -604,6 +604,12 @@ pub struct CountSummary {
    /// ISO-8601 timestamp of the most-recently updated document row, or
    /// `None` when the store is empty.
    pub last_ingest_at: Option<String>,
+    /// p9-fb-37: per-media-kind doc count (5 keys, zero-padded).
+    pub media_breakdown: std::collections::BTreeMap<String, u64>,
+    /// p9-fb-37: per-language doc count, NULL keyed as `"null"`.
+    pub lang_breakdown: std::collections::BTreeMap<String, u64>,
+    /// p9-fb-37: docs whose `updated_at < now - threshold_days`. 0 when threshold=0.
+    pub stale_doc_count: u64,
 }

 impl SqliteStore {
@@ -611,39 +617,58 @@ impl SqliteStore {
    /// most-recent `documents.updated_at` timestamp.
    ///
    /// Uses `read_conn()` (no mutations) — mirrors the pattern used by
-    /// [`Self::corpus_revision`].
-    pub fn count_summary(&self) -> anyhow::Result<CountSummary> {
+    /// [`Self::corpus_revision`]. Shared helper: row counts plus breakdowns for the given threshold.
+    fn count_summary_inner(&self, threshold_days: u64) -> anyhow::Result<CountSummary> {
+        use anyhow::Context;
+        use rusqlite::OptionalExtension;
+
        let conn = self.read_conn();

        let doc_count: u64 = conn
            .query_row("SELECT COUNT(*) FROM documents", [], |r| r.get(0))
            .context("count documents")?;
-
        let chunk_count: u64 = conn
            .query_row("SELECT COUNT(*) FROM chunks", [], |r| r.get(0))
            .context("count chunks")?;
-
        let asset_count: u64 = conn
            .query_row("SELECT COUNT(*) FROM assets", [], |r| r.get(0))
            .context("count assets")?;
-
        let last_ingest_at: Option<String> = conn
-            .query_row(
-                "SELECT MAX(updated_at) FROM documents",
-                [],
-                |r| r.get(0),
-            )
+            .query_row("SELECT MAX(updated_at) FROM documents", [], |r| r.get(0))
            .optional()
            .context("max updated_at")?
            .flatten();

+        let bd = crate::stats_ext::breakdowns(&conn, threshold_days).context("breakdowns")?;
+
        Ok(CountSummary {
            doc_count,
            chunk_count,
            asset_count,
            last_ingest_at,
+            media_breakdown: bd.media,
+            lang_breakdown: bd.lang,
+            stale_doc_count: bd.stale_doc_count,
        })
    }
+
+    /// Row counts and breakdowns with staleness disabled (`stale_doc_count` is 0).
+    pub fn count_summary(&self) -> anyhow::Result<CountSummary> {
+        // p9-fb-37: default uses threshold_days=0 (matches fb-32 disable
+        // semantics). Callers that need real stale_doc_count call
+        // count_summary_with_threshold.
+        self.count_summary_inner(0)
+    }
+
+    /// p9-fb-37: variant of `count_summary` that forwards `threshold_days`
+    /// (e.g. `config.search.stale_threshold_days`). Callers that need a
+    /// meaningful `stale_doc_count` (e.g. `kebab schema`) use this one.
+    pub fn count_summary_with_threshold(
+        &self,
+        threshold_days: u64,
+    ) -> anyhow::Result<CountSummary> {
+        self.count_summary_inner(threshold_days)
+    }
 }

 /// Apply the design §5 / task-spec pragmas. Called once per connection.
@@ -681,6 +706,9 @@ mod tests {
        assert_eq!(s.chunk_count, 0);
        assert_eq!(s.asset_count, 0);
        assert!(s.last_ingest_at.is_none());
+        assert_eq!(s.media_breakdown.len(), 5);
+        assert!(s.lang_breakdown.is_empty());
+        assert_eq!(s.stale_doc_count, 0);
    }
 }

--- a/crates/kebab-tui/src/app.rs
+++ b/crates/kebab-tui/src/app.rs
@@ -186,9 +186,12 @@ impl Default for SearchState {
 /// Ask pane state — owned by p9-3, extended by p9-fb-16 for
 /// multi-turn conversation transcript.
 ///
-/// The worker thread (`thread`) owns the `mpsc::Sender<String>` that
-/// `kebab-app::ask` writes tokens into. The pane keeps the matching
-/// `rx` and drains it once per render frame (no blocking).
+/// The worker thread (`thread`) owns the `mpsc::Sender<kebab_app::StreamEvent>`
+/// that `kebab-app::ask` writes events into. The pane keeps the matching
+/// `rx` and drains it once per render frame (no blocking). Only the
+/// `Token { delta }` variant is consumed for the streaming transcript;
+/// `RetrievalDone` and `Final` are ignored (citations render from
+/// `last_answer` after the worker join).
 ///
 /// p9-fb-16: completed `Turn`s accumulate in `turns`; the worker
 /// passes a snapshot of `turns` as `history` to
@@ -214,7 +217,7 @@ pub struct AskState {
    pub thread: Option<std::thread::JoinHandle<anyhow::Result<kebab_core::Answer>>>,
    /// Token receiver paired with the worker's `Sender`. Drained
    /// every render frame.
-    pub rx: Option<std::sync::mpsc::Receiver<String>>,
+    pub rx: Option<std::sync::mpsc::Receiver<kebab_app::StreamEvent>>,
    /// Vertical scroll offset for the transcript area when content
    /// exceeds the viewport. Only consulted when `follow_tail` is
    /// false; otherwise the renderer overrides this with the
@@ -384,6 +387,8 @@ pub struct App {
    pub ask: Option<AskState>,
    /// Populated by p9-4.
    pub inspect: Option<InspectState>,
+    /// p9-fb-37: trace popup state, `Some` while open.
+    pub trace_popup: Option<crate::trace_popup::TracePopupState>,
    /// Populated by p9-fb-03 when the user kicks off an in-shell
    /// ingest (Library `r`). Cleared by the run loop a few seconds
    /// after the run reaches a terminal event.
@@ -458,6 +463,7 @@ impl App {
            search: None,
            ask: None,
            inspect: None,
+            trace_popup: None,
            ingest_state: None,
            error_overlay: None,
            should_quit: false,
--- a/crates/kebab-tui/src/ask.rs
+++ b/crates/kebab-tui/src/ask.rs
@@ -483,7 +483,7 @@ pub fn handle_key_ask(state: &mut App, key: KeyEvent) -> KeyOutcome {
 }

 fn spawn_ask_worker(state: &mut App) {
-    let (tx, rx) = mpsc::channel::<String>();
+    let (tx, rx) = mpsc::channel::<kebab_app::StreamEvent>();
    let cfg = state.config.clone();
    let s = state.ask.as_mut().unwrap();
    // p9-fb-10: take() consumes the input in one step (no clone +
@@ -542,8 +542,18 @@ fn make_conversation_id() -> String {
 pub(crate) fn drain_stream(state: &mut App) {
    let Some(s) = state.ask.as_mut() else { return };
    if let Some(rx) = &s.rx {
-        for tok in rx.try_iter() {
-            s.partial.push_str(&tok);
+        for ev in rx.try_iter() {
+            match ev {
+                kebab_app::StreamEvent::Token { delta, .. } => {
+                    s.partial.push_str(&delta);
+                }
+                // p9-fb-33: TUI ignores RetrievalDone (citation
+                // panel renders after completion via `last_answer`)
+                // and Final (the worker thread's join already
+                // delivers the canonical Answer in poll_worker).
+                kebab_app::StreamEvent::RetrievalDone { .. }
+                | kebab_app::StreamEvent::Final { .. } => {}
+            }
        }
    }
 }
--- a/crates/kebab-tui/src/cheatsheet.rs
+++ b/crates/kebab-tui/src/cheatsheet.rs
@@ -80,6 +80,7 @@ pub fn render_cheatsheet(f: &mut Frame, area: Rect, app: &App) {
        ("Delete", "remove char at cursor"),
        ("g", "open hit's citation in $EDITOR (Normal)"),
        ("o", "inspect selected hit's chunk (Normal — was `i` pre-fb-21)"),
+        ("t", "open retrieval trace popup (Normal — p9-fb-37)"),
        ("i", "Normal → Insert (toggle back to typing)"),
        ("Esc", "back to Library"),
    ]);
--- a/crates/kebab-tui/src/lib.rs
+++ b/crates/kebab-tui/src/lib.rs
@@ -27,6 +27,7 @@ mod run;
 mod search;
 mod terminal;
 mod theme;
+pub mod trace_popup;

 pub use input::{InputBuffer, display_width, place_cursor_x, truncate_to_display_width};
 pub use theme::{Palette, Role, Theme};
--- a/crates/kebab-tui/src/run.rs
+++ b/crates/kebab-tui/src/run.rs
@@ -130,6 +130,21 @@ pub(crate) fn run_loop(app: &mut App) -> Result<()> {
        if event::poll(POLL_INTERVAL)? {
            match event::read()? {
                Event::Key(key) if key.kind == KeyEventKind::Press => {
+                    // p9-fb-37: trace popup eats keys while open.
+                    // Sits ahead of cheatsheet + mode + pane dispatch
+                    // so Esc / j / k / arrows route to the popup
+                    // instead of leaking through to the search pane.
+                    if app.trace_popup.is_some() {
+                        let close = if let Some(popup) = app.trace_popup.as_mut() {
+                            crate::trace_popup::handle_key_trace_popup(popup, key)
+                        } else {
+                            false
+                        };
+                        if close {
+                            app.trace_popup = None;
+                        }
+                        continue;
+                    }
                    // p9-fb-13: cheatsheet popup toggle takes
                    // precedence over both mode + pane dispatch.
                    // F1 toggles open/close. While visible, Esc
@@ -255,6 +270,12 @@ fn render_root(f: &mut Frame, app: &App) {
    }
    render_status_bar(f, outer[2], app);
    render_key_hints(f, outer[3], app);
+    // p9-fb-37: trace popup overlays on top of pane content but
+    // below the error overlay (errors are higher-priority modal).
+    if let Some(popup) = &app.trace_popup {
+        let popup_area = centered_rect(80, 80, f.area());
+        crate::trace_popup::render_trace_popup(f, popup_area, popup);
+    }
    if let Some(err) = &app.error_overlay {
        render_error_overlay(f, f.area(), err, &app.theme);
    }
@@ -263,6 +284,28 @@ fn render_root(f: &mut Frame, app: &App) {
    }
 }

+/// p9-fb-37: centered sub-rect helper for the trace popup. Returns
+/// a rect of `percent_x` × `percent_y` percent of `r`, centered.
+/// Percentages above 100 are clamped to 100 so the margin arithmetic
+/// cannot underflow `u16` (a debug-build panic on the render path).
+fn centered_rect(percent_x: u16, percent_y: u16, r: ratatui::layout::Rect) -> ratatui::layout::Rect {
+    use ratatui::layout::{Constraint, Direction, Layout};
+    // Split one axis into [margin, body, margin] and keep the body.
+    let middle = |direction: Direction, percent: u16, area: ratatui::layout::Rect| {
+        let percent = percent.min(100);
+        let margin = (100 - percent) / 2;
+        Layout::default()
+            .direction(direction)
+            .constraints([
+                Constraint::Percentage(margin),
+                Constraint::Percentage(percent),
+                Constraint::Percentage(margin),
+            ])
+            .split(area)[1]
+    };
+    middle(Direction::Horizontal, percent_x, middle(Direction::Vertical, percent_y, r))
+}
+
 fn render_header(f: &mut Frame, area: Rect, app: &App) {
    let pane_label = match app.focus {
        Pane::Library => "Library",
--- a/crates/kebab-tui/src/search.rs
+++ b/crates/kebab-tui/src/search.rs
@@ -209,6 +209,51 @@ pub fn handle_key_search(state: &mut App, key: KeyEvent) -> KeyOutcome {
    // pre-fb-12 SHIFT/none heuristic).
    let is_normal = state.mode == crate::app::Mode::Normal;

+    // p9-fb-37: `t` opens the trace popup. Re-runs the last submitted
+    // query with SearchOpts.trace = true. Bypasses cache by going
+    // through `search_with_opts_with_config` (Task 5 wires opts.trace
+    // to skip the LRU cache).
+    if is_normal
+        && matches!(
+            (key.code, key.modifiers),
+            (KeyCode::Char('t'), KeyModifiers::NONE)
+        )
+    {
+        let (last_query, has_results) = {
+            let s = state.search.as_ref().unwrap();
+            (s.last_query.clone(), !s.hits.is_empty())
+        };
+        if !has_results {
+            return KeyOutcome::Continue;
+        }
+        if let Some((q_text, q_mode)) = last_query {
+            // TODO: thread filters when TUI gains a filter UI (currently
+            // mirrors fire_search which also passes default filters).
+            let q = kebab_core::SearchQuery {
+                text: q_text,
+                mode: q_mode,
+                k: state.config.search.default_k,
+                filters: kebab_core::SearchFilters::default(),
+            };
+            let opts = kebab_core::SearchOpts {
+                trace: true,
+                ..Default::default()
+            };
+            match kebab_app::search_with_opts_with_config(state.config.clone(), q, opts) {
+                Ok(resp) => {
+                    if let Some(t) = resp.trace {
+                        state.trace_popup = Some(crate::trace_popup::TracePopupState::new(t));
+                    }
+                }
+                Err(_) => {
+                    // Silent failure — trace is debug-only; user
+                    // can still see search hits without it.
+                }
+            }
+        }
+        return KeyOutcome::Continue;
+    }
+
    // p9-fb-21: chunk-inspect rebound from `i` to `o` (vim "open").
    // The `i` key is now the universal Normal→Insert toggle (handled
    // in `mode_intercept`), so it cannot also mean "inspect chunk"
--- a/crates/kebab-tui/src/trace_popup.rs
+++ b/crates/kebab-tui/src/trace_popup.rs
@@ -0,0 +1,139 @@
+//! p9-fb-37: TUI trace popup. Opens from Search pane via `t` key
+//! when results are visible. Re-runs the current query with
+//! `SearchOpts.trace = true` and displays the lex / vec / rrf union
+//! + per-stage timing as a single scroll list.
+
+use crossterm::event::{KeyCode, KeyEvent};
+use kebab_core::SearchTrace;
+use ratatui::Frame;
+use ratatui::layout::Rect;
+use ratatui::style::{Modifier, Style};
+use ratatui::text::{Line, Span};
+use ratatui::widgets::{Block, Borders, Paragraph, Wrap};
+
+#[derive(Debug, Clone)]
+pub struct TracePopupState {
+    pub trace: SearchTrace,
+    pub scroll: u16,
+}
+
+impl TracePopupState {
+    pub fn new(trace: SearchTrace) -> Self {
+        Self { trace, scroll: 0 }
+    }
+}
+
+/// Render the trace popup into `area`: lexical, vector and RRF-input
+/// sections (each headed by a bold count + timing line), then the total.
+/// `state.scroll` is the vertical offset; long lines wrap untrimmed.
+///
+/// The popup is drawn over an already-rendered pane, so `area` is cleared
+/// first: `Paragraph` only writes the cells its text occupies, and the
+/// pane underneath would otherwise show through the gaps.
+pub fn render_trace_popup(f: &mut Frame, area: Rect, state: &TracePopupState) {
+    let trace = &state.trace;
+    let bold = Style::default().add_modifier(Modifier::BOLD);
+    // Section headings all share the same bold style.
+    let heading = |text: String| Line::from(Span::styled(text, bold));
+    let mut lines: Vec<Line> = Vec::new();
+
+    lines.push(heading(format!(
+        "Lexical ({} hits, {} ms)",
+        trace.lexical.len(),
+        trace.timing.lexical_ms,
+    )));
+    lines.extend(trace.lexical.iter().map(|c| {
+        Line::from(format!(
+            "  #{:>2} score={:.4} chunk={}",
+            c.rank, c.score, c.chunk_id.0
+        ))
+    }));
+    lines.push(Line::from(""));
+    lines.push(heading(format!(
+        "Vector ({} hits, {} ms)",
+        trace.vector.len(),
+        trace.timing.vector_ms,
+    )));
+    lines.extend(trace.vector.iter().map(|c| {
+        Line::from(format!(
+            "  #{:>2} score={:.4} chunk={}",
+            c.rank, c.score, c.chunk_id.0
+        ))
+    }));
+    lines.push(Line::from(""));
+    lines.push(heading(format!(
+        "RRF inputs ({} entries, {} ms fusion)",
+        trace.rrf_inputs.len(),
+        trace.timing.fusion_ms,
+    )));
+    lines.extend(trace.rrf_inputs.iter().map(|e| {
+        Line::from(format!(
+            "  chunk={} lex={:?} vec={:?} fusion={:.4}",
+            e.chunk_id.0, e.lexical_rank, e.vector_rank, e.fusion_score
+        ))
+    }));
+    lines.push(Line::from(""));
+    lines.push(heading(format!("Total: {} ms", trace.timing.total_ms)));
+
+    let block = Block::default()
+        .title("Trace — Esc to close, j/k or ↑↓ to scroll")
+        .borders(Borders::ALL);
+    let p = Paragraph::new(lines)
+        .block(block)
+        .scroll((state.scroll, 0))
+        .wrap(Wrap { trim: false });
+    // Wipe whatever the pane drew here before painting the popup.
+    f.render_widget(ratatui::widgets::Clear, area);
+    f.render_widget(p, area);
+}
+
+/// Route a key press to the open popup; returns `true` only for Esc, which
+/// asks the caller to close it. `j`/Down and `k`/Up move `scroll` one line
+/// (saturating at the `u16` bounds); every other key is ignored.
+pub fn handle_key_trace_popup(state: &mut TracePopupState, key: KeyEvent) -> bool {
+    let code = key.code;
+    if code == KeyCode::Esc {
+        return true;
+    }
+    if matches!(code, KeyCode::Char('j') | KeyCode::Down) {
+        state.scroll = state.scroll.saturating_add(1);
+    } else if matches!(code, KeyCode::Char('k') | KeyCode::Up) {
+        state.scroll = state.scroll.saturating_sub(1);
+    }
+    false
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crossterm::event::KeyModifiers;
+    use kebab_core::TraceTiming;
+
+    /// Popup state over an empty trace with default timings.
+    fn dummy_state() -> TracePopupState {
+        TracePopupState::new(SearchTrace {
+            lexical: vec![],
+            vector: vec![],
+            rrf_inputs: vec![],
+            timing: TraceTiming::default(),
+        })
+    }
+
+    /// Feed one unmodified key press to the handler; returns its close flag.
+    fn press(s: &mut TracePopupState, code: KeyCode) -> bool {
+        handle_key_trace_popup(s, KeyEvent::new(code, KeyModifiers::NONE))
+    }
+
+    #[test]
+    fn esc_closes() {
+        let mut s = dummy_state();
+        assert!(press(&mut s, KeyCode::Esc));
+    }
+
+    #[test]
+    fn j_scrolls_down() {
+        let mut s = dummy_state();
+        assert!(!press(&mut s, KeyCode::Char('j')));
+        assert_eq!(s.scroll, 1);
+    }
+}
--- a/docs/SMOKE.md
+++ b/docs/SMOKE.md
@@ -142,6 +142,86 @@ A 30-day default flags docs that haven't been touched in a month — the
 intent is to nudge a reingest before relying on the snapshot. Set to `0`
 to disable.

+### Streaming ask (fb-33)
+
+```bash
+kebab ask "what is rust ownership" --stream 2> events.ndjson > final.json
+```
+
+stderr 의 events.ndjson 은 한 줄 = 한 event 의 ndjson — `retrieval_done` 한 번, `token` 여러 번, `final` 한 번 (refusal 경로는 `final` 생략). final.json 은 기존 `answer.v1` 그대로 (backwards-compat).
+
+agent 가 stderr 를 닫으면 (예: `kebab ask "..." --stream 2>&1 >/dev/null | head -c 1`) pipeline 이 LLM stream 을 즉시 중단하고 `RefusalReason::LlmStreamAborted` 로 partial answer 를 `answers` 테이블에 기록.
+
+### Pagination + budget (fb-34)
+
+```bash
+# First page
+kebab search "rust" --json --k 5 > page1.json
+jq '.next_cursor' page1.json
+
+# Next page using the returned cursor
+NEXT=$(jq -r '.next_cursor' page1.json)
+kebab search "rust" --json --k 5 --cursor "$NEXT" > page2.json
+
+# Budget cap — returns smaller snippet / fewer hits + truncated=true
+kebab search "rust" --json --max-tokens 200 | jq '.truncated, (.hits | length)'
+```
+
+`next_cursor` 는 corpus_revision 변경 (이후 ingest 등) 시 invalid — 다음 호출이 `error.v1.code = stale_cursor` 로 거절. agent 는 새 search 로 재발급 받기.
+
+`--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare `search_hit.v1[]` 배열과 호환 안 됨.
+
+### Verbatim fetch (fb-35)
+
+```bash
+# Search to get a chunk_id.
+CHUNK_ID=$(kebab search "rust ownership" --json --k 1 | jq -r '.hits[0].chunk_id')
+
+# Fetch verbatim with surrounding context.
+kebab fetch chunk "$CHUNK_ID" --context 2 --json | jq .
+
+# Fetch the full doc as markdown.
+DOC_ID=$(kebab search "rust ownership" --json --k 1 | jq -r '.hits[0].doc_id')
+kebab fetch doc "$DOC_ID" --max-tokens 1000 --json | jq '{kind, truncated, len: (.text | length)}'
+
+# Fetch a line range (markdown / text only).
+kebab fetch span "$DOC_ID" 1 5 --json | jq '{line_start, line_end, effective_end, text}'
+```
+
+PDF / audio docs reject `fetch span` with `error.v1.code = span_not_supported` — use `fetch chunk` (PDF chunks are page-aligned) or `fetch doc` instead.
+
+### Filter args (fb-36)
+
+````bash
+# Filter by media kind (md alias normalizes to markdown).
+kebab search "rust" --media md --json | jq '.hits | length'
+
+# Filter by ingest timestamp (RFC3339).
+kebab search "rust" --ingested-after 2026-04-01T00:00:00Z --json
+
+# Combine: doc-id scope + tag (AND across flags).
+kebab search "rust" --doc-id "<doc-id>" --tag rust --json
+````
+
+Bad `--ingested-after` → `error.v1.code = config_invalid`, exit 2.
+Unknown `--media` value → silently empty (no error).
+
+### Trace + stats (fb-37)
+
+Re-run a search with `--trace` to see per-stage candidate lists + timing:
+
+```bash
+kebab --config /tmp/kebab-smoke/config.toml search "rust async" --trace --json | jq .trace
+```
+
+Inspect the corpus health surface:
+
+```bash
+kebab --config /tmp/kebab-smoke/config.toml schema --json | jq .stats
+```
+
+Look for: `media_breakdown` (5 keys), `lang_breakdown`, `index_bytes`, `stale_doc_count`.
+
 ## P6-4 이미지 ingestion 옵션

 `config.toml` 에 다음 절을 추가하면 `kebab ingest` 가 `**/*.png` / `**/*.jpg` 등 이미지 자산도 함께 색인합니다 (텍스트만 색인하려면 생략):
--- a/docs/superpowers/plans/2026-05-09-p9-fb-33-streaming-ask.md
+++ b/docs/superpowers/plans/2026-05-09-p9-fb-33-streaming-ask.md
--- a/docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md
+++ b/docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md
--- a/docs/superpowers/plans/2026-05-09-p9-fb-35-verbatim-fetch.md
+++ b/docs/superpowers/plans/2026-05-09-p9-fb-35-verbatim-fetch.md
--- a/docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md
+++ b/docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md
--- a/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md
+++ b/docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md
--- a/docs/superpowers/specs/2026-05-09-p9-fb-33-streaming-ask-design.md
+++ b/docs/superpowers/specs/2026-05-09-p9-fb-33-streaming-ask-design.md
@@ -0,0 +1,253 @@
+---
+title: "p9-fb-33 — Streaming ask (ndjson delta) design"
+phase: P9
+component: kebab-rag + kebab-cli + kebab-tui + wire-schema
+task_id: p9-fb-33
+status: design
+target_version: 0.5.0
+contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+contract_sections: [§7 RAG, §10 UX, wire-schema answer.v1]
+date: 2026-05-09
+---
+
+# p9-fb-33 — Streaming ask (ndjson delta)
+
+## Goal
+
+`kebab ask --stream` — agent 가 LLM token 을 도착 즉시 소비할 수 있도록 retrieval / token / final 세 단계 ndjson event 를 stderr 에 흘리고, 마지막 stdout 한 줄은 기존 `answer.v1` 그대로 유지. CLI surface 우선, MCP `kebab__ask` streaming 은 v0.5+ 별도 검토 (이 spec 의 scope 아님).
+
+## Behavior contract
+
+### Stream event taxonomy
+
+3 variant 로 confined. `kind` discriminator + `ts` 타임스탬프 + variant 별 페이로드.
+
+1. **`retrieval_done`** — pipeline 의 retrieve + stale-stamp 직후 1회. 페이로드는 `hits: search_hit.v1[]` (fb-32 의 `indexed_at` / `stale` 포함).
+2. **`token`** — LLM 의 `TokenChunk::Token` 매 도착 시. 페이로드는 `delta: string` + `turn_index: integer | null` (multi-turn ask 의 `Answer.turn_index` 와 일치).
+3. **`final`** — 모든 token 수신 + citation extract / validate 완료 후 1회. 페이로드는 `answer: answer.v1` (스키마 v1 통째).
+
+terminal event = `final`. 모든 ask 는 `final` 로 끝나거나 (cancel 경로) terminal event 없이 끝남 — 후자는 ndjson 흐름이 중간에 끊긴 형태.
+
+### CLI flag
+
+`kebab ask --stream` (boolean flag, default off). `--json` 와 독립:
+
+| flag 조합 | stderr | stdout |
+|----------|--------|--------|
+| (없음) | (없음) | plain text answer + 근거 블록 |
+| `--json` | (없음) | `answer.v1` 1회 |
+| `--stream` | ndjson `answer_event.v1` events | `answer.v1` 1회 (final stdout line) |
+| `--stream --json` | 동일 (stream 이 dominant) | 동일 |
+
+backwards-compat: `--stream` 미사용 시 모든 동작 보존.
+
+### Output stream
+
+- ndjson event → **stderr**. 매 줄 한 event, `serde_json::to_string` + `writeln!`.
+- final `answer.v1` → **stdout**. 기존 final-only consumer 가 stdout 만 파싱해도 호환.
+- 선례: `ingest_progress.v1` 가 stderr ndjson + stdout `ingest_report.v1` final 패턴 사용.
+
+### Cancel semantics
+
+`kebab ask --stream` 의 stdout/stderr 가 외부에서 닫힘 (예: agent 가 SIGPIPE / `head -c 1` / connection close):
+
+1. CLI main thread 의 `writeln!(stderr, ...)` 가 `io::ErrorKind::BrokenPipe` 반환.
+2. CLI 가 receiver 폐기 (rx drop).
+3. background thread 의 `pipeline.ask` 가 `stream_sink.send(StreamEvent::Token { .. })` 시 `SendError` 반환.
+4. pipeline 의 token loop — 현재 `let _ = sink.send(t)` 로 swallow 하지만 본 task 에서 cancel 분기 추가: `SendError` 감지 시 LLM stream `break`, `finish_reason = FinishReason::Cancelled`, `RefusalReason::LlmStreamAborted` 로 Answer 채움, `answers` 테이블에 partial answer + cancel 사유 기록.
+5. CLI background thread join → cancel 사유 명시한 Answer return → CLI 종료. stdout 은 이미 닫혀 final answer.v1 출력 시도해도 BrokenPipe 무시.
+
+`io::ErrorKind::BrokenPipe` 만 cancel 처리. 그 외 IoError 는 fatal — `error.v1` stderr emit + exit 2.
+
+### Wire schema delta
+
+신규 `docs/wire-schema/v1/answer_event.schema.json`:
+
+```json
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://kb.local/wire/v1/answer_event.schema.json",
+  "title": "AnswerEvent v1",
+  "description": "Streaming event emitted by `kebab ask --stream`. One event per line on stderr. Discriminated by `kind`. Terminal: `final`. Final stdout line is `answer.v1` for backwards compat.",
+  "type": "object",
+  "required": ["schema_version", "kind", "ts"],
+  "properties": {
+    "schema_version": { "const": "answer_event.v1" },
+    "kind": { "enum": ["retrieval_done", "token", "final"] },
+    "ts":   { "type": "string", "format": "date-time" },
+    "hits":       { "type": "array",   "description": "retrieval_done: search_hit.v1[]" },
+    "delta":      { "type": "string",  "description": "token: incremental string chunk" },
+    "turn_index": { "type": ["integer", "null"], "minimum": 0,
+                    "description": "token: matches Answer.turn_index" },
+    "answer":     { "type": "object",  "description": "final: complete answer.v1 payload" }
+  }
+}
+```
+
+기존 `answer.v1` / `search_hit.v1` / `citation.v1` 변경 없음.
+
+### Domain API change
+
+`kebab-rag::pipeline`:
+
+```rust
+#[derive(Clone, Debug)]
+pub enum StreamEvent {
+    RetrievalDone { hits: Vec<SearchHit> },
+    Token { delta: String, turn_index: Option<u32> },
+    Final { answer: Answer },
+}
+
+pub struct AskOpts {
+    // ... 기존 필드
+    /// p9-fb-33: was `Option<Sender<String>>`. Now carries discriminated
+    /// events so callers can distinguish retrieval / per-token / final.
+    pub stream_sink: Option<std::sync::mpsc::Sender<StreamEvent>>,
+}
+```
+
+- internal API breaking. consumer = TUI worker + (없을 시) MCP. TUI 만 갱신.
+- non-streaming consumer (`stream_sink: None`) 는 무영향.
+
+## Allowed / forbidden dependencies
+
+각 crate 기존 deps 유지. `mpsc::Sender` 는 std. 신규 dep 없음.
+
+- `kebab-core` 는 `StreamEvent` 정의 안 함 (도메인 type 가 wire 변환과 분리되어 있고, StreamEvent 는 pipeline 의 communication channel — kebab-rag 안 위치 적절).
+- `kebab-cli` 는 wire 변환 코드 (`wire::wire_answer_event(&StreamEvent) -> Value`) 추가 — `kebab-cli/src/wire.rs` 의 기존 패턴 따라.
+- UI crate (kebab-tui) 가 직접 retriever / store 호출 X — `kebab-app` facade 통과만.
+
+## Components
+
+### kebab-rag::pipeline
+
+- `enum StreamEvent` 신규 정의 (`pub`).
+- `AskOpts.stream_sink` 타입 변경.
+- `RagPipeline::ask`:
+  - retrieve + stale-stamp 직후 `if let Some(sink) = &opts.stream_sink { let _ = sink.send(StreamEvent::RetrievalDone { hits: hits.clone() }); }` 발사. cancel 시 즉시 break out (이때는 LLM 도 안 부름).
+  - token loop: `sink.send(StreamEvent::Token { delta: t, turn_index: opts.turn_index })`. SendError → cancel 분기.
+  - 끝에서 `Final { answer: built_answer.clone() }` 발사.
+- cancel 분기:
+  ```rust
+  // p9-fb-33: SendError → caller (CLI) dropped the receiver,
+  // typically after BrokenPipe on its stderr event stream. Stop
+  // generation, mark refusal, persist partial answer.
+  if matches!(send_result, Err(_)) {
+      finish_reason = FinishReason::Cancelled;
+      break;
+  }
+  ```
+  finish_reason = Cancelled 일 때 grounded=false + RefusalReason::LlmStreamAborted.
+
+### kebab-app
+
+- `AskOpts` re-export 만 (이미 public). `StreamEvent` 도 `pub use`.
+- `App::ask` / `ask_with_session` 변경 없음 (opts 통과).
+
+### kebab-cli
+
+- `Cmd::Ask` 에 `#[arg(long)] stream: bool` 추가.
+- `--stream` 분기:
+  ```rust
+  if !stream {
+      // 기존 final-only path
+  } else if stream {
+      let (tx, rx) = std::sync::mpsc::channel::<StreamEvent>();
+      let cfg2 = cfg.clone();
+      let q = query.clone();
+      let opts2 = AskOpts { stream_sink: Some(tx), ..opts };
+      let handle = std::thread::spawn(move || {
+          kebab_app::ask_with_config(cfg2, &q, opts2)
+      });
+      let mut stderr = std::io::stderr().lock();
+      let mut cancelled = false;
+      for ev in rx {
+          let v = wire::wire_answer_event(&ev);
+          let line = serde_json::to_string(&v)?;
+          if let Err(e) = writeln!(stderr, "{line}") {
+              if e.kind() == std::io::ErrorKind::BrokenPipe {
+                  cancelled = true;
+                  break;
+              }
+              return Err(e.into());
+          }
+      }
+      drop(stderr);
+      let result = handle.join().expect("ask thread panic");
+      let ans = result?;
+      // final stdout line
+      let mut stdout = std::io::stdout().lock();
+      let _ = writeln!(stdout, "{}", serde_json::to_string(&wire::wire_answer(&ans))?);
+      // cancel 또는 refusal 시 exit 1
+      if !ans.grounded { return Err(RefusalSignal.into()); }
+      Ok(())
+  }
+  ```
+- `wire::wire_answer_event(&StreamEvent) -> Value` 추가 — discriminated by variant, schema_version 태그.
+
+### kebab-tui
+
+- ask worker 가 받던 `Sender<String>` → `Sender<StreamEvent>`.
+- worker thread 의 receive loop:
+  - `StreamEvent::Token { delta, .. }` → 기존 token 누적 path 그대로.
+  - `StreamEvent::RetrievalDone { hits }` → minimal 안에선 ignore (citation 은 final 도착 후 표시 — fb-22 에서 살펴봄).
+  - `StreamEvent::Final { answer }` → 이미 `App::ask` return 으로 받으므로 무시 가능 (또는 sanity check).
+- snapshot 영향 없음 (token concat 결과 동일).
+
+### kebab-mcp
+
+변경 없음. `stream_sink: None` 유지. 향후 v0.5+ 에서 rmcp progress notification 채택 검토.
+
+## Test plan
+
+| kind | description |
+|------|-------------|
+| unit (kebab-rag) | `StreamEvent` serde round-trip — RetrievalDone / Token / Final 각각 한 줄 ndjson |
+| unit (kebab-rag) | pipeline.ask + MockLm + sink: 발사 순서 = `RetrievalDone` 1회 → `Token`* → `Final` 1회 |
+| unit (kebab-rag) | sink SendError (rx drop) → LLM loop 즉시 break + Answer.refusal_reason = `LlmStreamAborted` + answers row 기록 |
+| unit (kebab-rag) | Final.answer.citations 가 RetrievalDone 의 hits 의 부분집합 (LLM 이 마커 안 쓴 hit 도 RetrievalDone 에 포함) |
+| 통합 (kebab-cli) | `kebab ask --stream` stderr 가 valid ndjson — schema_version/kind/ts 모두 정상 |
+| 통합 (kebab-cli) | `kebab ask --stream --json` stdout 마지막 줄이 `answer.v1` 통째 |
+| 통합 (kebab-cli) | `kebab ask --json` (no --stream) 동작 무변경 — stdout final-only |
+| 통합 (kebab-cli) | stderr 닫힘 시뮬 (`kebab ask --stream 2>&1 >/dev/null \| head -c 1`) → process 정상 종료 + answers row 의 refusal_reason = LlmStreamAborted |
+| 통합 (wire-schema) | answer_event.schema.json validate — RetrievalDone/Token/Final 샘플 |
+| 통합 (kebab-tui) | 기존 ask snapshot 모두 통과 (token concat 결과 동일) |
+
+LLM 의존: pipeline unit test 는 MockLm 활용 (이미 `crates/kebab-rag/tests/common/mod.rs` 의 `CountingLm` 패턴). CLI 통합 test 는 Ollama 필요 → `#[ignore]` gate.
+
+## Implementation steps (high-level)
+
+1. wire schema 신규 `answer_event.schema.json`.
+2. `kebab-rag::pipeline::StreamEvent` enum 정의 + `AskOpts.stream_sink` 타입 변경.
+3. `RagPipeline::ask`:
+   - RetrievalDone 발사 추가.
+   - token loop sink.send 의 SendError → cancel 분기.
+   - Final 발사 추가.
+4. `kebab-app` re-exports 갱신.
+5. `kebab-tui` worker 의 `Sender<String>` → `Sender<StreamEvent>` 변환.
+6. `kebab-cli`:
+   - `--stream` flag.
+   - `wire::wire_answer_event` 헬퍼.
+   - background thread + main thread receive loop.
+7. 단위 + 통합 테스트.
+8. README + SMOKE — `--stream` 사용 예시.
+9. tasks/INDEX.md / spec status flip.
+10. `integrations/claude-code/kebab/SKILL.md` — agent 가 ndjson stream 을 어떻게 소비하는지 한 단락.
+
+## Risks / notes
+
+- **TUI sink 타입 breaking**: 1 곳만 수정. 기존 token 누적 path 는 `StreamEvent::Token { delta, .. }` 만 매치하면 동일 동작. snapshot 영향 없음.
+- **`Final` event 의 Answer clone**: streaming path 만 부담. non-streaming caller 무영향.
+- **BrokenPipe vs 일반 IoError**: `io::ErrorKind::BrokenPipe` 만 cancel. 그 외는 `error.v1` stderr emit + exit 2.
+- **ndjson 줄 단위**: serde_json::to_string + writeln! 충분. embedded newline 은 serde 가 escape.
+- **partial markdown safety**: out of scope. agent 책임.
+- **multi-turn turn_index**: streaming 과 fb-15 multi-turn 의 상호작용. 새 turn 마다 streaming 재시작이 자연스러움 (`Token.turn_index` 가 각 ask 호출 단위로 일관).
+
+## Documentation updates (implementation PR 동시)
+
+- `README.md` — Quick start 또는 명령 표에 `--stream` 한 줄.
+- `docs/SMOKE.md` — `kebab ask --stream` walkthrough (실행 예시 + agent 가 stderr 파싱하는 패턴 한 단락).
+- `tasks/p9/p9-fb-33-streaming-ask.md` — `status: open → completed`, design/plan 링크 추가.
+- `tasks/INDEX.md` — fb-33 행 ✅ 표시.
+- `integrations/claude-code/kebab/SKILL.md` — `--stream` 멘션 (CLI fallback 섹션).
+- `tasks/HOTFIXES.md` — internal API breaking (AskOpts.stream_sink 타입 변경) 결정 로그 (선택, 머지 후 의문 발생 시).
--- a/docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md
+++ b/docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md
@@ -0,0 +1,230 @@
+---
+title: "p9-fb-34 — Output budget controls design"
+phase: P9
+component: kebab-core + kebab-app + kebab-cli + kebab-mcp + wire-schema
+task_id: p9-fb-34
+status: design
+target_version: 0.5.0
+contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+contract_sections: [§4 search, §10 UX, wire-schema search_hit.v1]
+date: 2026-05-09
+---
+
+# p9-fb-34 — Output budget controls
+
+## Goal
+
+`kebab search` agent UX 개선. context window 제약 있는 agent 가 검색 결과 size 와 페이지네이션을 명시적으로 제어할 수 있게 한다. CLI surface 우선, MCP tool 도 동일 인자로 동시 노출. ask path 는 scope out (별도 `rag.max_context_tokens` 가 이미 budget 담당).
+
+## Behavior contract
+
+### CLI flags
+
+`kebab search "<query>"` 에 세 가지 flag 신규:
+
+| flag | 의미 | default |
+|------|------|---------|
+| `--max-tokens N` | 결과 wire JSON 의 추정 token 수 cap (`chars/4` 근사). 초과 시 truncate priority 적용. | 미설정 = 비활성 (기존 동작) |
+| `--snippet-chars N` | 각 hit snippet 최대 chars. config 의 `search.snippet_chars` 보다 우선. | 미설정 = config 값 |
+| `--cursor <opaque>` | 이전 호출의 `next_cursor` 값. 다음 페이지 hits 만 반환. | 미설정 = 첫 페이지 |
+
+### Wire shape
+
+`kebab search --json` stdout 이 기존 `search_hit.v1[]` 배열에서 신규 `search_response.v1` wrapper object 로 교체:
+
+```json
+{
+  "schema_version": "search_response.v1",
+  "hits": [/* search_hit.v1[] */],
+  "next_cursor": "<base64>" | null,
+  "truncated": true | false
+}
+```
+
+**Backwards-compat broken** — agent 가 `[0]` 직접 인덱싱하면 깨짐. CLI plain (`--json` 없이) 출력 무영향. HOTFIXES 에 결정 로그.
+
+### Token estimation
+
+`chars/4` 근사 (RAG `pack_context` 와 일관). tiktoken-rs 등 신규 dep 없음. 정확도 ±15% 수준 — agent budget 제어 목적상 충분. wire schema description 에 "approximation" 명시.
+
+### Truncate priority
+
+`opts.max_tokens` 가 Some 일 때만 동작. 단계별:
+
+1. **Snippet 단축** — 각 hit snippet 을 `opts.snippet_chars.unwrap_or(config.search.snippet_chars)` 로 자른 뒤, 여전히 budget 초과면 60-char floor 까지 점진 단축.
+2. **k 축소** — snippet 60 char 까지 줄여도 초과면 마지막 hit 부터 pop. 최소 1 hit 보장.
+3. **truncated flag** — 위 어느 단계라도 동작 시 `truncated: true`. agent 는 `next_cursor` 로 다음 페이지 요청 가능.
+
+metadata (rank/score/doc_path/citation) 는 끝까지 유지 — agent 가 hit 자체를 못 찾으면 무의미.
+
+### Pagination cursor
+
+cursor 는 opaque base64 — 내부적으로 `{offset: usize, corpus_revision: string}` JSON 의 base64 encode.
+
+- 첫 호출: cursor 미설정 → offset 0.
+- 응답: 남은 hit 있으면 `next_cursor = encode(offset + returned, current_revision)`. 없으면 `null`.
+- 다음 호출: `--cursor <prev>` → decode → offset 만큼 skip.
+- corpus_revision mismatch (이후 ingest 등으로 corpus 가 변경됨) → `error.v1.code = "stale_cursor"`, exit 2. agent 책임으로 재호출.
+
+retriever 호출 시 k = `effective_k + offset` 만큼 fetch 후 offset 만큼 skip 해 응답.
+
+### Stale cursor error
+
+`error.v1.code` enum 에 `"stale_cursor"` 추가. message 예시: `"cursor was issued against corpus_revision 'abc'; current revision is 'xyz'. Re-issue search to obtain a fresh cursor."`
+
+## Allowed / forbidden dependencies
+
+- `kebab-core`: `SearchOpts` 신규 도메인 type 정의. 신규 dep 없음 (option / String 만).
+- `kebab-app`: cursor encode/decode 헬퍼 (base64 + serde_json). `base64` workspace dep 가 이미 있을 가능성 높음 — 확인 후 필요 시 추가.
+- `kebab-cli`: clap 인자 추가, wire wrapper 헬퍼.
+- `kebab-mcp`: tool input schema 확장.
+- `kebab-tui`: 변경 없음 (Search 패널 budget 미사용. fb-3X 후속).
+- `kebab-search`: 변경 없음 — retriever signature 보존.
+
+`kebab-core` 가 다른 `kebab-*` crate 의존 금지 룰 준수.
+
+## Public surface delta
+
+### kebab-core
+
+```rust
+#[derive(Clone, Debug, Default)]
+pub struct SearchOpts {
+    /// p9-fb-34: chars/4 approximation. None = no budget enforcement.
+    pub max_tokens: Option<usize>,
+    /// p9-fb-34: per-hit snippet character cap. None = use config default.
+    pub snippet_chars: Option<usize>,
+    /// p9-fb-34: opaque base64 cursor from a previous response.
+    pub cursor: Option<String>,
+}
+```
+
+### kebab-app
+
+```rust
+#[derive(Clone, Debug)]
+pub struct SearchResponse {
+    pub hits: Vec<SearchHit>,
+    pub next_cursor: Option<String>,
+    pub truncated: bool,
+}
+
+impl App {
+    /// p9-fb-34: budget-aware search.
+    pub fn search_with_opts(
+        &self,
+        query: SearchQuery,
+        opts: SearchOpts,
+    ) -> Result<SearchResponse>;
+
+    // Existing — thin wrapper for backwards-compat.
+    pub fn search(&self, query: SearchQuery) -> Result<Vec<SearchHit>> {
+        let resp = self.search_with_opts(query, SearchOpts::default())?;
+        Ok(resp.hits)
+    }
+}
+
+// cursor helpers (private to app crate)
+pub(crate) fn encode_cursor(offset: usize, corpus_revision: &str) -> String;
+pub(crate) fn decode_cursor(
+    s: &str,
+    expected_revision: &str,
+) -> Result<usize /* offset */, ErrorV1 /* stale_cursor */>;
+```
+
+### kebab-cli
+
+```rust
+// Cmd::Search 새 인자
+#[arg(long)] max_tokens: Option<usize>,
+#[arg(long)] snippet_chars: Option<usize>,
+#[arg(long)] cursor: Option<String>,
+```
+
+```rust
+// wire helper
+pub fn wire_search_response(r: &SearchResponse) -> Value {
+    let v = serde_json::json!({
+        "hits": r.hits.iter().map(wire_search_hit).collect::<Vec<_>>(),
+        "next_cursor": r.next_cursor,
+        "truncated": r.truncated,
+    });
+    tag_object(v, "search_response.v1")
+}
+```
+
+plain output: 기존 hit 줄들 + truncated 시 stderr 한 줄:
+
+```
+[truncated; use --cursor <next_cursor> for the next page]
+```
+
+### kebab-mcp
+
+`SearchInput` 에 optional 필드 추가:
+
+```rust
+pub struct SearchInput {
+    pub query: String,
+    pub mode: Option<String>,
+    pub k: Option<usize>,
+    /// p9-fb-34
+    pub max_tokens: Option<usize>,
+    pub snippet_chars: Option<usize>,
+    pub cursor: Option<String>,
+}
+```
+
+출력: `search_response.v1` JSON tag 적용 (CLI 와 동일 wrapper).
+
+## Test plan
+
+| kind | description |
+|------|-------------|
+| unit (kebab-app) | `cursor::encode/decode` round-trip + corpus_revision mismatch → `StaleCursor` |
+| unit (kebab-app) | `App::search_with_opts` budget=None → 기존 `App::search` 동일 (truncated=false, next_cursor 는 남은 hit 이 있을 때만 채움) |
+| unit (kebab-app) | budget=200 tokens → snippet 60-char floor 까지 단축, truncated=true |
+| unit (kebab-app) | budget < single-hit 최소 → k=1 + truncated=true (1 hit 보장) |
+| unit (kebab-app) | snippet_chars override → 해당 길이로 truncate |
+| 통합 (kebab-app) | cursor offset 5 호출 → 6번째 hit 부터 반환 |
+| 통합 (kebab-app) | corpus_revision bump 후 cursor 재호출 → `StaleCursor` error.v1 |
+| 통합 (kebab-cli) | `kebab search "x" --json` → `search_response.v1` shape |
+| 통합 (kebab-cli) | `--max-tokens 200 --json` → truncated=true, hits 짧음 |
+| 통합 (kebab-cli) | `--cursor <encoded>` → 다음 페이지 |
+| 통합 (kebab-cli) | plain output: `[truncated; ...]` stderr 한 줄 |
+| 통합 (kebab-mcp) | `mcp__kebab__search` tool 이 `search_response.v1` 반환 |
+| 통합 (wire-schema) | `search_response.schema.json` validate 샘플 (with/without next_cursor) |
+| 통합 (kebab-app) | 기존 `App::search` 호출자 (TUI 등) 무영향 — return type 동일 |
+
+## Implementation steps (high-level)
+
+1. wire schema 신규 `search_response.schema.json` + `error.v1` enum 에 `stale_cursor` 추가.
+2. `kebab-core::SearchOpts` 도메인 type.
+3. `kebab-app::SearchResponse` + `cursor` 모듈 (encode/decode).
+4. `App::search_with_opts` impl (budget loop, cursor handling).
+5. `App::search` thin wrapper 보존.
+6. `kebab-cli::Cmd::Search` 새 flag + wire wrapper helper + plain truncated hint.
+7. `kebab-mcp::SearchInput` 확장 + 출력 wrapper.
+8. 단위 + 통합 테스트.
+9. README + SMOKE — `--max-tokens` / `--cursor` 예시.
+10. tasks/INDEX.md / spec status flip.
+11. `tasks/HOTFIXES.md` — wire breaking 결정 로그.
+12. `integrations/claude-code/kebab/SKILL.md` — search 결과 shape 변경 명시.
+
+## Risks / notes
+
+- **Wire breaking**: agent 가 기존 `search_hit.v1[]` 배열 직접 파싱 시 깨짐. HOTFIXES 결정 로그 + skill notes 반영 필수. 내부 single-user 환경이라 실용적 영향 적음.
+- **`App::search` 시그니처 보존** 으로 TUI / 기존 caller 무영향.
+- **chars/4 추정 정확도** ±15% — agent budget 보호 목적상 충분. tiktoken 도입은 별도 task.
+- **cursor opaque** — agent 가 base64 decode 시도 막을 방법 없음. spec 에 "구조 변경 가능, 직접 파싱 금지" 명시.
+- **corpus_revision 이 fb-19 LRU cache invalidation key 와 동일 source** — 별도 source-of-truth 추가 불필요.
+- **TUI Search 패널 budget UI** — out of scope. 사용자가 원하면 fb-3X 후속.
+
+## Documentation updates (implementation PR 동시)
+
+- `README.md` — `kebab search` 명령 표 row 업데이트, `--max-tokens` / `--cursor` 한 줄.
+- `docs/SMOKE.md` — pagination walkthrough 한 단락 (cursor 흐름 예시).
+- `tasks/p9/p9-fb-34-output-budget-controls.md` — `status: open → completed`, design/plan 링크 추가.
+- `tasks/INDEX.md` — fb-34 행 ✅.
+- `tasks/HOTFIXES.md` — `2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1` 결정 로그.
+- `integrations/claude-code/kebab/SKILL.md` — Recipe 의 search 결과 파싱 패턴 (`response.hits[]`) + cursor 예시.
--- a/docs/superpowers/specs/2026-05-09-p9-fb-35-verbatim-fetch-design.md
+++ b/docs/superpowers/specs/2026-05-09-p9-fb-35-verbatim-fetch-design.md
@@ -0,0 +1,276 @@
+---
+title: "p9-fb-35 — Verbatim fetch design"
+phase: P9
+component: kebab-core + kebab-app + kebab-cli + kebab-mcp + wire-schema
+task_id: p9-fb-35
+status: design
+target_version: 0.5.0
+contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+contract_sections: [§4 search, §5 storage, §10 UX]
+date: 2026-05-09
+---
+
+# p9-fb-35 — Verbatim fetch
+
+## Goal
+
+agent 가 search hit / RAG citation 의 `chunk_id` / `doc_id` 로 raw verbatim text 를 deep-link fetch 할 수 있는 surface. CLI 와 MCP 동시 노출. 3가지 mode (chunk / doc / span). PDF / audio 의 line-based span 은 명시적 거절 (`error.v1.code = span_not_supported`). image OCR text 는 line-addressable 이라 span 허용.
+
+## Behavior contract
+
+### Source of truth
+
+모든 text 는 `CanonicalDocument` / `chunks.text` 에서 가져온 정규화된 markdown. 원본 raw bytes (`assets.storage_path` 의 파일) 는 노출 안 함 — 사용자가 필요하면 직접 read. PDF / audio / image 도 동일 surface (page-text / transcript / OCR text). 단 line-based span 은 PDF / audio 거절 — image OCR 은 line-addressable 이라 허용.
+
+### CLI subcommand
+
+`kebab fetch` 신규 subcommand, 3 mode:
+
+| mode | flags |
+|------|-------|
+| `kebab fetch chunk <chunk_id> [--context N] [--json]` | `--context` 시 동일 doc 의 ordinal ±N 범위 chunks 도 포함 |
+| `kebab fetch doc <doc_id> [--max-tokens N] [--json]` | doc 정규화된 markdown text. budget 트립 시 truncated. |
+| `kebab fetch span <doc_id> <line_start> <line_end> [--max-tokens N] [--json]` | doc text 의 line range (1-based, inclusive). PDF/audio 면 거절. |
+
+`--context` 는 chunk mode 에만. `--max-tokens` 는 doc/span 에만 (chunk 는 bounded size).
+
+### Wire shape — `fetch_result.v1`
+
+discriminated by `kind`:
+
+```json
+{
+  "schema_version": "fetch_result.v1",
+  "kind": "chunk" | "doc" | "span",
+  "doc_id": "<id>",
+  "doc_path": "<workspace_path>",
+  "indexed_at": "<RFC3339>",
+  "stale": <bool>,
+  "chunk":          {/* chunk_inspection.v1, kind=chunk */},
+  "context_before": [/* chunk_inspection.v1[], kind=chunk */],
+  "context_after":  [/* chunk_inspection.v1[], kind=chunk */],
+  "text":           "<markdown>",
+  "line_start":     <int>,
+  "line_end":       <int>,
+  "effective_end":  <int>,
+  "truncated":      <bool>
+}
+```
+
+Per-kind 필수 필드 — schema description 으로 명시 (JSON Schema 의 conditional validation 은 v1 stub 단계에서 미구현, agent 책임).
+
+`indexed_at` / `stale` — fb-32 와 동일 stamping. `documents.updated_at` 기준.
+
+### Mode 동작
+
+**chunk mode**:
+1. `DocumentStore::get_chunk(chunk_id)` — 없으면 `error.v1.code = chunk_not_found`.
+2. `--context N` 시 doc 안 chunks 의 ordinal 정렬 → target ordinal ±N 의 chunks 추출. doc 경계 넘기지 않음 (clamp).
+3. wire: `kind: "chunk"`, `chunk: <target>`, `context_before: [...]`, `context_after: [...]`, `truncated: false`.
+
+**doc mode**:
+1. `DocumentStore::get_document(doc_id)` — 없으면 `error.v1.code = doc_not_found`.
+2. `CanonicalDocument` 의 blocks → markdown 직렬화 (`fmt_canonical_to_markdown` 헬퍼 신규).
+3. `--max-tokens N` 시 chars/4 추정 budget 적용 — 초과 시 끝에서 끊고 truncated=true. (line 단위 trim 은 별도 task — 단순 char-trim.)
+4. wire: `kind: "doc"`, `text: <md>`, `truncated: <bool>`.
+
+**span mode**:
+1. doc lookup 동일.
+2. media_type 검사 — PDF (`Page` citation) / audio (`Time` citation) 는 line-incompatible → `error.v1.code = span_not_supported`.
+3. doc text → `text.lines()` 에서 1-based inclusive 범위 `line_start..=line_end` 추출 (0-based slice 로는 `[line_start - 1..line_end]`). line_end 가 total 초과 시 clamp.
+4. `--max-tokens` 적용 시 끝에서 추가 truncate, `effective_end` 갱신.
+5. wire: `kind: "span"`, `text`, `line_start`, `line_end` (요청), `effective_end` (실제 emit), `truncated`.
+
+### Budget integration
+
+fb-34 의 chars/4 추정 + truncate 패턴 재사용. `FetchOpts.max_tokens` 가 `Some(N)` 일 때만 동작. chunk mode 는 무관 (chunk 는 chunker 단위 bounded).
+
+### Error codes
+
+`error.v1.code` enum 추가:
+- `chunk_not_found` — chunk_id lookup miss.
+- `doc_not_found` — doc_id lookup miss.
+- `span_not_supported` — line-incompatible media (PDF / audio).
+- `invalid_input` — MCP tool 의 mode 별 필수 필드 누락 (e.g. `kind: "chunk"` + `chunk_id: null`).
+
+`StructuredError` wrapper (fb-34) 재사용 — `App::fetch` 의 typed `ErrorV1` 가 `classify` downcast 거쳐 wire 까지 보존.
+
+### MCP tool
+
+`mcp__kebab__fetch` 신규. Input:
+
+```rust
+pub struct FetchInput {
+    /// "chunk" | "doc" | "span"
+    pub kind: String,
+    pub chunk_id: Option<String>,
+    pub doc_id: Option<String>,
+    pub line_start: Option<u32>,
+    pub line_end: Option<u32>,
+    pub context: Option<u32>,
+    pub max_tokens: Option<usize>,
+}
+```
+
+Validation: `kind` 별 필수 필드 검증 후 `App::fetch` 호출. 출력 = `fetch_result.v1`.
+
+## Allowed / forbidden dependencies
+
+- `kebab-core`: 신규 도메인 type. 신규 dep 없음.
+- `kebab-app`: 기존 deps 충분. fb-32 staleness + fb-34 budget 헬퍼 재사용. markdown 직렬화는 단순 fmt 함수 (별도 dep 불필요).
+- `kebab-cli`: clap subcommand 추가, wire helper.
+- `kebab-mcp`: tool 추가.
+- `kebab-tui`: 변경 없음.
+- `kebab-search` / `kebab-rag`: 변경 없음.
+
+## Public surface delta
+
+### kebab-core
+
+```rust
+#[derive(Clone, Debug)]
+pub enum FetchQuery {
+    Chunk(ChunkId),
+    Doc(DocumentId),
+    Span {
+        doc_id: DocumentId,
+        line_start: u32,
+        line_end: u32,
+    },
+}
+
+#[derive(Clone, Debug, Default)]
+pub struct FetchOpts {
+    /// chunk mode 만: ±N chunks. None = no context.
+    pub context: Option<u32>,
+    /// doc/span 만: chars/4 budget. None = no cap.
+    pub max_tokens: Option<usize>,
+}
+
+#[derive(Clone, Debug)]
+pub struct FetchResult {
+    pub kind: FetchKind,
+    pub doc_id: DocumentId,
+    pub doc_path: WorkspacePath,
+    pub indexed_at: OffsetDateTime,
+    pub stale: bool,
+    // chunk
+    pub chunk: Option<Chunk>,
+    pub context_before: Vec<Chunk>,
+    pub context_after: Vec<Chunk>,
+    // doc / span
+    pub text: Option<String>,
+    pub line_start: Option<u32>,
+    pub line_end: Option<u32>,
+    pub effective_end: Option<u32>,
+    pub truncated: bool,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum FetchKind { Chunk, Doc, Span }
+```
+
+`Serialize` impl for `FetchResult` flattens to `fetch_result.v1` shape (or wire helper does the projection).
+
+### kebab-app
+
+```rust
+impl App {
+    pub fn fetch(&self, query: FetchQuery, opts: FetchOpts) -> Result<FetchResult>;
+}
+
+pub fn fetch_with_config(
+    config: kebab_config::Config,
+    query: FetchQuery,
+    opts: FetchOpts,
+) -> Result<FetchResult>;
+
+// markdown 직렬화 헬퍼 (private)
+fn fmt_canonical_to_markdown(doc: &CanonicalDocument) -> String;
+```
+
+### kebab-cli
+
+```rust
+// Cmd::Fetch 신규 enum variant
+Fetch {
+    #[command(subcommand)]
+    what: FetchWhat,
+}
+
+#[derive(Subcommand)]
+enum FetchWhat {
+    Chunk { id: String, #[arg(long)] context: Option<u32> },
+    Doc { id: String, #[arg(long)] max_tokens: Option<usize> },
+    Span {
+        doc_id: String,
+        line_start: u32,
+        line_end: u32,
+        #[arg(long)] max_tokens: Option<usize>,
+    },
+}
+```
+
+```rust
+// wire helper
+pub fn wire_fetch_result(r: &FetchResult) -> Value;
+```
+
+### kebab-mcp
+
+`FetchInput` + `mcp__kebab__fetch` tool registration.
+
+## Test plan
+
+| kind | description |
+|------|-------------|
+| unit (kebab-app) | chunk fetch (no context) — chunk + 빈 context |
+| unit (kebab-app) | chunk fetch `--context 2` 다중 chunk doc 에서 ±2 ordinal 정확 |
+| unit (kebab-app) | chunk fetch `--context 99` doc 경계 clamp |
+| unit (kebab-app) | doc fetch — markdown 직렬화 결과 |
+| unit (kebab-app) | doc fetch `--max-tokens N` budget 트립 → truncated=true + text 잘림 |
+| unit (kebab-app) | span fetch line range slice 정확 |
+| unit (kebab-app) | span line_end > total → effective_end clamped |
+| unit (kebab-app) | span PDF doc → StructuredError(span_not_supported) |
+| unit (kebab-app) | span audio doc → StructuredError(span_not_supported) |
+| unit (kebab-app) | unknown chunk_id → StructuredError(chunk_not_found) |
+| unit (kebab-app) | unknown doc_id → StructuredError(doc_not_found) |
+| unit (kebab-app) | indexed_at + stale fb-32 stamping 정확 |
+| 통합 (kebab-cli) | `kebab fetch chunk <id> --json --context 1` wire 검증 |
+| 통합 (kebab-cli) | `kebab fetch doc <id> --max-tokens 100 --json` truncated=true |
+| 통합 (kebab-cli) | `kebab fetch span <doc_id> 1 5 --json` line range |
+| 통합 (kebab-cli) | `kebab fetch chunk <unknown>` → exit 2 + error.v1.code = chunk_not_found |
+| 통합 (kebab-cli) | plain mode chunk — `[doc_path § heading]\n<text>` 형태 |
+| 통합 (kebab-mcp) | `mcp__kebab__fetch` 3 mode 정상 응답 |
+| 통합 (kebab-mcp) | `kind: "chunk"` + `chunk_id: null` → invalid_input |
+| 통합 (wire-schema) | `fetch_result.schema.json` 3 mode 샘플 validate |
+
+## Implementation steps (high-level)
+
+1. wire schema 신규 `fetch_result.schema.json` + `error.v1` enum 4 codes 추가.
+2. `kebab-core` 신규 types (`FetchQuery`, `FetchOpts`, `FetchResult`, `FetchKind`).
+3. `kebab-app::fetch` impl + `fmt_canonical_to_markdown` 헬퍼.
+4. `kebab-cli::Cmd::Fetch` clap subcommand + wire helper + plain renderer.
+5. `kebab-mcp` `mcp__kebab__fetch` tool + input validation.
+6. 단위 + 통합 테스트.
+7. README + SMOKE — fetch 예시.
+8. tasks/INDEX.md / spec status flip.
+9. `tasks/HOTFIXES.md` — 신규 surface 라 deviation 없을 가능성 (skip).
+10. `integrations/claude-code/kebab/SKILL.md` — Recipe 추가 ("agent fetched a chunk_id from search, wants surrounding context").
+
+## Risks / notes
+
+- **Markdown 직렬화 round-trip** — `CanonicalDocument.blocks` 가 round-trip 손실 적은지 확인. 손실 발견 시 ingest 시점에 raw markdown 도 store 에 보존하는 후속 task 가능 (fb-3X).
+- **chunk_id stability** — chunker_version cascade 시 invalidate. spec 에 명시 + skill notes 의 retry pattern 안내.
+- **`Chunk` 가 `chunk_inspection.v1` 와 동일** — `wire_chunk_inspection` 재사용 가능. 새 헬퍼 불필요.
+- **doc/span budget — line trim 안 함** — char-level trim 만. agent 가 끊긴 line 받을 가능성 있음. 충분히 작은 한도 (e.g. 2000 chars) 면 큰 영향 없음. 후속에서 line-aware trim 가능.
+- **media_type 판정** — `documents.source_type` 또는 첫 chunk 의 citation kind (Line/Page/Time) 로 분기. PDF/audio 는 Page/Time citation. line range 의미 없음.
+
+## Documentation updates (implementation PR 동시)
+
+- `README.md` — 명령 표에 `kebab fetch chunk|doc|span` row.
+- `docs/SMOKE.md` — fetch walkthrough (search → fetch chunk --context flow).
+- `tasks/p9/p9-fb-35-verbatim-fetch.md` — `status: open → completed`, design/plan 링크.
+- `tasks/INDEX.md` — fb-35 행 ✅.
+- `integrations/claude-code/kebab/SKILL.md` — 신규 `mcp__kebab__fetch` row + recipe.
--- a/docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md
+++ b/docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md
@@ -0,0 +1,213 @@
+---
+title: "p9-fb-36 — Search filter args design"
+phase: P9
+component: kebab-core + kebab-search + kebab-cli + kebab-mcp
+task_id: p9-fb-36
+status: design
+target_version: 0.5.0
+contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+contract_sections: [§4 search]
+date: 2026-05-10
+---
+
+# p9-fb-36 — Search filter args
+
+## Goal
+
+agent / 사용자가 검색 범위를 좁힐 수 있도록 CLI / MCP 에 filter flag 추가. 기존 `SearchFilters` 도메인 type 의 4 필드 (tags_any / lang / path_glob / trust_min) 를 CLI 표면에 노출하고, 신규 3 필드 (media / ingested_after / doc_id) 추가. wire schema 변경 없음 (input-only). filter 적용 layer = SQLite WHERE (lexical) + over-fetch + post-filter (vector). AND 조합 의미 고정.
+
+## Behavior contract
+
+### CLI flags on `kebab search`
+
+7 flags 추가, 모두 optional. 비어있으면 미적용 (기존 동작 보존):
+
+| flag | 의미 | repeat? |
+|------|------|---------|
+| `--tag <name>` | doc 의 `metadata.tags` 안에 매칭 (OR-within) | yes (`--tag rust --tag async` = `tag IN (rust,async)`) |
+| `--lang <iso>` | `documents.lang` 정확 매칭 | no |
+| `--path-glob <pattern>` | `documents.workspace_path` glob 매칭 | no |
+| `--trust-min <level>` | `documents.trust_level >= level` (enum 순서) | no |
+| `--media <csv>` | `assets.media_type.kind` IN 리스트 (예: `--media md,pdf`) | csv |
+| `--ingested-after <RFC3339>` | `documents.updated_at >= timestamp` | no |
+| `--doc-id <id>` | `documents.doc_id = id` | no |
+
+다중 flag 조합 = AND 결합. 각 flag 안 다중 값 (--tag, --media) = OR.
+
+### Filter validation
+
+- `--ingested-after` RFC3339 파싱 실패 → CLI 진입 시 `error.v1.code = config_invalid`, exit 2.
+- `--media` 의 unknown value (예: `--media foo`) → 매칭 0건 (filter unmatch). 명시적 거절 안 함 (lenient).
+- `--trust-min` clap value_enum 검증 (enum 외 거절).
+- `--doc-id` 형식 검증 안 함 (DocumentId 는 단순 string wrapper). 존재하지 않으면 매칭 0건.
+
+### Filter layer
+
+**Lexical (lexical.rs)**:
+- 기존 SQL builder 의 WHERE 절 확장. `media` / `ingested_after` / `doc_id` 모두 SQL 구문 가능.
+- `media`: `JOIN assets a ON a.asset_id = d.asset_id` + `json_extract(a.media_type, '$.kind') IN (?, ?)` (다중 값).
+- `ingested_after`: `d.updated_at >= ?` (RFC3339 lexicographic compare; UTC `Z` 가정).
+- `doc_id`: `d.doc_id = ?`.
+- path_glob 은 기존 post-filter 그대로.
+
+**Vector (vector.rs)**:
+- 기존 over-fetch (k * 2) + `filter_chunks` 헬퍼에서 SQLite chunks JOIN documents JOIN assets.
+- 같은 WHERE 조건 적용. k 부족 시 truncated.
+
+### Wire shape
+
+기존 wire schema 변경 없음.
+
+- `search_response.v1` (output) — 그대로.
+- `search_hit.v1` (개별 hit) — 그대로.
+- 입력 측 (CLI args / MCP `SearchInput`) 만 확장.
+
+MCP `SearchInput` schema 는 `schemars` derive 로 자동 갱신. 수동 schema 파일 X.
+
+### MCP `SearchInput` 확장
+
+```rust
+pub struct SearchInput {
+    pub query: String,
+    pub mode: Option<String>,
+    pub k: Option<usize>,
+    pub max_tokens: Option<usize>,    // fb-34
+    pub snippet_chars: Option<usize>, // fb-34
+    pub cursor: Option<String>,       // fb-34
+    // p9-fb-36 신규 (모두 optional)
+    pub tags: Option<Vec<String>>,
+    pub lang: Option<String>,
+    pub path_glob: Option<String>,
+    pub trust_min: Option<String>,    // "low" | "medium" | "high"
+    pub media: Option<Vec<String>>,
+    pub ingested_after: Option<String>,  // RFC3339
+    pub doc_id: Option<String>,
+}
+```
+
+input → `SearchFilters` 변환 시 위와 동일 검증 (RFC3339 파싱, trust_level enum). 실패 시 `invalid_input` ErrorV1.
+
+## Allowed / forbidden dependencies
+
+- `kebab-core`: 신규 dep 없음. 기존 type 확장만.
+- `kebab-search`: 신규 dep 없음. public API 변경 없음 (내부 SQL builder 의 WHERE 추가만).
+- `kebab-cli`: clap flag 추가, dispatch 변환.
+- `kebab-mcp`: SearchInput 확장.
+- `kebab-tui`: 변경 없음.
+
+`kebab-core` 의 다른 `kebab-*` crate 의존 금지 룰 그대로.
+
+## Public surface delta
+
+### kebab-core
+
+```rust
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchFilters {
+    pub tags_any: Vec<String>,
+    pub lang: Option<Lang>,
+    pub path_glob: Option<String>,
+    pub trust_min: Option<TrustLevel>,
+    /// p9-fb-36: media_type filter — IN-list of `MediaType.kind` strings
+    /// (e.g. `["markdown", "pdf"]`). Empty Vec = no filter.
+    #[serde(default)]
+    pub media: Vec<String>,
+    /// p9-fb-36: hits whose source doc's `documents.updated_at` is at
+    /// or after this timestamp. None = no filter. RFC3339 / UTC.
+    #[serde(default, with = "time::serde::rfc3339::option")]
+    pub ingested_after: Option<OffsetDateTime>,
+    /// p9-fb-36: restrict hits to a single document. None = no filter.
+    #[serde(default)]
+    pub doc_id: Option<DocumentId>,
+}
+```
+
+`#[serde(default)]` on each new field = backwards-compat (older JSON without these keys deserializes as defaults).
+
+### kebab-search (lexical + vector)
+
+내부 SQL builder 확장만. public API 변경 없음.
+
+### kebab-cli (`Cmd::Search`)
+
+```rust
+Cmd::Search {
+    // 기존
+    query, k, mode, explain, no_cache,
+    max_tokens, snippet_chars, cursor,   // fb-34
+    // p9-fb-36 신규
+    #[arg(long)] tag: Vec<String>,
+    #[arg(long)] lang: Option<String>,
+    #[arg(long)] path_glob: Option<String>,
+    #[arg(long, value_enum)] trust_min: Option<TrustLevelFlag>,
+    #[arg(long, value_delimiter = ',')] media: Vec<String>,
+    #[arg(long)] ingested_after: Option<String>,
+    #[arg(long)] doc_id: Option<String>,
+}
+```
+
+`TrustLevelFlag` 신규 clap value_enum (CLI-internal, kebab-core 의 `TrustLevel` 로 변환).
+
+### kebab-mcp::tools::search
+
+`SearchInput` 7 optional 필드 추가 (위 §MCP `SearchInput` 확장). dispatch 에서 `SearchFilters` 빌드 + 검증.
+
+## Test plan
+
+| kind | description |
+|------|-------------|
+| unit (kebab-core) | `SearchFilters::default()` — 7 필드 모두 비어있음 |
+| unit (kebab-search/lexical) | `media: ["pdf"]` — markdown doc 안 잡힘 |
+| unit (kebab-search/lexical) | `media: ["markdown", "pdf"]` — IN-list 동작 |
+| unit (kebab-search/lexical) | `ingested_after: <어제>` — 어제 이전 doc 안 잡힘 |
+| unit (kebab-search/lexical) | `doc_id: <X>` — 다른 doc 의 chunk 안 잡힘 |
+| unit (kebab-search/lexical) | 다중 filter AND — 모두 만족하는 hit 만 |
+| unit (kebab-search/lexical) | 빈 filter (default) — 기존 동작과 동일 |
+| unit (kebab-search/vector) | 동일 패턴 — `filter_chunks` post-filter |
+| unit (kebab-search) | 알 수 없는 media 값 (`["foo"]`) — empty result, no error |
+| 통합 (kebab-cli) | `kebab search Q --media md --json` wire shape (search_response.v1 그대로) |
+| 통합 (kebab-cli) | `kebab search Q --ingested-after 2020-01-01T00:00:00Z --json` 모든 hit 통과 |
+| 통합 (kebab-cli) | `kebab search Q --ingested-after garbage --json` → `error.v1.code = config_invalid` exit 2 |
+| 통합 (kebab-cli) | `kebab search Q --doc-id <id> --json` 단일 doc 만 |
+| 통합 (kebab-cli) | `kebab search Q --tag rust --tag async --json` IN-list 동작 |
+| 통합 (kebab-mcp) | `mcp__kebab__search` 7 optional 필드 모두 정상 응답 |
+| 통합 (kebab-mcp) | `mcp__kebab__search` invalid `ingested_after` → invalid_input |
+
+## Implementation steps (high-level)
+
+1. `kebab-core::SearchFilters` 3 필드 추가 + 단위 테스트.
+2. `kebab-search/lexical.rs` SQL builder 확장 + 단위 테스트.
+3. `kebab-search/vector.rs` `filter_chunks` 헬퍼 동일 확장 + 단위 테스트.
+4. `kebab-cli::Cmd::Search` 7 flag 추가 + dispatch + RFC3339 파싱.
+5. `kebab-cli` 통합 테스트 (lexical-only, no Ollama).
+6. `kebab-mcp::tools::search::SearchInput` 7 필드 + dispatch + invalid_input 검증.
+7. `kebab-mcp` 통합 테스트.
+8. README + SMOKE — filter 예시.
+9. tasks/INDEX.md / spec status flip.
+10. SKILL.md — `mcp__kebab__search` input shape 갱신.
+
+## Risks / notes
+
+- **`assets.media_type` JSON shape**: `MediaType` enum 의 serde 직렬화 형태가 `{"kind": "markdown"}` 인지, 다른 형태인지 SQLite 저장 형식 확인 필요. `Markdown` 같은 unit variant 는 `"markdown"` 문자열, `Image(...)` / `Audio(...)` 같은 tuple variant 는 `{"image": {...}}` 형태일 가능성. `json_extract` 경로를 그에 맞춰 조정 (e.g. `case when typeof(...) = 'text' then ... else json_extract($.kind) end`).
+- **RFC3339 lexicographic compare**: ingest 시 항상 UTC `Z` 로 저장 (fb-32 ingest path 확인됨). 외부 도구가 다른 offset 으로 강제 update 시 비교 부정확. spec 에 "UTC `Z` 가정" 명시.
+- **path_glob 과 다른 filter 의 ordering**: path_glob 은 post-filter (lexical), 신규 3 개는 SQL — fetch_limit 도달 후 path_glob 으로 추가 cut → final hit 수가 줄 수 있음. 기존 동작과 동일 (path_glob 패턴 유지).
+- **clap `Vec<String>` 의 default**: clap 4 (derive) 에서 미지정 = `Vec::new()`. 자동.
+- **trust_min enum 매핑**: clap value_enum 으로 안전. `TrustLevelFlag` → `TrustLevel` 변환 헬퍼.
+- **SearchFilters serde backwards-compat**: `#[serde(default)]` 로 옛 JSON 무영향. SQLite 안 SearchFilters 직렬 저장 안 함 (request-time only).
+
+## Out of scope
+
+- `--exclude-doc-id` / `--exclude-tag` (exclusion filter).
+- 다중 doc_id (`--doc-id a --doc-id b`) — 단일만.
+- TUI Search 패널 filter UI.
+- Lance metadata pre-filter.
+- tag 시스템 신규 도입 (이미 존재).
+- `--search.default-filter` config (default 값 지정) — agent 가 매번 명시.
+
+## Documentation updates (implementation PR 동시)
+
+- `README.md` — `kebab search` row 의 flag 표기에 7 flag 추가.
+- `docs/SMOKE.md` — filter walkthrough (`--media md --ingested-after 2026-04-01T00:00:00Z` 예시).
+- `tasks/p9/p9-fb-36-search-filters.md` — `status: open → completed`, design/plan 링크.
+- `tasks/INDEX.md` — fb-36 행 ✅.
+- `integrations/claude-code/kebab/SKILL.md` — `mcp__kebab__search` input shape 갱신 (7 필드 명시 + AND 의미 + lenient unknown media).
--- a/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md
+++ b/docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md
@@ -0,0 +1,360 @@
+---
+title: "p9-fb-37 — Trace + stats design"
+phase: P9
+component: kebab-core + kebab-search + kebab-store-sqlite + kebab-app + kebab-cli + kebab-mcp + kebab-tui
+task_id: p9-fb-37
+status: design
+target_version: 0.5.0
+contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+contract_sections: [§4 search, §7 RAG, §10 UX]
+date: 2026-05-10
+---
+
+# p9-fb-37 — Trace + stats
+
+## Goal
+
+retrieval pipeline 가시성 + KB 건강 surface. 두 axes:
+
+- **Trace**: `kebab search Q --trace` — `search_response.v1` 에 optional `trace` 필드 (lexical/vector pre-fusion lists + RRF inputs + per-stage timing). agent / 사용자가 "왜 이 결과가 나왔는지" 진단.
+- **Stats**: `kebab schema --json` 의 기존 `stats` 객체에 4 필드 추가 (media/lang breakdown + index disk bytes + stale doc count). KB 건강 한 눈에.
+
+둘 다 wire schema additive minor — 기존 consumer 무영향. trace 는 opt-in (cost 0 when off), stats 는 항상 채움 (저렴한 GROUP BY).
+
+## Behavior contract
+
+### CLI flag
+
+```
+kebab search <query> [--trace] [--json] [기존 flags ...]
+kebab schema [--json]
+```
+
+`--trace` boolean, default false. 활성 시:
+- HybridRetriever 가 lexical / vector 각 단계 출력 + per-stage timing 캡처.
+- search cache **bypass 강제** (debug intent — cache hit timing 무의미).
+- `--json` 면 `search_response.v1.trace` 채움.
+- non-`--json` 면 hits 출력 후 `Trace:` section pretty-print (lex/vec 카운트 + timing + top 3 hit per stage).
+
+`kebab schema --json` 의 `stats` 4 필드 항상 출력 (no flag).
+
+### Wire shape
+
+**`search_response.v1`** (additive minor — schema bump 없음):
+
+```jsonc
+{
+  "schema_version": "search_response.v1",
+  "hits":           [/* search_hit.v1 */],
+  "next_cursor":    null,
+  "truncated":      false,
+  "trace": {                                  // OPTIONAL — present iff --trace
+    "lexical": [
+      {"chunk_id":"c1","doc_id":"d1","doc_path":"a.md","rank":1,"score":0.42}, ...
+    ],
+    "vector": [
+      {"chunk_id":"c2","doc_id":"d2","doc_path":"b.md","rank":1,"score":0.81}, ...
+    ],
+    "rrf_inputs": [
+      {"chunk_id":"c1","lexical_rank":2,"vector_rank":3,"fusion_score":0.0234}, ...
+    ],
+    "timing": {"lexical_ms":12,"vector_ms":45,"fusion_ms":1,"total_ms":58}
+  }
+}
+```
+
+`#[serde(default, skip_serializing_if = "Option::is_none")]` — `--trace` 없으면 `trace` 키 자체 부재.
+
+**`schema.v1.stats`** (additive minor — schema bump 없음):
+
+```jsonc
+"stats": {
+  "doc_count": 50,
+  "chunk_count": 200,
+  "asset_count": 50,
+  "last_ingest_at": "2026-05-10T12:34:56Z",
+  // fb-37 신규
+  "media_breakdown": {"markdown":12,"pdf":3,"image":5,"audio":0,"other":0},
+  "lang_breakdown":  {"en":10,"ko":5,"null":5},
+  "index_bytes":     {"sqlite":12345678,"lancedb":23456789},
+  "stale_doc_count": 2
+}
+```
+
+- `media_breakdown`: `MEDIA_KINDS` (markdown/pdf/image/audio/other) 5 키 항상 채움 (0 포함). `assets.media_type` JSON 의 dual shape (text vs object) 는 fb-36 과 동일한 CASE WHEN 패턴.
+- `lang_breakdown`: 비어있을 수 있음 (corpus 비면 `{}`). NULL lang 은 `"null"` 문자열 키.
+- `index_bytes.sqlite` = `*.sqlite` + `*.sqlite-wal` + `*.sqlite-shm` 합. `lancedb` = 디렉터리 recursive 합 (없으면 0).
+- `stale_doc_count` = `documents.updated_at < (now - threshold_days)` count. `config.search.stale_threshold_days = 0` 이면 항상 0 (fb-32 의미).
+
+### Edge cases
+
+| 상황 | 동작 |
+|------|------|
+| `--trace --mode lexical` | `vector: []`, `vector_ms: 0`. rrf_inputs 모두 `vector_rank: null` |
+| `--trace --mode vector` | 대칭 |
+| `--trace` cache 가 hit 가능 query | cache bypass 강제, fresh run |
+| 빈 corpus | hits=[], trace lex/vec=[], timing 정상 (모두 작은 값) |
+| index_bytes lancedb 디렉터리 부재 | 0 |
+| sqlite WAL/SHM aux 파일 부재 | 메인 `.sqlite` 만 합산 |
+| stale_doc_count threshold=0 | 0 (fb-32) |
+| cursor pagination + `--trace` | 첫 호출 trace, next_cursor 따라 재호출 trace 부재 (재요청 필요) |
+| `--trace` non-`--json` mode | hits + trace 텍스트 출력 (lex/vec count, timing, top 3 per stage) |
+
+### MCP `SearchInput` 확장
+
+```rust
+pub struct SearchInput {
+    pub query: String,
+    pub mode: Option<String>,
+    pub k: Option<usize>,
+    pub max_tokens: Option<usize>,    // fb-34
+    pub snippet_chars: Option<usize>, // fb-34
+    pub cursor: Option<String>,       // fb-34
+    pub tags: Option<Vec<String>>,    // fb-36
+    pub lang: Option<String>,         // fb-36
+    pub path_glob: Option<String>,    // fb-36
+    pub trust_min: Option<String>,    // fb-36
+    pub media: Option<Vec<String>>,   // fb-36
+    pub ingested_after: Option<String>, // fb-36
+    pub doc_id: Option<String>,       // fb-36
+    // fb-37
+    pub trace: Option<bool>,
+}
+```
+
+`Some(true)` = trace ON, `Some(false)` / `None` = OFF. 출력은 wire 와 동일 (trace 필드 mirror).
+
+### TUI Search pane
+
+- 결과 표시 중 (`SearchPane.results` 비어있지 않음) `t` keybind → `TracePopup` 모달.
+- TUI 가 `kebab_app::search_with_trace_with_config` 재호출 (현재 query, k, mode, filters 전부).
+- popup: 단일 scroll list (lex section / vec section / rrf section 헤더로 구분), `Esc` 닫기, `j/k` 또는 ↑↓ scroll.
+- 기존 inspect pane 무수정.
+
+## Allowed / forbidden dependencies
+
+- `kebab-core`: 신규 dep 없음. domain types 추가만.
+- `kebab-store-sqlite`: 신규 dep 없음. rusqlite + std::fs 만.
+- `kebab-search`: 신규 dep 없음. std::time::Instant 사용.
+- `kebab-app`: 신규 dep 없음. facade 확장.
+- `kebab-cli`: 신규 dep 없음. clap flag 추가.
+- `kebab-mcp`: 신규 dep 없음. SearchInput 확장.
+- `kebab-tui`: 신규 dep 없음. ratatui popup widget.
+
+`kebab-core` 의 다른 `kebab-*` 의존 금지 룰 그대로. UI 크레이트는 facade 만.
+
+## Public surface delta
+
+### kebab-core (`search.rs`)
+
+```rust
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct SearchTrace {
+    pub lexical:    Vec<TraceCandidate>,
+    pub vector:     Vec<TraceCandidate>,
+    pub rrf_inputs: Vec<TraceFusionInput>,
+    pub timing:     TraceTiming,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceCandidate {
+    pub chunk_id: ChunkId,
+    pub doc_id:   DocumentId,
+    pub doc_path: WorkspacePath,
+    pub rank:     u32,
+    pub score:    f32,
+}
+
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct TraceFusionInput {
+    pub chunk_id:     ChunkId,
+    pub lexical_rank: Option<u32>,
+    pub vector_rank:  Option<u32>,
+    pub fusion_score: f32,
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct TraceTiming {
+    pub lexical_ms: u64,
+    pub vector_ms:  u64,
+    pub fusion_ms:  u64,
+    pub total_ms:   u64,
+}
+```
+
+`IndexStats` 확장 (`stats.rs` 또는 위치 동일):
+
+```rust
+pub struct IndexStats {
+    // 기존
+    pub doc_count:      u64,
+    pub chunk_count:    u64,
+    pub asset_count:    u64,
+    pub last_ingest_at: Option<OffsetDateTime>,
+    // fb-37
+    #[serde(default)]
+    pub media_breakdown: BTreeMap<String, u64>,
+    #[serde(default)]
+    pub lang_breakdown:  BTreeMap<String, u64>,
+    #[serde(default)]
+    pub index_bytes:     IndexBytes,
+    #[serde(default)]
+    pub stale_doc_count: u64,
+}
+
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
+pub struct IndexBytes {
+    pub sqlite:  u64,
+    pub lancedb: u64,
+}
+```
+
+`#[serde(default)]` — 옛 JSON 누락 시 zero-valued 으로 deserialize (backwards-compat).
+
+### kebab-store-sqlite (`stats.rs`)
+
+```rust
+pub fn breakdowns(conn: &rusqlite::Connection, threshold_days: u64)
+    -> rusqlite::Result<(BTreeMap<String,u64>, BTreeMap<String,u64>, u64)>;
+
+pub fn index_bytes(data_dir: &Path) -> std::io::Result<IndexBytes>;
+```
+
+기존 stats helper 가 이 두 함수 호출해 `IndexStats` 채움. 신규 query:
+- media: `SELECT CASE WHEN json_type(media_type)='text' THEN json_extract(media_type,'$') ELSE (SELECT key FROM json_each(media_type) LIMIT 1) END AS kind, COUNT(DISTINCT d.doc_id) FROM documents d JOIN assets a ON a.asset_id=d.asset_id GROUP BY kind`
+- lang: `SELECT COALESCE(lang,'null') AS l, COUNT(*) FROM documents GROUP BY l`
+- stale: `SELECT COUNT(*) FROM documents WHERE updated_at < ?` (threshold_days > 0 일 때만; 0 면 0 반환).
+
+### kebab-search (`hybrid.rs`)
+
+```rust
+impl HybridRetriever {
+    pub fn search_with_trace(&self, query: &SearchQuery)
+        -> Result<(Vec<SearchHit>, SearchTrace)>;
+}
+```
+
+기존 `Retriever::search` 무변경. `search_with_trace` 는 hybrid 전용 (lexical/vector mode 도 한 쪽만 채워 동일 type 반환). 내부:
+1. `Instant::now()` 기록, lex retriever 호출, lex_ms 측정.
+2. 같은 패턴 vec.
+3. fuse — fusion_ms 측정.
+4. trace 빌드: lex/vec 전체 list → TraceCandidate 매핑. rrf_inputs = lex ∪ vec (chunk_id 기준 합집합), 각 entry 의 lexical_rank/vector_rank/fusion_score 캡처. fusion 결과 ranking 과 동일.
+5. total_ms = 처음~끝.
+
+### kebab-app (`app.rs`)
+
+```rust
+#[doc(hidden)]
+pub fn search_with_trace_with_config(
+    cfg: kebab_config::Config,
+    query: &str,
+    opts: SearchOpts,  // 기존 + trace: bool
+) -> Result<(SearchResponse, Option<SearchTrace>)>;
+```
+
+`opts.trace = true` 시:
+- cache bypass (`no_cache = true` 강제).
+- `HybridRetriever::search_with_trace` 호출.
+- `SearchResponse` 빌드 + trace 별도 반환 (caller 가 wire 합성).
+
+기존 `search_with_config` 무변경 (zero-overhead path).
+
+### kebab-cli (`Cmd::Search`)
+
+```rust
+Cmd::Search {
+    // 기존 + fb-34 + fb-36
+    query, k, mode, explain, no_cache,
+    max_tokens, snippet_chars, cursor,
+    tag, lang, path_glob, trust_min, media, ingested_after, doc_id,
+    // fb-37
+    #[arg(long)] trace: bool,
+}
+```
+
+dispatch:
+- `trace == false` → 기존 `search_with_config` 경로.
+- `trace == true` → `search_with_trace_with_config` 호출, wire 합성 시 `search_response.v1` JSON 에 `trace` 필드 inject.
+
+non-`--json` 출력:
+- `--trace` 면 hits 후 `\nTrace:\n  lexical (N hits, Xms): top3...\n  vector (M hits, Yms): top3...\n  rrf (Zms): top3...\n  total: Wms`.
+
+### kebab-mcp (`tools/search.rs`)
+
+`SearchInput.trace: Option<bool>` 추가. dispatch 시 `Some(true)` 이면 위 `_with_trace` 호출. 출력 JSON 에 trace 합성 (wire 와 동일).
+
+### kebab-tui (`search.rs` + `trace_popup.rs` 신규)
+
+- `App` 에 `trace_popup: Option<TracePopupState>` 필드.
+- search pane key handler `t` → `kebab_app::search_with_trace_with_config` (현재 query/opts) 호출 → popup state 채움.
+- `trace_popup.rs`: ratatui Paragraph 또는 List 로 lex/vec/rrf 3 section, scroll, `Esc` 닫기.
+- cheatsheet 에 `t = trace` 한 줄 추가.
+
+## Test plan
+
+| kind | description |
+|------|-------------|
+| unit (kebab-core) | `SearchTrace` serde roundtrip — 모든 필드 |
+| unit (kebab-core) | `IndexStats` 신규 4 필드 default — 비어있는 map / 0 bytes / 0 stale |
+| unit (kebab-store-sqlite) | `breakdowns`: 3 docs (md/md/pdf, en/en/null) → media `{markdown:2,pdf:1,image:0,audio:0,other:0}` (5키 패딩 적용), lang `{en:2,null:1}` |
+| unit (kebab-store-sqlite) | `index_bytes`: temp dir 내 sqlite 파일 + 빈 lancedb dir → sqlite>0, lancedb=0 |
+| unit (kebab-store-sqlite) | `breakdowns` stale_doc_count: threshold 7 day, 8일 전 doc 1 + 어제 doc 2 → 1 |
+| unit (kebab-store-sqlite) | `breakdowns` threshold=0 → stale_doc_count=0 |
+| unit (kebab-search/hybrid) | `search_with_trace`: trace 의 lex/vec list 가 각 단일 retriever 를 직접 호출한 결과와 동일 |
+| unit (kebab-search/hybrid) | timing 모두 정의됨, total ≥ lex+vec+fusion 의 sum (sequential 가정) |
+| unit (kebab-search/hybrid) | mode=lexical → vector=[], vector_ms=0, rrf_inputs.vector_rank 모두 None |
+| 통합 (kebab-cli) | `kebab search Q --trace --json` → trace 키 존재, lexical/vector/rrf_inputs/timing 모두 valid shape |
+| 통합 (kebab-cli) | `kebab search Q --json` (no --trace) → trace 키 부재 |
+| 통합 (kebab-cli) | `kebab schema --json` → media_breakdown 5 키, lang_breakdown 가능 키, index_bytes 두 필드, stale_doc_count 모두 존재 |
+| 통합 (kebab-cli) | 빈 corpus `kebab schema --json` → media_breakdown 5키 모두 0, lang_breakdown {} |
+| 통합 (kebab-cli) | `kebab search Q --trace` (non-json) → stdout 에 `Trace:` section, lex/vec count + timing 표시 |
+| 통합 (kebab-mcp) | search input `trace:true` → 응답 JSON 에 trace 필드 |
+| 통합 (kebab-mcp) | search input `trace` 미지정 → 응답 trace 부재 |
+| TUI (kebab-tui) | search pane 결과 있는 상태에서 `t` 키 → popup 열림 (state transitions) |
+| TUI (kebab-tui) | popup 열린 상태 `Esc` → popup 닫힘 |
+
+`media_breakdown` 5키 패딩 책임: `kebab-store-sqlite::breakdowns` 가 SQL GROUP BY 결과를 받아 `MEDIA_KINDS` 순회해 누락 키 0 으로 채움.
+
+## Implementation steps (high-level)
+
+1. `kebab-core`: SearchTrace + 3 sibling struct + IndexStats 4 필드 + 단위 테스트.
+2. `kebab-store-sqlite::stats`: breakdowns + index_bytes 헬퍼 + 단위 테스트.
+3. `kebab-store-sqlite::stats`: 기존 IndexStats 빌더가 신규 4 필드 채우도록.
+4. `kebab-search::hybrid`: `search_with_trace` 구현 + 단위 테스트.
+5. `kebab-app`: `search_with_trace_with_config` facade + cache bypass.
+6. `kebab-cli::Cmd::Search`: `--trace` flag + dispatch + JSON wire 합성 + non-JSON pretty-print.
+7. `kebab-cli` 통합 테스트.
+8. `kebab-mcp::tools::search`: SearchInput.trace + dispatch + 통합 테스트.
+9. `kebab-tui::search` + `trace_popup`: `t` keybind + popup widget + cheatsheet.
+10. README + SMOKE + INDEX/spec status flip + SKILL.
+
+## Risks / notes
+
+- **timing 정확도**: 현재 hybrid sequential. 추후 병렬화 시 `total_ms = max(lex,vec) + fusion` 으로 재정의 — 그 시점 schema doc note 갱신.
+- **lancedb dir walk cost**: 큰 corpus 에서 O(file count) IO. 도그푸딩 corpus 작아 무시. 큰 corpus 만나면 cache 또는 lazy 도입 검토.
+- **`media_breakdown` JSON shape**: fb-36 과 동일한 CASE WHEN 패턴 재사용 — `MediaType` serde 의 dual shape (text variant vs tuple variant) 처리.
+- **lang null 키**: ASCII string `"null"` 사용. ISO 639 코드는 2–3자이고 `"null"` 은 4자라 어떤 코드와도 충돌하지 않음.
+- **cache bypass when --trace**: agent 가 인지해야 (SKILL/README 명시). 안 그러면 trace timing 이 cache hit 의 sub-ms 보고할 위험.
+- **wire backwards-compat**: `trace` 필드 optional + skip_serializing_if. `IndexStats` 신규 필드는 `#[serde(default)]` 로 새 reader 가 신규 필드 없는 옛 JSON 도 deserialize 가능 (누락 필드는 zero-value). 신규 필드는 additive 라 기존 consumer 무영향.
+- **TUI popup**: 별도 `t` 키. 충돌 검사 — 현재 search pane keybinds 확인 (i=inspect, /=focus, j/k=move, n=next, p=prev). `t` 미사용.
+
+## Out of scope
+
+- per-stage filter 적용 전/후 카운트 (filter-debug 별도 작업).
+- search 단계 병렬화 (sequential 유지).
+- lance 테이블 별 / column 별 index_bytes (단일 sum).
+- stats 시계열 (corpus_revision history).
+- `--trace-level` verbosity (single boolean).
+- TUI inspect pane 안 trace 통합 (search popup 으로 격리).
+- `kebab stats` 별도 명령 (schema 통합 결정).
+- `--explain` flag deprecation 알림 (현재 search dead, 무영향 — 별도 cleanup task).
+
+## Documentation updates (implementation PR 동시)
+
+- `README.md`: `kebab search` row 의 flag 표기에 `--trace` 추가, `kebab schema` row 에 신규 stats 한 줄 언급.
+- `docs/SMOKE.md`: `--trace` walkthrough + `kebab schema --json` 출력 sample.
+- `tasks/p9/p9-fb-37-trace-and-stats.md`: `status: open → completed`, design/plan 링크 추가.
+- `tasks/INDEX.md`: fb-37 행 ✅.
+- `integrations/claude-code/kebab/SKILL.md`: `mcp__kebab__search` `trace` 입력 + 출력 trace shape 명시. `kebab schema` 신규 stats 필드 mention.
+- `docs/wire-schema/v1/search_response.schema.json`: `trace` optional 필드 추가.
+- `docs/wire-schema/v1/schema.schema.json`: `stats` 4 신규 필드 추가.
--- a/docs/wire-schema/v1/answer_event.schema.json
+++ b/docs/wire-schema/v1/answer_event.schema.json
@@ -0,0 +1,17 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://kb.local/wire/v1/answer_event.schema.json",
+  "title": "AnswerEvent v1",
+  "description": "Streaming event emitted by `kebab ask --stream`. One event per line on stderr (ndjson). Discriminated by `kind`. The success path closes with `final`; refusal paths (score_gate / no_chunks) emit only `retrieval_done` and rely on the stdout `answer.v1` line for the canonical signal. Cancel paths (BrokenPipe) may emit any prefix and stop. Final stdout line is always `answer.v1` for backwards compat (see ingest_progress.v1 precedent).",
+  "type": "object",
+  "required": ["schema_version", "kind", "ts"],
+  "properties": {
+    "schema_version": { "const": "answer_event.v1" },
+    "kind":           { "enum": ["retrieval_done", "token", "final"] },
+    "ts":             { "type": "string", "format": "date-time" },
+    "hits":           { "type": "array",  "description": "retrieval_done: search_hit.v1[]" },
+    "delta":          { "type": "string", "description": "token: incremental string chunk" },
+    "turn_index":     { "type": ["integer", "null"], "minimum": 0, "description": "token: matches Answer.turn_index" },
+    "answer":         { "type": "object", "description": "final: complete answer.v1 payload" }
+  }
+}
--- a/docs/wire-schema/v1/fetch_result.schema.json
+++ b/docs/wire-schema/v1/fetch_result.schema.json
@@ -0,0 +1,24 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://kb.local/wire/v1/fetch_result.schema.json",
+  "title": "FetchResult v1",
+  "description": "Verbatim text fetch from the indexed corpus. Discriminated by `kind`. All text is normalized markdown sourced from `CanonicalDocument` / `chunks.text` — original raw bytes are not exposed. PDF / audio span fetch returns `error.v1.code = span_not_supported`.",
+  "type": "object",
+  "required": ["schema_version", "kind", "doc_id", "doc_path", "indexed_at", "stale", "truncated"],
+  "properties": {
+    "schema_version": { "const": "fetch_result.v1" },
+    "kind":           { "enum": ["chunk", "doc", "span"] },
+    "doc_id":         { "type": "string" },
+    "doc_path":       { "type": "string" },
+    "indexed_at":     { "type": "string", "format": "date-time", "description": "fb-32 documents.updated_at" },
+    "stale":          { "type": "boolean", "description": "fb-32 staleness flag against config.search.stale_threshold_days" },
+    "chunk":          { "type": "object", "description": "kind=chunk: target chunk_inspection.v1 payload" },
+    "context_before": { "type": "array", "description": "kind=chunk: --context N preceding chunks (ordinal-sorted)" },
+    "context_after":  { "type": "array", "description": "kind=chunk: --context N following chunks (ordinal-sorted)" },
+    "text":           { "type": "string", "description": "kind=doc/span: markdown text (truncated if budget tripped)" },
+    "line_start":     { "type": ["integer", "null"], "minimum": 1, "description": "kind=span: requested start line (1-based)" },
+    "line_end":       { "type": ["integer", "null"], "minimum": 1, "description": "kind=span: requested end line (1-based, inclusive)" },
+    "effective_end":  { "type": ["integer", "null"], "minimum": 0, "description": "kind=span: actual end line of emitted text (1-based, inclusive). Equals `line_end` on full slice; less than `line_end` when (a) requested range exceeded total lines (line clamp) or (b) `--max-tokens` budget trimmed the tail. Special case: `line_start - 1` (which is 0 when line_start=1) signals the entire requested range was beyond doc end — returned `text` is empty." },
+    "truncated":      { "type": "boolean", "description": "kind=doc/span: budget forced text truncation. Always false for chunk." }
+  }
+}
--- a/docs/wire-schema/v1/schema.schema.json
+++ b/docs/wire-schema/v1/schema.schema.json
@@ -54,6 +54,30 @@
            { "type": "string", "format": "date-time" },
            { "type": "null" }
          ]
+        },
+        "media_breakdown": {
+          "type": "object",
+          "description": "p9-fb-37: per-media-kind doc count. Always carries all 5 keys (markdown/pdf/image/audio/other); kinds with no docs report 0.",
+          "additionalProperties": { "type": "integer", "minimum": 0 }
+        },
+        "lang_breakdown": {
+          "type": "object",
+          "description": "p9-fb-37: per-language doc count. NULL lang keyed as the literal string 'null'. Map may be empty on empty corpus.",
+          "additionalProperties": { "type": "integer", "minimum": 0 }
+        },
+        "index_bytes": {
+          "type": "object",
+          "description": "p9-fb-37: on-disk byte sums.",
+          "required": ["sqlite", "lancedb"],
+          "properties": {
+            "sqlite":  { "type": "integer", "minimum": 0 },
+            "lancedb": { "type": "integer", "minimum": 0 }
+          }
+        },
+        "stale_doc_count": {
+          "type": "integer",
+          "minimum": 0,
+          "description": "p9-fb-37: count of docs whose updated_at is older than now minus config.search.stale_threshold_days. Always 0 when the threshold is 0."
        }
      }
    }
--- a/docs/wire-schema/v1/search_response.schema.json
+++ b/docs/wire-schema/v1/search_response.schema.json
@@ -0,0 +1,34 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://kb.local/wire/v1/search_response.schema.json",
+  "title": "SearchResponse v1",
+  "description": "Top-level wrapper for `kebab search --json` output. Replaces the bare `search_hit.v1[]` array — wraps it with pagination + truncation metadata. Token counts are approximate (chars/4 estimate, no tokenizer dep). On `truncated: true`, caller may either widen `--max-tokens` or follow `next_cursor` for the next page. Stale `next_cursor` (corpus_revision changed since issued) returns `error.v1.code = stale_cursor`.",
+  "type": "object",
+  "required": ["schema_version", "hits", "next_cursor", "truncated"],
+  "properties": {
+    "schema_version": { "const": "search_response.v1" },
+    "hits":           { "type": "array", "description": "search_hit.v1[]" },
+    "next_cursor":    { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." },
+    "truncated":      { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." },
+    "trace": {
+      "type": "object",
+      "description": "p9-fb-37: present iff caller passed --trace / SearchOpts.trace=true. Lex/vec pre-fusion lists + RRF union + per-stage timing.",
+      "required": ["lexical", "vector", "rrf_inputs", "timing"],
+      "properties": {
+        "lexical":   { "type": "array", "items": { "type": "object" } },
+        "vector":    { "type": "array", "items": { "type": "object" } },
+        "rrf_inputs":{ "type": "array", "items": { "type": "object" } },
+        "timing": {
+          "type": "object",
+          "required": ["lexical_ms", "vector_ms", "fusion_ms", "total_ms"],
+          "properties": {
+            "lexical_ms": { "type": "integer", "minimum": 0 },
+            "vector_ms":  { "type": "integer", "minimum": 0 },
+            "fusion_ms":  { "type": "integer", "minimum": 0 },
+            "total_ms":   { "type": "integer", "minimum": 0 }
+          }
+        }
+      }
+    }
+  }
+}
--- a/integrations/claude-code/kebab/SKILL.md
+++ b/integrations/claude-code/kebab/SKILL.md
@@ -28,12 +28,13 @@ User-specific trigger keywords (team names, system names, internal acronyms) bel

 ## MCP tools (preferred)

-When `kebab` is registered as an MCP server (see `~/.claude/mcp.json` example below), six tools are exposed as `mcp__kebab__<name>`:
+When `kebab` is registered as an MCP server (see `~/.claude/mcp.json` example below), seven tools are exposed as `mcp__kebab__<name>`:

 | tool | purpose | mutation |
 |------|---------|----------|
-| `mcp__kebab__search` | corpus search → `search_hit.v1[]` | no |
+| `mcp__kebab__search` | corpus search → `search_response.v1` (`{hits, next_cursor, truncated}`) | no |
 | `mcp__kebab__ask` | RAG answer → `answer.v1` | no |
+| `mcp__kebab__fetch` | verbatim text → `fetch_result.v1` (chunk / doc / span) | no |
 | `mcp__kebab__schema` | capability discovery → `schema.v1` | no |
 | `mcp__kebab__doctor` | health check → `doctor.v1` | no |
 | `mcp__kebab__ingest_file` | save single file → `ingest_report.v1` | yes |
@@ -47,12 +48,16 @@ Use when the user wants to **find** a doc, or when you (the model) need raw chun

 Input:
 ```json
-{ "query": "<query>", "mode": "hybrid", "k": 10 }
+{ "query": "<query>", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null, "tags": null, "lang": null, "path_glob": null, "trust_min": null, "media": null, "ingested_after": null, "doc_id": null, "trace": null }
 ```

 - `mode = "hybrid"` is the default-correct choice. Use `"vector"` for semantic-only ("docs about X concept"), `"lexical"` for exact strings ("the literal flag `--foo-bar`").
- Output is `search_hit.v1` array. Key fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`.
+- **`max_tokens` / `snippet_chars` / `cursor` (p9-fb-34)** — agent budget controls. Set `max_tokens` to cap result wire size (chars/4 estimate); set `cursor` to the previous response's `next_cursor` to fetch the next page.
+- **p9-fb-36 filter inputs:** `tags` (string array — OR-within, AND across keys), `lang` (BCP-47 language code), `path_glob` (glob pattern matched against doc path), `trust_min` (`"primary"` | `"secondary"` | `"generated"` — includes that level and above), `media` (string array — IN-list of `"markdown"` | `"pdf"` | `"image"` | `"audio"` | `"other"`; alias `"md"` → `"markdown"`), `ingested_after` (RFC3339 UTC string), `doc_id` (exact doc UUID). AND combinator across keys. Invalid `ingested_after` or unknown `trust_min` → `error.v1.code = invalid_input`. Unknown `media` value → empty hits, no error.
+- Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`.
 - Cite back to the user as `doc_path § heading_path[-1]` so they can open the source.
+- When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one.
+- **`trace: true` (p9-fb-37)** — debug aid. Response carries an extra `trace` block: `lexical[]` + `vector[]` (pre-fusion candidates), `rrf_inputs[]` (RRF union before final cut), and `timing` (`lexical_ms`, `vector_ms`, `fusion_ms`, `total_ms`). Trace bypasses the search cache (always cold). Use sparingly — it bloats the wire response and is for diagnosing "why did this hit / not hit", not normal retrieval.

 ### `mcp__kebab__ask` — when you need the answer

@@ -67,6 +72,22 @@ Input:
 - **If `grounded == false`** → KB doesn't have enough context. Don't paraphrase the refusal as if it were an answer. Tell the user the KB came up dry and fall back to your own knowledge or ask for the source.
 - For follow-up turns on the same topic, pass `session_id` (e.g. `"team-onboarding-2026-05"`) and reuse it across the conversation. Sessions persist until `kebab reset --data-only`.

+### `mcp__kebab__fetch` — when you need raw text
+
+Use after `search` to read the verbatim chunk text + surrounding context, or to pull a full doc / line range.
+
+Input:
+```json
+{ "kind": "chunk", "chunk_id": "<id>", "context": 2 }
+{ "kind": "doc", "doc_id": "<id>", "max_tokens": 1000 }
+{ "kind": "span", "doc_id": "<id>", "line_start": 1, "line_end": 5 }
+```
+
+- `chunk` mode: `context: N` returns ordinal-adjacent chunks before/after for surrounding paragraphs.
+- `doc` mode: full normalized markdown. `max_tokens` (chars/4) caps the response — `truncated: true` when applied.
+- `span` mode: 1-based inclusive line range. PDF / audio docs reject as `error.v1.code = span_not_supported` (use `chunk` mode instead — PDF chunks are page-aligned).
+- `error.v1.code = chunk_not_found` / `doc_not_found` are non-retryable from the same id — re-issue search to get a fresh one.
+
 ## CLI fallback

 If MCP tools aren't in scope (host without MCP support, or `mcp.json` not configured), call the CLI via Bash:
@@ -75,10 +96,13 @@ If MCP tools aren't in scope (host without MCP support, or `mcp.json` not config
 kebab search "<query>" --mode hybrid --json 2>/dev/null
 kebab ask "<question>" --json 2>/dev/null
 kebab ask "<question>" --session <stable-id> --json 2>/dev/null
+kebab ask "<question>" --stream  # ndjson answer_event.v1 on stderr, final answer.v1 on stdout
 ```

 Same wire shapes as MCP. CLI pays cold start (~1-2s) per call — prefer MCP when available.

+`--stream` (p9-fb-33) emits `retrieval_done` → `token`* → `final` events on stderr while the answer streams; the final stdout line is the standard `answer.v1` for backwards compat. Use when you need progressive token consumption; otherwise the default non-streaming path is simpler. Refusal paths (score-gate / no-chunks) emit `retrieval_done` then no `token`/`final` — read stdout `answer.v1` for the canonical refusal signal.
+
 ## MCP host config

 Register `kebab mcp` once in your host's MCP config. For Claude Code, edit `~/.claude/mcp.json`:
@@ -99,7 +123,9 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across
 ## Parsing tips

 - MCP tools return JSON content blocks; CLI prints **one JSON value to stdout**, progress / warnings to stderr. Capture stdout only: `kebab search ... --json 2>/dev/null`.
- `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.[] | {rank, doc_path, heading: .heading_path[-1], snippet}'`.
+- `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.hits[] | {rank, doc_path, heading: .heading_path[-1], snippet}'` (note: `.hits[]`, not `.[]` — fb-34 wrapped the array). Use `--max-tokens N` (CLI) / `max_tokens` (MCP) to cap wire size in advance.
+- Pagination: `search_response.v1.next_cursor` is opaque base64 — pass back as `--cursor` (CLI) or `cursor` (MCP) for the next page. `null` means no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue search to obtain a fresh cursor.
+- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. Independent of `next_cursor`: widen `max_tokens` for fuller snippets, follow `next_cursor` for more hits, or both.
 - `ask`'s `citations[]` mirrors `search_hit.v1` minus retrieval internals — same `doc_path` / `citation` shape.
 - Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear.
 - `search_hit.v1` and `answer.v1.citations[]` carry `indexed_at` (RFC3339) + `stale` (bool). When `stale == true`, the source doc hasn't been re-processed since `config.search.stale_threshold_days`. Surface this caveat to the user when summarizing — the cited snapshot may not reflect current reality.
@@ -108,7 +134,7 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across

 Before using streaming or multi-turn features, probe what this binary supports — call `mcp__kebab__schema` (or CLI `kebab schema --json`):

-Returns `schema.v1`: `wire.schemas` (supported wire ids), `capabilities` (bool flags — e.g. `streaming_ask`, `rag_multi_turn`), `models` (version cascade 6-axis), `stats` (doc/chunk/asset count + last_ingest_at). Gate streaming / session flows on `capabilities.streaming_ask` / `capabilities.rag_multi_turn` being `true`. Cheap call (no LLM), once per session.
+Returns `schema.v1`: `wire.schemas` (supported wire ids), `capabilities` (bool flags — e.g. `streaming_ask`, `rag_multi_turn`), `models` (version cascade 6-axis), `stats` (doc/chunk/asset count + last_ingest_at, plus p9-fb-37 health surface: `media_breakdown` per-kind doc counts (5 zero-padded keys: markdown / pdf / image / audio / other), `lang_breakdown` per BCP-47 lang (NULL keyed as the literal string `"null"`), `index_bytes.{sqlite,lancedb}` on-disk byte sums, `stale_doc_count` for docs older than `config.search.stale_threshold_days`). Gate streaming / session flows on `capabilities.streaming_ask` / `capabilities.rag_multi_turn` being `true`. Cheap call (no LLM), once per session.

 ## Quick health check

--- a/tasks/HOTFIXES.md
+++ b/tasks/HOTFIXES.md
@@ -14,6 +14,34 @@ historical contract that was implemented; this file accumulates the
 deltas so phase 5+ readers can find the live behavior without diffing
 git history.

+## 2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1
+
+**무엇이 바뀌었나**: `kebab search --json` stdout 이 기존 `search_hit.v1[]` 배열에서 신규 `search_response.v1` object 로 교체. wrapper 가 `hits`, `next_cursor`, `truncated` 세 필드를 가짐.
+
+**Spec contract 와의 관계**: 명시적 wire breaking change. spec `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md` 의 §Wire shape 절에 단일 출처 결정.
+
+**의식적 결정**:
+- pagination + truncation metadata 를 `search_hit` 자체에 흡수하면 단일 hit 의 도메인 의미가 오염됨 (모든 hit 가 `next_cursor` 필드 보유 등). top-level wrapper 로 분리하는 쪽이 깨끗.
+- 외부 consumer 영향: 단일 사용자 환경 + Claude Code skill 한 곳. skill 은 fb-34 와 동시 갱신.
+- 이 변경은 search_hit.v1 자체 schema 는 손대지 않음 — 도메인 stable.
+
+**영향 받는 consumer**: kebab-tui (Search 패널 — 변경 불필요, App::search 시그니처 보존), kebab-mcp (search tool — 같은 PR 에서 갱신), Claude Code skill (같은 PR 에서 갱신). 외부 producer/consumer 없음.
+
+**`--no-cache` 의미 변화**: fb-34 이전 `--no-cache` 는 `search_uncached_with_config` 로 cache 자체를 우회. fb-34 는 cached path 위에 `clear_search_cache()` 호출 후 search 실행 — long-lived process (TUI / MCP) 에서는 clear 와 fetch 사이 race window 가 있음. CLI (fresh App per call) 에서는 무영향. 후속 fb-3X 에서 `search_with_opts_uncached` 추가로 격리.
+
+## 2026-05-09 — p9-fb-33: AskOpts.stream_sink type widened to StreamEvent
+
+**무엇이 바뀌었나**: `kebab_rag::AskOpts.stream_sink` 의 타입이 `Option<mpsc::Sender<String>>` 에서 `Option<mpsc::Sender<StreamEvent>>` 로 변경됨. `kebab_app::StreamEvent` 가 새 re-export.
+
+**Spec contract 와의 관계**: `answer_event.v1` (신규 wire schema) 가 단일 sink 로 3 stage (retrieval_done / token / final) 를 운반하도록 강제하면서 자연스럽게 in-process sink 의 type 폭이 넓어진 부산물. spec `docs/superpowers/specs/2026-05-09-p9-fb-33-streaming-ask-design.md` 의 "Domain API change" 절에서 미리 명시. consumer = TUI worker 한 곳 (이번 PR 에서 같이 갱신). 외부 consumer 없음.
+
+**의식적 결정**:
+- single sink 로 retrieval / token / final 세 stage 를 모두 운반하기 위한 필수 타입 변경.
+- 기존 `Sender<String>` 으로는 retrieval / final 단계를 표현할 방법이 없음.
+- internal API 변경이라 wire schema 변경과는 별개 — `answer_event.v1` 은 신규 schema (additive minor at wire layer).
+
+**영향 받는 consumer**: `kebab-tui::ask::spawn_ask_worker` (PR #124 에서 동시 갱신). 외부 통합 없음.
+
 ## 2026-05-09 — p9-fb-32: search_hit.v1 / citation.v1 required-field expansion

 **무엇이 바뀌었나**: `search_hit.v1` 과 `citation.v1` 의 `required` 배열에 `indexed_at` (RFC3339) + `stale` (bool) 두 필드가 추가됨. `schema_version` 은 그대로 (`search_hit.v1` / `citation.v1`).
--- a/tasks/INDEX.md
+++ b/tasks/INDEX.md
@@ -121,11 +121,11 @@ P0~P5 는 직렬. P6~P9 는 P5 이후 병렬 가능.

    ### 🎯 0.4.0 — agent surface refinement (additive only)
    - [p9-fb-32 stale doc indicator](p9/p9-fb-32-stale-doc-indicator.md) — ✅ 머지 + v0.4.0 cut 후보 (2026-05-09)
-    - [p9-fb-33 streaming ask (ndjson delta)](p9/p9-fb-33-streaming-ask.md) — ⏳ 미구현, brainstorm 필요
-    - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ⏳ 미구현, brainstorm 필요
-    - [p9-fb-35 verbatim fetch](p9/p9-fb-35-verbatim-fetch.md) — ⏳ 미구현, brainstorm 필요
-    - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ⏳ 미구현, brainstorm 필요
-    - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ⏳ 미구현, brainstorm 필요 (depends_on 27)
+    - [p9-fb-33 streaming ask (ndjson delta)](p9/p9-fb-33-streaming-ask.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
+    - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
+    - [p9-fb-35 verbatim fetch](p9/p9-fb-35-verbatim-fetch.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
+    - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-10)
+    - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-10)

    ### 🎯 0.5.0 — RAG quality (cascade 동반: V00X + reindex)
    - [p9-fb-38 score semantics](p9/p9-fb-38-score-semantics.md) — ⏳ 미구현, brainstorm 필요
--- a/tasks/p9/p9-fb-33-streaming-ask.md
+++ b/tasks/p9/p9-fb-33-streaming-ask.md
@@ -3,8 +3,8 @@ phase: P9
 component: kebab-cli + kebab-app + wire-schema
 task_id: p9-fb-33
 title: "Streaming ask (ndjson delta) — agent token 즉시 소비"
-status: open
-target_version: 0.4.0
+status: completed
+target_version: 0.5.0
 depends_on: []
 unblocks: []
 contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent 가 token 도착

 # p9-fb-33 — Streaming ask (ndjson delta)

-> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. delta event 형식 / final-only fallback / TUI vs CLI 차이 brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 참조 — live source of truth.
+
+상세 설계: `docs/superpowers/specs/2026-05-09-p9-fb-33-streaming-ask-design.md`.
+구현 계획: `docs/superpowers/plans/2026-05-09-p9-fb-33-streaming-ask.md`.

 ## 증상 / 동기

--- a/tasks/p9/p9-fb-34-output-budget-controls.md
+++ b/tasks/p9/p9-fb-34-output-budget-controls.md
@@ -3,8 +3,8 @@ phase: P9
 component: kebab-cli + kebab-app + wire-schema
 task_id: p9-fb-34
 title: "Output budget controls (--max-tokens / --snippet-chars / pagination)"
-status: open
-target_version: 0.4.0
+status: completed
+target_version: 0.5.0
 depends_on: []
 unblocks: []
 contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent context window 제

 # p9-fb-34 — Output budget controls

-> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. budget 적용 layer (truncate vs k 조정) / cursor 형식 / 기본값 brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 의 `2026-05-09 — p9-fb-34` 항목 참조 — live source of truth.
+
+상세 설계: `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md`.
+구현 계획: `docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md`.

 ## 증상 / 동기

--- a/tasks/p9/p9-fb-35-verbatim-fetch.md
+++ b/tasks/p9/p9-fb-35-verbatim-fetch.md
@@ -3,8 +3,8 @@ phase: P9
 component: kebab-cli + kebab-app + wire-schema
 task_id: p9-fb-35
 title: "Verbatim fetch (`kebab fetch <chunk_id|doc_id>`) — citation deep-link"
-status: open
-target_version: 0.4.0
+status: completed
+target_version: 0.5.0
 depends_on: []
 unblocks: []
 contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent 가 search hit / ci

 # p9-fb-35 — Verbatim fetch

-> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. fetch unit (chunk vs doc vs span) / 주변 context (앞뒤 chunk N 개) / 옵션 정책 brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 참조.
+
+상세 설계: `docs/superpowers/specs/2026-05-09-p9-fb-35-verbatim-fetch-design.md`.
+구현 계획: `docs/superpowers/plans/2026-05-09-p9-fb-35-verbatim-fetch.md`.

 ## 증상 / 동기

@@ -34,7 +37,7 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent 가 search hit / ci
 - chunk_id / doc_id 노출 — 현재 search_hit.v1 에 있는지 확인 + 안정성.
 - context window — N 개 chunk vs N tokens.
 - doc 전체 fetch 의 size 제한 (fb-34 budget 과 통합).
-- pdf / image 의 fetch — 텍스트 추출본 vs 원본 path.
+- pdf / audio 의 fetch — 텍스트 추출본 vs 원본 path. (image OCR 는 markdown line 으로 떨어져 span 허용.)

 ## Risks / notes

--- a/tasks/p9/p9-fb-36-search-filters.md
+++ b/tasks/p9/p9-fb-36-search-filters.md
@@ -3,7 +3,7 @@ phase: P9
 component: kebab-cli + kebab-search + wire-schema
 task_id: p9-fb-36
 title: "Search filter args (--media / --ingested-after / --doc-id / --tag)"
-status: open
+status: completed
 target_version: 0.4.0
 depends_on: []
 unblocks: []
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent 가 검색 범위

 # p9-fb-36 — Search filter args

-> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. filter 종류 / SQLite 쿼리 통합 / Lance vector 필터 적용 layer brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 참조.
+
+상세 설계: `docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md`.
+구현 계획: `docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md`.

 ## 증상 / 동기

--- a/tasks/p9/p9-fb-37-trace-and-stats.md
+++ b/tasks/p9/p9-fb-37-trace-and-stats.md
@@ -3,7 +3,7 @@ phase: P9
 component: kebab-cli + kebab-search + kebab-rag
 task_id: p9-fb-37
 title: "Trace (--trace) + stats — pipeline 가시성"
-status: open
+status: completed
 target_version: 0.4.0
 depends_on: [p9-fb-27]
 unblocks: []
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent / 사용자가 "왜

 # p9-fb-37 — Trace + stats

-> ⏳ **백로그 only — 미구현 (Nice-to-have).** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. trace 의 verbosity level / wire shape / stats 의 별도 명령 vs schema 통합 brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태.
+>
+> - Design: [`docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md`](../../docs/superpowers/specs/2026-05-10-p9-fb-37-trace-and-stats-design.md)
+> - Plan: [`docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md`](../../docs/superpowers/plans/2026-05-10-p9-fb-37-trace-and-stats.md)

 ## 증상 / 동기