From 7210386699c172912cfe65fc4a6d43f52084b367 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 03:26:40 +0900
Subject: [PATCH 01/11] =?UTF-8?q?spec(fb-36):=20search=20filter=20args=20?=
 =?UTF-8?q?=E2=80=94=20design?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`kebab search` 에 7 flag 노출 (기존 4 + 신규 3):
- --tag (반복) / --lang / --path-glob / --trust-min (기존 SearchFilters)
- --media (csv) / --ingested-after (RFC3339) / --doc-id (신규)

filter layer = SQLite WHERE (lexical) + over-fetch+post-filter
(vector). AND 결합. wire schema 무변경 (input only).

`SearchFilters` 3 필드 additive (#[serde(default)] 로 backwards-
compat). MCP SearchInput 7 optional 필드 추가. invalid RFC3339 →
error.v1.code = config_invalid.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ...26-05-10-p9-fb-36-search-filters-design.md | 213 ++++++++++++++++++
 1 file changed, 213 insertions(+)
 create mode 100644 docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md
diff --git a/docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md b/docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md
new file mode 100644
index 0000000..cacf1d0
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md
@@ -0,0 +1,213 @@
+---
+title: "p9-fb-36 — Search filter args design"
+phase: P9
+component: kebab-core + kebab-search + kebab-cli + kebab-mcp
+task_id: p9-fb-36
+status: design
+target_version: 0.5.0
+contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+contract_sections: [§4 search]
+date: 2026-05-10
+---
+
+# p9-fb-36 — Search filter args
+
+## Goal
+
+agent / 사용자가 검색 범위를 좁힐 수 있도록 CLI / MCP 에 filter flag 추가. 기존 `SearchFilters` 도메인 type 의 4 필드 (tags_any / lang / path_glob / trust_min) 를 CLI 표면에 노출하고, 신규 3 필드 (media / ingested_after / doc_id) 추가. wire schema 변경 없음 (input-only). filter 적용 layer = SQLite WHERE (lexical) + over-fetch + post-filter (vector). AND 조합 의미 고정.
+
+## Behavior contract
+
+### CLI flags on `kebab search`
+
+7 flags 추가, 모두 optional. 비어있으면 미적용 (기존 동작 보존):
+
+| flag | 의미 | repeat? |
+|------|------|---------|
+| `--tag <name>` | doc 의 `metadata.tags` 안에 매칭 (OR-within) | yes (`--tag rust --tag async` = `tag IN (rust,async)`) |
+| `--lang <iso>` | `documents.lang` 정확 매칭 | no |
+| `--path-glob <pattern>` | `documents.workspace_path` glob 매칭 | no |
+| `--trust-min <level>` | `documents.trust_level >= level` (enum 순서) | no |
+| `--media <csv>` | `assets.media_type.kind` IN 리스트 (예: `--media md,pdf`; `md` 는 CLI / MCP 에서 `markdown` 으로 alias 정규화) | csv |
+| `--ingested-after <RFC3339>` | `documents.updated_at >= timestamp` | no |
+| `--doc-id <id>` | `documents.doc_id = id` | no |
+
+다중 flag 조합 = AND 결합. 각 flag 안 다중 값 (--tag, --media) = OR.
+
+### Filter validation
+
+- `--ingested-after` RFC3339 파싱 실패 → CLI 진입 시 `error.v1.code = config_invalid`, exit 2.
+- `--media` 의 unknown value (예: `--media foo`) → 매칭 0건 (filter unmatch). 명시적 거절 안 함 (lenient).
+- `--trust-min` clap value_enum 검증 (enum 외 거절).
+- `--doc-id` 형식 검증 안 함 (DocumentId 는 단순 string wrapper). 존재하지 않으면 매칭 0건.
+
+### Filter layer
+
+**Lexical (lexical.rs)**:
+- 기존 SQL builder 의 WHERE 절 확장. `media` / `ingested_after` / `doc_id` 모두 SQL 구문 가능.
+- `media`: `JOIN assets a ON a.asset_id = d.asset_id` + `json_extract(a.media_type, '$.kind') IN (?, ?)` (다중 값).
+- `ingested_after`: `d.updated_at >= ?` (RFC3339 lexicographic compare; UTC `Z` 가정 — bind 값도 UTC 로 변환해 `Z` 접미로 포맷해야 함. `+09:00` 같은 offset 입력을 그대로 포맷하면 문자열 비교가 어긋남).
+- `doc_id`: `d.doc_id = ?`.
+- path_glob 은 기존 post-filter 그대로.
+
+**Vector (vector.rs)**:
+- 기존 over-fetch (k * 2) + `filter_chunks` 헬퍼에서 SQLite chunks JOIN documents JOIN assets.
+- 같은 WHERE 조건 적용. post-filter 후 남은 hit 가 k 미만이면 추가 fetch 없이 그대로 (< k) 반환.
+
+### Wire shape
+
+기존 wire schema 변경 없음.
+
+- `search_response.v1` (output) — 그대로.
+- `search_hit.v1` (개별 hit) — 그대로.
+- 입력 측 (CLI args / MCP `SearchInput`) 만 확장.
+
+MCP `SearchInput` schema 는 `schemars` derive 로 자동 갱신. 수동 schema 파일 X.
+
+### MCP `SearchInput` 확장
+
+```rust
+pub struct SearchInput {
+    pub query: String,
+    pub mode: Option<String>,
+    pub k: Option<usize>,
+    pub max_tokens: Option<usize>,    // fb-34
+    pub snippet_chars: Option<usize>, // fb-34
+    pub cursor: Option<String>,       // fb-34
+    // p9-fb-36 신규 (모두 optional)
+    pub tags: Option<Vec<String>>,
+    pub lang: Option<String>,
+    pub path_glob: Option<String>,
+    pub trust_min: Option<String>,    // "low" | "medium" | "high"
+    pub media: Option<Vec<String>>,
+    pub ingested_after: Option<String>,  // RFC3339
+    pub doc_id: Option<String>,
+}
+```
+
+input → `SearchFilters` 변환 시 위와 동일 검증 (RFC3339 파싱, trust_level enum). 실패 시 `invalid_input` ErrorV1.
+
+## Allowed / forbidden dependencies
+
+- `kebab-core`: 신규 dep 없음. 기존 type 확장만.
+- `kebab-search`: 신규 dep 없음 (SQL builder 안 WHERE 추가만).
+- `kebab-cli`: clap flag 추가, dispatch 변환.
+- `kebab-mcp`: SearchInput 확장.
+- `kebab-tui`: 변경 없음.
+
+`kebab-core` 의 다른 `kebab-*` crate 의존 금지 룰 그대로.
+
+## Public surface delta
+
+### kebab-core
+
+```rust
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchFilters {
+    pub tags_any: Vec<String>,
+    pub lang: Option<Lang>,
+    pub path_glob: Option<String>,
+    pub trust_min: Option<TrustLevel>,
+    /// p9-fb-36: media_type filter — IN-list of `MediaType.kind` strings
+    /// (e.g. `["markdown", "pdf"]`). Empty Vec = no filter.
+    #[serde(default)]
+    pub media: Vec<String>,
+    /// p9-fb-36: hits whose source doc's `documents.updated_at` is at
+    /// or after this timestamp. None = no filter. RFC3339 / UTC.
+    #[serde(default, with = "time::serde::rfc3339::option")]
+    pub ingested_after: Option<OffsetDateTime>,
+    /// p9-fb-36: restrict hits to a single document. None = no filter.
+    #[serde(default)]
+    pub doc_id: Option<DocumentId>,
+}
+```
+
+`#[serde(default)]` on each new field = backwards-compat (older JSON without these keys deserializes as defaults).
+
+### kebab-search (lexical + vector)
+
+내부 SQL builder 확장만. public API 변경 없음.
+
+### kebab-cli (`Cmd::Search`)
+
+```rust
+Cmd::Search {
+    // 기존
+    query, k, mode, explain, no_cache,
+    max_tokens, snippet_chars, cursor,   // fb-34
+    // p9-fb-36 신규
+    #[arg(long)] tag: Vec<String>,
+    #[arg(long)] lang: Option<String>,
+    #[arg(long)] path_glob: Option<String>,
+    #[arg(long, value_enum)] trust_min: Option<TrustLevelFlag>,
+    #[arg(long, value_delimiter = ',')] media: Vec<String>,
+    #[arg(long)] ingested_after: Option<String>,
+    #[arg(long)] doc_id: Option<String>,
+}
+```
+
+`TrustLevelFlag` 신규 clap value_enum (CLI-internal, kebab-core 의 `TrustLevel` 로 변환).
+
+### kebab-mcp::tools::search
+
+`SearchInput` 7 optional 필드 추가 (위 §MCP `SearchInput` 확장). dispatch 에서 `SearchFilters` 빌드 + 검증.
+
+## Test plan
+
+| kind | description |
+|------|-------------|
+| unit (kebab-core) | `SearchFilters::default()` — 7 필드 모두 비어있음 |
+| unit (kebab-search/lexical) | `media: ["pdf"]` — markdown doc 안 잡힘 |
+| unit (kebab-search/lexical) | `media: ["markdown", "pdf"]` — IN-list 동작 |
+| unit (kebab-search/lexical) | `ingested_after: <어제>` — 어제 이전 doc 안 잡힘 |
+| unit (kebab-search/lexical) | `doc_id: <X>` — 다른 doc 의 chunk 안 잡힘 |
+| unit (kebab-search/lexical) | 다중 filter AND — 모두 만족하는 hit 만 |
+| unit (kebab-search/lexical) | 빈 filter (default) — 기존 동작과 동일 |
+| unit (kebab-search/vector) | 동일 패턴 — `filter_chunks` post-filter |
+| unit (kebab-search) | 알 수 없는 media 값 (`["foo"]`) — empty result, no error |
+| 통합 (kebab-cli) | `kebab search Q --media md --json` wire shape (search_response.v1 그대로) |
+| 통합 (kebab-cli) | `kebab search Q --ingested-after 2020-01-01T00:00:00Z --json` 모든 hit 통과 |
+| 통합 (kebab-cli) | `kebab search Q --ingested-after garbage --json` → `error.v1.code = config_invalid` exit 2 |
+| 통합 (kebab-cli) | `kebab search Q --doc-id <id> --json` 단일 doc 만 |
+| 통합 (kebab-cli) | `kebab search Q --tag rust --tag async --json` IN-list 동작 |
+| 통합 (kebab-mcp) | `mcp__kebab__search` 7 optional 필드 모두 정상 응답 |
+| 통합 (kebab-mcp) | `mcp__kebab__search` invalid `ingested_after` → invalid_input |
+
+## Implementation steps (high-level)
+
+1. `kebab-core::SearchFilters` 3 필드 추가 + 단위 테스트.
+2. `kebab-search/lexical.rs` SQL builder 확장 + 단위 테스트.
+3. `kebab-search/vector.rs` `filter_chunks` 헬퍼 동일 확장 + 단위 테스트.
+4. `kebab-cli::Cmd::Search` 7 flag 추가 + dispatch + RFC3339 파싱.
+5. `kebab-cli` 통합 테스트 (lexical-only, no Ollama).
+6. `kebab-mcp::tools::search::SearchInput` 7 필드 + dispatch + invalid_input 검증.
+7. `kebab-mcp` 통합 테스트.
+8. README + SMOKE — filter 예시.
+9. tasks/INDEX.md / spec status flip.
+10. SKILL.md — `mcp__kebab__search` input shape 갱신.
+
+## Risks / notes
+
+- **`assets.media_type` JSON shape**: `MediaType` enum 의 serde 직렬화 형태가 `{"kind": "markdown"}` 인지, 다른 형태인지 SQLite 저장 형식 확인 필요. `Markdown` 같은 unit variant 는 `"markdown"` 문자열, `Image(...)` / `Audio(...)` 같은 tuple variant 는 `{"image": {...}}` 형태일 가능성. `json_extract` 경로를 그에 맞춰 조정 (e.g. `case when typeof(...) = 'text' then ... else json_extract($.kind) end`).
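+- **RFC3339 lexicographic compare**: ingest 시 항상 UTC `Z` 로 저장 (fb-32 ingest path 확인됨). 외부 도구가 다른 offset 으로 강제 update 시 비교 부정확. spec 에 "UTC `Z` 가정" 명시. fractional seconds 유무가 섞여도 같은 초 안에서 순서가 뒤집힘 (`.` < `Z` 이므로 `…00.5Z` < `…00Z`) — 저장 값과 bind 값의 초 정밀도 통일 필요.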
+- **path_glob 과 다른 filter 의 ordering**: path_glob 은 post-filter (lexical), 신규 3 개는 SQL — fetch_limit 도달 후 path_glob 으로 추가 cut → final hit 수가 줄 수 있음. 기존 동작과 동일 (path_glob 패턴 유지).
+- **clap `Vec<String>` 의 default**: clap 4 에서 미지정 = `Vec::new()`. 자동.
+- **trust_min enum 매핑**: clap value_enum 으로 안전. `TrustLevelFlag` → `TrustLevel` 변환 헬퍼.
+- **SearchFilters serde backwards-compat**: `#[serde(default)]` 로 옛 JSON 무영향. SQLite 안 SearchFilters 직렬 저장 안 함 (request-time only).
+
+## Out of scope
+
+- `--exclude-doc-id` / `--exclude-tag` (exclusion filter).
+- 다중 doc_id (`--doc-id a --doc-id b`) — 단일만.
+- TUI Search 패널 filter UI.
+- Lance metadata pre-filter.
+- tag 시스템 신규 도입 (이미 존재).
+- `--search.default-filter` config (default 값 지정) — agent 가 매번 명시.
+
+## Documentation updates (implementation PR 동시)
+
+- `README.md` — `kebab search` row 의 flag 표기에 7 flag 추가.
+- `docs/SMOKE.md` — filter walkthrough (`--media md --ingested-after 2026-04-01T00:00:00Z` 예시).
+- `tasks/p9/p9-fb-36-search-filters.md` — `status: open → completed`, design/plan 링크.
+- `tasks/INDEX.md` — fb-36 행 ✅.
+- `integrations/claude-code/kebab/SKILL.md` — `mcp__kebab__search` input shape 갱신 (7 필드 명시 + AND 의미 + lenient unknown media).
-- 
2.49.1


From 31c1e059519301674404afe8711bf88bccc94476 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 03:34:39 +0900
Subject: [PATCH 02/11] plan(fb-36): search filter args implementation plan

9 tasks: SearchFilters extension, lexical SQL WHERE, vector
filter_chunks mirror, CLI 7 flags, integration tests, MCP
SearchInput extension, workspace test/clippy, docs, smoke+PR.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../2026-05-10-p9-fb-36-search-filters.md     | 1304 +++++++++++++++++
 1 file changed, 1304 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md

diff --git a/docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md b/docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md
new file mode 100644
index 0000000..23bc018
--- /dev/null
+++ b/docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md
@@ -0,0 +1,1304 @@
+# p9-fb-36 — Search Filter Args Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Expose 7 filter flags on `kebab search` (`--tag`, `--lang`, `--path-glob`, `--trust-min` for existing `SearchFilters` fields plus `--media`, `--ingested-after`, `--doc-id` as new fields). Filter layer = SQLite WHERE for lexical, over-fetch + post-filter for vector. AND combinator. Wire-shape input-only. MCP `kebab__search` SearchInput gains the 7 fields.
+
+**Architecture:** Domain `SearchFilters` gets 3 new optional fields. Lexical retriever's SQL builder extends WHERE clause; vector retriever's `filter_chunks` helper mirrors. CLI dispatch translates clap flags into `SearchFilters`, parsing `--ingested-after` as RFC3339 (config_invalid on failure). MCP `SearchInput` gains 7 optional fields with the same translation. `media_type` JSON column has two shapes (text for unit variants, object for tuple variants) — use `CASE WHEN json_type(media_type) = 'text' THEN json_extract(media_type, '$') ELSE (SELECT key FROM json_each(media_type) LIMIT 1) END` to extract a unified `kind` string.
+
+**Tech Stack:** Rust 2024, clap (value_enum, value_delimiter), serde, time crate (RFC3339), rusqlite (json_extract / json_each / json_type), no new deps.
+
+**Spec:** `docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md`
+
+---
+
+## File Structure
+
+| File | Responsibility | Action |
+|------|----------------|--------|
+| `crates/kebab-core/src/search.rs` | `SearchFilters` 3 new fields + `MEDIA_KINDS` const | modify |
+| `crates/kebab-search/src/lexical.rs` | SQL builder WHERE clause extension (media JOIN assets, ingested_after, doc_id) | modify |
+| `crates/kebab-search/src/vector.rs` | `filter_chunks` helper extension to match | modify |
+| `crates/kebab-cli/src/main.rs` | `Cmd::Search` 7 new flags + dispatch + RFC3339 parsing + `TrustLevelFlag` enum | modify |
+| `crates/kebab-mcp/src/tools/search.rs` | `SearchInput` 7 optional fields + dispatch + invalid_input on bad RFC3339 | modify |
+| `crates/kebab-search/tests/lexical.rs` | filter unit tests (media / ingested_after / doc_id / AND combo) | modify |
+| `crates/kebab-search/tests/hybrid.rs` | vector filter mirror tests | modify |
+| `crates/kebab-cli/tests/wire_search_filters.rs` | NEW — CLI integration tests for 7 flags | create |
+| `crates/kebab-mcp/tests/tools_call_search.rs` | extend with filter input cases | modify |
+| `README.md` | `kebab search` row update | modify |
+| `docs/SMOKE.md` | filter walkthrough | modify |
+| `tasks/p9/p9-fb-36-search-filters.md` | status flip + design/plan links | modify |
+| `tasks/INDEX.md` | fb-36 row → ✅ | modify |
+| `integrations/claude-code/kebab/SKILL.md` | `mcp__kebab__search` input shape doc + filter examples | modify |
+
+---
+
+## Pre-flight
+
+- [ ] **Step 0.1: Branch off main**
+
+```bash
+git checkout main
+git pull
+git checkout -b feat/fb-36-search-filters
+```
+
+- [ ] **Step 0.2: Confirm spec branch reachable**
+
+```bash
+git log --oneline spec/fb-36-search-filters -1
+```
+
+Expected: `7210386 spec(fb-36): search filter args — design`. If spec PR not yet merged, `git merge spec/fb-36-search-filters`.
+
+---
+
+## Task 1: Domain — `SearchFilters` 3 new fields
+
+**Files:**
+- Modify: `crates/kebab-core/src/search.rs`
+
+- [ ] **Step 1.1: Failing test**
+
+Append to `crates/kebab-core/src/search.rs` `#[cfg(test)] mod tests`:
+
+```rust
+#[test]
+fn search_filters_default_includes_new_fb36_fields() {
+    let f = SearchFilters::default();
+    assert!(f.media.is_empty(), "media default empty");
+    assert!(f.ingested_after.is_none(), "ingested_after default None");
+    assert!(f.doc_id.is_none(), "doc_id default None");
+    // existing fields still default
+    assert!(f.tags_any.is_empty());
+    assert!(f.lang.is_none());
+    assert!(f.path_glob.is_none());
+    assert!(f.trust_min.is_none());
+}
+
+#[test]
+fn search_filters_serialize_with_serde_default_compat() {
+    // Old JSON without the new fields must still deserialize.
+    let old: SearchFilters = serde_json::from_str(r#"{"tags_any":[],"lang":null,"path_glob":null,"trust_min":null}"#).unwrap();
+    assert!(old.media.is_empty());
+    assert!(old.ingested_after.is_none());
+    assert!(old.doc_id.is_none());
+}
+```
+
+- [ ] **Step 1.2: Run test (verify failure)**
+
+```bash
+cargo test -p kebab-core search_filters_default_includes_new_fb36_fields
+```
+
+Expected: FAIL — fields don't exist.
+
+- [ ] **Step 1.3: Add the fields**
+
+Edit `SearchFilters` struct in `crates/kebab-core/src/search.rs`:
+
+```rust
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct SearchFilters {
+    pub tags_any: Vec<String>,
+    pub lang: Option<Lang>,
+    pub path_glob: Option<String>,
+    pub trust_min: Option<TrustLevel>,
+    /// p9-fb-36: media_type filter — IN-list of `MediaType.kind`
+    /// strings (`"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`).
+    /// Empty Vec = no filter. Match is on the variant tag only;
+    /// e.g. `["image"]` matches `Image(Png)` and `Image(Jpeg)`.
+    #[serde(default)]
+    pub media: Vec<String>,
+    /// p9-fb-36: hits whose source doc's `documents.updated_at` is at
+    /// or after this timestamp. None = no filter. RFC3339 / UTC.
+    #[serde(default, with = "time::serde::rfc3339::option")]
+    pub ingested_after: Option<OffsetDateTime>,
+    /// p9-fb-36: restrict hits to a single document. None = no filter.
+    #[serde(default)]
+    pub doc_id: Option<DocumentId>,
+}
+```
+
+`OffsetDateTime` and `DocumentId` should already be in scope in `search.rs`. If either is missing, add the corresponding import:
+
+```rust
+use time::OffsetDateTime;
+use crate::ids::DocumentId;
+```
+
+Also expose a `MEDIA_KINDS` const that downstream code can use for validation / aliases:
+
+```rust
+/// p9-fb-36: canonical kind labels for `SearchFilters.media`. Mirrors
+/// `MediaType` variant tags; CLI / MCP normalize aliases (`md` → `markdown`)
+/// before populating this Vec.
+pub const MEDIA_KINDS: &[&str] = &["markdown", "pdf", "image", "audio", "other"];
+```
+
+- [ ] **Step 1.4: Run tests (verify pass)**
+
+```bash
+cargo test -p kebab-core
+```
+
+Expected: 33+ tests pass (2 new + existing).
+
+Other crates may break (lexical / vector retrievers reference `SearchFilters`). That's expected — Tasks 2/3 fix.
+
+- [ ] **Step 1.5: Commit**
+
+```bash
+git add crates/kebab-core/src/search.rs
+git commit -m "$(cat <<'EOF'
+feat(core): SearchFilters gains media / ingested_after / doc_id (fb-36)
+
+3 additive optional fields. #[serde(default)] preserves
+backwards compat for older JSON without the new keys.
+MEDIA_KINDS const exposes canonical "markdown"/"pdf"/"image"/
+"audio"/"other" labels for downstream alias normalization.
+
+Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Task 2: Lexical retriever — SQL WHERE extension
+
+**Files:**
+- Modify: `crates/kebab-search/src/lexical.rs`
+- Modify: `crates/kebab-search/tests/lexical.rs`
+
+- [ ] **Step 2.1: Failing tests**
+
+Append to `crates/kebab-search/tests/lexical.rs`:
+
+```rust
+#[test]
+fn lexical_filter_by_media() {
+    let env = TestEnv::new();
+    env.insert_doc_with_media("md1.md", "rust ownership", kebab_core::MediaType::Markdown);
+    env.insert_doc_with_media("doc.pdf", "rust pdf body", kebab_core::MediaType::Pdf);
+    let filters = kebab_core::SearchFilters {
+        media: vec!["pdf".to_string()],
+        ..Default::default()
+    };
+    let hits = env.run_search("rust", &filters);
+    assert_eq!(hits.len(), 1, "only pdf doc should match");
+    assert!(hits[0].doc_path.0.ends_with(".pdf"), "got: {}", hits[0].doc_path.0);
+}
+
+#[test]
+fn lexical_filter_by_ingested_after() {
+    let env = TestEnv::new();
+    let old_doc = env.insert_doc_with_updated_at(
+        "old.md",
+        "ingest test",
+        time::macros::datetime!(2020-01-01 00:00:00 UTC),
+    );
+    let new_doc = env.insert_doc_with_updated_at(
+        "new.md",
+        "ingest test",
+        time::macros::datetime!(2026-01-01 00:00:00 UTC),
+    );
+    let filters = kebab_core::SearchFilters {
+        ingested_after: Some(time::macros::datetime!(2025-01-01 00:00:00 UTC)),
+        ..Default::default()
+    };
+    let hits = env.run_search("ingest", &filters);
+    let _ = (old_doc, new_doc);
+    assert_eq!(hits.len(), 1, "only post-2025 doc matches");
+}
+
+#[test]
+fn lexical_filter_by_doc_id() {
+    let env = TestEnv::new();
+    let target = env.insert_doc("a.md", "shared term");
+    env.insert_doc("b.md", "shared term");
+    let filters = kebab_core::SearchFilters {
+        doc_id: Some(target.clone()),
+        ..Default::default()
+    };
+    let hits = env.run_search("shared", &filters);
+    for h in &hits {
+        assert_eq!(h.doc_id, target, "all hits must be from target doc");
+    }
+}
+
+#[test]
+fn lexical_filter_combinator_is_and() {
+    let env = TestEnv::new();
+    let target = env.insert_doc_with_media("a.md", "rust", kebab_core::MediaType::Markdown);
+    env.insert_doc_with_media("b.pdf", "rust", kebab_core::MediaType::Pdf);
+    let filters = kebab_core::SearchFilters {
+        media: vec!["markdown".to_string()],
+        doc_id: Some(target.clone()),
+        ..Default::default()
+    };
+    let hits = env.run_search("rust", &filters);
+    assert!(hits.iter().all(|h| h.doc_id == target));
+}
+
+#[test]
+fn lexical_filter_unknown_media_returns_empty() {
+    let env = TestEnv::new();
+    env.insert_doc("a.md", "rust");
+    let filters = kebab_core::SearchFilters {
+        media: vec!["nonexistent_kind".to_string()],
+        ..Default::default()
+    };
+    let hits = env.run_search("rust", &filters);
+    assert!(hits.is_empty(), "unknown media → no hits, no error");
+}
+
+#[test]
+fn lexical_empty_filters_match_default_behavior() {
+    let env = TestEnv::new();
+    env.insert_doc("a.md", "rust");
+    let with_default = env.run_search("rust", &kebab_core::SearchFilters::default());
+    assert!(!with_default.is_empty());
+}
+```
+
+The `TestEnv` helper functions (`insert_doc`, `insert_doc_with_media`, `insert_doc_with_updated_at`, `run_search`) need to exist in the test scaffold. Check what's there:
+
+```bash
+grep -n "pub fn insert_doc\|pub fn run_search\|TestEnv" crates/kebab-search/tests/common/mod.rs 2>/dev/null
+ls crates/kebab-search/tests/
+```
+
+If missing, add minimal helpers to `crates/kebab-search/tests/common/mod.rs` (create the file if needed):
+
+```rust
+//! Lexical-test helpers shared across kebab-search integration tests.
+
+use std::sync::Arc;
+
+use kebab_core::{
+    DocumentId, MediaType, SearchFilters, SearchHit, SearchMode, SearchQuery,
+};
+use kebab_search::LexicalRetriever;
+use kebab_store_sqlite::SqliteStore;
+use time::OffsetDateTime;
+
+pub struct TestEnv {
+    pub store: Arc<SqliteStore>,
+    pub retriever: LexicalRetriever,
+    next: std::cell::Cell<usize>,
+}
+
+impl TestEnv {
+    pub fn new() -> Self {
+        // ... use whatever the existing tests do for store init.
+        // Mirror the pattern in crates/kebab-search/tests/lexical.rs that
+        // sets up an in-memory or tempdir SqliteStore + LexicalRetriever.
+        unimplemented!("copy the existing test scaffold's setup")
+    }
+
+    pub fn insert_doc(&self, path: &str, body: &str) -> DocumentId {
+        self.insert_doc_with_media(path, body, MediaType::Markdown)
+    }
+
+    pub fn insert_doc_with_media(
+        &self,
+        path: &str,
+        body: &str,
+        media: MediaType,
+    ) -> DocumentId {
+        self.insert_doc_with_updated_at(path, body, OffsetDateTime::now_utc())
+            // (set the media via a separate write or threading through
+            // whatever fixture helper the existing tests use)
+    }
+
+    pub fn insert_doc_with_updated_at(
+        &self,
+        path: &str,
+        body: &str,
+        updated_at: OffsetDateTime,
+    ) -> DocumentId {
+        // Insert a synthetic document + asset row + chunks + FTS row.
+        // Match the pattern used in the existing lexical / hybrid tests
+        // (which already use TestEnv-like helpers — adapt their signatures).
+        unimplemented!("see existing test scaffold")
+    }
+
+    pub fn run_search(&self, query: &str, filters: &SearchFilters) -> Vec<SearchHit> {
+        let q = SearchQuery {
+            text: query.to_string(),
+            mode: SearchMode::Lexical,
+            k: 10,
+            filters: filters.clone(),
+        };
+        kebab_core::Retriever::search(&self.retriever, &q).expect("search")
+    }
+}
+```
+
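+The `unimplemented!()` placeholders must be replaced with concrete code — see the existing test setup in `crates/kebab-search/tests/lexical.rs` for the right pattern (likely something like `init_store_with_doc_and_chunk(...)`). Note that the `insert_doc_with_media` sketch above does not yet use its `media` argument; it must actually write the asset's `media_type`, otherwise the media filter tests cannot pass. Take the time to study what's there and mirror it. The plan can't enumerate the full scaffold here because it depends on the codebase's existing fixtures.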
+
+If the existing tests already have similar helpers under different names, REUSE them — don't add a new TestEnv. The new fixture-needing helpers (`insert_doc_with_media`, `insert_doc_with_updated_at`) are the only genuinely new pieces.
+
+- [ ] **Step 2.2: Run tests (verify failure)**
+
+```bash
+cargo test -p kebab-search --test lexical lexical_filter_by_media
+```
+
+Expected: FAIL — `lexical.rs` doesn't yet handle the `media` filter; the test will either fail to compile (helpers missing) or fail its assertion.
+
+- [ ] **Step 2.3: Implement SQL WHERE extension**
+
+Edit `crates/kebab-search/src/lexical.rs::run_query`. Find the existing WHERE clause builder block (after `tags_any` / `lang` / `trust_min` arms — see line ~280-320). Add the 3 new arms BEFORE the `path_glob` post-filter (path_glob stays in Rust):
+
+```rust
+// p9-fb-36: media_type filter (IN-list).
+// `assets.media_type` JSON has two shapes:
+//   - unit variant (Markdown / Pdf): JSON text, e.g. `"markdown"`
+//   - tuple variant (Image(Png) / Audio(Mp3) / Other(s)): JSON object,
+//     e.g. `{"image": "png"}`
+// Extract a unified "kind" string for both shapes via:
+//   CASE WHEN json_type = 'text' THEN json_extract($)
+//        ELSE (first object key)
+//   END IN (?, ...)
+if !filters.media.is_empty() {
+    let placeholders: Vec<&str> = std::iter::repeat_n("?", filters.media.len()).collect();
+    let placeholders = placeholders.join(",");
+    sql.push_str(&format!(
+        " AND f.doc_id IN (SELECT doc_id FROM documents d2 \
+           JOIN assets a ON a.asset_id = d2.asset_id \
+           WHERE CASE \
+             WHEN json_type(a.media_type) = 'text' THEN json_extract(a.media_type, '$') \
+             ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+           END IN ({placeholders}))"
+    ));
+    for kind in &filters.media {
+        params.push(Box::new(kind.clone()));
+    }
+}
+
+// p9-fb-36: ingested_after filter.
+// `documents.updated_at` is RFC3339 stored as TEXT (always UTC `Z` per
+// fb-32 ingest path), so lexicographic >= compare is correct.
+if let Some(after) = &filters.ingested_after {
+    let formatted = after
+        .format(&time::format_description::well_known::Rfc3339)
+        .expect("OffsetDateTime formats to RFC3339");
+    sql.push_str(" AND d.updated_at >= ?");
+    params.push(Box::new(formatted));
+}
+
+// p9-fb-36: doc_id filter — single-doc scoping.
+if let Some(id) = &filters.doc_id {
+    sql.push_str(" AND d.doc_id = ?");
+    params.push(Box::new(id.0.clone()));
+}
+```
+
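+The exact `params` API depends on the existing builder pattern in `lexical.rs`. The current code uses something like `let mut params: Vec<Box<dyn ToSql>> = vec![...];`. Match that exactly. Don't introduce a new pattern. One correctness detail to carry over: the lexicographic `updated_at >= ?` compare only holds when the bound value is also UTC with a `Z` suffix, so convert first (`after.to_offset(time::UtcOffset::UTC)`) before formatting — a user-supplied `+09:00` offset would otherwise be formatted as-is and compare incorrectly.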
+
+If the existing SQL has joins on `documents d` already (via `chunks → documents`), the `media` subquery uses `documents d2` to avoid alias collision. Read the existing SQL string to verify.
+
+- [ ] **Step 2.4: Run tests (verify pass)**
+
+```bash
+cargo test -p kebab-search --test lexical
+```
+
+Expected: all PASS, including 6 new fb-36 tests.
+
+If the helpers in Step 2.1 weren't fleshed out, this is the moment to fill them in — they're the bridge between the test text above and the actual store setup. The store crate's `tests/contract_roundtrip.rs` is a good model for inserting an asset + document + chunks fixture.
+
+- [ ] **Step 2.5: Commit**
+
+```bash
+git add crates/kebab-search/src/lexical.rs crates/kebab-search/tests/
+git commit -m "$(cat <<'EOF'
+feat(search/lexical): media / ingested_after / doc_id filters (fb-36)
+
+SQL WHERE clause extension. media uses CASE WHEN json_type='text'
+to handle both unit (`"markdown"`) and tuple (`{"image":"png"}`)
+MediaType serde shapes. ingested_after relies on RFC3339 lexicographic
+ordering with UTC Z (per fb-32 ingest invariant). doc_id is a simple
+equality. AND combinator with existing tags / lang / trust filters.
+
+Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Task 3: Vector retriever — `filter_chunks` mirror
+
+**Files:**
+- Modify: `crates/kebab-search/src/vector.rs`
+- Modify: `crates/kebab-search/tests/hybrid.rs`
+
+- [ ] **Step 3.1: Failing test**
+
+Append to `crates/kebab-search/tests/hybrid.rs`:
+
+```rust
+#[test]
+fn vector_filter_by_media() {
+    let env = HybridTestEnv::new();
+    env.insert_doc_with_media("md1.md", "rust ownership", kebab_core::MediaType::Markdown);
+    env.insert_doc_with_media("doc.pdf", "rust pdf body", kebab_core::MediaType::Pdf);
+
+    let filters = kebab_core::SearchFilters {
+        media: vec!["pdf".to_string()],
+        ..Default::default()
+    };
+    let hits = env.run_vector_search("rust", &filters);
+    assert_eq!(hits.len(), 1);
+    assert!(hits[0].doc_path.0.ends_with(".pdf"));
+}
+
+#[test]
+fn vector_filter_by_doc_id() {
+    let env = HybridTestEnv::new();
+    let target = env.insert_doc("a.md", "shared");
+    env.insert_doc("b.md", "shared");
+    let filters = kebab_core::SearchFilters {
+        doc_id: Some(target.clone()),
+        ..Default::default()
+    };
+    let hits = env.run_vector_search("shared", &filters);
+    assert!(hits.iter().all(|h| h.doc_id == target));
+}
+```
+
+Mirror the helpers needed in `crates/kebab-search/tests/common/mod.rs` (add `HybridTestEnv` if it doesn't exist; copy the pattern from existing hybrid tests).
+
+- [ ] **Step 3.2: Run tests (verify failure)**
+
+```bash
+cargo test -p kebab-search --test hybrid vector_filter_by_media
+```
+
+Expected: FAIL.
+
+- [ ] **Step 3.3: Implement filter_chunks extension**
+
+Edit `crates/kebab-search/src/vector.rs::filter_chunks` (or whatever helper the vector retriever uses to post-filter SQLite-side after Lance returns chunks). Add the same 3 SQL fragments as Task 2.
+
+If `filter_chunks` builds its own SQL inline, match the lexical pattern verbatim. If it delegates to a shared SQL helper in `kebab-store-sqlite`, refactor: extract the "filter WHERE clause builder" into a small helper used by both. Inspect first:
+
+```bash
+grep -n "filter_chunks\|tags_any\|trust_min\|lang" crates/kebab-search/src/vector.rs | head -10
+```
+
+Decide: in-place duplication vs shared helper. Shared helper is cleaner if the SQL is identical. If the contexts differ (lexical SQL is a single statement, vector SQL is a follow-up `SELECT ... WHERE chunk_id IN (...) AND <filters>`), keep them separate but mirror the new filter pattern exactly.
+
+- [ ] **Step 3.4: Run tests (verify pass)**
+
+```bash
+cargo test -p kebab-search --test hybrid
+cargo test -p kebab-search
+```
+
+Expected: all PASS.
+
+- [ ] **Step 3.5: Commit**
+
+```bash
+git add crates/kebab-search/src/vector.rs crates/kebab-search/tests/
+git commit -m "$(cat <<'EOF'
+feat(search/vector): media / ingested_after / doc_id filters (fb-36)
+
+filter_chunks helper extended with the same 3 WHERE clauses as
+lexical. Vector still over-fetches k * 2 then post-filters; small
+k can return < k hits when filters drop a lot — agent is expected
+to widen k or paginate. AND combinator with existing filters.
+
+Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Task 4: CLI flags + dispatch
+
+**Files:**
+- Modify: `crates/kebab-cli/src/main.rs`
+
+- [ ] **Step 4.1: Add `TrustLevelFlag` clap enum**
+
+Locate the existing `enum Cmd` and `enum ModeFlag` (or similar) declarations. Add near them:
+
+```rust
+#[derive(clap::ValueEnum, Clone, Debug)]
+enum TrustLevelFlag {
+    Trusted,
+    Reviewed,
+    Hearsay,
+    Untrusted,
+}
+
+impl From<TrustLevelFlag> for kebab_core::TrustLevel {
+    fn from(f: TrustLevelFlag) -> Self {
+        match f {
+            TrustLevelFlag::Trusted => kebab_core::TrustLevel::Trusted,
+            TrustLevelFlag::Reviewed => kebab_core::TrustLevel::Reviewed,
+            TrustLevelFlag::Hearsay => kebab_core::TrustLevel::Hearsay,
+            TrustLevelFlag::Untrusted => kebab_core::TrustLevel::Untrusted,
+        }
+    }
+}
+```
+
+If `TrustLevel` variants are different (verify):
+
+```bash
+grep -A 8 "^pub enum TrustLevel" crates/kebab-core/src/metadata.rs
+```
+
+Adapt names accordingly.
+
+- [ ] **Step 4.2: Add 7 flags to `Cmd::Search`**
+
+In the `enum Cmd { ... Search { ... } }` definition, add 7 fields:
+
+```rust
+/// p9-fb-36: filter by `metadata.tags`. Repeatable; OR-within (any tag).
+#[arg(long)]
+tag: Vec<String>,
+
+/// p9-fb-36: filter by `documents.lang` (ISO code).
+#[arg(long)]
+lang: Option<String>,
+
+/// p9-fb-36: filter by `documents.workspace_path` glob.
+#[arg(long)]
+path_glob: Option<String>,
+
+/// p9-fb-36: filter by minimum `documents.trust_level`.
+#[arg(long, value_enum)]
+trust_min: Option<TrustLevelFlag>,
+
+/// p9-fb-36: filter by `assets.media_type` kind. Comma-separated.
+/// Aliases: `md` → `markdown`. Other accepted: `markdown`, `pdf`,
+/// `image`, `audio`, `other`. Unknown values match nothing.
+#[arg(long, value_delimiter = ',')]
+media: Vec<String>,
+
+/// p9-fb-36: filter to docs whose `updated_at` is >= this RFC3339
+/// timestamp (UTC). Invalid format → exit 2 with error.v1
+/// code = config_invalid.
+#[arg(long)]
+ingested_after: Option<String>,
+
+/// p9-fb-36: filter to a single doc by id.
+#[arg(long)]
+doc_id: Option<String>,
+```
+
+- [ ] **Step 4.3: Build SearchFilters in dispatch arm**
+
+In the `Cmd::Search { ... } =>` match arm body, before the `let q = kebab_core::SearchQuery { ... }` line, replace the hardcoded `filters: kebab_core::SearchFilters::default()` with a constructed `SearchFilters`. Also normalize `--media` aliases:
+
+```rust
+fn normalize_media_alias(s: &str) -> String {
+    match s.to_ascii_lowercase().as_str() {
+        "md" => "markdown".to_string(),
+        other => other.to_string(),
+    }
+}
+
+let media_norm: Vec<String> = media.iter().map(|s| normalize_media_alias(s)).collect();
+
+let ingested_after_parsed: Option<time::OffsetDateTime> = match ingested_after.as_deref() {
+    Some(s) => {
+        let parsed = time::OffsetDateTime::parse(
+            s,
+            &time::format_description::well_known::Rfc3339,
+        );
+        match parsed {
+            Ok(ts) => Some(ts),
+            Err(e) => {
+                let err = anyhow::Error::new(kebab_app::StructuredError(kebab_app::ErrorV1 {
+                    schema_version: "error.v1".to_string(),
+                    code: "config_invalid".to_string(),
+                    message: format!("--ingested-after: invalid RFC3339 timestamp '{s}': {e}"),
+                    details: serde_json::Value::Null,
+                    hint: Some("expected format like 2026-04-01T00:00:00Z".to_string()),
+                }));
+                return Err(err);
+            }
+        }
+    }
+    None => None,
+};
+
+let filters = kebab_core::SearchFilters {
+    tags_any: tag.clone(),
+    lang: lang.as_ref().map(|s| kebab_core::Lang(s.clone())),
+    path_glob: path_glob.clone(),
+    trust_min: trust_min.clone().map(Into::into),
+    media: media_norm,
+    ingested_after: ingested_after_parsed,
+    doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())),
+};
+
+let q = kebab_core::SearchQuery {
+    text: query.clone(),
+    mode: (*mode).into(),
+    k: *k,
+    filters,
+};
+```
+
+If `Lang` constructor differs (e.g. `Lang::new(...)` vs `Lang(s)`), check:
+
+```bash
+grep -A 3 "^pub struct Lang\b" crates/kebab-core/src/media.rs
+```
+
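+If the existing `Cmd::Search` arm doesn't currently use `return Err(...)` for failures, check that the enclosing dispatch function returns `anyhow::Result<()>` so that both the explicit `return Err(err)` above and ordinary `?` propagation reach the same top-level error handler. Verify the existing pattern before editing.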
+
+- [ ] **Step 4.4: Build CLI**
+
+```bash
+cargo build -p kebab-cli
+```
+
+Expected: clean.
+
+- [ ] **Step 4.5: Verify --help**
+
+```bash
+cargo run -q -p kebab-cli -- search --help 2>&1 | grep -E "tag|lang|path-glob|trust-min|media|ingested-after|doc-id"
+```
+
+Expected: 7 new flags appear.
+
+- [ ] **Step 4.6: Run kebab-cli tests**
+
+```bash
+cargo test -p kebab-cli
+```
+
+Expected: all PASS, no regressions.
+
+- [ ] **Step 4.7: Commit**
+
+```bash
+git add crates/kebab-cli/src/main.rs
+git commit -m "$(cat <<'EOF'
+feat(cli): kebab search filter flags (fb-36)
+
+7 new flags: --tag (repeatable), --lang, --path-glob,
+--trust-min (value_enum), --media (csv with `md` alias),
+--ingested-after (RFC3339; config_invalid on parse fail),
+--doc-id. Dispatch translates clap values into SearchFilters
+and propagates structured errors through the existing
+StructuredError wrapper from fb-34.
+
+Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Task 5: CLI integration tests
+
+**Files:**
+- Create: `crates/kebab-cli/tests/wire_search_filters.rs`
+- Modify: `crates/kebab-cli/tests/common/mod.rs` (if helper missing)
+
+- [ ] **Step 5.1: Write integration tests**
+
+Create `crates/kebab-cli/tests/wire_search_filters.rs`:
+
+```rust
+//! p9-fb-36: CLI search filter flags.
+
+mod common;
+
+use serde_json::Value;
+
+#[test]
+fn search_with_doc_id_filter_returns_only_target_doc() {
+    let (cfg, ws) = common::write_config();
+    common::ingest(&cfg, &ws, "a.md", "# A\n\nshared term apple\n");
+    common::ingest(&cfg, &ws, "b.md", "# B\n\nshared term banana\n");
+
+    // Find any doc_id via search.
+    let (probe_stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--mode", "lexical", "--json", "--k", "5", "shared"],
+    );
+    let probe: Value = serde_json::from_str(probe_stdout.trim()).expect("probe json");
+    let target_doc_id = probe["hits"][0]["doc_id"]
+        .as_str()
+        .expect("doc_id in first hit")
+        .to_string();
+
+    let (stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--mode", "lexical", "--json", "--doc-id", &target_doc_id, "shared"],
+    );
+    let v: Value = serde_json::from_str(stdout.trim()).expect("filtered json");
+    let hits = v["hits"].as_array().expect("hits array");
+    assert!(!hits.is_empty(), "filter should still match the target doc");
+    for h in hits {
+        assert_eq!(h["doc_id"], target_doc_id);
+    }
+}
+
+#[test]
+fn search_with_invalid_ingested_after_emits_config_invalid() {
+    let (cfg, _ws) = common::write_config();
+
+    let exe = env!("CARGO_BIN_EXE_kebab");
+    let cfg_str = cfg.to_str().expect("utf8");
+    let out = std::process::Command::new(exe)
+        .args([
+            "--config", cfg_str, "--json",
+            "search", "--mode", "lexical",
+            "--ingested-after", "not-a-timestamp",
+            "test",
+        ])
+        .output()
+        .expect("kebab search");
+    assert_ne!(out.status.code(), Some(0));
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    let err_line = stderr
+        .lines()
+        .find(|l| {
+            serde_json::from_str::<Value>(l)
+                .ok()
+                .and_then(|v| v.get("schema_version").and_then(|s| s.as_str()).map(String::from))
+                .as_deref()
+                == Some("error.v1")
+        })
+        .unwrap_or_else(|| panic!("no error.v1 on stderr: {stderr}"));
+    let v: Value = serde_json::from_str(err_line).expect("error.v1 json");
+    assert_eq!(v["code"], "config_invalid");
+    assert!(
+        v["message"].as_str().unwrap_or("").contains("ingested-after"),
+        "message should mention the flag: {v:?}"
+    );
+}
+
+#[test]
+fn search_with_media_filter_md_alias_normalizes_to_markdown() {
+    let (cfg, ws) = common::write_config();
+    common::ingest(&cfg, &ws, "a.md", "# A\n\nrust ownership body\n");
+
+    let (stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--mode", "lexical", "--json", "--media", "md", "rust"],
+    );
+    let v: Value = serde_json::from_str(stdout.trim()).expect("json");
+    let hits = v["hits"].as_array().expect("hits");
+    assert!(!hits.is_empty(), "md alias should match markdown doc");
+}
+
+#[test]
+fn search_with_tag_filter_repeats_or_within() {
+    let (cfg, ws) = common::write_config();
+    // Tag-aware ingest: write a doc with frontmatter tags. The
+    // markdown parser captures them into `metadata.tags`.
+    common::ingest(
+        &cfg,
+        &ws,
+        "tagged.md",
+        "---\ntags: [rust, async]\n---\n\n# Tagged\n\nbody about rust\n",
+    );
+    common::ingest(&cfg, &ws, "untagged.md", "# Plain\n\nbody about rust\n");
+
+    // --tag rust → tagged doc only.
+    let (stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--mode", "lexical", "--json", "--tag", "rust", "--k", "10", "rust"],
+    );
+    let v: Value = serde_json::from_str(stdout.trim()).expect("json");
+    let hits = v["hits"].as_array().expect("hits");
+    assert!(!hits.is_empty(), "tagged doc should match");
+    for h in hits {
+        let path = h["doc_path"].as_str().unwrap_or("");
+        assert_eq!(path, "tagged.md", "untagged doc should be filtered out");
+    }
+}
+```
+
+If `common::write_config` / `common::ingest` / `common::run_search_with_args` already exist (they do from fb-32 / fb-34), reuse. The test file imports them via `mod common;`.
+
+- [ ] **Step 5.2: Run tests**
+
+```bash
+cargo test -p kebab-cli --test wire_search_filters 2>&1 | tail -10
+```
+
+Expected: 4 PASS.
+
+If the tag-frontmatter test fails because the parser doesn't capture tags from this exact format, simplify the test or check which frontmatter shape the codebase expects:
+
+```bash
+grep -rn "metadata.tags\|frontmatter.*tags" crates/kebab-parse-md/src/ 2>/dev/null | head -5
+```
+
+Adapt the fixture frontmatter to the parser's expected shape.
+
+- [ ] **Step 5.3: Run full kebab-cli suite**
+
+```bash
+cargo test -p kebab-cli
+```
+
+Expected: all PASS.
+
+- [ ] **Step 5.4: Commit**
+
+```bash
+git add crates/kebab-cli/tests/
+git commit -m "$(cat <<'EOF'
+test(cli): wire_search_filters — 4 lexical-only integration tests (fb-36)
+
+Cover: --doc-id scoping, --ingested-after validation error,
+--media md alias, --tag repeatable + frontmatter parsing.
+
+Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Task 6: MCP `SearchInput` extension
+
+**Files:**
+- Modify: `crates/kebab-mcp/src/tools/search.rs`
+- Modify: `crates/kebab-mcp/tests/tools_call_search.rs`
+
+- [ ] **Step 6.1: Inspect current `SearchInput`**
+
+```bash
+sed -n '1,80p' crates/kebab-mcp/src/tools/search.rs
+```
+
+Note where `mode` / `k` / `max_tokens` / `cursor` are wired.
+
+- [ ] **Step 6.2: Add 7 fields to `SearchInput`**
+
+Edit the struct:
+
+```rust
+#[derive(Debug, Deserialize, Serialize, JsonSchema)]
+pub struct SearchInput {
+    pub query: String,
+    pub mode: Option<String>,
+    pub k: Option<usize>,
+    pub max_tokens: Option<usize>,
+    pub snippet_chars: Option<usize>,
+    pub cursor: Option<String>,
+    /// p9-fb-36: filter by `metadata.tags` (OR-within).
+    pub tags: Option<Vec<String>>,
+    /// p9-fb-36: filter by `documents.lang` (ISO code).
+    pub lang: Option<String>,
+    /// p9-fb-36: filter by `documents.workspace_path` glob.
+    pub path_glob: Option<String>,
+    /// p9-fb-36: filter by minimum `documents.trust_level`.
+    /// Accepts: `"trusted"`, `"reviewed"`, `"hearsay"`, `"untrusted"`.
+    pub trust_min: Option<String>,
+    /// p9-fb-36: filter by `assets.media_type` kind. IN-list. Accepts:
+    /// `"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`.
+    pub media: Option<Vec<String>>,
+    /// p9-fb-36: RFC3339 UTC timestamp. Invalid format → invalid_input.
+    pub ingested_after: Option<String>,
+    /// p9-fb-36: filter to a single doc.
+    pub doc_id: Option<String>,
+}
+```
+
+- [ ] **Step 6.3: Update dispatch**
+
+In `handle(state, input)`, before constructing `SearchOpts`, build `SearchFilters` from the new inputs:
+
+```rust
+let trust_min = match input.trust_min.as_deref() {
+    Some("trusted") => Some(kebab_core::TrustLevel::Trusted),
+    Some("reviewed") => Some(kebab_core::TrustLevel::Reviewed),
+    Some("hearsay") => Some(kebab_core::TrustLevel::Hearsay),
+    Some("untrusted") => Some(kebab_core::TrustLevel::Untrusted),
+    Some(other) => {
+        return invalid_input(&format!(
+            "trust_min: unknown level '{other}'; expected trusted|reviewed|hearsay|untrusted"
+        ));
+    }
+    None => None,
+};
+
+let ingested_after = match input.ingested_after.as_deref() {
+    Some(s) => {
+        match time::OffsetDateTime::parse(s, &time::format_description::well_known::Rfc3339) {
+            Ok(ts) => Some(ts),
+            Err(e) => return invalid_input(&format!("ingested_after: invalid RFC3339 '{s}': {e}")),
+        }
+    }
+    None => None,
+};
+
+let filters = kebab_core::SearchFilters {
+    tags_any: input.tags.unwrap_or_default(),
+    lang: input.lang.map(kebab_core::Lang),
+    path_glob: input.path_glob,
+    trust_min,
+    media: input.media.unwrap_or_default(),
+    ingested_after,
+    doc_id: input.doc_id.map(kebab_core::DocumentId),
+};
+
+let query = kebab_core::SearchQuery {
+    text: input.query,
+    mode,
+    k: input.k.unwrap_or(10).clamp(1, 100),
+    filters,
+};
+```
+
+If `invalid_input` helper doesn't exist in this file (per fb-35 `tools/fetch.rs` pattern), add one:
+
+```rust
+fn invalid_input(msg: &str) -> CallToolResult {
+    use kebab_app::{ErrorV1, StructuredError};
+    let err = anyhow::Error::new(StructuredError(ErrorV1 {
+        schema_version: "error.v1".to_string(),
+        code: "invalid_input".to_string(),
+        message: msg.to_string(),
+        details: serde_json::Value::Null,
+        hint: None,
+    }));
+    to_tool_error(&err)
+}
+```
+
+If the existing dispatch hardcodes `SearchFilters::default()`, replace with the new `filters` value above.
+
+- [ ] **Step 6.4: Add MCP test cases**
+
+Edit `crates/kebab-mcp/tests/tools_call_search.rs`. Add tests:
+
+```rust
+#[test]
+fn search_with_doc_id_filter_returns_only_target() {
+    // Mirror the existing tools_call_search.rs setup pattern.
+    // After ingesting 2 docs and discovering target doc_id from a
+    // baseline search, call mcp__kebab__search with doc_id set and
+    // assert v["hits"] all have doc_id == target.
+    // (Concrete test code mirrors what fb-34 / fb-35 added; see them
+    // for the helper pattern this crate uses.)
+}
+
+#[test]
+fn search_with_invalid_ingested_after_returns_invalid_input() {
+    // Same MCP scaffold. Call with ingested_after = "garbage", assert
+    // the response carries error.v1 with code = "invalid_input" and
+    // message containing "ingested_after".
+}
+```
+
+Implement against whatever the existing tools_call_search.rs scaffold uses. The fb-34/35 tests are good templates.
+
+- [ ] **Step 6.5: Run MCP tests**
+
+```bash
+cargo test -p kebab-mcp
+```
+
+Expected: all PASS.
+
+- [ ] **Step 6.6: Commit**
+
+```bash
+git add crates/kebab-mcp/
+git commit -m "$(cat <<'EOF'
+feat(mcp): kebab__search filter inputs (fb-36)
+
+7 new optional inputs on SearchInput: tags, lang, path_glob,
+trust_min, media, ingested_after, doc_id. Validation surfaces as
+error.v1 code = invalid_input via StructuredError. Dispatch builds
+SearchFilters from the inputs and forwards through the existing
+search_with_opts_with_config facade.
+
+Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Task 7: Workspace test + clippy
+
+- [ ] **Step 7.1: Workspace test**
+
+```bash
+cargo test --workspace --no-fail-fast -j 1 2>&1 | tail -15
+```
+
+Expected: all PASS.
+
+- [ ] **Step 7.2: Clippy**
+
+```bash
+cargo clippy --workspace --all-targets -- -D warnings 2>&1 | tail -10
+```
+
+Expected: clean.
+
+- [ ] **Step 7.3: Commit any clippy fixes**
+
+```bash
+git add -A
+git commit -m "chore: clippy fixes for fb-36"
+```
+
+(Skip if no fixes needed.)
+
+---
+
+## Task 8: Documentation updates
+
+**Files:**
+- Modify: `README.md`
+- Modify: `docs/SMOKE.md`
+- Modify: `tasks/p9/p9-fb-36-search-filters.md`
+- Modify: `tasks/INDEX.md`
+- Modify: `integrations/claude-code/kebab/SKILL.md`
+
+- [ ] **Step 8.1: README — search row update**
+
+Find the `kebab search` row in 명령 table:
+
+```bash
+grep -n "kebab search" README.md | head -5
+```
+
+Append filter flags. The row gets long — keep concise:
+
+> `... [--tag <tag>] [--lang <iso>] [--path-glob <glob>] [--trust-min <level>] [--media md,pdf,...] [--ingested-after <RFC3339>] [--doc-id <id>]` (p9-fb-36 — filter args. AND combinator across flags; OR within --tag/--media. Invalid `--ingested-after` RFC3339 → `error.v1.code = config_invalid`.)
+
+- [ ] **Step 8.2: SMOKE.md — filter walkthrough**
+
+After the existing fb-35 verbatim fetch section, append:
+
+````markdown
+### Filter args (fb-36)
+
+```bash
+# Filter by media kind (md alias normalizes to markdown).
+kebab search "rust" --media md --json | jq '.hits | length'
+
+# Filter by ingest timestamp (RFC3339).
+kebab search "rust" --ingested-after 2026-04-01T00:00:00Z --json
+
+# Combine: doc-id scope + tag (AND across flags).
+kebab search "rust" --doc-id "<doc-id>" --tag rust --json
+```
+
+Bad `--ingested-after` → `error.v1.code = config_invalid`, exit 2.
+Unknown `--media` value → silently empty (no error).
+````
+
+- [ ] **Step 8.3: Spec status flip**
+
+Edit `tasks/p9/p9-fb-36-search-filters.md`:
+
+```diff
+-status: open
++status: completed
+```
+
+Replace the `> ⏳ **백로그 only — 미구현.**` block with:
+
+```markdown
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 참조.
+
+상세 설계: `docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md`.
+구현 계획: `docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md`.
+```
+
+- [ ] **Step 8.4: tasks/INDEX.md**
+
+```diff
+-    - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ⏳ 미구현, brainstorm 필요 (depends_on 27)
++    - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-10)
+```
+
+(The `depends_on 27` annotation in the original was carried over from the spec stub; drop it.)
+
+- [ ] **Step 8.5: SKILL.md — search input shape**
+
+Find the existing `mcp__kebab__search` Input section:
+
+```bash
+grep -n "mcp__kebab__search\|max_tokens.*null" integrations/claude-code/kebab/SKILL.md | head -5
+```
+
+Update the example input + bullets to mention the 7 new fields:
+
+````markdown
+Input:
+```json
+{
+  "query": "<query>",
+  "mode": "hybrid",
+  "k": 10,
+  "max_tokens": null,
+  "snippet_chars": null,
+  "cursor": null,
+  "tags": null,
+  "lang": null,
+  "path_glob": null,
+  "trust_min": null,
+  "media": null,
+  "ingested_after": null,
+  "doc_id": null
+}
+```
+
+- p9-fb-36 filter inputs: `tags` (OR-within), `lang`, `path_glob`, `trust_min`, `media` (IN-list of `markdown|pdf|image|audio|other`), `ingested_after` (RFC3339 UTC), `doc_id`. AND combinator across keys. Invalid `ingested_after` / unknown `trust_min` → `error.v1.code = invalid_input`. Unknown `media` value → empty hits, no error.
+````
+
+- [ ] **Step 8.6: Commit docs**
+
+```bash
+git add README.md docs/SMOKE.md tasks/p9/p9-fb-36-search-filters.md tasks/INDEX.md integrations/claude-code/kebab/SKILL.md
+git commit -m "$(cat <<'EOF'
+docs(fb-36): README + SMOKE + INDEX + skill notes
+
+Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
+EOF
+)"
+```
+
+---
+
+## Task 9: Smoke + push + PR
+
+- [ ] **Step 9.1: Manual smoke**
+
+```bash
+cd /tmp/kebab-smoke
+~/Workspace/projects/kebab/target/release/kebab --config /tmp/kebab-smoke/config.toml ingest
+~/Workspace/projects/kebab/target/release/kebab --config /tmp/kebab-smoke/config.toml search "test" --json --media md | jq '{hits: (.hits | length)}'
+~/Workspace/projects/kebab/target/release/kebab --config /tmp/kebab-smoke/config.toml search "test" --json --ingested-after garbage 2>&1 | tail -5
+```
+
+Expected:
+- `--media md` returns sane hit count.
+- garbage `--ingested-after` exits non-zero with `error.v1.code = config_invalid` on stderr.
+
+- [ ] **Step 9.2: Final workspace test**
+
+```bash
+cd ~/Workspace/projects/kebab
+cargo test --workspace --no-fail-fast -j 1
+```
+
+Expected: all green.
+
+- [ ] **Step 9.3: Push branch**
+
+```bash
+git push -u origin feat/fb-36-search-filters
+```
+
+- [ ] **Step 9.4: Open PR**
+
+Build PR body at `/tmp/fb36-pr-body.md`:
+
+```markdown
+## Summary
+
+- adds 7 filter flags on `kebab search` and the equivalent inputs on `mcp__kebab__search`:
+  - existing `SearchFilters` fields exposed: `--tag` (repeatable, OR-within), `--lang`, `--path-glob`, `--trust-min`
+  - new fields: `--media` (csv, `md` alias), `--ingested-after` (RFC3339 UTC), `--doc-id`
+- AND combinator across flags; OR within `--tag` and `--media`
+- filter layer: SQLite WHERE for lexical (incl. media via `CASE WHEN json_type='text'` to handle both unit and tuple `MediaType` serde shapes), over-fetch + `filter_chunks` post-filter for vector
+- wire shape unchanged — input-only feature; `search_response.v1` and `search_hit.v1` untouched
+- invalid `--ingested-after` → `error.v1.code = config_invalid` (CLI) / `invalid_input` (MCP); unknown `trust_min` → rejected by clap `value_enum` parsing (CLI) / `invalid_input` (MCP); unknown `--media` value → empty hits, no error
+
+## Test plan
+
+- [x] `cargo test --workspace --no-fail-fast -j 1` — green
+- [x] `cargo clippy --workspace --all-targets -- -D warnings` — clean
+- [x] new tests: 6 lexical (media / ingested_after / doc_id / AND / unknown / default), 2 vector mirror, 4 CLI integration, 2 MCP
+- [x] manual smoke per `docs/SMOKE.md` "Filter args" walkthrough
+
+## Architectural notes
+
+- `SearchFilters` 3 fields are additive with `#[serde(default)]` — old JSON without the new keys deserializes cleanly.
+- `MediaType` JSON has two shapes (`"markdown"` for unit variants, `{"image":"png"}` for tuple variants); the SQL `CASE WHEN json_type='text' THEN json_extract($) ELSE (first object key) END` extracts a unified kind string.
+- Vector retriever mirrors the lexical SQL exactly (same WHERE clauses, same params binding pattern). path_glob remains a Rust post-filter — unchanged from before fb-36.
+- No new HOTFIXES entry — additive minor, no contract drift.
+
+## Files of interest
+
+- spec: `docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md`
+- plan: `docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md`
+- core: `crates/kebab-core/src/search.rs` (SearchFilters)
+- search: `crates/kebab-search/src/lexical.rs` + `vector.rs`
+- CLI: `crates/kebab-cli/src/main.rs` (Cmd::Search)
+- MCP: `crates/kebab-mcp/src/tools/search.rs` (SearchInput)
+```
+
+Open PR:
+
+```bash
+/Users/user/.claude/skills/gitea-ops/bin/gitea-pr \
+  --title "feat(fb-36): search filter args (--media / --ingested-after / --doc-id + 4 existing)" \
+  --body "$(cat /tmp/fb36-pr-body.md)" \
+  --head feat/fb-36-search-filters \
+  --base main
+```
+
+- [ ] **Step 9.5: Cleanup**
+
+```bash
+rm /tmp/fb36-pr-body.md
+```
+
+---
+
+## Self-review
+
+- **Spec coverage:**
+  - §Behavior contract / 7 flags → Tasks 1, 4 (CLI), 6 (MCP)
+  - §Filter validation (RFC3339, trust_min) → Task 4 (CLI dispatch), Task 6 (MCP dispatch)
+  - §Filter layer (SQLite WHERE for lexical, over-fetch + post-filter for vector) → Tasks 2, 3
+  - §Wire shape (input-only, no schema change) → no task needed; covered by absence of changes
+  - §MCP `SearchInput` extension → Task 6
+  - §Public surface delta (SearchFilters / TrustLevelFlag / SearchInput) → Tasks 1, 4, 6
+  - §Test plan → Tasks 2 (6 lexical), 3 (2 vector), 5 (4 CLI), 6 (2 MCP)
+  - §Documentation → Task 8
+  - §Risks (MediaType JSON shape, RFC3339 UTC, path_glob ordering) → Task 2 explicitly handles the shape; Task 4 / 6 mention UTC; path_glob position unchanged
+
+- **Placeholder scan:**
+  - Task 2 / 3 / 6 contain "mirror the existing scaffold" instructions — concrete fallback paths spelled out (look at file X, copy pattern Y).
+  - No "TODO" / "fill in" / "later" remaining.
+
+- **Type consistency:**
+  - `SearchFilters { tags_any, lang, path_glob, trust_min, media, ingested_after, doc_id }` consistent across Tasks 1, 2, 3, 4, 6.
+  - `media: Vec<String>`, `ingested_after: Option<OffsetDateTime>`, `doc_id: Option<DocumentId>` consistent.
+  - `MEDIA_KINDS` const used as documentation reference, not at runtime.
+  - `TrustLevelFlag` clap enum → `kebab_core::TrustLevel` mapping defined in Task 4 step 4.1, used in Task 4 step 4.3.
+  - Error codes consistent: `config_invalid` (CLI), `invalid_input` (MCP) — both via StructuredError.
+
+---
+
+## Execution Handoff
+
+Plan complete and saved to `docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md`. Two execution options:
+
+**1. Subagent-Driven (recommended)** — fresh subagent per task, review between tasks.
+
+**2. Inline Execution** — execute tasks in this session.
+
+Which approach?
-- 
2.49.1


From d3f38c76e928a26f32ebbbc2e22a0e330c6fe1e5 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 03:36:45 +0900
Subject: [PATCH 03/11] feat(core): SearchFilters gains media / ingested_after
 / doc_id (fb-36)

3 additive optional fields. #[serde(default)] preserves
backwards compat for older JSON without the new keys.
MEDIA_KINDS const exposes canonical "markdown"/"pdf"/"image"/
"audio"/"other" labels for downstream alias normalization.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-core/src/search.rs | 38 +++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/crates/kebab-core/src/search.rs b/crates/kebab-core/src/search.rs
index 9d6527b..5e5cd31 100644
--- a/crates/kebab-core/src/search.rs
+++ b/crates/kebab-core/src/search.rs
@@ -26,12 +26,30 @@ pub struct SearchQuery {
     pub filters: SearchFilters,
 }
 
+/// p9-fb-36: canonical kind labels for `SearchFilters.media`. Mirrors
+/// `MediaType` variant tags; CLI / MCP normalize aliases (`md` → `markdown`)
+/// before populating this Vec.
+pub const MEDIA_KINDS: &[&str] = &["markdown", "pdf", "image", "audio", "other"];
+
 #[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
 pub struct SearchFilters {
     pub tags_any: Vec<String>,
     pub lang: Option<Lang>,
     pub path_glob: Option<String>,
     pub trust_min: Option<TrustLevel>,
+    /// p9-fb-36: media_type filter — IN-list of `MediaType.kind`
+    /// strings (`"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`).
+    /// Empty Vec = no filter. Match is on the variant tag only;
+    /// e.g. `["image"]` matches `Image(Png)` and `Image(Jpeg)`.
+    #[serde(default)]
+    pub media: Vec<String>,
+    /// p9-fb-36: hits whose source doc's `documents.updated_at` is at
+    /// or after this timestamp. None = no filter. RFC3339 / UTC.
+    #[serde(default, with = "time::serde::rfc3339::option")]
+    pub ingested_after: Option<OffsetDateTime>,
+    /// p9-fb-36: restrict hits to a single document. None = no filter.
+    #[serde(default)]
+    pub doc_id: Option<DocumentId>,
 }
 
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -155,4 +173,24 @@ mod tests {
         assert!(opts.snippet_chars.is_none());
         assert!(opts.cursor.is_none());
     }
+
+    #[test]
+    fn search_filters_default_includes_new_fb36_fields() {
+        let f = SearchFilters::default();
+        assert!(f.media.is_empty(), "media default empty");
+        assert!(f.ingested_after.is_none(), "ingested_after default None");
+        assert!(f.doc_id.is_none(), "doc_id default None");
+        assert!(f.tags_any.is_empty());
+        assert!(f.lang.is_none());
+        assert!(f.path_glob.is_none());
+        assert!(f.trust_min.is_none());
+    }
+
+    #[test]
+    fn search_filters_serialize_with_serde_default_compat() {
+        let old: SearchFilters = serde_json::from_str(r#"{"tags_any":[],"lang":null,"path_glob":null,"trust_min":null}"#).unwrap();
+        assert!(old.media.is_empty());
+        assert!(old.ingested_after.is_none());
+        assert!(old.doc_id.is_none());
+    }
 }
-- 
2.49.1


From 2c80e2ad915e021dd73f4571ffa61aeb291341a7 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 03:41:02 +0900
Subject: [PATCH 04/11] feat(search/lexical): media / ingested_after / doc_id
 filters (fb-36)

SQL WHERE clause extension. media uses CASE WHEN json_type='text'
to handle both unit (\`"markdown"\`) and tuple (\`{"image":"png"}\`)
MediaType serde shapes. ingested_after relies on RFC3339 lexicographic
ordering with UTC Z (per fb-32 ingest invariant). doc_id is a simple
equality. AND combinator with existing tags / lang / trust filters.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-search/src/lexical.rs   |  44 ++++++
 crates/kebab-search/tests/lexical.rs | 210 ++++++++++++++++++++++++++-
 2 files changed, 253 insertions(+), 1 deletion(-)

diff --git a/crates/kebab-search/src/lexical.rs b/crates/kebab-search/src/lexical.rs
index 513fb9e..954148c 100644
--- a/crates/kebab-search/src/lexical.rs
+++ b/crates/kebab-search/src/lexical.rs
@@ -319,6 +319,50 @@ fn run_query(
         };
         params.push(Box::new(rank));
     }
+    // p9-fb-36: media_type filter (IN-list).
+    // `assets.media_type` JSON has two shapes:
+    //   - unit variant (Markdown / Pdf): JSON text, e.g. `"markdown"`
+    //   - tuple variant (Image(Png) / Audio(Mp3) / Other(s)): JSON object,
+    //     e.g. `{"image": "png"}`
+    // Extract a unified "kind" string for both shapes via:
+    //   CASE WHEN json_type = 'text' THEN json_extract($)
+    //        ELSE (first object key)
+    //   END IN (?, ...)
+    if !filters.media.is_empty() {
+        let placeholders: Vec<&str> =
+            std::iter::repeat("?").take(filters.media.len()).collect();
+        let placeholders = placeholders.join(",");
+        sql.push_str(&format!(
+            " AND f.doc_id IN (\
+               SELECT d2.doc_id FROM documents d2 \
+               JOIN assets a ON a.asset_id = d2.asset_id \
+               WHERE CASE \
+                 WHEN json_type(a.media_type) = 'text' THEN json_extract(a.media_type, '$') \
+                 ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+               END IN ({placeholders}))"
+        ));
+        for kind in &filters.media {
+            params.push(Box::new(kind.clone()));
+        }
+    }
+
+    // p9-fb-36: ingested_after filter. `documents.updated_at` is RFC3339 TEXT
+    // (always UTC `Z` per fb-32 ingest path), so lexicographic >= is correct
+    // only when `after` is also UTC. NOTE(review): normalize via `to_offset(UTC)`.
+    if let Some(after) = &filters.ingested_after {
+        let formatted = after
+            .format(&time::format_description::well_known::Rfc3339)
+            .expect("OffsetDateTime formats to RFC3339");
+        sql.push_str(" AND d.updated_at >= ?");
+        params.push(Box::new(formatted));
+    }
+
+    // p9-fb-36: doc_id filter — single-doc scoping.
+    if let Some(id) = &filters.doc_id {
+        sql.push_str(" AND d.doc_id = ?");
+        params.push(Box::new(id.0.clone()));
+    }
+
     // path_glob is intentionally NOT applied here — see module comment
     // on PATH_GLOB_OVERFETCH and the post-filter in `LexicalRetriever::search`.
 
diff --git a/crates/kebab-search/tests/lexical.rs b/crates/kebab-search/tests/lexical.rs
index ae01460..4265160 100644
--- a/crates/kebab-search/tests/lexical.rs
+++ b/crates/kebab-search/tests/lexical.rs
@@ -8,11 +8,15 @@
 use std::sync::Arc;
 
 use kebab_config::Config;
-use kebab_core::{IndexVersion, Lang, Retriever, SearchFilters, SearchMode, SearchQuery, TrustLevel};
+use kebab_core::{
+    DocumentId, IndexVersion, Lang, MediaType, Retriever, SearchFilters, SearchHit, SearchMode,
+    SearchQuery, TrustLevel,
+};
 use kebab_search::LexicalRetriever;
 use kebab_store_sqlite::SqliteStore;
 use rusqlite::Connection;
 use tempfile::TempDir;
+use time::OffsetDateTime;
 
 // ── Test scaffolding ─────────────────────────────────────────────────────
 
@@ -679,6 +683,210 @@ fn search_hit_carries_indexed_at_from_documents_updated_at() {
     assert!(!hit.stale, "lexical retriever must default stale=false");
 }
 
+// ── TestEnv helper for fb-36 filter tests ───────────────────────────────
+
+/// Convenience wrapper over `Env` that exposes higher-level fixture helpers
+/// for the fb-36 filter tests.  Intentionally kept separate from `Env` so
+/// the original tests are untouched.
+struct TestEnv {
+    inner: Env,
+    counter: std::cell::Cell<u32>,
+}
+
+impl TestEnv {
+    fn new() -> Self {
+        Self {
+            inner: Env::new(),
+            counter: std::cell::Cell::new(0),
+        }
+    }
+
+    /// Allocate a fresh monotone counter suffix so every inserted doc / chunk
+    /// gets a unique 32-hex ID without the caller worrying about collisions.
+    fn next_id(&self, prefix: &str) -> String {
+        let n = self.counter.get();
+        self.counter.set(n + 1);
+        let suffix = format!("{prefix}{n:04}");
+        id32(&suffix)
+    }
+
+    /// Insert a markdown doc with the given `body` and return its `DocumentId`.
+    fn insert_doc(&self, path: &str, body: &str) -> DocumentId {
+        self.insert_doc_with_media(path, body, MediaType::Markdown)
+    }
+
+    /// Insert a doc whose `assets.media_type` JSON is set to the serialized
+    /// form of `media`.  The `documents.updated_at` defaults to now.
+    fn insert_doc_with_media(&self, path: &str, body: &str, media: MediaType) -> DocumentId {
+        self.insert_doc_full(path, body, media, OffsetDateTime::now_utc())
+    }
+
+    /// Insert a doc with an explicit `updated_at` timestamp (for
+    /// `ingested_after` filter tests).
+    fn insert_doc_with_updated_at(
+        &self,
+        path: &str,
+        body: &str,
+        updated_at: OffsetDateTime,
+    ) -> DocumentId {
+        self.insert_doc_full(path, body, MediaType::Markdown, updated_at)
+    }
+
+    fn insert_doc_full(
+        &self,
+        path: &str,
+        body: &str,
+        media: MediaType,
+        updated_at: OffsetDateTime,
+    ) -> DocumentId {
+        use time::format_description::well_known::Rfc3339;
+        let doc_id = self.next_id("doc");
+        let chunk_id = self.next_id("chk");
+        let asset_id = self.next_id("ast");
+        let media_json = serde_json::to_string(&media).expect("serialize MediaType");
+        let updated_at_str = updated_at.format(&Rfc3339).expect("format updated_at");
+
+        let conn = self.inner.raw_conn();
+        conn.execute(
+            "INSERT OR IGNORE INTO assets (
+                asset_id, source_uri, workspace_path, media_type, byte_len,
+                checksum, storage_kind, storage_path, discovered_at
+            ) VALUES (?, ?, ?, ?, 0,
+                      'd0', 'reference', ?, '2024-01-01T00:00:00Z')",
+            rusqlite::params![asset_id, format!("file:///{path}"), path, media_json, path],
+        )
+        .expect("insert asset");
+
+        conn.execute(
+            "INSERT INTO documents (
+                doc_id, asset_id, workspace_path, title, lang,
+                source_type, trust_level, parser_version,
+                doc_version, schema_version, metadata_json,
+                provenance_json, created_at, updated_at
+            ) VALUES (?, ?, ?, NULL, 'en', 'markdown', 'primary', 'pv1', 1, 1,
+                      '{}', '{\"events\":[]}',
+                      '2024-01-01T00:00:00Z', ?)",
+            rusqlite::params![doc_id, asset_id, path, updated_at_str],
+        )
+        .expect("insert document");
+
+        let empty_headings: Vec<&str> = vec![];
+        let heading_json = serde_json::to_string(&empty_headings).unwrap();
+        conn.execute(
+            "INSERT INTO chunks (
+                chunk_id, doc_id, text, heading_path_json, section_label,
+                source_spans_json, token_estimate, chunker_version,
+                policy_hash, block_ids_json, created_at
+            ) VALUES (?, ?, ?, ?, NULL,
+                      '[{\"kind\":\"line\",\"start\":1,\"end\":1}]',
+                      1, 'v1', 'h', '[]', '2024-01-01T00:00:00Z')",
+            rusqlite::params![chunk_id, doc_id, body, heading_json],
+        )
+        .expect("insert chunk");
+
+        DocumentId(doc_id)
+    }
+
+    fn run_search(&self, query: &str, filters: &SearchFilters) -> Vec<SearchHit> {
+        let r = self.inner.retriever();
+        let q = SearchQuery {
+            text: query.to_string(),
+            mode: SearchMode::Lexical,
+            k: 10,
+            filters: filters.clone(),
+        };
+        r.search(&q).expect("search")
+    }
+}
+
+// ── fb-36 filter tests ───────────────────────────────────────────────────
+
+#[test]
+fn lexical_filter_by_media() {
+    let env = TestEnv::new();
+    env.insert_doc_with_media("md1.md", "rust ownership", MediaType::Markdown);
+    env.insert_doc_with_media("doc.pdf", "rust pdf body", MediaType::Pdf);
+    let filters = SearchFilters {
+        media: vec!["pdf".to_string()],
+        ..Default::default()
+    };
+    let hits = env.run_search("rust", &filters);
+    assert_eq!(hits.len(), 1, "only pdf doc should match");
+    assert!(hits[0].doc_path.0.ends_with(".pdf"), "got: {}", hits[0].doc_path.0);
+}
+
+#[test]
+fn lexical_filter_by_ingested_after() {
+    let env = TestEnv::new();
+    env.insert_doc_with_updated_at(
+        "old.md",
+        "ingest test",
+        time::macros::datetime!(2020-01-01 00:00:00 UTC),
+    );
+    env.insert_doc_with_updated_at(
+        "new.md",
+        "ingest test",
+        time::macros::datetime!(2026-01-01 00:00:00 UTC),
+    );
+    let filters = SearchFilters {
+        ingested_after: Some(time::macros::datetime!(2025-01-01 00:00:00 UTC)),
+        ..Default::default()
+    };
+    let hits = env.run_search("ingest", &filters);
+    assert_eq!(hits.len(), 1, "only post-2025 doc matches");
+}
+
+#[test]
+fn lexical_filter_by_doc_id() {
+    let env = TestEnv::new();
+    let target = env.insert_doc("a.md", "shared term");
+    env.insert_doc("b.md", "shared term");
+    let filters = SearchFilters {
+        doc_id: Some(target.clone()),
+        ..Default::default()
+    };
+    let hits = env.run_search("shared", &filters);
+    assert!(!hits.is_empty(), "should get at least one hit for target doc");
+    for h in &hits {
+        assert_eq!(h.doc_id, target, "all hits must be from target doc");
+    }
+}
+
+#[test]
+fn lexical_filter_combinator_is_and() {
+    let env = TestEnv::new();
+    let target = env.insert_doc_with_media("a.md", "rust", MediaType::Markdown);
+    env.insert_doc_with_media("b.pdf", "rust", MediaType::Pdf);
+    let filters = SearchFilters {
+        media: vec!["pdf".to_string()],
+        doc_id: Some(target),
+        ..Default::default()
+    };
+    let hits = env.run_search("rust", &filters);
+    // a.md satisfies only doc_id and b.pdf only media: AND → empty, OR would return hits.
+    assert!(hits.is_empty(), "filters must be AND-combined; got {} hits", hits.len());
+}
+
+#[test]
+fn lexical_filter_unknown_media_returns_empty() {
+    let env = TestEnv::new();
+    env.insert_doc("a.md", "rust");
+    let filters = SearchFilters {
+        media: vec!["nonexistent_kind".to_string()],
+        ..Default::default()
+    };
+    let hits = env.run_search("rust", &filters);
+    assert!(hits.is_empty(), "unknown media → no hits, no error");
+}
+
+#[test]
+fn lexical_empty_filters_match_default_behavior() {
+    let env = TestEnv::new();
+    env.insert_doc("a.md", "rust");
+    let with_default = env.run_search("rust", &SearchFilters::default());
+    assert!(!with_default.is_empty());
+}
+
 #[test]
 fn lexical_snapshot_run_1() {
     // Pinned snapshot. A small, deterministic corpus; the JSON shape of
-- 
2.49.1


From 86475e5ba2b7108912a489e4bea801011ab2c651 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 03:43:51 +0900
Subject: [PATCH 05/11] fix(search/lexical): use std::iter::repeat_n (clippy)

Per code review on 2c80e2a. manual-repeat-n lint triggers
for Rust 1.94+ when repeat().take() can be expressed as
repeat_n directly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-search/src/lexical.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/kebab-search/src/lexical.rs b/crates/kebab-search/src/lexical.rs
index 954148c..871c22d 100644
--- a/crates/kebab-search/src/lexical.rs
+++ b/crates/kebab-search/src/lexical.rs
@@ -330,7 +330,7 @@ fn run_query(
     //   END IN (?, ...)
     if !filters.media.is_empty() {
         let placeholders: Vec<&str> =
-            std::iter::repeat("?").take(filters.media.len()).collect();
+            std::iter::repeat_n("?", filters.media.len()).collect();
         let placeholders = placeholders.join(",");
         sql.push_str(&format!(
             " AND f.doc_id IN (\
-- 
2.49.1


From c6cc1e2bfef6ac327143df3eeaef160060aa2838 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 03:50:56 +0900
Subject: [PATCH 06/11] feat(search/vector): media / ingested_after / doc_id
 filters (fb-36)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

filter_chunks helper in kebab-store-sqlite extended with the same 3
WHERE clauses as lexical. Vector still over-fetches k*2 then
post-filters via SqliteStore::filter_chunks; small k can return < k
hits when filters drop a lot — agent is expected to widen k or
paginate. AND combinator with existing filters.

- kebab-store-sqlite/src/filters.rs: media IN-list subquery, ingested_after
  lexicographic >= compare, doc_id equality; mirrors lexical SQL arms
- 3 direct unit tests (filter_chunks_media_type/ingested_after/doc_id)
  that run without AVX/Lance
- common/mod.rs: insert_doc / insert_doc_with_media / run_vector_search
  helpers on HybridEnv for integration-test use
- hybrid.rs: 2 new #[ignore = "requires AVX..."] integration tests
  (vector_filter_by_media, vector_filter_by_doc_id)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-search/tests/common/mod.rs  |  91 +++++++++-
 crates/kebab-search/tests/hybrid.rs      |  53 +++++-
 crates/kebab-store-sqlite/src/filters.rs | 217 +++++++++++++++++++++++
 3 files changed, 359 insertions(+), 2 deletions(-)

diff --git a/crates/kebab-search/tests/common/mod.rs b/crates/kebab-search/tests/common/mod.rs
index 69b87bd..d0ae1ad 100644
--- a/crates/kebab-search/tests/common/mod.rs
+++ b/crates/kebab-search/tests/common/mod.rs
@@ -19,7 +19,9 @@ use std::sync::Arc;
 use kebab_config::Config;
 use kebab_core::{
     ChunkId, DocumentId, EmbeddingId, EmbeddingInput, EmbeddingKind,
-    EmbeddingModelId, EmbeddingVersion, IndexVersion, VectorRecord, VectorStore,
+    EmbeddingModelId, EmbeddingVersion, IndexVersion, MediaType,
+    Retriever, SearchFilters, SearchHit, SearchMode, SearchQuery,
+    VectorRecord, VectorStore,
 };
 use kebab_embed::{Embedder, MockEmbedder};
 use kebab_search::{LexicalRetriever, VectorRetriever};
@@ -173,6 +175,93 @@ impl HybridEnv {
         .unwrap();
     }
 
+    /// High-level helper: seed a doc with the default media type
+    /// (Markdown) and embed its text. Returns the `DocumentId` so
+    /// callers can use it in `doc_id` filter tests.
+    pub fn insert_doc(&self, path: &str, text: &str) -> DocumentId {
+        self.insert_doc_with_media(path, text, MediaType::Markdown)
+    }
+
+    /// High-level helper: seed a doc with an explicit `MediaType`.
+    /// The `media_type` is serialized to JSON (mirrors how
+    /// `DocumentStore::put_document` writes it) and stored in `assets`.
+    pub fn insert_doc_with_media(
+        &self,
+        path: &str,
+        text: &str,
+        media: MediaType,
+    ) -> DocumentId {
+        // Derive deterministic IDs from the path so repeated calls with
+        // the same path are idempotent (INSERT OR IGNORE).
+        let path_hash: String = {
+            use std::collections::hash_map::DefaultHasher;
+            use std::hash::{Hash, Hasher};
+            let mut h = DefaultHasher::new();
+            path.hash(&mut h);
+            format!("{:032x}", h.finish())
+        };
+        let doc_id = format!("d{}", &path_hash[..31]);
+        let chunk_id = format!("c{}", &path_hash[..31]);
+        let asset_id = format!("a{}", &path_hash[..31]);
+
+        let media_json = serde_json::to_string(&media).expect("serialize MediaType");
+        let conn = self.sqlite.read_conn();
+        conn.execute(
+            "INSERT OR IGNORE INTO assets (
+                asset_id, source_uri, workspace_path, media_type, byte_len,
+                checksum, storage_kind, storage_path, discovered_at
+             ) VALUES (?, ?, ?, ?, 0,
+                       'deadbeefdeadbeefdeadbeefdeadbeef',
+                       'reference', ?, '1970-01-01T00:00:00Z')",
+            params![
+                asset_id,
+                format!("file:///{path}"),
+                path,
+                media_json,
+                path,
+            ],
+        )
+        .unwrap();
+        conn.execute(
+            "INSERT OR IGNORE INTO documents (
+                doc_id, asset_id, workspace_path, title, lang, source_type,
+                trust_level, parser_version, doc_version, schema_version,
+                metadata_json, provenance_json, created_at, updated_at
+             ) VALUES (?, ?, ?, NULL, 'en', 'markdown', 'primary', 'v1', 1, 1,
+                       '{}', '{}', '1970-01-01T00:00:00Z', '1970-01-01T00:00:00Z')",
+            params![doc_id, asset_id, path],
+        )
+        .unwrap();
+        let heading_json = "[]";
+        conn.execute(
+            "INSERT OR IGNORE INTO chunks (
+                chunk_id, doc_id, text, heading_path_json, section_label,
+                source_spans_json, token_estimate, chunker_version,
+                policy_hash, block_ids_json, created_at
+             ) VALUES (?, ?, ?, ?, NULL,
+                       '[{\"kind\":\"line\",\"start\":1,\"end\":1}]',
+                       1, 'v1', 'h', '[]', '1970-01-01T00:00:00Z')",
+            params![chunk_id, doc_id, text, heading_json],
+        )
+        .unwrap();
+        drop(conn);
+        self.embed_and_upsert(&chunk_id, &doc_id, text, &[]);
+        DocumentId(doc_id)
+    }
+
+    /// Run a `SearchMode::Vector` query against the seeded corpus and
+    /// return the resulting `Vec<SearchHit>`.
+    pub fn run_vector_search(&self, query: &str, filters: &SearchFilters) -> Vec<SearchHit> {
+        let r = self.vector_retriever();
+        let q = SearchQuery {
+            text: query.to_string(),
+            mode: SearchMode::Vector,
+            k: 10,
+            filters: filters.clone(),
+        };
+        r.search(&q).expect("vector search")
+    }
+
     /// Embed `text` as a Document and upsert it as the embedding for
     /// `chunk_id`. Drives the same code path production uses:
     /// MockEmbedder → VectorRecord → LanceVectorStore::upsert →
diff --git a/crates/kebab-search/tests/hybrid.rs b/crates/kebab-search/tests/hybrid.rs
index 13f945d..912422a 100644
--- a/crates/kebab-search/tests/hybrid.rs
+++ b/crates/kebab-search/tests/hybrid.rs
@@ -15,7 +15,7 @@ use common::{
     HybridEnv, id32, require_avx_or_panic, TEST_LEX_INDEX_VERSION, TEST_VEC_INDEX_VERSION,
 };
 use kebab_core::{
-    Retriever, SearchFilters, SearchHit, SearchMode, SearchQuery,
+    MediaType, Retriever, SearchFilters, SearchHit, SearchMode, SearchQuery,
 };
 use kebab_search::{FusionPolicy, HybridRetriever};
 use rusqlite::params;
@@ -213,6 +213,57 @@ fn hybrid_snapshot_run_1() {
     }
 }
 
+/// p9-fb-36: vector post-filter must pass `media` through `filter_chunks`.
+/// Seeding two docs (markdown + pdf) and filtering for pdf-only must
+/// return only the pdf chunk, proving `LanceVectorStore::search` →
+/// `SqliteStore::filter_chunks` correctly applies the media arm.
+#[test]
+#[ignore = "requires AVX-capable hardware (LanceDB)"]
+fn vector_filter_by_media() {
+    require_avx_or_panic();
+    let env = HybridEnv::new();
+    env.insert_doc_with_media("md1.md", "rust ownership", MediaType::Markdown);
+    env.insert_doc_with_media("doc.pdf", "rust pdf body", MediaType::Pdf);
+
+    let filters = SearchFilters {
+        media: vec!["pdf".to_string()],
+        ..Default::default()
+    };
+    let hits = env.run_vector_search("rust", &filters);
+    assert_eq!(hits.len(), 1, "media filter must keep only pdf chunk");
+    assert!(
+        hits[0].doc_path.0.ends_with(".pdf"),
+        "expected .pdf path, got: {}",
+        hits[0].doc_path.0
+    );
+}
+
+/// p9-fb-36: vector post-filter must pass `doc_id` through `filter_chunks`.
+/// Seeding two docs with shared text, filtering by one doc_id must return
+/// only chunks from that doc.
+#[test]
+#[ignore = "requires AVX-capable hardware (LanceDB)"]
+fn vector_filter_by_doc_id() {
+    require_avx_or_panic();
+    let env = HybridEnv::new();
+    let target = env.insert_doc("a.md", "shared knowledge");
+    env.insert_doc("b.md", "shared knowledge");
+
+    let filters = SearchFilters {
+        doc_id: Some(target.clone()),
+        ..Default::default()
+    };
+    let hits = env.run_vector_search("shared", &filters);
+    assert!(
+        !hits.is_empty(),
+        "doc_id filter must return hits for the target doc"
+    );
+    assert!(
+        hits.iter().all(|h| h.doc_id == target),
+        "all hits must belong to the target doc_id"
+    );
+}
+
 #[test]
 #[ignore = "requires AVX-capable hardware (LanceDB)"]
 fn vector_hit_carries_indexed_at() {
diff --git a/crates/kebab-store-sqlite/src/filters.rs b/crates/kebab-store-sqlite/src/filters.rs
index 2b1ff00..4586236 100644
--- a/crates/kebab-store-sqlite/src/filters.rs
+++ b/crates/kebab-store-sqlite/src/filters.rs
@@ -129,6 +129,47 @@ impl SqliteStore {
             }
         }
 
+        // p9-fb-36: media_type filter (IN-list).
+        // `assets.media_type` JSON has two shapes:
+        //   - unit variant (Markdown / Pdf / …): JSON text, e.g. `"markdown"`
+        //   - tuple variant (Image(Png) / Audio(Mp3) / Other(s)): JSON object,
+        //     e.g. `{"image": "png"}`
+        // Extract a unified "kind" string for both shapes; mirrors lexical.
+        if !filters.media.is_empty() {
+            let media_ph = std::iter::repeat_n("?", filters.media.len())
+                .collect::<Vec<_>>()
+                .join(",");
+            sql.push_str(&format!(
+                " AND d.doc_id IN (\
+                   SELECT d2.doc_id FROM documents d2 \
+                   JOIN assets a ON a.asset_id = d2.asset_id \
+                   WHERE CASE \
+                     WHEN json_type(a.media_type) = 'text' THEN json_extract(a.media_type, '$') \
+                     ELSE (SELECT key FROM json_each(a.media_type) LIMIT 1) \
+                   END IN ({media_ph}))"
+            ));
+            for kind in &filters.media {
+                bind.push(Box::new(kind.clone()));
+            }
+        }
+
+        // p9-fb-36: ingested_after filter. `documents.updated_at` is RFC3339 TEXT in UTC
+        // `Z` (fb-32); normalize the bound to UTC so lexicographic >= stays correct.
+        if let Some(after) = &filters.ingested_after {
+            let formatted = after
+                .to_offset(time::UtcOffset::UTC)
+                .format(&time::format_description::well_known::Rfc3339)
+                .expect("OffsetDateTime formats to RFC3339");
+            sql.push_str(" AND d.updated_at >= ?");
+            bind.push(Box::new(formatted));
+        }
+
+        // p9-fb-36: doc_id filter — single-doc scoping.
+        if let Some(id) = &filters.doc_id {
+            sql.push_str(" AND d.doc_id = ?");
+            bind.push(Box::new(id.0.clone()));
+        }
+
         // Optional path_glob: applied in Rust on the rows we get back,
         // not in SQL — matching `kb-search::lexical`'s post-filter so
         // the glob semantics are byte-identical between retrievers.
@@ -280,6 +321,89 @@ mod tests {
             .unwrap();
     }
 
+    /// Variant of `seed_committed` that accepts an explicit `media_type`
+    /// JSON string (e.g. `r#""markdown""#` or `r#""pdf""#`) and an
+    /// explicit `updated_at` RFC3339 string so the fb-36 filter tests can
+    /// exercise `media` and `ingested_after` without going through the full
+    /// ingest pipeline.
+    #[allow(clippy::too_many_arguments)]
+    fn seed_committed_full(
+        store: &SqliteStore,
+        chunk_id: &str,
+        doc_id: &str,
+        workspace_path: &str,
+        lang: &str,
+        tags: &[&str],
+        trust: &str,
+        media_type_json: &str,
+        updated_at: &str,
+    ) {
+        let asset_id = format!("a{}", &doc_id[..31]);
+        {
+            let conn = store.lock_conn();
+            conn.execute(
+                "INSERT INTO assets (
+                    asset_id, source_uri, workspace_path, media_type, byte_len,
+                    checksum, storage_kind, storage_path, discovered_at
+                 ) VALUES (?, ?, ?, ?, 0, 'deadbeefdeadbeefdeadbeefdeadbeef',
+                           'reference', ?, '1970-01-01T00:00:00Z')",
+                params![
+                    asset_id,
+                    format!("file://{workspace_path}"),
+                    workspace_path,
+                    media_type_json,
+                    workspace_path,
+                ],
+            )
+            .unwrap();
+            conn.execute(
+                "INSERT INTO documents (
+                    doc_id, asset_id, workspace_path, title, lang, source_type,
+                    trust_level, parser_version, doc_version, schema_version,
+                    metadata_json, provenance_json, created_at, updated_at
+                 ) VALUES (?, ?, ?, NULL, ?, 'markdown', ?, 'v1', 1, 1,
+                           '{}', '{}', '1970-01-01T00:00:00Z', ?)",
+                params![doc_id, asset_id, workspace_path, lang, trust, updated_at],
+            )
+            .unwrap();
+            for t in tags {
+                conn.execute(
+                    "INSERT INTO document_tags (doc_id, tag) VALUES (?, ?)",
+                    params![doc_id, t],
+                )
+                .unwrap();
+            }
+            conn.execute(
+                "INSERT INTO chunks (
+                    chunk_id, doc_id, text, heading_path_json, section_label,
+                    source_spans_json, token_estimate, chunker_version,
+                    policy_hash, block_ids_json, created_at
+                 ) VALUES (?, ?, 'hi', '[]', NULL, '[]', 1, 'v1', 'h', '[]',
+                           '1970-01-01T00:00:00Z')",
+                params![chunk_id, doc_id],
+            )
+            .unwrap();
+        }
+
+        let embed_row = EmbeddingRecordRow {
+            embedding_id: format!("e{}", &chunk_id[..31]),
+            chunk_id: chunk_id.to_string(),
+            model_id: "m".to_string(),
+            model_version: "v1".to_string(),
+            dimensions: 4,
+            lance_table: "t".to_string(),
+            created_at: OffsetDateTime::UNIX_EPOCH,
+        };
+        store
+            .put_embedding_records_pending(std::slice::from_ref(&embed_row))
+            .unwrap();
+        store
+            .mark_embedding_records_committed(std::slice::from_ref(
+                &embed_row.embedding_id,
+            ))
+            .unwrap();
+    }
+
     fn cid(s: &str) -> ChunkId {
         ChunkId(s.to_string())
     }
@@ -449,4 +573,97 @@ mod tests {
         let out = store.filter_chunks(&[], &SearchFilters::default()).unwrap();
         assert!(out.is_empty());
     }
+
+    // ── p9-fb-36 new filter arms ─────────────────────────────────────────
+
+    #[test]
+    fn filter_chunks_media_type_keeps_matching_kind() {
+        // c1 = markdown, c2 = pdf. Filter for pdf → only c2 survives.
+        let tmp = TempDir::new().unwrap();
+        let store = open_store(&tmp);
+        let c1 = "11111111111111111111111111111111";
+        let c2 = "22222222222222222222222222222222";
+        seed_committed_full(
+            &store, c1, "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1",
+            "notes/a.md", "en", &[], "primary",
+            r#""markdown""#,
+            "1970-01-01T00:00:00Z",
+        );
+        seed_committed_full(
+            &store, c2, "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2",
+            "notes/b.pdf", "en", &[], "primary",
+            r#""pdf""#,
+            "1970-01-01T00:00:00Z",
+        );
+
+        let f = SearchFilters {
+            media: vec!["pdf".to_string()],
+            ..Default::default()
+        };
+        let out = store
+            .filter_chunks(&[cid(c1), cid(c2)], &f)
+            .unwrap();
+        assert_eq!(out, vec![cid(c2)], "only pdf chunk should survive media filter");
+    }
+
+    #[test]
+    fn filter_chunks_ingested_after_excludes_old_docs() {
+        // c1 ingested 2020, c2 ingested 2026.  filter ingested_after=2025 → only c2.
+        let tmp = TempDir::new().unwrap();
+        let store = open_store(&tmp);
+        let c1 = "11111111111111111111111111111111";
+        let c2 = "22222222222222222222222222222222";
+        seed_committed_full(
+            &store, c1, "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1",
+            "old.md", "en", &[], "primary",
+            r#""markdown""#,
+            "2020-01-01T00:00:00Z",
+        );
+        seed_committed_full(
+            &store, c2, "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2",
+            "new.md", "en", &[], "primary",
+            r#""markdown""#,
+            "2026-01-01T00:00:00Z",
+        );
+
+        let f = SearchFilters {
+            ingested_after: Some(time::macros::datetime!(2025-01-01 00:00:00 UTC)),
+            ..Default::default()
+        };
+        let out = store
+            .filter_chunks(&[cid(c1), cid(c2)], &f)
+            .unwrap();
+        assert_eq!(out, vec![cid(c2)], "only post-2025 chunk should survive ingested_after filter");
+    }
+
+    #[test]
+    fn filter_chunks_doc_id_scopes_to_single_doc() {
+        // c1 belongs to d1, c2 belongs to d2. filter doc_id=d1 → only c1.
+        let tmp = TempDir::new().unwrap();
+        let store = open_store(&tmp);
+        let c1 = "11111111111111111111111111111111";
+        let c2 = "22222222222222222222222222222222";
+        let d1 = "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1";
+        seed_committed_full(
+            &store, c1, d1,
+            "a.md", "en", &[], "primary",
+            r#""markdown""#,
+            "1970-01-01T00:00:00Z",
+        );
+        seed_committed_full(
+            &store, c2, "d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2d2",
+            "b.md", "en", &[], "primary",
+            r#""markdown""#,
+            "1970-01-01T00:00:00Z",
+        );
+
+        let f = SearchFilters {
+            doc_id: Some(kebab_core::DocumentId(d1.to_string())),
+            ..Default::default()
+        };
+        let out = store
+            .filter_chunks(&[cid(c1), cid(c2)], &f)
+            .unwrap();
+        assert_eq!(out, vec![cid(c1)], "doc_id filter must scope to the target doc only");
+    }
 }
-- 
2.49.1


From 6a18847892c193c0122329d7d0ab8403c686c4de Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 03:57:55 +0900
Subject: [PATCH 07/11] feat(cli): kebab search filter flags (fb-36)

7 new flags: --tag (repeatable), --lang, --path-glob,
--trust-min (value_enum), --media (csv with `md` alias),
--ingested-after (RFC3339; config_invalid on parse fail),
--doc-id. Dispatch translates clap values into SearchFilters
and propagates structured errors through the existing
StructuredError wrapper from fb-34.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-cli/src/main.rs | 111 ++++++++++++++++++++++++++++++++++-
 1 file changed, 110 insertions(+), 1 deletion(-)

diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index c92ba27..7e41d8a 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -131,6 +131,38 @@ enum Cmd {
         /// `corpus_revision` returns `error.v1.code = stale_cursor`.
         #[arg(long)]
         cursor: Option<String>,
+
+        /// p9-fb-36: filter by `metadata.tags`. Repeatable; OR-within (any tag).
+        #[arg(long)]
+        tag: Vec<String>,
+
+        /// p9-fb-36: filter by `documents.lang` (ISO code).
+        #[arg(long)]
+        lang: Option<String>,
+
+        /// p9-fb-36: filter by `documents.workspace_path` glob.
+        #[arg(long)]
+        path_glob: Option<String>,
+
+        /// p9-fb-36: filter by minimum `documents.trust_level`.
+        #[arg(long, value_enum)]
+        trust_min: Option<TrustLevelFlag>,
+
+        /// p9-fb-36: filter by `assets.media_type` kind. Comma-separated.
+        /// Aliases: `md` → `markdown`. Other accepted: `markdown`, `pdf`,
+        /// `image`, `audio`, `other`. Unknown values match nothing.
+        #[arg(long, value_delimiter = ',')]
+        media: Vec<String>,
+
+        /// p9-fb-36: filter to docs whose `updated_at` is >= this RFC3339
+        /// timestamp (UTC). Invalid format → exit 2 with error.v1
+        /// code = config_invalid.
+        #[arg(long)]
+        ingested_after: Option<String>,
+
+        /// p9-fb-36: filter to a single doc by id.
+        #[arg(long)]
+        doc_id: Option<String>,
     },
 
     /// Retrieval-augmented question answering.
@@ -351,6 +383,25 @@ impl From<ModeFlag> for kebab_core::SearchMode {
     }
 }
 
+/// p9-fb-36: clap value enum for `--trust-min`. Maps to
+/// `kebab_core::TrustLevel` via `From`.
+#[derive(clap::ValueEnum, Clone, Debug)]
+enum TrustLevelFlag {
+    Primary,
+    Secondary,
+    Generated,
+}
+
+impl From<TrustLevelFlag> for kebab_core::TrustLevel {
+    fn from(flag: TrustLevelFlag) -> Self {
+        match flag { // each CLI variant maps to the same-named core variant
+            TrustLevelFlag::Primary => Self::Primary,
+            TrustLevelFlag::Secondary => Self::Secondary,
+            TrustLevelFlag::Generated => Self::Generated,
+        }
+    }
+}
+
 /// Parse boolean env var accepting "1", "true", "yes", "on" (case-insensitive)
 /// as truthy; "0", "false", "no", "off" as falsy. Used for `KEBAB_READONLY`.
 fn parse_bool_env(s: &str) -> Result<bool, String> {
@@ -611,13 +662,71 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
             max_tokens,
             snippet_chars,
             cursor,
+            tag,
+            lang,
+            path_glob,
+            trust_min,
+            media,
+            ingested_after,
+            doc_id,
         } => {
             let cfg = kebab_config::Config::load(cli.config.as_deref())?;
+
+            // p9-fb-36: normalize --media aliases (md → markdown).
+            fn normalize_media_alias(s: &str) -> String {
+                match s.to_ascii_lowercase().as_str() {
+                    "md" => "markdown".to_string(),
+                    other => other.to_string(),
+                }
+            }
+            let media_norm: Vec<String> =
+                media.iter().map(|s| normalize_media_alias(s)).collect();
+
+            // p9-fb-36: parse --ingested-after as RFC3339; structured error on failure.
+            let ingested_after_parsed: Option<time::OffsetDateTime> =
+                match ingested_after.as_deref() {
+                    Some(s) => {
+                        match time::OffsetDateTime::parse(
+                            s,
+                            &time::format_description::well_known::Rfc3339,
+                        ) {
+                            Ok(ts) => Some(ts),
+                            Err(e) => {
+                                return Err(anyhow::Error::new(
+                                    kebab_app::StructuredError(kebab_app::ErrorV1 {
+                                        schema_version: kebab_app::ERROR_V1_ID.to_string(),
+                                        code: "config_invalid".to_string(),
+                                        message: format!(
+                                            "--ingested-after: invalid RFC3339 timestamp '{s}': {e}"
+                                        ),
+                                        details: serde_json::Value::Null,
+                                        hint: Some(
+                                            "expected format like 2026-04-01T00:00:00Z".to_string(),
+                                        ),
+                                    }),
+                                ));
+                            }
+                        }
+                    }
+                    None => None,
+                };
+
+            // p9-fb-36: build SearchFilters from the 7 new flags.
+            let filters = kebab_core::SearchFilters {
+                tags_any: tag.clone(),
+                lang: lang.as_ref().map(|s| kebab_core::Lang(s.clone())),
+                path_glob: path_glob.clone(),
+                trust_min: trust_min.clone().map(Into::into),
+                media: media_norm,
+                ingested_after: ingested_after_parsed,
+                doc_id: doc_id.as_ref().map(|s| kebab_core::DocumentId(s.clone())),
+            };
+
             let q = kebab_core::SearchQuery {
                 text: query.clone(),
                 mode: (*mode).into(),
                 k: *k,
-                filters: kebab_core::SearchFilters::default(),
+                filters,
             };
             let opts = kebab_core::SearchOpts {
                 max_tokens: *max_tokens,
-- 
2.49.1


From 4e0379c04fc3ae6bdf96d5a50955e398a6dd2565 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 04:06:21 +0900
Subject: [PATCH 08/11] =?UTF-8?q?test(cli):=20wire=5Fsearch=5Ffilters=20?=
 =?UTF-8?q?=E2=80=94=20lexical-only=20integration=20tests=20(fb-36)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cover: --doc-id scoping, --ingested-after validation error,
--media md alias, --tag repeatable + frontmatter parsing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-cli/tests/wire_search_filters.rs | 226 ++++++++++++++++++
 1 file changed, 226 insertions(+)
 create mode 100644 crates/kebab-cli/tests/wire_search_filters.rs

diff --git a/crates/kebab-cli/tests/wire_search_filters.rs b/crates/kebab-cli/tests/wire_search_filters.rs
new file mode 100644
index 0000000..6c68aef
--- /dev/null
+++ b/crates/kebab-cli/tests/wire_search_filters.rs
@@ -0,0 +1,226 @@
+//! p9-fb-36: CLI integration tests for search filter flags.
+//!
+//! Lexical-only — no fastembed / no Ollama. Each test builds its own
+//! TempDir KB via `common::write_config` + `common::ingest` and drives
+//! `kebab search` through `common::run_search_with_args` or direct
+//! `Command` invocations. Verifies:
+//!
+//! - `--doc-id <id>` restricts all returned hits to the target document.
+//! - `--ingested-after <bad>` exits non-zero and emits `error.v1` on
+//!   stderr with `code = "config_invalid"`.
+//! - `--media md` (alias) normalises to `markdown` and matches `.md` docs.
+//! - `--tag <tag>` (repeatable, OR-within) filters by frontmatter tags.
+
+mod common;
+
+use serde_json::Value;
+use std::fs;
+use std::process::Command;
+
+// ---------------------------------------------------------------------------
+// Test 1: --doc-id restricts hits to a single document
+// ---------------------------------------------------------------------------
+
+#[test]
+fn search_with_doc_id_filter_returns_only_target_doc() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+
+    // Two docs that both contain the search term.
+    fs::write(workspace.join("a.md"), "# Alpha\n\nrust ownership rules\n").unwrap();
+    fs::write(workspace.join("b.md"), "# Beta\n\nrust borrow checker\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // First, search without a doc-id filter to find what doc_ids exist.
+    let (stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "rust"],
+    );
+    let resp: Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}"));
+    let hits = resp["hits"].as_array().expect("hits array");
+    assert!(
+        hits.len() >= 2,
+        "expected ≥2 hits from two docs before filter: {resp}"
+    );
+
+    // Grab one doc_id from the results.
+    let target_doc_id = hits[0]["doc_id"]
+        .as_str()
+        .expect("doc_id string")
+        .to_string();
+
+    // Re-search with --doc-id set to the first hit's doc_id.
+    let (stdout2, _) = common::run_search_with_args(
+        &cfg,
+        &[
+            "--json",
+            "--mode",
+            "lexical",
+            "--doc-id",
+            &target_doc_id,
+            "rust",
+        ],
+    );
+    let resp2: Value = serde_json::from_str(stdout2.trim())
+        .unwrap_or_else(|e| panic!("not JSON after filter: {stdout2:?}: {e}"));
+    let filtered_hits = resp2["hits"].as_array().expect("hits array (filtered)");
+
+    assert!(
+        !filtered_hits.is_empty(),
+        "expected at least one hit for the target doc"
+    );
+    for hit in filtered_hits {
+        let got = hit["doc_id"].as_str().expect("doc_id string in hit");
+        assert_eq!(
+            got, target_doc_id,
+            "--doc-id filter must restrict all hits to target doc, got {got}"
+        );
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Test 2: --ingested-after with bad RFC3339 → exit non-zero + error.v1
+// ---------------------------------------------------------------------------
+
+#[test]
+fn search_with_invalid_ingested_after_emits_config_invalid() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+    fs::write(workspace.join("a.md"), "# T\n\nrust stuff\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let out = Command::new(bin)
+        .args([
+            "--config",
+            cfg.to_str().unwrap(),
+            "--json",
+            "search",
+            "--mode",
+            "lexical",
+            "--ingested-after",
+            "not-a-date",
+            "rust",
+        ])
+        .output()
+        .expect("kebab search --ingested-after bad");
+
+    assert!(
+        !out.status.success(),
+        "expected non-zero exit for invalid --ingested-after, got: status={} stderr={}",
+        out.status,
+        String::from_utf8_lossy(&out.stderr)
+    );
+
+    let stderr = String::from_utf8_lossy(&out.stderr);
+    // Find the error.v1 ndjson line on stderr (one JSON event per line).
+    let err_line = stderr
+        .lines()
+        .find(|l| {
+            serde_json::from_str::<Value>(l)
+                .ok()
+                .and_then(|v| {
+                    v.get("schema_version")
+                        .and_then(|s| s.as_str())
+                        .map(String::from)
+                })
+                .as_deref()
+                == Some("error.v1")
+        })
+        .unwrap_or_else(|| panic!("no error.v1 line on stderr: {stderr:?}"));
+
+    let v: Value = serde_json::from_str(err_line).expect("error.v1 json");
+    assert_eq!(
+        v["code"], "config_invalid",
+        "code must be config_invalid for bad RFC3339: {err_line}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Test 3: --media md (alias) normalises to markdown and matches .md docs
+// ---------------------------------------------------------------------------
+
+#[test]
+fn search_with_media_filter_md_alias_normalizes_to_markdown() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+
+    // Only a markdown file — the `md` alias should match it.
+    fs::write(workspace.join("notes.md"), "# Notes\n\nrust async programming\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    let (stdout, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "--media", "md", "rust"],
+    );
+    let resp: Value = serde_json::from_str(stdout.trim())
+        .unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}"));
+    let hits = resp["hits"].as_array().expect("hits array");
+
+    assert!(
+        !hits.is_empty(),
+        "--media md must match the markdown doc; got 0 hits: {resp}"
+    );
+}
+
+// ---------------------------------------------------------------------------
+// Test 4: --tag (repeatable, OR-within) filters by frontmatter tags
+// ---------------------------------------------------------------------------
+
+#[test]
+fn search_with_tag_filter_matches_frontmatter_tags() {
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+
+    // Doc with `rust` tag.
+    fs::write(
+        workspace.join("rust_doc.md"),
+        "---\ntags: [rust, systems]\n---\n# Rust\n\nrust ownership\n",
+    )
+    .unwrap();
+    // Doc without the tag (but same keyword in body so it appears in
+    // unfiltered results — the tag filter must exclude it).
+    fs::write(
+        workspace.join("other_doc.md"),
+        "# Other\n\nrust programming\n",
+    )
+    .unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Without filter — both docs must produce hits.
+    let (unfiltered, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "rust"],
+    );
+    let uresp: Value = serde_json::from_str(unfiltered.trim())
+        .unwrap_or_else(|e| panic!("not JSON (unfiltered): {unfiltered:?}: {e}"));
+    let uhits = uresp["hits"].as_array().expect("unfiltered hits array");
+    assert!(
+        uhits.len() >= 2,
+        "expected ≥2 hits before tag filter: {uresp}"
+    );
+
+    // With --tag rust — only the tagged doc's hits should appear.
+    let (filtered, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "--tag", "rust", "rust"],
+    );
+    let fresp: Value = serde_json::from_str(filtered.trim())
+        .unwrap_or_else(|e| panic!("not JSON (tag-filtered): {filtered:?}: {e}"));
+    let fhits = fresp["hits"].as_array().expect("filtered hits array");
+
+    assert!(
+        !fhits.is_empty(),
+        "--tag rust must match the tagged doc; got 0 hits: {fresp}"
+    );
+
+    // Every returned hit must come from rust_doc.md (the tagged file).
+    for hit in fhits {
+        let path = hit["doc_path"].as_str().unwrap_or("");
+        assert!(
+            path.ends_with("rust_doc.md"),
+            "--tag rust must only return hits from the tagged doc, got path={path}"
+        );
+    }
+}
-- 
2.49.1


From b06f4654e712833653bf6aa901709c16b7d8132b Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 04:11:27 +0900
Subject: [PATCH 09/11] feat(mcp): kebab__search filter inputs (fb-36)

7 new optional inputs on SearchInput: tags, lang, path_glob,
trust_min, media, ingested_after, doc_id. Validation surfaces as
error.v1 code = invalid_input via StructuredError. Dispatch builds
SearchFilters from the inputs and forwards through the existing
search_with_opts_with_config facade.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/kebab-mcp/Cargo.toml                 |   2 +
 crates/kebab-mcp/src/tools/search.rs        |  92 +++++++++-
 crates/kebab-mcp/tests/tools_call_fetch.rs  |   7 +
 crates/kebab-mcp/tests/tools_call_search.rs | 179 ++++++++++++++++++++
 4 files changed, 278 insertions(+), 2 deletions(-)

diff --git a/crates/kebab-mcp/Cargo.toml b/crates/kebab-mcp/Cargo.toml
index dfd6136..9ecea0d 100644
--- a/crates/kebab-mcp/Cargo.toml
+++ b/crates/kebab-mcp/Cargo.toml
@@ -19,6 +19,8 @@ tracing     = { workspace = true }
 # /dependencies endpoint — rmcp declares optional schemars = "^1.0").
 schemars    = "1"
 
+time         = { workspace = true }
+
 kebab-app    = { path = "../kebab-app" }
 kebab-config = { path = "../kebab-config" }
 kebab-core   = { path = "../kebab-core" }
diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs
index e5f7b4e..2027024 100644
--- a/crates/kebab-mcp/src/tools/search.rs
+++ b/crates/kebab-mcp/src/tools/search.rs
@@ -1,5 +1,7 @@
 //! `search` tool — wraps `kebab_app::search_with_opts_with_config`.
-//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor? }.
+//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor?,
+//!          tags?, lang?, path_glob?, trust_min?, media?,
+//!          ingested_after?, doc_id? }.
 //! Output: search_response.v1 envelope (hits + next_cursor + truncated).
 //!
 //! First tool with a non-empty `inputSchema`: `SearchInput` derives
@@ -27,6 +29,22 @@ pub struct SearchInput {
     pub snippet_chars: Option<usize>,
     /// p9-fb-34: opaque cursor from a previous response.
     pub cursor: Option<String>,
+    /// p9-fb-36: filter by `metadata.tags` (OR-within).
+    pub tags: Option<Vec<String>>,
+    /// p9-fb-36: filter by `documents.lang` (ISO code).
+    pub lang: Option<String>,
+    /// p9-fb-36: filter by `documents.workspace_path` glob.
+    pub path_glob: Option<String>,
+    /// p9-fb-36: filter by minimum `documents.trust_level`.
+    /// Accepts: `"primary"`, `"secondary"`, `"generated"`.
+    pub trust_min: Option<String>,
+    /// p9-fb-36: filter by `assets.media_type` kind. IN-list. Accepts:
+    /// `"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`. Aliases: `md` → `markdown`. Unknown values match nothing.
+    pub media: Option<Vec<String>>,
+    /// p9-fb-36: filter to docs whose `updated_at` is >= this RFC3339 UTC timestamp. Invalid format → invalid_input.
+    pub ingested_after: Option<String>,
+    /// p9-fb-36: filter to a single doc.
+    pub doc_id: Option<String>,
 }
 
 pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
@@ -37,11 +55,62 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
         "vector" => kebab_core::SearchMode::Vector,
         _ => kebab_core::SearchMode::Hybrid,
     };
+
+    // p9-fb-36: parse filter inputs, returning invalid_input on bad values.
+    let trust_min = match input.trust_min.as_deref() {
+        Some(s) => match s.to_ascii_lowercase().as_str() {
+            "primary" => Some(kebab_core::TrustLevel::Primary),
+            "secondary" => Some(kebab_core::TrustLevel::Secondary),
+            "generated" => Some(kebab_core::TrustLevel::Generated),
+            other => {
+                return invalid_input(&format!(
+                    "trust_min: unknown level '{other}'; expected primary|secondary|generated"
+                ));
+            }
+        },
+        None => None,
+    };
+
+    let ingested_after = match input.ingested_after.as_deref() {
+        Some(s) => {
+            match time::OffsetDateTime::parse(
+                s,
+                &time::format_description::well_known::Rfc3339,
+            ) {
+                Ok(ts) => Some(ts),
+                Err(e) => {
+                    return invalid_input(&format!(
+                        "ingested_after: invalid RFC3339 '{s}': {e}"
+                    ));
+                }
+            }
+        }
+        None => None,
+    };
+
+    let media: Vec<String> = input
+        .media
+        .clone()
+        .unwrap_or_default()
+        .iter()
+        .map(|s| normalize_media_alias(s))
+        .collect();
+
+    let filters = kebab_core::SearchFilters {
+        tags_any: input.tags.clone().unwrap_or_default(),
+        lang: input.lang.clone().map(kebab_core::Lang),
+        path_glob: input.path_glob.clone(),
+        trust_min,
+        media,
+        ingested_after,
+        doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
+    };
+
     let query = kebab_core::SearchQuery {
         text: input.query,
         mode,
         k,
-        filters: kebab_core::SearchFilters::default(),
+        filters,
     };
     let opts = kebab_core::SearchOpts {
         max_tokens: input.max_tokens,
@@ -81,3 +150,22 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
         Err(e) => to_tool_error(&e),
     }
 }
+
+fn normalize_media_alias(s: &str) -> String {
+    match s.to_ascii_lowercase().as_str() {
+        "md" => "markdown".to_string(),
+        other => other.to_string(),
+    }
+}
+
+fn invalid_input(msg: &str) -> CallToolResult {
+    use kebab_app::{ErrorV1, StructuredError};
+    let err = anyhow::Error::new(StructuredError(ErrorV1 {
+        schema_version: "error.v1".to_string(),
+        code: "invalid_input".to_string(),
+        message: msg.to_string(),
+        details: serde_json::Value::Null,
+        hint: None,
+    }));
+    to_tool_error(&err)
+}
diff --git a/crates/kebab-mcp/tests/tools_call_fetch.rs b/crates/kebab-mcp/tests/tools_call_fetch.rs
index 5627e93..8da70a7 100644
--- a/crates/kebab-mcp/tests/tools_call_fetch.rs
+++ b/crates/kebab-mcp/tests/tools_call_fetch.rs
@@ -62,6 +62,13 @@ async fn fetch_tool_chunk_returns_fetch_result_v1() {
             max_tokens: None,
             snippet_chars: None,
             cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
         },
     );
     let search_text = match &search_result.content.first().unwrap().raw {
diff --git a/crates/kebab-mcp/tests/tools_call_search.rs b/crates/kebab-mcp/tests/tools_call_search.rs
index 5995292..58a32d8 100644
--- a/crates/kebab-mcp/tests/tools_call_search.rs
+++ b/crates/kebab-mcp/tests/tools_call_search.rs
@@ -58,6 +58,13 @@ async fn search_tool_returns_search_response_v1() {
             max_tokens: None,
             snippet_chars: None,
             cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
         },
     );
 
@@ -108,3 +115,175 @@ async fn search_tool_returns_search_response_v1() {
         "envelope should carry next_cursor (possibly null)"
     );
 }
+
+/// p9-fb-36: search with doc_id filter — only hits from the target doc.
+#[tokio::test]
+async fn search_with_doc_id_filter_returns_only_target() {
+    let dir = tempfile::tempdir().unwrap();
+    let data_dir = dir.path().join("data");
+    let workspace_root = dir.path().join("notes");
+    fs::create_dir_all(&data_dir).unwrap();
+    fs::create_dir_all(&workspace_root).unwrap();
+
+    let config = minimal_config(&data_dir, &workspace_root);
+
+    // Write two markdown documents, both containing the query term.
+    fs::write(
+        workspace_root.join("a.md"),
+        "# Alpha\n\nThis document mentions kebab and flatbread.",
+    )
+    .unwrap();
+    fs::write(
+        workspace_root.join("b.md"),
+        "# Beta\n\nAnother document about kebab wraps and fillings.",
+    )
+    .unwrap();
+
+    let scope = SourceScope {
+        root: workspace_root.clone(),
+        include: vec![],
+        exclude: vec![],
+    };
+    let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap();
+
+    let state = KebabAppState::new(config, None);
+    let handler = KebabHandler::new(state);
+
+    // First: unfiltered search to discover a doc_id from one of the docs.
+    let unfiltered = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: Some("lexical".to_string()),
+            k: Some(10),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
+        },
+    );
+    assert!(
+        !unfiltered.is_error.unwrap_or(false),
+        "unfiltered search failed: {:?}",
+        unfiltered
+    );
+    let unfiltered_text = match &unfiltered.content.first().unwrap().raw {
+        RawContent::Text(t) => t.text.clone(),
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let unfiltered_v: serde_json::Value = serde_json::from_str(&unfiltered_text).unwrap();
+    let hits = unfiltered_v["hits"].as_array().expect("hits must be array");
+    assert!(hits.len() >= 2, "expected hits from both docs");
+
+    // Pick the doc_id of the first hit.
+    let target_doc_id = hits[0]["doc_id"]
+        .as_str()
+        .expect("doc_id on first hit")
+        .to_string();
+
+    // Now search with doc_id filter — all results must belong to that doc.
+    let filtered = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: Some("lexical".to_string()),
+            k: Some(10),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: Some(target_doc_id.clone()),
+        },
+    );
+    assert!(
+        !filtered.is_error.unwrap_or(false),
+        "filtered search failed: {:?}",
+        filtered
+    );
+    let filtered_text = match &filtered.content.first().unwrap().raw {
+        RawContent::Text(t) => t.text.clone(),
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let filtered_v: serde_json::Value = serde_json::from_str(&filtered_text).unwrap();
+    let filtered_hits = filtered_v["hits"].as_array().expect("hits must be array");
+
+    assert!(
+        !filtered_hits.is_empty(),
+        "expected at least one hit for target doc"
+    );
+    for hit in filtered_hits {
+        assert_eq!(
+            hit["doc_id"].as_str(),
+            Some(target_doc_id.as_str()),
+            "all filtered hits must belong to the target doc"
+        );
+    }
+}
+
+/// p9-fb-36: invalid RFC3339 for ingested_after → invalid_input error.v1.
+#[tokio::test]
+async fn search_with_invalid_ingested_after_returns_invalid_input() {
+    let dir = tempfile::tempdir().unwrap();
+    let data_dir = dir.path().join("data");
+    let workspace_root = dir.path().join("notes");
+    fs::create_dir_all(&data_dir).unwrap();
+    fs::create_dir_all(&workspace_root).unwrap();
+
+    let config = minimal_config(&data_dir, &workspace_root);
+    let state = KebabAppState::new(config, None);
+    let handler = KebabHandler::new(state);
+
+    let result = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: None,
+            k: None,
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: Some("garbage".to_string()),
+            doc_id: None,
+        },
+    );
+
+    assert!(
+        result.is_error.unwrap_or(false),
+        "expected isError=true for invalid ingested_after"
+    );
+    let content = result
+        .content
+        .first()
+        .expect("expected at least one content item");
+    let text = match &content.raw {
+        RawContent::Text(t) => &t.text,
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let v: serde_json::Value = serde_json::from_str(text).unwrap();
+    assert_eq!(
+        v.get("schema_version").and_then(|s| s.as_str()),
+        Some("error.v1"),
+        "must carry error.v1 envelope"
+    );
+    assert_eq!(
+        v.get("code").and_then(|s| s.as_str()),
+        Some("invalid_input"),
+        "code must be invalid_input for bad RFC3339"
+    );
+}
-- 
2.49.1


From 6e7446861bb4f90545408464d169a1c7960df4f0 Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 04:26:27 +0900
Subject: [PATCH 10/11] docs(fb-36): README + SMOKE + INDEX + skill notes

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md                               |  2 +-
 docs/SMOKE.md                           | 16 ++++++++++++++++
 integrations/claude-code/kebab/SKILL.md |  3 ++-
 tasks/INDEX.md                          |  2 +-
 tasks/p9/p9-fb-36-search-filters.md     |  7 +++++--
 5 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index b7595a6..3c699f3 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ kebab doctor
 |------|------|
 | `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 |
 | `kebab ingest [<path>]` | Markdown / 이미지 / PDF 색인 (idempotent). TTY 에서는 stderr 진행 바, non-TTY (CI / pipe) 는 stderr 한 줄씩, `--json` 은 stdout 에 `ingest_progress.v1` 라인 streaming 후 마지막에 `ingest_report.v1`. Ctrl-C 한 번이면 현재 asset 마무리 후 abort (부분 commit 보존, idempotent re-run), 두 번째 Ctrl-C 는 hard exit. Markdown title 이 frontmatter 에 없어도 첫 H1 → H2 → 첫 paragraph 80 자 → 파일명 순으로 자동 채움 (parser_version `md-frontmatter-v2`) — 기존 색인된 doc 도 다음 ingest 에서 새 title 로 갱신. **Incremental** (p9-fb-23): 두 번째 이후의 ingest 는 변하지 않은 doc (blake3 + parser/chunker/embedder version 모두 동일) 의 parse/chunk/embed/vector upsert 를 자동 스킵. final summary 에 `N unchanged` 카운트 표시. `--force-reingest` 로 skip 무시 강제 재처리. **지원 형식** (extractor 자동 결정 — config 에 명시 불가): Markdown (`.md`), 이미지 (`.png` / `.jpg` / `.jpeg`, OCR + caption), PDF (`.pdf`). 다른 확장자는 자동 skip — `IngestItem.warnings` 에 사유 (`"unsupported media type: .docx"` 등), `IngestReport.skipped_by_extension` 에 카운트 분류, CLI / TUI summary 에 breakdown 표시. |
-| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor <opaque>]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor` |
+| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor <opaque>] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` 는 반복 지정, `--media` 는 `,` 구분으로 다중 값을 받으며 각 flag 안에서는 OR 매칭, 서로 다른 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). `--ingested-after` 는 RFC3339 UTC — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 alias 로서 `markdown` 으로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). |
 | `kebab list docs` | 색인된 문서 목록 |
 | `kebab inspect doc <id>` / `kebab inspect chunk <id>` | raw record 보기 |
 | `kebab fetch chunk <id> [--context N]` / `kebab fetch doc <id> [--max-tokens N]` / `kebab fetch span <doc_id> <ls> <le> [--max-tokens N]` | (p9-fb-35) verbatim text fetch from indexed corpus. wire = `fetch_result.v1` (kind discriminator). chunk: target + ±N ordinal-context chunks. doc: full normalized markdown. span: 1-based line range (PDF/audio rejected as `error.v1.code = span_not_supported`). chars/4 budget on doc/span. |
diff --git a/docs/SMOKE.md b/docs/SMOKE.md
index 272c1f7..9a68800 100644
--- a/docs/SMOKE.md
+++ b/docs/SMOKE.md
@@ -190,6 +190,22 @@ kebab fetch span "$DOC_ID" 1 5 --json | jq '{line_start, line_end, effective_end
 
 PDF / audio docs reject `fetch span` with `error.v1.code = span_not_supported` — use `fetch chunk` (PDF chunks are page-aligned) or `fetch doc` instead.
 
+### Filter args (fb-36)
+
+````bash
+# Filter by media kind (md alias normalizes to markdown).
+kebab search "rust" --media md --json | jq '.hits | length'
+
+# Filter by ingest timestamp (RFC3339).
+kebab search "rust" --ingested-after 2026-04-01T00:00:00Z --json
+
+# Combine: doc-id scope + tag (AND across flags).
+kebab search "rust" --doc-id "<doc-id>" --tag rust --json
+````
+
+Bad `--ingested-after` → `error.v1.code = config_invalid`, exit 2.
+Unknown `--media` value → silently empty (no error).
+
 ## P6-4 이미지 ingestion 옵션
 
 `config.toml` 에 다음 절을 추가하면 `kebab ingest` 가 `**/*.png` / `**/*.jpg` 등 이미지 자산도 함께 색인합니다 (텍스트만 색인하려면 생략):
diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md
index 2faedda..fea4e2e 100644
--- a/integrations/claude-code/kebab/SKILL.md
+++ b/integrations/claude-code/kebab/SKILL.md
@@ -48,11 +48,12 @@ Use when the user wants to **find** a doc, or when you (the model) need raw chun
 
 Input:
 ```json
-{ "query": "<query>", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null }
+{ "query": "<query>", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null, "tags": null, "lang": null, "path_glob": null, "trust_min": null, "media": null, "ingested_after": null, "doc_id": null }
 ```
 
 - `mode = "hybrid"` is the default-correct choice. Use `"vector"` for semantic-only ("docs about X concept"), `"lexical"` for exact strings ("the literal flag `--foo-bar`").
 - **`max_tokens` / `snippet_chars` / `cursor` (p9-fb-34)** — agent budget controls. Set `max_tokens` to cap result wire size (chars/4 estimate); set `cursor` to the previous response's `next_cursor` to fetch the next page.
+- **p9-fb-36 filter inputs:** `tags` (string array — OR-within, AND across keys), `lang` (ISO language code, matched against `documents.lang`), `path_glob` (glob pattern matched against doc path), `trust_min` (`"primary"` | `"secondary"` | `"generated"` — includes that level and above), `media` (string array — IN-list of `"markdown"` | `"pdf"` | `"image"` | `"audio"` | `"other"`; alias `"md"` → `"markdown"`), `ingested_after` (RFC3339 UTC string — keeps docs whose `updated_at` is at or after it), `doc_id` (exact doc UUID). AND combinator across keys. Invalid `ingested_after` or unknown `trust_min` → `error.v1.code = invalid_input`. Unknown `media` value → empty hits, no error.
 - Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`.
 - Cite back to the user as `doc_path § heading_path[-1]` so they can open the source.
 - When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one.
diff --git a/tasks/INDEX.md b/tasks/INDEX.md
index fab95d7..db35a0b 100644
--- a/tasks/INDEX.md
+++ b/tasks/INDEX.md
@@ -124,7 +124,7 @@ P0~P5 는 직렬. P6~P9 는 P5 이후 병렬 가능.
     - [p9-fb-33 streaming ask (ndjson delta)](p9/p9-fb-33-streaming-ask.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
     - [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
     - [p9-fb-35 verbatim fetch](p9/p9-fb-35-verbatim-fetch.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
-    - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ⏳ 미구현, brainstorm 필요
+    - [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ✅ 머지 (2026-05-10)
     - [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ⏳ 미구현, brainstorm 필요 (depends_on 27)
 
     ### 🎯 0.5.0 — RAG quality (cascade 동반: V00X + reindex)
diff --git a/tasks/p9/p9-fb-36-search-filters.md b/tasks/p9/p9-fb-36-search-filters.md
index a0007fe..3577755 100644
--- a/tasks/p9/p9-fb-36-search-filters.md
+++ b/tasks/p9/p9-fb-36-search-filters.md
@@ -3,7 +3,7 @@ phase: P9
 component: kebab-cli + kebab-search + wire-schema
 task_id: p9-fb-36
 title: "Search filter args (--media / --ingested-after / --doc-id / --tag)"
-status: open
+status: completed
 target_version: 0.4.0
 depends_on: []
 unblocks: []
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent 가 검색 범위 
 
 # p9-fb-36 — Search filter args
 
-> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. filter 종류 / SQLite 쿼리 통합 / Lance vector 필터 적용 layer brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 참조.
+
+상세 설계: `docs/superpowers/specs/2026-05-10-p9-fb-36-search-filters-design.md`.
+구현 계획: `docs/superpowers/plans/2026-05-10-p9-fb-36-search-filters.md`.
 
 ## 증상 / 동기
 
-- 
2.49.1


From 84287d0ef65374a15f28226f0f705baab9b47eff Mon Sep 17 00:00:00 2001
From: th-kim0823 <th.kim0823@navercorp.com>
Date: Sun, 10 May 2026 04:47:55 +0900
Subject: [PATCH 11/11] fix(fb-36): address PR #127 round 1 review

- ingested_after: convert OffsetDateTime to UTC before formatting
  so non-Z offsets compare correctly against UTC TEXT storage
  (lexical.rs + filters.rs)
- README: --tag is repeatable-only, not csv (only --media is csv)
- test(cli): add multi-value --tag OR-within IN-list coverage
- test(store): add UTC-offset regression test for ingested_after
- mcp: use ERROR_V1_ID const instead of hardcoded "error.v1"

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md                                     |  2 +-
 crates/kebab-cli/tests/wire_search_filters.rs | 80 +++++++++++++++++++
 crates/kebab-mcp/src/tools/search.rs          |  4 +-
 crates/kebab-search/src/lexical.rs            |  8 +-
 crates/kebab-store-sqlite/src/filters.rs      | 58 +++++++++++++-
 5 files changed, 146 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 3c699f3..7697391 100644
--- a/README.md
+++ b/README.md
@@ -71,7 +71,7 @@ kebab doctor
 |------|------|
 | `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 |
 | `kebab ingest [<path>]` | Markdown / 이미지 / PDF 색인 (idempotent). TTY 에서는 stderr 진행 바, non-TTY (CI / pipe) 는 stderr 한 줄씩, `--json` 은 stdout 에 `ingest_progress.v1` 라인 streaming 후 마지막에 `ingest_report.v1`. Ctrl-C 한 번이면 현재 asset 마무리 후 abort (부분 commit 보존, idempotent re-run), 두 번째 Ctrl-C 는 hard exit. Markdown title 이 frontmatter 에 없어도 첫 H1 → H2 → 첫 paragraph 80 자 → 파일명 순으로 자동 채움 (parser_version `md-frontmatter-v2`) — 기존 색인된 doc 도 다음 ingest 에서 새 title 로 갱신. **Incremental** (p9-fb-23): 두 번째 이후의 ingest 는 변하지 않은 doc (blake3 + parser/chunker/embedder version 모두 동일) 의 parse/chunk/embed/vector upsert 를 자동 스킵. final summary 에 `N unchanged` 카운트 표시. `--force-reingest` 로 skip 무시 강제 재처리. **지원 형식** (extractor 자동 결정 — config 에 명시 불가): Markdown (`.md`), 이미지 (`.png` / `.jpg` / `.jpeg`, OCR + caption), PDF (`.pdf`). 다른 확장자는 자동 skip — `IngestItem.warnings` 에 사유 (`"unsupported media type: .docx"` 등), `IngestReport.skipped_by_extension` 에 카운트 분류, CLI / TUI summary 에 breakdown 표시. |
-| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor <opaque>] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` / `--media` 는 각각 `,` 구분 다중 값 OR 매칭, 나머지 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). `--ingested-after` 는 RFC3339 UTC — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 `markdown` alias 로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). |
+| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor <opaque>] [--tag T] [--lang L] [--path-glob G] [--trust-min LEVEL] [--media TYPE] [--ingested-after RFC3339] [--doc-id ID]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor`. **filter flags (p9-fb-36):** `--tag` 는 반복 가능 flag (`--tag rust --tag async`) 로 OR 매칭, `--media` 는 `,` 구분 다중 값 OR 매칭, 서로 다른 flags 간은 AND 조합. `--trust-min` 은 `primary\|secondary\|generated` 중 하나 (해당 level 이상 포함). `--ingested-after` 는 RFC3339 timestamp (`Z` 외 offset 은 UTC 로 변환 후 비교) — 파싱 실패 시 `error.v1.code = config_invalid` (exit 2). `--media md` 는 `markdown` alias 로 정규화. 알 수 없는 `--media` 값은 무조건 empty hits (오류 아님). |
 | `kebab list docs` | 색인된 문서 목록 |
 | `kebab inspect doc <id>` / `kebab inspect chunk <id>` | raw record 보기 |
 | `kebab fetch chunk <id> [--context N]` / `kebab fetch doc <id> [--max-tokens N]` / `kebab fetch span <doc_id> <ls> <le> [--max-tokens N]` | (p9-fb-35) verbatim text fetch from indexed corpus. wire = `fetch_result.v1` (kind discriminator). chunk: target + ±N ordinal-context chunks. doc: full normalized markdown. span: 1-based line range (PDF/audio rejected as `error.v1.code = span_not_supported`). chars/4 budget on doc/span. |
diff --git a/crates/kebab-cli/tests/wire_search_filters.rs b/crates/kebab-cli/tests/wire_search_filters.rs
index 6c68aef..71ba48c 100644
--- a/crates/kebab-cli/tests/wire_search_filters.rs
+++ b/crates/kebab-cli/tests/wire_search_filters.rs
@@ -224,3 +224,83 @@ fn search_with_tag_filter_matches_frontmatter_tags() {
         );
     }
 }
+
+// ---------------------------------------------------------------------------
+// Test 5: --tag is repeatable (OR-within); two --tag values form an IN-list
+// ---------------------------------------------------------------------------
+
+#[test]
+fn search_with_two_tag_filters_returns_or_within_tags() {
+    // Three docs with different tag sets:
+    //   a.md → tags: [rust]
+    //   b.md → tags: [async]
+    //   c.md → no tags (but same keyword in body)
+    // Search with --tag rust --tag async (OR within --tag).
+    // Expect a.md and b.md, not c.md.
+    let dir = tempfile::tempdir().unwrap();
+    let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
+
+    fs::write(
+        workspace.join("a.md"),
+        "---\ntags: [rust]\n---\n# A\n\nrust systems programming\n",
+    )
+    .unwrap();
+    fs::write(
+        workspace.join("b.md"),
+        "---\ntags: [async]\n---\n# B\n\nrust async programming\n",
+    )
+    .unwrap();
+    fs::write(workspace.join("c.md"), "# C\n\nrust programming\n").unwrap();
+    common::ingest(&cfg, &workspace);
+
+    // Without filter: all three docs produce hits.
+    let (unfiltered, _) = common::run_search_with_args(
+        &cfg,
+        &["--json", "--mode", "lexical", "rust"],
+    );
+    let uresp: Value = serde_json::from_str(unfiltered.trim())
+        .unwrap_or_else(|e| panic!("not JSON (unfiltered): {unfiltered:?}: {e}"));
+    let uhits = uresp["hits"].as_array().expect("unfiltered hits array");
+    assert!(
+        uhits.len() >= 3,
+        "expected ≥3 hits before tag filter: {uresp}"
+    );
+
+    // With --tag rust --tag async: only a.md and b.md should appear.
+    let (filtered, _) = common::run_search_with_args(
+        &cfg,
+        &[
+            "--json", "--mode", "lexical",
+            "--tag", "rust",
+            "--tag", "async",
+            "rust",
+        ],
+    );
+    let fresp: Value = serde_json::from_str(filtered.trim())
+        .unwrap_or_else(|e| panic!("not JSON (two-tag-filtered): {filtered:?}: {e}"));
+    let fhits = fresp["hits"].as_array().expect("filtered hits array");
+
+    assert!(
+        !fhits.is_empty(),
+        "--tag rust --tag async must return hits from tagged docs; got 0: {fresp}"
+    );
+
+    // c.md must not appear — it has no tags.
+    for hit in fhits {
+        let path = hit["doc_path"].as_str().unwrap_or("");
+        assert!(
+            path.ends_with("a.md") || path.ends_with("b.md"),
+            "--tag rust --tag async must only return a.md or b.md, got path={path}"
+        );
+    }
+
+    // Both a.md and b.md must appear (OR, not AND).
+    let paths: Vec<&str> = fhits
+        .iter()
+        .filter_map(|h| h["doc_path"].as_str())
+        .collect();
+    let has_a = paths.iter().any(|p| p.ends_with("a.md"));
+    let has_b = paths.iter().any(|p| p.ends_with("b.md"));
+    assert!(has_a, "--tag rust must include a.md (rust-tagged): paths={paths:?}");
+    assert!(has_b, "--tag async must include b.md (async-tagged): paths={paths:?}");
+}
diff --git a/crates/kebab-mcp/src/tools/search.rs b/crates/kebab-mcp/src/tools/search.rs
index 2027024..74af6e9 100644
--- a/crates/kebab-mcp/src/tools/search.rs
+++ b/crates/kebab-mcp/src/tools/search.rs
@@ -12,6 +12,8 @@ use rmcp::model::CallToolResult;
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 
+use kebab_app::ERROR_V1_ID;
+
 use crate::error::{to_tool_error, to_tool_success};
 use crate::state::KebabAppState;
 
@@ -161,7 +163,7 @@ fn normalize_media_alias(s: &str) -> String {
 fn invalid_input(msg: &str) -> CallToolResult {
     use kebab_app::{ErrorV1, StructuredError};
     let err = anyhow::Error::new(StructuredError(ErrorV1 {
-        schema_version: "error.v1".to_string(),
+        schema_version: ERROR_V1_ID.to_string(),
         code: "invalid_input".to_string(),
         message: msg.to_string(),
         details: serde_json::Value::Null,
diff --git a/crates/kebab-search/src/lexical.rs b/crates/kebab-search/src/lexical.rs
index 871c22d..bfdd0f7 100644
--- a/crates/kebab-search/src/lexical.rs
+++ b/crates/kebab-search/src/lexical.rs
@@ -348,11 +348,15 @@ fn run_query(
 
     // p9-fb-36: ingested_after filter.
     // `documents.updated_at` is RFC3339 stored as TEXT (always UTC `Z` per
-    // fb-32 ingest path), so lexicographic >= compare is correct.
+    // fb-32 ingest path), so lexicographic >= compare is correct — but only
+    // when the filter instant is also formatted as UTC `Z`. A non-UTC offset
+    // (e.g. `+09:00`) puts local wall-clock digits in the string, so the text
+    // compare no longer orders by instant. Convert to UTC before formatting.
     if let Some(after) = &filters.ingested_after {
         let formatted = after
+            .to_offset(time::UtcOffset::UTC)
             .format(&time::format_description::well_known::Rfc3339)
-            .expect("OffsetDateTime formats to RFC3339");
+            .expect("OffsetDateTime (UTC) formats to RFC3339");
         sql.push_str(" AND d.updated_at >= ?");
         params.push(Box::new(formatted));
     }
diff --git a/crates/kebab-store-sqlite/src/filters.rs b/crates/kebab-store-sqlite/src/filters.rs
index 4586236..9519879 100644
--- a/crates/kebab-store-sqlite/src/filters.rs
+++ b/crates/kebab-store-sqlite/src/filters.rs
@@ -155,11 +155,15 @@ impl SqliteStore {
 
         // p9-fb-36: ingested_after filter.
         // `documents.updated_at` is RFC3339 TEXT (UTC `Z` per fb-32);
-        // lexicographic >= compare is correct.
+        // lexicographic >= compare is correct — but only when the filter
+        // instant is also formatted as UTC `Z`. A non-UTC offset (e.g.
+        // `+09:00`) puts local wall-clock digits in the string, so the text
+        // compare no longer orders by instant. Convert to UTC before formatting.
         if let Some(after) = &filters.ingested_after {
             let formatted = after
+                .to_offset(time::UtcOffset::UTC)
                 .format(&time::format_description::well_known::Rfc3339)
-                .expect("OffsetDateTime formats to RFC3339");
+                .expect("OffsetDateTime (UTC) formats to RFC3339");
             sql.push_str(" AND d.updated_at >= ?");
             bind.push(Box::new(formatted));
         }
@@ -666,4 +670,54 @@ mod tests {
             .unwrap();
         assert_eq!(out, vec![cid(c1)], "doc_id filter must scope to the target doc only");
     }
+
+    #[test]
+    fn filter_chunks_ingested_after_non_utc_offset_compares_as_instant() {
+        // Regression test for the non-UTC offset lex-compare bug.
+        //
+        // Scenario (from PR #127 review):
+        //   - doc stored at `2026-04-01T01:00:00Z`
+        //   - filter: `2026-04-01T05:00:00+09:00` == `2026-03-31T20:00:00Z` instant
+        //
+        // The doc instant (01:00 UTC on Apr 1) is AFTER the filter instant
+        // (20:00 UTC on Mar 31), so the doc SHOULD match.
+        //
+        // Buggy code: formats `+09:00` as-is → lex compare
+        //   `2026-04-01T01:00:00Z` vs `2026-04-01T05:00:00+09:00`
+        //   `01` < `05` → doc dropped incorrectly.
+        //
+        // Fixed code: converts to UTC first → compares
+        //   `2026-04-01T01:00:00Z` vs `2026-03-31T20:00:00Z`
+        //   Apr 1 > Mar 31 → doc correctly included.
+        let tmp = TempDir::new().unwrap();
+        let store = open_store(&tmp);
+        let c1 = "11111111111111111111111111111111";
+        seed_committed_full(
+            &store, c1, "d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1d1",
+            "doc.md", "en", &[], "primary",
+            r#""markdown""#,
+            "2026-04-01T01:00:00Z",
+        );
+
+        // Filter instant: 2026-04-01T05:00:00+09:00 == 2026-03-31T20:00:00 UTC.
+        // Doc (2026-04-01T01:00:00Z) is after the filter instant → should match.
+        let filter_instant = time::OffsetDateTime::parse(
+            "2026-04-01T05:00:00+09:00",
+            &time::format_description::well_known::Rfc3339,
+        )
+        .expect("valid RFC3339 with +09:00 offset");
+
+        let f = SearchFilters {
+            ingested_after: Some(filter_instant),
+            ..Default::default()
+        };
+        let out = store
+            .filter_chunks(&[cid(c1)], &f)
+            .unwrap();
+        assert_eq!(
+            out,
+            vec![cid(c1)],
+            "doc ingested at 01:00Z should match filter 05:00+09:00 (== 20:00Z previous day)"
+        );
+    }
 }
-- 
2.49.1