feat(mcp): kebab__search filter inputs (fb-36)

Adds 7 optional filter inputs to SearchInput: tags, lang, path_glob, trust_min, media, ingested_after, doc_id. Validation failures surface as an error.v1 envelope with code = invalid_input, built via StructuredError. Dispatch builds SearchFilters from the inputs and forwards them through the existing search_with_opts_with_config facade.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-10 04:11:27 +09:00
parent 4e0379c04f
commit b06f4654e7
4 changed files with 278 additions and 2 deletions
--- a/crates/kebab-mcp/Cargo.toml
+++ b/crates/kebab-mcp/Cargo.toml
@@ -19,6 +19,8 @@ tracing     = { workspace = true }
 # /dependencies endpoint — rmcp declares optional schemars = "^1.0").
 schemars    = "1"

+time         = { workspace = true }
+
 kebab-app    = { path = "../kebab-app" }
 kebab-config = { path = "../kebab-config" }
 kebab-core   = { path = "../kebab-core" }
--- a/crates/kebab-mcp/src/tools/search.rs
+++ b/crates/kebab-mcp/src/tools/search.rs
@@ -1,5 +1,7 @@
 //! `search` tool — wraps `kebab_app::search_with_opts_with_config`.
-//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor? }.
+//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor?,
+//!          tags?, lang?, path_glob?, trust_min?, media?,
+//!          ingested_after?, doc_id? }.
 //! Output: search_response.v1 envelope (hits + next_cursor + truncated).
 //!
 //! First tool with a non-empty `inputSchema`: `SearchInput` derives
@@ -27,6 +29,22 @@ pub struct SearchInput {
    pub snippet_chars: Option<usize>,
    /// p9-fb-34: opaque cursor from a previous response.
    pub cursor: Option<String>,
+    /// p9-fb-36: filter by `metadata.tags` (OR-within).
+    pub tags: Option<Vec<String>>,
+    /// p9-fb-36: filter by `documents.lang` (ISO code).
+    pub lang: Option<String>,
+    /// p9-fb-36: filter by `documents.workspace_path` glob.
+    pub path_glob: Option<String>,
+    /// p9-fb-36: filter by minimum `documents.trust_level`.
+    /// Accepts: `"primary"`, `"secondary"`, `"generated"`.
+    pub trust_min: Option<String>,
+    /// p9-fb-36: filter by `assets.media_type` kind. IN-list. Accepts:
+    /// `"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`. Aliases: `md` → `markdown`.
+    pub media: Option<Vec<String>>,
+    /// p9-fb-36: filter by ingest time; RFC3339 timestamp, any offset. Invalid format → invalid_input.
+    pub ingested_after: Option<String>,
+    /// p9-fb-36: filter to a single doc.
+    pub doc_id: Option<String>,
 }

 pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
@@ -37,11 +55,62 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
        "vector" => kebab_core::SearchMode::Vector,
        _ => kebab_core::SearchMode::Hybrid,
    };
+
+    // p9-fb-36: parse filter inputs, returning invalid_input on bad values.
+    let trust_min = match input.trust_min.as_deref() {
+        Some(s) => match s.to_ascii_lowercase().as_str() {
+            "primary" => Some(kebab_core::TrustLevel::Primary),
+            "secondary" => Some(kebab_core::TrustLevel::Secondary),
+            "generated" => Some(kebab_core::TrustLevel::Generated),
+            other => {
+                return invalid_input(&format!(
+                    "trust_min: unknown level '{other}'; expected primary|secondary|generated"
+                ));
+            }
+        },
+        None => None,
+    };
+
+    let ingested_after = match input.ingested_after.as_deref() {
+        Some(s) => {
+            match time::OffsetDateTime::parse(
+                s,
+                &time::format_description::well_known::Rfc3339,
+            ) {
+                Ok(ts) => Some(ts),
+                Err(e) => {
+                    return invalid_input(&format!(
+                        "ingested_after: invalid RFC3339 '{s}': {e}"
+                    ));
+                }
+            }
+        }
+        None => None,
+    };
+
+    let media: Vec<String> = input
+        .media
+        .clone()
+        .unwrap_or_default()
+        .iter()
+        .map(|s| normalize_media_alias(s))
+        .collect();
+
+    let filters = kebab_core::SearchFilters {
+        tags_any: input.tags.clone().unwrap_or_default(),
+        lang: input.lang.clone().map(kebab_core::Lang),
+        path_glob: input.path_glob.clone(),
+        trust_min,
+        media,
+        ingested_after,
+        doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
+    };
+
    let query = kebab_core::SearchQuery {
        text: input.query,
        mode,
        k,
-        filters: kebab_core::SearchFilters::default(),
+        filters,
    };
    let opts = kebab_core::SearchOpts {
        max_tokens: input.max_tokens,
@@ -81,3 +150,22 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
        Err(e) => to_tool_error(&e),
    }
 }
+
+/// Lowercases a media kind and expands the `md` alias to `markdown`.
+fn normalize_media_alias(s: &str) -> String {
+    // Fold ASCII case first so `MD` / `Md` also hit the alias.
+    let key = s.to_ascii_lowercase();
+    if key == "md" { "markdown".into() } else { key }
+}
+
+/// Wraps `msg` in an `error.v1` envelope (code `invalid_input`) and converts it via `to_tool_error`.
+fn invalid_input(msg: &str) -> CallToolResult {
+    let envelope = kebab_app::ErrorV1 {
+        schema_version: String::from("error.v1"),
+        code: String::from("invalid_input"),
+        message: msg.to_owned(),
+        details: serde_json::Value::Null,
+        hint: None,
+    };
+    to_tool_error(&anyhow::Error::new(kebab_app::StructuredError(envelope)))
+}
--- a/crates/kebab-mcp/tests/tools_call_fetch.rs
+++ b/crates/kebab-mcp/tests/tools_call_fetch.rs
@@ -62,6 +62,13 @@ async fn fetch_tool_chunk_returns_fetch_result_v1() {
            max_tokens: None,
            snippet_chars: None,
            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
        },
    );
    let search_text = match &search_result.content.first().unwrap().raw {
--- a/crates/kebab-mcp/tests/tools_call_search.rs
+++ b/crates/kebab-mcp/tests/tools_call_search.rs
@@ -58,6 +58,13 @@ async fn search_tool_returns_search_response_v1() {
            max_tokens: None,
            snippet_chars: None,
            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
        },
    );

@@ -108,3 +115,175 @@ async fn search_tool_returns_search_response_v1() {
        "envelope should carry next_cursor (possibly null)"
    );
 }
+
+/// p9-fb-36: search with doc_id filter — only hits from the target doc.
+#[tokio::test]
+async fn search_with_doc_id_filter_returns_only_target() {
+    let dir = tempfile::tempdir().unwrap();
+    let data_dir = dir.path().join("data");
+    let workspace_root = dir.path().join("notes");
+    fs::create_dir_all(&data_dir).unwrap();
+    fs::create_dir_all(&workspace_root).unwrap();
+
+    let config = minimal_config(&data_dir, &workspace_root);
+
+    // Write two markdown documents, both containing the query term.
+    fs::write(
+        workspace_root.join("a.md"),
+        "# Alpha\n\nThis document mentions kebab and flatbread.",
+    )
+    .unwrap();
+    fs::write(
+        workspace_root.join("b.md"),
+        "# Beta\n\nAnother document about kebab wraps and fillings.",
+    )
+    .unwrap();
+
+    let scope = SourceScope {
+        root: workspace_root.clone(),
+        include: vec![],
+        exclude: vec![],
+    };
+    let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap();
+
+    let state = KebabAppState::new(config, None);
+    let handler = KebabHandler::new(state);
+
+    // First: unfiltered search to discover a doc_id from one of the docs.
+    let unfiltered = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: Some("lexical".to_string()),
+            k: Some(10),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: None,
+        },
+    );
+    assert!(
+        !unfiltered.is_error.unwrap_or(false),
+        "unfiltered search failed: {:?}",
+        unfiltered
+    );
+    let unfiltered_text = match &unfiltered.content.first().unwrap().raw {
+        RawContent::Text(t) => t.text.clone(),
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let unfiltered_v: serde_json::Value = serde_json::from_str(&unfiltered_text).unwrap();
+    let hits = unfiltered_v["hits"].as_array().expect("hits must be array");
+    // Require two distinct docs; otherwise the doc_id filter below has nothing to exclude.
+    let doc_ids: std::collections::BTreeSet<&str> =
+        hits.iter().filter_map(|h| h["doc_id"].as_str()).collect();
+    assert!(doc_ids.len() >= 2, "expected hits from both docs");
+
+    let first_doc_id = hits[0]["doc_id"].as_str();
+    let target_doc_id = first_doc_id.expect("doc_id on first hit").to_string();
+
+    // Now search with doc_id filter — all results must belong to that doc.
+    let filtered = kebab_mcp::tools::search::handle(
+        handler.state(),
+        kebab_mcp::tools::search::SearchInput {
+            query: "kebab".to_string(),
+            mode: Some("lexical".to_string()),
+            k: Some(10),
+            max_tokens: None,
+            snippet_chars: None,
+            cursor: None,
+            tags: None,
+            lang: None,
+            path_glob: None,
+            trust_min: None,
+            media: None,
+            ingested_after: None,
+            doc_id: Some(target_doc_id.clone()),
+        },
+    );
+    assert!(
+        !filtered.is_error.unwrap_or(false),
+        "filtered search failed: {:?}",
+        filtered
+    );
+    let filtered_text = match &filtered.content.first().unwrap().raw {
+        RawContent::Text(t) => t.text.clone(),
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let filtered_v: serde_json::Value = serde_json::from_str(&filtered_text).unwrap();
+    let filtered_hits = filtered_v["hits"].as_array().expect("hits must be array");
+
+    assert!(
+        !filtered_hits.is_empty(),
+        "expected at least one hit for target doc"
+    );
+    for hit in filtered_hits {
+        assert_eq!(
+            hit["doc_id"].as_str(),
+            Some(target_doc_id.as_str()),
+            "all filtered hits must belong to the target doc"
+        );
+    }
+}
+
+/// p9-fb-36: a non-RFC3339 `ingested_after` must yield an `invalid_input` error.v1 envelope.
+#[tokio::test]
+async fn search_with_invalid_ingested_after_returns_invalid_input() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (data_dir, workspace_root) = (tmp.path().join("data"), tmp.path().join("notes"));
+    for needed in [&data_dir, &workspace_root] {
+        fs::create_dir_all(needed).unwrap();
+    }
+
+    // No documents are ingested; the test only checks how the bad timestamp is reported.
+    let config = minimal_config(&data_dir, &workspace_root);
+    let handler = KebabHandler::new(KebabAppState::new(config, None));
+
+    // Every filter stays unset except the malformed timestamp.
+    let input = kebab_mcp::tools::search::SearchInput {
+        query: "kebab".to_string(),
+        mode: None,
+        k: None,
+        max_tokens: None,
+        snippet_chars: None,
+        cursor: None,
+        tags: None,
+        lang: None,
+        path_glob: None,
+        trust_min: None,
+        media: None,
+        ingested_after: Some("garbage".to_string()),
+        doc_id: None,
+    };
+    let outcome = kebab_mcp::tools::search::handle(handler.state(), input);
+
+    assert!(
+        outcome.is_error == Some(true),
+        "expected isError=true for invalid ingested_after"
+    );
+    let first = outcome
+        .content
+        .first()
+        .expect("expected at least one content item");
+    let body = match &first.raw {
+        RawContent::Text(t) => &t.text,
+        other => panic!("expected text content, got {other:?}"),
+    };
+    // The text payload is parsed as JSON and checked for the error.v1 fields.
+    let envelope: serde_json::Value = serde_json::from_str(body).unwrap();
+    assert_eq!(
+        envelope["schema_version"].as_str(),
+        Some("error.v1"),
+        "must carry error.v1 envelope"
+    );
+    assert_eq!(
+        envelope["code"].as_str(),
+        Some("invalid_input"),
+        "code must be invalid_input for bad RFC3339"
+    );
+}