feat(mcp): kebab__search filter inputs (fb-36)

7 new optional inputs on SearchInput: tags, lang, path_glob,
trust_min, media, ingested_after, doc_id. Validation surfaces as
error.v1 code = invalid_input via StructuredError. Dispatch builds
SearchFilters from the inputs and forwards through the existing
search_with_opts_with_config facade.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
th-kim0823
2026-05-10 04:11:27 +09:00
parent 4e0379c04f
commit b06f4654e7
4 changed files with 278 additions and 2 deletions

View File

@@ -19,6 +19,8 @@ tracing = { workspace = true }
# /dependencies endpoint — rmcp declares optional schemars = "^1.0").
schemars = "1"
time = { workspace = true }
kebab-app = { path = "../kebab-app" }
kebab-config = { path = "../kebab-config" }
kebab-core = { path = "../kebab-core" }

View File

@@ -1,5 +1,7 @@
//! `search` tool — wraps `kebab_app::search_with_opts_with_config`.
//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor? }.
//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor?,
//! tags?, lang?, path_glob?, trust_min?, media?,
//! ingested_after?, doc_id? }.
//! Output: search_response.v1 envelope (hits + next_cursor + truncated).
//!
//! First tool with a non-empty `inputSchema`: `SearchInput` derives
@@ -27,6 +29,22 @@ pub struct SearchInput {
pub snippet_chars: Option<usize>,
/// p9-fb-34: opaque cursor from a previous response.
pub cursor: Option<String>,
/// p9-fb-36: filter by `metadata.tags` (OR-within).
pub tags: Option<Vec<String>>,
/// p9-fb-36: filter by `documents.lang` (ISO code).
pub lang: Option<String>,
/// p9-fb-36: filter by `documents.workspace_path` glob.
pub path_glob: Option<String>,
/// p9-fb-36: filter by minimum `documents.trust_level`.
/// Accepts: `"primary"`, `"secondary"`, `"generated"`.
pub trust_min: Option<String>,
/// p9-fb-36: filter by `assets.media_type` kind. IN-list. Accepts:
/// `"markdown"`, `"pdf"`, `"image"`, `"audio"`, `"other"`. Aliases: `md` → `markdown`.
pub media: Option<Vec<String>>,
/// p9-fb-36: RFC3339 UTC timestamp. Invalid format → invalid_input.
pub ingested_after: Option<String>,
/// p9-fb-36: filter to a single doc.
pub doc_id: Option<String>,
}
pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
@@ -37,11 +55,62 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
"vector" => kebab_core::SearchMode::Vector,
_ => kebab_core::SearchMode::Hybrid,
};
// p9-fb-36: parse filter inputs, returning invalid_input on bad values.
let trust_min = match input.trust_min.as_deref() {
Some(s) => match s.to_ascii_lowercase().as_str() {
"primary" => Some(kebab_core::TrustLevel::Primary),
"secondary" => Some(kebab_core::TrustLevel::Secondary),
"generated" => Some(kebab_core::TrustLevel::Generated),
other => {
return invalid_input(&format!(
"trust_min: unknown level '{other}'; expected primary|secondary|generated"
));
}
},
None => None,
};
let ingested_after = match input.ingested_after.as_deref() {
Some(s) => {
match time::OffsetDateTime::parse(
s,
&time::format_description::well_known::Rfc3339,
) {
Ok(ts) => Some(ts),
Err(e) => {
return invalid_input(&format!(
"ingested_after: invalid RFC3339 '{s}': {e}"
));
}
}
}
None => None,
};
let media: Vec<String> = input
.media
.clone()
.unwrap_or_default()
.iter()
.map(|s| normalize_media_alias(s))
.collect();
let filters = kebab_core::SearchFilters {
tags_any: input.tags.clone().unwrap_or_default(),
lang: input.lang.clone().map(kebab_core::Lang),
path_glob: input.path_glob.clone(),
trust_min,
media,
ingested_after,
doc_id: input.doc_id.clone().map(kebab_core::DocumentId),
};
let query = kebab_core::SearchQuery {
text: input.query,
mode,
k,
filters: kebab_core::SearchFilters::default(),
filters,
};
let opts = kebab_core::SearchOpts {
max_tokens: input.max_tokens,
@@ -81,3 +150,22 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
Err(e) => to_tool_error(&e),
}
}
/// Canonicalises one `media` filter value.
///
/// The input is ASCII-lowercased; the shorthand `md` is expanded to
/// `markdown`, and every other value is returned in its lowercased form.
fn normalize_media_alias(s: &str) -> String {
    let lowered = s.to_ascii_lowercase();
    if lowered == "md" {
        String::from("markdown")
    } else {
        lowered
    }
}
fn invalid_input(msg: &str) -> CallToolResult {
use kebab_app::{ErrorV1, StructuredError};
let err = anyhow::Error::new(StructuredError(ErrorV1 {
schema_version: "error.v1".to_string(),
code: "invalid_input".to_string(),
message: msg.to_string(),
details: serde_json::Value::Null,
hint: None,
}));
to_tool_error(&err)
}

View File

@@ -62,6 +62,13 @@ async fn fetch_tool_chunk_returns_fetch_result_v1() {
max_tokens: None,
snippet_chars: None,
cursor: None,
tags: None,
lang: None,
path_glob: None,
trust_min: None,
media: None,
ingested_after: None,
doc_id: None,
},
);
let search_text = match &search_result.content.first().unwrap().raw {

View File

@@ -58,6 +58,13 @@ async fn search_tool_returns_search_response_v1() {
max_tokens: None,
snippet_chars: None,
cursor: None,
tags: None,
lang: None,
path_glob: None,
trust_min: None,
media: None,
ingested_after: None,
doc_id: None,
},
);
@@ -108,3 +115,175 @@ async fn search_tool_returns_search_response_v1() {
"envelope should carry next_cursor (possibly null)"
);
}
/// p9-fb-36: search with doc_id filter — only hits from the target doc.
///
/// Ingests two markdown documents that both contain the query term, runs an
/// unfiltered lexical search to discover a `doc_id`, then repeats the search
/// with that `doc_id` as a filter and checks that every returned hit carries
/// it. The unfiltered result must also contain a hit from a *different*
/// document; otherwise the filtered assertions could pass without the filter
/// excluding anything.
#[tokio::test]
async fn search_with_doc_id_filter_returns_only_target() {
    let dir = tempfile::tempdir().unwrap();
    let data_dir = dir.path().join("data");
    let workspace_root = dir.path().join("notes");
    fs::create_dir_all(&data_dir).unwrap();
    fs::create_dir_all(&workspace_root).unwrap();
    let config = minimal_config(&data_dir, &workspace_root);

    // Write two markdown documents, both containing the query term.
    fs::write(
        workspace_root.join("a.md"),
        "# Alpha\n\nThis document mentions kebab and flatbread.",
    )
    .unwrap();
    fs::write(
        workspace_root.join("b.md"),
        "# Beta\n\nAnother document about kebab wraps and fillings.",
    )
    .unwrap();

    let scope = SourceScope {
        root: workspace_root.clone(),
        include: vec![],
        exclude: vec![],
    };
    let _ = kebab_app::ingest_with_config(config.clone(), scope, false).unwrap();

    let state = KebabAppState::new(config, None);
    let handler = KebabHandler::new(state);

    // Runs one lexical "kebab" search (k = 10) with the given `doc_id`
    // filter, asserts the tool call succeeded, and returns the parsed JSON
    // payload. `label` only distinguishes the two calls in failure messages.
    let run_search = |label: &str, doc_id: Option<String>| -> serde_json::Value {
        let result = kebab_mcp::tools::search::handle(
            handler.state(),
            kebab_mcp::tools::search::SearchInput {
                query: "kebab".to_string(),
                mode: Some("lexical".to_string()),
                k: Some(10),
                max_tokens: None,
                snippet_chars: None,
                cursor: None,
                tags: None,
                lang: None,
                path_glob: None,
                trust_min: None,
                media: None,
                ingested_after: None,
                doc_id,
            },
        );
        assert!(
            !result.is_error.unwrap_or(false),
            "{label} search failed: {result:?}"
        );
        let text = match &result.content.first().unwrap().raw {
            RawContent::Text(t) => t.text.clone(),
            other => panic!("expected text content, got {other:?}"),
        };
        serde_json::from_str(&text).unwrap()
    };

    // First: unfiltered search to discover a doc_id from one of the docs.
    let unfiltered_v = run_search("unfiltered", None);
    let hits = unfiltered_v["hits"].as_array().expect("hits must be array");
    assert!(hits.len() >= 2, "expected hits from both docs");

    // Pick the doc_id of the first hit.
    let target_doc_id = hits[0]["doc_id"]
        .as_str()
        .expect("doc_id on first hit")
        .to_string();

    // Precondition: at least one unfiltered hit belongs to another document,
    // so the filter below actually has something to exclude.
    assert!(
        hits.iter()
            .any(|hit| matches!(hit["doc_id"].as_str(), Some(id) if id != target_doc_id)),
        "unfiltered search must include a hit from a different doc"
    );

    // Now search with doc_id filter — all results must belong to that doc.
    let filtered_v = run_search("filtered", Some(target_doc_id.clone()));
    let filtered_hits = filtered_v["hits"].as_array().expect("hits must be array");
    assert!(
        !filtered_hits.is_empty(),
        "expected at least one hit for target doc"
    );
    for hit in filtered_hits {
        assert_eq!(
            hit["doc_id"].as_str(),
            Some(target_doc_id.as_str()),
            "all filtered hits must belong to the target doc"
        );
    }
}
/// p9-fb-36: an `ingested_after` value that is not valid RFC3339 must be
/// rejected with an `error.v1` envelope whose code is `invalid_input`.
#[tokio::test]
async fn search_with_invalid_ingested_after_returns_invalid_input() {
    let tmp = tempfile::tempdir().unwrap();
    let data_dir = tmp.path().join("data");
    let workspace_root = tmp.path().join("notes");
    fs::create_dir_all(&data_dir).unwrap();
    fs::create_dir_all(&workspace_root).unwrap();

    let handler = KebabHandler::new(KebabAppState::new(
        minimal_config(&data_dir, &workspace_root),
        None,
    ));

    // Every optional input is left unset except `ingested_after`, which is
    // deliberately unparseable.
    let input = kebab_mcp::tools::search::SearchInput {
        query: String::from("kebab"),
        mode: None,
        k: None,
        max_tokens: None,
        snippet_chars: None,
        cursor: None,
        tags: None,
        lang: None,
        path_glob: None,
        trust_min: None,
        media: None,
        ingested_after: Some(String::from("garbage")),
        doc_id: None,
    };
    let outcome = kebab_mcp::tools::search::handle(handler.state(), input);

    assert!(
        outcome.is_error.unwrap_or(false),
        "expected isError=true for invalid ingested_after"
    );

    // The error payload is expected as the first content item, as JSON text.
    let first_item = outcome
        .content
        .first()
        .expect("expected at least one content item");
    let payload = match &first_item.raw {
        RawContent::Text(t) => &t.text,
        other => panic!("expected text content, got {other:?}"),
    };
    let envelope: serde_json::Value = serde_json::from_str(payload).unwrap();

    assert_eq!(
        envelope["schema_version"].as_str(),
        Some("error.v1"),
        "must carry error.v1 envelope"
    );
    assert_eq!(
        envelope["code"].as_str(),
        Some("invalid_input"),
        "code must be invalid_input for bad RFC3339"
    );
}