Files
kebab/crates/kebab-mcp/tests/tools_call_ask_multi_hop.rs
altair823 58ac62d53a feat(search): provenance 출처 필터 — [[workspace.sources]] 멀티소스 + --source/--source-type
혼합 출처 KB(위키+jira 등)에서 색인은 전부 하되 질의 시 출처로 좁히는 provenance
레버. 전역 trust 곱셈가중(weighted-RRF)은 A/B 에서 반증(θ=0.85 만으로 incident MRR
0.918→0.340 절벽, 점수 압축) — 필터가 see-saw 없는 올바른 레버.

- config [[workspace.sources]] (각 id/root/exclude/trust_level/source_type);
  단일 root 는 implicit `default` source 로 정규화. validate: id 유일·비어있지 않음.
- config schema v3→v4 (step_3_to_4, root→[[workspace.sources]] id=default 미러, 멱등)
- V014 documents.source_id 컬럼+인덱스 (additive, DEFAULT 'default', 재색인 0)
- Metadata.source_id + BodyHints trust precedence(frontmatter > source 기본값 > Primary)
- ingest: --root 미지정 시 resolved_sources() 순회 + doc 마다 source_id/trust stamp
- 검색 SearchFilters.source_type/source_id → lexical + vector 두 site (IN, OR)
- CLI kebab search --source <id> / --source-type <type> (repeatable/comma-sep)

도그푸딩(620 doc, jira400+wiki220): --source wiki 로 개념 질의 MRR 0.780→0.810,
--source jira 로 incident 0.918→0.975. trust precedence 실측(jira=secondary 기본값).

version bump 0.28.0 → 0.29.0 (신규 CLI flag + config 키 + V014 migration → minor).
follow-up: MCP search 필터 미노출 · kebab list source_id 미표시 · RAG provenance 라벨.

자세한 내용: tasks/HOTFIXES.md (2026-06-21), docs/release-notes/v0.29.0-draft.md.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Claude-Session: https://claude.ai/code/session_012Mc6W1fgsrbFKTsqA6P8La
2026-06-21 08:35:19 +00:00

232 lines
10 KiB
Rust

//! Pin the MCP `ask` tool's `multi_hop` argument dispatch contract.
//!
//! v0.18 dogfood fix (PR-7) introduced a pre-decompose score-gate probe
//! in `RagPipeline::ask_multi_hop`: empty KB / sub-gate probe -> the
//! single-pass NoChunks refusal envelope (`answer.v1`), not `error.v1`.
//! The two surfaces' divergence is therefore observed *only when the probe
//! passes* — at that point, single-pass returns retrieval + LLM call, and
//! multi-hop calls decompose first (LLM unreachable -> `error.v1`).
//!
//! These two tests pin:
//! 1. `ask_tool_routes_multi_hop_true_to_decompose_first` — probe-passing
//! fixture, multi_hop=true → decompose (LLM error), single_pass → retrieval
//! NoChunks. Wire shapes diverge: `error.v1` vs `answer.v1`.
//! 2. `ask_tool_multi_hop_short_circuits_when_probe_empty` — empty KB,
//! multi_hop=true → probe-empty short-circuit, NoChunks refusal byte-
//! identical to single-pass. This pins PR-7's intent at the MCP layer.
//!
//! A live-Ollama end-to-end multi-hop pin lands in a follow-up
//! `#[ignore]` test (same pattern as `wire_ask_stale.rs`).
use kebab_config::Config;
use kebab_core::SourceScope;
use kebab_mcp::{KebabAppState, KebabHandler};
use rmcp::model::RawContent;
/// Build the small `Config` shared by the tests in this file.
///
/// Storage lives under `data_dir`, the workspace is rooted at
/// `workspace_root` with its exclude list cleared, the embedding provider is
/// set to `"none"` with zero dimensions, and the LLM endpoint points at a
/// known-unreachable port.
fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path) -> Config {
    // The config fields written below hold owned strings; do the lossy
    // path -> String conversion in one place.
    fn owned(path: &std::path::Path) -> String {
        path.to_string_lossy().into_owned()
    }

    let mut cfg = Config::defaults();

    // Storage and workspace locations.
    cfg.storage.data_dir = owned(data_dir);
    cfg.storage.model_dir = owned(&data_dir.join("models"));
    cfg.workspace.root = Some(owned(workspace_root));
    cfg.workspace.exclude.clear();

    // Embedding settings: provider "none", zero dimensions.
    cfg.models.embedding.provider = String::from("none");
    cfg.models.embedding.dimensions = 0;

    // Point the LLM at a known-unreachable port so the outcome does not
    // depend on whether a real Ollama happens to be listening on
    // 127.0.0.1:11434 (developer machine, CI, ...). The 5 second request
    // timeout leaves slow CI / Docker network stacks enough headroom that
    // *some* error fires deterministically; the dispatch contract only
    // cares that `is_error` flipped, not which error code surfaced.
    cfg.models.llm.endpoint = String::from("http://127.0.0.1:1");
    cfg.models.llm.request_timeout_secs = 5;

    // Disable the second probe gate (`top_score < score_gate`) so the
    // probe-pass path in `RagPipeline::ask_multi_hop` (PR-7 v0.18 dogfood
    // fix) is reachable from a tiny lexical fixture whose FTS5 fusion score
    // may sit below the production default (0.30). The first probe gate
    // (`probe_hits.is_empty()`) is unaffected, so the empty-KB
    // short-circuit test still exercises it. This only changes the test
    // config; the production default is untouched.
    cfg.rag.score_gate = 0.0;

    cfg
}
/// The dispatch contract (post-PR-7 probe-first): with a probe-passing
/// fixture, single-pass `ask` retrieves first and returns a refusal Answer
/// for an unrelated query (`grounded=false`, `isError=false`). Multi-hop's
/// probe passes on the same fixture → decompose runs → the unreachable LLM
/// yields an `error.v1` envelope (`isError=true`). The specific error code
/// is deliberately NOT asserted (see the comment in the body). The
/// divergence confirms the `multi_hop` arg actually rerouted the dispatch
/// *after* the probe gate.
#[tokio::test]
async fn ask_tool_routes_multi_hop_true_to_decompose_first() {
    let dir = tempfile::tempdir().unwrap();
    let data_dir = dir.path().join("data");
    let workspace_root = dir.path().join("notes");
    std::fs::create_dir_all(&data_dir).unwrap();
    std::fs::create_dir_all(&workspace_root).unwrap();

    // Lexical-friendly fixture so the multi-hop probe (PR-7 v0.18 dogfood
    // fix) returns at least one hit and we exercise the post-probe
    // decompose path. `build_match_string` rewrites the query
    // `"compound about X and Y"` into
    // `text : (("compound about X and Y") OR ("compound" "about" "and"))`
    // — the token_and branch is FTS5 implicit-AND, so the fixture body
    // MUST keep all three tokens (`compound`, `about`, `and`). Do not
    // collapse to a single-token body or the probe short-circuits to
    // NoChunks and the dispatch divergence below disappears.
    let fixture = workspace_root.join("note.md");
    std::fs::write(
        &fixture,
        "# Compound topic\n\nThis note is about a compound containing X and Y in detail.\n",
    )
    .unwrap();

    let cfg = minimal_config(&data_dir, &workspace_root);
    // `workspace_root` is not needed after this point, so it is moved into
    // the scope instead of cloned.
    let scope = SourceScope {
        root: workspace_root,
        include: vec![],
        exclude: vec![],
    };
    let _ = kebab_app::ingest_with_config(cfg.clone(), scope, false).unwrap();
    let state = KebabAppState::new(cfg, None);
    let handler = KebabHandler::new(state);

    // Multi-hop branch — the probe passes on the fixture, decompose runs
    // first, hits the unreachable endpoint, and MCP wraps the failure as
    // error.v1.
    let state_mh = handler.state().clone();
    let mh = tokio::task::spawn_blocking(move || {
        kebab_mcp::tools::ask::handle(
            &state_mh,
            kebab_mcp::tools::ask::AskInput {
                query: "compound about X and Y".to_string(),
                session_id: None,
                mode: Some("lexical".to_string()),
                multi_hop: Some(true),
            },
        )
    })
    .await
    .unwrap();
    assert!(
        mh.is_error.unwrap_or(false),
        "multi_hop=true must reach the LLM (decompose first) — got {mh:?}"
    );
    let mh_text = match &mh.content.first().unwrap().raw {
        RawContent::Text(t) => t.text.clone(),
        other => panic!("expected text, got {other:?}"),
    };
    let mh_v: serde_json::Value = serde_json::from_str(&mh_text).unwrap();
    // Only the envelope kind is pinned. The dispatch contract is
    // "multi-hop's probe passed, then decompose tried to talk to the LLM
    // and failed" — i.e. `is_error` fires because, *after* the PR-7 probe
    // gate, decompose attempted an LLM call against the unreachable
    // endpoint. Which *specific* error code lands (`model_unreachable` on
    // fast ECONNREFUSED hosts, `timeout` on slow connect-timeout stacks,
    // etc.) is an implementation detail of the host TCP/HTTP path; pinning
    // it here would just produce flakes on slow CI.
    assert_eq!(mh_v["schema_version"], "error.v1");

    // Single-pass branch — the KB is NOT empty here (the fixture above was
    // ingested); the query is chosen to be unrelated to the fixture so the
    // single-pass path comes back with a refusal Answer (`isError=false`,
    // `grounded=false`) rather than an error envelope.
    let state_sp = handler.state().clone();
    let sp = tokio::task::spawn_blocking(move || {
        kebab_mcp::tools::ask::handle(
            &state_sp,
            kebab_mcp::tools::ask::AskInput {
                query: "anything".to_string(),
                session_id: None,
                mode: Some("lexical".to_string()),
                multi_hop: Some(false),
            },
        )
    })
    .await
    .unwrap();
    assert!(
        !sp.is_error.unwrap_or(false),
        "single-pass no-hit refusal must NOT be isError — got {sp:?}"
    );
    let sp_text = match &sp.content.first().unwrap().raw {
        RawContent::Text(t) => t.text.clone(),
        other => panic!("expected text, got {other:?}"),
    };
    let sp_v: serde_json::Value = serde_json::from_str(&sp_text).unwrap();
    assert_eq!(sp_v["schema_version"], "answer.v1");
    assert_eq!(sp_v["grounded"], false);
}
/// Pins PR-7's probe-empty short-circuit as an MCP-layer wire shape.
/// Empty KB + multi_hop=true → the request is stopped by the first probe
/// gate (`probe_hits.is_empty()`) in `RagPipeline::ask_multi_hop`, so
/// `refuse_no_chunks` returns an `answer.v1` refusal envelope that is
/// byte-identical to the single-pass one.
/// `kebab-rag::multi_hop_empty_probe_pool_refuses_before_any_llm_call`
/// pins only the RAG layer — this test is the only safety net for the
/// MCP-layer wire shape.
#[tokio::test]
async fn ask_tool_multi_hop_short_circuits_when_probe_empty() {
    let dir = tempfile::tempdir().unwrap();
    let data_dir = dir.path().join("data");
    let workspace_root = dir.path().join("notes");
    std::fs::create_dir_all(&data_dir).unwrap();
    std::fs::create_dir_all(&workspace_root).unwrap();

    // No fixture file is written: the workspace directory stays empty so
    // the ingest below has nothing to index.
    let cfg = minimal_config(&data_dir, &workspace_root);
    // `workspace_root` is not needed after this point, so it is moved into
    // the scope instead of cloned.
    let scope = SourceScope {
        root: workspace_root,
        include: vec![],
        exclude: vec![],
    };
    let _ = kebab_app::ingest_with_config(cfg.clone(), scope, false).unwrap();

    // `cfg` is not used again, so hand it over by value (no extra clone),
    // matching the sibling dispatch test.
    let state = KebabAppState::new(cfg, None);
    let handler = KebabHandler::new(state);

    let state_mh = handler.state().clone();
    let mh = tokio::task::spawn_blocking(move || {
        kebab_mcp::tools::ask::handle(
            &state_mh,
            kebab_mcp::tools::ask::AskInput {
                query: "compound about X and Y".to_string(),
                session_id: None,
                mode: Some("lexical".to_string()),
                multi_hop: Some(true),
            },
        )
    })
    .await
    .unwrap();

    // Strict check: `is_error` must be explicitly `Some(false)`.
    assert_eq!(
        mh.is_error,
        Some(false),
        "probe-empty short-circuit must yield refusal envelope, not error.v1 — got {mh:?}"
    );
    let mh_text = match &mh.content.first().unwrap().raw {
        RawContent::Text(t) => t.text.clone(),
        other => panic!("expected text content, got {other:?}"),
    };
    let body: serde_json::Value = serde_json::from_str(&mh_text).unwrap();
    assert_eq!(body["schema_version"], "answer.v1");
    assert_eq!(body["refusal_reason"], "no_chunks");
}
/// The JSON schema generated for `AskInput` (the one rendered for
/// tools/list) must expose a `multi_hop` property. This guards agent /
/// MCP host capability discovery against an accidental rename or removal
/// of the field.
#[test]
fn ask_input_schema_advertises_multi_hop_field() {
    // Render the schema to a generic JSON value so it can be inspected
    // without depending on the schema type's own accessors.
    let generated = schemars::schema_for!(kebab_mcp::tools::ask::AskInput);
    let rendered = serde_json::to_value(&generated).unwrap();

    let fields = rendered
        .get("properties")
        .and_then(serde_json::Value::as_object)
        .expect("AskInput schema must declare properties");

    assert!(
        fields.contains_key("multi_hop"),
        "AskInput.multi_hop must surface in the JsonSchema — got keys: {:?}",
        fields.keys().collect::<Vec<_>>()
    );
}