diff --git a/crates/kebab-mcp/tests/tools_call_ask_multi_hop.rs b/crates/kebab-mcp/tests/tools_call_ask_multi_hop.rs index 0452238..c121cd2 100644 --- a/crates/kebab-mcp/tests/tools_call_ask_multi_hop.rs +++ b/crates/kebab-mcp/tests/tools_call_ask_multi_hop.rs @@ -1,18 +1,19 @@ -//! p9-fb-41 PR-5: MCP `ask` tool with `multi_hop: true` argument. +//! Pin the MCP `ask` tool's `multi_hop` argument dispatch contract. //! -//! Two Ollama-free pins: +//! v0.18 dogfood fix (PR-7) introduced a pre-decompose score-gate probe +//! in `RagPipeline::ask_multi_hop`: empty KB / sub-gate probe -> the +//! single-pass NoChunks refusal envelope (`answer.v1`), not `error.v1`. +//! The two surfaces' divergence is therefore observed *only when the probe +//! passes* — at that point, single-pass retrieves first and only then calls the LLM, and +//! multi-hop calls decompose first (LLM unreachable -> `error.v1`). //! -//! 1. `ask_tool_routes_multi_hop_true_to_decompose_first` — multi-hop -//! dispatch differs from single-pass on dispatch shape. Single-pass -//! retrieves *first* (empty KB → `NoChunks` refusal, no LLM call, -//! `grounded=false`). Multi-hop calls *decompose first* (no -//! retrieval yet), so an empty KB + no Ollama yields `error.v1` -//! with `code=model_unreachable` — different wire shape than the -//! refusal envelope. The two surfaces' divergence is the signal -//! that the `multi_hop` arg actually routed the dispatch. -//! 2. `ask_input_schema_advertises_multi_hop_field` — `AskInput`'s -//! `JsonSchema` exposes the new field so MCP host capability -//! discovery (tools/list) renders it for agents. +//! These two tests pin: +//! 1. `ask_tool_routes_multi_hop_true_to_decompose_first` — probe-passing +//! fixture, multi_hop=true → decompose (LLM error), single_pass → retrieval +//! NoChunks. Wire shapes diverge: `error.v1` vs `answer.v1`. +//! 2. `ask_tool_multi_hop_short_circuits_when_probe_empty` — empty KB, +//! 
multi_hop=true → probe-empty short-circuit, NoChunks refusal byte- +//! identical to single-pass. This pins PR-7's intent at the MCP layer. //! //! A live-Ollama end-to-end multi-hop pin lands in a follow-up //! `#[ignore]` test (same pattern as `wire_ask_stale.rs`). @@ -39,15 +40,25 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path) // which specific error code surfaced. cfg.models.llm.endpoint = "http://127.0.0.1:1".to_string(); cfg.models.llm.request_timeout_secs = 5; + // Bypass the second probe gate (`top_score < score_gate`) so that the + // probe-pass path in `RagPipeline::ask_multi_hop` (PR-7 v0.18 dogfood + // fix) is reachable from a tiny lexical fixture whose FTS5 fusion + // score may sit below the production default (0.30). The probe's + // first gate (`probe_hits.is_empty()`) is unaffected — the empty-KB + // short-circuit test below still exercises it. Production default + // 0.30 remains untouched (test config isolation only). + cfg.rag.score_gate = 0.0; cfg } -/// The dispatch contract: with an empty KB, single-pass `ask` short- -/// circuits at retrieval (no LLM call) and returns a refusal Answer -/// (`grounded=false`, `isError=false`). Multi-hop calls *decompose -/// first*, so the same empty KB + unreachable LLM yields `error.v1` -/// with `code=model_unreachable` (`isError=true`). The divergence -/// confirms the `multi_hop` arg actually rerouted the dispatch. +/// The dispatch contract (post-PR-7 probe-first): with a probe-passing +/// fixture, single-pass `ask` retrieves first and returns a NoChunks +/// refusal Answer for an unrelated query (`grounded=false`, +/// `isError=false`). Multi-hop's probe passes on the same fixture → +/// decompose runs → unreachable LLM yields `error.v1` with +/// `code=model_unreachable` (`isError=true`). The divergence confirms +/// the `multi_hop` arg actually rerouted the dispatch *after* the +/// probe gate. 
#[tokio::test] async fn ask_tool_routes_multi_hop_true_to_decompose_first() { let dir = tempfile::tempdir().unwrap(); @@ -55,6 +66,23 @@ async fn ask_tool_routes_multi_hop_true_to_decompose_first() { let workspace_root = dir.path().join("notes"); std::fs::create_dir_all(&data_dir).unwrap(); std::fs::create_dir_all(&workspace_root).unwrap(); + + // Lexical-friendly fixture so the multi-hop probe (PR-7 v0.18 dogfood + // fix) returns at least one hit and we exercise the post-probe + // decompose path. `build_match_string` rewrites the query + // `"compound about X and Y"` into + // `text : (("compound about X and Y") OR ("compound" "about" "and"))` + // — the token_and branch is FTS5 implicit-AND, so the fixture body + // MUST keep all three tokens (`compound`, `about`, `and`). Do not + // collapse to a single-token body or the probe short-circuits to + // NoChunks and the dispatch divergence below disappears. + let fixture = workspace_root.join("note.md"); + std::fs::write( + &fixture, + "# Compound topic\n\nThis note is about a compound containing X and Y in detail.\n", + ) + .unwrap(); + let cfg = minimal_config(&data_dir, &workspace_root); let scope = SourceScope { @@ -93,12 +121,14 @@ async fn ask_tool_routes_multi_hop_true_to_decompose_first() { }; let mh_v: serde_json::Value = serde_json::from_str(&mh_text).unwrap(); assert_eq!(mh_v["schema_version"], "error.v1"); - // The dispatch contract is "multi-hop reached the LLM" — i.e. - // `is_error` fires because decompose tried to talk to the LLM and - // failed. Which *specific* error code lands (`model_unreachable` - // on fast ECONNREFUSED hosts, `timeout` on slow connect-timeout - // stacks, etc.) is implementation detail of the host TCP/HTTP - // path; pinning it here would just produce flakes on slow CI. + // The dispatch contract is "multi-hop's probe passed, then decompose + // tried to talk to the LLM and failed" — i.e. 
`is_error` fires + // because, *after* the PR-7 probe gate, decompose attempted an LLM + // call against the unreachable endpoint. Which *specific* error code + // lands (`model_unreachable` on fast ECONNREFUSED hosts, `timeout` + // on slow connect-timeout stacks, etc.) is implementation detail of + // the host TCP/HTTP path; pinning it here would just produce flakes + // on slow CI. // Single-pass branch — empty KB short-circuits at retrieve, no LLM // call happens, refusal Answer comes back as isError=false. @@ -129,6 +159,59 @@ async fn ask_tool_routes_multi_hop_true_to_decompose_first() { assert_eq!(sp_v["grounded"], false); } +/// Pins PR-7's probe-empty short-circuit at the MCP-layer wire shape. +/// Empty KB + multi_hop=true → blocked at the first probe gate +/// (`probe_hits.is_empty()`) of `RagPipeline::ask_multi_hop`, so +/// `refuse_no_chunks` returns an `answer.v1` refusal envelope byte-identical to single-pass. +/// kebab-rag::multi_hop_empty_probe_pool_refuses_before_any_llm_call pins +/// only the RAG layer — this test is the sole safety net for the MCP-layer wire shape. 
+#[tokio::test] +async fn ask_tool_multi_hop_short_circuits_when_probe_empty() { + let dir = tempfile::tempdir().unwrap(); + let data_dir = dir.path().join("data"); + let workspace_root = dir.path().join("notes"); + std::fs::create_dir_all(&data_dir).unwrap(); + std::fs::create_dir_all(&workspace_root).unwrap(); + + let cfg = minimal_config(&data_dir, &workspace_root); + let scope = SourceScope { + root: workspace_root.clone(), + include: vec![], + exclude: vec![], + }; + let _ = kebab_app::ingest_with_config(cfg.clone(), scope, false).unwrap(); + + let state = KebabAppState::new(cfg.clone(), None); + let handler = KebabHandler::new(state); + let state_mh = handler.state().clone(); + let mh = tokio::task::spawn_blocking(move || { + kebab_mcp::tools::ask::handle( + &state_mh, + kebab_mcp::tools::ask::AskInput { + query: "compound about X and Y".to_string(), + session_id: None, + mode: Some("lexical".to_string()), + multi_hop: Some(true), + }, + ) + }) + .await + .unwrap(); + + assert_eq!( + mh.is_error, + Some(false), + "probe-empty short-circuit must yield refusal envelope, not error.v1 — got {mh:?}" + ); + let mh_text = match &mh.content.first().unwrap().raw { + RawContent::Text(t) => t.text.clone(), + other => panic!("expected text content, got {other:?}"), + }; + let body: serde_json::Value = serde_json::from_str(&mh_text).unwrap(); + assert_eq!(body["schema_version"], "answer.v1"); + assert_eq!(body["refusal_reason"], "no_chunks"); +} + /// AskInput's JSON-schema (rendered for tools/list) advertises the /// new `multi_hop` field. Pins agent / MCP host capability discovery /// against accidental schema-rename or omission. diff --git a/tasks/HOTFIXES.md b/tasks/HOTFIXES.md index 017514f..5815edd 100644 --- a/tasks/HOTFIXES.md +++ b/tasks/HOTFIXES.md @@ -14,6 +14,16 @@ historical contract that was implemented; this file accumulates the deltas so phase 5+ readers can find the live behavior without diffing git history. 
+## 2026-05-26 — HOTFIX #15 — MCP ask multi_hop dispatch-divergence assertion stale (fixture 보강) + +**Symptom**: PR-7 (multi-hop probe-first dogfood fix) 머지 후 `kebab-mcp::tools_call_ask_multi_hop::ask_tool_routes_multi_hop_true_to_decompose_first` 가 모든 workspace test 에서 deterministic fail (no_chunks short-circuit 으로 `is_error=Some(false)`). + +**Root cause**: PR-5 의 test 가 *empty KB → multi-hop 은 decompose first → LLM 도달* 의 stale contract 에 assert. PR-7 의 pre-decompose probe 가 빈 KB → refuse_no_chunks short-circuit. + +**Action**: test fixture 보강 — `minimal_config.score_gate = 0.0` + workspace_root 에 `note.md` ("This note is about a compound containing X and Y in detail.") ingest → probe 통과 → decompose → unreachable LLM → `error.v1` 의 원래 dispatch divergence 회복. + 신규 `_multi_hop_short_circuits_when_probe_empty` test 1개 (probe-empty short-circuit 의 MCP-layer wire pin 안전망). + module doc rewrite. + +**Amends**: spec `docs/superpowers/specs/2026-05-26-hotfix-15-mcp-ask-multi-hop-flaky-spec.md` cross-link. production code 0 touch (PR-7 의 probe-first 는 의도된 동작 유지). + ## 2026-05-25 — fb-41 pre-v0.18 dogfood: multi-hop score-gate 우회 (S7 hallucination 회귀 핀) v0.18.0 cut 전 fb-41 multi-hop RAG 도그푸딩 (`/build/cache/dogfood-v018/`, 33 assets / 205 chunks corpus — 16 신규 markdown 5 클러스터 + v017 carryover, gemma3:4b CPU only / 16 GB RAM) 에서 발견된 **score_gate 우회 + hallucination 케이스**.