diff --git a/crates/kebab-rag/src/pipeline.rs b/crates/kebab-rag/src/pipeline.rs index 5f3ee34..ee54889 100644 --- a/crates/kebab-rag/src/pipeline.rs +++ b/crates/kebab-rag/src/pipeline.rs @@ -908,6 +908,16 @@ impl RagPipeline { let req = GenerateRequest { system: MULTI_HOP_DECOMPOSE_SYSTEM_PROMPT.to_string(), user, + // Empty stop is intentional. Instruction-following models + // (gemma3:4b+ / gemma4:e4b / Claude / GPT-4) honor the + // "JSON array only" prompt rule, so prose past the + // closing `]` is rare. If a downstream LM does append + // prose, `parse_decompose_response` returns `None` and + // the caller surfaces `MultiHopDecomposeFailed` — that + // is the policy. Adding a trailing-`]` stop sequence + // risks truncating the array (LM emits the close bracket + // and we cut the response one token too early), which + // is a worse failure mode than the explicit refusal. stop: Vec::new(), // JSON array of up to 5 sub-questions is short. 512 is a // comfortable cap that fits in any context window without @@ -1495,6 +1505,16 @@ mod tests { assert_eq!(out, vec!["rust async", "tokio runtime"]); } + /// Partial-empty case — drop the empties, keep the rest. Pins + /// the trim-then-filter chain in `parse_decompose_response` so a + /// future refactor that reorders the steps (e.g. take-then-trim) + /// can't accidentally swallow valid sub-queries. + #[test] + fn parse_decompose_response_drops_partial_empty_keeps_valid() { + let out = parse_decompose_response(r#"["", "valid q", " "]"#).unwrap(); + assert_eq!(out, vec!["valid q"]); + } + #[test] fn est_tokens_approx_quarters() { assert_eq!(est_tokens(""), 0); diff --git a/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md b/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md index 90dea0d..d56b575 100644 --- a/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md +++ b/docs/superpowers/plans/2026-05-25-p9-fb-41-multi-hop-rag.md @@ -112,6 +112,9 @@ XL 작업 — 6 PR 분할 (각 머지 후 누적, 마지막 PR 후 v0.18.0 cut). - cap 도달 (max_depth / max_total_sub_queries / max_pool_chunks) 시 forced_stop=true 로 break. - synthesize → Answer.hops 에 누적된 HopRecord array 첨부. 6. decide JSON parse failure → forced_stop synthesize (refusal 아님, 안전한 graceful degrade). +7. **PR-2 회차 1 carry-over** — 같은 PR 에서 함께 해소: + - `ask` + `ask_multi_hop` 의 §4-§9 mirror (~150 줄 중복) → 공통 helper `synthesize_with_packed_context` 추출. history block 처리도 helper 화. drift 위험 차단. + - `MULTI_HOP_DECOMPOSE_USER_TEMPLATE` 의 `.replace("{query}", ...).replace("{max_sub_queries}", ...)` corner case → `format!` named arg 또는 strict substitution helper 로 교체. 사용자 query 에 literal `{max_sub_queries}` 포함 시 mis-replace 회피. **Test**: - `multi_hop_decide_stop_triggers_synthesize` — decide 가 `[]` 반환 시 즉시 synthesize.