From 2c058ab1759028b9fa2133bf2a3edf9444ee07ec Mon Sep 17 00:00:00 2001 From: altair823 Date: Sat, 2 May 2026 23:09:46 +0000 Subject: [PATCH 1/2] =?UTF-8?q?feat(rag):=20multi-turn=20ask=20=E2=80=94?= =?UTF-8?q?=20Turn=20struct=20+=20ask=5Fwith=5Fhistory=20+=20token=20budge?= =?UTF-8?q?t=20(p9-fb-15)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec PR #59 의 §3.8 multi-turn behaviour 구현. RAG facade 가 prior turns 받아 prompt 에 prepend, retrieval query expansion 적용, Answer 에 conversation_id / turn_index 채움. 신규 (kebab-core): - Answer 에 conversation_id (Option) / turn_index (Option) field 추가. serde skip_serializing_if 로 single-shot 의 wire output 변경 0 (기존 외부 wrapper 영향 없음). - Turn struct (question + answer + citations + created_at). - RefusalReason::LlmStreamAborted variant. 신규 (kebab-rag): - AskOpts 에 history (Vec) / conversation_id / turn_index 3 field. - AskOpts::single_shot(mode) helper. - RagPipeline::ask_with_history(query, history, conversation_id, turn_index, opts) — combined opts 로 ask 호출. - expand_query_with_history: history.last() 의 answer 첫 200 자 concat 해 SearchQuery.text 확장 (spec §3.8 의 \"cheap concat\"; LLM-based standalone-question rewriting 은 P+). - serialize_history + remaining_history_budget_chars: spec 의 priority enforcement — system+question 필수, retrieved chunks 가 차지한 뒤 남은 char budget 안에서 newest 우선, oldest drop. - ask 본문: history 가 비어있지 않으면 [이전 대화] 블록을 user prompt 위에 prepend. Answer 생성 site 3 곳 (정상 / NoChunks / ScoreGate refuse) 모두 conversation_id / turn_index 채움. 신규 (kebab-store-sqlite): - refusal_reason_label 가 LlmStreamAborted → 'llm_stream_aborted'. 기존 caller 변경 (single-shot 동작 동일): - kebab-cli main.rs Cmd::Ask: AskOpts 에 history=Vec::new(), conversation_id=None, turn_index=None 명시 (CLI multi-turn 은 p9-fb-18 의 --session/--repl 가 채움). - kebab-tui src/ask.rs spawn site 동일 (multi-turn UI 는 p9-fb-16). - kebab-eval runner.rs golden eval 동일 (single-shot per query). 
- kebab-app tests/ask_smoke.rs / kebab-tui tests/ask.rs / kebab-rag tests/pipeline.rs / kebab-eval metrics.rs Answer literal 갱신. Test: - 9 신규 lib unit (expand_query 4 / serialize_history 3 / remaining_budget 2). - 기존 12 PASS 회귀 0. Plan 갱신: - p9-fb-15 status planned → in_progress. 머지 후 한 줄 commit 으로 completed flip. Co-Authored-By: Claude Opus 4.7 (1M context) --- HANDOFF.md | 1 + crates/kebab-app/tests/ask_smoke.rs | 3 + crates/kebab-cli/src/main.rs | 6 + crates/kebab-core/src/answer.rs | 26 +++ crates/kebab-core/src/lib.rs | 4 +- crates/kebab-eval/src/metrics.rs | 2 + crates/kebab-eval/src/runner.rs | 5 + crates/kebab-rag/src/pipeline.rs | 268 ++++++++++++++++++++++- crates/kebab-rag/tests/pipeline.rs | 3 + crates/kebab-store-sqlite/src/answers.rs | 1 + crates/kebab-tui/src/ask.rs | 6 + crates/kebab-tui/tests/ask.rs | 2 + tasks/p9/p9-fb-15-rag-multi-turn-core.md | 2 +- 13 files changed, 323 insertions(+), 6 deletions(-) diff --git a/HANDOFF.md b/HANDOFF.md index 5b0750f..5ffff69 100644 --- a/HANDOFF.md +++ b/HANDOFF.md @@ -44,6 +44,7 @@ P0~P5 직렬. P6~P9 P5 이후 병렬 가능. - **2026-05-02 P9 도그푸딩 후속 (spec PR #51 + p9-fb-01 + p9-fb-02)** — `kebab ingest` 진행 표시 도입. frozen design §2.4a 신설 (wire schema `ingest_progress.v1` line-delimited streaming) + §10 의 long-running 작업 절 추가. `kebab-app::ingest_with_config_progress(.., progress: Option>)` facade 추가, 기존 `_with_config` 가 `progress=None` forwarding wrapper. CLI 가 indicatif TTY 진행 바 (stderr) / non-TTY 한 줄씩 / `--json` 모드는 line-delimited stdout. p9-fb-03 (TUI background worker) + p9-fb-04 (cancel) 가 같은 stream 위에 build. - **2026-05-02 P9 도그푸딩 후속 (p9-fb-03)** — TUI 의 background ingest worker. Library 의 `r` 키가 `kebab_app::ingest_with_config_progress` 를 spawned thread 에서 호출, run loop 가 매 frame 마다 progress channel drain → 화면 하단 status bar 1 줄 갱신. terminal event (`Completed`/`Aborted`) 후 3 초 final 라인 hold + 자동 hide + Library auto-refresh. spec: `tasks/p9/p9-fb-03-tui-ingest-background.md`. 
(cancel slot 은 p9-fb-04 가 추가하는 형태로 단일화 — 회차 1 review 결과.) - **2026-05-02 P9 도그푸딩 후속 (p9-fb-04)** — ingest cooperative cancellation. `kebab-app::ingest_with_config_cancellable(.., cancel: Option>)` facade 추가, 기존 `_progress` 가 `cancel=None` forwarding. asset loop iter 시작 boundary 마다 cancel poll → true 면 break + `IngestEvent::Aborted { partial_counts }` + `Ok(IngestReport)` 정상 반환 (Err 아님). 부분 commit 보존, 다음 ingest 가 idempotent 재개. CLI Ctrl-C SIGINT handler (`ctrlc` crate) — 1회: cancel, 2회: hard exit (130). TUI Esc / Ctrl-C 가 cancel signal (in-flight 시), 그 외에는 quit. `IngestState` 에 `cancel: Arc` field 추가. spec: `tasks/p9/p9-fb-04-ingest-cancellation.md`. +- **2026-05-02 P9 도그푸딩 후속 (spec PR #59 + p9-fb-15)** — RAG multi-turn 도입. frozen design §3.8 갱신 — `Answer` 에 `conversation_id` / `turn_index` optional field, 신규 `Turn` struct, `RefusalReason::LlmStreamAborted` variant. `kebab-rag::AskOpts` 에 `history: Vec` / `conversation_id` / `turn_index` 3 field 추가, 기존 caller 는 `Vec::new() / None` (single-shot 동작 동일). `RagPipeline::ask_with_history(query, history, conversation_id, turn_index, opts)` helper. prompt 빌드: `[이전 대화]` 블록을 user prompt 위에 prepend, newest-first, char budget (`cfg.rag.max_context_tokens * 4`) 안에서 oldest 부터 drop. retrieval query expansion: 직전 answer 첫 200 자 concat. wire schema `answer.v1` 에 두 필드 + `format: date-time` 추가. p9-fb-16 (TUI conversation UI) + p9-fb-17/18 (V004 storage + CLI session) 가 같은 facade 위에 build. spec: `tasks/p9/p9-fb-15-rag-multi-turn-core.md`. 
## 다음 task 후보 diff --git a/crates/kebab-app/tests/ask_smoke.rs b/crates/kebab-app/tests/ask_smoke.rs index 8df4f3f..0e5c8c9 100644 --- a/crates/kebab-app/tests/ask_smoke.rs +++ b/crates/kebab-app/tests/ask_smoke.rs @@ -30,6 +30,9 @@ fn ask_lexical_smoke() { temperature: Some(0.0), seed: Some(0), stream_sink: None, + history: Vec::new(), + conversation_id: None, + turn_index: None, }; // The fixture workspace contains "ownership" content; the model's // citation behavior depends on its training, so we don't assert on diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 7fe57c9..3c2c60c 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -430,6 +430,12 @@ fn run(cli: &Cli) -> anyhow::Result<()> { // once on completion). The TUI ask pane (P9-3) is what // wires up a real `mpsc::Sender` here. stream_sink: None, + // p9-fb-15: CLI single-shot ask. p9-fb-18 adds + // `--session` / `--repl` for multi-turn over the same + // facade (passes a populated `history`). + history: Vec::new(), + conversation_id: None, + turn_index: None, }; let ans = kebab_app::ask_with_config(cfg, query, opts)?; if cli.json { diff --git a/crates/kebab-core/src/answer.rs b/crates/kebab-core/src/answer.rs index bbf6007..4fc5f0c 100644 --- a/crates/kebab-core/src/answer.rs +++ b/crates/kebab-core/src/answer.rs @@ -20,6 +20,15 @@ pub struct Answer { pub usage: TokenUsage, #[serde(with = "time::serde::rfc3339")] pub created_at: OffsetDateTime, + /// p9-fb-15: same conversation 의 turn 들이 공유. CLI single-shot + /// (history 없음) / TUI 첫 turn 은 None. blake3 해시 또는 사용자 + /// 명시 (`kebab ask --session `, p9-fb-18). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub conversation_id: Option<String>, + /// p9-fb-15: 같은 conversation 안 0-based 순서. 첫 turn = 0. None + /// 이면 single-shot. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub turn_index: Option<u32>, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] @@ -28,6 +37,19 @@ pub struct AnswerCitation { pub citation: Citation, } +/// p9-fb-15: history 가 prompt 에 들어갈 때의 한 turn. RAG facade 가 +/// `Vec<Turn>` 받아 system + history + retrieval + new question 으로 +/// prompt 빌드. token budget 안에 fit 안 되면 oldest turn 부터 drop +/// (newest 우선 보존). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub struct Turn { + pub question: String, + pub answer: String, + pub citations: Vec, + #[serde(with = "time::serde::rfc3339")] + pub created_at: OffsetDateTime, +} + #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum RefusalReason { @@ -35,6 +57,10 @@ pub enum RefusalReason { LlmSelfJudge, NoIndex, NoChunks, + /// p9-fb-15: ask 가 LLM 토큰 stream 도중 cancel 됨. partial answer + /// 가 채워져 있을 수 있음 (사용자가 본 부분까지). RAG retrieval + /// 자체는 정상 — 모델 generation 단계에서만 중단. 
+ LlmStreamAborted, } #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] diff --git a/crates/kebab-core/src/lib.rs b/crates/kebab-core/src/lib.rs index 3abedb2..99dd534 100644 --- a/crates/kebab-core/src/lib.rs +++ b/crates/kebab-core/src/lib.rs @@ -54,8 +54,8 @@ pub use search::{ SearchMode, SearchQuery, }; pub use answer::{ - Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, - TokenUsage, TraceId, + Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage, + TraceId, Turn, }; pub use ingest::{IngestItem, IngestItemKind, IngestReport}; pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus}; diff --git a/crates/kebab-eval/src/metrics.rs b/crates/kebab-eval/src/metrics.rs index 4f3eee8..7a709e1 100644 --- a/crates/kebab-eval/src/metrics.rs +++ b/crates/kebab-eval/src/metrics.rs @@ -496,6 +496,8 @@ mod tests { }, usage: TokenUsage { prompt_tokens: 1, completion_tokens: 1, latency_ms: 1 }, created_at: OffsetDateTime::UNIX_EPOCH, + conversation_id: None, + turn_index: None, } } diff --git a/crates/kebab-eval/src/runner.rs b/crates/kebab-eval/src/runner.rs index c70ded0..15ee57e 100644 --- a/crates/kebab-eval/src/runner.rs +++ b/crates/kebab-eval/src/runner.rs @@ -174,6 +174,11 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult temperature: opts.temperature, seed: opts.seed, stream_sink: None, + // p9-fb-15: golden eval is single-shot per query; no + // conversational history. 
+ history: Vec::new(), + conversation_id: None, + turn_index: None, }; match app.ask(&gq.query, ask_opts) { Ok(ans) => Some(ans), diff --git a/crates/kebab-rag/src/pipeline.rs b/crates/kebab-rag/src/pipeline.rs index af53265..62dd40c 100644 --- a/crates/kebab-rag/src/pipeline.rs +++ b/crates/kebab-rag/src/pipeline.rs @@ -36,7 +36,7 @@ use anyhow::{Context, Result}; use kebab_core::{ Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason, GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters, - SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId, + SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId, Turn, }; use kebab_core::versions::PromptTemplateVersion; use kebab_store_sqlite::SqliteStore; @@ -80,6 +80,41 @@ pub struct AskOpts { /// pipeline — `SendError` is silently swallowed and generation /// continues so the `Answer` row still gets persisted. pub stream_sink: Option>, + /// p9-fb-15: prior turns of the same conversation. Empty for + /// single-shot ask. The pipeline prepends a serialized `[이전 + /// 대화]` block to the user prompt and uses the most-recent + /// answer's first 200 chars to expand the retrieval query + /// (cheap concat — LLM-based standalone-question rewriting is + /// out of scope per spec §3.8). Newest turns are kept first; the oldest + /// turns drop when the prompt would otherwise exceed + /// `cfg.rag.max_context_tokens`. + pub history: Vec<Turn>, + /// p9-fb-15: same conversation 의 turn 들이 공유. Filled into + /// `Answer.conversation_id`. None for single-shot ask. + pub conversation_id: Option<String>, + /// p9-fb-15: 0-based index within `conversation_id`. Caller + /// (TUI / CLI session) computes from `history.len()`. None for + /// single-shot ask. + pub turn_index: Option<u32>, +} + +impl AskOpts { + /// Default knobs for a single-shot ask (no history, no + /// conversation_id). `k` falls through to the config floor in + /// `RagPipeline::ask`. 
+ pub fn single_shot(mode: SearchMode) -> Self { + Self { + k: 0, + explain: false, + mode, + temperature: None, + seed: None, + stream_sink: None, + history: Vec::new(), + conversation_id: None, + turn_index: None, + } + } } // ── RagPipeline ───────────────────────────────────────────────────────────── @@ -111,6 +146,29 @@ impl RagPipeline { } } + /// p9-fb-15: convenience for multi-turn ask. Stuffs `history`, + /// `conversation_id`, `turn_index` into a fresh `AskOpts` (built + /// from `opts.mode` + carried-through knobs) and forwards to + /// [`Self::ask`]. The returned `Answer` carries the same + /// `conversation_id` / `turn_index`. CLI / TUI sessions call this + /// once per follow-up question. + pub fn ask_with_history( + &self, + query: &str, + history: Vec<Turn>, + conversation_id: String, + turn_index: u32, + opts: AskOpts, + ) -> Result<Answer> { + let combined = AskOpts { + history, + conversation_id: Some(conversation_id), + turn_index: Some(turn_index), + ..opts + }; + self.ask(query, combined) + } + /// Run one query through the full pipeline. Always persists an /// `answers` row (including refusals); the row write is best-effort /// — a persistence error is surfaced via `tracing::warn!` so the @@ -121,8 +179,14 @@ impl RagPipeline { // ── 1. Retrieve ──────────────────────────────────────────────────── // floor at config default — see `AskOpts::k` doc for rationale. let k_effective = opts.k.max(self.config.search.default_k); + // p9-fb-15: query expansion when history is present. + // Concat the most-recent answer's first 200 chars so the + // retriever sees the full conversational context. Cheap — + // LLM-based standalone-question rewriting is out of scope + // (spec §3.8 marks it P+). + let expanded_query = expand_query_with_history(query, &opts.history); let search_query = SearchQuery { - text: query.to_string(), + text: expanded_query, mode: opts.mode, k: k_effective, filters: SearchFilters::default(), @@ -171,7 +235,25 @@ impl RagPipeline { // ── 4. 
Render prompt ─────────────────────────────────────────────── let system = SYSTEM_PROMPT_RAG_V1.to_string(); - let user = format!("[질문]\n{query}\n\n[근거]\n{packed_text}"); + // p9-fb-15: prepend `[이전 대화]` block when history is + // present. `serialize_history` enforces the spec §3.8 + // priority — system+question stay untouched, retrieved + // chunks already fit (`pack_context` honoured the budget), + // so the budget remaining for history is what's left over. + let history_budget_chars = remaining_history_budget_chars( + self.config.rag.max_context_tokens, + &system, + query, + &packed_text, + ); + let history_block = serialize_history(&opts.history, history_budget_chars); + let user = if history_block.is_empty() { + format!("[질문]\n{query}\n\n[근거]\n{packed_text}") + } else { + format!( + "{history_block}\n\n[질문]\n{query}\n\n[근거]\n{packed_text}" + ) + }; // ── 5. Generate ──────────────────────────────────────────────────── // Completion budget is bounded only by what the LM context window @@ -322,6 +404,8 @@ impl RagPipeline { }, usage: usage_final, created_at: OffsetDateTime::now_utc(), + conversation_id: opts.conversation_id.clone(), + turn_index: opts.turn_index, }; // Drop the moved `finish_reason` early into a tracing breadcrumb; the @@ -455,6 +539,8 @@ impl RagPipeline { latency_ms: elapsed_ms, }, created_at: OffsetDateTime::now_utc(), + conversation_id: opts.conversation_id.clone(), + turn_index: opts.turn_index, }; if let Err(e) = self.docs.put_answer(&answer, query, None) { tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed"); @@ -530,6 +616,8 @@ impl RagPipeline { latency_ms: elapsed_ms, }, created_at: OffsetDateTime::now_utc(), + conversation_id: opts.conversation_id.clone(), + turn_index: opts.turn_index, }; if let Err(e) = self.docs.put_answer(&answer, query, None) { tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed"); @@ -569,6 +657,80 @@ fn est_tokens(s: &str) -> usize { 
 s.chars().count().div_ceil(4) } +/// p9-fb-15: expand the retrieval query with the most-recent answer's +/// first 200 chars when history is non-empty. Cheap concat per spec +/// §3.8 — LLM-based standalone-question rewriting is P+. The retriever +/// sees `<query> <last-answer prefix>` so embedding / FTS hit on +/// names from the prior turn ("Y" in "Y vs X 의 차이?") still surfaces +/// the right chunks. +fn expand_query_with_history(query: &str, history: &[Turn]) -> String { + let Some(last) = history.last() else { + return query.to_string(); + }; + let prefix: String = last.answer.chars().take(200).collect(); + if prefix.is_empty() { + query.to_string() + } else { + format!("{query} {prefix}") + } +} + +/// p9-fb-15: how many *chars* of history block we may afford. The +/// budget is `cfg.rag.max_context_tokens * 4` (4 chars per token, as in `est_tokens`) minus the +/// chars already committed to system + question + retrieved chunks. +/// Returns 0 (history fully dropped) when budget already exhausted. +fn remaining_history_budget_chars( + max_context_tokens: usize, + system: &str, + question: &str, + packed_text: &str, +) -> usize { + let total_chars = max_context_tokens.saturating_mul(4); + let used = system.chars().count() + + question.chars().count() + + packed_text.chars().count() + // Account for the format-string overhead: `[질문]\n` + `\n\n[근거]\n` + // + `\n\n` between history and question. Round up to ~32 chars + // to keep the maths simple. + + 32; + total_chars.saturating_sub(used) +} + +/// p9-fb-15: serialize history into the `[이전 대화]` block. Newest +/// turns get the budget first per spec §3.8 — the loop walks `history` in reverse and +/// stops as soon as appending the next turn would exceed `budget_chars`; kept turns are emitted oldest-first. +/// Empty when history is empty or no turn fits. 
+fn serialize_history(history: &[Turn], budget_chars: usize) -> String { + if history.is_empty() || budget_chars == 0 { + return String::new(); + } + // Build newest-first, then reverse so the LM reads chronological + // order ("Q1/A1\nQ2/A2 → newest at the bottom, just above the + // current question"). + let mut included_rev: Vec<String> = Vec::new(); + let mut used = 0usize; + let header = "[이전 대화]\n"; + let header_len = header.chars().count(); + for turn in history.iter().rev() { + let block = format!("Q: {}\nA: {}\n", turn.question, turn.answer); + let blen = block.chars().count(); + if used + blen + header_len > budget_chars { + break; + } + used += blen; + included_rev.push(block); + } + if included_rev.is_empty() { + return String::new(); + } + let mut out = String::with_capacity(used + header_len); + out.push_str(header); + for block in included_rev.iter().rev() { + out.push_str(block); + } + out +} + /// Strict marker regex per design §1 / spec line 107: `[#1]` … `[#999]`. /// Matches without `#`, with whitespace, or with non-digit content are /// intentionally ignored (see test plan rows 5–6). 
@@ -634,4 +796,104 @@ mod tests { // 8 chars → 2 tokens assert_eq!(est_tokens("abcdefgh"), 2); } + + // ── p9-fb-15: multi-turn helpers ─────────────────────────────────────── + + fn fake_turn(question: &str, answer: &str) -> Turn { + Turn { + question: question.into(), + answer: answer.into(), + citations: Vec::new(), + created_at: OffsetDateTime::now_utc(), + } + } + + #[test] + fn expand_query_with_history_empty_returns_query_unchanged() { + assert_eq!(expand_query_with_history("hi", &[]), "hi"); + } + + #[test] + fn expand_query_with_history_concats_last_answer_prefix() { + let h = vec![fake_turn("Q1", "first answer body")]; + let expanded = expand_query_with_history("follow-up", &h); + assert!(expanded.starts_with("follow-up "), "got: {expanded}"); + assert!( + expanded.contains("first answer body"), + "got: {expanded}" + ); + } + + #[test] + fn expand_query_caps_last_answer_at_200_chars() { + let long = "x".repeat(500); + let h = vec![fake_turn("Q", &long)]; + let expanded = expand_query_with_history("q", &h); + // query (1 char) + space (1) + 200 of x = 202. 
+ assert_eq!(expanded.chars().count(), 1 + 1 + 200); + } + + #[test] + fn expand_query_uses_last_turn_only() { + let h = vec![ + fake_turn("Q1", "FIRST ANSWER"), + fake_turn("Q2", "LATEST ANSWER"), + ]; + let expanded = expand_query_with_history("q3", &h); + assert!(expanded.contains("LATEST ANSWER"), "got: {expanded}"); + assert!(!expanded.contains("FIRST ANSWER"), "got: {expanded}"); + } + + #[test] + fn serialize_history_empty_returns_empty_string() { + assert_eq!(serialize_history(&[], 1000), ""); + let h = vec![fake_turn("q", "a")]; + assert_eq!(serialize_history(&h, 0), ""); + } + + #[test] + fn serialize_history_chronological_order_with_header() { + let h = vec![ + fake_turn("Q1", "A1"), + fake_turn("Q2", "A2"), + fake_turn("Q3", "A3"), + ]; + let s = serialize_history(&h, 1000); + assert!(s.starts_with("[이전 대화]\n"), "got: {s:?}"); + let q1_pos = s.find("Q1").unwrap(); + let q3_pos = s.find("Q3").unwrap(); + assert!(q1_pos < q3_pos, "chronological: oldest first; got: {s:?}"); + } + + #[test] + fn serialize_history_drops_oldest_when_budget_tight() { + // Budget tight enough that only 1 of 3 turns fits. + let h = vec![ + fake_turn("Q1", "A1"), + fake_turn("Q2", "A2"), + fake_turn("Q3", "A3"), + ]; + // Header is "[이전 대화]\n" (8 chars) + 1 turn ("Q: Q3\nA: A3\n" = 12 chars) ≈ 20. + let s = serialize_history(&h, 25); + assert!(s.contains("Q3"), "newest must be kept: {s:?}"); + assert!(!s.contains("Q1"), "oldest dropped: {s:?}"); + } + + #[test] + fn remaining_history_budget_subtracts_known_pieces() { + // total = 100 tokens * 4 chars = 400 chars budget. + // system 100 chars + question 50 chars + packed 150 chars + 32 overhead = 332. left = 68. 
+ let s = "x".repeat(100); + let q = "y".repeat(50); + let p = "z".repeat(150); + let left = remaining_history_budget_chars(100, &s, &q, &p); + assert_eq!(left, 400 - 100 - 50 - 150 - 32); + } + + #[test] + fn remaining_history_budget_clamps_to_zero_when_overrun() { + let s = "x".repeat(1000); + let left = remaining_history_budget_chars(10, &s, "q", "p"); + assert_eq!(left, 0); + } } diff --git a/crates/kebab-rag/tests/pipeline.rs b/crates/kebab-rag/tests/pipeline.rs index 874cbf9..18fe1a2 100644 --- a/crates/kebab-rag/tests/pipeline.rs +++ b/crates/kebab-rag/tests/pipeline.rs @@ -72,6 +72,9 @@ fn default_opts() -> AskOpts { temperature: Some(0.0), seed: Some(0), stream_sink: None, + history: Vec::new(), + conversation_id: None, + turn_index: None, } } diff --git a/crates/kebab-store-sqlite/src/answers.rs b/crates/kebab-store-sqlite/src/answers.rs index e4ef25d..3f1738e 100644 --- a/crates/kebab-store-sqlite/src/answers.rs +++ b/crates/kebab-store-sqlite/src/answers.rs @@ -98,6 +98,7 @@ fn refusal_reason_label(r: &RefusalReason) -> &'static str { RefusalReason::LlmSelfJudge => "llm_self_judge", RefusalReason::NoIndex => "no_index", RefusalReason::NoChunks => "no_chunks", + RefusalReason::LlmStreamAborted => "llm_stream_aborted", } } diff --git a/crates/kebab-tui/src/ask.rs b/crates/kebab-tui/src/ask.rs index ccc034e..7081475 100644 --- a/crates/kebab-tui/src/ask.rs +++ b/crates/kebab-tui/src/ask.rs @@ -141,6 +141,7 @@ fn render_status(f: &mut Frame, area: Rect, s: &AskState) { Some(RefusalReason::LlmSelfJudge) => " refusal=llm_self_judge", Some(RefusalReason::NoIndex) => " refusal=no_index", Some(RefusalReason::NoChunks) => " refusal=no_chunks", + Some(RefusalReason::LlmStreamAborted) => " refusal=llm_stream_aborted", None => "", }; vec![ @@ -300,6 +301,11 @@ fn spawn_ask_worker(state: &mut App) { temperature: None, seed: None, stream_sink: Some(tx), + // p9-fb-15: TUI ask is single-shot in this task; multi-turn + // conversation UI lands in p9-fb-16. 
+ history: Vec::new(), + conversation_id: None, + turn_index: None, }; let handle = thread::spawn(move || kebab_app::ask_with_config(cfg, &query, opts)); diff --git a/crates/kebab-tui/tests/ask.rs b/crates/kebab-tui/tests/ask.rs index e2d6c44..5d1d5c4 100644 --- a/crates/kebab-tui/tests/ask.rs +++ b/crates/kebab-tui/tests/ask.rs @@ -66,6 +66,8 @@ fn make_answer(grounded: bool, refusal: Option, body: &str) -> An latency_ms: 1200, }, created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(), + conversation_id: None, + turn_index: None, } } diff --git a/tasks/p9/p9-fb-15-rag-multi-turn-core.md b/tasks/p9/p9-fb-15-rag-multi-turn-core.md index e99ede1..254d1c7 100644 --- a/tasks/p9/p9-fb-15-rag-multi-turn-core.md +++ b/tasks/p9/p9-fb-15-rag-multi-turn-core.md @@ -3,7 +3,7 @@ phase: P9 component: kebab-rag + kebab-app task_id: p9-fb-15 title: "RAG multi-turn — history-aware prompt + token budget" -status: planned +status: in_progress depends_on: [] unblocks: [p9-fb-16, p9-fb-17, p9-fb-18] contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md -- 2.49.1 From b19ebfd2bccdc4044d7bf4645b4856369063f2d5 Mon Sep 17 00:00:00 2001 From: altair823 Date: Sat, 2 May 2026 23:14:12 +0000 Subject: [PATCH 2/2] =?UTF-8?q?review(=ED=9A=8C=EC=B0=A81):=20AskOpts::sin?= =?UTF-8?q?gle=5Fshot=20helper=20=EC=A0=9C=EA=B1=B0=20(yagni)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 회차 1 nit 반영. helper 가 본 PR 안 caller 0 — 모든 사용처가 struct literal 패턴. CLAUDE.md "Don't add abstractions beyond what the task requires" 룰. 미래 caller 가 필요 시 본인이 추가. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-rag/src/pipeline.rs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/crates/kebab-rag/src/pipeline.rs b/crates/kebab-rag/src/pipeline.rs index 62dd40c..8c39c44 100644 --- a/crates/kebab-rag/src/pipeline.rs +++ b/crates/kebab-rag/src/pipeline.rs @@ -98,25 +98,6 @@ pub struct AskOpts { pub turn_index: Option, } -impl AskOpts { - /// Default knobs for a single-shot ask (no history, no - /// conversation_id). `k` falls through to the config floor in - /// `RagPipeline::ask`. - pub fn single_shot(mode: SearchMode) -> Self { - Self { - k: 0, - explain: false, - mode, - temperature: None, - seed: None, - stream_sink: None, - history: Vec::new(), - conversation_id: None, - turn_index: None, - } - } -} - // ── RagPipeline ───────────────────────────────────────────────────────────── /// Single-threaded RAG orchestrator. See module docs for the stage list. -- 2.49.1