Merge pull request 'feat(rag): multi-turn ask — Turn + ask_with_history + token budget (p9-fb-15)' (#60) from feat/p9-fb-15-rag-multiturn into main

2026-05-02 23:14:54 +00:00
parent 9ddd199727 b19ebfd2bc
commit 76fbb44e83
13 changed files with 304 additions and 6 deletions
--- a/HANDOFF.md
+++ b/HANDOFF.md
@@ -44,6 +44,7 @@ P0~P5 직렬. P6~P9 P5 이후 병렬 가능.
 - **2026-05-02 P9 도그푸딩 후속 (spec PR #51 + p9-fb-01 + p9-fb-02)** — `kebab ingest` 진행 표시 도입. frozen design §2.4a 신설 (wire schema `ingest_progress.v1` line-delimited streaming) + §10 의 long-running 작업 절 추가. `kebab-app::ingest_with_config_progress(.., progress: Option<Sender<IngestEvent>>)` facade 추가, 기존 `_with_config` 가 `progress=None` forwarding wrapper. CLI 가 indicatif TTY 진행 바 (stderr) / non-TTY 한 줄씩 / `--json` 모드는 line-delimited stdout. p9-fb-03 (TUI background worker) + p9-fb-04 (cancel) 가 같은 stream 위에 build.
 - **2026-05-02 P9 도그푸딩 후속 (p9-fb-03)** — TUI 의 background ingest worker. Library 의 `r` 키가 `kebab_app::ingest_with_config_progress` 를 spawned thread 에서 호출, run loop 가 매 frame 마다 progress channel drain → 화면 하단 status bar 1 줄 갱신. terminal event (`Completed`/`Aborted`) 후 3 초 final 라인 hold + 자동 hide + Library auto-refresh. spec: `tasks/p9/p9-fb-03-tui-ingest-background.md`. (cancel slot 은 p9-fb-04 가 추가하는 형태로 단일화 — 회차 1 review 결과.)
 - **2026-05-02 P9 도그푸딩 후속 (p9-fb-04)** — ingest cooperative cancellation. `kebab-app::ingest_with_config_cancellable(.., cancel: Option<Arc<AtomicBool>>)` facade 추가, 기존 `_progress` 가 `cancel=None` forwarding. asset loop iter 시작 boundary 마다 cancel poll → true 면 break + `IngestEvent::Aborted { partial_counts }` + `Ok(IngestReport)` 정상 반환 (Err 아님). 부분 commit 보존, 다음 ingest 가 idempotent 재개. CLI Ctrl-C SIGINT handler (`ctrlc` crate) — 1회: cancel, 2회: hard exit (130). TUI Esc / Ctrl-C 가 cancel signal (in-flight 시), 그 외에는 quit. `IngestState` 에 `cancel: Arc<AtomicBool>` field 추가. spec: `tasks/p9/p9-fb-04-ingest-cancellation.md`.
+- **2026-05-02 P9 도그푸딩 후속 (spec PR #59 + p9-fb-15)** — RAG multi-turn 도입. frozen design §3.8 갱신 — `Answer` 에 `conversation_id` / `turn_index` optional field, 신규 `Turn` struct, `RefusalReason::LlmStreamAborted` variant. `kebab-rag::AskOpts` 에 `history: Vec<Turn>` / `conversation_id` / `turn_index` 3 field 추가, 기존 caller 는 `Vec::new() / None` (single-shot 동작 동일). `RagPipeline::ask_with_history(query, history, conversation_id, turn_index, opts)` helper. prompt 빌드: `[이전 대화]` 블록을 user prompt 위에 prepend — turn 선택은 newest 우선 (char budget `cfg.rag.max_context_tokens * 4` 에서 system + 질문 + 근거를 뺀 나머지 안에서 oldest 부터 drop), 블록 안 표기 순서는 chronological (oldest → newest). retrieval query expansion: 직전 answer 첫 200 자 concat. wire schema `answer.v1` 에 두 필드 + `format: date-time` 추가. p9-fb-16 (TUI conversation UI) + p9-fb-17/18 (V004 storage + CLI session) 가 같은 facade 위에 build. spec: `tasks/p9/p9-fb-15-rag-multi-turn-core.md`.

 ## 다음 task 후보

--- a/crates/kebab-app/tests/ask_smoke.rs
+++ b/crates/kebab-app/tests/ask_smoke.rs
@@ -30,6 +30,9 @@ fn ask_lexical_smoke() {
        temperature: Some(0.0),
        seed: Some(0),
        stream_sink: None,
+        history: Vec::new(),
+        conversation_id: None,
+        turn_index: None,
    };
    // The fixture workspace contains "ownership" content; the model's
    // citation behavior depends on its training, so we don't assert on
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -430,6 +430,12 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
                // once on completion). The TUI ask pane (P9-3) is what
                // wires up a real `mpsc::Sender` here.
                stream_sink: None,
+                // p9-fb-15: CLI single-shot ask. p9-fb-18 adds
+                // `--session` / `--repl` for multi-turn over the same
+                // facade (passes a populated `history`).
+                history: Vec::new(),
+                conversation_id: None,
+                turn_index: None,
            };
            let ans = kebab_app::ask_with_config(cfg, query, opts)?;
            if cli.json {
--- a/crates/kebab-core/src/answer.rs
+++ b/crates/kebab-core/src/answer.rs
@@ -20,6 +20,15 @@ pub struct Answer {
    pub usage: TokenUsage,
    #[serde(with = "time::serde::rfc3339")]
    pub created_at: OffsetDateTime,
+    /// p9-fb-15: same conversation 의 turn 들이 공유. CLI single-shot
+    /// (history 없음) / TUI 첫 turn 은 None. blake3 해시 또는 사용자
+    /// 명시 (`kebab ask --session <id>`, p9-fb-18).
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub conversation_id: Option<String>,
+    /// p9-fb-15: 같은 conversation 안 0-based 순서. 첫 turn = 0. None
+    /// 이면 single-shot.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub turn_index: Option<u32>,
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
@@ -28,6 +37,19 @@ pub struct AnswerCitation {
    pub citation: Citation,
 }

+/// p9-fb-15: one prior question/answer exchange as fed back into the
+/// prompt. The RAG facade takes a `Vec<Turn>` and builds the prompt from
+/// system + history + retrieval + new question; when history overflows the
+/// token budget the oldest turns are dropped first (newest are kept).
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct Turn {
+    pub question: String,
+    pub answer: String,
+    pub citations: Vec<AnswerCitation>,
+    #[serde(with = "time::serde::rfc3339")]
+    pub created_at: OffsetDateTime,
+}
+
 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
 pub enum RefusalReason {
@@ -35,6 +57,10 @@ pub enum RefusalReason {
    LlmSelfJudge,
    NoIndex,
    NoChunks,
+    /// p9-fb-15: ask 가 LLM 토큰 stream 도중 cancel 됨. partial answer
+    /// 가 채워져 있을 수 있음 (사용자가 본 부분까지). RAG retrieval
+    /// 자체는 정상 — 모델 generation 단계에서만 중단.
+    LlmStreamAborted,
 }

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
--- a/crates/kebab-core/src/lib.rs
+++ b/crates/kebab-core/src/lib.rs
@@ -54,8 +54,8 @@ pub use search::{
    SearchMode, SearchQuery,
 };
 pub use answer::{
-    Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason,
-    TokenUsage, TraceId,
+    Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
+    TraceId, Turn,
 };
 pub use ingest::{IngestItem, IngestItemKind, IngestReport};
 pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus};
--- a/crates/kebab-eval/src/metrics.rs
+++ b/crates/kebab-eval/src/metrics.rs
@@ -496,6 +496,8 @@ mod tests {
            },
            usage: TokenUsage { prompt_tokens: 1, completion_tokens: 1, latency_ms: 1 },
            created_at: OffsetDateTime::UNIX_EPOCH,
+            conversation_id: None,
+            turn_index: None,
        }
    }

--- a/crates/kebab-eval/src/runner.rs
+++ b/crates/kebab-eval/src/runner.rs
@@ -174,6 +174,11 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
            temperature: opts.temperature,
            seed: opts.seed,
            stream_sink: None,
+            // p9-fb-15: golden eval is single-shot per query; no
+            // conversational history.
+            history: Vec::new(),
+            conversation_id: None,
+            turn_index: None,
        };
        match app.ask(&gq.query, ask_opts) {
            Ok(ans) => Some(ans),
--- a/crates/kebab-rag/src/pipeline.rs
+++ b/crates/kebab-rag/src/pipeline.rs
@@ -36,7 +36,7 @@ use anyhow::{Context, Result};
 use kebab_core::{
    Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason,
    GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters,
-    SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId,
+    SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId, Turn,
 };
 use kebab_core::versions::PromptTemplateVersion;
 use kebab_store_sqlite::SqliteStore;
@@ -80,6 +80,22 @@ pub struct AskOpts {
    /// pipeline — `SendError` is silently swallowed and generation
    /// continues so the `Answer` row still gets persisted.
    pub stream_sink: Option<std::sync::mpsc::Sender<String>>,
+    /// p9-fb-15: prior turns of the same conversation, ordered oldest
+    /// → newest (the last element is the most recent turn). Empty for
+    /// single-shot ask. The pipeline prepends a serialized
+    /// `[이전 대화]` block to the user prompt and appends the last
+    /// turn's answer (first 200 chars) to the retrieval query — a cheap
+    /// concat; LLM-based rewriting is out of scope per spec §3.8. Turns
+    /// are kept newest-first and the oldest drop once the prompt would
+    /// exceed ~`cfg.rag.max_context_tokens * 4` chars.
+    pub history: Vec<Turn>,
+    /// p9-fb-15: same conversation 의 turn 들이 공유. Filled into
+    /// `Answer.conversation_id`. None for single-shot ask.
+    pub conversation_id: Option<String>,
+    /// p9-fb-15: 0-based index within `conversation_id`. Caller
+    /// (TUI / CLI session) computes from `history.len()`. None for
+    /// single-shot ask.
+    pub turn_index: Option<u32>,
 }

 // ── RagPipeline ─────────────────────────────────────────────────────────────
@@ -111,6 +127,29 @@ impl RagPipeline {
        }
    }

+    /// p9-fb-15: multi-turn entry point layered on [`Self::ask`].
+    ///
+    /// Overwrites `opts.history`, `opts.conversation_id` and
+    /// `opts.turn_index` with the explicit arguments — every other field
+    /// of `opts` is forwarded untouched — then delegates to [`Self::ask`]
+    /// and returns its result as-is.
+    pub fn ask_with_history(
+        &self,
+        query: &str,
+        history: Vec<Turn>,
+        conversation_id: String,
+        turn_index: u32,
+        opts: AskOpts,
+    ) -> Result<Answer> {
+        // Field-by-field override: whatever the caller left in these
+        // three slots of `opts` is discarded.
+        let mut multi_turn = opts;
+        multi_turn.history = history;
+        multi_turn.conversation_id = Some(conversation_id);
+        multi_turn.turn_index = Some(turn_index);
+        self.ask(query, multi_turn)
+    }
+
    /// Run one query through the full pipeline. Always persists an
    /// `answers` row (including refusals); the row write is best-effort
    /// — a persistence error is surfaced via `tracing::warn!` so the
@@ -121,8 +160,14 @@ impl RagPipeline {
        // ── 1. Retrieve ────────────────────────────────────────────────────
        // floor at config default — see `AskOpts::k` doc for rationale.
        let k_effective = opts.k.max(self.config.search.default_k);
+        // p9-fb-15: query expansion when history is present.
+        // Concat the most-recent answer's first 200 chars so the
+        // retriever sees a hint of the previous turn (not the whole
+        // conversation). Cheap — LLM-based standalone-question
+        // rewriting is out of scope (spec §3.8 marks it P+).
+        let expanded_query = expand_query_with_history(query, &opts.history);
        let search_query = SearchQuery {
-            text: query.to_string(),
+            text: expanded_query,
            mode: opts.mode,
            k: k_effective,
            filters: SearchFilters::default(),
@@ -171,7 +216,25 @@ impl RagPipeline {

        // ── 4. Render prompt ───────────────────────────────────────────────
        let system = SYSTEM_PROMPT_RAG_V1.to_string();
-        let user = format!("[질문]\n{query}\n\n[근거]\n{packed_text}");
+        // p9-fb-15: prepend `[이전 대화]` block when history is
+        // present. `serialize_history` enforces the spec §3.8
+        // priority — system+question stay untouched, retrieved
+        // chunks already fit (`pack_context` honoured the budget),
+        // so the budget remaining for history is what's left over.
+        let history_budget_chars = remaining_history_budget_chars(
+            self.config.rag.max_context_tokens,
+            &system,
+            query,
+            &packed_text,
+        );
+        let history_block = serialize_history(&opts.history, history_budget_chars);
+        let user = if history_block.is_empty() {
+            format!("[질문]\n{query}\n\n[근거]\n{packed_text}")
+        } else {
+            format!(
+                "{history_block}\n\n[질문]\n{query}\n\n[근거]\n{packed_text}"
+            )
+        };

        // ── 5. Generate ────────────────────────────────────────────────────
        // Completion budget is bounded only by what the LM context window
@@ -322,6 +385,8 @@ impl RagPipeline {
            },
            usage: usage_final,
            created_at: OffsetDateTime::now_utc(),
+            conversation_id: opts.conversation_id.clone(),
+            turn_index: opts.turn_index,
        };

        // Drop the moved `finish_reason` early into a tracing breadcrumb; the
@@ -455,6 +520,8 @@ impl RagPipeline {
                latency_ms: elapsed_ms,
            },
            created_at: OffsetDateTime::now_utc(),
+            conversation_id: opts.conversation_id.clone(),
+            turn_index: opts.turn_index,
        };
        if let Err(e) = self.docs.put_answer(&answer, query, None) {
            tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
@@ -530,6 +597,8 @@ impl RagPipeline {
                latency_ms: elapsed_ms,
            },
            created_at: OffsetDateTime::now_utc(),
+            conversation_id: opts.conversation_id.clone(),
+            turn_index: opts.turn_index,
        };
        if let Err(e) = self.docs.put_answer(&answer, query, None) {
            tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
@@ -569,6 +638,80 @@ fn est_tokens(s: &str) -> usize {
    s.chars().count().div_ceil(4)
 }

+/// p9-fb-15: build the retrieval query for a follow-up question. With no
+/// history — or when the latest turn's answer is empty — the query is
+/// returned unchanged; otherwise the first 200 chars of the latest
+/// answer are appended after a single space (`<question> <prefix>`).
+/// Plain concat per spec §3.8; LLM-based standalone-question rewriting
+/// is P+. `history.last()` is taken to be the most recent turn.
+fn expand_query_with_history(query: &str, history: &[Turn]) -> String {
+    // Char-based cut, so a multi-byte char is never split.
+    let answer_head: String = history
+        .last()
+        .map(|latest| latest.answer.chars().take(200).collect::<String>())
+        .unwrap_or_default();
+    if answer_head.is_empty() {
+        return query.to_string();
+    }
+    format!("{query} {answer_head}")
+}
+
+/// p9-fb-15: char budget left over for the history block. The prompt
+/// ceiling is `max_context_tokens * 4` chars (4 chars ≈ 1 token); system,
+/// question, retrieved chunks and a flat 32-char template allowance are
+/// subtracted from it. Saturates at 0, which drops history entirely.
+fn remaining_history_budget_chars(
+    max_context_tokens: usize,
+    system: &str,
+    question: &str,
+    packed_text: &str,
+) -> usize {
+    const CHARS_PER_TOKEN: usize = 4;
+    // Rounded-up allowance for the template glue around the three parts.
+    const TEMPLATE_OVERHEAD_CHARS: usize = 32;
+    let committed: usize = [system, question, packed_text]
+        .iter()
+        .map(|part| part.chars().count())
+        .sum();
+    let ceiling = max_context_tokens.saturating_mul(CHARS_PER_TOKEN);
+    ceiling.saturating_sub(committed + TEMPLATE_OVERHEAD_CHARS)
+}
+
+/// p9-fb-15: render prior turns as the `[이전 대화]` prompt block. Turns
+/// are admitted newest-first until the next one would push header + body
+/// past `budget_chars`; the admitted turns are then emitted oldest →
+/// newest. Returns "" for empty history, zero budget, or when nothing fits.
+fn serialize_history(history: &[Turn], budget_chars: usize) -> String {
+    const HEADER: &str = "[이전 대화]\n";
+    if budget_chars == 0 || history.is_empty() {
+        return String::new();
+    }
+    let header_chars = HEADER.chars().count();
+    // Rendered turns in admission order (newest first), plus the running
+    // char count of what has been admitted so far (header excluded).
+    let mut admitted: Vec<String> = Vec::new();
+    let mut body_chars = 0usize;
+    for turn in history.iter().rev() {
+        let rendered = format!("Q: {}\nA: {}\n", turn.question, turn.answer);
+        let with_turn = body_chars + rendered.chars().count();
+        // Stop at the first misfit so the kept turns stay a contiguous
+        // run of the most recent ones.
+        if with_turn + header_chars > budget_chars {
+            break;
+        }
+        body_chars = with_turn;
+        admitted.push(rendered);
+    }
+    if admitted.is_empty() {
+        return String::new();
+    }
+    // Flip back to chronological order: the newest turn ends up at the
+    // bottom, directly above the current question.
+    std::iter::once(HEADER)
+        .chain(admitted.iter().rev().map(String::as_str))
+        .collect()
+}
+
 /// Strict marker regex per design §1 / spec line 107: `[#1]` … `[#999]`.
 /// Matches without `#`, with whitespace, or with non-digit content are
 /// intentionally ignored (see test plan rows 5–6).
@@ -634,4 +777,104 @@ mod tests {
        // 8 chars → 2 tokens
        assert_eq!(est_tokens("abcdefgh"), 2);
    }
+
+    // ── p9-fb-15: multi-turn helpers ───────────────────────────────────────
+
+    // Fixture: a turn with no citations and a fixed timestamp, so turns
+    // built from equal texts compare equal (`Turn` derives `PartialEq`).
+    fn fake_turn(question: &str, answer: &str) -> Turn {
+        let (question, answer) = (question.to_owned(), answer.to_owned());
+        let created_at = OffsetDateTime::UNIX_EPOCH;
+        let citations = Vec::new();
+        Turn { question, answer, citations, created_at }
+    }
+
+    #[test]
+    fn expand_query_with_history_empty_returns_query_unchanged() {
+        assert_eq!(expand_query_with_history("hi", &[]), "hi"); // no prior turns
+    }
+
+    #[test]
+    fn expand_query_with_history_concats_last_answer_prefix() {
+        // One prior turn whose answer is well under the 200-char cap.
+        let turns = [fake_turn("Q1", "first answer body")];
+        let got = expand_query_with_history("follow-up", &turns);
+        // The question leads, separated from the answer text by a space.
+        let (head, tail) = ("follow-up ", "first answer body");
+        assert!(got.starts_with(head), "got: {got}");
+        assert!(got.contains(tail), "got: {got}");
+    }
+
+    #[test]
+    fn expand_query_caps_last_answer_at_200_chars() {
+        // A 500-char answer: only its first 200 chars may be appended.
+        let turns = [fake_turn("Q", &"x".repeat(500))];
+        let got = expand_query_with_history("q", &turns);
+        // 1 (query "q") + 1 (separator space) + 200 (answer prefix).
+        assert_eq!(got.chars().count(), 1 + 1 + 200);
+    }
+
+    #[test]
+    fn expand_query_uses_last_turn_only() {
+        // Oldest → newest; only the final element may reach the query.
+        let older = fake_turn("Q1", "FIRST ANSWER");
+        let newest = fake_turn("Q2", "LATEST ANSWER");
+        let got = expand_query_with_history("q3", &[older, newest]);
+        let mentions = |needle: &str| got.contains(needle);
+        assert!(mentions("LATEST ANSWER"), "got: {got}");
+        assert!(!mentions("FIRST ANSWER"), "got: {got}");
+    }
+
+    #[test]
+    fn serialize_history_empty_returns_empty_string() {
+        let one_turn = [fake_turn("q", "a")];
+        assert_eq!(serialize_history(&[], 1000), ""); // no turns
+        assert_eq!(serialize_history(&one_turn, 0), ""); // no budget
+    }
+
+    #[test]
+    fn serialize_history_chronological_order_with_header() {
+        let turns: Vec<Turn> = [("Q1", "A1"), ("Q2", "A2"), ("Q3", "A3")]
+            .iter()
+            .map(|&(q, a)| fake_turn(q, a))
+            .collect();
+        let s = serialize_history(&turns, 1000);
+        assert!(s.starts_with("[이전 대화]\n"), "got: {s:?}");
+        // `unwrap` panics (failing the test) if either label is missing.
+        let at = |label: &str| s.find(label).unwrap();
+        let (q1_pos, q3_pos) = (at("Q1"), at("Q3"));
+        assert!(q1_pos < q3_pos, "chronological: oldest first; got: {s:?}");
+    }
+
+    #[test]
+    fn serialize_history_drops_oldest_when_budget_tight() {
+        let h = vec![
+            fake_turn("Q1", "A1"),
+            fake_turn("Q2", "A2"),
+            fake_turn("Q3", "A3"),
+        ];
+        // 8 (header) + 12 (one turn) = 20 ≤ 25 < 32 (two turns): only Q3 fits.
+        let s = serialize_history(&h, 25);
+        assert!(s.contains("Q3"), "newest must be kept: {s:?}");
+        assert!(!s.contains("Q2"), "only one turn fits: {s:?}");
+        assert!(!s.contains("Q1"), "oldest dropped: {s:?}");
+    }
+
+    #[test]
+    fn remaining_history_budget_subtracts_known_pieces() {
+        // Ceiling: 100 tokens * 4 chars = 400 chars.
+        // Spent: 100 (system) + 50 (question) + 150 (chunks) + 32
+        // (template overhead) = 332, which leaves 68 for history.
+        let sys = "x".repeat(100);
+        let (q, ctx) = ("y".repeat(50), "z".repeat(150));
+        let left = remaining_history_budget_chars(100, &sys, &q, &ctx);
+        assert_eq!(left, 400 - 100 - 50 - 150 - 32);
+    }
+
+    #[test]
+    fn remaining_history_budget_clamps_to_zero_when_overrun() {
+        // 1000 chars of system prompt vs a 10 * 4 = 40 char ceiling.
+        let huge = "x".repeat(1000);
+        assert_eq!(remaining_history_budget_chars(10, &huge, "q", "p"), 0);
+    }
 }
--- a/crates/kebab-rag/tests/pipeline.rs
+++ b/crates/kebab-rag/tests/pipeline.rs
@@ -72,6 +72,9 @@ fn default_opts() -> AskOpts {
        temperature: Some(0.0),
        seed: Some(0),
        stream_sink: None,
+        history: Vec::new(),
+        conversation_id: None,
+        turn_index: None,
    }
 }

--- a/crates/kebab-store-sqlite/src/answers.rs
+++ b/crates/kebab-store-sqlite/src/answers.rs
@@ -98,6 +98,7 @@ fn refusal_reason_label(r: &RefusalReason) -> &'static str {
        RefusalReason::LlmSelfJudge => "llm_self_judge",
        RefusalReason::NoIndex => "no_index",
        RefusalReason::NoChunks => "no_chunks",
+        RefusalReason::LlmStreamAborted => "llm_stream_aborted",
    }
 }

--- a/crates/kebab-tui/src/ask.rs
+++ b/crates/kebab-tui/src/ask.rs
@@ -141,6 +141,7 @@ fn render_status(f: &mut Frame, area: Rect, s: &AskState) {
                Some(RefusalReason::LlmSelfJudge) => "  refusal=llm_self_judge",
                Some(RefusalReason::NoIndex) => "  refusal=no_index",
                Some(RefusalReason::NoChunks) => "  refusal=no_chunks",
+                Some(RefusalReason::LlmStreamAborted) => "  refusal=llm_stream_aborted",
                None => "",
            };
            vec![
@@ -300,6 +301,11 @@ fn spawn_ask_worker(state: &mut App) {
        temperature: None,
        seed: None,
        stream_sink: Some(tx),
+        // p9-fb-15: TUI ask is single-shot in this task; multi-turn
+        // conversation UI lands in p9-fb-16.
+        history: Vec::new(),
+        conversation_id: None,
+        turn_index: None,
    };
    let handle =
        thread::spawn(move || kebab_app::ask_with_config(cfg, &query, opts));
--- a/crates/kebab-tui/tests/ask.rs
+++ b/crates/kebab-tui/tests/ask.rs
@@ -66,6 +66,8 @@ fn make_answer(grounded: bool, refusal: Option<RefusalReason>, body: &str) -> An
            latency_ms: 1200,
        },
        created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
+        conversation_id: None,
+        turn_index: None,
    }
 }

--- a/tasks/p9/p9-fb-15-rag-multi-turn-core.md
+++ b/tasks/p9/p9-fb-15-rag-multi-turn-core.md
@@ -3,7 +3,7 @@ phase: P9
 component: kebab-rag + kebab-app
 task_id: p9-fb-15
 title: "RAG multi-turn — history-aware prompt + token budget"
-status: planned
+status: in_progress
 depends_on: []
 unblocks: [p9-fb-16, p9-fb-17, p9-fb-18]
 contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md