Merge pull request 'feat(rag): multi-turn ask — Turn + ask_with_history + token budget (p9-fb-15)' (#60) from feat/p9-fb-15-rag-multiturn into main
This commit was merged in pull request #60.
This commit is contained in:
@@ -44,6 +44,7 @@ P0~P5 직렬. P6~P9 P5 이후 병렬 가능.
|
||||
- **2026-05-02 P9 도그푸딩 후속 (spec PR #51 + p9-fb-01 + p9-fb-02)** — `kebab ingest` 진행 표시 도입. frozen design §2.4a 신설 (wire schema `ingest_progress.v1` line-delimited streaming) + §10 의 long-running 작업 절 추가. `kebab-app::ingest_with_config_progress(.., progress: Option<Sender<IngestEvent>>)` facade 추가, 기존 `_with_config` 가 `progress=None` forwarding wrapper. CLI 가 indicatif TTY 진행 바 (stderr) / non-TTY 한 줄씩 / `--json` 모드는 line-delimited stdout. p9-fb-03 (TUI background worker) + p9-fb-04 (cancel) 가 같은 stream 위에 build.
|
||||
- **2026-05-02 P9 도그푸딩 후속 (p9-fb-03)** — TUI 의 background ingest worker. Library 의 `r` 키가 `kebab_app::ingest_with_config_progress` 를 spawned thread 에서 호출, run loop 가 매 frame 마다 progress channel drain → 화면 하단 status bar 1 줄 갱신. terminal event (`Completed`/`Aborted`) 후 3 초 final 라인 hold + 자동 hide + Library auto-refresh. spec: `tasks/p9/p9-fb-03-tui-ingest-background.md`. (cancel slot 은 p9-fb-04 가 추가하는 형태로 단일화 — 회차 1 review 결과.)
|
||||
- **2026-05-02 P9 도그푸딩 후속 (p9-fb-04)** — ingest cooperative cancellation. `kebab-app::ingest_with_config_cancellable(.., cancel: Option<Arc<AtomicBool>>)` facade 추가, 기존 `_progress` 가 `cancel=None` forwarding. asset loop iter 시작 boundary 마다 cancel poll → true 면 break + `IngestEvent::Aborted { partial_counts }` + `Ok(IngestReport)` 정상 반환 (Err 아님). 부분 commit 보존, 다음 ingest 가 idempotent 재개. CLI Ctrl-C SIGINT handler (`ctrlc` crate) — 1회: cancel, 2회: hard exit (130). TUI Esc / Ctrl-C 가 cancel signal (in-flight 시), 그 외에는 quit. `IngestState` 에 `cancel: Arc<AtomicBool>` field 추가. spec: `tasks/p9/p9-fb-04-ingest-cancellation.md`.
|
||||
- **2026-05-02 P9 도그푸딩 후속 (spec PR #59 + p9-fb-15)** — RAG multi-turn 도입. frozen design §3.8 갱신 — `Answer` 에 `conversation_id` / `turn_index` optional field, 신규 `Turn` struct, `RefusalReason::LlmStreamAborted` variant. `kebab-rag::AskOpts` 에 `history: Vec<Turn>` / `conversation_id` / `turn_index` 3 field 추가, 기존 caller 는 `Vec::new() / None` (single-shot 동작 동일). `RagPipeline::ask_with_history(query, history, conversation_id, turn_index, opts)` helper. prompt 빌드: `[이전 대화]` 블록을 user prompt 위에 prepend — newest turn 부터 채택하되 블록 안 표기는 시간순 (oldest → newest), char budget (`cfg.rag.max_context_tokens * 4` 에서 system + question + retrieved chunk 를 뺀 나머지) 을 넘으면 oldest 부터 drop. retrieval query expansion: 직전 answer 첫 200 자 concat. wire schema `answer.v1` 에 두 필드 + `format: date-time` 추가. p9-fb-16 (TUI conversation UI) + p9-fb-17/18 (V004 storage + CLI session) 가 같은 facade 위에 build. spec: `tasks/p9/p9-fb-15-rag-multi-turn-core.md`.
|
||||
|
||||
## 다음 task 후보
|
||||
|
||||
|
||||
@@ -30,6 +30,9 @@ fn ask_lexical_smoke() {
|
||||
temperature: Some(0.0),
|
||||
seed: Some(0),
|
||||
stream_sink: None,
|
||||
history: Vec::new(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
};
|
||||
// The fixture workspace contains "ownership" content; the model's
|
||||
// citation behavior depends on its training, so we don't assert on
|
||||
|
||||
@@ -430,6 +430,12 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
// once on completion). The TUI ask pane (P9-3) is what
|
||||
// wires up a real `mpsc::Sender` here.
|
||||
stream_sink: None,
|
||||
// p9-fb-15: CLI single-shot ask. p9-fb-18 adds
|
||||
// `--session` / `--repl` for multi-turn over the same
|
||||
// facade (passes a populated `history`).
|
||||
history: Vec::new(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
};
|
||||
let ans = kebab_app::ask_with_config(cfg, query, opts)?;
|
||||
if cli.json {
|
||||
|
||||
@@ -20,6 +20,15 @@ pub struct Answer {
|
||||
pub usage: TokenUsage,
|
||||
#[serde(with = "time::serde::rfc3339")]
|
||||
pub created_at: OffsetDateTime,
|
||||
/// p9-fb-15: same conversation 의 turn 들이 공유. CLI single-shot
|
||||
/// (history 없음) / TUI 첫 turn 은 None. blake3 해시 또는 사용자
|
||||
/// 명시 (`kebab ask --session <id>`, p9-fb-18).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub conversation_id: Option<String>,
|
||||
/// p9-fb-15: 같은 conversation 안 0-based 순서. 첫 turn = 0. None
|
||||
/// 이면 single-shot.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub turn_index: Option<u32>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
@@ -28,6 +37,19 @@ pub struct AnswerCitation {
|
||||
pub citation: Citation,
|
||||
}
|
||||
|
||||
/// p9-fb-15: one prior turn as it is fed into the prompt. The RAG facade
|
||||
/// takes a `Vec<Turn>` and builds the prompt from system + history +
|
||||
/// retrieval + new question. When the history does not fit the token
|
||||
/// budget, the oldest turns are dropped first
|
||||
/// (the newest turns are preserved preferentially).
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct Turn {
|
||||
/// Question text of this turn.
pub question: String,
|
||||
/// Answer text of this turn.
pub answer: String,
|
||||
/// Citations carried with this turn — presumably those of the answer
/// it was built from; confirm against callers.
pub citations: Vec<AnswerCitation>,
|
||||
/// Timestamp of the turn; (de)serialized as an RFC 3339 string.
#[serde(with = "time::serde::rfc3339")]
|
||||
pub created_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum RefusalReason {
|
||||
@@ -35,6 +57,10 @@ pub enum RefusalReason {
|
||||
LlmSelfJudge,
|
||||
NoIndex,
|
||||
NoChunks,
|
||||
/// p9-fb-15: ask 가 LLM 토큰 stream 도중 cancel 됨. partial answer
|
||||
/// 가 채워져 있을 수 있음 (사용자가 본 부분까지). RAG retrieval
|
||||
/// 자체는 정상 — 모델 generation 단계에서만 중단.
|
||||
LlmStreamAborted,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
|
||||
@@ -54,8 +54,8 @@ pub use search::{
|
||||
SearchMode, SearchQuery,
|
||||
};
|
||||
pub use answer::{
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason,
|
||||
TokenUsage, TraceId,
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
|
||||
TraceId, Turn,
|
||||
};
|
||||
pub use ingest::{IngestItem, IngestItemKind, IngestReport};
|
||||
pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus};
|
||||
|
||||
@@ -496,6 +496,8 @@ mod tests {
|
||||
},
|
||||
usage: TokenUsage { prompt_tokens: 1, completion_tokens: 1, latency_ms: 1 },
|
||||
created_at: OffsetDateTime::UNIX_EPOCH,
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -174,6 +174,11 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
|
||||
temperature: opts.temperature,
|
||||
seed: opts.seed,
|
||||
stream_sink: None,
|
||||
// p9-fb-15: golden eval is single-shot per query; no
|
||||
// conversational history.
|
||||
history: Vec::new(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
};
|
||||
match app.ask(&gq.query, ask_opts) {
|
||||
Ok(ans) => Some(ans),
|
||||
|
||||
@@ -36,7 +36,7 @@ use anyhow::{Context, Result};
|
||||
use kebab_core::{
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason,
|
||||
GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters,
|
||||
SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId,
|
||||
SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId, Turn,
|
||||
};
|
||||
use kebab_core::versions::PromptTemplateVersion;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
@@ -80,6 +80,22 @@ pub struct AskOpts {
|
||||
/// pipeline — `SendError` is silently swallowed and generation
|
||||
/// continues so the `Answer` row still gets persisted.
|
||||
pub stream_sink: Option<std::sync::mpsc::Sender<String>>,
|
||||
/// p9-fb-15: prior turns of the same conversation. Empty for
|
||||
/// single-shot ask. The pipeline prepends a serialized `[이전
|
||||
/// 대화]` block to the user prompt and uses the most-recent
|
||||
/// answer's first 200 chars to expand the retrieval query
|
||||
/// (cheap concat — LLM-based standalone-question rewriting is
|
||||
/// out of scope per spec §3.8). Rendered oldest-first; the oldest
|
||||
/// turns drop when the prompt would otherwise exceed
|
||||
/// `cfg.rag.max_context_tokens`.
|
||||
pub history: Vec<Turn>,
|
||||
/// p9-fb-15: same conversation 의 turn 들이 공유. Filled into
|
||||
/// `Answer.conversation_id`. None for single-shot ask.
|
||||
pub conversation_id: Option<String>,
|
||||
/// p9-fb-15: 0-based index within `conversation_id`. Caller
|
||||
/// (TUI / CLI session) computes from `history.len()`. None for
|
||||
/// single-shot ask.
|
||||
pub turn_index: Option<u32>,
|
||||
}
|
||||
|
||||
// ── RagPipeline ─────────────────────────────────────────────────────────────
|
||||
@@ -111,6 +127,29 @@ impl RagPipeline {
|
||||
}
|
||||
}
|
||||
|
||||
/// p9-fb-15: convenience for multi-turn ask. Stuffs `history`,
|
||||
/// `conversation_id`, `turn_index` into a fresh `AskOpts` (built
|
||||
/// from `opts.mode` + carried-through knobs) and forwards to
|
||||
/// [`Self::ask`]. The returned `Answer` carries the same
|
||||
/// `conversation_id` / `turn_index`. CLI / TUI sessions call this
|
||||
/// once per follow-up question.
|
||||
pub fn ask_with_history(
|
||||
&self,
|
||||
query: &str,
|
||||
history: Vec<Turn>,
|
||||
conversation_id: String,
|
||||
turn_index: u32,
|
||||
opts: AskOpts,
|
||||
) -> Result<Answer> {
|
||||
let combined = AskOpts {
|
||||
history,
|
||||
conversation_id: Some(conversation_id),
|
||||
turn_index: Some(turn_index),
|
||||
..opts
|
||||
};
|
||||
self.ask(query, combined)
|
||||
}
|
||||
|
||||
/// Run one query through the full pipeline. Always persists an
|
||||
/// `answers` row (including refusals); the row write is best-effort
|
||||
/// — a persistence error is surfaced via `tracing::warn!` so the
|
||||
@@ -121,8 +160,14 @@ impl RagPipeline {
|
||||
// ── 1. Retrieve ────────────────────────────────────────────────────
|
||||
// floor at config default — see `AskOpts::k` doc for rationale.
|
||||
let k_effective = opts.k.max(self.config.search.default_k);
|
||||
// p9-fb-15: query expansion when history is present.
|
||||
// Concat the most-recent answer's first 200 chars so the
|
||||
// retriever sees the full conversational context. Cheap —
|
||||
// LLM-based standalone-question rewriting is out of scope
|
||||
// (spec §3.8 marks it P+).
|
||||
let expanded_query = expand_query_with_history(query, &opts.history);
|
||||
let search_query = SearchQuery {
|
||||
text: query.to_string(),
|
||||
text: expanded_query,
|
||||
mode: opts.mode,
|
||||
k: k_effective,
|
||||
filters: SearchFilters::default(),
|
||||
@@ -171,7 +216,25 @@ impl RagPipeline {
|
||||
|
||||
// ── 4. Render prompt ───────────────────────────────────────────────
|
||||
let system = SYSTEM_PROMPT_RAG_V1.to_string();
|
||||
let user = format!("[질문]\n{query}\n\n[근거]\n{packed_text}");
|
||||
// p9-fb-15: prepend `[이전 대화]` block when history is
|
||||
// present. `serialize_history` enforces the spec §3.8
|
||||
// priority — system+question stay untouched, retrieved
|
||||
// chunks already fit (`pack_context` honoured the budget),
|
||||
// so the budget remaining for history is what's left over.
|
||||
let history_budget_chars = remaining_history_budget_chars(
|
||||
self.config.rag.max_context_tokens,
|
||||
&system,
|
||||
query,
|
||||
&packed_text,
|
||||
);
|
||||
let history_block = serialize_history(&opts.history, history_budget_chars);
|
||||
let user = if history_block.is_empty() {
|
||||
format!("[질문]\n{query}\n\n[근거]\n{packed_text}")
|
||||
} else {
|
||||
format!(
|
||||
"{history_block}\n\n[질문]\n{query}\n\n[근거]\n{packed_text}"
|
||||
)
|
||||
};
|
||||
|
||||
// ── 5. Generate ────────────────────────────────────────────────────
|
||||
// Completion budget is bounded only by what the LM context window
|
||||
@@ -322,6 +385,8 @@ impl RagPipeline {
|
||||
},
|
||||
usage: usage_final,
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
};
|
||||
|
||||
// Drop the moved `finish_reason` early into a tracing breadcrumb; the
|
||||
@@ -455,6 +520,8 @@ impl RagPipeline {
|
||||
latency_ms: elapsed_ms,
|
||||
},
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
};
|
||||
if let Err(e) = self.docs.put_answer(&answer, query, None) {
|
||||
tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
|
||||
@@ -530,6 +597,8 @@ impl RagPipeline {
|
||||
latency_ms: elapsed_ms,
|
||||
},
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
conversation_id: opts.conversation_id.clone(),
|
||||
turn_index: opts.turn_index,
|
||||
};
|
||||
if let Err(e) = self.docs.put_answer(&answer, query, None) {
|
||||
tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
|
||||
@@ -569,6 +638,80 @@ fn est_tokens(s: &str) -> usize {
|
||||
s.chars().count().div_ceil(4)
|
||||
}
|
||||
|
||||
/// p9-fb-15: expand the retrieval query with the most-recent answer's
|
||||
/// first 200 chars when history is non-empty. Cheap concat per spec
|
||||
/// §3.8 — LLM-based standalone-question rewriting is P+. The retriever
|
||||
/// sees `<question> <last answer prefix>` so embedding / FTS hit on
|
||||
/// names from the prior turn ("Y" in "Y vs X 의 차이?") still surfaces
|
||||
/// the right chunks.
|
||||
fn expand_query_with_history(query: &str, history: &[Turn]) -> String {
|
||||
let Some(last) = history.last() else {
|
||||
return query.to_string();
|
||||
};
|
||||
let prefix: String = last.answer.chars().take(200).collect();
|
||||
if prefix.is_empty() {
|
||||
query.to_string()
|
||||
} else {
|
||||
format!("{query} {prefix}")
|
||||
}
|
||||
}
|
||||
|
||||
/// p9-fb-15: how many *chars* of history block the prompt can still
/// afford.
///
/// The overall budget is `max_context_tokens * 4` chars (the same
/// 4-chars-per-token heuristic `est_tokens` uses). From that we subtract
/// the chars already committed to the system prompt, the question and
/// the packed retrieval text, plus a fixed allowance for the prompt
/// template's literal text. Lengths are counted in Unicode scalar values
/// (`chars()`), not bytes, so multi-byte text is not over-charged.
///
/// Returns 0 (history fully dropped) when the budget is already
/// exhausted. All arithmetic saturates, so extreme inputs cannot
/// overflow.
fn remaining_history_budget_chars(
    max_context_tokens: usize,
    system: &str,
    question: &str,
    packed_text: &str,
) -> usize {
    // Rough chars-per-token ratio used for budgeting.
    const CHARS_PER_TOKEN: usize = 4;
    // Allowance for the format-string overhead: `[질문]\n` +
    // `\n\n[근거]\n` + the `\n\n` between history and question. Rounded
    // up to 32 chars to keep the maths simple.
    const FORMAT_OVERHEAD_CHARS: usize = 32;

    let total_chars = max_context_tokens.saturating_mul(CHARS_PER_TOKEN);
    let committed_chars = [system, question, packed_text]
        .iter()
        .map(|part| part.chars().count())
        .fold(FORMAT_OVERHEAD_CHARS, usize::saturating_add);

    total_chars.saturating_sub(committed_chars)
}
|
||||
|
||||
/// p9-fb-15: serialize history into the `[이전 대화]` block. Newest
|
||||
/// turn first per spec §3.8 — the loop walks `history` in reverse and
|
||||
/// stops as soon as appending the next turn would exceed `budget_chars`.
|
||||
/// Empty when history is empty or no turn fits.
|
||||
fn serialize_history(history: &[Turn], budget_chars: usize) -> String {
|
||||
if history.is_empty() || budget_chars == 0 {
|
||||
return String::new();
|
||||
}
|
||||
// Build newest-first, then reverse so the LM reads chronological
|
||||
// order ("Q1/A1\nQ2/A2 → newest at the bottom, just above the
|
||||
// current question").
|
||||
let mut included_rev: Vec<String> = Vec::new();
|
||||
let mut used = 0usize;
|
||||
let header = "[이전 대화]\n";
|
||||
let header_len = header.chars().count();
|
||||
for turn in history.iter().rev() {
|
||||
let block = format!("Q: {}\nA: {}\n", turn.question, turn.answer);
|
||||
let blen = block.chars().count();
|
||||
if used + blen + header_len > budget_chars {
|
||||
break;
|
||||
}
|
||||
used += blen;
|
||||
included_rev.push(block);
|
||||
}
|
||||
if included_rev.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
let mut out = String::with_capacity(used + header_len);
|
||||
out.push_str(header);
|
||||
for block in included_rev.iter().rev() {
|
||||
out.push_str(block);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Strict marker regex per design §1 / spec line 107: `[#1]` … `[#999]`.
|
||||
/// Matches without `#`, with whitespace, or with non-digit content are
|
||||
/// intentionally ignored (see test plan rows 5–6).
|
||||
@@ -634,4 +777,104 @@ mod tests {
|
||||
// 8 chars → 2 tokens
|
||||
assert_eq!(est_tokens("abcdefgh"), 2);
|
||||
}
|
||||
|
||||
// ── p9-fb-15: multi-turn helpers ───────────────────────────────────────
|
||||
|
||||
fn fake_turn(question: &str, answer: &str) -> Turn {
|
||||
Turn {
|
||||
question: question.into(),
|
||||
answer: answer.into(),
|
||||
citations: Vec::new(),
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn expand_query_with_history_empty_returns_query_unchanged() {
    // Without any prior turn the retrieval query is the question verbatim.
    let no_history: Vec<Turn> = Vec::new();
    let expanded = expand_query_with_history("hi", &no_history);
    assert_eq!(expanded, "hi");
}
|
||||
|
||||
#[test]
fn expand_query_with_history_concats_last_answer_prefix() {
    // One prior turn: its answer is appended after the question.
    let history = [fake_turn("Q1", "first answer body")];
    let expanded = expand_query_with_history("follow-up", &history);

    let keeps_question_first = expanded.starts_with("follow-up ");
    let carries_prior_answer = expanded.contains("first answer body");
    assert!(keeps_question_first, "got: {expanded}");
    assert!(carries_prior_answer, "got: {expanded}");
}
|
||||
|
||||
#[test]
fn expand_query_caps_last_answer_at_200_chars() {
    // A 500-char answer must contribute only its first 200 chars.
    let oversized_answer = "x".repeat(500);
    let history = [fake_turn("Q", &oversized_answer)];
    let expanded_len = expand_query_with_history("q", &history).chars().count();

    // 1-char query + 1 separating space + 200 chars of answer prefix.
    let expected_len = "q".len() + " ".len() + 200;
    assert_eq!(expanded_len, expected_len);
}
|
||||
|
||||
#[test]
fn expand_query_uses_last_turn_only() {
    // Only the most recent turn's answer may leak into the query.
    let history = [
        fake_turn("Q1", "FIRST ANSWER"),
        fake_turn("Q2", "LATEST ANSWER"),
    ];
    let expanded = expand_query_with_history("q3", &history);

    let has_latest = expanded.contains("LATEST ANSWER");
    let has_first = expanded.contains("FIRST ANSWER");
    assert!(has_latest, "got: {expanded}");
    assert!(!has_first, "got: {expanded}");
}
|
||||
|
||||
#[test]
fn serialize_history_empty_returns_empty_string() {
    // Case 1: nothing to serialize, however generous the budget.
    let no_turns: Vec<Turn> = Vec::new();
    assert_eq!(serialize_history(&no_turns, 1000), "");

    // Case 2: a turn exists but the budget is zero.
    let single = [fake_turn("q", "a")];
    assert_eq!(serialize_history(&single, 0), "");
}
|
||||
|
||||
#[test]
fn serialize_history_chronological_order_with_header() {
    // Ample budget: every turn is kept, header first, oldest turn on top.
    let turns: Vec<Turn> = [("Q1", "A1"), ("Q2", "A2"), ("Q3", "A3")]
        .into_iter()
        .map(|(question, answer)| fake_turn(question, answer))
        .collect();
    let s = serialize_history(&turns, 1000);

    assert!(s.starts_with("[이전 대화]\n"), "got: {s:?}");

    let oldest_at = s.find("Q1").unwrap();
    let newest_at = s.find("Q3").unwrap();
    assert!(
        oldest_at < newest_at,
        "chronological: oldest first; got: {s:?}"
    );
}
|
||||
|
||||
#[test]
fn serialize_history_drops_oldest_when_budget_tight() {
    // Budget tight enough that only 1 of 3 turns fits.
    let h = vec![
        fake_turn("Q1", "A1"),
        fake_turn("Q2", "A2"),
        fake_turn("Q3", "A3"),
    ];
    // Header "[이전 대화]\n" is 8 chars and one turn ("Q: Q3\nA: A3\n")
    // is 12 chars, so a single turn needs 20 chars and two turns need
    // 32. A budget of 25 therefore admits exactly the newest turn.
    let s = serialize_history(&h, 25);
    assert!(s.contains("Q3"), "newest must be kept: {s:?}");
    // Pin the "only 1 of 3" claim: the middle turn must be gone too.
    assert!(!s.contains("Q2"), "middle turn must not fit: {s:?}");
    assert!(!s.contains("Q1"), "oldest dropped: {s:?}");
}
|
||||
|
||||
#[test]
fn remaining_history_budget_subtracts_known_pieces() {
    // Budget: 100 tokens * 4 chars = 400 chars.
    // Committed: system 100 + question 50 + packed 150 + 32 overhead = 332.
    // Remaining: 68.
    let (system, question, packed) = ("x".repeat(100), "y".repeat(50), "z".repeat(150));
    let left = remaining_history_budget_chars(100, &system, &question, &packed);
    assert_eq!(left, 400 - 100 - 50 - 150 - 32);
}
|
||||
|
||||
#[test]
fn remaining_history_budget_clamps_to_zero_when_overrun() {
    // A 1000-char system prompt dwarfs the 10-token (40-char) budget;
    // the result must clamp to zero rather than underflow.
    let oversized_system = "x".repeat(1000);
    let left = remaining_history_budget_chars(10, &oversized_system, "q", "p");
    assert_eq!(left, 0);
}
|
||||
}
|
||||
|
||||
@@ -72,6 +72,9 @@ fn default_opts() -> AskOpts {
|
||||
temperature: Some(0.0),
|
||||
seed: Some(0),
|
||||
stream_sink: None,
|
||||
history: Vec::new(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -98,6 +98,7 @@ fn refusal_reason_label(r: &RefusalReason) -> &'static str {
|
||||
RefusalReason::LlmSelfJudge => "llm_self_judge",
|
||||
RefusalReason::NoIndex => "no_index",
|
||||
RefusalReason::NoChunks => "no_chunks",
|
||||
RefusalReason::LlmStreamAborted => "llm_stream_aborted",
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -141,6 +141,7 @@ fn render_status(f: &mut Frame, area: Rect, s: &AskState) {
|
||||
Some(RefusalReason::LlmSelfJudge) => " refusal=llm_self_judge",
|
||||
Some(RefusalReason::NoIndex) => " refusal=no_index",
|
||||
Some(RefusalReason::NoChunks) => " refusal=no_chunks",
|
||||
Some(RefusalReason::LlmStreamAborted) => " refusal=llm_stream_aborted",
|
||||
None => "",
|
||||
};
|
||||
vec![
|
||||
@@ -300,6 +301,11 @@ fn spawn_ask_worker(state: &mut App) {
|
||||
temperature: None,
|
||||
seed: None,
|
||||
stream_sink: Some(tx),
|
||||
// p9-fb-15: TUI ask is single-shot in this task; multi-turn
|
||||
// conversation UI lands in p9-fb-16.
|
||||
history: Vec::new(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
};
|
||||
let handle =
|
||||
thread::spawn(move || kebab_app::ask_with_config(cfg, &query, opts));
|
||||
|
||||
@@ -66,6 +66,8 @@ fn make_answer(grounded: bool, refusal: Option<RefusalReason>, body: &str) -> An
|
||||
latency_ms: 1200,
|
||||
},
|
||||
created_at: OffsetDateTime::from_unix_timestamp(1_700_000_000).unwrap(),
|
||||
conversation_id: None,
|
||||
turn_index: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ phase: P9
|
||||
component: kebab-rag + kebab-app
|
||||
task_id: p9-fb-15
|
||||
title: "RAG multi-turn — history-aware prompt + token budget"
|
||||
status: planned
|
||||
status: in_progress
|
||||
depends_on: []
|
||||
unblocks: [p9-fb-16, p9-fb-17, p9-fb-18]
|
||||
contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
|
||||
|
||||
Reference in New Issue
Block a user