feat(fb-34): output budget controls #125
1
Cargo.lock
generated
@@ -3528,6 +3528,7 @@ name = "kebab-app"
|
||||
version = "0.4.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"base64 0.22.1",
|
||||
"blake3",
|
||||
"dirs 5.0.1",
|
||||
"ignore",
|
||||
|
||||
@@ -80,6 +80,7 @@ rmcp = { version = "1.6", default-features = false, features = ["server"
|
||||
# a tokio runtime to host its mock server (the runtime adapter crate stays
|
||||
# sync via reqwest::blocking — wiremock is dev-only there).
|
||||
wiremock = "0.6"
|
||||
base64 = "0.22"
|
||||
|
||||
# Disk-footprint trim for dev / test builds. Codegen, opt-level, and
|
||||
# behavior are unchanged — only DWARF debug info is reduced (line
|
||||
|
||||
@@ -71,7 +71,7 @@ kebab doctor
|
||||
|------|------|
|
||||
| `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 |
|
||||
| `kebab ingest [<path>]` | Markdown / 이미지 / PDF 색인 (idempotent). TTY 에서는 stderr 진행 바, non-TTY (CI / pipe) 는 stderr 한 줄씩, `--json` 은 stdout 에 `ingest_progress.v1` 라인 streaming 후 마지막에 `ingest_report.v1`. Ctrl-C 한 번이면 현재 asset 마무리 후 abort (부분 commit 보존, idempotent re-run), 두 번째 Ctrl-C 는 hard exit. Markdown title 이 frontmatter 에 없어도 첫 H1 → H2 → 첫 paragraph 80 자 → 파일명 순으로 자동 채움 (parser_version `md-frontmatter-v2`) — 기존 색인된 doc 도 다음 ingest 에서 새 title 로 갱신. **Incremental** (p9-fb-23): 두 번째 이후의 ingest 는 변하지 않은 doc (blake3 + parser/chunker/embedder version 모두 동일) 의 parse/chunk/embed/vector upsert 를 자동 스킵. final summary 에 `N unchanged` 카운트 표시. `--force-reingest` 로 skip 무시 강제 재처리. **지원 형식** (extractor 자동 결정 — config 에 명시 불가): Markdown (`.md`), 이미지 (`.png` / `.jpg` / `.jpeg`, OCR + caption), PDF (`.pdf`). 다른 확장자는 자동 skip — `IngestItem.warnings` 에 사유 (`"unsupported media type: .docx"` 등), `IngestReport.skipped_by_extension` 에 카운트 분류, CLI / TUI summary 에 breakdown 표시. |
|
||||
| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale |
|
||||
| `kebab search --mode {lexical,vector,hybrid} "<query>" [--no-cache] [--max-tokens N] [--snippet-chars N] [--cursor <opaque>]` | 검색. hybrid는 RRF fusion, citation 포함. 같은 process 안에서 동일 query (NFKC + trim + lowercase 정규화) 반복 시 in-process LRU 캐시 hit (capacity = `[search] cache_capacity`, default 256). `--no-cache` 로 강제 bypass — 디버깅용. ingest commit 발생 시 `kv['corpus_revision']` bump 으로 모든 entry 자동 stale. **`--max-tokens` / `--snippet-chars` / `--cursor` (p9-fb-34)** — agent budget controls. `--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare array 와 호환 안 됨. mismatched cursor → `error.v1.code = stale_cursor` |
|
||||
| `kebab list docs` | 색인된 문서 목록 |
|
||||
| `kebab inspect doc <id>` / `kebab inspect chunk <id>` | raw record 보기 |
|
||||
| `kebab ask "<query>" [--show-citations / --hide-citations] [--session <id>] [--stream]` | RAG 답변 + 근거 인용. 답변 후 `근거:` block 으로 full path / line range / score 한 줄씩 (default ON — `--hide-citations` 로 끄기, pipe 시 유용). 근거 부족 시 거절. Ollama 필요. `--session <id>` 로 multi-turn — 첫 호출에서 SQLite `chat_sessions` 에 자동 생성, 이후 호출은 prior turns 를 history 로 받아 follow-up. session id 는 사용자 지정 (e.g. `kb-rust-async-2026-05`) — `kebab reset --data-only` 로 모든 session wipe. **`--stream` (p9-fb-33)** 로 ndjson `answer_event.v1` event (retrieval_done → token* → final) 를 stderr 에 흘리고 stdout 마지막 줄에 기존 `answer.v1` — agent 가 token 즉시 소비 가능 |
|
||||
|
||||
@@ -52,6 +52,8 @@ unicode-normalization = "0.1"
|
||||
# p9-fb-31: GitignoreBuilder for .kebabignore matching in ingest_file_with_config.
|
||||
# Same version as kebab-source-fs (0.4) to avoid duplicate dep versions.
|
||||
ignore = "0.4"
|
||||
# p9-fb-34: opaque pagination cursor encodes payload as base64.
|
||||
base64 = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
rusqlite = { workspace = true }
|
||||
|
||||
@@ -41,7 +41,7 @@ use lru::LruCache;
|
||||
|
||||
use kebab_core::{
|
||||
Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode,
|
||||
SearchQuery, VectorStore,
|
||||
SearchOpts, SearchQuery, VectorStore,
|
||||
};
|
||||
use kebab_embed_local::FastembedEmbedder;
|
||||
use kebab_llm_local::OllamaLanguageModel;
|
||||
@@ -50,6 +50,28 @@ use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use kebab_store_vector::LanceVectorStore;
|
||||
|
||||
/// p9-fb-34: top-level wrapper around a paginated, budget-limited
|
||||
/// search result. Mirrors the wire `search_response.v1` shape.
|
||||
///
|
||||
/// `next_cursor` is non-null whenever more hits may be reachable —
|
||||
/// either the retriever filled the page (more behind it), or the
|
||||
/// budget loop popped hits (those popped hits remain fetchable
|
||||
|
|
||||
/// from `offset + returned`). It is null only when the retriever
|
||||
/// returned fewer hits than requested AND nothing was popped — i.e.
|
||||
/// the corpus has nothing more for this query.
|
||||
///
|
||||
/// `truncated` is independent of `next_cursor`: it signals that
|
||||
/// the budget loop modified the page (snippet shorten or k pop).
|
||||
/// Caller may either widen `max_tokens` (and re-issue the same
|
||||
|
claude-reviewer-01
commented
Doc rewrite is sharp — "non-null whenever more hits may be reachable" is the inverse of what R2 caught, and the explicit "truncated is independent of next_cursor" line means a future reader can't slide back into the widen-vs-paginate dichotomy. Schema description + SKILL.md two bullets all carry the same framing — no surface left contradicting another.
|
||||
/// query) or follow `next_cursor` (to advance through more hits)
|
||||
/// or both.
|
||||
#[derive(Clone, Debug)]
pub struct SearchResponse {
    /// Hits on this page, after the cursor offset has been skipped and
    /// any budget trimming (snippet shortening / hit popping) applied.
    pub hits: Vec<SearchHit>,
    /// Opaque cursor for resuming after this page; `None` when no
    /// further hits are expected for the query.
    pub next_cursor: Option<String>,
    /// `true` when the budget loop shortened snippets or dropped hits
    /// from this page.
    pub truncated: bool,
}
|
||||
|
||||
/// Facade state — see module docs for lifetime rules.
|
||||
///
|
||||
/// The struct is public so long-lived callers (kb-eval, the future P9
|
||||
@@ -274,6 +296,134 @@ impl App {
|
||||
Ok(hits)
|
||||
}
|
||||
|
||||
/// p9-fb-34: budget-aware search facade. Returns hits trimmed to
|
||||
/// `opts.max_tokens` (chars/4 approximation) plus pagination
|
||||
/// metadata. `App::search` is now a thin wrapper that drops the
|
||||
/// metadata for backwards compat.
|
||||
///
|
||||
/// `SearchResponse.next_cursor` and `truncated` are independent
|
||||
/// signals — see `SearchResponse` doc for details.
|
||||
pub fn search_with_opts(
|
||||
&self,
|
||||
query: SearchQuery,
|
||||
opts: SearchOpts,
|
||||
) -> Result<SearchResponse> {
|
||||
use crate::cursor;
|
||||
|
||||
let corpus_revision = self.sqlite.corpus_revision().to_string();
|
||||
let offset = match opts.cursor.as_ref() {
|
||||
// p9-fb-34: wrap the typed ErrorV1 in StructuredError so
|
||||
// anyhow carries the structured payload all the way to
|
||||
// `classify` — string formatting here would degrade
|
||||
|
claude-reviewer-01
commented
**[critical]** spec contract 위반: `cursor::decode` 가 만든 구조화된 `ErrorV1 { code: "stale_cursor", ... }` 가 여기서 `anyhow!("stale_cursor: {}", e.message)` 로 string 으로 변환되면서 typed payload 가 사라짐. 이후 CLI/MCP 가 `classify(&err, ...)` 로 wire 변환할 때 어떤 `downcast_ref` 도 매칭 안 돼서 `code: "generic"` 으로 떨어짐. 결과적으로 spec / `search_response.schema.json` description / `SKILL.md` 가 모두 `error.v1.code = stale_cursor` 약속하지만 실제 wire 는 `code: generic`. 권장: `cursor::decode` 의 `ErrorV1` 를 별도 typed error (e.g. `StaleCursorSignal`) 로 wrap → `anyhow::Error::new(...)` 로 보존 → `error_wire::classify` 에 downcast 분기 추가. test 도 `kebab search --cursor <stale> --json` exec 후 stderr ndjson 의 `code` 필드를 직접 검증해야 함.
|
||||
// `code = "stale_cursor"` to `code = "generic"` on the wire.
|
||||
Some(c) => cursor::decode(c, &corpus_revision)
|
||||
.map_err(|e| anyhow::Error::new(crate::error_wire::StructuredError(e)))?,
|
||||
None => 0,
|
||||
};
|
||||
|
||||
let snippet_chars = opts
|
||||
.snippet_chars
|
||||
.unwrap_or(self.config.search.snippet_chars);
|
||||
|
||||
// Fetch enough to satisfy offset + the requested page. The
|
||||
// retriever returns at most `fetch_k` hits — we then drop
|
||||
// `offset` and keep the next `k_effective`. `k = 0` is
|
||||
// treated as "use config default" so a caller passing through
|
||||
// a default-constructed `SearchQuery` still gets useful work
|
||||
// out of the budget facade.
|
||||
let k_effective = if query.k == 0 {
|
||||
self.config.search.default_k
|
||||
} else {
|
||||
query.k
|
||||
};
|
||||
let fetch_k = offset.saturating_add(k_effective);
|
||||
let fetch_query = SearchQuery {
|
||||
k: fetch_k,
|
||||
..query.clone()
|
||||
};
|
||||
let mut all_hits = self.search(fetch_query)?;
|
||||
|
||||
// Skip offset.
|
||||
let drop_n = offset.min(all_hits.len());
|
||||
all_hits.drain(..drop_n);
|
||||
let mut hits: Vec<SearchHit> =
|
||||
all_hits.into_iter().take(k_effective).collect();
|
||||
|
||||
// Apply snippet_chars override if shorter than what the
|
||||
// retriever returned (retriever already honored
|
||||
// `config.search.snippet_chars`; this only kicks in when the
|
||||
// caller asked for *less*).
|
||||
if opts.snippet_chars.is_some() {
|
||||
for h in hits.iter_mut() {
|
||||
if h.snippet.chars().count() > snippet_chars {
|
||||
h.snippet = trim_to_chars(&h.snippet, snippet_chars);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
claude-reviewer-01
commented
**[nit]** `max_tokens=0` 엣지케이스 미커버. `0_usize.saturating_mul(4) = 0` → step 1 이 snippet 을 60-char floor 까지 줄이고, step 2 가 hits 를 1개까지 pop, 결국 1 hit + 60-char snippet + `truncated=true` 반환. 동작 자체는 합리적 (≥1 hit floor 보장) 하지만 spec 에는 정의 안 됨. 테스트 한 줄 추가 (`max_tokens: Some(0)` → 정확히 1 hit + truncated=true) 하거나 spec 에 "max_tokens 0 = effectively 1-hit floor" 명시 권장.
|
||||
// Budget loop.
|
||||
let mut truncated = false;
|
||||
if let Some(max_tokens) = opts.max_tokens {
|
||||
let max_chars = max_tokens.saturating_mul(4);
|
||||
// Step 1: shorten snippets progressively to a 60-char floor.
|
||||
const SNIPPET_FLOOR: usize = 60;
|
||||
let mut current_snippet_cap = snippet_chars;
|
||||
while estimate_chars(&hits) > max_chars
|
||||
&& current_snippet_cap > SNIPPET_FLOOR
|
||||
{
|
||||
current_snippet_cap =
|
||||
(current_snippet_cap / 2).max(SNIPPET_FLOOR);
|
||||
for h in hits.iter_mut() {
|
||||
if h.snippet.chars().count() > current_snippet_cap {
|
||||
h.snippet =
|
||||
trim_to_chars(&h.snippet, current_snippet_cap);
|
||||
truncated = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Step 2: pop hits from the end until we fit, but always
|
||||
// keep ≥ 1.
|
||||
while estimate_chars(&hits) > max_chars && hits.len() > 1 {
|
||||
hits.pop();
|
||||
truncated = true;
|
||||
}
|
||||
}
|
||||
|
||||
// p9-fb-34: emit cursor whenever more hits may be reachable.
|
||||
// Three cases produce a non-null cursor:
|
||||
// (a) returned == k_effective: retriever filled the page; there
|
||||
// may be more behind it. Speculative — next call may return
|
||||
// an empty page if nothing remains.
|
||||
// (b) truncated by k-pop: returned < k_effective because we
|
||||
// popped hits to fit the budget. Those popped hits live at
|
||||
// offset+returned..; next call (with same or wider budget)
|
||||
// resumes from there.
|
||||
// (c) truncated by snippet-only shrink: returned == k_effective,
|
||||
// falls under (a). Cursor lets caller paginate; widening
|
||||
// --max-tokens lets caller re-fetch fuller snippets at the
|
||||
// same offset.
|
||||
//
|
||||
|
claude-reviewer-01
commented
**[significant]** snippet-only truncation case 에서 misleading `next_cursor`. budget 이 hit 을 pop 안 하고 snippet 만 줄였을 때 `truncated=true` 이지만 `returned == k_effective` 이라 `full_page` 분기로 들어가 `next_cursor = offset + k_effective` 로 다음 *페이지* 를 가리킴. 그런데 SKILL.md 에 "either widen max_tokens or paginate via next_cursor" 라고 안내 — agent 가 `next_cursor` 를 따라가면 같은 hit 들의 full-snippet 이 아니라 다음 hit 들을 받음. 두 옵션의 의미가 다름. 명확히 하려면: (1) snippet-only-shrunk 케이스에서는 `next_cursor` 를 emit 하지 않거나, (2) `truncated_kind: "snippet" | "k_pop"` 같은 enum 추가, (3) 최소한 SKILL.md 와 schema description 에 "snippet 단축 시에는 widen 만 의미 있음" 명시.
|
||||
// No cursor when neither (a) nor (b) applies — i.e. the retriever
|
||||
// returned fewer than k_effective AND we didn't pop. That means
|
||||
// end of available results.
|
||||
let returned = hits.len();
|
||||
let next_cursor = if returned == k_effective || truncated {
|
||||
|
claude-reviewer-01
commented
회차 2 — REQUEST_CHANGES — code/doc 의미가 정반대로 갈렸다. 새 조건은 `returned == k_effective` 일 때만 cursor 를 emit. 그러나 budget 루프 step-2 (`hits.pop()`) 가 실행되면 `hits.len() < k_effective` 가 되므로 **k-pop 케이스에서는 cursor 가 null** 이고, snippet-only shrinkage 케이스에서는 `hits.len() == k_effective` 이라 **cursor 가 non-null** 이다. 즉 실제 동작:
- snippet-only shrunk → `truncated=true, next_cursor=Some(...)`
- k-popped → `truncated=true, next_cursor=None`
반면 SKILL.md / schema / 바로 위 주석은 정반대로 약속함 (`next_cursor != null → k reduced → paginate`, `next_cursor null → snippet shrunk → widen`). 위 주석의 `both produce returned == k_effective` 는 `pop()` 이후에는 성립하지 않음. agent 가 docs 를 따르면 snippet-shrunk 페이지에서는 widen 해야 하는데 paginate 하고, k-popped 페이지에서는 paginate 해야 하는데 widen 하게 됨 — 둘 다 잘못된 방향.
선택지:
1. **docs 를 코드 동작에 맞춰 뒤집기**: "snippet-shrunk → cursor 있음 (다음 페이지에서 fuller snippet 받기 위해 widen + paginate 가능)", "k-popped → cursor 없음 (re-issue with bigger budget)". 하지만 이러면 k-popped 시 진행 불가 — 사용자가 widen 했는데도 같은 query 만 가능하고, popped 된 hits 를 fetch 하려면 cursor 가 필요.
2. **코드를 docs 에 맞추기**: k-pop 분기일 때 `offset + (popped 이전의 returned)` 로 cursor emit, snippet-only shrunk 일 때 cursor 생략. 즉 `truncated_by_pop` 플래그를 step-2 안에서 set 하고 그걸로 분기.
fb-34 spec 의도가 `next_cursor=null → widen` 이라면 옵션 2 가 맞음. 결정과 함께 주석/docs/code 셋 다 sync 시켜야 함.
claude-reviewer-01
commented
Predicate inversion fixed cleanly: `returned == k_effective || truncated` covers both the speculative-full-page path and the k-pop path. The three-case comment block (a/b/c) walks through the invariants exactly the way an agent author would want to read them — and pinning case (c) as falling under (a) closes the snippet-only-shrink corner. Nice.
|
||||
if offset.saturating_add(returned) > 0 {
|
||||
Some(cursor::encode(offset + returned, &corpus_revision))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Ok(SearchResponse {
|
||||
hits,
|
||||
next_cursor,
|
||||
truncated,
|
||||
})
|
||||
}
|
||||
|
||||
/// Run a RAG `ask` against the configured retriever + LLM. Reuses
|
||||
/// the memoized embedder / vector / LLM where applicable.
|
||||
pub fn ask(&self, query: &str, opts: AskOpts) -> Result<Answer> {
|
||||
@@ -627,6 +777,34 @@ fn blake3_truncate(input: &str) -> u128 {
|
||||
u128::from_be_bytes(buf)
|
||||
}
|
||||
|
||||
/// p9-fb-34: trim `s` to at most `n` Unicode scalar chars. Cheap
|
||||
/// alternative to a `.chars().take(n).collect::<String>()` pattern;
|
||||
/// reserves capacity proportional to UTF-8 worst case (4 bytes / char)
|
||||
/// so the inner push never re-allocates.
|
||||
fn trim_to_chars(s: &str, n: usize) -> String {
|
||||
if s.chars().count() <= n {
|
||||
return s.to_string();
|
||||
}
|
||||
let mut out = String::with_capacity(n.saturating_mul(4));
|
||||
for (i, c) in s.chars().enumerate() {
|
||||
if i >= n {
|
||||
break;
|
||||
}
|
||||
out.push(c);
|
||||
}
|
||||
|
claude-reviewer-01
commented
**[nit]** doc-comment 부정확. "so a single broken hit never makes the loop loop forever" 가 의도 — 하지만 `serde_json::to_string` 실패 시 0 을 반환하면 오히려 loop 가 *조기 종료* 됨 (`estimate_chars(&hits) > max_chars` 가 false 가 되니까). 무한 루프 방지가 아니라 graceful degradation. 코멘트 한 줄 정정하거나 panic-on-error 가 더 명확할 수도 (SearchHit 직렬화 실패는 본질적으로 invariant 위반).
|
||||
out
|
||||
}
|
||||
|
||||
/// p9-fb-34: estimate wire JSON char cost of the hit list. Returns 0
|
||||
/// per-hit when serialization fails — a SearchHit serialization
|
||||
/// failure is an invariant violation; we degrade gracefully (loop
|
||||
/// terminates early) rather than panic in the budget loop.
|
||||
fn estimate_chars(hits: &[SearchHit]) -> usize {
|
||||
hits.iter()
|
||||
.map(|h| serde_json::to_string(h).map(|s| s.len()).unwrap_or(0))
|
||||
.sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
75
crates/kebab-app/src/cursor.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
//! p9-fb-34 opaque pagination cursor.
|
||||
//!
|
||||
//! Format: base64(JSON({offset: usize, corpus_revision: string})).
|
||||
//! Opaque to callers — they MUST NOT decode the contents themselves;
|
||||
//! the schema is internal and may change without notice.
|
||||
|
||||
use base64::Engine;
|
||||
use base64::engine::general_purpose::URL_SAFE_NO_PAD;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::error_wire::ErrorV1;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
struct Payload {
    /// Pagination offset carried by the cursor; `decode` returns it
    /// unchanged once the revision check passes.
    offset: usize,
    /// Corpus revision the cursor was issued against; `decode` rejects
    /// the cursor when this differs from the expected revision.
    corpus_revision: String,
}
|
||||
|
||||
/// Encode `(offset, corpus_revision)` as an opaque base64 string.
|
||||
pub fn encode(offset: usize, corpus_revision: &str) -> String {
|
||||
let payload = Payload {
|
||||
offset,
|
||||
corpus_revision: corpus_revision.to_string(),
|
||||
};
|
||||
let json = serde_json::to_vec(&payload).expect("Payload serializes");
|
||||
URL_SAFE_NO_PAD.encode(&json)
|
||||
}
|
||||
|
||||
/// Decode an opaque cursor against the expected `corpus_revision`.
|
||||
/// Mismatch or malformed input returns an `ErrorV1` with
|
||||
/// `code = "stale_cursor"`.
|
||||
//
|
||||
// p9-fb-34: ErrorV1 is the workspace-wide wire error struct (~200B
|
||||
// after monomorphization with Value + String fields). Boxing here
|
||||
// would force every call site to deref through a Box for no win —
|
||||
// the err-path is rare. Single allow at the function level.
|
||||
//
|
||||
// p9-fb-34 round-1 review: differentiate the three failure modes
|
||||
// (base64 / JSON / revision mismatch) with distinct messages — all
|
||||
// keep `code = "stale_cursor"` so the agent's branching logic stays
|
||||
// the same, but humans reading the message get a precise hint.
|
||||
|
claude-reviewer-01
commented
**[nit]** base64 decode 실패 시 `stale("<malformed>", expected_revision)` 호출 → 메시지가 `"cursor was issued against corpus_revision '<malformed>'; current revision is 'rev-xyz'"` 형태가 되는데, 실제로는 base64 자체가 깨졌으므로 "issued against" 표현이 부정확. 원인 구분된 메시지가 agent debugging 에 더 도움. 예: malformed 케이스는 `"cursor is not valid base64"`, JSON 파싱 실패는 `"cursor payload is malformed"`, revision mismatch 만 현재 메시지. 모두 `code: "stale_cursor"` 유지 (caller 의 retry 동작은 동일).
|
||||
#[allow(clippy::result_large_err)]
|
||||
pub fn decode(s: &str, expected_revision: &str) -> Result<usize, ErrorV1> {
|
||||
let bytes = URL_SAFE_NO_PAD.decode(s.as_bytes()).map_err(|_| ErrorV1 {
|
||||
schema_version: "error.v1".to_string(),
|
||||
code: "stale_cursor".to_string(),
|
||||
message: "cursor is not valid base64. Re-issue search to obtain a fresh cursor."
|
||||
.to_string(),
|
||||
details: Value::Null,
|
||||
hint: None,
|
||||
})?;
|
||||
let payload: Payload = serde_json::from_slice(&bytes).map_err(|_| ErrorV1 {
|
||||
schema_version: "error.v1".to_string(),
|
||||
code: "stale_cursor".to_string(),
|
||||
message: "cursor payload is malformed. Re-issue search to obtain a fresh cursor."
|
||||
.to_string(),
|
||||
details: Value::Null,
|
||||
hint: None,
|
||||
})?;
|
||||
if payload.corpus_revision != expected_revision {
|
||||
return Err(ErrorV1 {
|
||||
schema_version: "error.v1".to_string(),
|
||||
code: "stale_cursor".to_string(),
|
||||
message: format!(
|
||||
"cursor was issued against corpus_revision '{}'; current revision is \
|
||||
'{}'. Re-issue search to obtain a fresh cursor.",
|
||||
payload.corpus_revision, expected_revision
|
||||
),
|
||||
details: Value::Null,
|
||||
hint: None,
|
||||
});
|
||||
}
|
||||
Ok(payload.offset)
|
||||
}
|
||||
@@ -11,6 +11,12 @@ use serde_json::{Value, json};
|
||||
|
||||
use crate::error_signal::{ConfigInvalid, LlmError, NotIndexed};
|
||||
|
||||
// p9-fb-34: `stale_cursor` is constructed directly by `cursor::decode`
|
||||
// and surfaced through `StructuredError` (an anyhow-friendly wrapper
|
||||
// that carries the typed `ErrorV1` payload without lossy string
|
||||
// formatting). `classify` short-circuits on it at the top of the
|
||||
// function so the typed `code = "stale_cursor"` reaches the wire.
|
||||
|
||||
/// Wire schema id for [`ErrorV1`]. Single source of truth — kebab-cli
|
||||
/// + kebab-mcp use this via `kebab_app::ERROR_V1_ID`.
|
||||
pub const ERROR_V1_ID: &str = "error.v1";
|
||||
@@ -24,7 +30,29 @@ pub struct ErrorV1 {
|
||||
pub hint: Option<String>,
|
||||
}
|
||||
|
||||
/// p9-fb-34: typed wrapper around an [`ErrorV1`] so callers that
|
||||
/// surface `anyhow::Error` can downcast back to the structured wire
|
||||
/// payload instead of losing it to string formatting. Constructed by
|
||||
/// the cursor code path (`cursor::decode` → `App::search_with_opts`)
|
||||
/// and short-circuited inside [`classify`].
|
||||
#[derive(Debug)]
|
||||
pub struct StructuredError(pub ErrorV1);
|
||||
|
||||
impl std::fmt::Display for StructuredError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "[{}] {}", self.0.code, self.0.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for StructuredError {}
|
||||
|
||||
pub fn classify(err: &anyhow::Error, verbose: bool) -> ErrorV1 {
|
||||
// p9-fb-34: structured wrapper short-circuits — preserves the
|
||||
// typed payload that callers (cursor::decode) constructed
|
||||
// instead of falling through to `code = "generic"`.
|
||||
if let Some(s) = err.downcast_ref::<StructuredError>() {
|
||||
return s.0.clone();
|
||||
}
|
||||
if let Some(s) = err.downcast_ref::<ConfigInvalid>() {
|
||||
return ErrorV1 {
|
||||
schema_version: ERROR_V1_ID.to_string(),
|
||||
@@ -197,4 +225,36 @@ mod tests {
|
||||
let v1 = classify(&err, false);
|
||||
assert_eq!(v1.code, "io_error");
|
||||
}
|
||||
|
||||
#[test]
fn stale_cursor_is_not_routed_through_classify() {
    // p9-fb-34: `stale_cursor` is built only by cursor::decode (single
    // source of truth). A bare anyhow string that merely mentions the
    // code must not be pattern-matched by `classify`.
    let bare: anyhow::Error = anyhow::anyhow!("stale_cursor: rev mismatch");
    let classified = classify(&bare, false);
    assert_ne!(classified.code, "stale_cursor", "classify must not produce stale_cursor from bare anyhow string");
}
|
||||
|
||||
#[test]
fn stale_cursor_propagates_through_structured_wrapper() {
    // p9-fb-34: positive side of the structured-wrapper contract. A typed
    // ErrorV1 wrapped in `StructuredError` and carried by anyhow must
    // come back out of `classify` with its code and message intact.
    let wrapped = StructuredError(ErrorV1 {
        schema_version: ERROR_V1_ID.to_string(),
        code: "stale_cursor".to_string(),
        message: "test stale cursor".to_string(),
        details: Value::Null,
        hint: None,
    });
    let err = anyhow::Error::new(wrapped);

    let ErrorV1 { code, message, .. } = classify(&err, false);
    assert_eq!(code, "stale_cursor");
    assert_eq!(message, "test stale cursor");
}
|
||||
}
|
||||
|
||||
@@ -55,6 +55,7 @@ use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_source_fs::FsSourceConnector;
|
||||
|
||||
mod app;
|
||||
pub mod cursor;
|
||||
pub mod doctor_signal;
|
||||
pub mod error_signal;
|
||||
pub mod error_wire;
|
||||
@@ -65,10 +66,10 @@ pub mod reset;
|
||||
pub mod schema;
|
||||
mod staleness;
|
||||
|
||||
pub use app::App;
|
||||
pub use app::{App, SearchResponse};
|
||||
pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
|
||||
pub use reset::{ResetReport, ResetScope};
|
||||
pub use error_wire::{ERROR_V1_ID, ErrorV1, classify};
|
||||
pub use error_wire::{ERROR_V1_ID, ErrorV1, StructuredError, classify};
|
||||
pub use schema::{Capabilities, Models, SCHEMA_V1_ID, SchemaV1, Stats, WireBlock, schema_with_config};
|
||||
pub use staleness::{compute_stale, mark_stale_in_place};
|
||||
|
||||
@@ -1739,6 +1740,19 @@ pub fn search_uncached_with_config(
|
||||
App::open_with_config(config)?.search_uncached(query)
|
||||
}
|
||||
|
||||
/// p9-fb-34: budget-aware search free function. Mirrors
|
||||
/// [`search_with_config`] but threads `SearchOpts` (max_tokens,
|
||||
/// snippet_chars, cursor) and returns the [`SearchResponse`]
|
||||
/// pagination wrapper. Tasks 6+8 surface this via CLI / MCP.
|
||||
#[doc(hidden)]
|
||||
pub fn search_with_opts_with_config(
|
||||
config: kebab_config::Config,
|
||||
query: kebab_core::SearchQuery,
|
||||
opts: kebab_core::SearchOpts,
|
||||
) -> anyhow::Result<SearchResponse> {
|
||||
App::open_with_config(config)?.search_with_opts(query, opts)
|
||||
}
|
||||
|
||||
// ── ask ──────────────────────────────────────────────────────────────────
|
||||
//
|
||||
// P4-3 wires `ask` end-to-end. The retriever is built per `opts.mode`;
|
||||
|
||||
@@ -63,6 +63,7 @@ pub const SCHEMA_V1_ID: &str = "schema.v1";
|
||||
const WIRE_SCHEMAS: &[&str] = &[
|
||||
"answer.v1",
|
||||
"search_hit.v1",
|
||||
"search_response.v1",
|
||||
"doc_summary.v1",
|
||||
"chunk_inspection.v1",
|
||||
"doctor.v1",
|
||||
|
||||
@@ -79,6 +79,37 @@ impl TestEnv {
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// p9-fb-34 alias — tests added in fb-34 invoke `TestEnv::new()`
|
||||
/// per the plan; route to the existing lexical-only constructor
|
||||
/// so the lane stays AVX-free without churning all the existing
|
||||
/// callers.
|
||||
pub fn new() -> Self {
|
||||
Self::lexical_only()
|
||||
}
|
||||
|
||||
/// p9-fb-34: open a fresh `App` against this env's config. Used
|
||||
/// by integration tests that need to call `App::search_with_opts`
|
||||
/// directly. Caller can invoke this multiple times to simulate
|
||||
/// re-opening the binary after a corpus revision bump.
|
||||
pub fn app(&self) -> kebab_app::App {
|
||||
kebab_app::App::open_with_config(self.config.clone())
|
||||
.expect("App::open_with_config")
|
||||
}
|
||||
}
|
||||
|
||||
/// p9-fb-34: write `content` into the env's workspace at
|
||||
/// `relative_path`, then run a full ingest so the document is
|
||||
/// searchable. Mirrors the convenience helpers used by other
|
||||
/// `TestEnv`-driven crates.
|
||||
pub fn ingest_md(env: &TestEnv, relative_path: &str, content: &str) {
|
||||
let path = env.workspace_root.join(relative_path);
|
||||
if let Some(parent) = path.parent() {
|
||||
std::fs::create_dir_all(parent).expect("create parent dirs");
|
||||
}
|
||||
std::fs::write(&path, content).expect("write workspace file");
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
.expect("ingest_with_config");
|
||||
}
|
||||
|
||||
/// Test helper: build a `SearchQuery` for lexical mode at k=10. Used
|
||||
|
||||
24
crates/kebab-app/tests/cursor.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
//! p9-fb-34: cursor encode/decode round-trip + corpus_revision mismatch.
|
||||
|
||||
use kebab_app::cursor;
|
||||
|
||||
#[test]
fn cursor_roundtrip_preserves_offset() {
    // Encode and decode against the same revision: the offset survives.
    let token = cursor::encode(5, "rev-abc");
    assert_eq!(cursor::decode(&token, "rev-abc").unwrap(), 5);
}
|
||||
|
||||
#[test]
fn cursor_decode_rejects_mismatched_revision() {
    let encoded = cursor::encode(7, "rev-old");
    let err = cursor::decode(&encoded, "rev-new").unwrap_err();
    assert_eq!(err.code, "stale_cursor");
    // The mismatch message embeds both the issuing and the current
    // revision, so require both rather than either one.
    assert!(
        err.message.contains("rev-old") && err.message.contains("rev-new"),
        "message must name both revisions: {}",
        err.message
    );
}
|
||||
|
||||
#[test]
fn cursor_decode_rejects_garbage_input() {
    // Not base64 at all: `!` is outside the URL-safe alphabet.
    let err = cursor::decode("not-base64!!!", "any").unwrap_err();
    assert_eq!(err.code, "stale_cursor");
    assert!(
        err.message.contains("not valid base64"),
        "base64 failure must use its own message: {}",
        err.message
    );

    // Valid base64 ("AAAA" decodes to three NUL bytes) whose payload is
    // not JSON: same code, but the malformed-payload message.
    let err = cursor::decode("AAAA", "any").unwrap_err();
    assert_eq!(err.code, "stale_cursor");
    assert!(
        err.message.contains("payload is malformed"),
        "JSON failure must use its own message: {}",
        err.message
    );
}
|
||||
161
crates/kebab-app/tests/search_budget_integration.rs
Normal file
@@ -0,0 +1,161 @@
|
||||
//! p9-fb-34: App::search_with_opts integration tests.
|
||||
|
||||
mod common;
|
||||
|
||||
use kebab_app::SearchResponse;
|
||||
use kebab_core::{SearchFilters, SearchMode, SearchOpts, SearchQuery};
|
||||
|
||||
fn lex(text: &str, k: usize) -> SearchQuery {
|
||||
SearchQuery {
|
||||
text: text.to_string(),
|
||||
mode: SearchMode::Lexical,
|
||||
k,
|
||||
filters: SearchFilters::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn search_with_opts_no_budget_matches_search() {
    let env = common::TestEnv::new();
    common::ingest_md(&env, "a.md", "# T\n\napples are red\n");
    let app = env.app();

    // Reference result from the plain facade, then the budget-aware
    // call with default (no budget, no cursor) options.
    let expected_len = app.search(lex("apples", 5)).unwrap().len();
    let SearchResponse {
        hits,
        next_cursor,
        truncated,
    } = app
        .search_with_opts(lex("apples", 5), SearchOpts::default())
        .unwrap();

    assert_eq!(hits.len(), expected_len);
    assert!(!truncated);
    assert!(next_cursor.is_none(), "k=5 against 1 doc → no next page");
}
|
||||
|
||||
#[test]
fn budget_truncates_snippets_when_below_threshold() {
    let env = common::TestEnv::new();
    let body: String = "rust ownership is a memory model. ".repeat(10);
    common::ingest_md(&env, "a.md", &format!("# T\n\n{body}\n"));
    let app = env.app();

    // Baseline: total snippet length (in chars) with no budget applied.
    let mut unrestricted_chars = 0usize;
    for hit in app.search(lex("rust", 5)).unwrap().iter() {
        unrestricted_chars += hit.snippet.chars().count();
    }

    // Same query under a 50-token budget.
    let tight = SearchOpts {
        max_tokens: Some(50),
        snippet_chars: None,
        cursor: None,
    };
    let resp = app.search_with_opts(lex("rust", 5), tight).unwrap();
    let mut limited_chars = 0usize;
    for hit in resp.hits.iter() {
        limited_chars += hit.snippet.chars().count();
    }

    assert!(resp.truncated, "small budget must trip truncation");
    assert!(limited_chars < unrestricted_chars, "snippet should shrink");
    assert!(!resp.hits.is_empty(), "always retain ≥1 hit");
}
|
||||
|
||||
#[test]
fn cursor_paginates_to_next_page() {
    use std::collections::HashSet;

    // Six single-topic docs so that two pages of k=2 both fill up.
    let env = common::TestEnv::new();
    for i in 0..6 {
        let name = format!("d{i}.md");
        let text = format!("# T{i}\n\nrust topic {i}\n");
        common::ingest_md(&env, &name, &text);
    }
    let app = env.app();

    let first = app
        .search_with_opts(lex("rust", 2), SearchOpts::default())
        .unwrap();
    assert_eq!(first.hits.len(), 2);
    let token = first.next_cursor.expect("more hits available");

    // Follow the cursor with the same query and page size.
    let follow_up = SearchOpts {
        max_tokens: None,
        snippet_chars: None,
        cursor: Some(token),
    };
    let second = app.search_with_opts(lex("rust", 2), follow_up).unwrap();
    assert_eq!(second.hits.len(), 2);

    let mut first_ids = HashSet::new();
    for hit in first.hits.iter() {
        first_ids.insert(hit.chunk_id.0.clone());
    }
    let mut second_ids = HashSet::new();
    for hit in second.hits.iter() {
        second_ids.insert(hit.chunk_id.0.clone());
    }
    assert!(first_ids.is_disjoint(&second_ids), "page 2 must not repeat page 1 hits");
}
|
||||
|
||||
#[test]
|
||||
fn cursor_rejected_after_corpus_revision_bump() {
|
||||
let env = common::TestEnv::new();
|
||||
common::ingest_md(&env, "a.md", "# T\n\napples\n");
|
||||
let app = env.app();
|
||||
|
||||
let page1 = app
|
||||
.search_with_opts(lex("apples", 1), SearchOpts::default())
|
||||
.unwrap();
|
||||
// p9-fb-34 round-1 review: replaced silent `if let Some(c) = ...`
|
||||
// with `.expect(...)` so a fixture regression that breaks the
|
||||
// cursor-emission contract fails loudly instead of passing vacuously.
|
||||
|
claude-reviewer-01
commented
**[nit]** 테스트가 `if let Some(c) = cursor { ... } else 통과` 패턴이라, page1 에 next_cursor 가 없으면 stale-cursor 검증을 한 줄도 실행 안 하고 silent pass. 현재 fixture (1 doc + k=1) 에서는 `returned == k_effective == 1` 이라 `full_page` 분기로 next_cursor 가 emit 되지만, 추후 cursor 발급 조건이 바뀌면 이 테스트가 의도와 달리 noop 으로 변할 위험. `let cursor = page1.next_cursor.expect("k=1 page must emit next_cursor")` 로 강제하거나 fixture 를 2 docs + k=1 로 늘려서 명시적으로 보장 권장.
|
||||
let c = page1
|
||||
.next_cursor
|
||||
.expect("k=1 page must emit next_cursor — fixture too small if this fails");
|
||||
|
||||
common::ingest_md(&env, "b.md", "# B\n\nbananas\n");
|
||||
let app2 = env.app();
|
||||
|
||||
let result = app2.search_with_opts(
|
||||
lex("apples", 1),
|
||||
SearchOpts {
|
||||
max_tokens: None,
|
||||
snippet_chars: None,
|
||||
cursor: Some(c),
|
||||
},
|
||||
);
|
||||
let err = result.unwrap_err();
|
||||
assert!(
|
||||
err.to_string().contains("stale_cursor"),
|
||||
"must surface stale_cursor: {err}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_tokens_zero_returns_one_hit_truncated() {
|
||||
// p9-fb-34 round-1 review: pin the documented "≥1 hit floor"
|
||||
// contract — even with `max_tokens=0` (an absurdly tight budget)
|
||||
// the budget loop must keep one hit and flip `truncated: true`.
|
||||
// Fixture intentionally seeds multiple matches so step 2 of the
|
||||
// budget loop (pop hits to 1) actually fires.
|
||||
let env = common::TestEnv::new();
|
||||
for i in 0..3 {
|
||||
common::ingest_md(
|
||||
&env,
|
||||
&format!("d{i}.md"),
|
||||
&format!("# T{i}\n\napples are red {i}\n"),
|
||||
);
|
||||
}
|
||||
let app = env.app();
|
||||
|
||||
let resp = app
|
||||
.search_with_opts(
|
||||
lex("apples", 5),
|
||||
SearchOpts {
|
||||
max_tokens: Some(0),
|
||||
snippet_chars: None,
|
||||
cursor: None,
|
||||
},
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(resp.hits.len(), 1, "max_tokens=0 collapses to 1-hit floor");
|
||||
assert!(resp.truncated);
|
||||
|
claude-reviewer-01
commented
회차 2 — nit (선택). `max_tokens=0` 테스트가 `hits.len()==1`, `truncated==true` 까지만 assert 함. 위 issue 와 연결되는데, 이 케이스는 정확히 k-pop 케이스 (5→1) 이므로 새 로직에 따라 `next_cursor==None` 이어야 함. assertion 하나만 추가하면 cursor-emission 의미 표가 테스트로 pin 됨 — 향후 회귀 catch 에 도움. (만약 main 이슈가 옵션 2 로 해결되면 None 이 아니라 Some 이 되겠지만, 어느 쪽이든 명시 assertion 이 contract 를 잠금.)
|
||||
// p9-fb-34 R2: cursor IS emitted on k-pop case so the popped
|
||||
// hits remain reachable.
|
||||
assert!(
|
||||
resp.next_cursor.is_some(),
|
||||
|
claude-reviewer-01
commented
Right test to pin the R2 fix: `max_tokens=0` is the canonical k-pop scenario (3 fixture docs → 1 hit floor), and the new `next_cursor.is_some()` assert directly guards against the inversion regressing. The error message even explains *why* ("popped hits at offset+returned") so a future failure points the reader straight at the contract.
|
||||
"k-pop truncation must still emit next_cursor; popped hits at offset+returned"
|
||||
);
|
||||
}
|
||||
@@ -108,6 +108,23 @@ enum Cmd {
|
||||
/// future TUI cache-aware search and for explicit intent.
|
||||
#[arg(long)]
|
||||
no_cache: bool,
|
||||
|
||||
/// p9-fb-34: cap result wire JSON size at approximately N tokens
|
||||
/// (chars/4 estimate). When set, smaller snippets and fewer hits
|
||||
/// may be returned; check `truncated` in the JSON wire.
|
||||
#[arg(long)]
|
||||
max_tokens: Option<usize>,
|
||||
|
||||
/// p9-fb-34: per-hit snippet character cap, overrides
|
||||
/// `config.search.snippet_chars` for this call only.
|
||||
#[arg(long)]
|
||||
snippet_chars: Option<usize>,
|
||||
|
||||
/// p9-fb-34: opaque cursor from a previous response's
|
||||
/// `next_cursor` to fetch the next page. Mismatched
|
||||
/// `corpus_revision` returns `error.v1.code = stale_cursor`.
|
||||
#[arg(long)]
|
||||
cursor: Option<String>,
|
||||
},
|
||||
|
||||
/// Retrieval-augmented question answering.
|
||||
@@ -515,6 +532,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
mode,
|
||||
explain: _,
|
||||
no_cache,
|
||||
max_tokens,
|
||||
snippet_chars,
|
||||
cursor,
|
||||
} => {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let q = kebab_core::SearchQuery {
|
||||
@@ -523,16 +543,24 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
k: *k,
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
// p9-fb-19: --no-cache routes to the uncached facade.
|
||||
// Both calls go through the same App; only the cache
|
||||
// lookup/insert is skipped.
|
||||
let hits = if *no_cache {
|
||||
kebab_app::search_uncached_with_config(cfg, q)?
|
||||
} else {
|
||||
kebab_app::search_with_config(cfg, q)?
|
||||
let opts = kebab_core::SearchOpts {
|
||||
max_tokens: *max_tokens,
|
||||
snippet_chars: *snippet_chars,
|
||||
cursor: cursor.clone(),
|
||||
};
|
||||
// p9-fb-34: budget-aware path. --no-cache still bypasses the
|
||||
// App-level LRU; wire wrapper applies regardless.
|
||||
let app = kebab_app::App::open_with_config(cfg)?;
|
||||
if *no_cache {
|
||||
app.clear_search_cache();
|
||||
}
|
||||
let resp = app.search_with_opts(q, opts)?;
|
||||
|
claude-reviewer-01
commented
**[nit]** `--no-cache` semantic 약화. 이전 코드는 `search_uncached_with_config` 으로 cache 자체를 우회. 새 코드는 `clear_search_cache()` 후 `search_with_opts → self.search()` 로 cache miss path 를 타는 방식 — CLI (fresh App) 에서는 무해하지만 long-lived (TUI/MCP) 에서는 clear 와 fetch 사이 다른 thread 의 search 가 cache 를 다시 채울 수 있어 race 가 가능. spec/HOTFIXES 에 `--no-cache` 의미 변경이 명시 안 돼 있음. `App::search_uncached` 를 활용하는 `search_with_opts_uncached` variant 를 추가하거나 (현 facade 는 항상 cached path) HOTFIXES 에 deviation 한 줄 기록 권장.
|
||||
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
|
||||
println!(
|
||||
"{}",
|
||||
serde_json::to_string(&wire::wire_search_response(&resp))?
|
||||
);
|
||||
} else {
|
||||
// p9-fb-32: prefix `[stale]` on the doc_path for hits
|
||||
// whose `stale: true`. Yellow on TTY, plain otherwise —
|
||||
@@ -542,7 +570,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
// lands on); no new dep.
|
||||
use std::io::IsTerminal;
|
||||
let color = std::io::stdout().is_terminal();
|
||||
for h in &hits {
|
||||
for h in &resp.hits {
|
||||
// Show 4-digit score so RRF fused scores (bounded
|
||||
// ~0–0.033 for k_rrf=60) don't all collapse to "0.02".
|
||||
// Append heading_path so multiple chunks from the same
|
||||
@@ -570,6 +598,12 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
heading,
|
||||
);
|
||||
}
|
||||
// p9-fb-34: truncation hint goes to stderr so it
|
||||
// doesn't pollute the stdout hit list.
|
||||
if resp.truncated {
|
||||
let next = resp.next_cursor.as_deref().unwrap_or("(none)");
|
||||
eprintln!("[truncated; use --cursor {next} for the next page]");
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -75,10 +75,18 @@ pub fn wire_search_hit(h: &SearchHit) -> Value {
|
||||
tag_object(v, "search_hit.v1")
|
||||
}
|
||||
|
||||
/// Wrap a list of [`SearchHit`] values as a JSON array of `search_hit.v1`
|
||||
/// objects (one tag per element, per design §2.2).
|
||||
pub fn wire_search_hits(hits: &[SearchHit]) -> Value {
|
||||
Value::Array(hits.iter().map(wire_search_hit).collect())
|
||||
/// p9-fb-34: tag a `SearchResponse` as `search_response.v1`. Wraps
|
||||
/// the existing `search_hit.v1[]` array with pagination + truncation
|
||||
/// metadata. Replaces the previous bare `search_hit.v1[]` top-level
|
||||
/// array (`wire_search_hits`) — see HOTFIXES / fb-34 for the
|
||||
/// breaking shape change.
///
/// The returned object carries three keys in addition to the tag added by
/// `tag_object`: `hits` (each element produced by [`wire_search_hit`]),
/// `next_cursor` (copied from `r.next_cursor`) and `truncated` (copied
/// from `r.truncated`).
|
||||
pub fn wire_search_response(r: &kebab_app::SearchResponse) -> Value {
|
||||
// Build the untagged envelope first; the schema tag is applied below.
let v = serde_json::json!({
|
||||
"hits": r.hits.iter().map(wire_search_hit).collect::<Vec<_>>(),
|
||||
"next_cursor": r.next_cursor,
|
||||
"truncated": r.truncated,
|
||||
});
|
||||
tag_object(v, "search_response.v1")
|
||||
}
|
||||
|
||||
/// Wrap an [`Answer`] as `answer.v1`.
|
||||
@@ -234,13 +242,6 @@ mod tests {
|
||||
assert_eq!(v.as_array().unwrap().len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn search_hits_wraps_each_element() {
|
||||
let v = wire_search_hits(&[]);
|
||||
assert!(v.is_array());
|
||||
assert_eq!(v.as_array().unwrap().len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tag_object_inserts_into_object() {
|
||||
let v = Value::Object(serde_json::Map::new());
|
||||
@@ -248,6 +249,30 @@ mod tests {
|
||||
assert_eq!(schema_of(&tagged), Some("x.v1"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn search_response_carries_pagination_metadata() {
|
||||
// p9-fb-34: an empty-hits SearchResponse passed through the
|
||||
// wrapper keeps its `next_cursor` + `truncated` values in the JSON
|
||||
// and gets the top-level schema tag `search_response.v1`.
|
||||
let r = kebab_app::SearchResponse {
|
||||
hits: vec![],
|
||||
next_cursor: Some("opaque-cursor-abc".to_string()),
|
||||
truncated: true,
|
||||
};
|
||||
let v = wire_search_response(&r);
|
||||
assert_eq!(schema_of(&v), Some("search_response.v1"));
|
||||
// `hits` must be present as an array even when there are no hits.
assert!(v.get("hits").and_then(|h| h.as_array()).is_some());
|
||||
assert_eq!(
|
||||
v.get("hits").and_then(|h| h.as_array()).unwrap().len(),
|
||||
0
|
||||
);
|
||||
// The cursor string is emitted verbatim.
assert_eq!(
|
||||
v.get("next_cursor").and_then(|c| c.as_str()),
|
||||
Some("opaque-cursor-abc")
|
||||
);
|
||||
assert_eq!(v.get("truncated").and_then(|t| t.as_bool()), Some(true));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn schema_wrapper_tags_schema_version() {
|
||||
use kebab_app::{Capabilities, Models, SchemaV1, Stats, WireBlock};
|
||||
|
||||
@@ -126,6 +126,29 @@ pub fn ingest(cfg: &Path, workspace: &Path) {
|
||||
);
|
||||
}
|
||||
|
||||
/// p9-fb-34: invoke `kebab search` with arbitrary trailing flags +
|
||||
/// query, capture stdout + stderr. Caller is responsible for
|
||||
/// supplying `--mode lexical` / `--json` etc. as needed; this helper
|
||||
/// stays unopinionated so a single test can exercise both wire shapes
|
||||
/// (JSON wrapper + plain stderr hint). Asserts the binary exited 0;
|
||||
/// non-zero exits fail the test with stderr included.
///
/// Returns `(stdout, stderr)`, each decoded lossily as UTF-8. Because of
/// the exit-status assertion, this helper cannot be used to inspect the
/// output of a run that is expected to fail.
|
||||
pub fn run_search_with_args(cfg: &Path, args: &[&str]) -> (String, String) {
|
||||
// Path of the `kebab` binary, provided by Cargo at compile time.
let bin = env!("CARGO_BIN_EXE_kebab");
|
||||
let mut cmd = Command::new(bin);
|
||||
// `--config` goes before the subcommand; caller args follow `search`.
cmd.arg("--config").arg(cfg).arg("search");
|
||||
cmd.args(args);
|
||||
// `expect` covers failure to spawn; exit status is checked separately below.
let out = cmd.output().expect("kebab search");
|
||||
assert!(
|
||||
out.status.success(),
|
||||
"search failed: args={args:?} stderr={}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
);
|
||||
(
|
||||
String::from_utf8_lossy(&out.stdout).to_string(),
|
||||
String::from_utf8_lossy(&out.stderr).to_string(),
|
||||
)
|
||||
}
|
||||
|
||||
/// p9-fb-33: invoke `kebab ask --stream --mode lexical <query>` and
|
||||
/// capture stdout + stderr. Lexical mode skips embeddings (matches
|
||||
/// `wire_ask_stale.rs::run_ask_lexical`). Caller asserts on the
|
||||
|
||||
226
crates/kebab-cli/tests/wire_search_response.rs
Normal file
@@ -0,0 +1,226 @@
|
||||
//! p9-fb-34: CLI search wire wrapper + budget controls.
|
||||
//!
|
||||
//! Lexical-only — no fastembed / no Ollama. Each test builds its own
|
||||
//! TempDir KB via `common::write_config` + `common::ingest` and drives
|
||||
//! `kebab search` through `common::run_search_with_args`. Verifies:
|
||||
//!
|
||||
//! - `--json` emits the `search_response.v1` wrapper (hits + cursor +
|
||||
//! truncated).
|
||||
//! - `--max-tokens` flips `truncated: true` once the budget binds.
|
||||
//! - `--cursor` advances paging (page 2 chunk_ids disjoint from page 1).
|
||||
//! - Plain (non-JSON) output prints the `[truncated; ...]` hint to
|
||||
//! stderr (stdout stays the hit list).
|
||||
|
||||
mod common;
|
||||
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
|
||||
// Shape check for `kebab search --json`: stdout must be a single JSON object
// tagged `search_response.v1` with an array `hits`, a `next_cursor` that is
// null or a string, and a boolean `truncated`.
#[test]
|
||||
fn search_json_emits_search_response_v1_wrapper() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
|
||||
fs::write(workspace.join("a.md"), "# T\n\napples are red.\n").unwrap();
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
let (stdout, _stderr) = common::run_search_with_args(
|
||||
&cfg,
|
||||
&["--json", "--mode", "lexical", "apples"],
|
||||
);
|
||||
// Parse failure prints the raw stdout to make the wire shape debuggable.
let v: Value = serde_json::from_str(stdout.trim())
|
||||
.unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}"));
|
||||
assert_eq!(v["schema_version"], "search_response.v1");
|
||||
assert!(v["hits"].is_array(), "hits must be array, got {v}");
|
||||
// Only the type is pinned here; whether a cursor is issued is not asserted.
assert!(
|
||||
v["next_cursor"].is_null() || v["next_cursor"].is_string(),
|
||||
"next_cursor must be null or string, got {}",
|
||||
v["next_cursor"]
|
||||
);
|
||||
assert!(
|
||||
v["truncated"].is_boolean(),
|
||||
"truncated must be bool, got {}",
|
||||
v["truncated"]
|
||||
);
|
||||
}
|
||||
|
||||
// End-to-end check that `--max-tokens 30` makes the JSON response report
// `truncated: true` for a document whose body repeats the query term.
#[test]
|
||||
fn search_json_truncates_with_max_tokens() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
|
||||
// Ten repetitions of a sentence containing "rust" form the document body.
let body: String = "rust ownership is a memory model. ".repeat(10);
|
||||
fs::write(workspace.join("a.md"), format!("# T\n\n{body}\n")).unwrap();
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
let (stdout, _stderr) = common::run_search_with_args(
|
||||
&cfg,
|
||||
&["--json", "--mode", "lexical", "--max-tokens", "30", "rust"],
|
||||
);
|
||||
let v: Value = serde_json::from_str(stdout.trim())
|
||||
.unwrap_or_else(|e| panic!("not JSON: {stdout:?}: {e}"));
|
||||
assert_eq!(
|
||||
v["truncated"], true,
|
||||
"30-token cap must trip truncation: {v}"
|
||||
);
|
||||
}
|
||||
|
||||
// End-to-end pagination through the CLI: page 1 (`--k 2`) must return a
// string `next_cursor`; re-running with `--cursor <value>` must return a
// non-empty page whose `chunk_id`s do not appear on page 1.
#[test]
|
||||
fn search_json_cursor_paginates() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
|
||||
// Six matching documents, so a k=2 page cannot hold every match.
for i in 0..6 {
|
||||
fs::write(
|
||||
workspace.join(format!("d{i}.md")),
|
||||
format!("# T{i}\n\nrust topic {i}\n"),
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
let (page1, _) = common::run_search_with_args(
|
||||
&cfg,
|
||||
&["--json", "--mode", "lexical", "--k", "2", "rust"],
|
||||
);
|
||||
let v1: Value = serde_json::from_str(page1.trim())
|
||||
.unwrap_or_else(|e| panic!("page1 not JSON: {page1:?}: {e}"));
|
||||
// `as_str()` is `None` for a JSON null, so a missing cursor fails here.
let cursor = v1["next_cursor"]
|
||||
.as_str()
|
||||
.unwrap_or_else(|| panic!("next_cursor missing on page1: {v1}"));
|
||||
|
||||
// Same query/mode/k as page 1, plus the cursor.
let (page2, _) = common::run_search_with_args(
|
||||
&cfg,
|
||||
&[
|
||||
"--json",
|
||||
"--mode",
|
||||
"lexical",
|
||||
"--k",
|
||||
"2",
|
||||
"--cursor",
|
||||
cursor,
|
||||
"rust",
|
||||
],
|
||||
);
|
||||
let v2: Value = serde_json::from_str(page2.trim())
|
||||
.unwrap_or_else(|e| panic!("page2 not JSON: {page2:?}: {e}"));
|
||||
|
||||
// Collect the `chunk_id` of every hit on each page for the overlap check.
let p1_ids: Vec<String> = v1["hits"]
|
||||
.as_array()
|
||||
.expect("page1 hits array")
|
||||
.iter()
|
||||
.map(|h| {
|
||||
h["chunk_id"]
|
||||
.as_str()
|
||||
.expect("chunk_id string")
|
||||
.to_string()
|
||||
})
|
||||
.collect();
|
||||
let p2_ids: Vec<String> = v2["hits"]
|
||||
.as_array()
|
||||
.expect("page2 hits array")
|
||||
.iter()
|
||||
.map(|h| {
|
||||
h["chunk_id"]
|
||||
.as_str()
|
||||
.expect("chunk_id string")
|
||||
.to_string()
|
||||
})
|
||||
.collect();
|
||||
// Guard against a vacuous pass: an empty page 2 would trivially be disjoint.
assert!(
|
||||
!p2_ids.is_empty(),
|
||||
"page2 must return at least one hit (cursor advanced past page1)"
|
||||
);
|
||||
assert!(
|
||||
p2_ids.iter().all(|id| !p1_ids.contains(id)),
|
||||
"page2 must not repeat page1 chunk_ids: page1={p1_ids:?} page2={p2_ids:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn search_stale_cursor_returns_error_v1_with_stale_cursor_code() {
|
||||
// p9-fb-34 round-1 review: end-to-end wire contract — when the
|
||||
// corpus_revision bumps between cursor issuance and the cursored
|
||||
// search, `kebab --json search --cursor <stale>` must emit an
|
||||
// `error.v1` ndjson line on stderr with `code = "stale_cursor"`.
|
||||
// Pre-fix this returned `code = "generic"` because
|
||||
// `App::search_with_opts` string-formatted the typed payload into
|
||||
// anyhow, losing the structured wrapper.
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
|
||||
fs::write(workspace.join("a.md"), "# T\n\napples\n").unwrap();
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
// Get a valid cursor first.
|
||||
let (page1_stdout, _) = common::run_search_with_args(
|
||||
&cfg,
|
||||
&["--mode", "lexical", "--json", "--k", "1", "apples"],
|
||||
);
|
||||
let v1: Value = serde_json::from_str(page1_stdout.trim()).expect("json");
|
||||
// Owned copy: the cursor is used after `v1` is no longer needed.
let cursor = v1["next_cursor"]
|
||||
.as_str()
|
||||
.expect("k=1 page must emit next_cursor — fixture too small if this fails")
|
||||
.to_string();
|
||||
|
||||
// Bump corpus_revision by ingesting a second doc.
|
||||
fs::write(workspace.join("b.md"), "# B\n\nbananas\n").unwrap();
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
// Use the now-stale cursor. Direct invocation (not via the
|
||||
// success-asserting helper) so we can read stderr on failure.
|
||||
// NOTE(review): `--json` is passed twice below (before and after the
// `search` subcommand) and `--k` is omitted although page 1 used
// `--k 1` — confirm both are intentional and accepted by the CLI parser.
// NOTE(review): the exit status of this run is never asserted; only
// stderr content is checked.
let exe = env!("CARGO_BIN_EXE_kebab");
|
||||
let cfg_str = cfg.to_str().expect("utf8");
|
||||
let out = std::process::Command::new(exe)
|
||||
.args([
|
||||
"--config",
|
||||
cfg_str,
|
||||
"--json",
|
||||
"search",
|
||||
"--mode",
|
||||
"lexical",
|
||||
"--json",
|
||||
"--cursor",
|
||||
&cursor,
|
||||
"apples",
|
||||
])
|
||||
.output()
|
||||
.expect("kebab search --cursor");
|
||||
|
||||
let stderr = String::from_utf8_lossy(&out.stderr);
|
||||
// Find the error.v1 ndjson line on stderr (one event per line).
|
||||
// Lines that are not JSON, or lack a string `schema_version`, are skipped.
let err_line = stderr
|
||||
.lines()
|
||||
.find(|l| {
|
||||
serde_json::from_str::<Value>(l)
|
||||
.ok()
|
||||
.and_then(|v| {
|
||||
v.get("schema_version")
|
||||
.and_then(|s| s.as_str())
|
||||
.map(String::from)
|
||||
})
|
||||
.as_deref()
|
||||
== Some("error.v1")
|
||||
})
|
||||
.unwrap_or_else(|| panic!("no error.v1 line on stderr: {stderr:?}"));
|
||||
|
||||
let v: Value = serde_json::from_str(err_line).expect("error.v1 json");
|
||||
assert_eq!(
|
||||
v["code"], "stale_cursor",
|
||||
"code must be stale_cursor: {err_line}"
|
||||
);
|
||||
}
|
||||
|
||||
// Plain (non-`--json`) output: when `--max-tokens 30` truncates the result,
// the `[truncated; ...]` hint must appear on stderr. Stdout is captured but
// not inspected by this test.
#[test]
|
||||
fn search_plain_emits_truncated_hint_to_stderr() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
|
||||
let body: String = "rust ownership is a memory model. ".repeat(10);
|
||||
fs::write(workspace.join("a.md"), format!("# T\n\n{body}\n")).unwrap();
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
let (_stdout, stderr) = common::run_search_with_args(
|
||||
&cfg,
|
||||
&["--mode", "lexical", "--max-tokens", "30", "rust"],
|
||||
);
|
||||
// Match only the hint's prefix; the cursor part of the message varies.
assert!(
|
||||
stderr.contains("[truncated;"),
|
||||
"stderr must carry truncated hint: {stderr:?}"
|
||||
);
|
||||
}
|
||||
@@ -45,10 +45,21 @@ fn search_json_includes_indexed_at_and_stale() {
|
||||
|
||||
let out = run_search_lexical(&cfg, "apples", true);
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
let arr: serde_json::Value = serde_json::from_str(stdout.trim())
|
||||
.unwrap_or_else(|e| panic!("expected JSON array, got {stdout:?}: {e}"));
|
||||
let arr = arr.as_array().unwrap_or_else(|| panic!("expected array, got {stdout}"));
|
||||
let first = arr.first().unwrap_or_else(|| panic!("expected ≥1 hit, got empty array: {stdout}"));
|
||||
// p9-fb-34: top-level wire is now `search_response.v1` wrapping the
|
||||
// legacy `search_hit.v1[]` under a `hits` field (with pagination +
|
||||
// truncation metadata). Hit shape inside `hits` is unchanged.
|
||||
let resp: serde_json::Value = serde_json::from_str(stdout.trim())
|
||||
.unwrap_or_else(|e| panic!("expected JSON object, got {stdout:?}: {e}"));
|
||||
assert_eq!(
|
||||
resp.get("schema_version").and_then(|v| v.as_str()),
|
||||
Some("search_response.v1"),
|
||||
"expected search_response.v1 wrapper, got {resp}"
|
||||
);
|
||||
let arr = resp
|
||||
.get("hits")
|
||||
.and_then(|h| h.as_array())
|
||||
.unwrap_or_else(|| panic!("expected hits array, got {stdout}"));
|
||||
let first = arr.first().unwrap_or_else(|| panic!("expected ≥1 hit, got empty hits: {stdout}"));
|
||||
assert!(
|
||||
first.get("indexed_at").is_some(),
|
||||
"missing indexed_at in {first}"
|
||||
|
||||
@@ -51,7 +51,7 @@ pub use metadata::{
|
||||
};
|
||||
pub use search::{
|
||||
DocFilter, DocSummary, RetrievalDetail, SearchFilters, SearchHit,
|
||||
SearchMode, SearchQuery,
|
||||
SearchMode, SearchOpts, SearchQuery,
|
||||
};
|
||||
pub use answer::{
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage,
|
||||
|
||||
@@ -96,6 +96,18 @@ pub struct DocSummary {
|
||||
pub chunker_version: ChunkerVersion,
|
||||
}
|
||||
|
||||
/// p9-fb-34: caller-supplied output budget knobs for `App::search_with_opts`.
|
||||
/// All `None` = no enforcement (existing behavior).
///
/// The derived `Default` yields exactly that all-`None` value, so
/// `SearchOpts::default()` requests an unbudgeted first page.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SearchOpts {
|
||||
/// chars/4 approximation of wire JSON token cost. None = no cap.
|
||||
pub max_tokens: Option<usize>,
|
||||
/// Per-hit snippet character cap. None = use config default.
|
||||
pub snippet_chars: Option<usize>,
|
||||
/// Opaque base64 cursor from a previous response. None = first page.
|
||||
pub cursor: Option<String>,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -135,4 +147,12 @@ mod tests {
|
||||
assert_eq!(v["indexed_at"], "2026-05-09T12:00:00Z");
|
||||
assert_eq!(v["stale"], true);
|
||||
}
|
||||
|
||||
// Pins the "all `None` = no enforcement" default: every field of
// `SearchOpts::default()` must be `None`.
#[test]
|
||||
fn search_opts_default_is_all_none() {
|
||||
let opts = SearchOpts::default();
|
||||
assert!(opts.max_tokens.is_none());
|
||||
assert!(opts.snippet_chars.is_none());
|
||||
assert!(opts.cursor.is_none());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
//! `search` tool — wraps `kebab_app::search_with_config`.
|
||||
//! Input: { query, mode?, k? }. Output: search_hit.v1 array JSON.
|
||||
//! `search` tool — wraps `kebab_app::search_with_opts_with_config`.
|
||||
//! Input: { query, mode?, k?, max_tokens?, snippet_chars?, cursor? }.
|
||||
//! Output: search_response.v1 envelope (hits + next_cursor + truncated).
|
||||
//!
|
||||
//! First tool with a non-empty `inputSchema`: `SearchInput` derives
|
||||
//! `JsonSchema` and `Tool::new` uses
|
||||
@@ -17,23 +18,21 @@ pub struct SearchInput {
|
||||
/// User query (free text).
|
||||
pub query: String,
|
||||
/// Retrieval mode: "hybrid" (default), "lexical", or "vector".
|
||||
#[serde(default = "default_mode")]
|
||||
pub mode: String,
|
||||
pub mode: Option<String>,
|
||||
/// Top-K results. Defaults to 10. Clamped to 1–100.
|
||||
#[serde(default = "default_k")]
|
||||
pub k: usize,
|
||||
}
|
||||
|
||||
fn default_mode() -> String {
|
||||
"hybrid".to_string()
|
||||
}
|
||||
fn default_k() -> usize {
|
||||
10
|
||||
pub k: Option<usize>,
|
||||
/// p9-fb-34: cap result wire size at ~N tokens (chars/4 estimate).
|
||||
pub max_tokens: Option<usize>,
|
||||
/// p9-fb-34: per-hit snippet character cap.
|
||||
pub snippet_chars: Option<usize>,
|
||||
/// p9-fb-34: opaque cursor from a previous response.
|
||||
pub cursor: Option<String>,
|
||||
}
|
||||
|
||||
pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
|
||||
let k = input.k.clamp(1, 100);
|
||||
let mode = match input.mode.as_str() {
|
||||
let k = input.k.unwrap_or(10).clamp(1, 100);
|
||||
let mode_str = input.mode.as_deref().unwrap_or("hybrid");
|
||||
let mode = match mode_str {
|
||||
"lexical" => kebab_core::SearchMode::Lexical,
|
||||
"vector" => kebab_core::SearchMode::Vector,
|
||||
_ => kebab_core::SearchMode::Hybrid,
|
||||
@@ -44,11 +43,18 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
|
||||
k,
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
match kebab_app::search_with_config((*state.config).clone(), query) {
|
||||
Ok(hits) => {
|
||||
let opts = kebab_core::SearchOpts {
|
||||
max_tokens: input.max_tokens,
|
||||
snippet_chars: input.snippet_chars,
|
||||
cursor: input.cursor,
|
||||
};
|
||||
let cfg_clone = (*state.config).clone();
|
||||
match kebab_app::search_with_opts_with_config(cfg_clone, query, opts) {
|
||||
Ok(resp) => {
|
||||
// SearchHit (kebab-core) does not carry a `schema_version` field,
|
||||
// so we tag each element inline before serialising.
|
||||
let tagged: Vec<serde_json::Value> = hits
|
||||
let tagged: Vec<serde_json::Value> = resp
|
||||
.hits
|
||||
.iter()
|
||||
.map(|h| {
|
||||
let mut v = serde_json::to_value(h).unwrap_or_default();
|
||||
@@ -61,7 +67,13 @@ pub fn handle(state: &KebabAppState, input: SearchInput) -> CallToolResult {
|
||||
v
|
||||
})
|
||||
.collect();
|
||||
match serde_json::to_string(&serde_json::Value::Array(tagged)) {
|
||||
let envelope = serde_json::json!({
|
||||
"schema_version": "search_response.v1",
|
||||
"hits": tagged,
|
||||
"next_cursor": resp.next_cursor,
|
||||
"truncated": resp.truncated,
|
||||
});
|
||||
match serde_json::to_string(&envelope) {
|
||||
Ok(json) => to_tool_success(json),
|
||||
Err(e) => to_tool_error(&anyhow::anyhow!(e)),
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
//! Integration: tools/call name=search — verify response is search_hit.v1 array.
|
||||
//! Integration: tools/call name=search — verify response is search_response.v1.
|
||||
|
||||
use std::fs;
|
||||
|
||||
@@ -22,7 +22,7 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn search_tool_returns_search_hits_array() {
|
||||
async fn search_tool_returns_search_response_v1() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let data_dir = dir.path().join("data");
|
||||
let workspace_root = dir.path().join("notes");
|
||||
@@ -53,8 +53,11 @@ async fn search_tool_returns_search_hits_array() {
|
||||
handler.state(),
|
||||
kebab_mcp::tools::search::SearchInput {
|
||||
query: "kebab".to_string(),
|
||||
mode: "lexical".to_string(),
|
||||
k: 5,
|
||||
mode: Some("lexical".to_string()),
|
||||
k: Some(5),
|
||||
max_tokens: None,
|
||||
snippet_chars: None,
|
||||
cursor: None,
|
||||
},
|
||||
);
|
||||
|
||||
@@ -75,16 +78,33 @@ async fn search_tool_returns_search_hits_array() {
|
||||
};
|
||||
|
||||
let v: serde_json::Value = serde_json::from_str(text).unwrap();
|
||||
let arr = v.as_array().expect("search returns a JSON array");
|
||||
assert_eq!(
|
||||
v.get("schema_version").and_then(|s| s.as_str()),
|
||||
Some("search_response.v1"),
|
||||
"envelope should carry schema_version=search_response.v1"
|
||||
);
|
||||
let hits = v
|
||||
.get("hits")
|
||||
.and_then(|h| h.as_array())
|
||||
.expect("hits must be a JSON array");
|
||||
assert!(
|
||||
!arr.is_empty(),
|
||||
!hits.is_empty(),
|
||||
"expected at least one hit for 'kebab' in 'a.md'"
|
||||
);
|
||||
assert_eq!(
|
||||
arr[0]
|
||||
hits[0]
|
||||
.get("schema_version")
|
||||
.and_then(|s| s.as_str()),
|
||||
Some("search_hit.v1"),
|
||||
"first hit should carry schema_version=search_hit.v1"
|
||||
);
|
||||
// truncated must be present (bool); next_cursor may be null on last page.
|
||||
assert!(
|
||||
v.get("truncated").and_then(|t| t.as_bool()).is_some(),
|
||||
"envelope should carry truncated:bool"
|
||||
);
|
||||
assert!(
|
||||
v.get("next_cursor").is_some(),
|
||||
"envelope should carry next_cursor (possibly null)"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -34,7 +34,7 @@ kamadak-exif = "0.6"
|
||||
# rustls-tls) so both crates share the same TLS backend and the
|
||||
# transitive tokio runtime is brought in once.
|
||||
reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
|
||||
base64 = "0.22"
|
||||
base64 = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
@@ -47,7 +47,7 @@ tokio = { workspace = true, features = ["rt-multi-thread"] }
|
||||
# fixture. Only loaded for tests; the production crate doesn't need
|
||||
# font rendering.
|
||||
ab_glyph = "0.2"
|
||||
base64 = "0.22"
|
||||
base64 = { workspace = true }
|
||||
# `kebab-llm/mock` exposes `MockLanguageModel` for hermetic caption
|
||||
# tests. Real adapters (Ollama) live in `kebab-llm-local`, which is
|
||||
# only allowed at the dev-dep level here — the runtime crate stays
|
||||
|
||||
@@ -152,6 +152,25 @@ stderr 의 events.ndjson 은 한 줄 = 한 event 의 ndjson — `retrieval_done`
|
||||
|
||||
agent 가 stderr 를 닫으면 (`head -c 1` 등) pipeline 이 LLM stream 을 즉시 중단하고 `RefusalReason::LlmStreamAborted` 로 partial answer 를 `answers` 테이블에 기록.
|
||||
|
||||
### Pagination + budget (fb-34)
|
||||
|
||||
```bash
|
||||
# First page
|
||||
kebab search "rust" --json --k 5 > page1.json
|
||||
jq '.next_cursor' page1.json
|
||||
|
||||
# Next page using the returned cursor
|
||||
NEXT=$(jq -r '.next_cursor' page1.json)
|
||||
kebab search "rust" --json --k 5 --cursor "$NEXT" > page2.json
|
||||
|
||||
# Budget cap — returns smaller snippet / fewer hits + truncated=true
|
||||
kebab search "rust" --json --max-tokens 200 | jq '.truncated, (.hits | length)'
|
||||
```
|
||||
|
||||
`next_cursor` 는 corpus_revision 변경 (이후 ingest 등) 시 invalid — 다음 호출이 `error.v1.code = stale_cursor` 로 거절. agent 는 새 search 로 재발급 받기.
|
||||
|
||||
`--json` 출력은 `search_response.v1` wrapper (`{hits, next_cursor, truncated}`) — pre-fb-34 의 bare `search_hit.v1[]` 배열과 호환 안 됨.
|
||||
|
||||
## P6-4 이미지 ingestion 옵션
|
||||
|
||||
`config.toml` 에 다음 절을 추가하면 `kebab ingest` 가 `**/*.png` / `**/*.jpg` 등 이미지 자산도 함께 색인합니다 (텍스트만 색인하려면 생략):
|
||||
|
||||
1535
docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md
Normal file
@@ -0,0 +1,230 @@
|
||||
---
|
||||
title: "p9-fb-34 — Output budget controls design"
|
||||
phase: P9
|
||||
component: kebab-core + kebab-app + kebab-cli + kebab-mcp + wire-schema
|
||||
task_id: p9-fb-34
|
||||
status: design
|
||||
target_version: 0.5.0
|
||||
contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
|
||||
contract_sections: [§4 search, §10 UX, wire-schema search_hit.v1]
|
||||
date: 2026-05-09
|
||||
---
|
||||
|
||||
# p9-fb-34 — Output budget controls
|
||||
|
||||
## Goal
|
||||
|
||||
`kebab search` agent UX 개선. context window 제약 있는 agent 가 검색 결과 size 와 페이지네이션을 명시적으로 제어할 수 있게 한다. CLI surface 우선, MCP tool 도 동일 인자로 동시 노출. ask path 는 scope out (별도 `rag.max_context_tokens` 가 이미 budget 담당).
|
||||
|
||||
## Behavior contract
|
||||
|
||||
### CLI flags
|
||||
|
||||
`kebab search "<query>"` 에 세 가지 flag 신규:
|
||||
|
||||
| flag | 의미 | default |
|
||||
|------|------|---------|
|
||||
| `--max-tokens N` | 결과 wire JSON 의 추정 token 수 cap (`chars/4` 근사). 초과 시 truncate priority 적용. | 미설정 = 비활성 (기존 동작) |
|
||||
| `--snippet-chars N` | 각 hit snippet 최대 chars. config 의 `search.snippet_chars` 보다 우선. | 미설정 = config 값 |
|
||||
| `--cursor <opaque>` | 이전 호출의 `next_cursor` 값. 다음 페이지 hits 만 반환. | 미설정 = 첫 페이지 |
|
||||
|
||||
### Wire shape
|
||||
|
||||
`kebab search --json` stdout 이 기존 `search_hit.v1[]` 배열에서 신규 `search_response.v1` wrapper object 로 교체:
|
||||
|
||||
```json
|
||||
{
|
||||
"schema_version": "search_response.v1",
|
||||
"hits": [/* search_hit.v1[] */],
|
||||
"next_cursor": "<base64>" | null,
|
||||
"truncated": true | false
|
||||
}
|
||||
```
|
||||
|
||||
**Backwards-compat broken** — agent 가 `[0]` 직접 인덱싱하면 깨짐. CLI plain (`--json` 없이) 출력 무영향. HOTFIXES 에 결정 로그.
|
||||
|
||||
### Token estimation
|
||||
|
||||
`chars/4` 근사 (RAG `pack_context` 와 일관). tiktoken-rs 등 신규 dep 없음. 정확도 ±15% 수준 — agent budget 제어 목적상 충분. wire schema description 에 "approximation" 명시.
|
||||
|
||||
### Truncate priority
|
||||
|
||||
`opts.max_tokens` 가 Some 일 때만 동작. 단계별:
|
||||
|
||||
1. **Snippet 단축** — 각 hit snippet 을 `opts.snippet_chars.unwrap_or(config.search.snippet_chars)` 로 자른 뒤, 여전히 budget 초과면 60-char floor 까지 점진 단축.
|
||||
2. **k 축소** — snippet 60 char 까지 줄여도 초과면 마지막 hit 부터 pop. 최소 1 hit 보장.
|
||||
3. **truncated flag** — 위 어느 단계라도 동작 시 `truncated: true`. agent 는 `next_cursor` 로 다음 페이지 요청 가능.
|
||||
|
||||
metadata (rank/score/doc_path/citation) 는 끝까지 유지 — agent 가 hit 자체를 못 찾으면 무의미.
|
||||
|
||||
### Pagination cursor
|
||||
|
||||
cursor 는 opaque base64 — 내부적으로 `{offset: usize, corpus_revision: string}` JSON 의 base64 encode.
|
||||
|
||||
- 첫 호출: cursor 미설정 → offset 0.
|
||||
- 응답: 남은 hit 있으면 `next_cursor = encode(offset + returned, current_revision)`. 없으면 `null`.
|
||||
- 다음 호출: `--cursor <prev>` → decode → offset 만큼 skip.
|
||||
- corpus_revision mismatch (이후 ingest 등으로 corpus 가 변경됨) → `error.v1.code = "stale_cursor"`, exit 2. agent 책임으로 재호출.
|
||||
|
||||
retriever 호출 시 k = `effective_k + offset` 만큼 fetch 후 offset 만큼 skip 해 응답.
|
||||
|
||||
### Stale cursor error
|
||||
|
||||
`error.v1.code` enum 에 `"stale_cursor"` 추가. message 예시: `"cursor was issued against corpus_revision 'abc'; current revision is 'xyz'. Re-issue search to obtain a fresh cursor."`
|
||||
|
||||
## Allowed / forbidden dependencies
|
||||
|
||||
- `kebab-core`: `SearchOpts` 신규 도메인 type 정의. 신규 dep 없음 (option / String 만).
|
||||
- `kebab-app`: cursor encode/decode 헬퍼 (base64 + serde_json). `base64` workspace dep 가 이미 있을 가능성 높음 — 확인 후 필요 시 추가.
|
||||
- `kebab-cli`: clap 인자 추가, wire wrapper 헬퍼.
|
||||
- `kebab-mcp`: tool input schema 확장.
|
||||
- `kebab-tui`: 변경 없음 (Search 패널 budget 미사용. fb-3X 후속).
|
||||
- `kebab-search`: 변경 없음 — retriever signature 보존.
|
||||
|
||||
`kebab-core` 가 다른 `kebab-*` crate 의존 금지 룰 준수.
|
||||
|
||||
## Public surface delta
|
||||
|
||||
### kebab-core
|
||||
|
||||
```rust
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct SearchOpts {
|
||||
/// p9-fb-34: chars/4 approximation. None = no budget enforcement.
|
||||
pub max_tokens: Option<usize>,
|
||||
/// p9-fb-34: per-hit snippet character cap. None = use config default.
|
||||
pub snippet_chars: Option<usize>,
|
||||
/// p9-fb-34: opaque base64 cursor from a previous response.
|
||||
pub cursor: Option<String>,
|
||||
}
|
||||
```
|
||||
|
||||
### kebab-app
|
||||
|
||||
```rust
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SearchResponse {
|
||||
pub hits: Vec<SearchHit>,
|
||||
pub next_cursor: Option<String>,
|
||||
pub truncated: bool,
|
||||
}
|
||||
|
||||
impl App {
|
||||
/// p9-fb-34: budget-aware search.
|
||||
pub fn search_with_opts(
|
||||
&self,
|
||||
query: SearchQuery,
|
||||
opts: SearchOpts,
|
||||
) -> Result<SearchResponse>;
|
||||
|
||||
// Existing — thin wrapper for backwards-compat.
|
||||
pub fn search(&self, query: SearchQuery) -> Result<Vec<SearchHit>> {
|
||||
let resp = self.search_with_opts(query, SearchOpts::default())?;
|
||||
Ok(resp.hits)
|
||||
}
|
||||
}
|
||||
|
||||
// cursor helpers (private to app crate)
|
||||
pub(crate) fn encode_cursor(offset: usize, corpus_revision: &str) -> String;
|
||||
pub(crate) fn decode_cursor(
|
||||
s: &str,
|
||||
expected_revision: &str,
|
||||
) -> Result<usize /* offset */, ErrorV1 /* stale_cursor */>;
|
||||
```
|
||||
|
||||
### kebab-cli
|
||||
|
||||
```rust
|
||||
// Cmd::Search 새 인자
|
||||
#[arg(long)] max_tokens: Option<usize>,
|
||||
#[arg(long)] snippet_chars: Option<usize>,
|
||||
#[arg(long)] cursor: Option<String>,
|
||||
```
|
||||
|
||||
```rust
|
||||
// wire helper
|
||||
pub fn wire_search_response(r: &SearchResponse) -> Value {
|
||||
let v = serde_json::json!({
|
||||
"hits": r.hits.iter().map(wire_search_hit).collect::<Vec<_>>(),
|
||||
"next_cursor": r.next_cursor,
|
||||
"truncated": r.truncated,
|
||||
});
|
||||
tag_object(v, "search_response.v1")
|
||||
}
|
||||
```
|
||||
|
||||
plain output: 기존 hit 줄들 + truncated 시 stderr 한 줄:
|
||||
|
||||
```
|
||||
[truncated; use --cursor <next_cursor> for the next page]
|
||||
```
|
||||
|
||||
### kebab-mcp
|
||||
|
||||
`SearchInput` 에 optional 필드 추가:
|
||||
|
||||
```rust
|
||||
pub struct SearchInput {
|
||||
pub query: String,
|
||||
pub mode: Option<String>,
|
||||
pub k: Option<usize>,
|
||||
/// p9-fb-34
|
||||
pub max_tokens: Option<usize>,
|
||||
pub snippet_chars: Option<usize>,
|
||||
pub cursor: Option<String>,
|
||||
}
|
||||
```
|
||||
|
||||
출력: `search_response.v1` JSON tag 적용 (CLI 와 동일 wrapper).
|
||||
|
||||
## Test plan
|
||||
|
||||
| kind | description |
|
||||
|------|-------------|
|
||||
| unit (kebab-app) | `cursor::encode/decode` round-trip + corpus_revision mismatch → `StaleCursor` |
|
||||
| unit (kebab-app) | `App::search_with_opts` budget=None → 기존 `App::search` 동일 (truncated=false, next_cursor 채움) |
|
||||
| unit (kebab-app) | budget=200 tokens → snippet 60-char floor 까지 단축, truncated=true |
|
||||
| unit (kebab-app) | budget < single-hit 최소 → k=1 + truncated=true (1 hit 보장) |
|
||||
| unit (kebab-app) | snippet_chars override → 해당 길이로 truncate |
|
||||
| 통합 (kebab-app) | cursor offset 5 호출 → 6번째 hit 부터 반환 |
|
||||
| 통합 (kebab-app) | corpus_revision bump 후 cursor 재호출 → `StaleCursor` error.v1 |
|
||||
| 통합 (kebab-cli) | `kebab search "x" --json` → `search_response.v1` shape |
|
||||
| 통합 (kebab-cli) | `--max-tokens 200 --json` → truncated=true, hits 짧음 |
|
||||
| 통합 (kebab-cli) | `--cursor <encoded>` → 다음 페이지 |
|
||||
| 통합 (kebab-cli) | plain output: `[truncated; ...]` stderr 한 줄 |
|
||||
| 통합 (kebab-mcp) | `mcp__kebab__search` tool 이 `search_response.v1` 반환 |
|
||||
| 통합 (wire-schema) | `search_response.schema.json` validate 샘플 (with/without next_cursor) |
|
||||
| 통합 (kebab-app) | 기존 `App::search` 호출자 (TUI 등) 무영향 — return type 동일 |
|
||||
|
||||
## Implementation steps (high-level)
|
||||
|
||||
1. wire schema 신규 `search_response.schema.json` + `error.v1` enum 에 `stale_cursor` 추가.
|
||||
2. `kebab-core::SearchOpts` 도메인 type.
|
||||
3. `kebab-app::SearchResponse` + `cursor` 모듈 (encode/decode).
|
||||
4. `App::search_with_opts` impl (budget loop, cursor handling).
|
||||
5. `App::search` thin wrapper 보존.
|
||||
6. `kebab-cli::Cmd::Search` 새 flag + wire wrapper helper + plain truncated hint.
|
||||
7. `kebab-mcp::SearchInput` 확장 + 출력 wrapper.
|
||||
8. 단위 + 통합 테스트.
|
||||
9. README + SMOKE — `--max-tokens` / `--cursor` 예시.
|
||||
10. tasks/INDEX.md / spec status flip.
|
||||
11. `tasks/HOTFIXES.md` — wire breaking 결정 로그.
|
||||
12. `integrations/claude-code/kebab/SKILL.md` — search 결과 shape 변경 명시.
|
||||
|
||||
## Risks / notes
|
||||
|
||||
- **Wire breaking**: agent 가 기존 `search_hit.v1[]` 배열 직접 파싱 시 깨짐. HOTFIXES 결정 로그 + skill notes 반영 필수. 내부 single-user 환경이라 실용적 영향 적음.
|
||||
- **`App::search` 시그니처 보존** 으로 TUI / 기존 caller 무영향.
|
||||
- **chars/4 추정 정확도** ±15% — agent budget 보호 목적상 충분. tiktoken 도입은 별도 task.
|
||||
- **cursor opaque** — agent 가 base64 decode 시도 막을 방법 없음. spec 에 "구조 변경 가능, 직접 파싱 금지" 명시.
|
||||
- **corpus_revision 이 fb-19 LRU cache invalidation key 와 동일 source** — 별도 source-of-truth 추가 불필요.
|
||||
- **TUI Search 패널 budget UI** — out of scope. 사용자가 원하면 fb-3X 후속.
|
||||
|
||||
## Documentation updates (implementation PR 동시)
|
||||
|
||||
- `README.md` — `kebab search` 명령 표 row 업데이트, `--max-tokens` / `--cursor` 한 줄.
|
||||
- `docs/SMOKE.md` — pagination walkthrough 한 단락 (cursor 흐름 예시).
|
||||
- `tasks/p9/p9-fb-34-output-budget-controls.md` — `status: open → completed`, design/plan 링크 추가.
|
||||
- `tasks/INDEX.md` — fb-34 행 ✅.
|
||||
- `tasks/HOTFIXES.md` — `2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1` 결정 로그.
|
||||
- `integrations/claude-code/kebab/SKILL.md` — Recipe 의 search 결과 파싱 패턴 (`response.hits[]`) + cursor 예시.
|
||||
14
docs/wire-schema/v1/search_response.schema.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "https://kb.local/wire/v1/search_response.schema.json",
|
||||
"title": "SearchResponse v1",
|
||||
"description": "Top-level wrapper for `kebab search --json` output. Replaces the bare `search_hit.v1[]` array — wraps it with pagination + truncation metadata. Token counts are approximate (chars/4 estimate, no tokenizer dep). On `truncated: true`, caller may either widen `--max-tokens` or follow `next_cursor` for the next page. Stale `next_cursor` (corpus_revision changed since issued) returns `error.v1.code = stale_cursor`.",
|
||||
|
claude-reviewer-01
commented
**[nit, blocker for ship]** description 끝에 `error.v1.code = stale_cursor` 명시 — 위 `app.rs:317` 이슈 때문에 실제 wire 는 `code: "generic"` 으로 떨어짐. 둘 중 하나를 맞춰야 함: (a) `app.rs` 의 typed-error 보존을 고치고 schema 그대로 유지 (권장), (b) schema description 을 `code: "generic"` 또는 "the error message contains 'stale_cursor:'" 로 수정. (a) 가 spec contract 와 맞으므로 권장.
|
||||
"type": "object",
|
||||
"required": ["schema_version", "hits", "next_cursor", "truncated"],
|
||||
"properties": {
|
||||
"schema_version": { "const": "search_response.v1" },
|
||||
"hits": { "type": "array", "description": "search_hit.v1[]" },
|
||||
"next_cursor": { "type": ["string", "null"], "description": "Opaque base64 cursor for next page; null when no more hits." },
|
||||
"truncated": { "type": "boolean", "description": "True when budget forced snippet shortening or k reduction. Independent of `next_cursor`: caller may widen `max_tokens` (re-issue same query) or follow `next_cursor` (advance through more hits) or both." }
|
||||
}
|
||||
}
|
||||
@@ -32,7 +32,7 @@ When `kebab` is registered as an MCP server (see `~/.claude/mcp.json` example be
|
||||
|
||||
| tool | purpose | mutation |
|
||||
|------|---------|----------|
|
||||
| `mcp__kebab__search` | corpus search → `search_hit.v1[]` | no |
|
||||
| `mcp__kebab__search` | corpus search → `search_response.v1` (`{hits, next_cursor, truncated}`) | no |
|
||||
| `mcp__kebab__ask` | RAG answer → `answer.v1` | no |
|
||||
| `mcp__kebab__schema` | capability discovery → `schema.v1` | no |
|
||||
| `mcp__kebab__doctor` | health check → `doctor.v1` | no |
|
||||
@@ -47,12 +47,14 @@ Use when the user wants to **find** a doc, or when you (the model) need raw chun
|
||||
|
||||
Input:
|
||||
```json
|
||||
{ "query": "<query>", "mode": "hybrid", "k": 10 }
|
||||
{ "query": "<query>", "mode": "hybrid", "k": 10, "max_tokens": null, "snippet_chars": null, "cursor": null }
|
||||
```
|
||||
|
||||
- `mode = "hybrid"` is the default-correct choice. Use `"vector"` for semantic-only ("docs about X concept"), `"lexical"` for exact strings ("the literal flag `--foo-bar`").
|
||||
- Output is `search_hit.v1` array. Key fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`.
|
||||
- **`max_tokens` / `snippet_chars` / `cursor` (p9-fb-34)** — agent budget controls. Set `max_tokens` to cap result wire size (chars/4 estimate); set `cursor` to the previous response's `next_cursor` to fetch the next page.
|
||||
- Output is `search_response.v1`: `{ hits: search_hit.v1[], next_cursor: string|null, truncated: bool }`. Iterate `response.hits[]` for individual hits. Key hit fields: `rank`, `score`, `doc_path`, `heading_path[]`, `section_label`, `snippet`, `citation` (line range / page), `chunk_id`.
|
||||
- Cite back to the user as `doc_path § heading_path[-1]` so they can open the source.
|
||||
- When `truncated: true`, the budget loop modified the page (snippet shortening or k reduction). `next_cursor` is **independent** — non-null whenever more hits may be reachable. Caller may widen `max_tokens` (re-issue same query for fuller snippets / more hits per page) or follow `next_cursor` (advance through more hits) or both. Mismatched cursor (corpus_revision changed) returns `error.v1.code = stale_cursor` — re-issue the search to obtain a fresh one.
|
||||
|
||||
### `mcp__kebab__ask` — when you need the answer
|
||||
|
||||
@@ -102,7 +104,9 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across
|
||||
## Parsing tips
|
||||
|
||||
- MCP tools return JSON content blocks; CLI prints **one JSON value to stdout**, progress / warnings to stderr. Capture stdout only: `kebab search ... --json 2>/dev/null`.
|
||||
- `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.[] | {rank, doc_path, heading: .heading_path[-1], snippet}'`.
|
||||
- `search` output can be large for broad queries. Project relevant fields when summarizing — for CLI: `jq '.hits[] | {rank, doc_path, heading: .heading_path[-1], snippet}'` (note: `.hits[]`, not `.[]` — fb-34 wrapped the array). Use `--max-tokens N` (CLI) / `max_tokens` (MCP) to cap wire size in advance.
|
||||
- Pagination: `search_response.v1.next_cursor` is opaque base64 — pass back as `--cursor` (CLI) or `cursor` (MCP) for the next page. `null` means no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue search to obtain a fresh cursor.
|
||||
|
claude-reviewer-01
commented
**[nit, blocker for ship]** 같은 이슈 — `error.v1.code = stale_cursor` 약속하지만 현재 구현은 `code: generic`. agent 가 SKILL 가이드 따라 `code` 로 분기 작성하면 fall-through 됨. `app.rs:317` 의 typed-error 보존이 들어간 후에야 이 SKILL.md 가 truthful 해짐.
|
||||
- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. Independent of `next_cursor`: widen `max_tokens` for fuller snippets, follow `next_cursor` for more hits, or both.
|
||||
- `ask`'s `citations[]` mirrors `search_hit.v1` minus retrieval internals — same `doc_path` / `citation` shape.
|
||||
- Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear.
|
||||
- `search_hit.v1` and `answer.v1.citations[]` carry `indexed_at` (RFC3339) + `stale` (bool). When `stale == true`, the source doc hasn't been re-processed since `config.search.stale_threshold_days`. Surface this caveat to the user when summarizing — the cited snapshot may not reflect current reality.
|
||||
|
||||
@@ -14,6 +14,21 @@ historical contract that was implemented; this file accumulates the
|
||||
deltas so phase 5+ readers can find the live behavior without diffing
|
||||
git history.
|
||||
|
||||
## 2026-05-09 — p9-fb-34: search wire wrapped in search_response.v1
|
||||
|
||||
**무엇이 바뀌었나**: `kebab search --json` stdout 이 기존 `search_hit.v1[]` 배열에서 신규 `search_response.v1` object 로 교체. wrapper 가 `hits`, `next_cursor`, `truncated` 세 필드를 가짐.
|
||||
|
||||
**Spec contract 와의 관계**: 명시적 wire breaking change. spec `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md` 의 §Wire shape 절에 단일 출처 결정.
|
||||
|
||||
**의식적 결정**:
|
||||
- pagination + truncation metadata 를 `search_hit` 자체에 흡수하면 단일 hit 의 도메인 의미가 오염됨 (모든 hit 가 `next_cursor` 필드 보유 등). top-level wrapper 가 분리도 깨끗.
|
||||
- 외부 consumer 영향: 단일 사용자 환경 + Claude Code skill 한 곳. skill 은 fb-34 와 동시 갱신.
|
||||
- 이 변경은 search_hit.v1 자체 schema 는 손대지 않음 — 도메인 stable.
|
||||
|
||||
**영향 받는 consumer**: kebab-tui (Search 패널 — 변경 불필요, App::search 시그니처 보존), kebab-mcp (search tool — 같은 PR 에서 갱신), Claude Code skill (같은 PR 에서 갱신). 외부 producer/consumer 없음.
|
||||
|
||||
**`--no-cache` 의미 변화**: fb-34 이전 `--no-cache` 는 `search_uncached_with_config` 로 cache 자체를 우회. fb-34 는 cached path 위에 `clear_search_cache()` 호출 후 search 실행 — long-lived process (TUI / MCP) 에서는 clear 와 fetch 사이 race window 가 있음. CLI (fresh App per call) 에서는 무영향. 후속 fb-3X 에서 `search_with_opts_uncached` 추가로 격리.
|
||||
|
||||
## 2026-05-09 — p9-fb-33: AskOpts.stream_sink type widened to StreamEvent
|
||||
|
||||
**무엇이 바뀌었나**: `kebab_rag::AskOpts.stream_sink` 의 타입이 `Option<mpsc::Sender<String>>` 에서 `Option<mpsc::Sender<StreamEvent>>` 로 변경됨. `kebab_app::StreamEvent` 가 새 re-export.
|
||||
|
||||
@@ -122,7 +122,7 @@ P0~P5 는 직렬. P6~P9 는 P5 이후 병렬 가능.
|
||||
### 🎯 0.4.0 — agent surface refinement (additive only)
|
||||
- [p9-fb-32 stale doc indicator](p9/p9-fb-32-stale-doc-indicator.md) — ✅ 머지 + v0.4.0 cut 후보 (2026-05-09)
|
||||
- [p9-fb-33 streaming ask (ndjson delta)](p9/p9-fb-33-streaming-ask.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
|
||||
- [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ⏳ 미구현, brainstorm 필요
|
||||
- [p9-fb-34 output budget controls](p9/p9-fb-34-output-budget-controls.md) — ✅ 머지 + v0.5.0 cut 후보 (2026-05-09)
|
||||
- [p9-fb-35 verbatim fetch](p9/p9-fb-35-verbatim-fetch.md) — ⏳ 미구현, brainstorm 필요
|
||||
- [p9-fb-36 search filter args](p9/p9-fb-36-search-filters.md) — ⏳ 미구현, brainstorm 필요
|
||||
- [p9-fb-37 trace + stats](p9/p9-fb-37-trace-and-stats.md) — ⏳ 미구현, brainstorm 필요 (depends_on 27)
|
||||
|
||||
@@ -3,8 +3,8 @@ phase: P9
|
||||
component: kebab-cli + kebab-app + wire-schema
|
||||
task_id: p9-fb-34
|
||||
title: "Output budget controls (--max-tokens / --snippet-chars / pagination)"
|
||||
status: open
|
||||
target_version: 0.4.0
|
||||
status: completed
|
||||
target_version: 0.5.0
|
||||
depends_on: []
|
||||
unblocks: []
|
||||
contract_source: ../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
|
||||
@@ -14,7 +14,10 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent context window 제
|
||||
|
||||
# p9-fb-34 — Output budget controls
|
||||
|
||||
> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. budget 적용 layer (truncate vs k 조정) / cursor 형식 / 기본값 brainstorm 후 확정.
|
||||
> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 의 `2026-05-09 — p9-fb-34` 항목 참조 — live source of truth.
|
||||
|
||||
상세 설계: `docs/superpowers/specs/2026-05-09-p9-fb-34-output-budget-controls-design.md`.
|
||||
구현 계획: `docs/superpowers/plans/2026-05-09-p9-fb-34-output-budget-controls.md`.
|
||||
|
||||
## 증상 / 동기
|
||||
|
||||
|
||||
회차 2 — nit (위 main 이슈의 후속).
`SearchResponse` doc 도 "or when the budget loop truncated mid-page" 라고 적혀 있는데, 새 로직은 mid-page truncation 중 k-pop 케이스에서는 cursor 를 emit 하지 않고 snippet-only 케이스에서만 emit 함. 코드 동작과 일치하도록 수정 필요. 라인 303-304 의 `search_with_opts` doc 도 동일한 문구.