From 0fffd690717232a2f6349a4b7cca91df8952f7d4 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Sun, 10 May 2026 00:01:56 +0900 Subject: [PATCH] feat(cli): kebab fetch chunk / doc / span (fb-35) JSON output is fetch_result.v1; plain output is human-friendly labeled sections (chunk: before / target / after; doc/span: full text + stderr truncated hint). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-cli/src/main.rs | 123 +++++++++++++++++++++++++++++++++++ crates/kebab-cli/src/wire.rs | 6 ++ 2 files changed, 129 insertions(+) diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs index 16857e5..c92ba27 100644 --- a/crates/kebab-cli/src/main.rs +++ b/crates/kebab-cli/src/main.rs @@ -86,6 +86,12 @@ enum Cmd { what: InspectWhat, }, + /// p9-fb-35: verbatim chunk / doc / span fetch. + Fetch { + #[command(subcommand)] + what: FetchWhat, + }, + /// Lexical / vector / hybrid search over chunks. Search { query: String, @@ -261,6 +267,33 @@ enum InspectWhat { Chunk { id: String }, } +#[derive(Subcommand, Debug)] +enum FetchWhat { + /// Fetch a single chunk verbatim, optionally with surrounding context. + Chunk { + id: String, + /// p9-fb-35: include ±N chunks before and after the target. + #[arg(long)] + context: Option, + }, + /// Fetch the entire normalized markdown text of a document. + Doc { + id: String, + /// p9-fb-35: chars/4 budget cap. + #[arg(long)] + max_tokens: Option, + }, + /// Fetch a 1-based line range of a document. PDF / audio rejected. + Span { + doc_id: String, + line_start: u32, + line_end: u32, + /// p9-fb-35: chars/4 budget cap. + #[arg(long)] + max_tokens: Option, + }, +} + #[derive(Subcommand, Debug)] enum EvalWhat { /// Run the golden suite end-to-end and persist `eval_runs` + @@ -526,6 +559,49 @@ fn run(cli: &Cli) -> anyhow::Result<()> { } }, + Cmd::Fetch { what } => { + let cfg = kebab_config::Config::load(cli.config.as_deref())?; + let (query, opts) = match what { + FetchWhat::Chunk { id, context } => ( + kebab_core::FetchQuery::Chunk(kebab_core::ChunkId(id.clone())), + kebab_core::FetchOpts { + context: *context, + max_tokens: None, + }, + ), + FetchWhat::Doc { id, max_tokens } => ( + kebab_core::FetchQuery::Doc(kebab_core::DocumentId(id.clone())), + kebab_core::FetchOpts { + context: None, + max_tokens: *max_tokens, + }, + ), + FetchWhat::Span { + doc_id, + line_start, + line_end, + max_tokens, + } => ( + kebab_core::FetchQuery::Span { + doc_id: kebab_core::DocumentId(doc_id.clone()), + line_start: *line_start, + line_end: *line_end, + }, + kebab_core::FetchOpts { + context: None, + max_tokens: *max_tokens, + }, + ), + }; + let result = kebab_app::fetch_with_config(cfg, query, opts)?; + if cli.json { + println!("{}", serde_json::to_string(&wire::wire_fetch_result(&result))?); + } else { + render_fetch_plain(&result); + } + Ok(()) + } + Cmd::Search { query, k, @@ -1112,6 +1188,53 @@ fn confirm_destructive( Ok(matches!(s.as_str(), "y" | "yes")) } +/// p9-fb-35: human-friendly plain output for `kebab fetch`. +fn render_fetch_plain(r: &kebab_core::FetchResult) { + println!("# {} ({})", r.doc_path.0, format_kind(r.kind)); + if r.stale { + println!("[stale; indexed_at = {}]", r.indexed_at); + } + match r.kind { + kebab_core::FetchKind::Chunk => { + if !r.context_before.is_empty() { + println!("\n=== before ==="); + for c in &r.context_before { + let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or(""); + println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text); + } + } + if let Some(c) = &r.chunk { + println!("\n=== target ==="); + let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or(""); + println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text); + } + if !r.context_after.is_empty() { + println!("\n=== after ==="); + for c in &r.context_after { + let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or(""); + println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text); + } + } + } + kebab_core::FetchKind::Doc | kebab_core::FetchKind::Span => { + if let Some(text) = &r.text { + println!("\n{text}"); + } + if r.truncated { + eprintln!("[truncated; widen --max-tokens for fuller text]"); + } + } + } +} + +fn format_kind(k: kebab_core::FetchKind) -> &'static str { + match k { + kebab_core::FetchKind::Chunk => "chunk", + kebab_core::FetchKind::Doc => "doc", + kebab_core::FetchKind::Span => "span", + } +} + #[cfg(test)] mod tests { //! p9-fb-32: unit tests for `render_ask_plain_citations`. The diff --git a/crates/kebab-cli/src/wire.rs b/crates/kebab-cli/src/wire.rs index 649d3f0..178fa22 100644 --- a/crates/kebab-cli/src/wire.rs +++ b/crates/kebab-cli/src/wire.rs @@ -189,6 +189,12 @@ pub fn wire_error_v1(e: &kebab_app::ErrorV1) -> Value { tag_object(v, "error.v1") } +/// p9-fb-35: tag a [`kebab_core::FetchResult`] as `fetch_result.v1`. +pub fn wire_fetch_result(r: &kebab_core::FetchResult) -> Value { + let v = serde_json::to_value(r).expect("FetchResult serializes"); + tag_object(v, "fetch_result.v1") +} + #[cfg(test)] mod tests { use super::*;