From 7d1f855f7e40ce5c2b1849f5860c90e6a4f3c973 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sat, 9 May 2026 23:48:40 +0900
Subject: [PATCH] feat(app): App::fetch doc mode with budget (fb-35)

Walks CanonicalDocument blocks, serializes to markdown, applies chars/4
budget when opts.max_tokens is set. doc_not_found preserved through
StructuredError.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/kebab-app/src/fetch.rs               | 69 +++++++++++++++++---
 crates/kebab-app/tests/fetch_integration.rs | 70 +++++++++++++++++++++
 2 files changed, 131 insertions(+), 8 deletions(-)

diff --git a/crates/kebab-app/src/fetch.rs b/crates/kebab-app/src/fetch.rs
index 5d1dfeb..63ad507 100644
--- a/crates/kebab-app/src/fetch.rs
+++ b/crates/kebab-app/src/fetch.rs
@@ -104,9 +104,67 @@ fn fetch_chunk(app: &App, id: ChunkId, opts: FetchOpts) -> Result {
     })
 }
 
-fn fetch_doc(_app: &App, _id: DocumentId, _opts: FetchOpts) -> Result {
-    // Implemented in Task 4.
-    anyhow::bail!("fetch_doc not yet implemented")
+/// p9-fb-35: doc mode. Loads the document, serializes its blocks to
+/// markdown and, when `opts.max_tokens` is set, trims the text to a
+/// chars/4 budget; `truncated` reports whether anything was cut.
+/// An unknown id yields a `doc_not_found` `StructuredError`.
+fn fetch_doc(app: &App, id: DocumentId, opts: FetchOpts) -> Result {
+    let doc = ::get_document(&app.sqlite, &id)?
+        .ok_or_else(|| {
+            anyhow::Error::new(StructuredError(ErrorV1 {
+                schema_version: ERROR_V1_ID.to_string(),
+                code: "doc_not_found".to_string(),
+                message: format!("doc_id '{}' not found", id.0),
+                details: serde_json::Value::Null,
+                hint: None,
+            }))
+        })?;
+
+    let mut text = fmt_canonical_to_markdown(&doc);
+    let mut truncated = false;
+    if let Some(max_tokens) = opts.max_tokens {
+        // 1 token ~= 4 chars; saturate so a huge budget cannot overflow.
+        let max_chars = max_tokens.saturating_mul(4);
+        if text.chars().count() > max_chars {
+            text = trim_to_chars(&text, max_chars);
+            truncated = true;
+        }
+    }
+
+    // Read the timestamp once; it feeds both `stale` and `indexed_at`.
+    let indexed_at = doc_metadata_updated_at(&doc);
+    let stale = compute_stale(
+        indexed_at,
+        OffsetDateTime::now_utc(),
+        app.config.search.stale_threshold_days,
+    );
+
+    Ok(FetchResult {
+        kind: FetchKind::Doc,
+        doc_id: doc.doc_id.clone(),
+        doc_path: doc.workspace_path.clone(),
+        indexed_at,
+        stale,
+        chunk: None,
+        context_before: Vec::new(),
+        context_after: Vec::new(),
+        text: Some(text),
+        line_start: None,
+        line_end: None,
+        effective_end: None,
+        truncated,
+    })
+}
+
+/// p9-fb-35: first `n` chars of `s` (Unicode-safe). Mirrors fb-34's
+/// helper at `crates/kebab-app/src/app.rs` — kept local to avoid
+/// re-exporting an internal helper.
+fn trim_to_chars(s: &str, n: usize) -> String {
+    // `nth(n)` is the first char past the budget; its byte offset is the cut.
+    match s.char_indices().nth(n) {
+        Some((cut, _)) => s[..cut].to_string(),
+        None => s.to_string(),
+    }
 }
 
 fn fetch_span(
@@ -170,11 +228,6 @@ fn doc_metadata_updated_at(doc: &CanonicalDocument) -> OffsetDateTime {
 /// flatten to plain text via the already-flattened `TextBlock.text`
 /// field. Good enough for an agent reading verbatim context. Used by
 /// Task 4 (doc mode) and Task 5 (span mode).
-//
-// The first caller lands in Task 4 (`fetch_doc`); silence the
-// stop-gap dead-code warning until then so this Task 3 commit lands
-// with a clean clippy run.
-#[allow(dead_code)]
 pub(crate) fn fmt_canonical_to_markdown(doc: &CanonicalDocument) -> String {
     let mut out = String::with_capacity(1024);
     for (i, block) in doc.blocks.iter().enumerate() {
diff --git a/crates/kebab-app/tests/fetch_integration.rs b/crates/kebab-app/tests/fetch_integration.rs
index 8ca9a57..c745527 100644
--- a/crates/kebab-app/tests/fetch_integration.rs
+++ b/crates/kebab-app/tests/fetch_integration.rs
@@ -83,3 +83,73 @@ fn fetch_chunk_unknown_id_returns_chunk_not_found() {
         "expected chunk_not_found error, got: {msg}"
     );
 }
+
+#[test]
+fn fetch_doc_returns_serialized_markdown() {
+    let env = common::TestEnv::new();
+    let body = "# Heading One\n\nFirst paragraph.\n\n## Sub\n\nSecond.\n";
+    common::ingest_md(&env, "doc.md", body);
+    let app = env.app();
+
+    // Discover doc_id via search hit (avoids depending on list_docs API shape).
+    let q = kebab_core::SearchQuery {
+        text: "First".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let hits = app.search(q).unwrap();
+    let doc_id = hits[0].doc_id.clone();
+
+    let result = app
+        .fetch(FetchQuery::Doc(doc_id), FetchOpts::default())
+        .unwrap();
+    assert_eq!(result.kind, FetchKind::Doc);
+    let text = result.text.expect("doc text");
+    assert!(text.contains("Heading One"), "doc text contains heading: {text:?}");
+    assert!(text.contains("First paragraph"), "doc text contains body");
+    assert!(!result.truncated);
+}
+
+#[test]
+fn fetch_doc_unknown_id_returns_doc_not_found() {
+    let env = common::TestEnv::new();
+    let app = env.app();
+    let err = app
+        .fetch(
+            FetchQuery::Doc(kebab_core::DocumentId("nonexistent-doc".to_string())),
+            FetchOpts::default(),
+        )
+        .unwrap_err();
+    assert!(err.to_string().contains("doc_not_found"), "got: {err}");
+}
+
+#[test]
+fn fetch_doc_with_max_tokens_truncates() {
+    let env = common::TestEnv::new();
+    let p = "Lorem ipsum dolor sit amet consectetur adipiscing elit. ".repeat(20);
+    let body = format!("# Big\n\n{p}\n");
+    common::ingest_md(&env, "big.md", &body);
+    let app = env.app();
+    let q = kebab_core::SearchQuery {
+        text: "Lorem".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let hits = app.search(q).unwrap();
+    let doc_id = hits[0].doc_id.clone();
+
+    let result = app
+        .fetch(
+            FetchQuery::Doc(doc_id),
+            FetchOpts {
+                context: None,
+                max_tokens: Some(20), // 20 tokens * 4 chars/token = 80 chars
+            },
+        )
+        .unwrap();
+    assert!(result.truncated);
+    let text = result.text.expect("doc text");
+    assert_eq!(text.chars().count(), 80, "trimmed text must fill the 80-char budget exactly");
+}