From 7d1f855f7e40ce5c2b1849f5860c90e6a4f3c973 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sat, 9 May 2026 23:48:40 +0900
Subject: [PATCH] feat(app): App::fetch doc mode with budget (fb-35)
Walks the CanonicalDocument blocks, serializes them to markdown, and,
when opts.max_tokens is set, truncates the text to max_tokens * 4
characters (chars/4 token estimate). The doc_not_found error code is
preserved through StructuredError.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/fetch.rs | 69 +++++++++++++++++---
crates/kebab-app/tests/fetch_integration.rs | 70 +++++++++++++++++++++
2 files changed, 131 insertions(+), 8 deletions(-)
diff --git a/crates/kebab-app/src/fetch.rs b/crates/kebab-app/src/fetch.rs
index 5d1dfeb..63ad507 100644
--- a/crates/kebab-app/src/fetch.rs
+++ b/crates/kebab-app/src/fetch.rs
@@ -104,9 +104,67 @@ fn fetch_chunk(app: &App, id: ChunkId, opts: FetchOpts) -> Result {
})
}
-fn fetch_doc(_app: &App, _id: DocumentId, _opts: FetchOpts) -> Result {
- // Implemented in Task 4.
- anyhow::bail!("fetch_doc not yet implemented")
+fn fetch_doc(app: &App, id: DocumentId, opts: FetchOpts) -> Result { // p9-fb-35 doc mode: returns the whole document rendered as markdown text
+ let doc = ::get_document(&app.sqlite, &id)? // NOTE(review): the path prefix before `::get_document` (and the `Result` type argument above) look stripped in this copy of the patch — confirm against the commit
+ .ok_or_else(|| { // a lookup that yields `None` becomes a structured `doc_not_found` error
+ anyhow::Error::new(StructuredError(ErrorV1 {
+ schema_version: ERROR_V1_ID.to_string(),
+ code: "doc_not_found".to_string(),
+ message: format!("doc_id '{}' not found", id.0),
+ details: serde_json::Value::Null,
+ hint: None,
+ }))
+ })?;
+ // Serialize the document, then cap the text at 4 chars per token when `opts.max_tokens` is set.
+ let mut text = fmt_canonical_to_markdown(&doc);
+ let mut truncated = false;
+ if let Some(max_tokens) = opts.max_tokens {
+ let max_chars = max_tokens.saturating_mul(4); // saturating: a huge budget clamps instead of overflowing
+ if text.chars().count() > max_chars { // budget is measured in chars, not bytes
+ text = trim_to_chars(&text, max_chars);
+ truncated = true; // set only when the text was actually cut
+ }
+ }
+ // Staleness comes from `compute_stale`, fed the document's updated-at timestamp, the current UTC time and the configured threshold (days).
+ let now = OffsetDateTime::now_utc();
+ let stale = compute_stale(
+ doc_metadata_updated_at(&doc),
+ now,
+ app.config.search.stale_threshold_days,
+ );
+ // Doc mode carries no chunk, context or line-span data; `text` is the only payload field populated.
+ Ok(FetchResult {
+ kind: FetchKind::Doc,
+ doc_id: doc.doc_id.clone(),
+ doc_path: doc.workspace_path.clone(),
+ indexed_at: doc_metadata_updated_at(&doc), // same timestamp that fed the staleness check
+ stale,
+ chunk: None,
+ context_before: Vec::new(),
+ context_after: Vec::new(),
+ text: Some(text),
+ line_start: None,
+ line_end: None,
+ effective_end: None,
+ truncated,
+ })
+}
+
+/// p9-fb-35: return at most the first `n` chars of `s` (Unicode-safe:
+/// the cut always lands on a `char` boundary, never inside a UTF-8
+/// sequence). Mirrors fb-34's helper at `crates/kebab-app/src/app.rs`
+/// — kept local to avoid re-exporting an internal helper.
+///
+/// Returns an owned copy of `s` unchanged when it holds `n` chars or
+/// fewer; `n == 0` yields an empty string.
+fn trim_to_chars(s: &str, n: usize) -> String {
+    // `nth(n)` finds the byte offset of the first char past the budget
+    // in one pass; `None` means `s` already fits within `n` chars.
+    match s.char_indices().nth(n) {
+        Some((cut, _)) => s[..cut].to_string(),
+        None => s.to_string(),
+    }
+}
}
fn fetch_span(
@@ -170,11 +228,6 @@ fn doc_metadata_updated_at(doc: &CanonicalDocument) -> OffsetDateTime {
/// flatten to plain text via the already-flattened `TextBlock.text`
/// field. Good enough for an agent reading verbatim context. Used by
/// Task 4 (doc mode) and Task 5 (span mode).
-//
-// The first caller lands in Task 4 (`fetch_doc`); silence the
-// stop-gap dead-code warning until then so this Task 3 commit lands
-// with a clean clippy run.
-#[allow(dead_code)]
pub(crate) fn fmt_canonical_to_markdown(doc: &CanonicalDocument) -> String {
let mut out = String::with_capacity(1024);
for (i, block) in doc.blocks.iter().enumerate() {
diff --git a/crates/kebab-app/tests/fetch_integration.rs b/crates/kebab-app/tests/fetch_integration.rs
index 8ca9a57..c745527 100644
--- a/crates/kebab-app/tests/fetch_integration.rs
+++ b/crates/kebab-app/tests/fetch_integration.rs
@@ -83,3 +83,73 @@ fn fetch_chunk_unknown_id_returns_chunk_not_found() {
"expected chunk_not_found error, got: {msg}"
);
}
+
+#[test]
+fn fetch_doc_returns_serialized_markdown() {
+    let fixture = common::TestEnv::new();
+    let markdown = "# Heading One\n\nFirst paragraph.\n\n## Sub\n\nSecond.\n";
+    common::ingest_md(&fixture, "doc.md", markdown);
+    let app = fixture.app();
+
+    // Resolve the doc id through a lexical search so the test does not rely
+    // on the shape of any document-listing API.
+    let found = app
+        .search(kebab_core::SearchQuery {
+            text: "First".to_string(),
+            mode: kebab_core::SearchMode::Lexical,
+            k: 1,
+            filters: kebab_core::SearchFilters::default(),
+        })
+        .unwrap();
+    let target = found[0].doc_id.clone();
+
+    let fetched = app.fetch(FetchQuery::Doc(target), FetchOpts::default()).unwrap();
+    assert_eq!(fetched.kind, FetchKind::Doc);
+    let text = fetched.text.expect("doc text");
+    assert!(text.contains("Heading One"), "doc text contains heading: {text:?}");
+    assert!(text.contains("First paragraph"), "doc text contains body");
+    assert!(!fetched.truncated);
+}
+
+#[test]
+fn fetch_doc_unknown_id_returns_doc_not_found() {
+    // Nothing is ingested in this test, so the lookup below is expected to miss.
+    let fixture = common::TestEnv::new();
+    let app = fixture.app();
+    let missing = kebab_core::DocumentId("nonexistent-doc".to_string());
+    let outcome = app.fetch(FetchQuery::Doc(missing), FetchOpts::default());
+    let err = outcome.unwrap_err();
+    // The structured error code must show up in the rendered error message.
+    let rendered = err.to_string();
+    assert!(rendered.contains("doc_not_found"), "got: {err}");
+}
+
+#[test]
+fn fetch_doc_with_max_tokens_truncates() {
+    let env = common::TestEnv::new();
+    let p = "Lorem ipsum dolor sit amet consectetur adipiscing elit. ".repeat(20);
+    let body = format!("# Big\n\n{p}\n");
+    common::ingest_md(&env, "big.md", &body);
+    let app = env.app();
+    let q = kebab_core::SearchQuery {
+        text: "Lorem".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let hits = app.search(q).unwrap();
+    let doc_id = hits[0].doc_id.clone();
+
+    // Doc mode budgets 4 chars per token, so 20 tokens caps the text at 80 chars.
+    let max_tokens = 20;
+    let opts = FetchOpts {
+        context: None,
+        max_tokens: Some(max_tokens),
+    };
+    let result = app.fetch(FetchQuery::Doc(doc_id), opts).unwrap();
+    assert!(result.truncated);
+    let text = result.text.expect("doc text");
+    let len = text.chars().count();
+    // Hold the output to the exact budget; a looser bound would let an overrun pass.
+    assert!(len <= max_tokens * 4, "trimmed text len {len} exceeds budget");
+}