From 1b9d89eb3a7504e39d5397ca0f8e1cfdddabf98c Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Sat, 9 May 2026 23:54:22 +0900
Subject: [PATCH] feat(app): App::fetch span mode + PDF/audio rejection (fb-35)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Line-based slice over fmt_canonical_to_markdown output.
PDF / audio assets (RawAsset.media_type) → span_not_supported StructuredError.
Out-of-range line_end clamps to total; effective_end reflects
post-budget trim. invalid_input on zero / inverted bounds.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/fetch.rs | 104 ++++++++++++++++++--
crates/kebab-app/tests/fetch_integration.rs | 97 ++++++++++++++++++
2 files changed, 194 insertions(+), 7 deletions(-)
diff --git a/crates/kebab-app/src/fetch.rs b/crates/kebab-app/src/fetch.rs
index 63ad507..b44d65c 100644
--- a/crates/kebab-app/src/fetch.rs
+++ b/crates/kebab-app/src/fetch.rs
@@ -168,14 +168,104 @@ fn trim_to_chars(s: &str, n: usize) -> String {
}
+/// Fetch a 1-based, inclusive line range from a document's canonical
+/// markdown rendering (`fmt_canonical_to_markdown`).
+///
+/// A `line_end` past the last line is clamped and reported via `truncated`.
+/// `effective_end` is the last line still present after the optional
+/// `opts.max_tokens` trim (budgeted at 4 chars per token); the requested
+/// `line_start` / `line_end` are echoed back unchanged.
+///
+/// # Errors
+/// Returns a `StructuredError` with code `doc_not_found`,
+/// `span_not_supported` (PDF / audio assets) or `invalid_input` (zero or
+/// inverted bounds, or `line_start` past the last line). Errors from the
+/// document / asset lookups propagate via `?`.
fn fetch_span(
-    _app: &App,
-    _id: DocumentId,
-    _line_start: u32,
-    _line_end: u32,
-    _opts: FetchOpts,
+    app: &App,
+    id: DocumentId,
+    line_start: u32,
+    line_end: u32,
+    opts: FetchOpts,
) -> Result {
-    // Implemented in Task 5.
-    anyhow::bail!("fetch_span not yet implemented")
+    // Every failure below is a schema-tagged `StructuredError`; build them in
+    // one place so the individual sites cannot drift apart.
+    let fail = |code: &str, message: String, hint: Option<&str>| {
+        anyhow::Error::new(StructuredError(ErrorV1 {
+            schema_version: ERROR_V1_ID.to_string(),
+            code: code.to_string(),
+            message,
+            details: serde_json::Value::Null,
+            hint: hint.map(str::to_owned),
+        }))
+    };
+
+    let doc = ::get_document(&app.sqlite, &id)?
+        .ok_or_else(|| fail("doc_not_found", format!("doc_id '{}' not found", id.0), None))?;
+
+    // Reject line-incompatible media types (PDF / audio). `SourceType`
+    // (markdown / note / paper / reference / inbox) is the *user-facing*
+    // category, not the rendering format — the actual byte-level format
+    // lives on the source `RawAsset.media_type`. Look it up via
+    // workspace_path (unique key per asset).
+    if let Some(asset) = ::get_asset_by_workspace_path(
+        &app.sqlite,
+        &doc.workspace_path,
+    )? {
+        if matches!(
+            asset.media_type,
+            kebab_core::MediaType::Pdf | kebab_core::MediaType::Audio(_)
+        ) {
+            return Err(fail(
+                "span_not_supported",
+                format!(
+                    "doc '{}' has media_type {:?}; line-based span fetch unsupported. \
+                     Use `fetch chunk` or `fetch doc` instead.",
+                    id.0, asset.media_type
+                ),
+                Some("kind = chunk or kind = doc instead"),
+            ));
+        }
+    }
+
+    if line_start == 0 || line_end == 0 || line_end < line_start {
+        return Err(fail(
+            "invalid_input",
+            format!(
+                "line_start ({line_start}) and line_end ({line_end}) must be 1-based with start <= end"
+            ),
+            None,
+        ));
+    }
+
+    let full = fmt_canonical_to_markdown(&doc);
+    let lines: Vec<&str> = full.lines().collect();
+    // Saturate instead of silently wrapping on an absurdly long document.
+    let total = u32::try_from(lines.len()).unwrap_or(u32::MAX);
+
+    // A start past the last line (or any start in a document that renders to
+    // zero lines) has nothing to return; slicing `lines` with it would panic,
+    // so report it as a structured error instead.
+    if line_start > total {
+        return Err(fail(
+            "invalid_input",
+            format!(
+                "line_start ({line_start}) is past the last line ({total}) of doc '{}'",
+                id.0
+            ),
+            None,
+        ));
+    }
+
+    // `line_start <= total` and `line_start <= line_end` hold here, so the
+    // clamped end can never fall below the start.
+    let effective_end_raw = line_end.min(total);
+    let lo = (line_start - 1) as usize;
+    let hi = effective_end_raw as usize;
+    let mut text = lines[lo..hi].join("\n");
+
+    let mut truncated = effective_end_raw != line_end;
+    let mut effective_end = effective_end_raw;
+    if let Some(max_tokens) = opts.max_tokens {
+        let max_chars = max_tokens.saturating_mul(4);
+        if text.chars().count() > max_chars {
+            text = trim_to_chars(&text, max_chars);
+            truncated = true;
+            // Count the lines still (at least partly) present after the trim.
+            // If the trim leaves no text at all, `kept` is 0 and
+            // `effective_end` becomes `line_start - 1`.
+            let kept = text.lines().count() as u32;
+            effective_end = (line_start - 1) + kept;
+        }
+    }
+
+    let now = OffsetDateTime::now_utc();
+    let stale = compute_stale(
+        doc_metadata_updated_at(&doc),
+        now,
+        app.config.search.stale_threshold_days,
+    );
+
+    Ok(FetchResult {
+        kind: FetchKind::Span,
+        doc_id: doc.doc_id.clone(),
+        doc_path: doc.workspace_path.clone(),
+        indexed_at: doc_metadata_updated_at(&doc),
+        stale,
+        chunk: None,
+        context_before: Vec::new(),
+        context_after: Vec::new(),
+        text: Some(text),
+        line_start: Some(line_start),
+        line_end: Some(line_end),
+        effective_end: Some(effective_end),
+        truncated,
+    })
}
/// p9-fb-35: list chunks for a document in ordinal order, return
diff --git a/crates/kebab-app/tests/fetch_integration.rs b/crates/kebab-app/tests/fetch_integration.rs
index c745527..7b445cc 100644
--- a/crates/kebab-app/tests/fetch_integration.rs
+++ b/crates/kebab-app/tests/fetch_integration.rs
@@ -153,3 +153,100 @@ fn fetch_doc_with_max_tokens_truncates() {
let text = result.text.expect("doc text");
assert!(text.chars().count() <= 100, "trimmed text len {}", text.chars().count());
}
+
+#[test]
+fn fetch_span_returns_line_range() {
+    // A list is used on purpose: the canonical-to-markdown roundtrip renders
+    // its 5 items as single lines joined by `\n`. Separate paragraphs would
+    // be joined by `\n\n`, and CommonMark soft breaks inside one paragraph
+    // collapse to spaces (see crates/kebab-parse-md/src/blocks.rs,
+    // `Event::SoftBreak`).
+    let env = common::TestEnv::new();
+    common::ingest_md(
+        &env,
+        "lines.md",
+        "- Line one.\n- Line two.\n- Line three.\n- Line four.\n- Line five.\n",
+    );
+    let app = env.app();
+
+    // Resolve the doc id through a lexical search for the ingested text.
+    let hits = app
+        .search(kebab_core::SearchQuery {
+            text: "Line".to_string(),
+            mode: kebab_core::SearchMode::Lexical,
+            k: 1,
+            filters: kebab_core::SearchFilters::default(),
+        })
+        .unwrap();
+    let span = FetchQuery::Span {
+        doc_id: hits[0].doc_id.clone(),
+        line_start: 2,
+        line_end: 4,
+    };
+
+    let result = app.fetch(span, FetchOpts::default()).unwrap();
+    assert_eq!(result.kind, FetchKind::Span);
+    let text = result.text.expect("span text");
+    assert_eq!(text.lines().count(), 3, "span should be 3 lines: {text:?}");
+    assert_eq!(result.line_start, Some(2));
+    assert_eq!(result.line_end, Some(4));
+    assert_eq!(result.effective_end, Some(4));
+    assert!(!result.truncated);
+}
+
+/// A `line_end` past the last rendered line is clamped: the requested
+/// bounds are echoed back unchanged, `effective_end` matches the number of
+/// lines actually returned, and the result is flagged as truncated.
+#[test]
+fn fetch_span_clamps_line_end_when_out_of_range() {
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "short.md", "Line one.\nLine two.\n");
+    let app = env.app();
+    let q = kebab_core::SearchQuery {
+        text: "Line".to_string(),
+        mode: kebab_core::SearchMode::Lexical,
+        k: 1,
+        filters: kebab_core::SearchFilters::default(),
+    };
+    let hits = app.search(q).unwrap();
+    // Fail with a readable message instead of an index panic if the
+    // ingested document is not found by the search.
+    let doc_id = hits
+        .first()
+        .expect("ingested doc should match the lexical query")
+        .doc_id
+        .clone();
+
+    let result = app
+        .fetch(
+            FetchQuery::Span {
+                doc_id,
+                line_start: 1,
+                line_end: 999,
+            },
+            FetchOpts::default(),
+        )
+        .unwrap();
+    assert_eq!(result.kind, FetchKind::Span);
+    let text = result.text.expect("span text");
+    let actual_lines = text.lines().count();
+    // The request itself is reported back as asked...
+    assert_eq!(result.line_start, Some(1));
+    assert_eq!(result.line_end, Some(999));
+    // ...while the effective end reflects what was really returned.
+    assert_eq!(result.effective_end, Some(actual_lines as u32));
+    assert!(actual_lines < 999);
+    assert!(result.truncated, "a clamped line_end must be reported as truncated");
+}
+
+#[test]
+fn fetch_span_invalid_input_when_zero_lines() {
+    // Line numbers are 1-based, so a 0..0 span must be refused.
+    let env = common::TestEnv::new();
+    common::ingest_md(&env, "a.md", "Line one.\n");
+    let app = env.app();
+
+    let hits = app
+        .search(kebab_core::SearchQuery {
+            text: "Line".to_string(),
+            mode: kebab_core::SearchMode::Lexical,
+            k: 1,
+            filters: kebab_core::SearchFilters::default(),
+        })
+        .unwrap();
+    let zero_span = FetchQuery::Span {
+        doc_id: hits[0].doc_id.clone(),
+        line_start: 0,
+        line_end: 0,
+    };
+
+    let err = app.fetch(zero_span, FetchOpts::default()).unwrap_err();
+    let rendered = err.to_string();
+    assert!(rendered.contains("invalid_input"), "got: {err}");
+}