diff --git a/crates/kebab-app/src/fetch.rs b/crates/kebab-app/src/fetch.rs index 63ad507..b44d65c 100644 --- a/crates/kebab-app/src/fetch.rs +++ b/crates/kebab-app/src/fetch.rs @@ -168,14 +168,104 @@ fn trim_to_chars(s: &str, n: usize) -> String { } fn fetch_span( - _app: &App, - _id: DocumentId, - _line_start: u32, - _line_end: u32, - _opts: FetchOpts, + app: &App, + id: DocumentId, + line_start: u32, + line_end: u32, + opts: FetchOpts, ) -> Result { - // Implemented in Task 5. - anyhow::bail!("fetch_span not yet implemented") + let doc = ::get_document(&app.sqlite, &id)? + .ok_or_else(|| { + anyhow::Error::new(StructuredError(ErrorV1 { + schema_version: ERROR_V1_ID.to_string(), + code: "doc_not_found".to_string(), + message: format!("doc_id '{}' not found", id.0), + details: serde_json::Value::Null, + hint: None, + })) + })?; + + // Reject line-incompatible media types (PDF / audio). `SourceType` + // (markdown / note / paper / reference / inbox) is the *user-facing* + // category, not the rendering format — the actual byte-level format + // lives on the source `RawAsset.media_type`. Look it up via + // workspace_path (unique key per asset). + if let Some(asset) = ::get_asset_by_workspace_path( + &app.sqlite, + &doc.workspace_path, + )? { + if matches!( + asset.media_type, + kebab_core::MediaType::Pdf | kebab_core::MediaType::Audio(_) + ) { + return Err(anyhow::Error::new(StructuredError(ErrorV1 { + schema_version: ERROR_V1_ID.to_string(), + code: "span_not_supported".to_string(), + message: format!( + "doc '{}' has media_type {:?}; line-based span fetch unsupported. 
\ + Use `fetch chunk` or `fetch doc` instead.", + id.0, asset.media_type + ), + details: serde_json::Value::Null, + hint: Some("kind = chunk or kind = doc instead".to_string()), + }))); + } + } + + if line_start == 0 || line_end == 0 || line_end < line_start { + return Err(anyhow::Error::new(StructuredError(ErrorV1 { + schema_version: ERROR_V1_ID.to_string(), + code: "invalid_input".to_string(), + message: format!( + "line_start ({line_start}) and line_end ({line_end}) must be 1-based with start <= end" + ), + details: serde_json::Value::Null, + hint: None, + }))); + } + + let full = fmt_canonical_to_markdown(&doc); + let lines: Vec<&str> = full.lines().collect(); + let total = lines.len() as u32; + let effective_end_raw = line_end.min(total).max(line_start); + let lo = (line_start - 1) as usize; + let hi = effective_end_raw as usize; + let mut text = lines[lo..hi].join("\n"); + + let mut truncated = effective_end_raw != line_end; + let mut effective_end = effective_end_raw; + if let Some(max_tokens) = opts.max_tokens { + let max_chars = max_tokens.saturating_mul(4); + if text.chars().count() > max_chars { + text = trim_to_chars(&text, max_chars); + truncated = true; + let kept = text.lines().count() as u32; + effective_end = (line_start - 1) + kept; + } + } + + let now = OffsetDateTime::now_utc(); + let stale = compute_stale( + doc_metadata_updated_at(&doc), + now, + app.config.search.stale_threshold_days, + ); + + Ok(FetchResult { + kind: FetchKind::Span, + doc_id: doc.doc_id.clone(), + doc_path: doc.workspace_path.clone(), + indexed_at: doc_metadata_updated_at(&doc), + stale, + chunk: None, + context_before: Vec::new(), + context_after: Vec::new(), + text: Some(text), + line_start: Some(line_start), + line_end: Some(line_end), + effective_end: Some(effective_end), + truncated, + }) } /// p9-fb-35: list chunks for a document in ordinal order, return diff --git a/crates/kebab-app/tests/fetch_integration.rs b/crates/kebab-app/tests/fetch_integration.rs index 
// Span-fetch integration tests added to
// crates/kebab-app/tests/fetch_integration.rs (patch hunk
// `@@ -153,3 +153,100 @@`), laid out as plain Rust.

/// A span strictly inside the document comes back whole: three lines for
/// 2..=4, no clamping, no truncation.
#[test]
fn fetch_span_returns_line_range() {
    let env = common::TestEnv::new();
    // Use a list so the canonical-to-markdown roundtrip emits 5
    // single-line entries joined by `\n` (paragraphs would be joined by
    // `\n\n`, and CommonMark soft breaks inside one paragraph collapse to
    // spaces — see crates/kebab-parse-md/src/blocks.rs `Event::SoftBreak`).
    let body = "- Line one.\n- Line two.\n- Line three.\n- Line four.\n- Line five.\n";
    common::ingest_md(&env, "lines.md", body);
    let app = env.app();

    let q = kebab_core::SearchQuery {
        text: "Line".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 1,
        filters: kebab_core::SearchFilters::default(),
    };
    let hits = app.search(q).unwrap();
    // Fail with a readable message instead of an index-out-of-bounds panic
    // if the search unexpectedly returns nothing.
    let doc_id = hits
        .first()
        .expect("lexical search should find the ingested document")
        .doc_id
        .clone();

    let result = app
        .fetch(
            FetchQuery::Span {
                doc_id,
                line_start: 2,
                line_end: 4,
            },
            FetchOpts::default(),
        )
        .unwrap();
    assert_eq!(result.kind, FetchKind::Span);
    let text = result.text.expect("span text");
    let line_count = text.lines().count();
    assert_eq!(line_count, 3, "span should be 3 lines: {text:?}");
    assert_eq!(result.line_start, Some(2));
    assert_eq!(result.line_end, Some(4));
    assert_eq!(result.effective_end, Some(4));
    assert!(!result.truncated);
}

/// A `line_end` far beyond the document is clamped: `effective_end` matches
/// the lines actually returned, the request is echoed back unchanged, and
/// the result is flagged as truncated.
#[test]
fn fetch_span_clamps_line_end_when_out_of_range() {
    let env = common::TestEnv::new();
    common::ingest_md(&env, "short.md", "Line one.\nLine two.\n");
    let app = env.app();
    let q = kebab_core::SearchQuery {
        text: "Line".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 1,
        filters: kebab_core::SearchFilters::default(),
    };
    let hits = app.search(q).unwrap();
    let doc_id = hits
        .first()
        .expect("lexical search should find the ingested document")
        .doc_id
        .clone();

    let result = app
        .fetch(
            FetchQuery::Span {
                doc_id,
                line_start: 1,
                line_end: 999,
            },
            FetchOpts::default(),
        )
        .unwrap();
    assert_eq!(result.kind, FetchKind::Span);
    // The requested range is reported as asked for ...
    assert_eq!(result.line_start, Some(1));
    assert_eq!(result.line_end, Some(999));
    // ... and the clamp is visible through `truncated` / `effective_end`.
    assert!(result.truncated, "clamped span must be flagged truncated");
    // The exact rendered line count is deliberately not pinned here; only
    // that `effective_end` agrees with the text that came back.
    let text = result.text.expect("span text");
    let actual_lines = text.lines().count();
    assert_eq!(result.effective_end, Some(actual_lines as u32));
    assert!(actual_lines < 999);
}

/// Line numbers are 1-based and ordered: zero bounds and a reversed range
/// are both rejected with `invalid_input`.
#[test]
fn fetch_span_invalid_input_when_zero_lines() {
    let env = common::TestEnv::new();
    common::ingest_md(&env, "a.md", "Line one.\n");
    let app = env.app();
    let q = kebab_core::SearchQuery {
        text: "Line".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 1,
        filters: kebab_core::SearchFilters::default(),
    };
    let hits = app.search(q).unwrap();
    let doc_id = hits
        .first()
        .expect("lexical search should find the ingested document")
        .doc_id
        .clone();

    let err = app
        .fetch(
            FetchQuery::Span {
                doc_id: doc_id.clone(),
                line_start: 0,
                line_end: 0,
            },
            FetchOpts::default(),
        )
        .unwrap_err();
    assert!(err.to_string().contains("invalid_input"), "got: {err}");

    // Same validation branch, other arm: end before start.
    let err = app
        .fetch(
            FetchQuery::Span {
                doc_id,
                line_start: 3,
                line_end: 2,
            },
            FetchOpts::default(),
        )
        .unwrap_err();
    assert!(err.to_string().contains("invalid_input"), "got: {err}");
}