feat(app): App::fetch span mode + PDF/audio rejection (fb-35)

Line-based slice over fmt_canonical_to_markdown output.
PDF / audio source_type → span_not_supported StructuredError.
Out-of-range line_end clamps to total; effective_end reflects
post-budget trim. invalid_input on zero / inverted bounds.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
th-kim0823
2026-05-09 23:54:22 +09:00
parent 7d1f855f7e
commit 1b9d89eb3a
2 changed files with 194 additions and 7 deletions

View File

@@ -153,3 +153,100 @@ fn fetch_doc_with_max_tokens_truncates() {
let text = result.text.expect("doc text");
assert!(text.chars().count() <= 100, "trimmed text len {}", text.chars().count());
}
/// A span fetch for lines 2..=4 of a five-entry document returns exactly
/// three lines and echoes the requested bounds without truncation.
#[test]
fn fetch_span_returns_line_range() {
    let env = common::TestEnv::new();
    // The fixture is a list on purpose: the canonical-to-markdown roundtrip
    // emits its 5 entries as single lines joined by `\n`. Paragraphs would
    // be joined by `\n\n`, and CommonMark soft breaks inside one paragraph
    // collapse to spaces (see crates/kebab-parse-md/src/blocks.rs,
    // `Event::SoftBreak`), so neither gives one output line per input line.
    common::ingest_md(
        &env,
        "lines.md",
        "- Line one.\n- Line two.\n- Line three.\n- Line four.\n- Line five.\n",
    );
    let app = env.app();

    // Lexical search with k = 1 is only used to obtain a doc id.
    let hits = app
        .search(kebab_core::SearchQuery {
            text: "Line".to_string(),
            mode: kebab_core::SearchMode::Lexical,
            k: 1,
            filters: kebab_core::SearchFilters::default(),
        })
        .unwrap();

    let span_request = FetchQuery::Span {
        doc_id: hits[0].doc_id.clone(),
        line_start: 2,
        line_end: 4,
    };
    let fetched = app.fetch(span_request, FetchOpts::default()).unwrap();

    assert_eq!(fetched.kind, FetchKind::Span);
    let text = fetched.text.expect("span text");
    assert_eq!(text.lines().count(), 3, "span should be 3 lines: {text:?}");
    assert_eq!(fetched.line_start, Some(2));
    assert_eq!(fetched.line_end, Some(4));
    assert_eq!(fetched.effective_end, Some(4));
    assert!(!fetched.truncated);
}
/// Requesting a span whose `line_end` (999) lies beyond the end of a short
/// document must still succeed, and `effective_end` must equal the number of
/// lines actually returned rather than the requested bound.
#[test]
fn fetch_span_clamps_line_end_when_out_of_range() {
    let env = common::TestEnv::new();
    common::ingest_md(&env, "short.md", "Line one.\nLine two.\n");
    let app = env.app();

    // Lexical search with k = 1 is only used to obtain a doc id.
    let q = kebab_core::SearchQuery {
        text: "Line".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 1,
        filters: kebab_core::SearchFilters::default(),
    };
    let hits = app.search(q).unwrap();
    let doc_id = hits[0].doc_id.clone();

    let result = app
        .fetch(
            FetchQuery::Span {
                doc_id,
                line_start: 1,
                line_end: 999,
            },
            FetchOpts::default(),
        )
        .unwrap();

    let text = result.text.expect("span text");
    let actual_lines = text.lines().count();
    // Checked conversion: an `as u32` cast would silently wrap on overflow
    // and could make the equality below pass for the wrong reason.
    let expected_end = u32::try_from(actual_lines).expect("line count fits in u32");
    assert_eq!(result.effective_end, Some(expected_end));
    assert!(actual_lines < 999);
}
/// A span request with `line_start` and `line_end` both 0 is rejected, and
/// the error's display text contains `invalid_input`.
#[test]
fn fetch_span_invalid_input_when_zero_lines() {
    let env = common::TestEnv::new();
    common::ingest_md(&env, "a.md", "Line one.\n");
    let app = env.app();

    // Lexical search with k = 1 is only used to obtain a doc id.
    let lookup = kebab_core::SearchQuery {
        text: "Line".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 1,
        filters: kebab_core::SearchFilters::default(),
    };
    let doc_id = app.search(lookup).unwrap()[0].doc_id.clone();

    let zero_span = FetchQuery::Span {
        doc_id,
        line_start: 0,
        line_end: 0,
    };
    let err = app.fetch(zero_span, FetchOpts::default()).unwrap_err();

    assert!(err.to_string().contains("invalid_input"), "got: {err}");
}