feat(app): App::fetch span mode + PDF/audio rejection (fb-35)
Span mode returns a line-based slice of the fmt_canonical_to_markdown output. A PDF or audio source_type yields a span_not_supported StructuredError. An out-of-range line_end clamps to the total line count; effective_end reflects the post-budget trim. Zero or inverted bounds yield invalid_input.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -153,3 +153,100 @@ fn fetch_doc_with_max_tokens_truncates() {
|
||||
let text = result.text.expect("doc text");
|
||||
assert!(text.chars().count() <= 100, "trimmed text len {}", text.chars().count());
|
||||
}
|
||||
|
||||
/// Fetching `FetchQuery::Span` with `line_start: 2` / `line_end: 4` must
/// return exactly three lines, echo the requested bounds back, and not be
/// flagged as truncated.
#[test]
fn fetch_span_returns_line_range() {
    // A bullet list is used on purpose: the canonical-to-markdown roundtrip
    // emits its 5 items as single-line entries joined by `\n`. Paragraphs
    // would instead be joined by `\n\n`, and CommonMark soft breaks inside
    // one paragraph collapse to spaces — see
    // crates/kebab-parse-md/src/blocks.rs `Event::SoftBreak`.
    let fixture = common::TestEnv::new();
    common::ingest_md(
        &fixture,
        "lines.md",
        "- Line one.\n- Line two.\n- Line three.\n- Line four.\n- Line five.\n",
    );
    let app = fixture.app();

    // Look up the ingested document's id with a lexical search (k = 1).
    let matches = app
        .search(kebab_core::SearchQuery {
            text: String::from("Line"),
            mode: kebab_core::SearchMode::Lexical,
            k: 1,
            filters: kebab_core::SearchFilters::default(),
        })
        .unwrap();
    let span = FetchQuery::Span {
        doc_id: matches[0].doc_id.clone(),
        line_start: 2,
        line_end: 4,
    };

    let fetched = app.fetch(span, FetchOpts::default()).unwrap();

    assert_eq!(fetched.kind, FetchKind::Span);
    let text = fetched.text.expect("span text");
    assert_eq!(text.lines().count(), 3, "span should be 3 lines: {text:?}");
    assert_eq!(fetched.line_start, Some(2));
    assert_eq!(fetched.line_end, Some(4));
    assert_eq!(fetched.effective_end, Some(4));
    assert!(!fetched.truncated);
}
|
||||
|
||||
/// Requesting a span whose `line_end` (999) lies far past the end of the
/// document must still succeed, and `effective_end` must equal the number of
/// lines actually returned rather than the requested bound.
#[test]
fn fetch_span_clamps_line_end_when_out_of_range() {
    let env = common::TestEnv::new();
    common::ingest_md(&env, "short.md", "Line one.\nLine two.\n");
    let app = env.app();

    // Look up the ingested document's id with a lexical search (k = 1).
    let q = kebab_core::SearchQuery {
        text: "Line".to_string(),
        mode: kebab_core::SearchMode::Lexical,
        k: 1,
        filters: kebab_core::SearchFilters::default(),
    };
    let hits = app.search(q).unwrap();
    let doc_id = hits[0].doc_id.clone();

    let result = app
        .fetch(
            FetchQuery::Span {
                doc_id,
                line_start: 1,
                line_end: 999,
            },
            FetchOpts::default(),
        )
        .unwrap();

    let text = result.text.expect("span text");
    let actual_lines = text.lines().count();

    // Sanity check first: the fixture is tiny, so the returned span must be
    // far shorter than the requested upper bound.
    assert!(actual_lines < 999);

    // Checked conversion instead of `as u32`: a truncating cast could make
    // the comparison below pass or fail for the wrong reason.
    let expected_end = u32::try_from(actual_lines).expect("line count fits in u32");
    assert_eq!(result.effective_end, Some(expected_end));
}
|
||||
|
||||
/// A span request with `line_start: 0` and `line_end: 0` must be rejected,
/// and the rendered error must mention `invalid_input`.
#[test]
fn fetch_span_invalid_input_when_zero_lines() {
    let fixture = common::TestEnv::new();
    common::ingest_md(&fixture, "a.md", "Line one.\n");
    let app = fixture.app();

    // Look up the ingested document's id with a lexical search (k = 1).
    let query = kebab_core::SearchQuery {
        text: String::from("Line"),
        mode: kebab_core::SearchMode::Lexical,
        k: 1,
        filters: kebab_core::SearchFilters::default(),
    };
    let doc_id = app.search(query).unwrap()[0].doc_id.clone();

    let zero_span = FetchQuery::Span {
        doc_id,
        line_start: 0,
        line_end: 0,
    };
    let err = app.fetch(zero_span, FetchOpts::default()).unwrap_err();

    let rendered = err.to_string();
    assert!(rendered.contains("invalid_input"), "got: {err}");
}
|
||||
|
||||
Reference in New Issue
Block a user