fix(fb-35): address PR #126 round 1 review

- fetch_span: fix panic when line_start > total or the doc is empty
  (return empty text with effective_end = line_start - 1 instead of
  taking an out-of-bounds slice)
- truncated: reserved for budget-driven truncation only; line
  range clamp signaled via effective_end < line_end
- spec / SKILL.md / README: align rejection wording to "PDF /
  audio" (matches code; Image OCR allowed for span)
- store: warning comment on list_chunk_ids_for_doc — chunk_id
  hash sort does NOT preserve document position; real fix is a
  chunks.ordinal column, tracked as follow-up
- surrounding_chunks: saturating_add to defend against u32::MAX
  context arg on 32-bit targets
- tests: line_start > total returns empty text; chunk context at the
  doc boundary clamps its lower bound

Deferred nits (follow-up): table-separator strict CommonMark form;
MCP per-mode strict validation; CLI chunk_id truncation in plain
output. None block correctness.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
th-kim0823
2026-05-10 00:45:29 +09:00
parent 2a6b3dc7e6
commit 7dddc1d706
5 changed files with 129 additions and 17 deletions

View File

@@ -250,3 +250,70 @@ fn fetch_span_invalid_input_when_zero_lines() {
.unwrap_err();
assert!(err.to_string().contains("invalid_input"), "got: {err}");
}
/// A span request whose `line_start` lies past the last line of the document
/// must succeed with empty text instead of panicking, and must leave
/// `truncated` unset (this test treats that flag as budget-only).
#[test]
fn fetch_span_line_start_beyond_total_returns_empty_text() {
    let env = common::TestEnv::new();
    // Two-line document; the range requested below starts far past its end.
    common::ingest_md(&env, "two_lines.md", "- Line one.\n- Line two.\n");
    let app = env.app();

    // Resolve the document id through a lexical search.
    let hits = app
        .search(kebab_core::SearchQuery {
            text: "Line".to_string(),
            mode: kebab_core::SearchMode::Lexical,
            k: 1,
            filters: kebab_core::SearchFilters::default(),
        })
        .unwrap();
    // Fail with a descriptive message (not an index-out-of-bounds panic) if
    // the search unexpectedly returns nothing.
    let doc_id = hits
        .first()
        .expect("lexical search for \"Line\" should return the ingested document")
        .doc_id
        .clone();

    let result = app
        .fetch(
            FetchQuery::Span {
                doc_id,
                line_start: 100,
                line_end: 200,
            },
            FetchOpts::default(),
        )
        .unwrap();

    let text = result.text.expect("text field");
    assert!(text.is_empty(), "out-of-range request returns empty text");
    assert!(
        !result.truncated,
        "out-of-range is NOT truncated (budget-only flag)"
    );
}
/// Fetching a chunk with `context: Some(2)` from a small multi-heading
/// document must succeed, and neither context side may exceed the requested 2.
#[test]
fn fetch_chunk_context_at_first_chunk_clamps_lower_bound() {
    let env = common::TestEnv::new();
    // Multi-chunk markdown so context ±N has neighbors.
    let body =
        "# H1\n\nFirst chunk text body.\n\n# H2\n\nSecond chunk.\n\n# H3\n\nThird chunk.\n";
    common::ingest_md(&env, "boundary.md", body);
    let app = env.app();

    // Locate the target chunk through a lexical search.
    let hits = app
        .search(kebab_core::SearchQuery {
            text: "First".to_string(),
            mode: kebab_core::SearchMode::Lexical,
            k: 1,
            filters: kebab_core::SearchFilters::default(),
        })
        .unwrap();
    // Fail with a descriptive message (not an index-out-of-bounds panic) if
    // the search unexpectedly returns nothing.
    let chunk_id = hits
        .first()
        .expect("lexical search for \"First\" should return the ingested chunk")
        .chunk_id
        .clone();

    let result = app
        .fetch(
            FetchQuery::Chunk(chunk_id),
            FetchOpts {
                context: Some(2),
                max_tokens: None,
            },
        )
        .unwrap();

    // `context_before` may legitimately be empty when the target is the first
    // chunk, so only upper bounds are checked. Each side is bounded on its
    // own: a combined `before + after <= 4` check would let one side overshoot
    // (e.g. 0 + 4) and still pass.
    assert!(
        result.context_before.len() <= 2,
        "context_before must not exceed the requested context of 2, got {}",
        result.context_before.len()
    );
    assert!(
        result.context_after.len() <= 2,
        "context_after must not exceed the requested context of 2, got {}",
        result.context_after.len()
    );
}