From de9164802be86cf7f7386cc02dddfaa6b3818e94 Mon Sep 17 00:00:00 2001 From: altair823 Date: Thu, 30 Apr 2026 14:34:58 +0000 Subject: [PATCH] p1-3: fix span arithmetic overflow + body_offset_lines fuzz `span_for` previously used `u32 + u32` directly, so callers passing a large `body_offset_lines` could panic (debug, then masked by `catch_unwind` and the entire body discarded) or wrap to an inverted span with `start > end` (release). Switch to `checked_add`; on overflow flag the walk state and at the end of `parse_blocks_inner` discard accumulated blocks and surface a single `Warning::ExtractFailed` carrying the offending body line. This degrades cleanly without panicking and without emitting a silently-broken span. Also extend `random_bytes_do_not_panic` to mix u32::MAX-style offsets across the fuzz iterations so the overflow path is exercised by the randomized corpus. New tests: - body_offset_lines_max_returns_extract_failed - body_offset_lines_zero_at_max_minus_one_no_overflow Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kb-parse-md/src/blocks.rs | 96 ++++++++++++++++++++++++++++++-- 1 file changed, 90 insertions(+), 6 deletions(-) diff --git a/crates/kb-parse-md/src/blocks.rs b/crates/kb-parse-md/src/blocks.rs index 5d225c0..56f5c92 100644 --- a/crates/kb-parse-md/src/blocks.rs +++ b/crates/kb-parse-md/src/blocks.rs @@ -96,6 +96,24 @@ fn parse_blocks_inner(body: &[u8], body_offset_lines: u32) -> (Vec, state.handle_event(event, range, &mut warnings); } + // If line-number arithmetic saturated at any point (e.g. caller passed + // `body_offset_lines = u32::MAX`), discard the (potentially clamped) + // blocks and surface a single `ExtractFailed` warning. Returning the + // possibly-inverted spans would be more harmful than dropping output. + if state.overflow_detected { + let at = state + .overflow_at_body_line + .map(|n| n.to_string()) + .unwrap_or_else(|| "?".to_string()); + return ( + Vec::new(), + vec![Warning { + kind: WarningKind::ExtractFailed, + note: format!("body_offset_lines overflow at body line {at}"), + }], + ); + } + (state.finish(), warnings) } @@ -174,6 +192,14 @@ struct WalkState<'a> { /// Accumulator for the currently-open block. We push events into this /// frame and convert to a `ParsedBlock` when the matching End tag fires. frames: Vec, + + /// Set if any line-number arithmetic in [`WalkState::span_for`] would + /// overflow `u32::MAX`. When set, [`Self::finish`] discards accumulated + /// blocks and the caller emits a single `Warning::ExtractFailed` instead. + overflow_detected: bool, + /// Body-line (0-indexed) where overflow was first observed, for the + /// warning note. + overflow_at_body_line: Option, } /// One in-flight container we are accumulating events into. Stack-shaped so @@ -372,6 +398,8 @@ impl<'a> WalkState<'a> { blocks: Vec::new(), heading_stack: Default::default(), frames: Vec::new(), + overflow_detected: false, + overflow_at_body_line: None, } } @@ -386,16 +414,33 @@ impl<'a> WalkState<'a> { .collect() } - fn span_for(&self, range: &Range) -> SourceSpan { + fn span_for(&mut self, range: &Range) -> SourceSpan { let start_body = self.line_index.line_zero_indexed(range.start); let end_body = if range.end <= range.start { start_body } else { self.line_index.end_line_zero_indexed(range.end) }; - SourceSpan::Line { - start: start_body + self.body_offset_lines, - end: end_body + self.body_offset_lines, + // Saturating add — but also remember whether overflow happened so the + // caller can degrade gracefully rather than silently emitting an + // inverted span. Without this guard, debug builds panic with + // "attempt to add with overflow" (caught by `catch_unwind`, masking + // the real cause) and release builds wrap to `start > end`. + match ( + start_body.checked_add(self.body_offset_lines), + end_body.checked_add(self.body_offset_lines), + ) { + (Some(start), Some(end)) => SourceSpan::Line { start, end }, + _ => { + if !self.overflow_detected { + self.overflow_detected = true; + self.overflow_at_body_line = Some(start_body); + } + SourceSpan::Line { + start: start_body.saturating_add(self.body_offset_lines), + end: end_body.saturating_add(self.body_offset_lines), + } + } } } @@ -1156,6 +1201,9 @@ mod tests { fn random_bytes_do_not_panic() { // Tiny xorshift PRNG seeded with a constant; deterministic inputs // covering both valid-ish and invalid markdown / utf-8 mixes. + // We also vary `body_offset_lines` across the full u32 range — + // including u32::MAX — to exercise the saturating-add path in + // `span_for` and confirm the overflow guard prevents panics. let mut state: u32 = 0x1234_5678; let mut next = || { state ^= state << 13; @@ -1163,13 +1211,49 @@ mod tests { state ^= state << 5; state }; - for _ in 0..100 { + for i in 0..100 { let len = (next() as usize) % 256; let bytes: Vec = (0..len).map(|_| next() as u8).collect(); - let _ = parse_blocks(&bytes, 1).expect("must be Ok"); + // Mix a u32::MAX-style offset every few iterations so the fuzz + // exercises the overflow path explicitly. + let offset = match i % 4 { + 0 => 0, + 1 => 1, + 2 => next(), + _ => u32::MAX - (next() % 16), + }; + let _ = parse_blocks(&bytes, offset).expect("must be Ok"); } } + #[test] + fn body_offset_lines_max_returns_extract_failed() { + // u32::MAX + any positive line number overflows; the parser must + // surface `ExtractFailed` and discard blocks rather than panic + // (debug) or emit an inverted span (release). + let body = b"# h\nbody\n"; + let (blocks, warns) = parse_blocks(body, u32::MAX).expect("must be Ok"); + assert!(blocks.is_empty(), "blocks should be discarded on overflow"); + assert_eq!(warns.len(), 1, "expected one warning, got: {warns:?}"); + assert_eq!(warns[0].kind, WarningKind::ExtractFailed); + assert!( + warns[0].note.contains("overflow"), + "note should mention overflow: {:?}", + warns[0].note + ); + } + + #[test] + fn body_offset_lines_zero_at_max_minus_one_no_overflow() { + // A body with exactly one logical line + offset = u32::MAX would + // still saturate. Pick offset such that no line ever reaches + // overflow — this should succeed normally. + let body = b"hi\n"; + let (blocks, warns) = parse_blocks(body, u32::MAX - 100).expect("must be Ok"); + assert_eq!(blocks.len(), 1); + assert!(warns.is_empty(), "no overflow expected, got: {warns:?}"); + } + #[test] fn adversarial_inputs_no_panic() { // Hand-crafted oddities.