From 23ff4d68af4a45c1db7ca3fcf1b2b49a5d5fa415 Mon Sep 17 00:00:00 2001 From: altair823 Date: Thu, 30 Apr 2026 14:40:42 +0000 Subject: [PATCH] p1-3: preserve whitespace in link text across SoftBreak/HardBreak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `[multi\nline](http://x)` produced `Inline::Link.text = "multiline"` because the SoftBreak/HardBreak handler called `push_text(" ")` — which updates `paragraph.text` and the inline buffer, but NOT the open link frame's flattened text accumulator. Text events flowed through `push_link_text`; line breaks didn't. Add `push_link_text(" ")` alongside the existing `push_text(" ")` in the break handler so a line break inside `[ ... ](href)` collapses to a visible space rather than disappearing. New tests: - link_with_soft_break_preserves_space_in_text - link_with_hard_break_preserves_space_in_text Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kb-parse-md/src/blocks.rs | 57 +++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/crates/kb-parse-md/src/blocks.rs b/crates/kb-parse-md/src/blocks.rs index f080d0c..7c1a0ed 100644 --- a/crates/kb-parse-md/src/blocks.rs +++ b/crates/kb-parse-md/src/blocks.rs @@ -1080,7 +1080,15 @@ impl<'a> WalkState<'a> { current_cell.push(' '); return; } - self.with_current_inlines(|buf| buf.push_text(" ")); + // Update both `paragraph.text` (via push_text) and the + // open link's flattened text accumulator (via + // push_link_text). Without push_link_text here, a + // multi-line `[text\nmore](href)` collapses to "textmore" + // — losing the visible space between words. + self.with_current_inlines(|buf| { + buf.push_text(" "); + buf.push_link_text(" "); + }); } // Everything else (HTML, footnote refs, task list markers, math, // rules, etc.) is dropped silently per design §3.4. 
@@ -1639,6 +1647,53 @@ mod tests {
 
     // ---- inline filter -------------------------------------------------------
 
+    #[test]
+    fn link_with_soft_break_preserves_space_in_text() {
+        // Without the push_link_text fix, this collapses to "multiline".
+        let body = "[multi\nline](http://x)\n";
+        let (blocks, _) = parse(body, 1);
+        assert_eq!(blocks.len(), 1);
+        match &blocks[0].payload {
+            ParsedPayload::Paragraph { inlines, .. } => {
+                let link = inlines
+                    .iter()
+                    .find(|i| matches!(i, Inline::Link { .. }))
+                    .expect("link present");
+                match link {
+                    Inline::Link { text, href } => {
+                        assert_eq!(text, "multi line");
+                        assert_eq!(href, "http://x");
+                    }
+                    _ => unreachable!(),
+                }
+            }
+            _ => panic!("expected paragraph, got {:?}", blocks[0].payload),
+        }
+    }
+
+    #[test]
+    fn link_with_hard_break_preserves_space_in_text() {
+        // Two trailing spaces + newline = HardBreak in CommonMark; one space is only a SoftBreak.
+        let body = "[multi  \nline](http://x)\n";
+        let (blocks, _) = parse(body, 1);
+        assert_eq!(blocks.len(), 1);
+        match &blocks[0].payload {
+            ParsedPayload::Paragraph { inlines, .. } => {
+                let link = inlines
+                    .iter()
+                    .find(|i| matches!(i, Inline::Link { .. }))
+                    .expect("link present");
+                match link {
+                    Inline::Link { text, .. } => {
+                        assert_eq!(text, "multi line");
+                    }
+                    _ => unreachable!(),
+                }
+            }
+            _ => panic!("expected paragraph"),
+        }
+    }
+
     #[test]
     fn only_allowed_inlines_emitted() {
         let body = "**bold** *em* `code` [link](u)\n";