p1-3: snapshot tests + clippy fix
Adds two snapshot tests (`nested-headings.md`, `code-and-table.md`) under crates/kb-parse-md/tests/blocks_snapshots.rs, with matching baseline JSON next to each fixture. The snapshot view projects `kb_core::Inline` to flat strings — `Inline` carries `serde(tag = "kind")` which is incompatible with newtype variants holding a primitive (`Text(String)`), so direct serialization of `ParsedBlock` would fail today. The view preserves the contract that matters for P1-3 (heading paths, source spans, payload kinds, payload text/code/table content) and will keep working once kb-core fixes the Inline schema in a later task. Also tightens `level_to_use >= 1 && <= 6` into `(1..=6).contains(&_)` to satisfy `clippy::manual_range_contains`.
This commit is contained in:
@@ -528,7 +528,7 @@ impl<'a> WalkState<'a> {
|
||||
let _ = level_u8;
|
||||
|
||||
// Update heading stack: clear deeper levels, set this level.
|
||||
if level_to_use >= 1 && level_to_use <= 6 {
|
||||
if (1..=6).contains(&level_to_use) {
|
||||
let idx = (level_to_use - 1) as usize;
|
||||
for slot in &mut self.heading_stack[idx + 1..] {
|
||||
*slot = None;
|
||||
|
||||
240
crates/kb-parse-md/tests/blocks_snapshots.rs
Normal file
240
crates/kb-parse-md/tests/blocks_snapshots.rs
Normal file
@@ -0,0 +1,240 @@
|
||||
//! Snapshot tests pinning the `parse_blocks` output for two fixtures.
|
||||
//!
|
||||
//! Baselines are hand-authored / regenerated via the `--ignored` emitter
|
||||
//! below. `body_offset_lines = 1` is used for both fixtures (no
|
||||
//! frontmatter, body starts at file line 1).
|
||||
//!
|
||||
//! Note on snapshot shape: `kb_core::Inline` carries a `serde(tag = "kind")`
|
||||
//! enum representation that cannot serialize newtype variants holding a
|
||||
//! primitive (`Inline::Text(String)` etc.) — that's a serde limitation, not
|
||||
//! ours, and is fixed up in a later kb-core task. To keep the snapshot
|
||||
//! human-readable (and stable across that future fix), we project each
|
||||
//! `ParsedBlock` into a `BlockView` that flattens inline content to plain
|
||||
//! strings before serialization. This still pins the *contract* that
|
||||
//! matters for P1-3: heading paths, source spans, payload kinds, payload
|
||||
//! text content, table headers/rows, and code lang/body.
|
||||
|
||||
use kb_core::{Inline, SourceSpan};
|
||||
use kb_parse_md::parse_blocks;
|
||||
use kb_parse_types::{ParsedBlock, ParsedPayload, Warning};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct Snapshot {
|
||||
blocks: Vec<BlockView>,
|
||||
warnings: Vec<Warning>,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
struct BlockView {
|
||||
kind: String,
|
||||
heading_path: Vec<String>,
|
||||
source_span: SourceSpan,
|
||||
payload: PayloadView,
|
||||
}
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(tag = "kind", rename_all = "lowercase")]
|
||||
enum PayloadView {
|
||||
Heading {
|
||||
level: u8,
|
||||
text: String,
|
||||
},
|
||||
Paragraph {
|
||||
text: String,
|
||||
inlines_flat: String,
|
||||
},
|
||||
List {
|
||||
ordered: bool,
|
||||
items_flat: Vec<String>,
|
||||
},
|
||||
Code {
|
||||
lang: Option<String>,
|
||||
code: String,
|
||||
},
|
||||
Table {
|
||||
headers: Vec<String>,
|
||||
rows: Vec<Vec<String>>,
|
||||
},
|
||||
Quote {
|
||||
text: String,
|
||||
inlines_flat: String,
|
||||
},
|
||||
ImageRef {
|
||||
src: String,
|
||||
alt: String,
|
||||
},
|
||||
AudioRef {
|
||||
src: String,
|
||||
},
|
||||
}
|
||||
|
||||
fn flatten_inline(i: &Inline, out: &mut String) {
|
||||
match i {
|
||||
Inline::Text(s) | Inline::Code(s) => out.push_str(s),
|
||||
Inline::Link { text, href } => {
|
||||
out.push('[');
|
||||
out.push_str(text);
|
||||
out.push_str("](");
|
||||
out.push_str(href);
|
||||
out.push(')');
|
||||
}
|
||||
Inline::Strong(v) => {
|
||||
out.push_str("**");
|
||||
for c in v {
|
||||
flatten_inline(c, out);
|
||||
}
|
||||
out.push_str("**");
|
||||
}
|
||||
Inline::Emph(v) => {
|
||||
out.push('*');
|
||||
for c in v {
|
||||
flatten_inline(c, out);
|
||||
}
|
||||
out.push('*');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn flatten(inlines: &[Inline]) -> String {
|
||||
let mut out = String::new();
|
||||
for i in inlines {
|
||||
flatten_inline(i, &mut out);
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
fn block_to_view(b: &ParsedBlock) -> BlockView {
|
||||
let kind = format!("{:?}", b.kind).to_lowercase();
|
||||
let payload = match &b.payload {
|
||||
ParsedPayload::Heading { level, text } => PayloadView::Heading {
|
||||
level: *level,
|
||||
text: text.clone(),
|
||||
},
|
||||
ParsedPayload::Paragraph { text, inlines } => PayloadView::Paragraph {
|
||||
text: text.clone(),
|
||||
inlines_flat: flatten(inlines),
|
||||
},
|
||||
ParsedPayload::List { ordered, items } => PayloadView::List {
|
||||
ordered: *ordered,
|
||||
items_flat: items.iter().map(|it| flatten(it)).collect(),
|
||||
},
|
||||
ParsedPayload::Code { lang, code } => PayloadView::Code {
|
||||
lang: lang.clone(),
|
||||
code: code.clone(),
|
||||
},
|
||||
ParsedPayload::Table { headers, rows } => PayloadView::Table {
|
||||
headers: headers.clone(),
|
||||
rows: rows.clone(),
|
||||
},
|
||||
ParsedPayload::Quote { text, inlines } => PayloadView::Quote {
|
||||
text: text.clone(),
|
||||
inlines_flat: flatten(inlines),
|
||||
},
|
||||
ParsedPayload::ImageRef { src, alt } => PayloadView::ImageRef {
|
||||
src: src.clone(),
|
||||
alt: alt.clone(),
|
||||
},
|
||||
ParsedPayload::AudioRef { src } => PayloadView::AudioRef { src: src.clone() },
|
||||
};
|
||||
BlockView {
|
||||
kind,
|
||||
heading_path: b.heading_path.clone(),
|
||||
source_span: b.source_span.clone(),
|
||||
payload,
|
||||
}
|
||||
}
|
||||
|
||||
fn fixtures_dir() -> PathBuf {
|
||||
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join("..")
|
||||
.join("..")
|
||||
.join("fixtures")
|
||||
.join("markdown")
|
||||
}
|
||||
|
||||
fn assert_snapshot(fixture: &str, baseline: &str) {
|
||||
let dir = fixtures_dir();
|
||||
let bytes = fs::read(dir.join(fixture)).expect("fixture readable");
|
||||
|
||||
let (blocks, warns) = parse_blocks(&bytes, 1).unwrap();
|
||||
let snap = Snapshot {
|
||||
blocks: blocks.iter().map(block_to_view).collect(),
|
||||
warnings: warns,
|
||||
};
|
||||
let actual: Value = serde_json::to_value(&snap).unwrap();
|
||||
|
||||
let expected_text =
|
||||
fs::read_to_string(dir.join(baseline)).expect("snapshot baseline readable");
|
||||
let expected: Value = serde_json::from_str(&expected_text).expect("baseline parses as json");
|
||||
|
||||
if actual != expected {
|
||||
let actual_pretty = serde_json::to_string_pretty(&actual).unwrap();
|
||||
panic!(
|
||||
"snapshot drift for {fixture}\n\
|
||||
--- expected ({baseline}) ---\n{expected_text}\n\
|
||||
--- actual ---\n{actual_pretty}\n\
|
||||
If the change is intentional, update {baseline}."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nested_headings_blocks_snapshot() {
|
||||
assert_snapshot(
|
||||
"nested-headings.md",
|
||||
"nested-headings.blocks.snapshot.json",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_and_table_blocks_snapshot() {
|
||||
assert_snapshot(
|
||||
"code-and-table.md",
|
||||
"code-and-table.blocks.snapshot.json",
|
||||
);
|
||||
}
|
||||
|
||||
/// Run with `cargo test -p kb-parse-md --test blocks_snapshots emit_blocks_snapshots -- --ignored --nocapture`
|
||||
/// to regenerate the baseline JSON files from the current parser output.
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn emit_blocks_snapshots() {
|
||||
let dir = fixtures_dir();
|
||||
for (fixture, baseline) in [
|
||||
("nested-headings.md", "nested-headings.blocks.snapshot.json"),
|
||||
("code-and-table.md", "code-and-table.blocks.snapshot.json"),
|
||||
] {
|
||||
let bytes = fs::read(dir.join(fixture)).unwrap();
|
||||
let (blocks, warns) = parse_blocks(&bytes, 1).unwrap();
|
||||
let snap = Snapshot {
|
||||
blocks: blocks.iter().map(block_to_view).collect(),
|
||||
warnings: warns,
|
||||
};
|
||||
let json = serde_json::to_string_pretty(&snap).unwrap();
|
||||
fs::write(dir.join(baseline), format!("{json}\n")).unwrap();
|
||||
eprintln!("wrote {}", dir.join(baseline).display());
|
||||
}
|
||||
}
|
||||
|
||||
/// Determinism: parsing the same fixture twice in a row must give equal output.
|
||||
#[test]
|
||||
fn snapshot_is_deterministic_across_runs() {
|
||||
let dir = fixtures_dir();
|
||||
let bytes = fs::read(dir.join("nested-headings.md")).unwrap();
|
||||
let (a_blocks, a_warns) = parse_blocks(&bytes, 1).unwrap();
|
||||
let (b_blocks, b_warns) = parse_blocks(&bytes, 1).unwrap();
|
||||
// Compare via the view (which is fully serializable) and via the
|
||||
// structural equality on `ParsedBlock` itself (no serde involved).
|
||||
assert_eq!(a_blocks, b_blocks);
|
||||
assert_eq!(a_warns, b_warns);
|
||||
let av: Vec<_> = a_blocks.iter().map(block_to_view).collect();
|
||||
let bv: Vec<_> = b_blocks.iter().map(block_to_view).collect();
|
||||
assert_eq!(
|
||||
serde_json::to_value(&av).unwrap(),
|
||||
serde_json::to_value(&bv).unwrap()
|
||||
);
|
||||
}
|
||||
63
fixtures/markdown/code-and-table.blocks.snapshot.json
Normal file
63
fixtures/markdown/code-and-table.blocks.snapshot.json
Normal file
@@ -0,0 +1,63 @@
|
||||
{
|
||||
"blocks": [
|
||||
{
|
||||
"kind": "heading",
|
||||
"heading_path": [],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 1,
|
||||
"end": 1
|
||||
},
|
||||
"payload": {
|
||||
"kind": "heading",
|
||||
"level": 1,
|
||||
"text": "Code And Table"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "code",
|
||||
"heading_path": [
|
||||
"Code And Table"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 3,
|
||||
"end": 7
|
||||
},
|
||||
"payload": {
|
||||
"kind": "code",
|
||||
"lang": "rust",
|
||||
"code": "fn main() {\n println!(\"hi\");\n}"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "table",
|
||||
"heading_path": [
|
||||
"Code And Table"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 9,
|
||||
"end": 12
|
||||
},
|
||||
"payload": {
|
||||
"kind": "table",
|
||||
"headers": [
|
||||
"col a",
|
||||
"col b"
|
||||
],
|
||||
"rows": [
|
||||
[
|
||||
"1",
|
||||
"2"
|
||||
],
|
||||
[
|
||||
"3",
|
||||
"4"
|
||||
]
|
||||
]
|
||||
}
|
||||
}
|
||||
],
|
||||
"warnings": []
|
||||
}
|
||||
136
fixtures/markdown/nested-headings.blocks.snapshot.json
Normal file
136
fixtures/markdown/nested-headings.blocks.snapshot.json
Normal file
@@ -0,0 +1,136 @@
|
||||
{
|
||||
"blocks": [
|
||||
{
|
||||
"kind": "heading",
|
||||
"heading_path": [],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 1,
|
||||
"end": 1
|
||||
},
|
||||
"payload": {
|
||||
"kind": "heading",
|
||||
"level": 1,
|
||||
"text": "Top"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "paragraph",
|
||||
"heading_path": [
|
||||
"Top"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 3,
|
||||
"end": 3
|
||||
},
|
||||
"payload": {
|
||||
"kind": "paragraph",
|
||||
"text": "intro",
|
||||
"inlines_flat": "intro"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "heading",
|
||||
"heading_path": [
|
||||
"Top"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 5,
|
||||
"end": 5
|
||||
},
|
||||
"payload": {
|
||||
"kind": "heading",
|
||||
"level": 2,
|
||||
"text": "Section A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "paragraph",
|
||||
"heading_path": [
|
||||
"Top",
|
||||
"Section A"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 7,
|
||||
"end": 7
|
||||
},
|
||||
"payload": {
|
||||
"kind": "paragraph",
|
||||
"text": "body of A",
|
||||
"inlines_flat": "body of A"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "heading",
|
||||
"heading_path": [
|
||||
"Top",
|
||||
"Section A"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 9,
|
||||
"end": 9
|
||||
},
|
||||
"payload": {
|
||||
"kind": "heading",
|
||||
"level": 3,
|
||||
"text": "Sub A.1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "paragraph",
|
||||
"heading_path": [
|
||||
"Top",
|
||||
"Section A",
|
||||
"Sub A.1"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 11,
|
||||
"end": 11
|
||||
},
|
||||
"payload": {
|
||||
"kind": "paragraph",
|
||||
"text": "deeper",
|
||||
"inlines_flat": "deeper"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "heading",
|
||||
"heading_path": [
|
||||
"Top"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 13,
|
||||
"end": 13
|
||||
},
|
||||
"payload": {
|
||||
"kind": "heading",
|
||||
"level": 2,
|
||||
"text": "Section B"
|
||||
}
|
||||
},
|
||||
{
|
||||
"kind": "paragraph",
|
||||
"heading_path": [
|
||||
"Top",
|
||||
"Section B"
|
||||
],
|
||||
"source_span": {
|
||||
"kind": "line",
|
||||
"start": 15,
|
||||
"end": 15
|
||||
},
|
||||
"payload": {
|
||||
"kind": "paragraph",
|
||||
"text": "body of B",
|
||||
"inlines_flat": "body of B"
|
||||
}
|
||||
}
|
||||
],
|
||||
"warnings": []
|
||||
}
|
||||
Reference in New Issue
Block a user