fix(fb-35): address PR #126 round 2 review
- wire schema: relax effective_end.minimum 1 → 0 + expand description to cover line-clamp + out-of-range sentinel (panic-fix R1 emits Some(0) when line_start=1 and range is beyond doc end — schema must accept it) - tests: tighten first-chunk-target boundary test to assert ≤ 2 total neighbors (3-chunk doc, N=2). Strict "first chunk → context_before empty" not assertable until chunks.ordinal column lands (R1 #9 architectural caveat) - store: trim contradiction in list_chunk_ids_for_doc warning comment — drop "good enough for sequentially chunked markdown" phrase that conflicts with "hash sort dominates" paragraph above Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -310,10 +310,20 @@ fn fetch_chunk_context_at_first_chunk_clamps_lower_bound() {
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
// context_before may be empty if target is the first chunk;
|
// p9-fb-35 R2: doc has 3 chunks; ±2 should clamp the total
|
||||||
// context_after should have ≤ 2 entries. Both clamped at doc boundaries.
|
// neighbor count to ≤ 2 + 1 (= excludes target).
|
||||||
|
//
|
||||||
|
// ⚠ Strict "first-chunk → context_before is empty" cannot be
|
||||||
|
// asserted here yet because chunks.ordinal column does not exist
|
||||||
|
// — `list_chunk_ids_for_doc` orders by `(created_at, chunk_id)`
|
||||||
|
// and chunk_id is a blake3 hash, so the "First chunk" content
|
||||||
|
// may land at any hash-order position within the doc. The clamp
|
||||||
|
// logic itself is correct (target_idx ± n → [0..len]); we just
|
||||||
|
// can't pin which chunk is hash-order-first. Tracked as
|
||||||
|
// follow-up: V007 chunks.ordinal migration.
|
||||||
|
let total = result.context_before.len() + result.context_after.len();
|
||||||
assert!(
|
assert!(
|
||||||
result.context_before.len() + result.context_after.len() <= 4,
|
total <= 2,
|
||||||
"doc boundary should clamp ±N to fit chunk count"
|
"doc with 3 chunks ±2 → at most 2 neighbors (excludes target), got {total}"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -389,10 +389,11 @@ impl SqliteStore {
|
|||||||
///
|
///
|
||||||
/// Real fix is a `chunks.ordinal` column (V007 migration) or sort
|
/// Real fix is a `chunks.ordinal` column (V007 migration) or sort
|
||||||
/// by `chunks.source_spans_json[0]` start offset. Tracked as
|
/// by `chunks.source_spans_json[0]` start offset. Tracked as
|
||||||
/// follow-up; current behavior is good enough for sequentially
|
/// follow-up. Until then `--context` neighbors are best-effort —
|
||||||
/// chunked markdown where created_at uniqueness varies, but PDFs
|
/// they may or may not align with document position depending on
|
||||||
/// (page-aligned chunks) and large docs may surprise the agent.
|
/// whether `chunk_id` hash order happens to match insertion order
|
||||||
/// See `tasks/HOTFIXES.md` if/when this is escalated.
|
/// for that particular doc. Large markdown / PDF (page-aligned
|
||||||
|
/// chunks) likely re-orders. See `tasks/HOTFIXES.md` if escalated.
|
||||||
pub fn list_chunk_ids_for_doc(
|
pub fn list_chunk_ids_for_doc(
|
||||||
&self,
|
&self,
|
||||||
doc_id: &kebab_core::DocumentId,
|
doc_id: &kebab_core::DocumentId,
|
||||||
|
|||||||
@@ -18,7 +18,7 @@
|
|||||||
"text": { "type": "string", "description": "kind=doc/span: markdown text (truncated if budget tripped)" },
|
"text": { "type": "string", "description": "kind=doc/span: markdown text (truncated if budget tripped)" },
|
||||||
"line_start": { "type": ["integer", "null"], "minimum": 1, "description": "kind=span: requested start line (1-based)" },
|
"line_start": { "type": ["integer", "null"], "minimum": 1, "description": "kind=span: requested start line (1-based)" },
|
||||||
"line_end": { "type": ["integer", "null"], "minimum": 1, "description": "kind=span: requested end line (1-based, inclusive)" },
|
"line_end": { "type": ["integer", "null"], "minimum": 1, "description": "kind=span: requested end line (1-based, inclusive)" },
|
||||||
"effective_end": { "type": ["integer", "null"], "minimum": 1, "description": "kind=span: actual emitted end line after budget truncation" },
|
"effective_end": { "type": ["integer", "null"], "minimum": 0, "description": "kind=span: actual end line of emitted text (1-based, inclusive). Equals `line_end` on full slice; less than `line_end` when (a) requested range exceeded total lines (line clamp) or (b) `--max-tokens` budget trimmed the tail. Special case: `line_start - 1` (which is 0 when line_start=1) signals the entire requested range was beyond doc end — returned `text` is empty." },
|
||||||
"truncated": { "type": "boolean", "description": "kind=doc/span: budget forced text truncation. Always false for chunk." }
|
"truncated": { "type": "boolean", "description": "kind=doc/span: budget forced text truncation. Always false for chunk." }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user