From a166b7051c8a1e3890c965684f2b7a102872a221 Mon Sep 17 00:00:00 2001 From: altair823 Date: Thu, 30 Apr 2026 05:17:32 +0000 Subject: [PATCH] p0-1: wire-schema stubs, doc/spec stubs, V001 migration, fixtures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - docs/wire-schema/v1/ ships 7 schema stubs (citation, search_hit, answer, ingest_report, doc_summary, chunk_inspection, doctor) that pin schema_version + required fields per design §2. Full property validation lands in later phases. - docs/spec/ ships 7 markdown stubs each linking to the canonical frozen design (domain-model, ids, canonical-document, chunk-policy, citation-policy, module-boundaries, ai-generation-guidelines). - migrations/V001__init.sql contains only schema_meta + migrations tables per design §5.1; data tables ship in P1-6/P2-1/P3-3. - fixtures/ has the 11 subdirectories every downstream task references (markdown, source-fs, search/{lexical,hybrid}, embed, vector, rag, eval, image, pdf, audio). Empty subdirs use .gitkeep so they track. fixtures/markdown/ ships the 3 phase-0 fixtures: simple-note.md, nested-headings.md, code-and-table.md. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/spec/ai-generation-guidelines.md | 12 ++++++ docs/spec/canonical-document.md | 7 ++++ docs/spec/chunk-policy.md | 8 ++++ docs/spec/citation-policy.md | 7 ++++ docs/spec/domain-model.md | 6 +++ docs/spec/ids.md | 6 +++ docs/spec/module-boundaries.md | 8 ++++ docs/wire-schema/v1/answer.schema.json | 31 +++++++++++++++ .../v1/chunk_inspection.schema.json | 32 +++++++++++++++ docs/wire-schema/v1/citation.schema.json | 19 +++++++++ docs/wire-schema/v1/doc_summary.schema.json | 39 +++++++++++++++++++ docs/wire-schema/v1/doctor.schema.json | 25 ++++++++++++ docs/wire-schema/v1/ingest_report.schema.json | 28 +++++++++++++ docs/wire-schema/v1/search_hit.schema.json | 38 ++++++++++++++++++ fixtures/audio/.gitkeep | 0 fixtures/embed/.gitkeep | 0 fixtures/eval/.gitkeep | 0 fixtures/image/.gitkeep | 0 fixtures/markdown/code-and-table.md | 12 ++++++ fixtures/markdown/nested-headings.md | 15 +++++++ fixtures/markdown/simple-note.md | 3 ++ fixtures/pdf/.gitkeep | 0 fixtures/rag/.gitkeep | 0 fixtures/search/hybrid/.gitkeep | 0 fixtures/search/lexical/.gitkeep | 0 fixtures/source-fs/.gitkeep | 0 fixtures/vector/.gitkeep | 0 migrations/V001__init.sql | 15 +++++++ 28 files changed, 311 insertions(+) create mode 100644 docs/spec/ai-generation-guidelines.md create mode 100644 docs/spec/canonical-document.md create mode 100644 docs/spec/chunk-policy.md create mode 100644 docs/spec/citation-policy.md create mode 100644 docs/spec/domain-model.md create mode 100644 docs/spec/ids.md create mode 100644 docs/spec/module-boundaries.md create mode 100644 docs/wire-schema/v1/answer.schema.json create mode 100644 docs/wire-schema/v1/chunk_inspection.schema.json create mode 100644 docs/wire-schema/v1/citation.schema.json create mode 100644 docs/wire-schema/v1/doc_summary.schema.json create mode 100644 docs/wire-schema/v1/doctor.schema.json create mode 100644 docs/wire-schema/v1/ingest_report.schema.json create mode 100644 
docs/wire-schema/v1/search_hit.schema.json create mode 100644 fixtures/audio/.gitkeep create mode 100644 fixtures/embed/.gitkeep create mode 100644 fixtures/eval/.gitkeep create mode 100644 fixtures/image/.gitkeep create mode 100644 fixtures/markdown/code-and-table.md create mode 100644 fixtures/markdown/nested-headings.md create mode 100644 fixtures/markdown/simple-note.md create mode 100644 fixtures/pdf/.gitkeep create mode 100644 fixtures/rag/.gitkeep create mode 100644 fixtures/search/hybrid/.gitkeep create mode 100644 fixtures/search/lexical/.gitkeep create mode 100644 fixtures/source-fs/.gitkeep create mode 100644 fixtures/vector/.gitkeep create mode 100644 migrations/V001__init.sql diff --git a/docs/spec/ai-generation-guidelines.md b/docs/spec/ai-generation-guidelines.md new file mode 100644 index 0000000..b0040fd --- /dev/null +++ b/docs/spec/ai-generation-guidelines.md @@ -0,0 +1,12 @@ +# AI generation guidelines + +When implementing tasks against this codebase: + +- Treat the frozen design doc as the single source of truth. Do not invent + new fields, traits, or enum variants. +- Prefer editing existing files to creating new ones; reuse types from + `kb-core` instead of duplicating shapes. +- For each task, follow the task spec under `tasks/p{phase}/p{phase}-{task}.md`. + +Canonical source: +[docs/superpowers/specs/2026-04-27-kb-final-form-design.md](../superpowers/specs/2026-04-27-kb-final-form-design.md), §11 + §12. diff --git a/docs/spec/canonical-document.md b/docs/spec/canonical-document.md new file mode 100644 index 0000000..0926fc2 --- /dev/null +++ b/docs/spec/canonical-document.md @@ -0,0 +1,7 @@ +# CanonicalDocument + +Medium-agnostic representation of a document with `Block`s, `SourceSpan`s, +and provenance. + +Canonical source: +[docs/superpowers/specs/2026-04-27-kb-final-form-design.md](../superpowers/specs/2026-04-27-kb-final-form-design.md), §3.4 + §3.7a. 
diff --git a/docs/spec/chunk-policy.md b/docs/spec/chunk-policy.md new file mode 100644 index 0000000..fe204fc --- /dev/null +++ b/docs/spec/chunk-policy.md @@ -0,0 +1,8 @@ +# Chunk policy + +`ChunkPolicy` carries `target_tokens`, `overlap_tokens`, +`respect_markdown_headings`, and `chunker_version`. Chunkers expose a +`policy_hash` so chunk IDs include the policy. + +Canonical source: +[docs/superpowers/specs/2026-04-27-kb-final-form-design.md](../superpowers/specs/2026-04-27-kb-final-form-design.md), §3.5 + §7.1 + §7.2. diff --git a/docs/spec/citation-policy.md b/docs/spec/citation-policy.md new file mode 100644 index 0000000..3ef0f82 --- /dev/null +++ b/docs/spec/citation-policy.md @@ -0,0 +1,7 @@ +# Citation policy + +Citations use W3C Media Fragments URIs to locate evidence inside a +document. Five variants: `Line`, `Page`, `Region`, `Caption`, `Time`. + +Canonical source: +[docs/superpowers/specs/2026-04-27-kb-final-form-design.md](../superpowers/specs/2026-04-27-kb-final-form-design.md), §3.5 + §0 Q3. diff --git a/docs/spec/domain-model.md b/docs/spec/domain-model.md new file mode 100644 index 0000000..98ef0ec --- /dev/null +++ b/docs/spec/domain-model.md @@ -0,0 +1,6 @@ +# Domain model + +The domain types live in `kb-core` and mirror the frozen design exactly. + +Canonical source: +[docs/superpowers/specs/2026-04-27-kb-final-form-design.md](../superpowers/specs/2026-04-27-kb-final-form-design.md), §3. diff --git a/docs/spec/ids.md b/docs/spec/ids.md new file mode 100644 index 0000000..19bbc15 --- /dev/null +++ b/docs/spec/ids.md @@ -0,0 +1,6 @@ +# ID recipe + +All `kb-*` IDs are 32 hex chars: the first 32 hex characters of the hex-encoded `blake3(canonical_json(tuple))` digest. + +Canonical source: +[docs/superpowers/specs/2026-04-27-kb-final-form-design.md](../superpowers/specs/2026-04-27-kb-final-form-design.md), §4. 
diff --git a/docs/spec/module-boundaries.md b/docs/spec/module-boundaries.md new file mode 100644 index 0000000..7225608 --- /dev/null +++ b/docs/spec/module-boundaries.md @@ -0,0 +1,8 @@ +# Module boundaries + +`kb-core` is a leaf crate — every other crate depends on it. Parsers depend on +`kb-parse-types` (not on `kb-normalize`); `kb-normalize` depends on +`kb-parse-types` (not on parsers). UI crates depend only on `kb-app`. + +Canonical source: +[docs/superpowers/specs/2026-04-27-kb-final-form-design.md](../superpowers/specs/2026-04-27-kb-final-form-design.md), §8. diff --git a/docs/wire-schema/v1/answer.schema.json b/docs/wire-schema/v1/answer.schema.json new file mode 100644 index 0000000..7428cf1 --- /dev/null +++ b/docs/wire-schema/v1/answer.schema.json @@ -0,0 +1,31 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/answer.schema.json", + "title": "Answer v1", + "description": "Stub schema — declares the schema_version label and the required fields per design §2.3.", + "type": "object", + "required": [ + "schema_version", + "answer", + "citations", + "grounded", + "model", + "prompt_template_version", + "retrieval", + "usage", + "created_at" + ], + "properties": { + "schema_version": { "const": "answer.v1" }, + "answer": { "type": "string" }, + "citations": { "type": "array" }, + "grounded": { "type": "boolean" }, + "refusal_reason": { "type": ["string", "null"] }, + "model": { "type": "object" }, + "embedding": { "type": ["object", "null"] }, + "prompt_template_version": { "type": "string" }, + "retrieval": { "type": "object" }, + "usage": { "type": "object" }, + "created_at": { "type": "string" } + } +} diff --git a/docs/wire-schema/v1/chunk_inspection.schema.json b/docs/wire-schema/v1/chunk_inspection.schema.json new file mode 100644 index 0000000..0771e22 --- /dev/null +++ b/docs/wire-schema/v1/chunk_inspection.schema.json @@ -0,0 +1,32 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": 
"https://kb.local/wire/v1/chunk_inspection.schema.json", + "title": "ChunkInspection v1", + "description": "Stub schema — declares the schema_version label and the required fields per design §2.6.", + "type": "object", + "required": [ + "schema_version", + "chunk_id", + "doc_id", + "doc_path", + "heading_path", + "text", + "source_spans", + "block_ids", + "token_estimate", + "chunker_version" + ], + "properties": { + "schema_version": { "const": "chunk_inspection.v1" }, + "chunk_id": { "type": "string" }, + "doc_id": { "type": "string" }, + "doc_path": { "type": "string" }, + "heading_path": { "type": "array", "items": { "type": "string" } }, + "text": { "type": "string" }, + "source_spans": { "type": "array" }, + "block_ids": { "type": "array", "items": { "type": "string" } }, + "token_estimate": { "type": "integer", "minimum": 0 }, + "chunker_version": { "type": "string" }, + "embeddings": { "type": "array" } + } +} diff --git a/docs/wire-schema/v1/citation.schema.json b/docs/wire-schema/v1/citation.schema.json new file mode 100644 index 0000000..90ebe0f --- /dev/null +++ b/docs/wire-schema/v1/citation.schema.json @@ -0,0 +1,19 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/citation.schema.json", + "title": "Citation v1", + "description": "Stub schema — declares the schema_version label and the always-present fields. 
Variant-discriminated property validation lands in a later phase.", + "type": "object", + "required": ["schema_version", "kind", "path", "uri"], + "properties": { + "schema_version": { "const": "citation.v1" }, + "kind": { "enum": ["line", "page", "region", "caption", "time"] }, + "path": { "type": "string" }, + "uri": { "type": "string" }, + "line": { "type": "object" }, + "page": { "type": "object" }, + "region": { "type": "object" }, + "caption": { "type": "object" }, + "time": { "type": "object" } + } +} diff --git a/docs/wire-schema/v1/doc_summary.schema.json b/docs/wire-schema/v1/doc_summary.schema.json new file mode 100644 index 0000000..d757e8f --- /dev/null +++ b/docs/wire-schema/v1/doc_summary.schema.json @@ -0,0 +1,39 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/doc_summary.schema.json", + "title": "DocSummary v1", + "description": "Stub schema — declares the schema_version label and the required fields per design §2.5.", + "type": "object", + "required": [ + "schema_version", + "doc_id", + "doc_path", + "title", + "lang", + "tags", + "trust_level", + "source_type", + "byte_len", + "chunk_count", + "created_at", + "updated_at", + "parser_version", + "chunker_version" + ], + "properties": { + "schema_version": { "const": "doc_summary.v1" }, + "doc_id": { "type": "string" }, + "doc_path": { "type": "string" }, + "title": { "type": "string" }, + "lang": { "type": "string" }, + "tags": { "type": "array", "items": { "type": "string" } }, + "trust_level": { "type": "string" }, + "source_type": { "type": "string" }, + "byte_len": { "type": "integer", "minimum": 0 }, + "chunk_count": { "type": "integer", "minimum": 0 }, + "created_at": { "type": "string" }, + "updated_at": { "type": "string" }, + "parser_version": { "type": "string" }, + "chunker_version": { "type": "string" } + } +} diff --git a/docs/wire-schema/v1/doctor.schema.json b/docs/wire-schema/v1/doctor.schema.json new file mode 100644 index 
0000000..212c165 --- /dev/null +++ b/docs/wire-schema/v1/doctor.schema.json @@ -0,0 +1,25 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/doctor.schema.json", + "title": "DoctorReport v1", + "description": "Stub schema — declares the schema_version label and the required fields per design §2.7.", + "type": "object", + "required": ["schema_version", "ok", "checks"], + "properties": { + "schema_version": { "const": "doctor.v1" }, + "ok": { "type": "boolean" }, + "checks": { + "type": "array", + "items": { + "type": "object", + "required": ["name", "ok", "detail"], + "properties": { + "name": { "type": "string" }, + "ok": { "type": "boolean" }, + "detail": { "type": "string" }, + "hint": { "type": ["string", "null"] } + } + } + } + } +} diff --git a/docs/wire-schema/v1/ingest_report.schema.json b/docs/wire-schema/v1/ingest_report.schema.json new file mode 100644 index 0000000..be25ad0 --- /dev/null +++ b/docs/wire-schema/v1/ingest_report.schema.json @@ -0,0 +1,28 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/ingest_report.schema.json", + "title": "IngestReport v1", + "description": "Stub schema — declares the schema_version label and the required fields per design §2.4.", + "type": "object", + "required": [ + "schema_version", + "scope", + "scanned", + "new", + "updated", + "skipped", + "errors", + "duration_ms" + ], + "properties": { + "schema_version": { "const": "ingest_report.v1" }, + "scope": { "type": "object" }, + "scanned": { "type": "integer", "minimum": 0 }, + "new": { "type": "integer", "minimum": 0 }, + "updated": { "type": "integer", "minimum": 0 }, + "skipped": { "type": "integer", "minimum": 0 }, + "errors": { "type": "integer", "minimum": 0 }, + "duration_ms": { "type": "integer", "minimum": 0 }, + "items": { "type": ["array", "null"] } + } +} diff --git a/docs/wire-schema/v1/search_hit.schema.json b/docs/wire-schema/v1/search_hit.schema.json 
new file mode 100644 index 0000000..01b8a96 --- /dev/null +++ b/docs/wire-schema/v1/search_hit.schema.json @@ -0,0 +1,38 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://kb.local/wire/v1/search_hit.schema.json", + "title": "SearchHit v1", + "description": "Stub schema — declares the schema_version label and the required top-level fields per design §2.2.", + "type": "object", + "required": [ + "schema_version", + "rank", + "score", + "chunk_id", + "doc_id", + "doc_path", + "heading_path", + "snippet", + "citation", + "retrieval", + "index_version", + "chunker_version" + ], + "properties": { + "schema_version": { "const": "search_hit.v1" }, + "rank": { "type": "integer", "minimum": 1 }, + "score": { "type": "number" }, + "chunk_id": { "type": "string" }, + "doc_id": { "type": "string" }, + "doc_path": { "type": "string" }, + "heading_path": { "type": "array", "items": { "type": "string" } }, + "section_label": { "type": ["string", "null"] }, + "snippet": { "type": "string" }, + "snippet_full_text": { "type": "boolean" }, + "citation": { "type": "object" }, + "retrieval": { "type": "object" }, + "index_version": { "type": "string" }, + "embedding_model": { "type": ["string", "null"] }, + "chunker_version": { "type": "string" } + } +} diff --git a/fixtures/audio/.gitkeep b/fixtures/audio/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/embed/.gitkeep b/fixtures/embed/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/eval/.gitkeep b/fixtures/eval/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/image/.gitkeep b/fixtures/image/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/markdown/code-and-table.md b/fixtures/markdown/code-and-table.md new file mode 100644 index 0000000..6a4a59f --- /dev/null +++ b/fixtures/markdown/code-and-table.md @@ -0,0 +1,12 @@ +# Code And Table + +```rust +fn main() { + println!("hi"); +} +``` + +| col a | 
col b | +|-------|-------| +| 1 | 2 | +| 3 | 4 | diff --git a/fixtures/markdown/nested-headings.md b/fixtures/markdown/nested-headings.md new file mode 100644 index 0000000..43f9d72 --- /dev/null +++ b/fixtures/markdown/nested-headings.md @@ -0,0 +1,15 @@ +# Top + +intro + +## Section A + +body of A + +### Sub A.1 + +deeper + +## Section B + +body of B diff --git a/fixtures/markdown/simple-note.md b/fixtures/markdown/simple-note.md new file mode 100644 index 0000000..72bc4bf --- /dev/null +++ b/fixtures/markdown/simple-note.md @@ -0,0 +1,3 @@ +# Simple Note + +A short paragraph. diff --git a/fixtures/pdf/.gitkeep b/fixtures/pdf/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/rag/.gitkeep b/fixtures/rag/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/search/hybrid/.gitkeep b/fixtures/search/hybrid/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/search/lexical/.gitkeep b/fixtures/search/lexical/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/source-fs/.gitkeep b/fixtures/source-fs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/vector/.gitkeep b/fixtures/vector/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/migrations/V001__init.sql b/migrations/V001__init.sql new file mode 100644 index 0000000..2db2d5e --- /dev/null +++ b/migrations/V001__init.sql @@ -0,0 +1,15 @@ +-- V001__init.sql — schema bootstrap. +-- Per design §5.1 + §5.9. Only the meta + migrations tables land here; +-- data tables (assets, documents, blocks, chunks, fts5, …) ship in later +-- phase-specific migrations (P1-6 / P2-1 / P3-3). + +CREATE TABLE schema_meta ( + key TEXT PRIMARY KEY NOT NULL, -- explicit NOT NULL: SQLite accepts NULL in a non-INTEGER PRIMARY KEY otherwise + value TEXT NOT NULL +); + +CREATE TABLE migrations ( + id INTEGER PRIMARY KEY, + applied_at TEXT NOT NULL, + description TEXT NOT NULL +);