diff --git a/crates/kb-app/src/lib.rs b/crates/kb-app/src/lib.rs index 4c800ae..8196bfd 100644 --- a/crates/kb-app/src/lib.rs +++ b/crates/kb-app/src/lib.rs @@ -3,6 +3,19 @@ //! //! P0 implementations stub out — the signatures are frozen so that later //! phases swap in real bodies without breaking call sites. +//! +//! ## Wire-schema convention +//! +//! `kb-app` returns pure domain types (`IngestReport`, `DocSummary`, +//! `Chunk`, `SearchHit`, `Answer`, …) re-exported from `kb-core`. These do +//! NOT carry a `schema_version` field. The CLI (`kb-cli/src/wire.rs`) is +//! responsible for wrapping each Ok-path return value with the matching +//! `*.v1` envelope before emitting JSON on stdout in `--json` mode. The +//! sole exception is [`DoctorReport`], whose `schema_version` is part of +//! the struct because the doctor wire object IS its own structured +//! surface (no domain-side equivalent in `kb-core`). When adding a new +//! facade function in a later phase, remember: keep the return type pure, +//! and add a matching `wire_*` helper in `kb-cli/src/wire.rs`. use std::path::PathBuf; diff --git a/crates/kb-cli/src/main.rs b/crates/kb-cli/src/main.rs index 28f04e0..1674d42 100644 --- a/crates/kb-cli/src/main.rs +++ b/crates/kb-cli/src/main.rs @@ -8,6 +8,8 @@ use clap::{Parser, Subcommand}; use kb_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal}; +mod wire; + #[derive(Parser, Debug)] #[command(name = "kb", version, about = "personal local knowledge base")] struct Cli { @@ -217,7 +219,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { }; let report = kb_app::ingest(scope, *summary_only)?; if cli.json { - println!("{}", serde_json::to_string(&wire_ingest(&report))?); + println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?); } else { println!( "scanned {} new {} updated {} skipped {} errors {} ({} ms)", @@ -236,7 +238,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { ListWhat::Docs => { let docs = kb_app::list_docs(kb_core::DocFilter::default())?; if cli.json { - println!("{}", serde_json::to_string(&docs)?); + println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?); } else { for d in &docs { println!("{}\t{}", d.doc_id, d.doc_path.0); @@ -250,13 +252,18 @@ fn run(cli: &Cli) -> anyhow::Result<()> { InspectWhat::Doc { id } => { let doc_id: kb_core::DocumentId = id.parse()?; let doc = kb_app::inspect_doc(&doc_id)?; + // Inspect doc emits a `CanonicalDocument` — there's no §2 + // wire schema for it (P1-5 will decide whether this also + // becomes a tagged wrapper or stays as the raw domain + // object). Until then keep raw JSON, matching pre-P0-1 + // behaviour. println!("{}", serde_json::to_string(&doc)?); Ok(()) } InspectWhat::Chunk { id } => { let chunk_id: kb_core::ChunkId = id.parse()?; let chunk = kb_app::inspect_chunk(&chunk_id)?; - println!("{}", serde_json::to_string(&chunk)?); + println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?); Ok(()) } }, @@ -275,7 +282,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { }; let hits = kb_app::search(q)?; if cli.json { - println!("{}", serde_json::to_string(&hits)?); + println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?); } else { for h in &hits { println!("{:>2}. {:.2} {}", h.rank, h.retrieval.fusion_score, h.doc_path.0); @@ -301,7 +308,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { }; let ans = kb_app::ask(query, opts)?; if cli.json { - println!("{}", serde_json::to_string(&ans)?); + println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?); } else { println!("{}", ans.answer); } @@ -315,7 +322,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> { Cmd::Doctor => { let report = kb_app::doctor()?; if cli.json { - println!("{}", serde_json::to_string(&report)?); + println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?); } else { for c in &report.checks { let mark = if c.ok { "✓" } else { "✗" }; @@ -344,17 +351,3 @@ fn run(cli: &Cli) -> anyhow::Result<()> { } } -/// Convenience wrapper to emit `ingest_report.v1` schema_version. -fn wire_ingest(r: &kb_core::IngestReport) -> serde_json::Value { - serde_json::json!({ - "schema_version": "ingest_report.v1", - "scope": r.scope, - "scanned": r.scanned, - "new": r.new, - "updated": r.updated, - "skipped": r.skipped, - "errors": r.errors, - "duration_ms": r.duration_ms, - "items": r.items, - }) -} diff --git a/crates/kb-cli/src/wire.rs b/crates/kb-cli/src/wire.rs new file mode 100644 index 0000000..7dc236d --- /dev/null +++ b/crates/kb-cli/src/wire.rs @@ -0,0 +1,175 @@ +//! CLI-side wire-schema wrappers. +//! +//! Convention (per design §2): every JSON object emitted on stdout in +//! `--json` mode MUST carry a top-level `schema_version` of the form +//! `".v1"`. The kb-core types are pure domain types and do NOT +//! carry `schema_version` themselves; the CLI wraps them on emit. The one +//! exception is `DoctorReport`, where `schema_version` is part of the wire +//! type because the doctor wire object IS its own structured surface. +//! +//! Future tasks (P1-5, P3, P4, P5) replacing stub `bail!` paths must call +//! these helpers from the relevant CLI subcommand handler before +//! `serde_json::to_string`. +//! +//! Each helper is total (returns `serde_json::Value`, never an error) — the +//! input is a fully-typed `serde::Serialize` value, so the only way to fail +//! is OOM, which would have killed the process anyway. + +use serde_json::Value; + +use kb_app::DoctorReport; +use kb_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit}; + +/// Insert `schema_version` into an object-shaped `Value`. Helper for the +/// "serialize, then tag" pattern used by all the per-type wrappers below. +fn tag_object(mut v: Value, schema_version: &str) -> Value { + if let Value::Object(ref mut map) = v { + map.insert( + "schema_version".to_string(), + Value::String(schema_version.to_string()), + ); + } + v +} + +/// Wrap an [`IngestReport`] as `ingest_report.v1`. +pub fn wire_ingest(r: &IngestReport) -> Value { + let v = serde_json::to_value(r).expect("IngestReport serializes"); + tag_object(v, "ingest_report.v1") +} + +/// Wrap a single [`DocSummary`] as `doc_summary.v1`. +pub fn wire_doc_summary(d: &DocSummary) -> Value { + let v = serde_json::to_value(d).expect("DocSummary serializes"); + tag_object(v, "doc_summary.v1") +} + +/// Wrap a list of [`DocSummary`] values as a JSON array of `doc_summary.v1` +/// objects (one tag per element, per design §2.5 — there is no list-envelope +/// schema; the list shape is `[{schema_version: "doc_summary.v1", ...}, ...]`). +pub fn wire_doc_summaries(d: &[DocSummary]) -> Value { + Value::Array(d.iter().map(wire_doc_summary).collect()) +} + +/// Wrap a [`Chunk`] as `chunk_inspection.v1` (§2.6). NOTE: the wire schema +/// requires `doc_path`, which the kb-core `Chunk` does not currently carry — +/// when P1-5 wires the Ok-path, the implementation should either enrich +/// `Chunk` or pass `doc_path` alongside. For now this helper emits whatever +/// fields `Chunk` serializes with, plus the `schema_version` tag. +pub fn wire_chunk_inspection(c: &Chunk) -> Value { + let v = serde_json::to_value(c).expect("Chunk serializes"); + tag_object(v, "chunk_inspection.v1") +} + +/// Wrap a single [`SearchHit`] as `search_hit.v1`. +pub fn wire_search_hit(h: &SearchHit) -> Value { + let mut v = serde_json::to_value(h).expect("SearchHit serializes"); + // Promote `retrieval.fusion_score` to a top-level `score` per §2.2. + if let Value::Object(ref mut map) = v { + if let Some(Value::Object(retrieval)) = map.get("retrieval") { + if let Some(score) = retrieval.get("fusion_score").cloned() { + map.insert("score".to_string(), score); + } + } + } + tag_object(v, "search_hit.v1") +} + +/// Wrap a list of [`SearchHit`] values as a JSON array of `search_hit.v1` +/// objects (one tag per element, per design §2.2). +pub fn wire_search_hits(hits: &[SearchHit]) -> Value { + Value::Array(hits.iter().map(wire_search_hit).collect()) +} + +/// Wrap an [`Answer`] as `answer.v1`. +pub fn wire_answer(a: &Answer) -> Value { + let v = serde_json::to_value(a).expect("Answer serializes"); + tag_object(v, "answer.v1") +} + +/// Idempotent pass-through for [`DoctorReport`] — the type already carries +/// `schema_version: "doctor.v1"` (struct-field convention, the one +/// exception called out in the module doc above). This helper exists so +/// every `--json` branch in `kb-cli` goes through `wire::*`, keeping the +/// emit pattern uniform. +pub fn wire_doctor(d: &DoctorReport) -> Value { + // Round-trip through `to_value` to confirm the field is serialized; + // then re-tag (no-op when the field is already present, defensive + // when a future refactor drops the struct-field). + let v = serde_json::to_value(d).expect("DoctorReport serializes"); + if let Value::Object(ref map) = v { + if matches!( + map.get("schema_version"), + Some(Value::String(s)) if s == "doctor.v1" + ) { + return v; + } + } + tag_object(v, "doctor.v1") +} + +#[cfg(test)] +mod tests { + use super::*; + + fn schema_of(v: &Value) -> Option<&str> { + v.as_object()?.get("schema_version")?.as_str() + } + + #[test] + fn doctor_round_trip_preserves_schema_version() { + let d = DoctorReport { + schema_version: "doctor.v1".to_string(), + ok: true, + checks: Vec::new(), + }; + let v = wire_doctor(&d); + assert_eq!(schema_of(&v), Some("doctor.v1")); + // Sanity: ok/checks are preserved. + assert_eq!(v.get("ok").and_then(Value::as_bool), Some(true)); + assert!(v.get("checks").and_then(Value::as_array).is_some()); + } + + #[test] + fn ingest_wrapper_tags_schema_version() { + use kb_core::SourceScope; + let r = IngestReport { + scope: SourceScope { + root: std::path::PathBuf::from("/tmp"), + include: vec![], + exclude: vec![], + }, + scanned: 0, + new: 0, + updated: 0, + skipped: 0, + errors: 0, + duration_ms: 0, + items: None, + }; + let v = wire_ingest(&r); + assert_eq!(schema_of(&v), Some("ingest_report.v1")); + assert!(v.get("items").is_some()); + } + + #[test] + fn doc_summaries_wraps_each_element() { + let v = wire_doc_summaries(&[]); + assert!(v.is_array()); + assert_eq!(v.as_array().unwrap().len(), 0); + } + + #[test] + fn search_hits_wraps_each_element() { + let v = wire_search_hits(&[]); + assert!(v.is_array()); + assert_eq!(v.as_array().unwrap().len(), 0); + } + + #[test] + fn tag_object_inserts_into_object() { + let v = Value::Object(serde_json::Map::new()); + let tagged = tag_object(v, "x.v1"); + assert_eq!(schema_of(&tagged), Some("x.v1")); + } +} diff --git a/crates/kb-core/src/lib.rs b/crates/kb-core/src/lib.rs index 0ed39c3..629d739 100644 --- a/crates/kb-core/src/lib.rs +++ b/crates/kb-core/src/lib.rs @@ -57,7 +57,7 @@ pub use answer::{ Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, RefusalReason, TokenUsage, TraceId, }; -pub use ingest::{IngestItem, IngestReport}; +pub use ingest::{IngestItem, IngestItemKind, IngestReport}; pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus}; pub use vector::{VectorHit, VectorRecord}; pub use errors::CoreError; diff --git a/crates/kb-core/src/normalize.rs b/crates/kb-core/src/normalize.rs index ec39c45..d23858f 100644 --- a/crates/kb-core/src/normalize.rs +++ b/crates/kb-core/src/normalize.rs @@ -50,7 +50,7 @@ pub fn to_posix(path: &Path) -> WorkspacePath { } } if out.is_empty() { - out.push_str("."); + out.push('.'); } WorkspacePath(nfc(&out)) }