Files
kebab/crates/kebab-cli/src/wire.rs
th-kim0823 29629e6786 feat(cli): kebab ask --stream emits ndjson on stderr (fb-33)
Background-thread driver runs ask_with_config; main thread
drains the receiver, serializes each StreamEvent to ndjson on
stderr. BrokenPipe → drop receiver → pipeline SendError →
cancel + LlmStreamAborted refusal. Final stdout line is the
existing answer.v1 (ingest_progress.v1 backwards-compat
pattern).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-09 15:03:41 +09:00

316 lines
12 KiB
Rust

//! CLI-side wire-schema wrappers.
//!
//! Convention (per design §2): every JSON object emitted on stdout in
//! `--json` mode MUST carry a top-level `schema_version` of the form
//! `"<object>.v1"`. The kb-core types are pure domain types and do NOT
//! carry `schema_version` themselves; the CLI wraps them on emit. The
//! exceptions are `DoctorReport` and `SchemaV1`, where `schema_version` is
//! a field of the wire type itself because the wire object IS its own
//! structured surface; their helpers below re-tag idempotently.
//!
//! Future tasks (P1-5, P3, P4, P5) replacing stub `bail!` paths must call
//! these helpers from the relevant CLI subcommand handler before
//! `serde_json::to_string`.
//!
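//! Most helpers are total (return `serde_json::Value`, never an error) — the
//! input is a fully-typed `serde::Serialize` value, so the only way to fail
//! is OOM, which would have killed the process anyway. The exception is
//! `wire_ingest_progress`, which returns `anyhow::Result<Value>` because it
//! propagates serialization and `crate::progress::now_rfc3339` errors
//! with `?`.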
use serde_json::Value;
use kebab_app::DoctorReport;
use kebab_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
/// Stamp a top-level `schema_version` key onto `v` and hand it back.
///
/// Shared tail of the "serialize, then tag" pattern used by the per-type
/// wrappers in this module. An existing `schema_version` entry is
/// overwritten. When `v` is not a JSON object it is returned unchanged.
fn tag_object(mut v: Value, schema_version: &str) -> Value {
    if let Some(fields) = v.as_object_mut() {
        fields.insert("schema_version".to_owned(), Value::from(schema_version));
    }
    v
}
/// Serialize an [`IngestReport`] and tag it as `ingest_report.v1`.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the report.
pub fn wire_ingest(r: &IngestReport) -> Value {
    tag_object(
        serde_json::to_value(r).expect("IngestReport serializes"),
        "ingest_report.v1",
    )
}
/// Serialize one [`DocSummary`] and tag it as `doc_summary.v1`.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the summary.
pub fn wire_doc_summary(d: &DocSummary) -> Value {
    tag_object(
        serde_json::to_value(d).expect("DocSummary serializes"),
        "doc_summary.v1",
    )
}
/// Turn a slice of [`DocSummary`] values into a JSON array in which every
/// element is individually tagged `doc_summary.v1`.
///
/// Per design §2.5 there is no list-envelope schema, so the output shape is
/// `[{schema_version: "doc_summary.v1", ...}, ...]`; an empty slice yields
/// an empty array.
pub fn wire_doc_summaries(d: &[DocSummary]) -> Value {
    let mut tagged: Vec<Value> = Vec::with_capacity(d.len());
    tagged.extend(d.iter().map(wire_doc_summary));
    Value::Array(tagged)
}
/// Serialize a [`Chunk`] and tag it as `chunk_inspection.v1` (§2.6).
///
/// NOTE: the wire schema requires `doc_path`, which the `kebab_core`
/// `Chunk` does not currently carry. When P1-5 wires the Ok-path, the
/// implementation should either enrich `Chunk` or pass `doc_path`
/// alongside. Until then the output is exactly the fields `Chunk`
/// serializes with, plus the `schema_version` tag.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the chunk.
pub fn wire_chunk_inspection(c: &Chunk) -> Value {
    tag_object(
        serde_json::to_value(c).expect("Chunk serializes"),
        "chunk_inspection.v1",
    )
}
/// Serialize one [`SearchHit`] and tag it as `search_hit.v1`.
///
/// Per §2.2, `retrieval.fusion_score` is additionally copied to a top-level
/// `score` key. The copy only happens when the serialized hit is an object
/// whose `retrieval` entry is itself an object containing `fusion_score`;
/// the nested value is left in place.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the hit.
pub fn wire_search_hit(h: &SearchHit) -> Value {
    let mut v = serde_json::to_value(h).expect("SearchHit serializes");

    // Clone the nested score first so the shared borrow of `v` ends before
    // the map is mutated below.
    let fusion_score = v
        .as_object()
        .and_then(|fields| fields.get("retrieval"))
        .and_then(Value::as_object)
        .and_then(|retrieval| retrieval.get("fusion_score"))
        .cloned();

    if let (Some(score), Some(fields)) = (fusion_score, v.as_object_mut()) {
        fields.insert("score".to_string(), score);
    }

    tag_object(v, "search_hit.v1")
}
/// Turn a slice of [`SearchHit`] values into a JSON array in which every
/// element is individually wrapped by [`wire_search_hit`] (one
/// `search_hit.v1` tag per element, per design §2.2).
pub fn wire_search_hits(hits: &[SearchHit]) -> Value {
    let tagged: Vec<Value> = hits.iter().map(wire_search_hit).collect();
    Value::Array(tagged)
}
/// Serialize an [`Answer`] and tag it as `answer.v1`.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the answer.
pub fn wire_answer(a: &Answer) -> Value {
    tag_object(
        serde_json::to_value(a).expect("Answer serializes"),
        "answer.v1",
    )
}
/// p9-fb-33: serialize a [`kebab_app::StreamEvent`] as one
/// `answer_event.v1` ndjson object.
///
/// The in-process enum carries no timestamp, so the caller supplies `ts`
/// at emit time. It is rendered as an RFC 3339 string and stored under the
/// `ts` key when the serialized event is a JSON object.
///
/// # Panics
///
/// Panics if the event fails to serialize or `ts` cannot be formatted as
/// RFC 3339.
pub fn wire_answer_event(
    ev: &kebab_app::StreamEvent,
    ts: time::OffsetDateTime,
) -> Value {
    use time::format_description::well_known::Rfc3339;

    let mut v = serde_json::to_value(ev).expect("StreamEvent serializes");
    let stamped_at = ts
        .format(&Rfc3339)
        .expect("OffsetDateTime formats as RFC3339");

    if let Some(fields) = v.as_object_mut() {
        fields.insert("ts".to_string(), Value::String(stamped_at));
    }

    tag_object(v, "answer_event.v1")
}
/// Idempotent wrapper for [`DoctorReport`], emitted as `doctor.v1`.
///
/// `DoctorReport` already has a `schema_version` struct field, so in the
/// normal case the serialized value is returned untouched. The helper
/// exists so that every `--json` branch in the CLI goes through `wire::*`,
/// keeping the emit pattern uniform.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the report.
pub fn wire_doctor(d: &DoctorReport) -> Value {
    let v = serde_json::to_value(d).expect("DoctorReport serializes");

    // Fast path: the serialized struct already carries the expected tag.
    let already_tagged =
        v.get("schema_version").and_then(Value::as_str) == Some("doctor.v1");

    if already_tagged {
        v
    } else {
        // Defensive: covers a missing or different tag, e.g. if a future
        // refactor drops the struct field.
        tag_object(v, "doctor.v1")
    }
}
/// Serialize a [`kebab_app::ResetReport`] and tag it as `reset_report.v1`.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the report.
pub fn wire_reset(r: &kebab_app::ResetReport) -> Value {
    tag_object(
        serde_json::to_value(r).expect("ResetReport serializes"),
        "reset_report.v1",
    )
}
/// Serialize a [`kebab_app::IngestEvent`] as `ingest_progress.v1`.
///
/// Besides the `schema_version` tag, a `ts` key is added holding the
/// string returned by `crate::progress::now_rfc3339()` (presumably the
/// current wall-clock time in RFC 3339 form — confirm against that
/// helper). The timestamp is stamped here, at the emit site, rather than
/// carried on the event itself.
///
/// # Errors
///
/// Returns an error if the event fails to serialize or if
/// `now_rfc3339()` fails.
pub fn wire_ingest_progress(
    event: &kebab_app::IngestEvent,
) -> anyhow::Result<Value> {
    let mut v = serde_json::to_value(event)?;

    // The timestamp is only requested when there is an object to put it in.
    if let Some(fields) = v.as_object_mut() {
        let stamped_at = crate::progress::now_rfc3339()?;
        fields.insert("ts".to_string(), Value::String(stamped_at));
    }

    Ok(tag_object(v, "ingest_progress.v1"))
}
/// Idempotent wrapper for [`kebab_app::SchemaV1`], emitted under the
/// `kebab_app::SCHEMA_V1_ID` tag.
///
/// `SchemaV1` already carries `schema_version` as a struct field, so the
/// serialized value is normally returned as-is. If the tag is missing or
/// different (for instance after a future refactor drops the field), the
/// correct version is stamped on.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the schema object.
pub fn wire_schema(s: &kebab_app::SchemaV1) -> Value {
    let v = serde_json::to_value(s).expect("SchemaV1 serializes");

    let already_tagged = v.get("schema_version").and_then(Value::as_str)
        == Some(kebab_app::SCHEMA_V1_ID);

    if already_tagged {
        v
    } else {
        tag_object(v, kebab_app::SCHEMA_V1_ID)
    }
}
/// Serialize a [`kebab_app::ErrorV1`] and tag it as `error.v1`.
///
/// Uses the plain `tag_object` pattern: whatever the value serializes
/// with, the top-level `schema_version` ends up as `"error.v1"`.
///
/// NOTE(review): the unit test in this file constructs `ErrorV1` with a
/// `schema_version` field, so the struct appears to carry the tag itself.
/// If so, the tagging step overwrites it with the same value — confirm
/// against the `ErrorV1` definition.
///
/// # Panics
///
/// Panics if `serde_json::to_value` fails for the error object.
pub fn wire_error_v1(e: &kebab_app::ErrorV1) -> Value {
    tag_object(
        serde_json::to_value(e).expect("ErrorV1 serializes"),
        "error.v1",
    )
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the top-level `schema_version` string of `v`, if `v` is an
    /// object that has one.
    fn schema_of(v: &Value) -> Option<&str> {
        v.get("schema_version").and_then(Value::as_str)
    }

    /// A report that already carries `doctor.v1` keeps its tag and its
    /// other fields.
    #[test]
    fn doctor_round_trip_preserves_schema_version() {
        let report = DoctorReport {
            schema_version: "doctor.v1".to_owned(),
            ok: true,
            checks: Vec::new(),
        };

        let wire = wire_doctor(&report);

        assert_eq!(schema_of(&wire), Some("doctor.v1"));
        // Sanity: ok/checks are preserved.
        assert_eq!(wire["ok"].as_bool(), Some(true));
        assert!(wire["checks"].is_array());
    }

    /// An all-zero report is tagged `ingest_report.v1`.
    #[test]
    fn ingest_wrapper_tags_schema_version() {
        use kebab_core::SourceScope;

        let report = IngestReport {
            scope: SourceScope {
                root: std::path::PathBuf::from("/tmp"),
                include: Vec::new(),
                exclude: Vec::new(),
            },
            scanned: 0,
            new: 0,
            updated: 0,
            skipped: 0,
            unchanged: 0,
            errors: 0,
            duration_ms: 0,
            skipped_by_extension: std::collections::BTreeMap::new(),
            items: None,
        };

        let wire = wire_ingest(&report);

        assert_eq!(schema_of(&wire), Some("ingest_report.v1"));
        // The `items` key must be present even though the field is `None`.
        assert!(wire.get("items").is_some());
    }

    /// An empty slice of summaries becomes an empty JSON array.
    #[test]
    fn doc_summaries_wraps_each_element() {
        let wire = wire_doc_summaries(&[]);
        assert_eq!(wire.as_array().map(Vec::len), Some(0));
    }

    /// An empty slice of hits becomes an empty JSON array.
    #[test]
    fn search_hits_wraps_each_element() {
        let wire = wire_search_hits(&[]);
        assert_eq!(wire.as_array().map(Vec::len), Some(0));
    }

    /// Tagging an empty object adds the `schema_version` key.
    #[test]
    fn tag_object_inserts_into_object() {
        let empty = Value::Object(serde_json::Map::new());
        let wire = tag_object(empty, "x.v1");
        assert_eq!(schema_of(&wire), Some("x.v1"));
    }

    /// A fully populated `SchemaV1` keeps its tag and its `kebab_version`.
    #[test]
    fn schema_wrapper_tags_schema_version() {
        use kebab_app::{Capabilities, Models, SchemaV1, Stats, WireBlock};

        let capabilities = Capabilities {
            json_mode: true,
            ingest_progress: true,
            ingest_cancellation: true,
            rag_multi_turn: true,
            search_cache: true,
            incremental_ingest: true,
            streaming_ask: false,
            http_daemon: false,
            mcp_server: false,
            single_file_ingest: false,
        };
        let models = Models {
            parser_version: "x".to_string(),
            chunker_version: "y".to_string(),
            embedding_version: "z".to_string(),
            prompt_template_version: "w".to_string(),
            index_version: "v".to_string(),
            corpus_revision: 7,
        };
        let stats = Stats {
            doc_count: 1,
            chunk_count: 2,
            asset_count: 1,
            last_ingest_at: None,
        };
        let schema = SchemaV1 {
            schema_version: "schema.v1".to_string(),
            kebab_version: "0.2.1".to_string(),
            wire: WireBlock {
                schemas: vec!["answer.v1".to_string()],
            },
            capabilities,
            models,
            stats,
        };

        let wire = wire_schema(&schema);

        assert_eq!(schema_of(&wire), Some("schema.v1"));
        assert_eq!(wire["kebab_version"].as_str(), Some("0.2.1"));
    }

    /// The error wrapper tags `error.v1` and emits `code` at top level.
    #[test]
    fn error_wrapper_tags_schema_version_and_emits_code() {
        use kebab_app::ErrorV1;

        let failure = ErrorV1 {
            schema_version: "error.v1".to_string(),
            code: "config_invalid".to_string(),
            message: "bad config".to_string(),
            details: serde_json::json!({"path": "/tmp/x"}),
            hint: Some("check the path".to_string()),
        };

        let wire = wire_error_v1(&failure);

        assert_eq!(schema_of(&wire), Some("error.v1"));
        assert_eq!(wire["code"].as_str(), Some("config_invalid"));
    }

    /// The reset wrapper tags `reset_report.v1`, emits the scope as
    /// `data_only`, and lists the removed paths as strings.
    #[test]
    fn reset_wrapper_tags_schema_version_and_serializes_scope() {
        let report = kebab_app::ResetReport {
            scope: kebab_app::ResetScope::DataOnly,
            removed_paths: vec![std::path::PathBuf::from("/tmp/x")],
            embedding_rows_truncated: 0,
        };

        let wire = wire_reset(&report);

        assert_eq!(schema_of(&wire), Some("reset_report.v1"));
        assert_eq!(wire["scope"].as_str(), Some("data_only"));
        assert_eq!(wire["embedding_rows_truncated"].as_u64(), Some(0));

        let removed = wire["removed_paths"].as_array().unwrap();
        assert_eq!(removed.len(), 1);
        assert_eq!(removed[0].as_str(), Some("/tmp/x"));
    }
}