{ "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": "https://kb.local/wire/v1/ingest_progress.schema.json", "title": "IngestProgressEvent v1", "description": "Streaming progress event emitted by `kebab ingest --json`. One event per line (line-delimited JSON). Discriminated by `kind`. The terminal events are `completed` and `aborted` — every ingest run ends with exactly one of them. The final stdout line of a `--json` ingest is still the existing `ingest_report.v1` for backwards compatibility; progress events stream above it.", "type": "object", "required": ["schema_version", "kind", "ts"], "properties": { "schema_version": { "const": "ingest_progress.v1" }, "kind": { "type": "string", "enum": [ "scan_started", "scan_completed", "asset_started", "asset_finished", "asset_chunked", "expansion_progress", "asset_timings", "embed_batch_started", "embed_batch_finished", "pdf_ocr_started", "pdf_ocr_finished", "completed", "aborted" ] }, "ts": { "type": "string", "format": "date-time", "description": "RFC 3339 timestamp at the moment the event was emitted." }, "root": { "type": "string", "description": "scan_started: workspace root being walked." }, "total": { "type": "integer", "minimum": 0, "description": "scan_completed / asset_started / asset_finished: total assets discovered." }, "idx": { "type": "integer", "minimum": 1, "description": "asset_started / asset_finished: 1-based index of the current asset within the scan." }, "path": { "type": "string", "description": "asset_started: workspace-relative path of the asset being processed." }, "media": { "type": "string", "description": "asset_started: media kind label (e.g. `markdown`, `pdf`, `image`)." }, "result": { "type": "string", "enum": ["new", "updated", "skipped", "error"], "description": "asset_finished: per-asset outcome (mirrors `ingest_report.v1.items[].kind`)." 
}, "chunks": { "type": "integer", "minimum": 0, "description": "asset_finished / asset_chunked / expansion_progress (v0.24.0): chunk count produced for this asset." }, "done": { "type": "integer", "minimum": 0, "description": "expansion_progress (v0.24.0, additive): chunks processed so far in the per-chunk alias-expansion loop (cache hits included). Throttled: emitted at most every 25 chunks or once per second, plus a final frame where done == chunks." }, "parse_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): parse phase wall-clock (ms). Markdown path only." }, "chunk_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): chunk phase wall-clock (ms). Markdown path only." }, "expansion_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): alias-expansion phase wall-clock (ms). Markdown path only; 0 when expansion is disabled." }, "embed_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): embed + vector phase wall-clock (ms) — embedding, vector upsert, and stale-vector purge. Markdown path only." }, "store_ms": { "type": "integer", "minimum": 0, "description": "asset_timings (v0.24.0, additive): SQLite persist phase wall-clock (ms) — put_asset/document/blocks/chunks only. Markdown path only." }, "n_chunks": { "type": "integer", "minimum": 0, "description": "embed_batch_started / embed_batch_finished: chunks in this embedding batch." }, "ms": { "type": "integer", "minimum": 0, "description": "embed_batch_finished / pdf_ocr_finished: wall-clock duration (ms). On the pdf_ocr_finished skip path the meaning is mixed: 0 when DCTDecode is absent, latency-before-bail when the engine fails." }, "chars": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished: char count of OCR result. 0 when skipped." }, "page": { "type": "integer", "minimum": 1, "description": "pdf_ocr_started / pdf_ocr_finished: 1-based PDF page number under OCR." 
}, "ocr_engine": { "type": "string", "description": "pdf_ocr_finished: engine_name (e.g. 'ollama-vision')." }, "skipped": { "type": "boolean", "description": "pdf_ocr_finished: when true, OCR was not performed (DCTDecode absent or engine failure). chars=0 alone cannot distinguish a skip from a 0-char result." }, "image_byte_size": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished (optional, v0.20.x): raster image byte size." }, "image_width": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished (optional, v0.20.x): raster image width px." }, "image_height": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished (optional, v0.20.x): raster image height px." }, "failure_reason": { "type": "string", "description": "pdf_ocr_finished (optional, v0.20.x): OCR failure reason. Present iff skipped=true due to engine error. Values: timeout | ocr_error | network_error | other." }, "counts": { "type": "object", "description": "completed / aborted: aggregate counters at the moment the run ended (mirrors fields on `ingest_report.v1`).", "properties": { "scanned": { "type": "integer", "minimum": 0 }, "new": { "type": "integer", "minimum": 0 }, "updated": { "type": "integer", "minimum": 0 }, "skipped": { "type": "integer", "minimum": 0 }, "errors": { "type": "integer", "minimum": 0 }, "chunks_indexed": { "type": "integer", "minimum": 0 }, "embeddings_indexed": { "type": "integer", "minimum": 0 } } } } }