feat(wire): PdfOcrProgress.Finished + ingest_progress.v1 additive 4 fields
v0.20.x ingest log feature 의 wire side. additive minor cascade:
* PdfOcrProgress::Finished + IngestEvent::PdfOcrFinished 의 4 field:
- image_byte_size: Option<u64>
- image_width: Option<u32>
- image_height: Option<u32>
- failure_reason: Option<String>
* docs/wire-schema/v1/ingest_progress.schema.json — 4 추가 property
(모두 optional, required 변경 없음 = additive minor)
* integrations/claude-code/kebab/SKILL.md — wire schema description 동기
기존 ingest_progress.v1 consumer (CLI wire dump, integration test
fixture, kebab-cli wire_search/wire_ask) 는 추가된 4 field 가 모두
Option 이고 None 일 시 직렬화에서 생략되므로 backward-compat.
version bump 없음 (additive minor =
binary-version cascade trigger 아님 per CLAUDE.md §Versioning cascade).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -99,6 +99,18 @@ pub enum IngestEvent {
|
||||
chars: u32,
|
||||
ocr_engine: String,
|
||||
skipped: bool,
|
||||
/// v0.20.x ingest log: raster image byte size (additive minor, optional).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
image_byte_size: Option<u64>,
|
||||
/// v0.20.x ingest log: raster image width in pixels (additive minor, optional).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
image_width: Option<u32>,
|
||||
/// v0.20.x ingest log: raster image height in pixels (additive minor, optional).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
image_height: Option<u32>,
|
||||
/// v0.20.x ingest log: OCR failure reason (additive minor, optional).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
failure_reason: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@@ -1869,6 +1869,10 @@ fn ingest_one_pdf_asset(
|
||||
ms,
|
||||
chars,
|
||||
skipped,
|
||||
image_byte_size,
|
||||
image_width,
|
||||
image_height,
|
||||
failure_reason,
|
||||
} => {
|
||||
if let Some(sender) = progress {
|
||||
let _ = sender.send(
|
||||
@@ -1878,6 +1882,10 @@ fn ingest_one_pdf_asset(
|
||||
chars,
|
||||
ocr_engine: engine.engine_name().to_string(),
|
||||
skipped,
|
||||
image_byte_size,
|
||||
image_width,
|
||||
image_height,
|
||||
failure_reason: failure_reason.clone(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
@@ -147,6 +147,10 @@ where
|
||||
ms: 0,
|
||||
chars: 0,
|
||||
skipped: true,
|
||||
image_byte_size: None,
|
||||
image_width: None,
|
||||
image_height: None,
|
||||
failure_reason: None,
|
||||
});
|
||||
continue;
|
||||
};
|
||||
@@ -175,6 +179,10 @@ where
|
||||
ms: start.elapsed().as_millis() as u64,
|
||||
chars: 0,
|
||||
skipped: true,
|
||||
image_byte_size: Some(page_image_bytes.len() as u64),
|
||||
image_width: None,
|
||||
image_height: None,
|
||||
failure_reason: Some("ocr_error".to_string()),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
@@ -249,6 +257,10 @@ where
|
||||
ms: elapsed_ms,
|
||||
chars: chars_ocr,
|
||||
skipped: false,
|
||||
image_byte_size: Some(page_image_bytes.len() as u64),
|
||||
image_width: None,
|
||||
image_height: None,
|
||||
failure_reason: None,
|
||||
});
|
||||
}
|
||||
|
||||
@@ -291,5 +303,14 @@ pub enum PdfOcrProgress {
|
||||
/// `true` = DCTDecode 부재 또는 OCR engine 실패 로 skip.
|
||||
/// `false` = 정상 OCR 완료.
|
||||
skipped: bool,
|
||||
/// v0.20.x ingest log: raster image byte size (additive, optional).
|
||||
image_byte_size: Option<u64>,
|
||||
/// v0.20.x ingest log: raster image width in pixels (additive, optional).
|
||||
image_width: Option<u32>,
|
||||
/// v0.20.x ingest log: raster image height in pixels (additive, optional).
|
||||
image_height: Option<u32>,
|
||||
/// v0.20.x ingest log: failure reason string when OCR failed (additive, optional).
|
||||
/// Values: "timeout" | "ocr_error" | "network_error". `None` when no failure was
/// recorded: on success, and also on a skip that reports no reason.
|
||||
failure_reason: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -210,7 +210,7 @@ impl ProgressDisplay {
|
||||
let _ = writeln!(err, " 📷 OCR page {page}...");
|
||||
}
|
||||
}
|
||||
IngestEvent::PdfOcrFinished { page, ms, chars, ocr_engine, skipped } => {
|
||||
IngestEvent::PdfOcrFinished { page, ms, chars, ocr_engine, skipped, .. } => {
|
||||
if !quiet {
|
||||
let mut err = std::io::stderr().lock();
|
||||
if *skipped {
|
||||
|
||||
@@ -40,6 +40,10 @@
|
||||
"page": { "type": "integer", "minimum": 1, "description": "pdf_ocr_started / pdf_ocr_finished: 1-based PDF page number under OCR." },
|
||||
"ocr_engine": { "type": "string", "description": "pdf_ocr_finished: engine_name (e.g. 'ollama-vision')." },
|
||||
"skipped": { "type": "boolean", "description": "pdf_ocr_finished: true 일 시 OCR 미수행 (DCTDecode 부재 또는 engine 실패). chars=0 만으로는 skip 과 0-char result 구분 불가." },
|
||||
"image_byte_size": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished (optional, v0.20.x): raster image byte size." },
|
||||
"image_width": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished (optional, v0.20.x): raster image width px." },
|
||||
"image_height": { "type": "integer", "minimum": 0, "description": "pdf_ocr_finished (optional, v0.20.x): raster image height px." },
|
||||
"failure_reason": { "type": "string", "description": "pdf_ocr_finished (optional, v0.20.x): OCR failure reason. Present iff skipped=true due to engine error. Values: timeout | ocr_error | network_error | other." },
|
||||
"counts": {
|
||||
"type": "object",
|
||||
"description": "completed / aborted: aggregate counters at the moment the run ended (mirrors fields on `ingest_report.v1`).",
|
||||
|
||||
@@ -145,7 +145,7 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across
|
||||
- Pagination: `search_response.v1.next_cursor` is opaque base64 — pass back as `--cursor` (CLI) or `cursor` (MCP) for the next page. `null` means no more hits. `corpus_revision` mismatch returns `error.v1.code = stale_cursor` — re-issue search to obtain a fresh cursor.
|
||||
- `search_response.v1.truncated = true` means budget forced snippet shortening or k reduction. Independent of `next_cursor`: widen `max_tokens` for fuller snippets, follow `next_cursor` for more hits, or both.
|
||||
- `ask`'s `citations[]` mirrors `search_hit.v1` minus retrieval internals — same `doc_path` / `citation` shape.
|
||||
- Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear.
|
||||
- Schema reference lives in the kebab repo at `docs/wire-schema/v1/*.schema.json` if a field is unclear. v0.20.x additive minor: `ingest_progress.v1` `pdf_ocr_finished` events may carry 4 new optional fields (`image_byte_size`, `image_width`, `image_height`, `failure_reason`). Each field is omitted when it has no value and is always absent on pre-v0.20 events, so treat a missing field as "unknown" rather than as an error (backward compat).
|
||||
- `search_hit.v1` and `answer.v1.citations[]` carry `indexed_at` (RFC3339) + `stale` (bool). When `stale == true`, the source doc hasn't been re-processed since `config.search.stale_threshold_days`. Surface this caveat to the user when summarizing — the cited snapshot may not reflect current reality.
|
||||
|
||||
## Capability discovery
|
||||
|
||||
Reference in New Issue
Block a user