style: cargo fmt --all (v0.20.x logging r2 feature follow-up)
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -4133,6 +4133,7 @@ dependencies = [
|
||||
"base64 0.22.1",
|
||||
"blake3",
|
||||
"dirs 5.0.1",
|
||||
"filetime",
|
||||
"ignore",
|
||||
"image",
|
||||
"kebab-chunk",
|
||||
|
||||
@@ -1147,10 +1147,7 @@ impl App {
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub fn inspect_ocr_stats_with_config(
|
||||
&self,
|
||||
_cfg: &kebab_config::Config,
|
||||
) -> Result<OcrStatsV1> {
|
||||
pub fn inspect_ocr_stats_with_config(&self, _cfg: &kebab_config::Config) -> Result<OcrStatsV1> {
|
||||
use crate::ingest_log::percentiles;
|
||||
let conn = self.sqlite.read_conn();
|
||||
|
||||
@@ -1189,9 +1186,7 @@ impl App {
|
||||
let mut by_engine = std::collections::BTreeMap::new();
|
||||
{
|
||||
let mut stmt = conn
|
||||
.prepare(
|
||||
"SELECT ocr_engine, COUNT(*) FROM pdf_ocr_events GROUP BY ocr_engine",
|
||||
)
|
||||
.prepare("SELECT ocr_engine, COUNT(*) FROM pdf_ocr_events GROUP BY ocr_engine")
|
||||
.context("prepare engine query")?;
|
||||
let rows = stmt
|
||||
.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, u64>(1)?)))
|
||||
@@ -1215,17 +1210,14 @@ impl App {
|
||||
LIMIT 10",
|
||||
)
|
||||
.context("prepare by_doc query")?;
|
||||
stmt.query_map(
|
||||
[],
|
||||
|r| {
|
||||
stmt.query_map([], |r| {
|
||||
Ok(OcrStatsByDoc {
|
||||
doc_id: r.get(0)?,
|
||||
failure_count: r.get(1)?,
|
||||
success_count: r.get(2)?,
|
||||
p90_ms: None, // per-doc p90 deferred (open question #3)
|
||||
})
|
||||
},
|
||||
)
|
||||
})
|
||||
.context("query by_doc")?
|
||||
.filter_map(|r| r.ok())
|
||||
.collect()
|
||||
|
||||
@@ -206,9 +206,7 @@ impl IngestSummary {
|
||||
/// Simple percentile extraction on a sorted copy of `samples`.
|
||||
/// Returns `(p50, p90, p99, max)`. All `None` when samples is empty.
|
||||
/// p99 surfaces via `inspect ocr-stats`; `IngestSummary` uses p50/p90/max only.
|
||||
pub(crate) fn percentiles(
|
||||
samples: &[u64],
|
||||
) -> (Option<u64>, Option<u64>, Option<u64>, Option<u64>) {
|
||||
pub(crate) fn percentiles(samples: &[u64]) -> (Option<u64>, Option<u64>, Option<u64>, Option<u64>) {
|
||||
if samples.is_empty() {
|
||||
return (None, None, None, None);
|
||||
}
|
||||
@@ -245,13 +243,7 @@ pub(crate) fn cleanup_old_logs(
|
||||
.collect();
|
||||
|
||||
// Sort newest-first by mtime (files without mtime go to the end).
|
||||
entries.sort_by_key(|e| {
|
||||
std::cmp::Reverse(
|
||||
e.metadata()
|
||||
.ok()
|
||||
.and_then(|m| m.modified().ok()),
|
||||
)
|
||||
});
|
||||
entries.sort_by_key(|e| std::cmp::Reverse(e.metadata().ok().and_then(|m| m.modified().ok())));
|
||||
|
||||
let cutoff = SystemTime::now()
|
||||
.checked_sub(std::time::Duration::from_secs(
|
||||
@@ -414,11 +406,7 @@ mod tests {
|
||||
let mtime = SystemTime::now()
|
||||
.checked_sub(std::time::Duration::from_secs(age_days * 86400))
|
||||
.unwrap();
|
||||
filetime::set_file_mtime(
|
||||
&path,
|
||||
filetime::FileTime::from_system_time(mtime),
|
||||
)
|
||||
.unwrap();
|
||||
filetime::set_file_mtime(&path, filetime::FileTime::from_system_time(mtime)).unwrap();
|
||||
}
|
||||
// keep_recent=3, retention_days=90 (no time-based deletion)
|
||||
cleanup_old_logs(dir, 3, 90).unwrap();
|
||||
@@ -442,11 +430,7 @@ mod tests {
|
||||
let mtime = SystemTime::now()
|
||||
.checked_sub(std::time::Duration::from_secs(90 * 86400))
|
||||
.unwrap();
|
||||
filetime::set_file_mtime(
|
||||
&path,
|
||||
filetime::FileTime::from_system_time(mtime),
|
||||
)
|
||||
.unwrap();
|
||||
filetime::set_file_mtime(&path, filetime::FileTime::from_system_time(mtime)).unwrap();
|
||||
}
|
||||
// keep_recent=10 (both within count) but retention_days=30 → both stale
|
||||
cleanup_old_logs(dir, 10, 30).unwrap();
|
||||
@@ -454,6 +438,10 @@ mod tests {
|
||||
.unwrap()
|
||||
.filter_map(|e| e.ok())
|
||||
.collect();
|
||||
assert_eq!(remaining.len(), 0, "stale files must be deleted even within keep_recent");
|
||||
assert_eq!(
|
||||
remaining.len(),
|
||||
0,
|
||||
"stale files must be deleted even within keep_recent"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,8 +190,7 @@ where
|
||||
kind: ProvenanceKind::Warning,
|
||||
note: Some(note),
|
||||
});
|
||||
let (image_width, image_height) =
|
||||
extract_image_dimensions(&page_image_bytes)
|
||||
let (image_width, image_height) = extract_image_dimensions(&page_image_bytes)
|
||||
.map(|(w, h)| (Some(w), Some(h)))
|
||||
.unwrap_or((None, None));
|
||||
emit_progress(PdfOcrProgress::Finished {
|
||||
|
||||
@@ -111,7 +111,10 @@ fn ocr_failures_corpus_wide() {
|
||||
|
||||
assert_eq!(result.schema_version, "ocr_failures.v1");
|
||||
assert!(result.failure_count >= 1, "expected at least 1 failure");
|
||||
assert!(!result.failures.is_empty(), "failures list must be non-empty");
|
||||
assert!(
|
||||
!result.failures.is_empty(),
|
||||
"failures list must be non-empty"
|
||||
);
|
||||
}
|
||||
|
||||
/// AC-5: `inspect_ocr_failures` with doc_id filter returns matching rows.
|
||||
|
||||
@@ -66,8 +66,7 @@ async fn ingest_dual_write_doc_id_matches_ndjson() {
|
||||
std::fs::copy(scanned_pdf_src(), &dest).expect("copy scanned PDF");
|
||||
|
||||
// Run ingest
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest");
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).expect("ingest");
|
||||
|
||||
// Read ndjson log
|
||||
let log_files: Vec<_> = std::fs::read_dir(&log_dir)
|
||||
|
||||
@@ -54,10 +54,7 @@ ingest_log_dir = "{state_dir}/logs"
|
||||
"#;
|
||||
let w: LoggingWrapper = toml::from_str(toml).expect("old logging config must parse");
|
||||
assert!(w.logging.ingest_log_enabled);
|
||||
assert_eq!(
|
||||
w.logging.ingest_log_dir,
|
||||
PathBuf::from("{state_dir}/logs")
|
||||
);
|
||||
assert_eq!(w.logging.ingest_log_dir, PathBuf::from("{state_dir}/logs"));
|
||||
assert_eq!(
|
||||
w.logging.keep_recent_runs, 100,
|
||||
"keep_recent_runs must default to 100"
|
||||
|
||||
@@ -1018,11 +1018,19 @@ impl SqliteStore {
|
||||
ms, chars, success, reason, ocr_engine)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
rusqlite::params![
|
||||
run_id, ts, doc_id, doc_path, page,
|
||||
image_byte_size, image_width, image_height,
|
||||
ms, chars,
|
||||
run_id,
|
||||
ts,
|
||||
doc_id,
|
||||
doc_path,
|
||||
page,
|
||||
image_byte_size,
|
||||
image_width,
|
||||
image_height,
|
||||
ms,
|
||||
chars,
|
||||
if success { 1i32 } else { 0i32 },
|
||||
reason, ocr_engine
|
||||
reason,
|
||||
ocr_engine
|
||||
],
|
||||
)?;
|
||||
Ok(())
|
||||
@@ -1034,8 +1042,7 @@ impl SqliteStore {
|
||||
/// means "delete everything older than now" (i.e. all past rows).
|
||||
pub fn prune_pdf_ocr_events(&self, retention_days: u32) -> anyhow::Result<u64> {
|
||||
use time::format_description::well_known::Rfc3339;
|
||||
let cutoff = time::OffsetDateTime::now_utc()
|
||||
- time::Duration::days(retention_days as i64);
|
||||
let cutoff = time::OffsetDateTime::now_utc() - time::Duration::days(retention_days as i64);
|
||||
let cutoff_ts = cutoff
|
||||
.format(&Rfc3339)
|
||||
.unwrap_or_else(|_| "1970-01-01T00:00:00Z".to_string());
|
||||
|
||||
@@ -25,7 +25,11 @@ fn v008_pdf_ocr_events_table_exists() {
|
||||
)
|
||||
.optional()
|
||||
});
|
||||
assert_eq!(name.as_deref(), Some("pdf_ocr_events"), "pdf_ocr_events table must exist after V008");
|
||||
assert_eq!(
|
||||
name.as_deref(),
|
||||
Some("pdf_ocr_events"),
|
||||
"pdf_ocr_events table must exist after V008"
|
||||
);
|
||||
}
|
||||
|
||||
/// AC-8: insert 2 rows with different timestamps; prune with retention_days=0
|
||||
@@ -74,9 +78,7 @@ fn record_and_prune_pdf_ocr_event() {
|
||||
|
||||
// prune with retention_days=0 → cutoff=now → deletes any row with ts < now.
|
||||
// The 1970 row should be deleted; the 2099 row survives.
|
||||
let pruned = store
|
||||
.prune_pdf_ocr_events(0)
|
||||
.expect("prune");
|
||||
let pruned = store.prune_pdf_ocr_events(0).expect("prune");
|
||||
assert_eq!(pruned, 1, "should have deleted exactly 1 old row");
|
||||
|
||||
// Verify only the future row remains
|
||||
|
||||
Reference in New Issue
Block a user