style: cargo fmt --all (round 4 ingest log feature follow-up)

Phase C4 executor 의 마지막 commit (`fix(test): clippy + fmt fixes`) 은
test file 에만 fmt 를 적용했음. workspace 전체에 대한 fmt 누락을 발견 →
`cargo fmt --all` 적용. 모든 import 의 알파벳 순 재정렬 + line wrapping 정합.

추가로 untracked artifact 를 함께 commit:
- docs/superpowers/specs/2026-05-28-v0.20-ingest-log-spec.md (491 lines, ACCEPT)
- docs/superpowers/plans/2026-05-28-v0.20-ingest-log-plan.md (616 lines, ACCEPT)

workspace test: 1370 passed / 0 failed / 50 ignored, ingest_log_smoke green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 04:18:40 +00:00
parent 445b096215
commit 685007789a
235 changed files with 6520 additions and 3955 deletions

View File

@@ -420,12 +420,16 @@ pub struct PdfCfg {
impl PdfCfg {
pub fn defaults() -> Self {
Self { ocr: PdfOcrCfg::defaults() }
Self {
ocr: PdfOcrCfg::defaults(),
}
}
}
impl Default for PdfCfg {
fn default() -> Self { Self::defaults() }
fn default() -> Self {
Self::defaults()
}
}
/// v0.20.x ingest log surface: structured ndjson log written per ingest run.
@@ -444,7 +448,9 @@ pub struct LoggingCfg {
pub ingest_log_dir: PathBuf,
}
fn default_ingest_log_enabled() -> bool { true }
fn default_ingest_log_enabled() -> bool {
true
}
fn default_ingest_log_dir() -> PathBuf {
PathBuf::from("{state_dir}/logs")
}
@@ -531,10 +537,18 @@ impl PdfOcrCfg {
/// metro-korea.pdf page 8/9/13) 의 OCR 을 강제 timeout 시켜 본문 indexed 손실.
/// **conservative starting point 180s 로 재조정** + dogfood evidence 기반 sweet spot
/// 점진적 축소 정책. user 가 `[pdf.ocr] request_timeout_secs = N` 으로 직접 tune.
fn default_pdf_ocr_request_timeout_secs() -> u64 { 180 }
fn default_pdf_ocr_valid_ratio() -> f32 { 0.5 }
fn default_pdf_ocr_min_char_count() -> u32 { 20 }
fn default_pdf_ocr_lang_hint() -> Option<String> { Some("kor".to_string()) }
fn default_pdf_ocr_request_timeout_secs() -> u64 {
180
}
fn default_pdf_ocr_valid_ratio() -> f32 {
0.5
}
fn default_pdf_ocr_min_char_count() -> u32 {
20
}
fn default_pdf_ocr_lang_hint() -> Option<String> {
Some("kor".to_string())
}
/// p9-fb-14: TUI-only configuration. Currently a single `theme`
/// selector (`"dark"` / `"light"`); future fields (custom role
@@ -675,8 +689,7 @@ impl Config {
explain_default: false,
max_context_tokens: 8000,
multi_hop_max_depth: default_multi_hop_max_depth(),
multi_hop_max_sub_queries_per_iter:
default_multi_hop_max_sub_queries_per_iter(),
multi_hop_max_sub_queries_per_iter: default_multi_hop_max_sub_queries_per_iter(),
multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(),
nli_threshold: default_nli_threshold(),
},
@@ -1015,11 +1028,7 @@ impl Config {
"KEBAB_IMAGE_OCR_ENDPOINT" => {
// Empty env value is treated the same as "fall back
// to models.llm.endpoint" — i.e. set None.
self.image.ocr.endpoint = if v.is_empty() {
None
} else {
Some(v.clone())
};
self.image.ocr.endpoint = if v.is_empty() { None } else { Some(v.clone()) };
}
"KEBAB_IMAGE_OCR_LANGUAGES" => {
// Comma-separated list, e.g. "eng,kor".
@@ -1319,7 +1328,10 @@ theme = "dark"
#[test]
fn env_overrides_chunking_target_tokens() {
let mut env = HashMap::new();
env.insert("KEBAB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string());
env.insert(
"KEBAB_CHUNKING_TARGET_TOKENS".to_string(),
"777".to_string(),
);
let c = Config::defaults().apply_env(&env);
assert_eq!(c.chunking.target_tokens, 777);
}
@@ -1331,7 +1343,10 @@ theme = "dark"
"KEBAB_MODELS_LLM_ENDPOINT".to_string(),
"http://10.0.0.1:11434".to_string(),
);
env.insert("KEBAB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string());
env.insert(
"KEBAB_MODELS_LLM_TEMPERATURE".to_string(),
"0.7".to_string(),
);
let c = Config::defaults().apply_env(&env);
assert_eq!(c.models.llm.endpoint, "http://10.0.0.1:11434");
assert!((c.models.llm.temperature - 0.7).abs() < 1e-6);
@@ -1361,8 +1376,7 @@ theme = "dark"
/// shared with the OCR-side invariant via [`LEGACY_PRE_TIMEOUT_TOML`].
#[test]
fn legacy_config_without_request_timeout_secs_uses_default() {
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
.expect("parse legacy config");
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
assert_eq!(c.models.llm.request_timeout_secs, 300);
}
@@ -1391,10 +1405,7 @@ theme = "dark"
/// existing configs that omit the new field keep behaving identically.
#[test]
fn default_ocr_request_timeout_secs_is_300() {
assert_eq!(
Config::defaults().image.ocr.request_timeout_secs,
300
);
assert_eq!(Config::defaults().image.ocr.request_timeout_secs, 300);
}
#[test]
@@ -1414,8 +1425,7 @@ theme = "dark"
/// with the LLM-side invariant via [`LEGACY_PRE_TIMEOUT_TOML`].
#[test]
fn legacy_config_without_ocr_request_timeout_secs_uses_default() {
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
.expect("parse legacy config");
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
assert_eq!(c.image.ocr.request_timeout_secs, 300);
}
@@ -1428,10 +1438,7 @@ theme = "dark"
#[test]
fn default_multi_hop_max_sub_queries_per_iter_is_5() {
assert_eq!(
Config::defaults().rag.multi_hop_max_sub_queries_per_iter,
5
);
assert_eq!(Config::defaults().rag.multi_hop_max_sub_queries_per_iter, 5);
}
#[test]
@@ -1445,10 +1452,7 @@ theme = "dark"
#[test]
fn env_overrides_multi_hop_knobs() {
let mut env = HashMap::new();
env.insert(
"KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(),
"5".to_string(),
);
env.insert("KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(), "5".to_string());
env.insert(
"KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER".to_string(),
"7".to_string(),
@@ -1470,8 +1474,7 @@ theme = "dark"
/// (that fixture also predates the multi_hop_* fields).
#[test]
fn legacy_config_without_multi_hop_knobs_uses_defaults() {
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
.expect("parse legacy config");
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
assert_eq!(c.rag.multi_hop_max_depth, 3);
assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 5);
// v0.18 dogfood (post-PR-7): pool default 30 → 15.
@@ -1504,8 +1507,7 @@ theme = "dark"
/// all PR-9c-1 fields).
#[test]
fn legacy_config_without_nli_uses_defaults() {
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
.expect("parse legacy config");
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
assert_eq!(c.rag.nli_threshold, 0.0);
assert_eq!(
c.models.nli.model,
@@ -1705,7 +1707,11 @@ max_context_tokens = 8000
"[workspace]\ninclude = [\"**/*.md\", \"**/*.txt\"]",
);
let parsed: Result<Config, _> = toml::from_str(&toml_text);
assert!(parsed.is_ok(), "legacy include must not break load: {:?}", parsed.err());
assert!(
parsed.is_ok(),
"legacy include must not break load: {:?}",
parsed.err()
);
let cfg = parsed.unwrap();
assert_eq!(cfg.workspace.root, "/tmp/kebab-legacy");
}
@@ -1715,7 +1721,10 @@ max_context_tokens = 8000
#[test]
fn workspace_cfg_has_only_root_and_exclude_fields() {
let ws = Config::defaults().workspace;
let WorkspaceCfg { root: _, exclude: _ } = &ws;
let WorkspaceCfg {
root: _,
exclude: _,
} = &ws;
}
#[test]
@@ -1727,9 +1736,10 @@ max_context_tokens = 8000
#[test]
fn env_override_stale_threshold() {
let c = Config::defaults();
let env: HashMap<String, String> = [
("KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(), "7".to_string()),
]
let env: HashMap<String, String> = [(
"KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(),
"7".to_string(),
)]
.into_iter()
.collect();
let c = c.apply_env(&env);
@@ -1744,9 +1754,10 @@ max_context_tokens = 8000
// `fb27_tests::file_negative_stale_threshold_returns_config_invalid`)
// is the spec-required hard error surface.
let c = Config::defaults();
let env: HashMap<String, String> = [
("KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(), "-5".to_string()),
]
let env: HashMap<String, String> = [(
"KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(),
"-5".to_string(),
)]
.into_iter()
.collect();
let c = c.apply_env(&env);
@@ -1765,7 +1776,10 @@ max_context_tokens = 8000
std::env::set_var("XDG_CONFIG_HOME", "/tmp/kebabtest-xdg-config");
}
let p = Config::xdg_config_path();
assert_eq!(p, PathBuf::from("/tmp/kebabtest-xdg-config/kebab/config.toml"));
assert_eq!(
p,
PathBuf::from("/tmp/kebabtest-xdg-config/kebab/config.toml")
);
// SAFETY: scope-local restore.
unsafe {
match prev {
@@ -1810,10 +1824,7 @@ max_context_tokens = 8000
let base = Config::defaults();
let mut toml_text = toml::to_string(&base).unwrap();
// Inject max_file_bytes override into the [ingest.code] table.
toml_text = toml_text.replace(
"max_file_bytes = 262144",
"max_file_bytes = 524288",
);
toml_text = toml_text.replace("max_file_bytes = 262144", "max_file_bytes = 524288");
let cfg: Config = toml::from_str(&toml_text).unwrap();
assert_eq!(cfg.ingest.code.max_file_bytes, 524_288);
}
@@ -1828,7 +1839,8 @@ mod fb27_tests {
fn config_invalid_carries_path_and_cause() {
let nonexistent = PathBuf::from("/this/path/should/not/exist/kebab.toml");
let err = Config::from_file(&nonexistent).unwrap_err();
let signal = err.downcast_ref::<ConfigInvalid>()
let signal = err
.downcast_ref::<ConfigInvalid>()
.expect("from_file error should downcast to ConfigInvalid");
assert_eq!(signal.path, nonexistent);
assert!(!signal.cause.is_empty(), "cause should be non-empty");
@@ -1840,7 +1852,8 @@ mod fb27_tests {
let p = dir.path().join("bad.toml");
std::fs::write(&p, "this is not [valid toml").unwrap();
let err = Config::from_file(&p).unwrap_err();
let signal = err.downcast_ref::<ConfigInvalid>()
let signal = err
.downcast_ref::<ConfigInvalid>()
.expect("malformed TOML should downcast to ConfigInvalid");
assert_eq!(signal.path, p);
assert!(!signal.cause.is_empty(), "cause should be non-empty");
@@ -1864,13 +1877,11 @@ mod fb27_tests {
toml_text.contains("stale_threshold_days = 30"),
"default value drifted; update test fixture"
);
toml_text = toml_text.replace(
"stale_threshold_days = 30",
"stale_threshold_days = -5",
);
toml_text = toml_text.replace("stale_threshold_days = 30", "stale_threshold_days = -5");
std::fs::write(&p, &toml_text).unwrap();
let err = Config::from_file(&p).unwrap_err();
let signal = err.downcast_ref::<ConfigInvalid>()
let signal = err
.downcast_ref::<ConfigInvalid>()
.expect("negative stale_threshold_days should downcast to ConfigInvalid");
assert_eq!(signal.path, p);
assert!(

View File

@@ -157,7 +157,9 @@ mod tests {
#[test]
fn xdg_data_home_set_replaces_var() {
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _lock = ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/custom/path") };
@@ -168,7 +170,9 @@ mod tests {
#[test]
fn xdg_data_home_unset_uses_default() {
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _lock = ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::remove_var("XDG_DATA_HOME") };
@@ -181,7 +185,9 @@ mod tests {
#[test]
fn xdg_with_no_default_resolves_to_empty_when_unset() {
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _lock = ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::remove_var("XDG_DATA_HOME") };
@@ -193,7 +199,9 @@ mod tests {
#[test]
fn leading_tilde_expands_to_home() {
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _lock = ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let home = std::env::var("HOME").expect("HOME must be set in tests");
let p = expand_path("~/runs", "");
assert_eq!(p, PathBuf::from(home).join("runs"));
@@ -229,7 +237,9 @@ mod tests {
#[test]
fn tilde_path_ignores_base_dir() {
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _lock = ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let home = std::env::var("HOME").expect("HOME must be set in tests");
let base = Path::new("/tmp/ignored-cfg");
let p = expand_path_with_base("~/x", "", base);
@@ -238,7 +248,9 @@ mod tests {
#[test]
fn xdg_var_path_ignores_base_dir() {
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _lock = ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };
@@ -255,7 +267,9 @@ mod tests {
// Order matters: substitute `{data_dir}` (which itself contains
// an unexpanded `${XDG_DATA_HOME}` and `~`), then the other two
// resolve the result.
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _lock = ENV_LOCK
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };

View File

@@ -2,13 +2,15 @@
//
// Integration tests for [pdf.ocr] config section (v0.20.0 sub-item 1).
use std::collections::HashMap;
use kebab_config::{Config, PdfCfg};
use std::collections::HashMap;
// Test 1: toml roundtrip — spec §4.5 line 1034-1047 example block.
// Config requires many required fields; test the [pdf] section via PdfCfg wrapper.
#[derive(serde::Deserialize)]
struct PdfWrapper { pdf: PdfCfg }
struct PdfWrapper {
pdf: PdfCfg,
}
#[test]
fn pdf_ocr_toml_roundtrip() {
@@ -50,7 +52,10 @@ fn pdf_ocr_defaults_off_with_qwen_3b() {
assert_eq!(cfg.pdf.ocr.engine, "ollama-vision");
assert_eq!(cfg.pdf.ocr.model, "qwen2.5vl:3b");
assert!(cfg.pdf.ocr.endpoint.is_none());
assert_eq!(cfg.pdf.ocr.languages, vec!["eng".to_string(), "kor".to_string()]);
assert_eq!(
cfg.pdf.ocr.languages,
vec!["eng".to_string(), "kor".to_string()]
);
assert_eq!(cfg.pdf.ocr.max_pixels, 2048);
assert_eq!(cfg.pdf.ocr.request_timeout_secs, 180); // Bug #11: 600 → 60 → 180 (HOTFIXES 2026-05-28)
assert!((cfg.pdf.ocr.valid_ratio_threshold - 0.5).abs() < 1e-6);
@@ -63,9 +68,15 @@ fn pdf_ocr_defaults_off_with_qwen_3b() {
fn pdf_ocr_env_overrides() {
let mut env: HashMap<String, String> = HashMap::new();
env.insert("KEBAB_PDF_OCR_ENABLED".to_string(), "true".to_string());
env.insert("KEBAB_PDF_OCR_MODEL".to_string(), "qwen2.5vl:7b".to_string());
env.insert(
"KEBAB_PDF_OCR_MODEL".to_string(),
"qwen2.5vl:7b".to_string(),
);
env.insert("KEBAB_PDF_OCR_ALWAYS_ON".to_string(), "true".to_string());
env.insert("KEBAB_PDF_OCR_VALID_RATIO_THRESHOLD".to_string(), "0.75".to_string());
env.insert(
"KEBAB_PDF_OCR_VALID_RATIO_THRESHOLD".to_string(),
"0.75".to_string(),
);
let cfg = Config::defaults().apply_env(&env);