style: cargo fmt --all (round 4 ingest log feature follow-up)
Phase C4 executor 의 마지막 `fix(test): clippy + fmt fixes` commit 이 test file 부분만 fmt 적용.
workspace 전체 fmt 누락 발견 → `cargo fmt --all` 적용. 모든 import alphabetical reorder + line wrapping 정합.

추가 untracked artifact 동시 commit:
- docs/superpowers/specs/2026-05-28-v0.20-ingest-log-spec.md (491 lines, ACCEPT)
- docs/superpowers/plans/2026-05-28-v0.20-ingest-log-plan.md (616 lines, ACCEPT)

workspace test: 1370 passed / 0 failed / 50 ignored, ingest_log_smoke green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -420,12 +420,16 @@ pub struct PdfCfg {
|
||||
|
||||
impl PdfCfg {
|
||||
pub fn defaults() -> Self {
|
||||
Self { ocr: PdfOcrCfg::defaults() }
|
||||
Self {
|
||||
ocr: PdfOcrCfg::defaults(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for PdfCfg {
|
||||
fn default() -> Self { Self::defaults() }
|
||||
fn default() -> Self {
|
||||
Self::defaults()
|
||||
}
|
||||
}
|
||||
|
||||
/// v0.20.x ingest log surface: structured ndjson log written per ingest run.
|
||||
@@ -444,7 +448,9 @@ pub struct LoggingCfg {
|
||||
pub ingest_log_dir: PathBuf,
|
||||
}
|
||||
|
||||
fn default_ingest_log_enabled() -> bool { true }
|
||||
fn default_ingest_log_enabled() -> bool {
|
||||
true
|
||||
}
|
||||
fn default_ingest_log_dir() -> PathBuf {
|
||||
PathBuf::from("{state_dir}/logs")
|
||||
}
|
||||
@@ -531,10 +537,18 @@ impl PdfOcrCfg {
|
||||
/// metro-korea.pdf page 8/9/13) 의 OCR 을 강제 timeout 시켜 본문 indexed 손실.
|
||||
/// **conservative starting point 180s 로 재조정** + dogfood evidence 기반 sweet spot
|
||||
/// 점진적 축소 정책. user 가 `[pdf.ocr] request_timeout_secs = N` 으로 직접 tune.
|
||||
fn default_pdf_ocr_request_timeout_secs() -> u64 { 180 }
|
||||
fn default_pdf_ocr_valid_ratio() -> f32 { 0.5 }
|
||||
fn default_pdf_ocr_min_char_count() -> u32 { 20 }
|
||||
fn default_pdf_ocr_lang_hint() -> Option<String> { Some("kor".to_string()) }
|
||||
fn default_pdf_ocr_request_timeout_secs() -> u64 {
|
||||
180
|
||||
}
|
||||
fn default_pdf_ocr_valid_ratio() -> f32 {
|
||||
0.5
|
||||
}
|
||||
fn default_pdf_ocr_min_char_count() -> u32 {
|
||||
20
|
||||
}
|
||||
fn default_pdf_ocr_lang_hint() -> Option<String> {
|
||||
Some("kor".to_string())
|
||||
}
|
||||
|
||||
/// p9-fb-14: TUI-only configuration. Currently a single `theme`
|
||||
/// selector (`"dark"` / `"light"`); future fields (custom role
|
||||
@@ -675,8 +689,7 @@ impl Config {
|
||||
explain_default: false,
|
||||
max_context_tokens: 8000,
|
||||
multi_hop_max_depth: default_multi_hop_max_depth(),
|
||||
multi_hop_max_sub_queries_per_iter:
|
||||
default_multi_hop_max_sub_queries_per_iter(),
|
||||
multi_hop_max_sub_queries_per_iter: default_multi_hop_max_sub_queries_per_iter(),
|
||||
multi_hop_max_pool_chunks: default_multi_hop_max_pool_chunks(),
|
||||
nli_threshold: default_nli_threshold(),
|
||||
},
|
||||
@@ -1015,11 +1028,7 @@ impl Config {
|
||||
"KEBAB_IMAGE_OCR_ENDPOINT" => {
|
||||
// Empty env value is treated the same as "fall back
|
||||
// to models.llm.endpoint" — i.e. set None.
|
||||
self.image.ocr.endpoint = if v.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(v.clone())
|
||||
};
|
||||
self.image.ocr.endpoint = if v.is_empty() { None } else { Some(v.clone()) };
|
||||
}
|
||||
"KEBAB_IMAGE_OCR_LANGUAGES" => {
|
||||
// Comma-separated list, e.g. "eng,kor".
|
||||
@@ -1319,7 +1328,10 @@ theme = "dark"
|
||||
#[test]
|
||||
fn env_overrides_chunking_target_tokens() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert("KEBAB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string());
|
||||
env.insert(
|
||||
"KEBAB_CHUNKING_TARGET_TOKENS".to_string(),
|
||||
"777".to_string(),
|
||||
);
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert_eq!(c.chunking.target_tokens, 777);
|
||||
}
|
||||
@@ -1331,7 +1343,10 @@ theme = "dark"
|
||||
"KEBAB_MODELS_LLM_ENDPOINT".to_string(),
|
||||
"http://10.0.0.1:11434".to_string(),
|
||||
);
|
||||
env.insert("KEBAB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string());
|
||||
env.insert(
|
||||
"KEBAB_MODELS_LLM_TEMPERATURE".to_string(),
|
||||
"0.7".to_string(),
|
||||
);
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert_eq!(c.models.llm.endpoint, "http://10.0.0.1:11434");
|
||||
assert!((c.models.llm.temperature - 0.7).abs() < 1e-6);
|
||||
@@ -1361,8 +1376,7 @@ theme = "dark"
|
||||
/// shared with the OCR-side invariant via [`LEGACY_PRE_TIMEOUT_TOML`].
|
||||
#[test]
|
||||
fn legacy_config_without_request_timeout_secs_uses_default() {
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
|
||||
.expect("parse legacy config");
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
|
||||
assert_eq!(c.models.llm.request_timeout_secs, 300);
|
||||
}
|
||||
|
||||
@@ -1391,10 +1405,7 @@ theme = "dark"
|
||||
/// existing configs that omit the new field keep behaving identically.
|
||||
#[test]
|
||||
fn default_ocr_request_timeout_secs_is_300() {
|
||||
assert_eq!(
|
||||
Config::defaults().image.ocr.request_timeout_secs,
|
||||
300
|
||||
);
|
||||
assert_eq!(Config::defaults().image.ocr.request_timeout_secs, 300);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1414,8 +1425,7 @@ theme = "dark"
|
||||
/// with the LLM-side invariant via [`LEGACY_PRE_TIMEOUT_TOML`].
|
||||
#[test]
|
||||
fn legacy_config_without_ocr_request_timeout_secs_uses_default() {
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
|
||||
.expect("parse legacy config");
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
|
||||
assert_eq!(c.image.ocr.request_timeout_secs, 300);
|
||||
}
|
||||
|
||||
@@ -1428,10 +1438,7 @@ theme = "dark"
|
||||
|
||||
#[test]
|
||||
fn default_multi_hop_max_sub_queries_per_iter_is_5() {
|
||||
assert_eq!(
|
||||
Config::defaults().rag.multi_hop_max_sub_queries_per_iter,
|
||||
5
|
||||
);
|
||||
assert_eq!(Config::defaults().rag.multi_hop_max_sub_queries_per_iter, 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1445,10 +1452,7 @@ theme = "dark"
|
||||
#[test]
|
||||
fn env_overrides_multi_hop_knobs() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert(
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(),
|
||||
"5".to_string(),
|
||||
);
|
||||
env.insert("KEBAB_RAG_MULTI_HOP_MAX_DEPTH".to_string(), "5".to_string());
|
||||
env.insert(
|
||||
"KEBAB_RAG_MULTI_HOP_MAX_SUB_QUERIES_PER_ITER".to_string(),
|
||||
"7".to_string(),
|
||||
@@ -1470,8 +1474,7 @@ theme = "dark"
|
||||
/// (that fixture also predates the multi_hop_* fields).
|
||||
#[test]
|
||||
fn legacy_config_without_multi_hop_knobs_uses_defaults() {
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
|
||||
.expect("parse legacy config");
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
|
||||
assert_eq!(c.rag.multi_hop_max_depth, 3);
|
||||
assert_eq!(c.rag.multi_hop_max_sub_queries_per_iter, 5);
|
||||
// v0.18 dogfood (post-PR-7): pool default 30 → 15.
|
||||
@@ -1504,8 +1507,7 @@ theme = "dark"
|
||||
/// all PR-9c-1 fields).
|
||||
#[test]
|
||||
fn legacy_config_without_nli_uses_defaults() {
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML)
|
||||
.expect("parse legacy config");
|
||||
let c: Config = toml::from_str(LEGACY_PRE_TIMEOUT_TOML).expect("parse legacy config");
|
||||
assert_eq!(c.rag.nli_threshold, 0.0);
|
||||
assert_eq!(
|
||||
c.models.nli.model,
|
||||
@@ -1705,7 +1707,11 @@ max_context_tokens = 8000
|
||||
"[workspace]\ninclude = [\"**/*.md\", \"**/*.txt\"]",
|
||||
);
|
||||
let parsed: Result<Config, _> = toml::from_str(&toml_text);
|
||||
assert!(parsed.is_ok(), "legacy include must not break load: {:?}", parsed.err());
|
||||
assert!(
|
||||
parsed.is_ok(),
|
||||
"legacy include must not break load: {:?}",
|
||||
parsed.err()
|
||||
);
|
||||
let cfg = parsed.unwrap();
|
||||
assert_eq!(cfg.workspace.root, "/tmp/kebab-legacy");
|
||||
}
|
||||
@@ -1715,7 +1721,10 @@ max_context_tokens = 8000
|
||||
#[test]
|
||||
fn workspace_cfg_has_only_root_and_exclude_fields() {
|
||||
let ws = Config::defaults().workspace;
|
||||
let WorkspaceCfg { root: _, exclude: _ } = &ws;
|
||||
let WorkspaceCfg {
|
||||
root: _,
|
||||
exclude: _,
|
||||
} = &ws;
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1727,9 +1736,10 @@ max_context_tokens = 8000
|
||||
#[test]
|
||||
fn env_override_stale_threshold() {
|
||||
let c = Config::defaults();
|
||||
let env: HashMap<String, String> = [
|
||||
("KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(), "7".to_string()),
|
||||
]
|
||||
let env: HashMap<String, String> = [(
|
||||
"KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(),
|
||||
"7".to_string(),
|
||||
)]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let c = c.apply_env(&env);
|
||||
@@ -1744,9 +1754,10 @@ max_context_tokens = 8000
|
||||
// `fb27_tests::file_negative_stale_threshold_returns_config_invalid`)
|
||||
// is the spec-required hard error surface.
|
||||
let c = Config::defaults();
|
||||
let env: HashMap<String, String> = [
|
||||
("KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(), "-5".to_string()),
|
||||
]
|
||||
let env: HashMap<String, String> = [(
|
||||
"KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(),
|
||||
"-5".to_string(),
|
||||
)]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let c = c.apply_env(&env);
|
||||
@@ -1765,7 +1776,10 @@ max_context_tokens = 8000
|
||||
std::env::set_var("XDG_CONFIG_HOME", "/tmp/kebabtest-xdg-config");
|
||||
}
|
||||
let p = Config::xdg_config_path();
|
||||
assert_eq!(p, PathBuf::from("/tmp/kebabtest-xdg-config/kebab/config.toml"));
|
||||
assert_eq!(
|
||||
p,
|
||||
PathBuf::from("/tmp/kebabtest-xdg-config/kebab/config.toml")
|
||||
);
|
||||
// SAFETY: scope-local restore.
|
||||
unsafe {
|
||||
match prev {
|
||||
@@ -1810,10 +1824,7 @@ max_context_tokens = 8000
|
||||
let base = Config::defaults();
|
||||
let mut toml_text = toml::to_string(&base).unwrap();
|
||||
// Inject max_file_bytes override into the [ingest.code] table.
|
||||
toml_text = toml_text.replace(
|
||||
"max_file_bytes = 262144",
|
||||
"max_file_bytes = 524288",
|
||||
);
|
||||
toml_text = toml_text.replace("max_file_bytes = 262144", "max_file_bytes = 524288");
|
||||
let cfg: Config = toml::from_str(&toml_text).unwrap();
|
||||
assert_eq!(cfg.ingest.code.max_file_bytes, 524_288);
|
||||
}
|
||||
@@ -1828,7 +1839,8 @@ mod fb27_tests {
|
||||
fn config_invalid_carries_path_and_cause() {
|
||||
let nonexistent = PathBuf::from("/this/path/should/not/exist/kebab.toml");
|
||||
let err = Config::from_file(&nonexistent).unwrap_err();
|
||||
let signal = err.downcast_ref::<ConfigInvalid>()
|
||||
let signal = err
|
||||
.downcast_ref::<ConfigInvalid>()
|
||||
.expect("from_file error should downcast to ConfigInvalid");
|
||||
assert_eq!(signal.path, nonexistent);
|
||||
assert!(!signal.cause.is_empty(), "cause should be non-empty");
|
||||
@@ -1840,7 +1852,8 @@ mod fb27_tests {
|
||||
let p = dir.path().join("bad.toml");
|
||||
std::fs::write(&p, "this is not [valid toml").unwrap();
|
||||
let err = Config::from_file(&p).unwrap_err();
|
||||
let signal = err.downcast_ref::<ConfigInvalid>()
|
||||
let signal = err
|
||||
.downcast_ref::<ConfigInvalid>()
|
||||
.expect("malformed TOML should downcast to ConfigInvalid");
|
||||
assert_eq!(signal.path, p);
|
||||
assert!(!signal.cause.is_empty(), "cause should be non-empty");
|
||||
@@ -1864,13 +1877,11 @@ mod fb27_tests {
|
||||
toml_text.contains("stale_threshold_days = 30"),
|
||||
"default value drifted; update test fixture"
|
||||
);
|
||||
toml_text = toml_text.replace(
|
||||
"stale_threshold_days = 30",
|
||||
"stale_threshold_days = -5",
|
||||
);
|
||||
toml_text = toml_text.replace("stale_threshold_days = 30", "stale_threshold_days = -5");
|
||||
std::fs::write(&p, &toml_text).unwrap();
|
||||
let err = Config::from_file(&p).unwrap_err();
|
||||
let signal = err.downcast_ref::<ConfigInvalid>()
|
||||
let signal = err
|
||||
.downcast_ref::<ConfigInvalid>()
|
||||
.expect("negative stale_threshold_days should downcast to ConfigInvalid");
|
||||
assert_eq!(signal.path, p);
|
||||
assert!(
|
||||
|
||||
@@ -157,7 +157,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn xdg_data_home_set_replaces_var() {
|
||||
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _lock = ENV_LOCK
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _guard = XdgGuard::capture();
|
||||
// SAFETY: lock held for the duration of this test.
|
||||
unsafe { std::env::set_var("XDG_DATA_HOME", "/custom/path") };
|
||||
@@ -168,7 +170,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn xdg_data_home_unset_uses_default() {
|
||||
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _lock = ENV_LOCK
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _guard = XdgGuard::capture();
|
||||
// SAFETY: lock held for the duration of this test.
|
||||
unsafe { std::env::remove_var("XDG_DATA_HOME") };
|
||||
@@ -181,7 +185,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn xdg_with_no_default_resolves_to_empty_when_unset() {
|
||||
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _lock = ENV_LOCK
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _guard = XdgGuard::capture();
|
||||
// SAFETY: lock held for the duration of this test.
|
||||
unsafe { std::env::remove_var("XDG_DATA_HOME") };
|
||||
@@ -193,7 +199,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn leading_tilde_expands_to_home() {
|
||||
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _lock = ENV_LOCK
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let home = std::env::var("HOME").expect("HOME must be set in tests");
|
||||
let p = expand_path("~/runs", "");
|
||||
assert_eq!(p, PathBuf::from(home).join("runs"));
|
||||
@@ -229,7 +237,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn tilde_path_ignores_base_dir() {
|
||||
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _lock = ENV_LOCK
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let home = std::env::var("HOME").expect("HOME must be set in tests");
|
||||
let base = Path::new("/tmp/ignored-cfg");
|
||||
let p = expand_path_with_base("~/x", "", base);
|
||||
@@ -238,7 +248,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn xdg_var_path_ignores_base_dir() {
|
||||
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _lock = ENV_LOCK
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _guard = XdgGuard::capture();
|
||||
// SAFETY: lock held for the duration of this test.
|
||||
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };
|
||||
@@ -255,7 +267,9 @@ mod tests {
|
||||
// Order matters: substitute `{data_dir}` (which itself contains
|
||||
// an unexpanded `${XDG_DATA_HOME}` and `~`), then the other two
|
||||
// resolve the result.
|
||||
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _lock = ENV_LOCK
|
||||
.lock()
|
||||
.unwrap_or_else(std::sync::PoisonError::into_inner);
|
||||
let _guard = XdgGuard::capture();
|
||||
// SAFETY: lock held for the duration of this test.
|
||||
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };
|
||||
|
||||
@@ -2,13 +2,15 @@
|
||||
//
|
||||
// Integration tests for [pdf.ocr] config section (v0.20.0 sub-item 1).
|
||||
|
||||
use std::collections::HashMap;
|
||||
use kebab_config::{Config, PdfCfg};
|
||||
use std::collections::HashMap;
|
||||
|
||||
// Test 1: toml roundtrip — spec §4.5 line 1034-1047 example block.
|
||||
// Config requires many required fields; test the [pdf] section via PdfCfg wrapper.
|
||||
#[derive(serde::Deserialize)]
|
||||
struct PdfWrapper { pdf: PdfCfg }
|
||||
struct PdfWrapper {
|
||||
pdf: PdfCfg,
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn pdf_ocr_toml_roundtrip() {
|
||||
@@ -50,7 +52,10 @@ fn pdf_ocr_defaults_off_with_qwen_3b() {
|
||||
assert_eq!(cfg.pdf.ocr.engine, "ollama-vision");
|
||||
assert_eq!(cfg.pdf.ocr.model, "qwen2.5vl:3b");
|
||||
assert!(cfg.pdf.ocr.endpoint.is_none());
|
||||
assert_eq!(cfg.pdf.ocr.languages, vec!["eng".to_string(), "kor".to_string()]);
|
||||
assert_eq!(
|
||||
cfg.pdf.ocr.languages,
|
||||
vec!["eng".to_string(), "kor".to_string()]
|
||||
);
|
||||
assert_eq!(cfg.pdf.ocr.max_pixels, 2048);
|
||||
assert_eq!(cfg.pdf.ocr.request_timeout_secs, 180); // Bug #11: 600 → 60 → 180 (HOTFIXES 2026-05-28)
|
||||
assert!((cfg.pdf.ocr.valid_ratio_threshold - 0.5).abs() < 1e-6);
|
||||
@@ -63,9 +68,15 @@ fn pdf_ocr_defaults_off_with_qwen_3b() {
|
||||
fn pdf_ocr_env_overrides() {
|
||||
let mut env: HashMap<String, String> = HashMap::new();
|
||||
env.insert("KEBAB_PDF_OCR_ENABLED".to_string(), "true".to_string());
|
||||
env.insert("KEBAB_PDF_OCR_MODEL".to_string(), "qwen2.5vl:7b".to_string());
|
||||
env.insert(
|
||||
"KEBAB_PDF_OCR_MODEL".to_string(),
|
||||
"qwen2.5vl:7b".to_string(),
|
||||
);
|
||||
env.insert("KEBAB_PDF_OCR_ALWAYS_ON".to_string(), "true".to_string());
|
||||
env.insert("KEBAB_PDF_OCR_VALID_RATIO_THRESHOLD".to_string(), "0.75".to_string());
|
||||
env.insert(
|
||||
"KEBAB_PDF_OCR_VALID_RATIO_THRESHOLD".to_string(),
|
||||
"0.75".to_string(),
|
||||
);
|
||||
|
||||
let cfg = Config::defaults().apply_env(&env);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user