Files
kebab/crates/kebab-app/tests/ingest_log_smoke.rs
altair823 d5c69f6715 refactor(config): v3 경로 call-site sweep (kebab-app/kebab-eval/kebab-parse-image)
부모 경로에 .ingest 삽입(leaf 구조체 불변). src + 테스트 call-site 전부.
kebab-cli 테스트의 v2 TOML fixture 는 from_file 자동변환(T6) 경로 검증용으로 유지.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-04 12:40:06 +00:00

172 lines
5.8 KiB
Rust

// crates/kebab-app/tests/ingest_log_smoke.rs
//
// Integration tests for ingest_log feature (v0.20.x). Spec §5 AC-9 + AC-6.
use std::path::PathBuf;
use kebab_app::{IngestOpts, ingest_with_config_opts};
use kebab_config::{Config, LoggingCfg};
use kebab_core::SourceScope;
use serde_json::Value;
use tempfile::TempDir;
/// Builds the test `Config` rooted at `workspace`, with ingest logging turned on.
///
/// Starting from `Config::defaults()`, this:
/// - points the workspace root at `workspace` and clears its exclude list;
/// - creates `data` and `models` directories next to `workspace` (under its
///   parent) and uses them as the storage data/model directories;
/// - sets the embedding provider to `"none"` with `0` dimensions;
/// - sets chunking to a target of 80 tokens with a 20-token overlap;
/// - sets `ingest_log_enabled` and directs `ingest_log_dir` at `log_dir`.
///
/// # Panics
///
/// Panics if `workspace` has no parent or either directory cannot be created.
fn minimal_config(workspace: &std::path::Path, log_dir: &std::path::Path) -> Config {
    /// Lossy UTF-8 rendering of a path as an owned `String`.
    fn owned_lossy(p: &std::path::Path) -> String {
        p.to_string_lossy().into_owned()
    }

    // Scratch directories live beside the workspace, not inside it.
    let parent = workspace.parent().unwrap();
    let data_dir = parent.join("data");
    std::fs::create_dir_all(&data_dir).unwrap();
    let model_dir = parent.join("models");
    std::fs::create_dir_all(&model_dir).unwrap();

    let mut cfg = Config::defaults();

    cfg.workspace.root = owned_lossy(workspace);
    cfg.workspace.exclude.clear();

    cfg.storage.data_dir = owned_lossy(&data_dir);
    cfg.storage.model_dir = owned_lossy(&model_dir);

    cfg.models.embedding.provider = "none".to_string();
    cfg.models.embedding.dimensions = 0;

    cfg.ingest.chunking.target_tokens = 80;
    cfg.ingest.chunking.overlap_tokens = 20;

    cfg.logging = LoggingCfg {
        ingest_log_dir: log_dir.to_path_buf(),
        ingest_log_enabled: true,
        ..Default::default()
    };

    cfg
}
/// AC-9: after an ingest with logging enabled, exactly one `ingest-*.ndjson`
/// file exists in the log directory, every line of it is valid JSON carrying a
/// recognised `kind`, and the last line is a `summary` record with `scanned > 0`.
#[test]
fn ingest_log_smoke() {
    let tmp = TempDir::new().unwrap();
    let workspace = tmp.path().join("kb");
    std::fs::create_dir_all(&workspace).unwrap();
    let log_dir = tmp.path().join("logs");

    // 1. Minimal corpus: one markdown file, plus the scanned-PDF fixture when
    //    it is present (original note: OCR disabled — no Ollama needed).
    //    The fixture path is relative, so it is resolved against the test
    //    process's current directory; if it is absent the PDF is simply skipped.
    let markdown = "# Hello\n\nThis is a smoke test.\n";
    std::fs::write(workspace.join("hello.md"), markdown).unwrap();
    let pdf_src = PathBuf::from("../kebab-parse-pdf/tests/fixtures/scanned_page1.pdf");
    if pdf_src.exists() {
        std::fs::copy(&pdf_src, workspace.join("scanned.pdf")).unwrap();
    }

    // 2. Config with logging enabled, and a scope covering the whole workspace.
    let cfg = minimal_config(&workspace, &log_dir);
    let scope = SourceScope {
        root: workspace.clone(),
        exclude: Vec::new(),
        ..Default::default()
    };

    // 3. Run ingest.
    ingest_with_config_opts(cfg, scope, false, IngestOpts::default())
        .expect("ingest should succeed");

    // 4. Collect the `ingest-*.ndjson` entries from the log directory;
    //    unreadable directory entries are ignored.
    let mut log_files = Vec::new();
    for entry in std::fs::read_dir(&log_dir).unwrap().flatten() {
        let file_name = entry.file_name();
        let file_name = file_name.to_string_lossy();
        if file_name.starts_with("ingest-") && file_name.ends_with(".ndjson") {
            log_files.push(entry);
        }
    }
    assert_eq!(
        log_files.len(),
        1,
        "expected exactly 1 ingest-*.ndjson file, found: {log_files:?}"
    );

    // 5. Every line must parse as JSON and carry a known string `kind`.
    let body = std::fs::read_to_string(log_files[0].path()).unwrap();
    let lines: Vec<&str> = body.lines().collect();
    assert!(!lines.is_empty(), "log file should not be empty");
    for line in lines.iter().copied() {
        let v: Value = match serde_json::from_str(line) {
            Ok(parsed) => parsed,
            Err(e) => panic!("line is not valid JSON: {e}\nline: {line}"),
        };
        let kind = match v.get("kind").and_then(Value::as_str) {
            Some(kind) => kind,
            None => panic!("line missing 'kind' field: {line}"),
        };
        assert!(
            matches!(kind, "ocr" | "parse_error" | "skip" | "error" | "summary"),
            "unexpected kind '{kind}' in line: {line}"
        );
    }

    // 6. The final record must be the summary, reporting at least one scanned item.
    //    Indexing is safe: `lines` was asserted non-empty above.
    let last = lines[lines.len() - 1];
    let last_v: Value = serde_json::from_str(last).unwrap();
    let last_kind = last_v.get("kind").and_then(Value::as_str);
    assert_eq!(
        last_kind,
        Some("summary"),
        "last line must be kind=summary, got: {last}"
    );
    // A missing or non-integer `scanned` counts as 0 and fails the assertion.
    let scanned = last_v
        .get("scanned")
        .and_then(Value::as_u64)
        .unwrap_or_default();
    assert!(scanned > 0, "summary.scanned should be > 0, got: {last}");
}
/// AC-6: with `ingest_log_enabled = false`, an ingest must not leave any
/// `ingest-*.ndjson` file in the configured log directory (the directory
/// itself may or may not exist afterwards).
#[test]
fn ingest_log_disabled_emits_no_file() {
    let tmp = TempDir::new().unwrap();
    let base = tmp.path();

    // Workspace with a single markdown document.
    let workspace = base.join("kb");
    std::fs::create_dir_all(&workspace).unwrap();
    let log_dir = base.join("logs");
    let markdown = "# Hello\n\nDisabled log test.\n";
    std::fs::write(workspace.join("hello.md"), markdown).unwrap();

    // Storage directories sit next to the workspace inside the temp dir.
    let data_dir = base.join("data");
    std::fs::create_dir_all(&data_dir).unwrap();
    let model_dir = base.join("models");
    std::fs::create_dir_all(&model_dir).unwrap();

    // Default config, embedding provider "none", logging explicitly disabled
    // but still pointed at `log_dir` so a stray file would be detectable.
    let mut cfg = Config::defaults();
    cfg.workspace.root = workspace.to_string_lossy().into_owned();
    cfg.workspace.exclude.clear();
    cfg.storage.data_dir = data_dir.to_string_lossy().into_owned();
    cfg.storage.model_dir = model_dir.to_string_lossy().into_owned();
    cfg.models.embedding.provider = "none".to_string();
    cfg.models.embedding.dimensions = 0;
    cfg.logging = LoggingCfg {
        ingest_log_dir: log_dir.clone(),
        ingest_log_enabled: false,
        ..Default::default()
    };

    let scope = SourceScope {
        root: workspace.clone(),
        exclude: Vec::new(),
        ..Default::default()
    };

    ingest_with_config_opts(cfg, scope, false, IngestOpts::default())
        .expect("ingest should succeed");

    // log_dir should either not exist or contain 0 ingest-*.ndjson files.
    let mut log_file_count: usize = 0;
    if log_dir.exists() {
        for entry in std::fs::read_dir(&log_dir).unwrap().flatten() {
            let file_name = entry.file_name();
            let file_name = file_name.to_string_lossy();
            if file_name.starts_with("ingest-") && file_name.ends_with(".ndjson") {
                log_file_count += 1;
            }
        }
    }
    assert_eq!(
        log_file_count, 0,
        "no ingest-*.ndjson file should be created when disabled"
    );
}