refactor(config): v3 경로 call-site sweep (kebab-app/kebab-eval/kebab-parse-image)
부모 경로에 .ingest 삽입(leaf 구조체 불변). src + 테스트 call-site 전부. kebab-cli 테스트의 v2 TOML fixture 는 from_file 자동변환(T6) 경로 검증용으로 유지.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -62,8 +62,8 @@ impl TestEnv {
|
||||
// Drop in a small chunk policy so the fixture's small files
|
||||
// emit at least a couple of chunks even with overlap_tokens
|
||||
// honored.
|
||||
config.chunking.target_tokens = 80;
|
||||
config.chunking.overlap_tokens = 20;
|
||||
config.ingest.chunking.target_tokens = 80;
|
||||
config.ingest.chunking.overlap_tokens = 20;
|
||||
|
||||
Self {
|
||||
temp,
|
||||
|
||||
@@ -63,7 +63,7 @@ fn chunking_change_reindexes_all_types() {
|
||||
let scanned = first.scanned;
|
||||
|
||||
// Bump target_tokens — folds into every type's signature.
|
||||
env.config.chunking.target_tokens += 100;
|
||||
env.config.ingest.chunking.target_tokens += 100;
|
||||
|
||||
let second = reingest(&env);
|
||||
assert_eq!(second.scanned, scanned);
|
||||
|
||||
@@ -34,11 +34,11 @@ fn cfg_with_image_pipeline(env: &TestEnv, mock_endpoint: &str) -> Config {
|
||||
let mut cfg = env.config.clone();
|
||||
// p9-fb-25: workspace.include removed; extension routing is now
|
||||
// handled by extractor matching alone (no config knob).
|
||||
cfg.image.ocr.enabled = true;
|
||||
cfg.image.ocr.endpoint = Some(mock_endpoint.to_string());
|
||||
cfg.image.ocr.model = "vision-mock:1b".to_string();
|
||||
cfg.image.ocr.max_pixels = 512;
|
||||
cfg.image.caption.enabled = false; // tested separately below
|
||||
cfg.ingest.image.ocr.enabled = true;
|
||||
cfg.ingest.image.ocr.endpoint = Some(mock_endpoint.to_string());
|
||||
cfg.ingest.image.ocr.model = "vision-mock:1b".to_string();
|
||||
cfg.ingest.image.ocr.max_pixels = 512;
|
||||
cfg.ingest.image.caption.enabled = false; // tested separately below
|
||||
cfg.models.llm.endpoint = mock_endpoint.to_string();
|
||||
cfg.models.llm.model = "vision-mock:1b".to_string();
|
||||
cfg
|
||||
@@ -161,8 +161,8 @@ async fn ingest_image_with_ocr_and_caption_populates_both_fields() {
|
||||
let env = TestEnv::lexical_only();
|
||||
write_red_png(&env.workspace_root, "diagram.png");
|
||||
let mut cfg = cfg_with_image_pipeline(&env, &server.uri());
|
||||
cfg.image.caption.enabled = true;
|
||||
cfg.image.caption.max_pixels = 384;
|
||||
cfg.ingest.image.caption.enabled = true;
|
||||
cfg.ingest.image.caption.max_pixels = 384;
|
||||
|
||||
let cfg_clone = cfg.clone();
|
||||
let scope = env.scope();
|
||||
@@ -270,8 +270,8 @@ async fn image_indexed_with_filename_when_ocr_and_caption_disabled() {
|
||||
let mut cfg = env.config.clone();
|
||||
// p9-fb-25: workspace.include removed; extension routing is now
|
||||
// handled by extractor matching alone (no config knob).
|
||||
cfg.image.ocr.enabled = false;
|
||||
cfg.image.caption.enabled = false;
|
||||
cfg.ingest.image.ocr.enabled = false;
|
||||
cfg.ingest.image.caption.enabled = false;
|
||||
|
||||
let cfg_clone = cfg.clone();
|
||||
let scope = env.scope();
|
||||
@@ -334,8 +334,8 @@ async fn garbage_png_increments_errors_counter_exactly_once() {
|
||||
let mut cfg = env.config.clone();
|
||||
// p9-fb-25: workspace.include removed; extension routing is now
|
||||
// handled by extractor matching alone (no config knob).
|
||||
cfg.image.ocr.enabled = false;
|
||||
cfg.image.caption.enabled = false;
|
||||
cfg.ingest.image.ocr.enabled = false;
|
||||
cfg.ingest.image.caption.enabled = false;
|
||||
|
||||
let cfg_clone = cfg.clone();
|
||||
let scope = env.scope();
|
||||
|
||||
@@ -23,8 +23,8 @@ fn minimal_config(workspace: &std::path::Path, log_dir: &std::path::Path) -> Con
|
||||
cfg.storage.model_dir = model_dir.to_string_lossy().into_owned();
|
||||
cfg.models.embedding.provider = "none".to_string();
|
||||
cfg.models.embedding.dimensions = 0;
|
||||
cfg.chunking.target_tokens = 80;
|
||||
cfg.chunking.overlap_tokens = 20;
|
||||
cfg.ingest.chunking.target_tokens = 80;
|
||||
cfg.ingest.chunking.overlap_tokens = 20;
|
||||
cfg.logging = LoggingCfg {
|
||||
ingest_log_enabled: true,
|
||||
ingest_log_dir: log_dir.to_path_buf(),
|
||||
|
||||
@@ -22,8 +22,8 @@ fn ollama_endpoint() -> String {
|
||||
|
||||
fn make_ocr_env_real() -> TestEnv {
|
||||
let mut env = TestEnv::lexical_only();
|
||||
env.config.pdf.ocr.enabled = true;
|
||||
env.config.pdf.ocr.endpoint = Some(ollama_endpoint());
|
||||
env.config.ingest.pdf.ocr.enabled = true;
|
||||
env.config.ingest.pdf.ocr.endpoint = Some(ollama_endpoint());
|
||||
env.config.models.embedding.provider = "none".to_string();
|
||||
|
||||
let src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
@@ -92,8 +92,8 @@ fn ocr_text_indexed_and_searchable() {
|
||||
#[test]
|
||||
fn ingest_with_cancel_aborts_mid_pdf() {
|
||||
let mut env = TestEnv::lexical_only();
|
||||
env.config.pdf.ocr.enabled = true;
|
||||
env.config.pdf.ocr.endpoint = Some("http://127.0.0.1:1".to_string());
|
||||
env.config.ingest.pdf.ocr.enabled = true;
|
||||
env.config.ingest.pdf.ocr.endpoint = Some("http://127.0.0.1:1".to_string());
|
||||
|
||||
let src = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
|
||||
@@ -196,9 +196,9 @@ fn pdf_ocr_progress_emits_started_finished_events() {
|
||||
config.storage.data_dir = data_dir.to_string_lossy().into_owned();
|
||||
config.models.embedding.provider = "none".to_string();
|
||||
config.models.embedding.dimensions = 0;
|
||||
config.pdf.ocr.enabled = true;
|
||||
config.ingest.pdf.ocr.enabled = true;
|
||||
if let Ok(endpoint) = std::env::var("KEBAB_PDF_OCR_ENDPOINT") {
|
||||
config.pdf.ocr.endpoint = Some(endpoint);
|
||||
config.ingest.pdf.ocr.endpoint = Some(endpoint);
|
||||
}
|
||||
|
||||
let scope = kebab_core::SourceScope {
|
||||
|
||||
@@ -49,9 +49,9 @@ async fn ingest_dual_write_doc_id_matches_ndjson() {
|
||||
let result = spawn_blocking(move || {
|
||||
let mut env = TestEnv::lexical_only();
|
||||
// Enable PDF OCR + set up mock endpoint
|
||||
env.config.pdf.ocr.enabled = true;
|
||||
env.config.pdf.ocr.endpoint = Some(mock_url.clone());
|
||||
env.config.pdf.ocr.model = "qwen2.5vl:3b".to_string();
|
||||
env.config.ingest.pdf.ocr.enabled = true;
|
||||
env.config.ingest.pdf.ocr.endpoint = Some(mock_url.clone());
|
||||
env.config.ingest.pdf.ocr.model = "qwen2.5vl:3b".to_string();
|
||||
// Enable ingest log
|
||||
let log_dir = env.temp.path().join("logs");
|
||||
std::fs::create_dir_all(&log_dir).unwrap();
|
||||
|
||||
@@ -121,8 +121,8 @@ fn cfg_with_pdf(env: &TestEnv) -> Config {
|
||||
// PDF ingest does not need OCR / caption / LM — leave defaults
|
||||
// (ocr.enabled=false, caption.enabled=false). The image pipeline
|
||||
// construction step skips both adapters.
|
||||
cfg.image.ocr.enabled = false;
|
||||
cfg.image.caption.enabled = false;
|
||||
cfg.ingest.image.ocr.enabled = false;
|
||||
cfg.ingest.image.caption.enabled = false;
|
||||
cfg
|
||||
}
|
||||
|
||||
|
||||
@@ -12,8 +12,8 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path)
|
||||
cfg.storage.model_dir = data_dir.join("models").to_string_lossy().into_owned();
|
||||
cfg.models.embedding.provider = "none".to_string();
|
||||
cfg.models.embedding.dimensions = 0;
|
||||
cfg.chunking.target_tokens = 80;
|
||||
cfg.chunking.overlap_tokens = 20;
|
||||
cfg.ingest.chunking.target_tokens = 80;
|
||||
cfg.ingest.chunking.overlap_tokens = 20;
|
||||
cfg
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,8 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path)
|
||||
config.storage.model_dir = data_dir.join("models").to_string_lossy().into_owned();
|
||||
config.models.embedding.provider = "none".to_string();
|
||||
config.models.embedding.dimensions = 0;
|
||||
config.chunking.target_tokens = 80;
|
||||
config.chunking.overlap_tokens = 20;
|
||||
config.ingest.chunking.target_tokens = 80;
|
||||
config.ingest.chunking.overlap_tokens = 20;
|
||||
config
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user