diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index d05d05c..7dd0b78 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -1251,10 +1251,35 @@ fn ingest_one_asset(
         build_canonical_document(asset, metadata, parsed_blocks, parser_version, all_warnings)
             .context("kb-parse-md::build_canonical_document")?;
 
-    let chunks = MdHeadingV1Chunker
+    let mut chunks = MdHeadingV1Chunker
         .chunk(&canonical, chunk_policy)
         .context("kb-chunk::MdHeadingV1Chunker::chunk")?;
 
+    // Phase 2 doc-side expansion: when the flag is on, generate aliases per chunk (fail-soft).
+    if app.config.ingest.expansion.enabled {
+        let exp = &app.config.ingest.expansion;
+        let llm_built = if exp.model.is_empty() {
+            OllamaLanguageModel::new(&app.config)
+        } else {
+            OllamaLanguageModel::with_model(&app.config, &exp.model)
+        };
+        match llm_built {
+            Ok(llm) => {
+                let generator =
+                    crate::expansion::ExpansionGenerator::new(&llm, exp.max_aliases_per_chunk);
+                for chunk in &mut chunks {
+                    chunk.aliases = generator.generate(chunk);
+                }
+            }
+            Err(e) => {
+                tracing::warn!(
+                    target: "kebab-app", error = %e,
+                    "kb-app::ingest: expansion LLM 빌드 실패 — 별칭 없이 진행"
+                );
+            }
+        }
+    }
+
     // Stamp chunker + embedding versions so Task 7's skip detection has
     // data on the second run.
     canonical.last_chunker_version = Some(MdHeadingV1Chunker.chunker_version());
diff --git a/crates/kebab-app/tests/search_lexical.rs b/crates/kebab-app/tests/search_lexical.rs
index 3534867..29d8333 100644
--- a/crates/kebab-app/tests/search_lexical.rs
+++ b/crates/kebab-app/tests/search_lexical.rs
@@ -109,10 +109,10 @@ fn first_ingest_bumps_corpus_revision() {
     let env = TestEnv::lexical_only();
     let store_before = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
     store_before.run_migrations().unwrap();
-    // V004 seeds 0; V009 migration bumps to 1 to invalidate any pre-V009
-    // LRU cache (spec §5.2). Baseline before ingest = post-migration value.
+    // V004 seeds 0; V009 + V010 migrations each bump by 1 to invalidate
+    // stale LRU caches (spec §5.2). Baseline before ingest = 2.
     let baseline = store_before.corpus_revision();
-    assert_eq!(baseline, 1, "fresh store post-V009 baseline = 1");
+    assert_eq!(baseline, 2, "fresh store post-V010 baseline = 2");
 
     let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
     assert!(
diff --git a/crates/kebab-llm-local/src/ollama.rs b/crates/kebab-llm-local/src/ollama.rs
index 7f45ff5..e42fe73 100644
--- a/crates/kebab-llm-local/src/ollama.rs
+++ b/crates/kebab-llm-local/src/ollama.rs
@@ -95,6 +95,24 @@ impl OllamaLanguageModel {
             default_seed: llm.seed,
         })
     }
+
+    /// Same as `new`, but overrides only the model ID so doc-side expansion (Task 5)
+    /// can use `[ingest.expansion].model`. Callers route an empty string to `new`;
+    /// a non-empty `model_id` is trusted here. Errors if the HTTP client build fails.
+    pub fn with_model(config: &kebab_config::Config, model_id: &str) -> anyhow::Result<Self> {
+        let llm = &config.models.llm;
+        let client = reqwest::blocking::Client::builder()
+            .timeout(Duration::from_secs(llm.request_timeout_secs))
+            .build()?;
+        Ok(Self {
+            client,
+            endpoint: llm.endpoint.clone(),
+            model_id: model_id.to_string(),
+            context_tokens: llm.context_tokens,
+            default_temperature: llm.temperature,
+            default_seed: llm.seed,
+        })
+    }
 }
 
 impl LanguageModel for OllamaLanguageModel {