feat(app): ingest 별칭 생성 hook (flag off 기본, fail-soft)

This commit is contained in:
2026-05-30 03:03:09 +00:00
parent bddcd53688
commit cde4d75f6b
3 changed files with 47 additions and 4 deletions

View File

@@ -1251,10 +1251,35 @@ fn ingest_one_asset(
build_canonical_document(asset, metadata, parsed_blocks, parser_version, all_warnings)
.context("kb-parse-md::build_canonical_document")?;
let chunks = MdHeadingV1Chunker
let mut chunks = MdHeadingV1Chunker
.chunk(&canonical, chunk_policy)
.context("kb-chunk::MdHeadingV1Chunker::chunk")?;
// Phase 2 doc-side expansion: flag on 이면 청크당 별칭 생성 (fail-soft).
if app.config.ingest.expansion.enabled {
let exp = &app.config.ingest.expansion;
let llm_built = if exp.model.is_empty() {
OllamaLanguageModel::new(&app.config)
} else {
OllamaLanguageModel::with_model(&app.config, &exp.model)
};
match llm_built {
Ok(llm) => {
let generator =
crate::expansion::ExpansionGenerator::new(&llm, exp.max_aliases_per_chunk);
for chunk in &mut chunks {
chunk.aliases = generator.generate(chunk);
}
}
Err(e) => {
tracing::warn!(
target: "kebab-app", error = %e,
"kb-app::ingest: expansion LLM 빌드 실패 — 별칭 없이 진행"
);
}
}
}
// Stamp chunker + embedding versions so Task 7's skip detection has
// data on the second run.
canonical.last_chunker_version = Some(MdHeadingV1Chunker.chunker_version());

View File

@@ -109,10 +109,10 @@ fn first_ingest_bumps_corpus_revision() {
let env = TestEnv::lexical_only();
let store_before = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
store_before.run_migrations().unwrap();
// V004 seeds 0; V009 migration bumps to 1 to invalidate any pre-V009
// LRU cache (spec §5.2). Baseline before ingest = post-migration value.
// V004 seeds 0; V009 + V010 migrations each bump by 1 to invalidate
// stale LRU caches (spec §5.2). Baseline before ingest = 2.
let baseline = store_before.corpus_revision();
assert_eq!(baseline, 1, "fresh store post-V009 baseline = 1");
assert_eq!(baseline, 2, "fresh store post-V010 baseline = 2");
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert!(

View File

@@ -95,6 +95,24 @@ impl OllamaLanguageModel {
default_seed: llm.seed,
})
}
/// Constructs the model client from `config.models.llm`, but with the model
/// identifier taken from `model_id` instead of from the configuration.
///
/// Intended as the counterpart of `new` with only the model ID overridden, so
/// that doc-side expansion (Task 5) can honor `[ingest.expansion].model`.
/// An empty `model_id` is not checked here: callers are expected to fall
/// back to `new` in that case, and a non-empty value is trusted as given.
///
/// # Errors
///
/// Returns an error when the blocking HTTP client cannot be built.
pub fn with_model(config: &kebab_config::Config, model_id: &str) -> anyhow::Result<Self> {
    let settings = &config.models.llm;
    // Endpoint, timeout and sampling defaults all come from the shared LLM
    // settings; only the model name is supplied by the caller.
    let request_timeout = Duration::from_secs(settings.request_timeout_secs);
    let http = reqwest::blocking::Client::builder()
        .timeout(request_timeout)
        .build()?;
    Ok(Self {
        client: http,
        endpoint: settings.endpoint.clone(),
        model_id: model_id.to_owned(),
        context_tokens: settings.context_tokens,
        default_temperature: settings.temperature,
        default_seed: settings.seed,
    })
}
}
impl LanguageModel for OllamaLanguageModel {