feat(config): [ingest.expansion] flag (default off)
This commit is contained in:
@@ -595,6 +595,8 @@ impl UiCfg {
|
||||
#[serde(default)]
|
||||
pub struct IngestCfg {
|
||||
pub code: IngestCodeCfg,
|
||||
#[serde(default)]
|
||||
pub expansion: IngestExpansionCfg,
|
||||
}
|
||||
|
||||
/// p10-1A-1: settings for the code ingest pipeline. All fields have
|
||||
@@ -635,6 +637,31 @@ impl Default for IngestCodeCfg {
|
||||
}
|
||||
}
|
||||
|
||||
/// doc-side expansion config. Default: disabled (requires explicit opt-in).
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct IngestExpansionCfg {
|
||||
/// Whether doc-side alias expansion is enabled during ingest.
|
||||
pub enabled: bool,
|
||||
/// Ollama model used for alias generation (empty = use LLM default).
|
||||
pub model: String,
|
||||
/// Maximum aliases generated per chunk.
|
||||
pub max_aliases_per_chunk: usize,
|
||||
/// Prompt template version tag.
|
||||
pub prompt_version: String,
|
||||
}
|
||||
|
||||
impl Default for IngestExpansionCfg {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: false,
|
||||
model: String::new(),
|
||||
max_aliases_per_chunk: 8,
|
||||
prompt_version: "expansion-v1".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Defaults per design §6.4.
|
||||
pub fn defaults() -> Self {
|
||||
@@ -1119,6 +1146,22 @@ impl Config {
|
||||
self.pdf.ocr.lang_hint = if v.is_empty() { None } else { Some(v.clone()) };
|
||||
}
|
||||
|
||||
// ingest.expansion
|
||||
"KEBAB_INGEST_EXPANSION_ENABLED" => {
|
||||
self.ingest.expansion.enabled = parse_bool(v);
|
||||
}
|
||||
"KEBAB_INGEST_EXPANSION_MODEL" => {
|
||||
self.ingest.expansion.model = v.clone();
|
||||
}
|
||||
"KEBAB_INGEST_EXPANSION_MAX_ALIASES" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.ingest.expansion.max_aliases_per_chunk = n;
|
||||
}
|
||||
}
|
||||
"KEBAB_INGEST_EXPANSION_PROMPT_VERSION" => {
|
||||
self.ingest.expansion.prompt_version = v.clone();
|
||||
}
|
||||
|
||||
// Unknown KEBAB_* keys are silently ignored — see
|
||||
// `env_unknown_key_is_ignored` test.
|
||||
_ => {}
|
||||
@@ -1846,6 +1889,28 @@ max_context_tokens = 8000
|
||||
let cfg: Config = toml::from_str(&toml_text).unwrap();
|
||||
assert_eq!(cfg.ingest.code.max_file_bytes, 524_288);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expansion_defaults_off() {
|
||||
let cfg = Config::defaults();
|
||||
assert!(!cfg.ingest.expansion.enabled);
|
||||
assert_eq!(cfg.ingest.expansion.max_aliases_per_chunk, 8);
|
||||
assert_eq!(cfg.ingest.expansion.prompt_version, "expansion-v1");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expansion_env_override() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert("KEBAB_INGEST_EXPANSION_ENABLED".into(), "true".into());
|
||||
env.insert("KEBAB_INGEST_EXPANSION_MODEL".into(), "gemma3:4b".into());
|
||||
env.insert("KEBAB_INGEST_EXPANSION_MAX_ALIASES".into(), "12".into());
|
||||
env.insert("KEBAB_INGEST_EXPANSION_PROMPT_VERSION".into(), "expansion-v2".into());
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert!(c.ingest.expansion.enabled);
|
||||
assert_eq!(c.ingest.expansion.model, "gemma3:4b");
|
||||
assert_eq!(c.ingest.expansion.max_aliases_per_chunk, 12);
|
||||
assert_eq!(c.ingest.expansion.prompt_version, "expansion-v2");
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user