From d91b60325e3bd94e3b2f027459dbd727fc59cf75 Mon Sep 17 00:00:00 2001 From: altair823 Date: Thu, 30 Apr 2026 08:53:59 +0000 Subject: [PATCH] p0-1: address review (apply_env full schema map, drop dead Option in logging::init) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - kb-config::apply_env now covers every leaf key in `Config` via an explicit grep-friendly match block (one arm per leaf), keyed `KB_
_`. Booleans flow through a shared `parse_bool` helper. Numeric leaves silently keep their prior value on parse failure so a malformed env entry can't crash startup. - New tests: env_unknown_key_is_ignored, env_overrides_chunking_target_tokens, env_overrides_models_llm_endpoint_and_temperature, env_overrides_indexing_watch_filesystem_bool. - kb-app::logging::init now returns `Result` instead of `Result>` — the inner `Option` was always `Some` so the wrapper was dead. kb-cli/main.rs collapses the call from `.ok().flatten()` to `.ok()`, preserving fail-soft semantics on logging init. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kb-app/src/logging.rs | 7 +- crates/kb-cli/src/main.rs | 4 +- crates/kb-config/src/lib.rs | 171 +++++++++++++++++++++++++++++++++-- 3 files changed, 172 insertions(+), 10 deletions(-) diff --git a/crates/kb-app/src/logging.rs b/crates/kb-app/src/logging.rs index 4e5c755..30865fc 100644 --- a/crates/kb-app/src/logging.rs +++ b/crates/kb-app/src/logging.rs @@ -16,8 +16,9 @@ pub enum LogLevel { } /// Initialize tracing. Returns a guard to keep alive until exit. Idempotent -/// — a second call is a no-op. -pub fn init(level: LogLevel) -> Result> { +/// — a second call is a no-op (the second `try_init` is dropped silently +/// but the guard is still returned so the caller can keep it alive). +pub fn init(level: LogLevel) -> Result { let log_dir = kb_config::Config::xdg_state_dir().join("logs"); std::fs::create_dir_all(&log_dir)?; @@ -38,5 +39,5 @@ pub fn init(level: LogLevel) -> Result> { // no-op. let _ = registry.try_init(); - Ok(Some(guard)) + Ok(guard) } diff --git a/crates/kb-cli/src/main.rs b/crates/kb-cli/src/main.rs index 1674d42..b31bae5 100644 --- a/crates/kb-cli/src/main.rs +++ b/crates/kb-cli/src/main.rs @@ -158,7 +158,9 @@ fn main() -> ExitCode { } else { kb_app::logging::LogLevel::Default }; - let _log_guard = kb_app::logging::init(level).ok().flatten(); + // Fail-soft: if logging init errors (e.g. XDG state dir is read-only), + // proceed without a guard rather than crashing — `kb` is still usable. + let _log_guard = kb_app::logging::init(level).ok(); match run(&cli) { Ok(()) => ExitCode::from(0), Err(e) => { diff --git a/crates/kb-config/src/lib.rs b/crates/kb-config/src/lib.rs index 40b35ec..b2b1352 100644 --- a/crates/kb-config/src/lib.rs +++ b/crates/kb-config/src/lib.rs @@ -189,28 +189,139 @@ impl Config { } /// Apply `KB_
_` env overrides. Unknown keys are ignored. + /// + /// The mapping is an explicit grep-friendly whitelist — one match arm + /// per leaf key in `Config`. Booleans accept `1` / `true` / `yes` + /// (case-insensitive) for true and anything else for false. Numeric + /// keys silently keep their prior value if the env value fails to + /// parse, so a malformed `KB_*` cannot crash startup. pub fn apply_env(mut self, env: &HashMap) -> Self { for (k, v) in env { if !k.starts_with("KB_") { continue; } - // Match a small, intentional whitelist for P0 — full env→config - // mapping lands when the rest of the schema is wired up. match k.as_str() { + // workspace "KB_WORKSPACE_ROOT" => self.workspace.root = v.clone(), + + // storage + "KB_STORAGE_DATA_DIR" => self.storage.data_dir = v.clone(), + "KB_STORAGE_SQLITE" => self.storage.sqlite = v.clone(), + "KB_STORAGE_VECTOR_DIR" => self.storage.vector_dir = v.clone(), + "KB_STORAGE_ASSET_DIR" => self.storage.asset_dir = v.clone(), + "KB_STORAGE_ARTIFACT_DIR" => self.storage.artifact_dir = v.clone(), + "KB_STORAGE_MODEL_DIR" => self.storage.model_dir = v.clone(), + "KB_STORAGE_RUNS_DIR" => self.storage.runs_dir = v.clone(), + "KB_STORAGE_COPY_THRESHOLD_MB" => { + if let Ok(n) = v.parse::() { + self.storage.copy_threshold_mb = n; + } + } + + // indexing + "KB_INDEXING_MAX_PARALLEL_EXTRACTORS" => { + if let Ok(n) = v.parse::() { + self.indexing.max_parallel_extractors = n; + } + } + "KB_INDEXING_MAX_PARALLEL_EMBEDDINGS" => { + if let Ok(n) = v.parse::() { + self.indexing.max_parallel_embeddings = n; + } + } + "KB_INDEXING_WATCH_FILESYSTEM" => { + self.indexing.watch_filesystem = parse_bool(v); + } + + // chunking + "KB_CHUNKING_TARGET_TOKENS" => { + if let Ok(n) = v.parse::() { + self.chunking.target_tokens = n; + } + } + "KB_CHUNKING_OVERLAP_TOKENS" => { + if let Ok(n) = v.parse::() { + self.chunking.overlap_tokens = n; + } + } + "KB_CHUNKING_RESPECT_MARKDOWN_HEADINGS" => { + self.chunking.respect_markdown_headings = parse_bool(v); + } + "KB_CHUNKING_CHUNKER_VERSION" => self.chunking.chunker_version = v.clone(), + + // models.embedding + "KB_MODELS_EMBEDDING_PROVIDER" => self.models.embedding.provider = v.clone(), + "KB_MODELS_EMBEDDING_MODEL" => self.models.embedding.model = v.clone(), + "KB_MODELS_EMBEDDING_VERSION" => self.models.embedding.version = v.clone(), + "KB_MODELS_EMBEDDING_DIMENSIONS" => { + if let Ok(n) = v.parse::() { + self.models.embedding.dimensions = n; + } + } + "KB_MODELS_EMBEDDING_BATCH_SIZE" => { + if let Ok(n) = v.parse::() { + self.models.embedding.batch_size = n; + } + } + + // models.llm + "KB_MODELS_LLM_PROVIDER" => self.models.llm.provider = v.clone(), + "KB_MODELS_LLM_MODEL" => self.models.llm.model = v.clone(), + "KB_MODELS_LLM_CONTEXT_TOKENS" => { + if let Ok(n) = v.parse::() { + self.models.llm.context_tokens = n; + } + } + "KB_MODELS_LLM_ENDPOINT" => self.models.llm.endpoint = v.clone(), + "KB_MODELS_LLM_TEMPERATURE" => { + if let Ok(f) = v.parse::() { + self.models.llm.temperature = f; + } + } + "KB_MODELS_LLM_SEED" => { + if let Ok(n) = v.parse::() { + self.models.llm.seed = n; + } + } + + // search + "KB_SEARCH_DEFAULT_K" => { + if let Ok(n) = v.parse::() { + self.search.default_k = n; + } + } + "KB_SEARCH_HYBRID_FUSION" => self.search.hybrid_fusion = v.clone(), + "KB_SEARCH_RRF_K" => { + if let Ok(n) = v.parse::() { + self.search.rrf_k = n; + } + } + "KB_SEARCH_SNIPPET_CHARS" => { + if let Ok(n) = v.parse::() { + self.search.snippet_chars = n; + } + } + + // rag + "KB_RAG_PROMPT_TEMPLATE_VERSION" => { + self.rag.prompt_template_version = v.clone(); + } "KB_RAG_SCORE_GATE" => { if let Ok(f) = v.parse::() { self.rag.score_gate = f; } } "KB_RAG_EXPLAIN_DEFAULT" => { - self.rag.explain_default = matches!(v.as_str(), "1" | "true" | "yes"); + self.rag.explain_default = parse_bool(v); } - "KB_SEARCH_DEFAULT_K" => { - if let Ok(k) = v.parse::() { - self.search.default_k = k; + "KB_RAG_MAX_CONTEXT_TOKENS" => { + if let Ok(n) = v.parse::() { + self.rag.max_context_tokens = n; } } + + // Unknown KB_* keys are silently ignored — see + // `env_unknown_key_is_ignored` test. _ => {} } } @@ -272,6 +383,13 @@ impl Config { } } +/// Parse a permissive boolean — `1` / `true` / `yes` (case-insensitive) +/// for true, anything else for false. Used by `apply_env` for boolean +/// leaves of `Config`. +fn parse_bool(s: &str) -> bool { + matches!(s.to_ascii_lowercase().as_str(), "1" | "true" | "yes") +} + #[cfg(test)] mod tests { use super::*; @@ -309,6 +427,47 @@ mod tests { assert_eq!(c.search.default_k, 25); } + #[test] + fn env_unknown_key_is_ignored() { + let baseline = Config::defaults(); + let mut env = HashMap::new(); + env.insert("KB_NOPE_FOO".to_string(), "garbage".to_string()); + let c = Config::defaults().apply_env(&env); + assert_eq!(c, baseline); + } + + #[test] + fn env_overrides_chunking_target_tokens() { + let mut env = HashMap::new(); + env.insert("KB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string()); + let c = Config::defaults().apply_env(&env); + assert_eq!(c.chunking.target_tokens, 777); + } + + #[test] + fn env_overrides_models_llm_endpoint_and_temperature() { + let mut env = HashMap::new(); + env.insert( + "KB_MODELS_LLM_ENDPOINT".to_string(), + "http://10.0.0.1:11434".to_string(), + ); + env.insert("KB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string()); + let c = Config::defaults().apply_env(&env); + assert_eq!(c.models.llm.endpoint, "http://10.0.0.1:11434"); + assert!((c.models.llm.temperature - 0.7).abs() < 1e-6); + } + + #[test] + fn env_overrides_indexing_watch_filesystem_bool() { + let mut env = HashMap::new(); + env.insert( + "KB_INDEXING_WATCH_FILESYSTEM".to_string(), + "true".to_string(), + ); + let c = Config::defaults().apply_env(&env); + assert!(c.indexing.watch_filesystem); + } + #[test] fn xdg_paths_honor_env() { // Must restore env after the test to avoid polluting other tests.