p0-1: address review (apply_env full schema map, drop dead Option in logging::init)

- kb-config::apply_env now covers every leaf key in `Config` via an
  explicit grep-friendly match block (one arm per leaf), keyed
  `KB_<SECTION>_<KEY>`. Booleans flow through a shared `parse_bool` helper.
  Numeric leaves silently keep their prior value on parse failure so a
  malformed env entry can't crash startup.
- New tests: env_unknown_key_is_ignored,
  env_overrides_chunking_target_tokens,
  env_overrides_models_llm_endpoint_and_temperature,
  env_overrides_indexing_watch_filesystem_bool.
- kb-app::logging::init now returns `Result<WorkerGuard>` instead of
  `Result<Option<WorkerGuard>>` — the inner `Option` was always `Some` so
  the wrapper was dead. kb-cli/main.rs collapses the call from
  `.ok().flatten()` to `.ok()`, preserving fail-soft semantics on logging
  init.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-30 08:53:59 +00:00
parent ca0eb2f9cb
commit d91b60325e
3 changed files with 172 additions and 10 deletions

View File

@@ -16,8 +16,9 @@ pub enum LogLevel {
}
/// Initialize tracing. Returns a guard to keep alive until exit. Idempotent
/// — a second call is a no-op.
pub fn init(level: LogLevel) -> Result<Option<WorkerGuard>> {
/// — a second call is a no-op (the second `try_init` is dropped silently
/// but the guard is still returned so the caller can keep it alive).
pub fn init(level: LogLevel) -> Result<WorkerGuard> {
let log_dir = kb_config::Config::xdg_state_dir().join("logs");
std::fs::create_dir_all(&log_dir)?;
@@ -38,5 +39,5 @@ pub fn init(level: LogLevel) -> Result<Option<WorkerGuard>> {
// no-op.
let _ = registry.try_init();
Ok(Some(guard))
Ok(guard)
}

View File

@@ -158,7 +158,9 @@ fn main() -> ExitCode {
} else {
kb_app::logging::LogLevel::Default
};
let _log_guard = kb_app::logging::init(level).ok().flatten();
// Fail-soft: if logging init errors (e.g. XDG state dir is read-only),
// proceed without a guard rather than crashing — `kb` is still usable.
let _log_guard = kb_app::logging::init(level).ok();
match run(&cli) {
Ok(()) => ExitCode::from(0),
Err(e) => {

View File

@@ -189,28 +189,139 @@ impl Config {
}
/// Apply `KB_<SECTION>_<KEY>` env overrides. Unknown keys are ignored.
///
/// The mapping is an explicit grep-friendly whitelist — one match arm
/// per leaf key in `Config`. Booleans accept `1` / `true` / `yes`
/// (case-insensitive) for true and anything else for false. Numeric
/// keys silently keep their prior value if the env value fails to
/// parse, so a malformed `KB_*` cannot crash startup.
pub fn apply_env(mut self, env: &HashMap<String, String>) -> Self {
for (k, v) in env {
if !k.starts_with("KB_") {
continue;
}
// Match a small, intentional whitelist for P0 — full env→config
// mapping lands when the rest of the schema is wired up.
match k.as_str() {
// workspace
"KB_WORKSPACE_ROOT" => self.workspace.root = v.clone(),
// storage
"KB_STORAGE_DATA_DIR" => self.storage.data_dir = v.clone(),
"KB_STORAGE_SQLITE" => self.storage.sqlite = v.clone(),
"KB_STORAGE_VECTOR_DIR" => self.storage.vector_dir = v.clone(),
"KB_STORAGE_ASSET_DIR" => self.storage.asset_dir = v.clone(),
"KB_STORAGE_ARTIFACT_DIR" => self.storage.artifact_dir = v.clone(),
"KB_STORAGE_MODEL_DIR" => self.storage.model_dir = v.clone(),
"KB_STORAGE_RUNS_DIR" => self.storage.runs_dir = v.clone(),
"KB_STORAGE_COPY_THRESHOLD_MB" => {
if let Ok(n) = v.parse::<u64>() {
self.storage.copy_threshold_mb = n;
}
}
// indexing
"KB_INDEXING_MAX_PARALLEL_EXTRACTORS" => {
if let Ok(n) = v.parse::<u32>() {
self.indexing.max_parallel_extractors = n;
}
}
"KB_INDEXING_MAX_PARALLEL_EMBEDDINGS" => {
if let Ok(n) = v.parse::<u32>() {
self.indexing.max_parallel_embeddings = n;
}
}
"KB_INDEXING_WATCH_FILESYSTEM" => {
self.indexing.watch_filesystem = parse_bool(v);
}
// chunking
"KB_CHUNKING_TARGET_TOKENS" => {
if let Ok(n) = v.parse::<usize>() {
self.chunking.target_tokens = n;
}
}
"KB_CHUNKING_OVERLAP_TOKENS" => {
if let Ok(n) = v.parse::<usize>() {
self.chunking.overlap_tokens = n;
}
}
"KB_CHUNKING_RESPECT_MARKDOWN_HEADINGS" => {
self.chunking.respect_markdown_headings = parse_bool(v);
}
"KB_CHUNKING_CHUNKER_VERSION" => self.chunking.chunker_version = v.clone(),
// models.embedding
"KB_MODELS_EMBEDDING_PROVIDER" => self.models.embedding.provider = v.clone(),
"KB_MODELS_EMBEDDING_MODEL" => self.models.embedding.model = v.clone(),
"KB_MODELS_EMBEDDING_VERSION" => self.models.embedding.version = v.clone(),
"KB_MODELS_EMBEDDING_DIMENSIONS" => {
if let Ok(n) = v.parse::<usize>() {
self.models.embedding.dimensions = n;
}
}
"KB_MODELS_EMBEDDING_BATCH_SIZE" => {
if let Ok(n) = v.parse::<usize>() {
self.models.embedding.batch_size = n;
}
}
// models.llm
"KB_MODELS_LLM_PROVIDER" => self.models.llm.provider = v.clone(),
"KB_MODELS_LLM_MODEL" => self.models.llm.model = v.clone(),
"KB_MODELS_LLM_CONTEXT_TOKENS" => {
if let Ok(n) = v.parse::<usize>() {
self.models.llm.context_tokens = n;
}
}
"KB_MODELS_LLM_ENDPOINT" => self.models.llm.endpoint = v.clone(),
"KB_MODELS_LLM_TEMPERATURE" => {
if let Ok(f) = v.parse::<f32>() {
self.models.llm.temperature = f;
}
}
"KB_MODELS_LLM_SEED" => {
if let Ok(n) = v.parse::<u64>() {
self.models.llm.seed = n;
}
}
// search
"KB_SEARCH_DEFAULT_K" => {
if let Ok(n) = v.parse::<usize>() {
self.search.default_k = n;
}
}
"KB_SEARCH_HYBRID_FUSION" => self.search.hybrid_fusion = v.clone(),
"KB_SEARCH_RRF_K" => {
if let Ok(n) = v.parse::<u32>() {
self.search.rrf_k = n;
}
}
"KB_SEARCH_SNIPPET_CHARS" => {
if let Ok(n) = v.parse::<usize>() {
self.search.snippet_chars = n;
}
}
// rag
"KB_RAG_PROMPT_TEMPLATE_VERSION" => {
self.rag.prompt_template_version = v.clone();
}
"KB_RAG_SCORE_GATE" => {
if let Ok(f) = v.parse::<f32>() {
self.rag.score_gate = f;
}
}
"KB_RAG_EXPLAIN_DEFAULT" => {
self.rag.explain_default = matches!(v.as_str(), "1" | "true" | "yes");
self.rag.explain_default = parse_bool(v);
}
"KB_SEARCH_DEFAULT_K" => {
if let Ok(k) = v.parse::<usize>() {
self.search.default_k = k;
"KB_RAG_MAX_CONTEXT_TOKENS" => {
if let Ok(n) = v.parse::<usize>() {
self.rag.max_context_tokens = n;
}
}
// Unknown KB_* keys are silently ignored — see
// `env_unknown_key_is_ignored` test.
_ => {}
}
}
@@ -272,6 +383,13 @@ impl Config {
}
}
/// Parse a permissive boolean — `1` / `true` / `yes` (case-insensitive)
/// for true, anything else for false. Used by `apply_env` for boolean
/// leaves of `Config`.
fn parse_bool(s: &str) -> bool {
matches!(s.to_ascii_lowercase().as_str(), "1" | "true" | "yes")
}
#[cfg(test)]
mod tests {
use super::*;
@@ -309,6 +427,47 @@ mod tests {
assert_eq!(c.search.default_k, 25);
}
#[test]
fn env_unknown_key_is_ignored() {
let baseline = Config::defaults();
let mut env = HashMap::new();
env.insert("KB_NOPE_FOO".to_string(), "garbage".to_string());
let c = Config::defaults().apply_env(&env);
assert_eq!(c, baseline);
}
#[test]
fn env_overrides_chunking_target_tokens() {
let mut env = HashMap::new();
env.insert("KB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string());
let c = Config::defaults().apply_env(&env);
assert_eq!(c.chunking.target_tokens, 777);
}
#[test]
fn env_overrides_models_llm_endpoint_and_temperature() {
let mut env = HashMap::new();
env.insert(
"KB_MODELS_LLM_ENDPOINT".to_string(),
"http://10.0.0.1:11434".to_string(),
);
env.insert("KB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string());
let c = Config::defaults().apply_env(&env);
assert_eq!(c.models.llm.endpoint, "http://10.0.0.1:11434");
assert!((c.models.llm.temperature - 0.7).abs() < 1e-6);
}
#[test]
fn env_overrides_indexing_watch_filesystem_bool() {
let mut env = HashMap::new();
env.insert(
"KB_INDEXING_WATCH_FILESYSTEM".to_string(),
"true".to_string(),
);
let c = Config::defaults().apply_env(&env);
assert!(c.indexing.watch_filesystem);
}
#[test]
fn xdg_paths_honor_env() {
// Must restore env after the test to avoid polluting other tests.