fix(cli): honor --config flag + improve search output legibility #20

Merged
altair823 merged 1 commits from fix/cli-config-flag-and-search-output into main 2026-05-01 13:26:11 +00:00
2 changed files with 95 additions and 29 deletions

View File

@@ -20,14 +20,18 @@
//! facade function in a later phase, remember: keep the return type pure,
//! and add a matching `wire_*` helper in `kb-cli/src/wire.rs`.
//!
//! ## Test seam
//! ## Config seam (`*_with_config`)
//!
//! Each public free function has a `pub(crate) fn *_with_config`
//! companion that takes a fully-resolved `Config` directly. Public
//! callers go through the top-level functions which call
//! `load_config()`; integration tests call the `_with_config` variant
//! and pass a mutated `Config` pointing at a `TempDir` to avoid
//! polluting the user's real `data_dir` / `model_dir`.
//! Each public free function has a `#[doc(hidden)] pub fn *_with_config`
//! companion that takes a fully-resolved [`kb_config::Config`] directly.
//! Three callers go through it: (1) the top-level free functions
//! themselves, after `load_config()`; (2) `kb-cli` when the user passes
//! `--config <path>` (CLI builds the Config via
//! `Config::load(cli.config.as_deref())` and threads it in directly so
//! the flag is honored); (3) integration tests, which mutate a Config
//! to point at a `TempDir` to avoid polluting the user's real
//! `data_dir` / `model_dir`. `#[doc(hidden)]` keeps rustdoc clean while
//! still allowing the cross-crate calls.
use std::path::PathBuf;
use std::sync::Arc;
@@ -136,6 +140,10 @@ fn expand_tilde(s: &str) -> PathBuf {
/// what `kb-cli` does at the top of every subcommand path; we re-do
/// the load inside each facade entry so callers don't have to thread
/// a Config through.
///
/// Callers that already have a Config in hand (CLI honoring `--config`,
/// integration tests, TUI session) should bypass this and call the
/// matching `*_with_config` helper directly.
fn load_config() -> anyhow::Result<kb_config::Config> {
kb_config::Config::load(None)
}
@@ -147,8 +155,10 @@ pub fn ingest(scope: SourceScope, summary_only: bool) -> anyhow::Result<IngestRe
ingest_with_config(config, scope, summary_only)
}
/// Test-only seam — kb-cli must call the public free function
/// ([`ingest`]), not this. See module docs.
/// Config-explicit variant — bypasses [`load_config`] when the

P3-5의 #[doc(hidden)] pub fn *_with_config을 test seam에서 "config-explicit API"로 의미 확장한 결정이 정확합니다 — --config 우회를 위해 새 함수를 추가하기보다 이미 존재하는 seam을 정식 cross-crate API로 promote. #[doc(hidden)]은 유지해 rustdoc은 깔끔하지만 kb-cli 같은 workspace 내 caller는 호출 가능. 의미 변화를 module doc-comment에 명시한 점도 좋습니다.

P3-5의 `#[doc(hidden)] pub fn *_with_config`을 test seam에서 "config-explicit API"로 의미 확장한 결정이 정확합니다 — `--config` 우회를 위해 새 함수를 추가하기보다 이미 존재하는 seam을 정식 cross-crate API로 promote. `#[doc(hidden)]`은 유지해 rustdoc은 깔끔하지만 kb-cli 같은 workspace 내 caller는 호출 가능. 의미 변화를 module doc-comment에 명시한 점도 좋습니다.
/// caller (kb-cli with `--config`, integration tests, TUI session)
/// already has a [`kb_config::Config`] in hand. The public free
/// function [`ingest`] wraps this with the XDG-default load.
#[doc(hidden)]
pub fn ingest_with_config(
config: kb_config::Config,
@@ -807,22 +817,38 @@ pub fn ask(_query: &str, _opts: AskOpts) -> anyhow::Result<Answer> {
anyhow::bail!("not yet wired (P4-3)")
}
/// Run the doctor checks. P0 emits `config_loaded` + `data_dir_writable`
/// (downstream checks land in later phases).
pub fn doctor() -> anyhow::Result<DoctorReport> {
/// Run the doctor checks against the explicit config path the user
/// requested via `--config` (or the XDG default if `None`). The
/// `config_loaded` check reports the actual path probed and the
/// `data_dir_writable` check probes the resolved `storage.data_dir`
/// from that config (so `--config` users see their custom paths
/// reflected in the report rather than the XDG defaults).
pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow::Result<DoctorReport> {
tracing::debug!("doctor() invoked");
let mut checks = Vec::new();
// config_loaded — defaults always load; from-file is best-effort.
let cfg_path = kb_config::Config::xdg_config_path();
let (config_ok, config_detail) = if cfg_path.exists() {
// Resolve the config path the same way `Config::load` does: explicit
// override first, else XDG default. Report whichever was probed.
let cfg_path: PathBuf = match config_path {
Some(p) => p.to_path_buf(),
None => kb_config::Config::xdg_config_path(),
};
let (config_ok, config_detail, loaded_cfg) = if cfg_path.exists() {
match kb_config::Config::from_file(&cfg_path) {
Ok(_) => (true, cfg_path.display().to_string()),
Err(e) => (false, format!("{} ({e})", cfg_path.display())),
Ok(c) => (true, cfg_path.display().to_string(), Some(c)),
Err(e) => (false, format!("{} ({e})", cfg_path.display()), None),
}
} else if config_path.is_some() {
// Explicit `--config <path>` that doesn't exist is a hard error
// — defaults would silently mask the user's intent.
(
false,
format!("{} (not found)", cfg_path.display()),
None,
)
} else {
// Defaults are always loadable; report the path that would be read.
(true, format!("{} (defaults)", cfg_path.display()))
// No `--config` and no XDG file: defaults are always loadable.
(true, format!("{} (defaults)", cfg_path.display()), None)
};
checks.push(DoctorCheck {
name: "config_loaded".to_string(),
@@ -830,13 +856,26 @@ pub fn doctor() -> anyhow::Result<DoctorReport> {
detail: config_detail,
hint: if config_ok {
None
} else if config_path.is_some() {
Some("--config path does not exist or is malformed".to_string())
} else {
Some("run `kb init` to seed config".to_string())
},
});
// data_dir_writable — try to create the dir and write a probe file.
let data_dir = kb_config::Config::xdg_data_dir();
// data_dir_writable — probe the resolved storage.data_dir from the
// loaded config when present, else the XDG default. Apply env
// overrides so KB_STORAGE_DATA_DIR is respected too.
let data_dir = match loaded_cfg.as_ref() {
Some(c) => {
// Re-apply env overrides on top so the same precedence as
// Config::load is preserved here.
let env: std::collections::HashMap<String, String> = std::env::vars().collect();
let merged = c.clone().apply_env(&env);
expand_tilde(&merged.storage.data_dir)
}
None => kb_config::Config::xdg_data_dir(),
};
let writable = (|| -> anyhow::Result<()> {
std::fs::create_dir_all(&data_dir)?;

doctor_with_config_path--config <path>가 존재하지 않을 때 silent default fallback이 아니라 hard error로 실패. defaults가 사용자 의도를 가리는 시나리오를 차단 — kb doctor --config wrong.toml이 "config_loaded ✓ ~/.config/kb/config.toml (defaults)"로 거짓 통과하지 않습니다.

`doctor_with_config_path`이 `--config <path>`가 존재하지 않을 때 silent default fallback이 아니라 hard error로 실패. defaults가 사용자 의도를 가리는 시나리오를 차단 — `kb doctor --config wrong.toml`이 "config_loaded ✓ ~/.config/kb/config.toml (defaults)"로 거짓 통과하지 않습니다.
let probe = data_dir.join(".kb-doctor-probe");
@@ -849,7 +888,7 @@ pub fn doctor() -> anyhow::Result<DoctorReport> {
Err(e) => (
false,
format!("{} ({e})", data_dir.display()),
Some("ensure XDG_DATA_HOME is writable".to_string()),
Some("ensure the configured data_dir is writable".to_string()),
),
};
checks.push(DoctorCheck {
@@ -866,3 +905,11 @@ pub fn doctor() -> anyhow::Result<DoctorReport> {
checks,
})
}
/// Run the doctor checks against the XDG-default config. Convenience
/// wrapper that mirrors the historical `kb-app::doctor()` signature
/// for callers that don't honor `--config` (e.g., legacy code paths
/// or smoke harnesses).
pub fn doctor() -> anyhow::Result<DoctorReport> {
doctor_with_config_path(None)
}

View File

@@ -219,7 +219,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
include: cfg.workspace.include.clone(),
exclude: cfg.workspace.exclude.clone(),
};
let report = kb_app::ingest(scope, *summary_only)?;
let report = kb_app::ingest_with_config(cfg, scope, *summary_only)?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
} else {
@@ -238,7 +238,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Cmd::List { what } => match what {
ListWhat::Docs => {
let docs = kb_app::list_docs(kb_core::DocFilter::default())?;
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let docs = kb_app::list_docs_with_config(cfg, kb_core::DocFilter::default())?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?);
} else {
@@ -252,8 +253,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Cmd::Inspect { what } => match what {
InspectWhat::Doc { id } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let doc_id: kb_core::DocumentId = id.parse()?;
let doc = kb_app::inspect_doc(&doc_id)?;
let doc = kb_app::inspect_doc_with_config(cfg, &doc_id)?;
// Inspect doc emits a `CanonicalDocument` — there's no §2
// wire schema for it (P1-5 will decide whether this also
// becomes a tagged wrapper or stays as the raw domain
@@ -263,8 +265,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Ok(())
}
InspectWhat::Chunk { id } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let chunk_id: kb_core::ChunkId = id.parse()?;
let chunk = kb_app::inspect_chunk(&chunk_id)?;
let chunk = kb_app::inspect_chunk_with_config(cfg, &chunk_id)?;
println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?);
Ok(())
}
@@ -276,18 +279,34 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
mode,
explain: _,
} => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let q = kb_core::SearchQuery {
text: query.clone(),
mode: (*mode).into(),
k: *k,
filters: kb_core::SearchFilters::default(),
};
let hits = kb_app::search(q)?;
let hits = kb_app::search_with_config(cfg, q)?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
} else {
for h in &hits {
println!("{:>2}. {:.2} {}", h.rank, h.retrieval.fusion_score, h.doc_path.0);
// Show 4-digit score so RRF fused scores (bounded
// ~00.033 for k_rrf=60) don't all collapse to "0.02".

search printer 변경 두 가지 모두 사용자 입장에서 즉시 체감되는 개선:

  1. {:.4} — RRF 합산 점수가 (0, ~2/k_rrf] 즉 k_rrf=60 default에서 ≤ 0.033이라 {:.2}는 모든 hit을 "0.02"로 뭉개버렸습니다. 4자리로 ordering signal 보존.
  2. > head1 / head2 접미사 — 같은 문서에서 나온 다른 chunk가 " arch/rag-architecture.md"로만 표시되어 분간 불가능했던 문제 해결. heading_path가 비어있으면 접미사 생략해 lexical short-doc 케이스에서 noise 안 만듦.
search printer 변경 두 가지 모두 사용자 입장에서 즉시 체감되는 개선: 1. `{:.4}` — RRF 합산 점수가 (0, ~2/k_rrf] 즉 k_rrf=60 default에서 ≤ 0.033이라 `{:.2}`는 모든 hit을 "0.02"로 뭉개버렸습니다. 4자리로 ordering signal 보존. 2. `> head1 / head2` 접미사 — 같은 문서에서 나온 다른 chunk가 " arch/rag-architecture.md"로만 표시되어 분간 불가능했던 문제 해결. heading_path가 비어있으면 접미사 생략해 lexical short-doc 케이스에서 noise 안 만듦.
// Append heading_path so multiple chunks from the same
// document are distinguishable on a single line.
let heading = if h.heading_path.is_empty() {
String::new()
} else {
format!(" > {}", h.heading_path.join(" / "))
};
println!(
"{:>2}. {:.4} {}{}",
h.rank,
h.retrieval.fusion_score,
h.doc_path.0,
heading,
);
}
}
Ok(())
@@ -322,7 +341,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
}
Cmd::Doctor => {
let report = kb_app::doctor()?;
let report = kb_app::doctor_with_config_path(cli.config.as_deref())?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?);
} else {