fix(cli): honor --config flag + improve search output legibility
Two issues surfaced during the post-P3-5 manual smoke test against a
six-document workspace:
1. --config flag was silently ignored. kb-cli read cli.config only
while building SourceScope inside the Ingest arm, then called
kb_app::ingest(scope, summary_only) which internally re-loads
Config::load(None) — falling back to ~/.config/kb/config.toml
regardless of what the user passed. Same pattern in search,
list, inspect, doctor. Users had to rely on KB_* env vars to
point at a non-default config.
2. Search output collapsed RRF hybrid scores to "0.02" because
`{:.2}` truncated the (0, 0.033]-bounded fused score, and
chunks from the same document showed up as identical lines
("3. 0.02 arch/rag-architecture.md") since heading_path was
never printed.
Fix:
- kb-app: doctor/ingest/search/list/inspect already had
*_with_config(Config, ...) seams introduced for integration tests
(#[doc(hidden)] pub). Repurpose them as the official "config-explicit"
API — kb-cli now builds the Config once via
Config::load(cli.config.as_deref()) at the top of every subcommand
and threads it into the *_with_config variant. Module doc-comment
updated to reflect three callers (CLI --config, integration tests,
TUI session) instead of "test-only seam".
- kb-app: doctor() rewritten as doctor_with_config_path(Option<&Path>)
that respects an explicit path. config_loaded probe now reports the
actual path checked, returning a clear hard error if --config points
at a non-existent or malformed file (defaults would silently mask
user intent). data_dir_writable resolves storage.data_dir from the
loaded config (with env overrides applied via Config::apply_env) so
--config users see their custom paths reflected. Original doctor()
signature kept as a None-passing wrapper.
- kb-cli: ingest/search/list/inspect/doctor each call the
*_with_config* companion. Search printer switches to {:.4} score
formatting (RRF hybrid range bounded by ~2/k_rrf ≈ 0.033 at k_rrf=60
default) and appends `> head1 / head2` when heading_path is non-
empty so chunks from the same document are visually distinguishable.
Verified manually:
- `kb --config /tmp/kb-smoke/config.toml doctor` reports the
custom config path + custom data_dir, not the XDG defaults.
- `kb --config /tmp/kb-smoke/config.toml search "..." --mode hybrid`
returns hits with distinct 4-digit scores and heading paths
("rust/ownership.md > Rust 소유권 모델 / Borrow checker").
Workspace 269 passed / 24 ignored / 0 failed; cargo clippy
--workspace --all-targets -- -D warnings clean.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -20,14 +20,18 @@
|
||||
//! facade function in a later phase, remember: keep the return type pure,
|
||||
//! and add a matching `wire_*` helper in `kb-cli/src/wire.rs`.
|
||||
//!
|
||||
//! ## Test seam
|
||||
//! ## Config seam (`*_with_config`)
|
||||
//!
|
||||
//! Each public free function has a `pub(crate) fn *_with_config`
|
||||
//! companion that takes a fully-resolved `Config` directly. Public
|
||||
//! callers go through the top-level functions which call
|
||||
//! `load_config()`; integration tests call the `_with_config` variant
|
||||
//! and pass a mutated `Config` pointing at a `TempDir` to avoid
|
||||
//! polluting the user's real `data_dir` / `model_dir`.
|
||||
//! Each public free function has a `#[doc(hidden)] pub fn *_with_config`
|
||||
//! companion that takes a fully-resolved [`kb_config::Config`] directly.
|
||||
//! Three callers go through it: (1) the top-level free functions
|
||||
//! themselves, after `load_config()`; (2) `kb-cli` when the user passes
|
||||
//! `--config <path>` (CLI builds the Config via
|
||||
//! `Config::load(cli.config.as_deref())` and threads it in directly so
|
||||
//! the flag is honored); (3) integration tests, which mutate a Config
|
||||
//! to point at a `TempDir` to avoid polluting the user's real
|
||||
//! `data_dir` / `model_dir`. `#[doc(hidden)]` keeps rustdoc clean while
|
||||
//! still allowing the cross-crate calls.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
@@ -136,6 +140,10 @@ fn expand_tilde(s: &str) -> PathBuf {
|
||||
/// what `kb-cli` does at the top of every subcommand path; we re-do
|
||||
/// the load inside each facade entry so callers don't have to thread
|
||||
/// a Config through.
|
||||
///
|
||||
/// Callers that already have a Config in hand (CLI honoring `--config`,
|
||||
/// integration tests, TUI session) should bypass this and call the
|
||||
/// matching `*_with_config` helper directly.
|
||||
fn load_config() -> anyhow::Result<kb_config::Config> {
|
||||
kb_config::Config::load(None)
|
||||
}
|
||||
@@ -147,8 +155,10 @@ pub fn ingest(scope: SourceScope, summary_only: bool) -> anyhow::Result<IngestRe
|
||||
ingest_with_config(config, scope, summary_only)
|
||||
}
|
||||
|
||||
/// Test-only seam — kb-cli must call the public free function
|
||||
/// ([`ingest`]), not this. See module docs.
|
||||
/// Config-explicit variant — bypasses [`load_config`] when the
|
||||
/// caller (kb-cli with `--config`, integration tests, TUI session)
|
||||
/// already has a [`kb_config::Config`] in hand. The public free
|
||||
/// function [`ingest`] wraps this with the XDG-default load.
|
||||
#[doc(hidden)]
|
||||
pub fn ingest_with_config(
|
||||
config: kb_config::Config,
|
||||
@@ -807,22 +817,38 @@ pub fn ask(_query: &str, _opts: AskOpts) -> anyhow::Result<Answer> {
|
||||
anyhow::bail!("not yet wired (P4-3)")
|
||||
}
|
||||
|
||||
/// Run the doctor checks. P0 emits `config_loaded` + `data_dir_writable`
|
||||
/// (downstream checks land in later phases).
|
||||
pub fn doctor() -> anyhow::Result<DoctorReport> {
|
||||
/// Run the doctor checks against the explicit config path the user
|
||||
/// requested via `--config` (or the XDG default if `None`). The
|
||||
/// `config_loaded` check reports the actual path probed and the
|
||||
/// `data_dir_writable` check probes the resolved `storage.data_dir`
|
||||
/// from that config (so `--config` users see their custom paths
|
||||
/// reflected in the report rather than the XDG defaults).
|
||||
pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow::Result<DoctorReport> {
|
||||
tracing::debug!("doctor() invoked");
|
||||
let mut checks = Vec::new();
|
||||
|
||||
// config_loaded — defaults always load; from-file is best-effort.
|
||||
let cfg_path = kb_config::Config::xdg_config_path();
|
||||
let (config_ok, config_detail) = if cfg_path.exists() {
|
||||
// Resolve the config path the same way `Config::load` does: explicit
|
||||
// override first, else XDG default. Report whichever was probed.
|
||||
let cfg_path: PathBuf = match config_path {
|
||||
Some(p) => p.to_path_buf(),
|
||||
None => kb_config::Config::xdg_config_path(),
|
||||
};
|
||||
let (config_ok, config_detail, loaded_cfg) = if cfg_path.exists() {
|
||||
match kb_config::Config::from_file(&cfg_path) {
|
||||
Ok(_) => (true, cfg_path.display().to_string()),
|
||||
Err(e) => (false, format!("{} ({e})", cfg_path.display())),
|
||||
Ok(c) => (true, cfg_path.display().to_string(), Some(c)),
|
||||
Err(e) => (false, format!("{} ({e})", cfg_path.display()), None),
|
||||
}
|
||||
} else if config_path.is_some() {
|
||||
// Explicit `--config <path>` that doesn't exist is a hard error
|
||||
// — defaults would silently mask the user's intent.
|
||||
(
|
||||
false,
|
||||
format!("{} (not found)", cfg_path.display()),
|
||||
None,
|
||||
)
|
||||
} else {
|
||||
// Defaults are always loadable; report the path that would be read.
|
||||
(true, format!("{} (defaults)", cfg_path.display()))
|
||||
// No `--config` and no XDG file: defaults are always loadable.
|
||||
(true, format!("{} (defaults)", cfg_path.display()), None)
|
||||
};
|
||||
checks.push(DoctorCheck {
|
||||
name: "config_loaded".to_string(),
|
||||
@@ -830,13 +856,26 @@ pub fn doctor() -> anyhow::Result<DoctorReport> {
|
||||
detail: config_detail,
|
||||
hint: if config_ok {
|
||||
None
|
||||
} else if config_path.is_some() {
|
||||
Some("--config path does not exist or is malformed".to_string())
|
||||
} else {
|
||||
Some("run `kb init` to seed config".to_string())
|
||||
},
|
||||
});
|
||||
|
||||
// data_dir_writable — try to create the dir and write a probe file.
|
||||
let data_dir = kb_config::Config::xdg_data_dir();
|
||||
// data_dir_writable — probe the resolved storage.data_dir from the
|
||||
// loaded config when present, else the XDG default. Apply env
|
||||
// overrides so KB_STORAGE_DATA_DIR is respected too.
|
||||
let data_dir = match loaded_cfg.as_ref() {
|
||||
Some(c) => {
|
||||
// Re-apply env overrides on top so the same precedence as
|
||||
// Config::load is preserved here.
|
||||
let env: std::collections::HashMap<String, String> = std::env::vars().collect();
|
||||
let merged = c.clone().apply_env(&env);
|
||||
expand_tilde(&merged.storage.data_dir)
|
||||
}
|
||||
None => kb_config::Config::xdg_data_dir(),
|
||||
};
|
||||
let writable = (|| -> anyhow::Result<()> {
|
||||
std::fs::create_dir_all(&data_dir)?;
|
||||
let probe = data_dir.join(".kb-doctor-probe");
|
||||
@@ -849,7 +888,7 @@ pub fn doctor() -> anyhow::Result<DoctorReport> {
|
||||
Err(e) => (
|
||||
false,
|
||||
format!("{} ({e})", data_dir.display()),
|
||||
Some("ensure XDG_DATA_HOME is writable".to_string()),
|
||||
Some("ensure the configured data_dir is writable".to_string()),
|
||||
),
|
||||
};
|
||||
checks.push(DoctorCheck {
|
||||
@@ -866,3 +905,11 @@ pub fn doctor() -> anyhow::Result<DoctorReport> {
|
||||
checks,
|
||||
})
|
||||
}
|
||||
|
||||
/// Run the doctor checks against the XDG-default config. Convenience
|
||||
/// wrapper that mirrors the historical `kb-app::doctor()` signature
|
||||
/// for callers that don't honor `--config` (e.g., legacy code paths
|
||||
/// or smoke harnesses).
|
||||
pub fn doctor() -> anyhow::Result<DoctorReport> {
|
||||
doctor_with_config_path(None)
|
||||
}
|
||||
|
||||
@@ -219,7 +219,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
include: cfg.workspace.include.clone(),
|
||||
exclude: cfg.workspace.exclude.clone(),
|
||||
};
|
||||
let report = kb_app::ingest(scope, *summary_only)?;
|
||||
let report = kb_app::ingest_with_config(cfg, scope, *summary_only)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
|
||||
} else {
|
||||
@@ -238,7 +238,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
|
||||
Cmd::List { what } => match what {
|
||||
ListWhat::Docs => {
|
||||
let docs = kb_app::list_docs(kb_core::DocFilter::default())?;
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let docs = kb_app::list_docs_with_config(cfg, kb_core::DocFilter::default())?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?);
|
||||
} else {
|
||||
@@ -252,8 +253,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
|
||||
Cmd::Inspect { what } => match what {
|
||||
InspectWhat::Doc { id } => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let doc_id: kb_core::DocumentId = id.parse()?;
|
||||
let doc = kb_app::inspect_doc(&doc_id)?;
|
||||
let doc = kb_app::inspect_doc_with_config(cfg, &doc_id)?;
|
||||
// Inspect doc emits a `CanonicalDocument` — there's no §2
|
||||
// wire schema for it (P1-5 will decide whether this also
|
||||
// becomes a tagged wrapper or stays as the raw domain
|
||||
@@ -263,8 +265,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
InspectWhat::Chunk { id } => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let chunk_id: kb_core::ChunkId = id.parse()?;
|
||||
let chunk = kb_app::inspect_chunk(&chunk_id)?;
|
||||
let chunk = kb_app::inspect_chunk_with_config(cfg, &chunk_id)?;
|
||||
println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?);
|
||||
Ok(())
|
||||
}
|
||||
@@ -276,18 +279,34 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
mode,
|
||||
explain: _,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let q = kb_core::SearchQuery {
|
||||
text: query.clone(),
|
||||
mode: (*mode).into(),
|
||||
k: *k,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
};
|
||||
let hits = kb_app::search(q)?;
|
||||
let hits = kb_app::search_with_config(cfg, q)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
|
||||
} else {
|
||||
for h in &hits {
|
||||
println!("{:>2}. {:.2} {}", h.rank, h.retrieval.fusion_score, h.doc_path.0);
|
||||
// Show 4-digit score so RRF fused scores (bounded
|
||||
// ~0–0.033 for k_rrf=60) don't all collapse to "0.02".
|
||||
// Append heading_path so multiple chunks from the same
|
||||
// document are distinguishable on a single line.
|
||||
let heading = if h.heading_path.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
format!(" > {}", h.heading_path.join(" / "))
|
||||
};
|
||||
println!(
|
||||
"{:>2}. {:.4} {}{}",
|
||||
h.rank,
|
||||
h.retrieval.fusion_score,
|
||||
h.doc_path.0,
|
||||
heading,
|
||||
);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -322,7 +341,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
Cmd::Doctor => {
|
||||
let report = kb_app::doctor()?;
|
||||
let report = kb_app::doctor_with_config_path(cli.config.as_deref())?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user