Files
kebab/crates/kebab-cli/src/main.rs
altair823 fa02a7c68d feat: ingest cooperative cancellation (p9-fb-04)
Ctrl-C / Esc 가 ingest 를 즉시 중단. 현재 in-flight asset 마무리 후
이후 asset 미실행, IngestEvent::Aborted { partial_counts } 발신,
Ok(IngestReport) 정상 반환 (Err 아님). 부분 commit 보존, 다음 ingest
가 idempotent 재개.

신규 facade: kebab-app::ingest_with_config_cancellable(.., progress,
cancel: Option<Arc<AtomicBool>>). 기존 _progress 가 cancel=None
forwarding wrapper. asset loop 시작 boundary 마다 atomic load —
true 면 break + Aborted emit + 정상 종료. Lock 없음.

CLI: ctrlc crate 신규 dep. SIGINT handler 가 첫 신호에 cancel.store(true)
+ stderr hint, 두 번째 신호에 std::process::exit(130) (canonical SIGINT
exit code). install_sigint_cancel() helper 가 Arc<AtomicBool> 반환,
Cmd::Ingest 가 facade 에 전달.

TUI: IngestState 에 cancel: Arc<AtomicBool> field 추가 (회차 1 review
결과의 reshape 정확). start_ingest 가 둘 다 만들어 worker 에 clone
move. cancel_running_ingest(&app) helper — Esc / Ctrl-C 가
ingest 진행 중일 때만 cancel 우선, 그 외에는 quit.

Test:
- 3 facade integration (cancel-before / cancel-mid / no-cancel
  default).
- 3 tui lib unit (cancel_running_ingest no-state / in-flight /
  terminated).

Plan 갱신: p9-fb-04 status planned → in_progress. 머지 후 한 줄
commit 으로 completed flip.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 21:36:17 +00:00

647 lines
22 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! `kebab` — command-line interface. Each subcommand maps 1:1 to a `kebab-app`
//! function. Exit codes per design §10.
use std::path::PathBuf;
use std::process::ExitCode;
use clap::{Parser, Subcommand};
use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
mod cancel;
mod progress;
mod wire;
// Top-level argument parser for the `kebab` binary: four global flags plus
// the selected subcommand.
//
// clap turns `///` comments on this type into `--help` text, so
// maintainer-facing notes in this block are plain `//` comments.
#[derive(Parser, Debug)]
#[command(name = "kebab", version, about = "personal local knowledge base")]
struct Cli {
    /// Path to a non-default `config.toml`.
    // Forwarded to `kebab_config::Config::load` by most subcommands.
    #[arg(long, global = true)]
    config: Option<PathBuf>,
    /// Show anyhow chain on errors.
    // Also selects `LogLevel::Verbose` in `main` unless `--debug` is set.
    #[arg(long, global = true)]
    verbose: bool,
    /// Show tracing target/level on errors.
    // Selects `LogLevel::Debug` in `main`; takes precedence over `--verbose`.
    #[arg(long, global = true)]
    debug: bool,
    /// Emit machine-readable wire JSON (`*.v1`).
    #[arg(long, global = true)]
    json: bool,
    // The subcommand to execute; dispatched in `run`.
    #[command(subcommand)]
    command: Cmd,
}
// Every top-level subcommand of the CLI. `run` matches on this enum and
// performs the corresponding action.
//
// clap turns `///` comments on variants and fields into `--help` text, so
// maintainer-facing notes in this block are plain `//` comments.
#[derive(Subcommand, Debug)]
enum Cmd {
    /// Initialise XDG dirs + workspace + `config.toml`.
    Init {
        /// Overwrite an existing `config.toml`.
        #[arg(long)]
        force: bool,
    },
    /// Scan the workspace and ingest new/updated documents.
    Ingest {
        /// Workspace root override.
        // When absent, `run` falls back to `workspace.root` from the config.
        #[arg(long)]
        root: Option<PathBuf>,
        /// Suppress the per-file `items` list.
        #[arg(long)]
        summary_only: bool,
    },
    /// Listing subcommands.
    List {
        #[command(subcommand)]
        what: ListWhat,
    },
    /// Inspect documents or chunks by ID.
    Inspect {
        #[command(subcommand)]
        what: InspectWhat,
    },
    /// Lexical / vector / hybrid search over chunks.
    Search {
        // Positional query text.
        query: String,
        // Number of hits requested (passed as `SearchQuery::k`).
        #[arg(long, default_value_t = 10)]
        k: usize,
        #[arg(long, value_enum, default_value_t = ModeFlag::Hybrid)]
        mode: ModeFlag,
        // NOTE(review): `run` destructures this as `explain: _`, i.e. the flag
        // is parsed but currently has no effect for `search`.
        #[arg(long)]
        explain: bool,
    },
    /// Retrieval-augmented question answering.
    Ask {
        // Positional question text.
        query: String,
        #[arg(long, default_value_t = 8)]
        k: usize,
        #[arg(long, value_enum, default_value_t = ModeFlag::Hybrid)]
        mode: ModeFlag,
        #[arg(long)]
        explain: bool,
        // `temperature` and `seed` are forwarded unchanged into
        // `kebab_app::AskOpts`; `None` leaves the choice to the callee.
        #[arg(long)]
        temperature: Option<f32>,
        #[arg(long)]
        seed: Option<u64>,
    },
    /// Wipe XDG data dirs (and optionally the Lance vector store) so the
    /// workspace can be re-initialised. **Irreversible.** Without
    /// `--yes`, prompts on TTY; aborts in non-interactive contexts.
    // The four scope flags share the clap group `reset_scope`. `run` resolves
    // them to a `ResetScope`, defaulting to `DataOnly` when none is given.
    Reset {
        /// Wipe config + data + cache + state. Implies losing
        /// `config.toml` — re-run `kebab init` afterwards.
        #[arg(long, group = "reset_scope")]
        all: bool,
        /// Default. Wipe data + cache + state. Config is preserved.
        #[arg(long, group = "reset_scope")]
        data_only: bool,
        /// Wipe only the Lance vector store + truncate
        /// `embedding_records`. SQLite documents / chunks survive so the
        /// next `kebab ingest` re-embeds without re-parsing.
        #[arg(long, group = "reset_scope")]
        vector_only: bool,
        /// Wipe only the config dir.
        #[arg(long, group = "reset_scope")]
        config_only: bool,
        /// Skip the interactive confirm. Required in non-interactive
        /// contexts (CI, pipes).
        #[arg(long)]
        yes: bool,
    },
    /// Health check.
    // `run` returns `DoctorUnhealthy` (exit code 3) when the report is not ok.
    Doctor,
    /// Launch the Ratatui shell (P9-1 — Library pane only; search /
    /// ask / inspect panes land with p9-2 / p9-3 / p9-4).
    Tui,
    // NOTE(review): the help text below still says "placeholder", but `run`
    // implements `eval run` / `aggregate` / `compare` via `kebab_eval`.
    // Consider updating the user-facing wording.
    /// Eval suite (placeholder; lands in P9).
    Eval {
        #[command(subcommand)]
        what: EvalWhat,
    },
}
// Targets of `kebab list`. Only document listing exists so far; `run`
// prints one `doc_id<TAB>doc_path` line per document unless `--json` is set.
#[derive(Subcommand, Debug)]
enum ListWhat {
    /// List documents currently indexed.
    Docs,
}
// Targets of `kebab inspect`. The `id` strings are parsed in `run` into
// `kebab_core::DocumentId` / `kebab_core::ChunkId`; a parse failure is
// propagated as an error.
#[derive(Subcommand, Debug)]
enum InspectWhat {
    /// Inspect a single document by ID.
    Doc { id: String },
    /// Inspect a single chunk by ID.
    Chunk { id: String },
}
// Subcommands of `kebab eval`, all backed by the `kebab_eval` crate.
//
// clap turns `///` comments into `--help` text, so maintainer-facing notes
// in this block are plain `//` comments.
#[derive(Subcommand, Debug)]
enum EvalWhat {
    /// Run the golden suite end-to-end and persist `eval_runs` +
    /// `eval_query_results` + `runs_dir/<run_id>/per_query.jsonl`
    /// (P5-1).
    Run {
        // Name of the suite to execute.
        #[arg(long, default_value = "golden")]
        suite: String,
        // Defaults to lexical here, unlike `search` / `ask`, which default to
        // hybrid.
        #[arg(long, value_enum, default_value_t = ModeFlag::Lexical)]
        mode: ModeFlag,
        #[arg(long, default_value_t = 10)]
        k: usize,
        // All remaining fields are copied verbatim into
        // `kebab_eval::EvalRunOpts` by `run`.
        #[arg(long)]
        with_rag: bool,
        #[arg(long)]
        temperature: Option<f32>,
        #[arg(long)]
        seed: Option<u64>,
    },
    /// Compute aggregate metrics for a stored run and write them back
    /// into `eval_runs.aggregate_json` (P5-2).
    Aggregate { run_id: String },
    /// Diff two stored runs (P5-2). Default output is a Markdown
    /// summary; use `--json` (top-level flag) for the raw report.
    Compare {
        // Positional run identifiers; `--write-report` writes under `run_b`.
        run_a: String,
        run_b: String,
        /// Refuse to compare when the two runs' `chunker_version`
        /// differ (default is graceful doc-id fallback).
        #[arg(long)]
        strict_chunker_version: bool,
        /// Also write the Markdown report to
        /// `runs_dir/<run_b>/report.md`.
        #[arg(long)]
        write_report: bool,
    },
}
// Command-line spelling of the retrieval mode accepted by `--mode`.
// Converted into `kebab_core::SearchMode` through the `From` impl in this
// file, which keeps the clap dependency out of the core type.
#[derive(Clone, Copy, Debug, clap::ValueEnum)]
enum ModeFlag {
    Lexical,
    Vector,
    Hybrid,
}
/// Maps each CLI `--mode` value onto the identically named
/// `kebab_core::SearchMode` variant.
impl From<ModeFlag> for kebab_core::SearchMode {
    fn from(flag: ModeFlag) -> Self {
        // Exhaustive on purpose: adding a `ModeFlag` variant must fail to
        // compile until it is given a core counterpart.
        match flag {
            ModeFlag::Lexical => Self::Lexical,
            ModeFlag::Vector => Self::Vector,
            ModeFlag::Hybrid => Self::Hybrid,
        }
    }
}
/// Process entry point.
///
/// Parses the command line, initialises logging, runs the selected
/// subcommand and translates its outcome into the process exit code
/// (`0` on success, otherwise whatever `exit_code` assigns to the error).
fn main() -> ExitCode {
    let cli = Cli::parse();

    // `--debug` wins over `--verbose` when both are present.
    let level = match (cli.debug, cli.verbose) {
        (true, _) => kebab_app::logging::LogLevel::Debug,
        (false, true) => kebab_app::logging::LogLevel::Verbose,
        (false, false) => kebab_app::logging::LogLevel::Default,
    };

    // Logging is best-effort: when initialisation fails (for instance the
    // XDG state dir is read-only) we carry on without a guard instead of
    // aborting, because the CLI itself remains usable.
    let _log_guard = kebab_app::logging::init(level).ok();

    let Err(e) = run(&cli) else {
        return ExitCode::from(0);
    };

    let code = exit_code(&e);
    // Exit code 1 marks a refusal whose message the subcommand has already
    // written to stdout; everything else is reported on stderr here.
    if code != 1 {
        eprintln!("error: {e}");
        if cli.verbose {
            // Skip the head of the chain — it was printed just above.
            e.chain()
                .skip(1)
                .for_each(|cause| eprintln!(" caused by: {cause}"));
        }
    }
    ExitCode::from(code)
}
/// Maps an error returned by `run` to the process exit code.
///
/// * `1` — the error is a `RefusalSignal` or a `NoHitSignal`.
/// * `3` — the error is a `DoctorUnhealthy`.
/// * `2` — any other error.
fn exit_code(err: &anyhow::Error) -> u8 {
    if err.is::<RefusalSignal>() || err.is::<NoHitSignal>() {
        1
    } else if err.is::<DoctorUnhealthy>() {
        3
    } else {
        2
    }
}
/// Executes the subcommand selected on the command line.
///
/// This function only dispatches; each subcommand lives in its own private
/// `run_*` helper below. Human-readable output is printed unless `--json`
/// was given, in which case a single JSON document is written to stdout.
///
/// # Errors
///
/// Propagates any failure from the underlying crates. Two outcomes are
/// signalled through dedicated error types so that `exit_code` can map them:
/// `RefusalSignal` when `ask` produced an ungrounded answer, and
/// `DoctorUnhealthy` when `doctor` reports a failed check.
fn run(cli: &Cli) -> anyhow::Result<()> {
    match &cli.command {
        Cmd::Init { force } => run_init(cli, *force),
        Cmd::Ingest { root, summary_only } => run_ingest(cli, root.as_deref(), *summary_only),
        Cmd::List { what } => match what {
            ListWhat::Docs => run_list_docs(cli),
        },
        Cmd::Inspect { what } => match what {
            InspectWhat::Doc { id } => run_inspect_doc(cli, id),
            InspectWhat::Chunk { id } => run_inspect_chunk(cli, id),
        },
        // `--explain` is accepted for `search` but not consumed yet.
        Cmd::Search {
            query,
            k,
            mode,
            explain: _,
        } => run_search(cli, query, *k, *mode),
        Cmd::Ask {
            query,
            k,
            mode,
            explain,
            temperature,
            seed,
        } => run_ask(cli, query, *k, *mode, *explain, *temperature, *seed),
        // `data_only` is deliberately ignored: clap's `reset_scope` group
        // already enforces mutual exclusion, and "no scope flag" resolves to
        // the same `DataOnly` scope as an explicit `--data-only`.
        Cmd::Reset {
            all,
            data_only: _,
            vector_only,
            config_only,
            yes,
        } => run_reset(cli, *all, *vector_only, *config_only, *yes),
        Cmd::Doctor => run_doctor(cli),
        Cmd::Tui => run_tui(cli),
        Cmd::Eval { what } => match what {
            EvalWhat::Run {
                suite,
                mode,
                k,
                with_rag,
                temperature,
                seed,
            } => run_eval_run(cli, suite, *mode, *k, *with_rag, *temperature, *seed),
            EvalWhat::Aggregate { run_id } => run_eval_aggregate(cli, run_id),
            EvalWhat::Compare {
                run_a,
                run_b,
                strict_chunker_version,
                write_report,
            } => run_eval_compare(cli, run_a, run_b, *strict_chunker_version, *write_report),
        },
    }
}

/// `kebab init`: create the workspace and report the XDG locations.
fn run_init(cli: &Cli, force: bool) -> anyhow::Result<()> {
    kebab_app::init_workspace(force)?;
    if !cli.json {
        println!(
            "created {}",
            kebab_config::Config::xdg_config_path().display()
        );
        println!("created {}", kebab_config::Config::xdg_data_dir().display());
        println!("created {}", kebab_config::Config::xdg_state_dir().display());
        // The binary is named `kebab` (see the clap `name` attribute and the
        // reset hint), so the hint must not advertise the old `kb` name.
        println!("hint edit the config above, then `kebab ingest`");
    }
    Ok(())
}

/// `kebab ingest`: run a cancellable ingest with a live progress display.
fn run_ingest(
    cli: &Cli,
    root: Option<&std::path::Path>,
    summary_only: bool,
) -> anyhow::Result<()> {
    let cfg = kebab_config::Config::load(cli.config.as_deref())?;
    let scope = kebab_core::SourceScope {
        root: root.map_or_else(
            || PathBuf::from(&cfg.workspace.root),
            std::path::Path::to_path_buf,
        ),
        include: cfg.workspace.include.clone(),
        exclude: cfg.workspace.exclude.clone(),
    };

    // p9-fb-04: register the Ctrl-C handler that flips the AtomicBool the
    // facade polls at each step boundary; a second Ctrl-C is a hard exit
    // (handled inside `cancel`). This is done *before* the display thread is
    // spawned so that a failure here cannot return early with a running,
    // unjoined thread.
    let cancel_token = cancel::install_sigint_cancel()?;

    // p9-fb-02: the progress display runs on a background thread. The ingest
    // call owns the `Sender`; once it returns, the sender is dropped, the
    // display thread's `recv()` fails and the thread exits cleanly.
    let mode = progress::ProgressMode::from_flags(cli.json);
    let (tx, rx) = std::sync::mpsc::channel::<kebab_app::IngestEvent>();
    let display_handle =
        std::thread::spawn(move || progress::ProgressDisplay::new(mode).run(rx));

    let ingest_result = kebab_app::ingest_with_config_cancellable(
        cfg,
        scope,
        summary_only,
        Some(tx),
        Some(cancel_token),
    );

    // Join before surfacing the ingest outcome so the spinner / final newline
    // is flushed whether ingest succeeded or failed. Display-thread errors
    // and panics are best-effort and must not change the exit code, hence
    // the discarded result.
    let _ = display_handle.join();

    let report = ingest_result?;
    if cli.json {
        println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
    } else {
        println!(
            "scanned {} new {} updated {} skipped {} errors {} ({} ms)",
            report.scanned,
            report.new,
            report.updated,
            report.skipped,
            report.errors,
            report.duration_ms
        );
    }
    Ok(())
}

/// `kebab list docs`: print every indexed document.
fn run_list_docs(cli: &Cli) -> anyhow::Result<()> {
    let cfg = kebab_config::Config::load(cli.config.as_deref())?;
    let docs = kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())?;
    if cli.json {
        println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?);
    } else {
        for d in &docs {
            println!("{}\t{}", d.doc_id, d.doc_path.0);
        }
    }
    Ok(())
}

/// `kebab inspect doc <id>`: dump one document as JSON.
fn run_inspect_doc(cli: &Cli, id: &str) -> anyhow::Result<()> {
    let cfg = kebab_config::Config::load(cli.config.as_deref())?;
    let doc_id: kebab_core::DocumentId = id.parse()?;
    let doc = kebab_app::inspect_doc_with_config(cfg, &doc_id)?;
    // There is no §2 wire schema for a `CanonicalDocument` yet (P1-5 decides
    // whether it becomes a tagged wrapper), so the raw domain object is
    // emitted as JSON regardless of `--json`, matching pre-P0-1 behaviour.
    println!("{}", serde_json::to_string(&doc)?);
    Ok(())
}

/// `kebab inspect chunk <id>`: dump one chunk in wire format.
fn run_inspect_chunk(cli: &Cli, id: &str) -> anyhow::Result<()> {
    let cfg = kebab_config::Config::load(cli.config.as_deref())?;
    let chunk_id: kebab_core::ChunkId = id.parse()?;
    let chunk = kebab_app::inspect_chunk_with_config(cfg, &chunk_id)?;
    println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?);
    Ok(())
}

/// `kebab search`: run a query and print the ranked hits.
fn run_search(cli: &Cli, query: &str, k: usize, mode: ModeFlag) -> anyhow::Result<()> {
    let cfg = kebab_config::Config::load(cli.config.as_deref())?;
    let q = kebab_core::SearchQuery {
        text: query.to_owned(),
        mode: mode.into(),
        k,
        filters: kebab_core::SearchFilters::default(),
    };
    let hits = kebab_app::search_with_config(cfg, q)?;
    if cli.json {
        println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
    } else {
        for h in &hits {
            // Four decimals, so RRF fused scores (bounded to roughly
            // 0 to 0.033 for k_rrf=60) don't all collapse to "0.02". The
            // heading path is appended so several chunks of one document
            // remain distinguishable on a single line.
            let heading = if h.heading_path.is_empty() {
                String::new()
            } else {
                format!(" > {}", h.heading_path.join(" / "))
            };
            println!(
                "{:>2}. {:.4} {}{}",
                h.rank,
                h.retrieval.fusion_score,
                h.doc_path.0,
                heading,
            );
        }
    }
    Ok(())
}

/// `kebab ask`: answer a question; an ungrounded answer becomes a refusal.
fn run_ask(
    cli: &Cli,
    query: &str,
    k: usize,
    mode: ModeFlag,
    explain: bool,
    temperature: Option<f32>,
    seed: Option<u64>,
) -> anyhow::Result<()> {
    let cfg = kebab_config::Config::load(cli.config.as_deref())?;
    let opts = kebab_app::AskOpts {
        k,
        explain,
        mode: mode.into(),
        temperature,
        seed,
        // CLI ask is non-streaming today (the answer prints all at once on
        // completion); the TUI ask pane (P9-3) is what supplies a real
        // `mpsc::Sender` here.
        stream_sink: None,
    };
    let ans = kebab_app::ask_with_config(cfg, query, opts)?;
    if cli.json {
        println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?);
    } else {
        println!("{}", ans.answer);
    }
    // The answer is printed first; a refusal then maps to exit code 1.
    if !ans.grounded {
        return Err(RefusalSignal.into());
    }
    Ok(())
}

/// `kebab reset`: confirm (unless `--yes`) and wipe the selected scope.
fn run_reset(
    cli: &Cli,
    all: bool,
    vector_only: bool,
    config_only: bool,
    yes: bool,
) -> anyhow::Result<()> {
    use kebab_app::ResetScope;
    use std::io::IsTerminal;

    // No scope flag at all resolves to `DataOnly`, same as `--data-only`.
    let scope = if all {
        ResetScope::All
    } else if vector_only {
        ResetScope::VectorOnly
    } else if config_only {
        ResetScope::ConfigOnly
    } else {
        ResetScope::DataOnly
    };
    let cfg = kebab_config::Config::load(cli.config.as_deref())?;
    let paths = kebab_app::reset::enumerate_paths(scope, &cfg);
    let bytes = kebab_app::reset::estimate_size_bytes(&paths);

    if !yes {
        // Never block on a prompt nobody can answer.
        if !std::io::stdin().is_terminal() {
            anyhow::bail!(
                "reset is destructive and stdin is non-interactive — pass --yes to proceed"
            );
        }
        if !confirm_destructive(scope, &paths, bytes)? {
            eprintln!("aborted.");
            return Ok(());
        }
    }

    let report = kebab_app::reset::execute(scope, &cfg)?;
    if cli.json {
        println!("{}", serde_json::to_string(&wire::wire_reset(&report))?);
    } else {
        println!(
            "removed {} path(s); embedding_rows_truncated={}",
            report.removed_paths.len(),
            report.embedding_rows_truncated
        );
        for p in &report.removed_paths {
            println!(" - {}", p.display());
        }
        if matches!(scope, ResetScope::All | ResetScope::ConfigOnly) {
            println!("hint: run `kebab init` to recreate config.toml");
        }
    }
    Ok(())
}

/// `kebab doctor`: print the health report; unhealthy maps to exit code 3.
fn run_doctor(cli: &Cli) -> anyhow::Result<()> {
    let report = kebab_app::doctor_with_config_path(cli.config.as_deref())?;
    if cli.json {
        println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?);
    } else {
        for c in &report.checks {
            // Distinct glyphs so passing and failing checks can be told
            // apart at a glance (both arms used to yield an empty string).
            let mark = if c.ok { "✓" } else { "✗" };
            println!("{mark} {:<20} {}", c.name, c.detail);
            // Hints are only shown for failed checks.
            if let (false, Some(hint)) = (c.ok, c.hint.as_ref()) {
                println!(" hint: {hint}");
            }
        }
        if !report.ok {
            println!();
            let failed = report.checks.iter().filter(|c| !c.ok).count();
            println!("{failed} check(s) failed.");
        }
    }
    if !report.ok {
        return Err(DoctorUnhealthy.into());
    }
    Ok(())
}

/// `kebab tui`: launch the Ratatui shell (P9-1, Library pane).
fn run_tui(cli: &Cli) -> anyhow::Result<()> {
    let config = kebab_config::Config::load(cli.config.as_deref())?;
    let mut app = kebab_tui::App::new(config)?;
    app.run()
}

/// `kebab eval run`: execute a suite and print a short summary.
fn run_eval_run(
    cli: &Cli,
    suite: &str,
    mode: ModeFlag,
    k: usize,
    with_rag: bool,
    temperature: Option<f32>,
    seed: Option<u64>,
) -> anyhow::Result<()> {
    let opts = kebab_eval::EvalRunOpts {
        suite: suite.to_owned(),
        mode: mode.into(),
        with_rag,
        k,
        temperature,
        seed,
    };
    let run = kebab_eval::run_eval(&opts)?;
    if cli.json {
        println!("{}", serde_json::to_string_pretty(&run)?);
    } else {
        println!("run_id: {}", run.run_id);
        println!("queries: {}", run.per_query.len());
        let failed = run.per_query.iter().filter(|q| q.error.is_some()).count();
        println!("failed: {failed}");
    }
    Ok(())
}

/// `kebab eval aggregate`: compute, store and print aggregate metrics.
fn run_eval_aggregate(cli: &Cli, run_id: &str) -> anyhow::Result<()> {
    let agg = kebab_eval::compute_aggregate(run_id)?;
    kebab_eval::store_aggregate(run_id, &agg)?;
    if cli.json {
        println!("{}", serde_json::to_string_pretty(&agg)?);
    } else {
        println!("run_id: {run_id}");
        println!("queries: {} ({} failed)", agg.total_queries, agg.failed_queries);
        // A missing cut-off is reported as 0.0 rather than as an error.
        println!("hit@1: {:.4}", agg.hit_at_k.get(&1).copied().unwrap_or(0.0));
        println!("hit@5: {:.4}", agg.hit_at_k.get(&5).copied().unwrap_or(0.0));
        println!("MRR: {:.4}", agg.mrr);
    }
    Ok(())
}

/// `kebab eval compare`: diff two runs, optionally persisting the report.
fn run_eval_compare(
    cli: &Cli,
    run_a: &str,
    run_b: &str,
    strict_chunker_version: bool,
    write_report: bool,
) -> anyhow::Result<()> {
    // NOTE(review): this loads the default config rather than `cli.config`,
    // unlike the non-eval subcommands — confirm whether `--config` should be
    // honoured here (the other eval entry points take no config either).
    let cfg = kebab_config::Config::load(None)?;
    let opts = kebab_eval::CompareOpts {
        strict_chunker_version,
    };
    let report = kebab_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
    let md = kebab_eval::render_report_md(&report);
    if cli.json {
        println!("{}", serde_json::to_string_pretty(&report)?);
    } else {
        print!("{md}");
    }
    if write_report {
        // `runs_dir` is expanded relative to the already-expanded data dir.
        let resolved_data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
        let runs_dir = kebab_config::expand_path(
            &cfg.storage.runs_dir,
            &resolved_data_dir.to_string_lossy(),
        );
        let dir = runs_dir.join(run_b);
        std::fs::create_dir_all(&dir)?;
        let path = dir.join("report.md");
        std::fs::write(&path, &md)?;
        // Keep stdout pure JSON in `--json` mode; the notice goes to stderr
        // and is suppressed entirely there.
        if !cli.json {
            eprintln!("wrote {}", path.display());
        }
    }
    Ok(())
}
/// Asks the user to confirm a destructive reset.
///
/// The summary (scope, every path that would be removed, estimated size)
/// and the `[y/N]` prompt are written to stderr; one line is then read from
/// stdin. Only the standard library is used. The caller is expected to have
/// ruled out the non-TTY case beforehand.
///
/// Returns `Ok(true)` only when the trimmed, ASCII-lowercased reply is `y`
/// or `yes`; an empty line or any other input yields `Ok(false)`, the safe
/// default.
///
/// # Errors
///
/// Fails if writing to stderr or reading from stdin fails.
fn confirm_destructive(
    scope: kebab_app::ResetScope,
    paths: &[std::path::PathBuf],
    bytes: u64,
) -> anyhow::Result<bool> {
    use std::io::Write;

    // Hold the stderr lock for the whole prompt so it is not interleaved
    // with other output.
    let mut prompt = std::io::stderr().lock();
    writeln!(prompt, "kebab reset ({:?}): about to remove", scope)?;
    paths
        .iter()
        .try_for_each(|path| writeln!(prompt, " - {}", path.display()))?;
    writeln!(prompt, "estimated total: {} bytes", bytes)?;
    write!(prompt, "Proceed? [y/N] ")?;
    // No trailing newline was written, so flush to make the prompt visible.
    prompt.flush()?;

    let mut reply = String::new();
    std::io::stdin().read_line(&mut reply)?;
    let reply = reply.trim().to_ascii_lowercase();
    Ok(reply == "y" || reply == "yes")
}