Merge pull request 'chore: workspace-wide cleanup — clippy::pedantic baseline + auto-fix' (#181) from chore/workspace-wide-cleanup-pre-v0-18 into main

Reviewed-on: #181
This commit was merged in pull request #181.
This commit is contained in:
2026-05-26 04:48:50 +00:00
134 changed files with 1073 additions and 524 deletions

View File

@@ -34,6 +34,94 @@ license = "MIT OR Apache-2.0"
repository = "https://github.com/altair823/kebab"
version = "0.17.2"
# pre-v0.18 workspace-wide cleanup: enable clippy::pedantic group with
# intentional allow-list. The allowed lints are either cosmetic (doc style),
# informational (function size), or carry intentional truncation we accept
# (numeric casts in tokenizer/ONNX inputs, hash modular reduction, etc).
[workspace.lints.clippy]
pedantic = { level = "warn", priority = -1 }
# Intentional u32 ↔ i64 casts in kebab-nli (ONNX i64 inputs from tokenizer u32 ids).
# u64 ↔ usize across kebab-store-sqlite row counts. Wide truncation is auditable
# at use site, not lint-wide.
cast_possible_truncation = "allow"
cast_possible_wrap = "allow"
cast_sign_loss = "allow"
cast_precision_loss = "allow"
# Doc markdown style is cosmetic; we run rustdoc on demand.
doc_markdown = "allow"
missing_errors_doc = "allow"
missing_panics_doc = "allow"
# Informational only — splitting a long pipeline function isn't always cleaner.
too_many_lines = "allow"
# `Foo::default()` is concise and idiomatic here; `<Foo as Default>::default()`
# adds noise without surfacing intent.
default_trait_access = "allow"
# Module name prefix on public items keeps the wire/log surface readable
# (`refusal_reason::no_chunks` etc).
module_name_repetitions = "allow"
# We use `#[must_use]` deliberately on public results, not blanket.
must_use_candidate = "allow"
# `String` arg sometimes signals "I'll consume this" — let signature decide.
needless_pass_by_value = "allow"
# Idiomatic single-line bindings stay; let-else expansion isn't always clearer.
manual_let_else = "allow"
# `use` after `let` is a common kebab pattern (scoped imports next to use site).
items_after_statements = "allow"
# Naming pairs like `chunk_id` / `chunks_id` are intentional domain terms.
similar_names = "allow"
# `iter.map(format!).collect::<String>()` is idiomatic when the per-element
# string is genuinely independent — `fold` only wins on accumulation patterns.
format_collect = "allow"
# Exhaustive `match` with explicit variant arms (vs `_`) catches future
# variant additions at compile time (kebab core's `RefusalReason` pattern).
match_wildcard_for_single_variants = "allow"
# Copy types under `&self` keep call-site discipline; auto-deref noise > tiny perf gain.
trivially_copy_pass_by_ref = "allow"
# `unnecessary_wraps` flags helpers that could drop `Result`, but keeping the
# Result allows future error variants without churning callers.
unnecessary_wraps = "allow"
# NLI score / RRF fusion / similarity threshold comparisons are intentional —
# floats live in the `[0, 1]` band and are compared with explicit thresholds.
float_cmp = "allow"
# File-extension dispatch is keyed on ASCII conventions; case sensitivity
# is part of the spec for `.md`, `.pdf`, etc.
case_sensitive_file_extension_comparisons = "allow"
# Config / opts structs intentionally bundle boolean flags (ingest options,
# search modes, etc) — splitting them into enums would obscure the wire shape.
struct_excessive_bools = "allow"
# `bytecount` crate would be a new dep just for one-off ASCII counts.
naive_bytecount = "allow"
# `#[ignore]` annotations on tests document via the test name + nearby comment.
ignore_without_reason = "allow"
# `format!` push patterns are a hot path for kebab-tui's progressive rendering;
# `write!` rewrite needs a verified-equal benchmark before swapping.
format_push_string = "allow"
# Builder-style `with_*` methods return `Self`; the existing `#[must_use]`
# discipline lives on aggregate constructors, not every chainable setter.
return_self_not_must_use = "allow"
# Match arms grouped by side-effect over body equality (e.g. snake_case wire
# label tables) — fanning them out keeps adding a new variant trivial.
match_same_arms = "allow"
# Remaining style-only warnings: trailing `continue` is sometimes clearer than
# rewriting, `_x` underscored bindings document intent at the use site, and
# `!(a == b)` reads better than `a != b` when paired with a complementary check.
needless_continue = "allow"
used_underscore_binding = "allow"
nonminimal_bool = "allow"
# Other one-off cosmetic items: large literal formatting, doc link quoting,
# `Clone::clone_from` swap, `str::replace` chaining, `Iterator::any` ergonomics.
unreadable_literal = "allow"
many_single_char_names = "allow"
doc_link_with_quotes = "allow"
assigning_clones = "allow"
collapsible_str_replace = "allow"
trivial_regex = "allow"
elidable_lifetime_names = "allow"
range_plus_one = "allow"
explicit_iter_loop = "allow"
implicit_hasher = "allow"
ref_option = "allow"
[workspace.dependencies]
anyhow = "1"
thiserror = "2"

View File

@@ -81,3 +81,6 @@ lopdf = "0.32"
# error_wire::tests::llm_unreachable_classifies_to_model_unreachable needs a real
# reqwest::Error (private constructor) — built from a connect-refused call.
reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls"] }
[lints]
workspace = true

View File

@@ -293,7 +293,7 @@ impl App {
// so other in-flight searches can use the cache concurrently.
drop(guard);
let hits = self.search_uncached(query)?;
let mut guard = cache.lock().unwrap_or_else(|e| e.into_inner());
let mut guard = cache.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
guard.put(key, hits.clone());
Ok(hits)
}
@@ -467,7 +467,7 @@ impl App {
// Snippet truncation if opts.snippet_chars set (mirror non-trace path).
if opts.snippet_chars.is_some() {
for h in hits.iter_mut() {
for h in &mut hits {
if h.snippet.chars().count() > snippet_chars {
h.snippet = trim_to_chars(&h.snippet, snippet_chars);
}
@@ -502,7 +502,7 @@ impl App {
// `config.search.snippet_chars`; this only kicks in when the
// caller asked for *less*).
if opts.snippet_chars.is_some() {
for h in hits.iter_mut() {
for h in &mut hits {
if h.snippet.chars().count() > snippet_chars {
h.snippet = trim_to_chars(&h.snippet, snippet_chars);
}
@@ -521,7 +521,7 @@ impl App {
{
current_snippet_cap =
(current_snippet_cap / 2).max(SNIPPET_FLOOR);
for h in hits.iter_mut() {
for h in &mut hits {
if h.snippet.chars().count() > current_snippet_cap {
h.snippet =
trim_to_chars(&h.snippet, current_snippet_cap);
@@ -868,7 +868,7 @@ impl App {
/// clear` admin command). No-op when the cache is disabled.
pub fn clear_search_cache(&self) {
if let Some(cache) = self.search_cache.as_ref() {
let mut guard = cache.lock().unwrap_or_else(|e| e.into_inner());
let mut guard = cache.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
guard.clear();
}
}

View File

@@ -139,9 +139,8 @@ fn parse_one(raw: &Value) -> Result<(SearchQuery, SearchOpts), String> {
let k = obj
.get("k")
.and_then(|v| v.as_u64())
.map(|n| n as usize)
.unwrap_or(0); // 0 → use config default in app
.and_then(serde_json::Value::as_u64)
.map_or(0, |n| n as usize); // 0 → use config default in app
let trust_min = match obj.get("trust_min").and_then(|v| v.as_str()) {
None => None,
@@ -209,14 +208,14 @@ fn parse_one(raw: &Value) -> Result<(SearchQuery, SearchOpts), String> {
let opts = SearchOpts {
max_tokens: obj
.get("max_tokens")
.and_then(|v| v.as_u64())
.and_then(serde_json::Value::as_u64)
.map(|n| n as usize),
snippet_chars: obj
.get("snippet_chars")
.and_then(|v| v.as_u64())
.and_then(serde_json::Value::as_u64)
.map(|n| n as usize),
cursor: obj.get("cursor").and_then(|v| v.as_str()).map(String::from),
trace: obj.get("trace").and_then(|v| v.as_bool()).unwrap_or(false),
trace: obj.get("trace").and_then(serde_json::Value::as_bool).unwrap_or(false),
};
Ok((

View File

@@ -91,7 +91,7 @@ pub fn classify(err: &anyhow::Error, verbose: bool) -> ErrorV1 {
}
let mut details = json!({});
if verbose {
let chain: Vec<String> = err.chain().map(|c| c.to_string()).collect();
let chain: Vec<String> = err.chain().map(std::string::ToString::to_string).collect();
details = json!({"chain": chain});
}
ErrorV1 {

View File

@@ -50,7 +50,7 @@ pub fn ensure_kebabignore_entry(workspace_root: &Path) -> Result<()> {
if !existing.is_empty() && !existing.ends_with('\n') {
file.write_all(b"\n")?;
}
writeln!(file, "{}", KEBABIGNORE_LINE)?;
writeln!(file, "{KEBABIGNORE_LINE}")?;
Ok(())
}

View File

@@ -166,8 +166,8 @@ mod tests {
};
let v = serde_json::to_value(&ev).unwrap();
assert_eq!(v.get("kind").and_then(|s| s.as_str()), Some("asset_started"));
assert_eq!(v.get("idx").and_then(|n| n.as_u64()), Some(1));
assert_eq!(v.get("total").and_then(|n| n.as_u64()), Some(10));
assert_eq!(v.get("idx").and_then(serde_json::Value::as_u64), Some(1));
assert_eq!(v.get("total").and_then(serde_json::Value::as_u64), Some(10));
assert_eq!(v.get("path").and_then(|s| s.as_str()), Some("notes/foo.md"));
assert_eq!(v.get("media").and_then(|s| s.as_str()), Some("markdown"));
}
@@ -184,8 +184,8 @@ mod tests {
let v = serde_json::to_value(&ev).unwrap();
assert_eq!(v.get("kind").and_then(|s| s.as_str()), Some("completed"));
let counts = v.get("counts").unwrap();
assert_eq!(counts.get("scanned").and_then(|n| n.as_u64()), Some(5));
assert_eq!(counts.get("new").and_then(|n| n.as_u64()), Some(2));
assert_eq!(counts.get("scanned").and_then(serde_json::Value::as_u64), Some(5));
assert_eq!(counts.get("new").and_then(serde_json::Value::as_u64), Some(2));
}
#[test]

View File

@@ -289,8 +289,7 @@ pub fn ingest_with_config_opts(
let cancelled = || {
opts.cancel
.as_ref()
.map(|c| c.load(std::sync::atomic::Ordering::Relaxed))
.unwrap_or(false)
.is_some_and(|c| c.load(std::sync::atomic::Ordering::Relaxed))
};
let force_reingest = opts.force_reingest;
let started_instant = std::time::Instant::now();
@@ -394,7 +393,7 @@ pub fn ingest_with_config_opts(
let purged_deleted_files = sweep_deleted_files(
&app,
&scanned_paths,
vector_store.as_ref().map(|v| v.as_ref()),
vector_store.as_ref().map(std::convert::AsRef::as_ref),
)?;
let started_at = time::OffsetDateTime::now_utc();
@@ -509,10 +508,10 @@ pub fn ingest_with_config_opts(
*skipped_by_extension.entry(ext).or_insert(0) += 1;
}
kebab_core::IngestItemKind::Unchanged => {
unchanged_count = unchanged_count.saturating_add(1)
unchanged_count = unchanged_count.saturating_add(1);
}
kebab_core::IngestItemKind::Error => {
error_count = error_count.saturating_add(1)
error_count = error_count.saturating_add(1);
}
}
crate::ingest_progress::emit(
@@ -940,9 +939,7 @@ fn try_skip_unchanged(
fn ext_for_skip_warning(path: &str) -> String {
std::path::Path::new(path)
.extension()
.and_then(|s| s.to_str())
.map(|s| s.to_ascii_lowercase())
.unwrap_or_else(|| NO_EXT_SENTINEL.to_string())
.and_then(|s| s.to_str()).map_or_else(|| NO_EXT_SENTINEL.to_string(), str::to_ascii_lowercase)
}
/// p9-fb-25: render the `IngestItem.warnings` line for a Skipped
@@ -2407,7 +2404,7 @@ fn lang_hint_from_doc(doc: &CanonicalDocument) -> Option<Lang> {
/// Convenience: end byte of the frontmatter region (or 0 when absent).
fn fm_span_end(span: Option<kebab_parse_md::FrontmatterSpan>) -> usize {
span.map(|s| s.end).unwrap_or(0)
span.map_or(0, |s| s.end)
}
/// Count `\n` in a byte prefix to convert frontmatter byte span to
@@ -2710,8 +2707,7 @@ pub fn ingest_file_with_config(
const SUPPORTED_EXTS: &[&str] = &["md", "pdf", "png", "jpg", "jpeg"];
if !SUPPORTED_EXTS.contains(&ext.as_str()) {
anyhow::bail!(
"ingest-file: unsupported extension `.{}` (supported: {:?})",
ext, SUPPORTED_EXTS
"ingest-file: unsupported extension `.{ext}` (supported: {SUPPORTED_EXTS:?})"
);
}

View File

@@ -165,7 +165,7 @@ fn collect_stats(
store: &kebab_store_sqlite::SqliteStore,
) -> anyhow::Result<Stats> {
let counts = store
.count_summary_with_threshold(cfg.search.stale_threshold_days as u64)?;
.count_summary_with_threshold(u64::from(cfg.search.stale_threshold_days))?;
let data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
let index_bytes = kebab_store_sqlite::stats_ext::index_bytes(&data_dir)
.map_err(|e| anyhow::anyhow!("index_bytes: {e}"))?;

View File

@@ -1267,7 +1267,7 @@ fn tier1_cpp_ingest_searchable() {
// (method) depending on which chunk ranks first.
assert!(
symbol.as_deref().is_some_and(|s| s.starts_with("kebab::chunk::Foo")),
"C++ symbol must start with namespace::Class prefix, got {:?}", symbol
"C++ symbol must start with namespace::Class prefix, got {symbol:?}"
);
assert!(*line_start >= 1, "line_start must be >=1");
}

View File

@@ -33,7 +33,7 @@ fn ingest_file_copies_external_md_and_reports_new() {
assert!(ext_dir.is_dir());
let entries: Vec<_> = fs::read_dir(&ext_dir)
.unwrap()
.filter_map(|e| e.ok())
.filter_map(std::result::Result::ok)
.collect();
assert_eq!(entries.len(), 1, "exactly one file in _external/");
let name = entries[0].file_name().to_string_lossy().into_owned();

View File

@@ -35,7 +35,7 @@ fn ingest_stdin_writes_frontmatter_and_reports_new() {
// _external/ contains exactly one .md file with frontmatter.
let ext_dir = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
let entries: Vec<_> = fs::read_dir(&ext_dir).unwrap()
.filter_map(|e| e.ok())
.filter_map(std::result::Result::ok)
.collect();
assert_eq!(entries.len(), 1);
let content = fs::read_to_string(entries[0].path()).unwrap();
@@ -60,7 +60,7 @@ fn ingest_stdin_without_source_uri() {
let ext_dir = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
let entries: Vec<_> = fs::read_dir(&ext_dir).unwrap()
.filter_map(|e| e.ok())
.filter_map(std::result::Result::ok)
.collect();
let content = fs::read_to_string(entries[0].path()).unwrap();
assert!(content.contains("title: \"Title\""));

View File

@@ -0,0 +1,81 @@
//! Tests for `App::open_with_config`'s NLI verifier construction path.
//!
//! Coverage:
//! 1. `open_with_config_nli_fails_when_model_dir_unwritable_and_threshold_positive` —
//! when `rag.nli_threshold > 0` and `storage.model_dir` is unwritable,
//! `open_with_config` returns `Err` with "OnnxNliVerifier" in the
//! error chain.
//! 2. `open_with_config_nli_skipped_when_threshold_zero` —
//! same bad `model_dir`, but `rag.nli_threshold = 0.0` (gate disabled),
//! so `OnnxNliVerifier::new` is never called and `open_with_config`
//! succeeds.
//!
//! `/proc/1/root` is the init process's filesystem root; on Linux it is
//! owned by root and not traversable by unprivileged users, making
//! `create_dir_all` fail with `EACCES` — a reliable "unwritable path"
//! that requires no test setup beyond the path literal.
use kebab_config::Config;
/// Return a `Config` whose `data_dir` lives in a fresh `TempDir`
/// (so `SqliteStore::open` succeeds) and whose `model_dir` is set to
/// `/proc/1/root` (unwritable by non-root processes on Linux).
///
/// The `TempDir` is returned alongside the config so the caller keeps
/// it alive until the test completes — dropping it early would delete
/// the data directory before any assertions run.
fn config_with_unwritable_model_dir() -> (tempfile::TempDir, Config) {
let tmp = tempfile::tempdir().expect("tempdir");
let mut cfg = Config::defaults();
// Valid data_dir → SqliteStore::open + run_migrations succeed.
cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned();
// /proc/1/root is only accessible to root; create_dir_all will
// return EACCES for any unprivileged user, which is exactly the
// failure mode we want to exercise.
cfg.storage.model_dir = "/proc/1/root".to_string();
(tmp, cfg)
}
// ── 1. Failure path: threshold > 0 + unwritable model_dir ─────────────────
#[test]
fn open_with_config_nli_fails_when_model_dir_unwritable_and_threshold_positive() {
let (_tmp, mut cfg) = config_with_unwritable_model_dir();
cfg.rag.nli_threshold = 0.5; // gate enabled → OnnxNliVerifier::new runs
let result = kebab_app::App::open_with_config(cfg);
let Err(err) = result else {
panic!(
"App::open_with_config must fail when model_dir is unwritable and nli_threshold > 0"
);
};
// The error chain must identify the OnnxNliVerifier as the source so
// an operator reading logs can trace the failure to the NLI config.
let err_chain = format!("{err:?}");
assert!(
err_chain.contains("OnnxNliVerifier"),
"error chain must mention OnnxNliVerifier; full chain: {err_chain}"
);
}
// ── 2. Success path: threshold = 0.0 → NLI verifier never constructed ──────
#[test]
fn open_with_config_nli_skipped_when_threshold_zero() {
let (_tmp, cfg) = config_with_unwritable_model_dir();
// Default nli_threshold is 0.0 — gate disabled, verifier skipped.
assert!(
(cfg.rag.nli_threshold - 0.0).abs() < f32::EPSILON,
"precondition: default nli_threshold must be 0.0 (gate disabled)"
);
// A bad model_dir must NOT cause a failure when the NLI gate is off.
let result = kebab_app::App::open_with_config(cfg);
assert!(
result.is_ok(),
"App::open_with_config must succeed when nli_threshold = 0.0 \
(OnnxNliVerifier is never constructed); err: {:?}",
result.err()
);
}

View File

@@ -14,12 +14,10 @@ use common::TestEnv;
fn require_avx_or_panic() {
#[cfg(target_arch = "x86_64")]
{
if !std::is_x86_feature_detected!("avx") {
panic!(
"kb-app vector integration test requires AVX-capable hardware; \
host CPU lacks AVX. Run on an AVX-capable machine."
);
}
assert!(std::is_x86_feature_detected!("avx"),
"kb-app vector integration test requires AVX-capable hardware; \
host CPU lacks AVX. Run on an AVX-capable machine."
);
}
}

View File

@@ -26,3 +26,6 @@ kebab-parse-code = { path = "../kebab-parse-code" }
kebab-normalize = { path = "../kebab-normalize" }
serde_json = { workspace = true }
time = { workspace = true }
[lints]
workspace = true

View File

@@ -266,7 +266,7 @@ mod tests {
#[test]
fn oversize_unit_splits_into_parts_with_unique_ids() {
let body = (0..500).map(|i| format!("\tx{i} = {i};\n")).collect::<Vec<_>>().join("");
let body = (0..500).map(|i| format!("\tx{i} = {i};\n")).collect::<String>();
let code = format!("int big() {{\n{body}\n}}");
let doc = code_doc(&[("big", 1, 502, &code)]);
let chunks = CodeCAstV1Chunker.chunk(&doc, &policy()).unwrap();
@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -266,7 +266,7 @@ mod tests {
#[test]
fn oversize_unit_splits_into_parts_with_unique_ids() {
let body = (0..500).map(|i| format!("\tx{i} = {i};\n")).collect::<Vec<_>>().join("");
let body = (0..500).map(|i| format!("\tx{i} = {i};\n")).collect::<String>();
let code = format!("int big() {{\n{body}\n}}");
let doc = code_doc(&[("big", 1, 502, &code)]);
let chunks = CodeCppAstV1Chunker.chunk(&doc, &policy()).unwrap();
@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -281,7 +281,7 @@ mod tests {
}
}
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
let n = ids.len(); ids.sort(); ids.dedup();
let n = ids.len(); ids.sort_unstable(); ids.dedup();
assert_eq!(ids.len(), n, "chunk_ids unique across split parts");
}

View File

@@ -387,9 +387,7 @@ fn render_block_text(b: &Block) -> String {
// alt keeps lexical search hits on filenames working even when
// P6-1's filename auto-fill is bypassed.
Block::ImageRef(i) => {
let alt = if !i.alt.is_empty() {
i.alt.clone()
} else {
let alt = if i.alt.is_empty() {
// P6-1 falls back to filename so this branch is
// defensive — keep it lest a future test fixture or
// synthetic block path skip the auto-fill.
@@ -399,17 +397,17 @@ fn render_block_text(b: &Block) -> String {
.filter(|s| !s.is_empty())
.unwrap_or("[image]")
.to_string()
} else {
i.alt.clone()
};
let ocr = i
.ocr
.as_ref()
.map(|o| o.joined.as_str())
.unwrap_or("");
.map_or("", |o| o.joined.as_str());
let cap = i
.caption
.as_ref()
.map(|c| c.text.as_str())
.unwrap_or("");
.map_or("", |c| c.text.as_str());
[alt.as_str(), ocr, cap]
.iter()
.filter(|s| !s.is_empty())

View File

@@ -450,7 +450,7 @@ mod tests {
// chunk_ids stay distinct despite identical block_ids — the
// per-chunk policy_hash variant is doing its job.
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
ids.sort();
ids.sort_unstable();
let total = ids.len();
ids.dedup();
assert_eq!(ids.len(), total, "all chunk_ids must be unique");
@@ -668,7 +668,7 @@ mod tests {
// chunk_ids stay distinct (the per-chunk hash variant keys off
// char_start which is now strictly increasing).
let mut ids: Vec<&str> = chunks.iter().map(|c| c.chunk_id.0.as_str()).collect();
ids.sort();
ids.sort_unstable();
let total = ids.len();
ids.dedup();
assert_eq!(ids.len(), total, "chunk_ids must remain unique");

View File

@@ -280,9 +280,7 @@ fn k8s_oversize_splits_into_line_windows_sharing_symbol() {
assert_eq!(
prev_end + 1,
next_start,
"line ranges must be contiguous: {} → {} (got gap or overlap)",
prev_end,
next_start
"line ranges must be contiguous: {prev_end} → {next_start} (got gap or overlap)"
);
}
}

View File

@@ -51,7 +51,7 @@ fn manifest_doc(lang: &str, manifest_text: &str) -> CanonicalDocument {
doc_id,
source_asset_id: aid,
workspace_path: wp,
title: format!("Manifest ({})", lang),
title: format!("Manifest ({lang})"),
lang: Lang("und".into()),
blocks: vec![block],
metadata: Metadata {

View File

@@ -50,3 +50,6 @@ tempfile = { workspace = true }
# to simulate stale docs. `time` is the formatter used by the helper.
rusqlite = { workspace = true }
time = { workspace = true }
[lints]
workspace = true

View File

@@ -797,7 +797,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
serde_json::to_string(&item.query)?,
)?;
if let Some(err) = &item.error {
writeln!(stdout, "error: {}", err)?;
writeln!(stdout, "error: {err}")?;
} else if let Some(resp) = &item.response {
writeln!(
stdout,
@@ -1171,15 +1171,13 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
let report = kebab_app::reset::execute(scope, &cfg)?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_reset(&report))?);
} else {
if report.orphans_purged > 0 {
println!("orphans purged: {}", report.orphans_purged);
for p in &report.purged_paths {
println!(" - {}", p.0);
}
} else {
println!("no orphaned docs found — store is already in sync with walker scope");
} else if report.orphans_purged > 0 {
println!("orphans purged: {}", report.orphans_purged);
for p in &report.purged_paths {
println!(" - {}", p.0);
}
} else {
println!("no orphaned docs found — store is already in sync with walker scope");
}
return Ok(());
}
@@ -1508,11 +1506,11 @@ fn confirm_destructive(
) -> anyhow::Result<bool> {
use std::io::Write;
let mut out = std::io::stderr().lock();
writeln!(out, "kebab reset ({:?}): about to remove", scope)?;
writeln!(out, "kebab reset ({scope:?}): about to remove")?;
for p in paths {
writeln!(out, " - {}", p.display())?;
}
writeln!(out, "estimated total: {} bytes", bytes)?;
writeln!(out, "estimated total: {bytes} bytes")?;
write!(out, "Proceed? [y/N] ")?;
out.flush()?;
@@ -1573,19 +1571,19 @@ fn render_fetch_plain(r: &kebab_core::FetchResult) {
if !r.context_before.is_empty() {
println!("\n=== before ===");
for c in &r.context_before {
let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or("");
let heading = c.heading_path.last().map_or("", std::string::String::as_str);
println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text);
}
}
if let Some(c) = &r.chunk {
println!("\n=== target ===");
let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or("");
let heading = c.heading_path.last().map_or("", std::string::String::as_str);
println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text);
}
if !r.context_after.is_empty() {
println!("\n=== after ===");
for c in &r.context_after {
let heading = c.heading_path.last().map(|s| s.as_str()).unwrap_or("");
let heading = c.heading_path.last().map_or("", std::string::String::as_str);
println!("[{} § {}]\n{}\n", c.chunk_id.0, heading, c.text);
}
}

View File

@@ -313,7 +313,7 @@ mod tests {
v.get("next_cursor").and_then(|c| c.as_str()),
Some("opaque-cursor-abc")
);
assert_eq!(v.get("truncated").and_then(|t| t.as_bool()), Some(true));
assert_eq!(v.get("truncated").and_then(serde_json::Value::as_bool), Some(true));
}
#[test]

View File

@@ -88,5 +88,5 @@ max_context_tokens = 8000
let stdout = String::from_utf8_lossy(&out.stdout);
let v: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap();
assert_eq!(v.get("schema_version").and_then(|s| s.as_str()), Some("ingest_report.v1"));
assert_eq!(v.get("new").and_then(|n| n.as_u64()), Some(1));
assert_eq!(v.get("new").and_then(serde_json::Value::as_u64), Some(1));
}

View File

@@ -96,5 +96,5 @@ max_context_tokens = 8000
let stdout = String::from_utf8_lossy(&out.stdout);
let v: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap();
assert_eq!(v.get("schema_version").and_then(|s| s.as_str()), Some("ingest_report.v1"));
assert_eq!(v.get("new").and_then(|n| n.as_u64()), Some(1));
assert_eq!(v.get("new").and_then(serde_json::Value::as_u64), Some(1));
}

View File

@@ -43,7 +43,7 @@ fn cli_mcp_initialize_then_tools_list() {
reader.read_line(&mut line).unwrap();
let init: serde_json::Value = serde_json::from_str(line.trim()).unwrap();
assert_eq!(
init.get("id").and_then(|i| i.as_i64()),
init.get("id").and_then(serde_json::Value::as_i64),
Some(1),
"unexpected id in initialize response: {init}"
);
@@ -57,7 +57,7 @@ fn cli_mcp_initialize_then_tools_list() {
reader.read_line(&mut line).unwrap();
let list: serde_json::Value = serde_json::from_str(line.trim()).unwrap();
assert_eq!(
list.get("id").and_then(|i| i.as_i64()),
list.get("id").and_then(serde_json::Value::as_i64),
Some(2),
"unexpected id in tools/list response: {list}"
);

View File

@@ -76,8 +76,7 @@ fn cli_schema_json_emits_schema_v1() {
assert!(
v.get("kebab_version")
.and_then(|s| s.as_str())
.map(|s| !s.is_empty())
.unwrap_or(false),
.is_some_and(|s| !s.is_empty()),
"kebab_version must be a non-empty string"
);
@@ -86,12 +85,12 @@ fn cli_schema_json_emits_schema_v1() {
.and_then(|c| c.as_object())
.expect("capabilities must be a JSON object");
assert_eq!(
caps.get("json_mode").and_then(|b| b.as_bool()),
caps.get("json_mode").and_then(serde_json::Value::as_bool),
Some(true),
"capabilities.json_mode must be true"
);
assert_eq!(
caps.get("mcp_server").and_then(|b| b.as_bool()),
caps.get("mcp_server").and_then(serde_json::Value::as_bool),
Some(true),
"capabilities.mcp_server must be true (fb-30)"
);

View File

@@ -155,8 +155,8 @@ fn ingest_json_progress_lines_carry_kind_and_ts() {
saw_completed = true;
// Counts mirror the report.
let counts = v.get("counts").unwrap();
assert_eq!(counts.get("scanned").and_then(|n| n.as_u64()), Some(2));
assert_eq!(counts.get("new").and_then(|n| n.as_u64()), Some(2));
assert_eq!(counts.get("scanned").and_then(serde_json::Value::as_u64), Some(2));
assert_eq!(counts.get("new").and_then(serde_json::Value::as_u64), Some(2));
}
}
assert!(saw_scan_started, "missing scan_started event");

View File

@@ -22,3 +22,6 @@ tracing = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
[lints]
workspace = true

View File

@@ -157,7 +157,7 @@ mod tests {
#[test]
fn xdg_data_home_set_replaces_var() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/custom/path") };
@@ -168,7 +168,7 @@ mod tests {
#[test]
fn xdg_data_home_unset_uses_default() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::remove_var("XDG_DATA_HOME") };
@@ -181,7 +181,7 @@ mod tests {
#[test]
fn xdg_with_no_default_resolves_to_empty_when_unset() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::remove_var("XDG_DATA_HOME") };
@@ -193,7 +193,7 @@ mod tests {
#[test]
fn leading_tilde_expands_to_home() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let home = std::env::var("HOME").expect("HOME must be set in tests");
let p = expand_path("~/runs", "");
assert_eq!(p, PathBuf::from(home).join("runs"));
@@ -229,7 +229,7 @@ mod tests {
#[test]
fn tilde_path_ignores_base_dir() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let home = std::env::var("HOME").expect("HOME must be set in tests");
let base = Path::new("/tmp/ignored-cfg");
let p = expand_path_with_base("~/x", "", base);
@@ -238,7 +238,7 @@ mod tests {
#[test]
fn xdg_var_path_ignores_base_dir() {
let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };
@@ -255,7 +255,7 @@ mod tests {
// Order matters: substitute `{data_dir}` (which itself contains
// an unexpanded `${XDG_DATA_HOME}` and `~`), then the other two
// resolve the result.
let _lock = ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _lock = ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
let _guard = XdgGuard::capture();
// SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };

View File

@@ -16,3 +16,6 @@ time = { workspace = true }
blake3 = { workspace = true }
serde_json_canonicalizer = "0.3"
unicode-normalization = "0.1"
[lints]
workspace = true

View File

@@ -226,28 +226,25 @@ fn parse_hms_ms(s: &str) -> Result<u64> {
let m: u64 = parts[1]
.parse()
.map_err(|_| anyhow::anyhow!("bad minutes in {:?} (input {s:?})", parts[1]))?;
let (sec, ms) = match parts[2].split_once('.') {
Some((s_part, ms_part)) => {
let sec: u64 = s_part
.parse()
.map_err(|_| anyhow::anyhow!("bad seconds in {s_part:?} (input {s:?})"))?;
// Pad/truncate to exactly 3 digits.
let mut ms_str = ms_part.to_owned();
while ms_str.len() < 3 {
ms_str.push('0');
}
ms_str.truncate(3);
let ms: u64 = ms_str
.parse()
.map_err(|_| anyhow::anyhow!("bad milliseconds in {ms_part:?} (input {s:?})"))?;
(sec, ms)
}
None => {
let sec: u64 = parts[2]
.parse()
.map_err(|_| anyhow::anyhow!("bad seconds in {:?} (input {s:?})", parts[2]))?;
(sec, 0)
let (sec, ms) = if let Some((s_part, ms_part)) = parts[2].split_once('.') {
let sec: u64 = s_part
.parse()
.map_err(|_| anyhow::anyhow!("bad seconds in {s_part:?} (input {s:?})"))?;
// Pad/truncate to exactly 3 digits.
let mut ms_str = ms_part.to_owned();
while ms_str.len() < 3 {
ms_str.push('0');
}
ms_str.truncate(3);
let ms: u64 = ms_str
.parse()
.map_err(|_| anyhow::anyhow!("bad milliseconds in {ms_part:?} (input {s:?})"))?;
(sec, ms)
} else {
let sec: u64 = parts[2]
.parse()
.map_err(|_| anyhow::anyhow!("bad seconds in {:?} (input {s:?})", parts[2]))?;
(sec, 0)
};
Ok(h * 3_600_000 + m * 60_000 + sec * 1000 + ms)
}

View File

@@ -471,7 +471,7 @@ mod tests {
doc_path: WorkspacePath("a.md".into()),
heading_path: vec![],
section_label: None,
snippet: "".into(),
snippet: String::new(),
citation: Citation::Line {
path: WorkspacePath("a.md".into()),
start: 1,
@@ -502,7 +502,7 @@ mod tests {
doc_path: WorkspacePath("a.rs".into()),
heading_path: vec![],
section_label: None,
snippet: "".into(),
snippet: String::new(),
citation: Citation::Code {
path: WorkspacePath("a.rs".into()),
line_start: 1,

View File

@@ -20,3 +20,6 @@ anyhow = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
serde_json = { workspace = true }
[lints]
workspace = true

View File

@@ -158,7 +158,7 @@ impl Embedder for FastembedEmbedder {
let guard = self
.inner
.lock()
.unwrap_or_else(|p| p.into_inner());
.unwrap_or_else(std::sync::PoisonError::into_inner);
let batch: Vec<Vec<f32>> = guard
.embed(chunk_vec, Some(self.batch_size))
.context("fastembed: embed")?;

View File

@@ -28,3 +28,6 @@ mock = []
[dev-dependencies]
proptest = { workspace = true }
[lints]
workspace = true

View File

@@ -59,7 +59,7 @@ pub fn assert_vector_shape(vecs: &[Vec<f32>], expected_dims: usize) {
/// Panics on mismatch (test-only helper — callers are tests).
pub fn assert_unit_norm(vecs: &[Vec<f32>], tolerance: f32) {
for (i, v) in vecs.iter().enumerate() {
let norm_sq: f64 = v.iter().map(|&x| (x as f64) * (x as f64)).sum();
let norm_sq: f64 = v.iter().map(|&x| f64::from(x) * f64::from(x)).sum();
let norm = norm_sq.sqrt() as f32;
assert!(
(norm - 1.0).abs() <= tolerance,

View File

@@ -132,10 +132,10 @@ impl Embedder for MockEmbedder {
.collect();
// L2-normalize. Skip the rare all-zero case to avoid 0/0 = NaN.
let norm_sq: f64 = v.iter().map(|&x| (x as f64) * (x as f64)).sum();
let norm_sq: f64 = v.iter().map(|&x| f64::from(x) * f64::from(x)).sum();
if norm_sq > 0.0 {
let inv = (1.0 / norm_sq.sqrt()) as f32;
for x in v.iter_mut() {
for x in &mut v {
*x *= inv;
}
}

View File

@@ -28,3 +28,6 @@ uuid = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
rusqlite = { workspace = true }
[lints]
workspace = true

View File

@@ -260,8 +260,8 @@ pub fn render_report_md(report: &CompareReport) -> String {
"| {} | {} | {} | {} | {} |",
c.query_id,
comparison_kind_label(c.kind),
c.a_hit_rank.map(|r| r.to_string()).unwrap_or_else(|| "".into()),
c.b_hit_rank.map(|r| r.to_string()).unwrap_or_else(|| "".into()),
c.a_hit_rank.map_or_else(|| "".into(), |r| r.to_string()),
c.b_hit_rank.map_or_else(|| "".into(), |r| r.to_string()),
c.note.as_deref().unwrap_or(""),
);
}
@@ -308,7 +308,7 @@ fn extract_chunker_version(snapshot_json: &str) -> Option<String> {
let v: serde_json::Value = serde_json::from_str(snapshot_json).ok()?;
v.get("chunker_version")
.and_then(|x| x.as_str())
.map(|s| s.to_owned())
.map(std::borrow::ToOwned::to_owned)
}
fn parse_results(
@@ -402,8 +402,7 @@ fn classify(
// so refusal-flow queries (no expected_*) don't appear as
// regressions.
let has_expected = gq
.map(|g| !g.expected_chunk_ids.is_empty() || !g.expected_doc_ids.is_empty())
.unwrap_or(false);
.is_some_and(|g| !g.expected_chunk_ids.is_empty() || !g.expected_doc_ids.is_empty());
if has_expected {
(ComparisonKind::Regression, Some("hit→miss".into()))
} else {
@@ -426,7 +425,7 @@ fn build_deltas(
if a.is_nan() || b.is_nan() {
serde_json::Value::Null
} else {
serde_json::Value::from((b - a) as f64)
serde_json::Value::from(f64::from(b - a))
}
}
let mut hit = serde_json::Map::new();

View File

@@ -270,7 +270,21 @@ pub(crate) fn aggregate_from_rows(
// recall@k_doc (doc-level, requires non-empty expected_doc_ids
// and `>0` is the "should retrieve" condition; refusal queries
// (`expected_doc_ids = []`) are excluded by spec).
if !gq.expected_doc_ids.is_empty() {
if gq.expected_doc_ids.is_empty() {
// refusal_correctness: golden marks "should refuse" via empty
// expected_doc_ids. We can only judge this on RAG runs — a
// lexical-only run produces no Answer, so "refusal" is
// undefined. Excluding such queries from the denominator
// (rather than counting them as failures) keeps the metric
// honest: a search-only run reports refusal_correctness as
// NaN/null, not 0.0.
if let Some(ans) = &qr.answer {
refusal_denom += 1;
if !ans.grounded {
refusal_num += 1;
}
}
} else {
let expected_docs: HashSet<&DocumentId> = gq.expected_doc_ids.iter().collect();
for k in TOP_K_VARIANTS {
let entry = recall_at_k_doc.get_mut(k).expect("init");
@@ -285,20 +299,6 @@ pub(crate) fn aggregate_from_rows(
let frac = covered as f64 / expected_docs.len() as f64;
entry.0 += frac;
}
} else {
// refusal_correctness: golden marks "should refuse" via empty
// expected_doc_ids. We can only judge this on RAG runs — a
// lexical-only run produces no Answer, so "refusal" is
// undefined. Excluding such queries from the denominator
// (rather than counting them as failures) keeps the metric
// honest: a search-only run reports refusal_correctness as
// NaN/null, not 0.0.
if let Some(ans) = &qr.answer {
refusal_denom += 1;
if !ans.grounded {
refusal_num += 1;
}
}
}
// groundedness + citation_coverage (only meaningful with RAG

View File

@@ -143,7 +143,7 @@ fn env_guard() -> std::sync::MutexGuard<'static, ()> {
static M: OnceLock<Mutex<()>> = OnceLock::new();
M.get_or_init(|| Mutex::new(()))
.lock()
.unwrap_or_else(|e| e.into_inner())
.unwrap_or_else(std::sync::PoisonError::into_inner)
}
#[test]

View File

@@ -147,7 +147,7 @@ fn lexical_opts() -> EvalRunOpts {
/// guard must outlive the call so concurrent tests don't reset the
/// var mid-run.
fn run_with_golden<F: FnOnce() -> R, R>(yaml: &Path, f: F) -> R {
let _g = GOLDEN_ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
let _g = GOLDEN_ENV_LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
// SAFETY: `KEBAB_EVAL_GOLDEN` is a benign env var; the GOLDEN_ENV_LOCK
// serializes mutations so concurrent tests don't race.
unsafe {

View File

@@ -34,3 +34,6 @@ anyhow = { workspace = true }
# `tokio::*` symbols, so the public/runtime API stays sync.
wiremock = { workspace = true }
tokio = { workspace = true, features = ["macros", "rt"] }
[lints]
workspace = true

View File

@@ -400,9 +400,9 @@ impl Iterator for OllamaStream {
// u32 saturation: even ~4G tokens is implausible for a
// single chat turn; we still saturate rather than
// panic on the unlikely case.
prompt_tokens: prompt_tokens.min(u32::MAX as u64) as u32,
completion_tokens: completion_tokens.min(u32::MAX as u64) as u32,
latency_ms: (total_duration_ns / 1_000_000).min(u32::MAX as u64) as u32,
prompt_tokens: prompt_tokens.min(u64::from(u32::MAX)) as u32,
completion_tokens: completion_tokens.min(u64::from(u32::MAX)) as u32,
latency_ms: (total_duration_ns / 1_000_000).min(u64::from(u32::MAX)) as u32,
};
return Some(Ok(TokenChunk::Done {
finish_reason,

View File

@@ -19,3 +19,6 @@ mock = []
[dev-dependencies]
proptest = { workspace = true }
[lints]
workspace = true

View File

@@ -27,3 +27,6 @@ kebab-core = { path = "../kebab-core" }
[dev-dependencies]
tempfile = { workspace = true }
[lints]
workspace = true

View File

@@ -65,8 +65,7 @@ async fn ask_tool_returns_answer_v1_with_refusal_on_empty_kb() {
// Empty KB → refusal (grounded:false) is normal — NOT isError.
assert!(
!result.is_error.unwrap_or(false),
"expected isError=false on refusal, got {:?}",
result
"expected isError=false on refusal, got {result:?}"
);
let content = result
@@ -86,7 +85,7 @@ async fn ask_tool_returns_answer_v1_with_refusal_on_empty_kb() {
"response should carry schema_version=answer.v1"
);
assert_eq!(
v.get("grounded").and_then(|b| b.as_bool()),
v.get("grounded").and_then(serde_json::Value::as_bool),
Some(false),
"empty KB should produce grounded=false"
);

View File

@@ -32,11 +32,13 @@ fn minimal_config(data_dir: &std::path::Path, workspace_root: &std::path::Path)
cfg.models.embedding.dimensions = 0;
// Force the LLM endpoint to a known-unreachable port so this test
// is robust against whether a real Ollama happens to be running
// on 127.0.0.1:11434 (the developer's box; CI; etc.). Combined
// with a tight `request_timeout_secs`, the multi-hop dispatch
// surfaces `model_unreachable` quickly and deterministically.
// on 127.0.0.1:11434 (the developer's box; CI; etc.). The
// `request_timeout_secs = 5` gives slow CI / Docker network stacks
// enough headroom that *some* error fires deterministically — the
// dispatch contract below only cares that `is_error` flipped, not
// which specific error code surfaced.
cfg.models.llm.endpoint = "http://127.0.0.1:1".to_string();
cfg.models.llm.request_timeout_secs = 2;
cfg.models.llm.request_timeout_secs = 5;
cfg
}
@@ -91,18 +93,12 @@ async fn ask_tool_routes_multi_hop_true_to_decompose_first() {
};
let mh_v: serde_json::Value = serde_json::from_str(&mh_text).unwrap();
assert_eq!(mh_v["schema_version"], "error.v1");
// The dispatch contract is "multi-hop reached the LLM". The exact
// error code depends on how the host TCP stack reports an
// unreachable port — fast-path `ECONNREFUSED` classifies as
// `model_unreachable`, but environments that take the connect
// timeout path (some CI / Docker network stacks) surface
// `timeout`. Accept either.
let mh_code = mh_v["code"].as_str().unwrap_or("");
assert!(
matches!(mh_code, "model_unreachable" | "timeout"),
"multi-hop dispatch must reach the LLM and surface model_unreachable/timeout; \
got code={mh_code:?} from {mh_v}"
);
// The dispatch contract is "multi-hop reached the LLM" — i.e.
// `is_error` fires because decompose tried to talk to the LLM and
// failed. Which *specific* error code lands (`model_unreachable`
// on fast ECONNREFUSED hosts, `timeout` on slow connect-timeout
// stacks, etc.) is implementation detail of the host TCP/HTTP
// path; pinning it here would just produce flakes on slow CI.
// Single-pass branch — empty KB short-circuits at retrieve, no LLM
// call happens, refusal Answer comes back as isError=false.

View File

@@ -44,7 +44,7 @@ async fn doctor_tool_returns_doctor_v1_json() {
// `ok` boolean must be present (value may be false in CI where Ollama
// is not reachable — that's expected and acceptable).
assert!(
v.get("ok").and_then(|b| b.as_bool()).is_some(),
v.get("ok").and_then(serde_json::Value::as_bool).is_some(),
"`ok` field missing in doctor.v1 response: {v}"
);
}

View File

@@ -98,8 +98,7 @@ async fn fetch_tool_chunk_returns_fetch_result_v1() {
assert!(
!result.is_error.unwrap_or(false),
"expected isError=false, got {:?}",
result
"expected isError=false, got {result:?}"
);
let content = result
@@ -123,7 +122,7 @@ async fn fetch_tool_chunk_returns_fetch_result_v1() {
"kind must be 'chunk'"
);
assert!(
v.get("chunk").is_some_and(|c| c.is_object()),
v.get("chunk").is_some_and(serde_json::Value::is_object),
"chunk payload must be populated for kind=chunk"
);
}

View File

@@ -49,7 +49,7 @@ async fn ingest_file_tool_returns_ingest_report_v1() {
v.get("schema_version").and_then(|s| s.as_str()),
Some("ingest_report.v1")
);
assert_eq!(v.get("new").and_then(|n| n.as_u64()), Some(1));
assert_eq!(v.get("new").and_then(serde_json::Value::as_u64), Some(1));
}
#[tokio::test]
@@ -91,7 +91,7 @@ async fn ingest_file_tool_idempotent_on_second_call() {
other => panic!("expected text, got {other:?}"),
};
let v1: serde_json::Value = serde_json::from_str(text1).unwrap();
assert_eq!(v1.get("new").and_then(|n| n.as_u64()), Some(1));
assert_eq!(v1.get("new").and_then(serde_json::Value::as_u64), Some(1));
// Second call — same content, expect unchanged=1.
let r2 = tokio::task::spawn_blocking({
@@ -112,6 +112,6 @@ async fn ingest_file_tool_idempotent_on_second_call() {
other => panic!("expected text, got {other:?}"),
};
let v2: serde_json::Value = serde_json::from_str(text2).unwrap();
assert_eq!(v2.get("new").and_then(|n| n.as_u64()), Some(0), "{v2:?}");
assert_eq!(v2.get("unchanged").and_then(|n| n.as_u64()), Some(1), "{v2:?}");
assert_eq!(v2.get("new").and_then(serde_json::Value::as_u64), Some(0), "{v2:?}");
assert_eq!(v2.get("unchanged").and_then(serde_json::Value::as_u64), Some(1), "{v2:?}");
}

View File

@@ -52,7 +52,7 @@ async fn ingest_stdin_tool_returns_ingest_report_v1() {
v.get("schema_version").and_then(|s| s.as_str()),
Some("ingest_report.v1")
);
assert_eq!(v.get("new").and_then(|n| n.as_u64()), Some(1));
assert_eq!(v.get("new").and_then(serde_json::Value::as_u64), Some(1));
}
#[tokio::test]

View File

@@ -49,8 +49,7 @@ async fn schema_tool_returns_schema_v1_json() {
assert!(
!result.is_error.unwrap_or(false),
"expected isError=false on healthy schema, got {:?}",
result
"expected isError=false on healthy schema, got {result:?}"
);
let content = result.content.first().expect("expected at least one content item");
@@ -68,7 +67,7 @@ async fn schema_tool_returns_schema_v1_json() {
"unexpected schema_version in: {v}"
);
assert_eq!(
v.get("capabilities").and_then(|c| c.get("mcp_server")).and_then(|b| b.as_bool()),
v.get("capabilities").and_then(|c| c.get("mcp_server")).and_then(serde_json::Value::as_bool),
Some(true),
"mcp_server capability flag should be true after fb-30",
);

View File

@@ -71,8 +71,7 @@ async fn search_tool_returns_search_response_v1() {
assert!(
!result.is_error.unwrap_or(false),
"expected isError=false, got {:?}",
result
"expected isError=false, got {result:?}"
);
let content = result
@@ -108,7 +107,7 @@ async fn search_tool_returns_search_response_v1() {
);
// truncated must be present (bool); next_cursor may be null on last page.
assert!(
v.get("truncated").and_then(|t| t.as_bool()).is_some(),
v.get("truncated").and_then(serde_json::Value::as_bool).is_some(),
"envelope should carry truncated:bool"
);
assert!(
@@ -172,8 +171,7 @@ async fn search_with_doc_id_filter_returns_only_target() {
);
assert!(
!unfiltered.is_error.unwrap_or(false),
"unfiltered search failed: {:?}",
unfiltered
"unfiltered search failed: {unfiltered:?}"
);
let unfiltered_text = match &unfiltered.content.first().unwrap().raw {
RawContent::Text(t) => t.text.clone(),
@@ -211,8 +209,7 @@ async fn search_with_doc_id_filter_returns_only_target() {
);
assert!(
!filtered.is_error.unwrap_or(false),
"filtered search failed: {:?}",
filtered
"filtered search failed: {filtered:?}"
);
let filtered_text = match &filtered.content.first().unwrap().raw {
RawContent::Text(t) => t.text.clone(),

View File

@@ -28,3 +28,6 @@ tracing = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
[lints]
workspace = true

View File

@@ -1,6 +1,6 @@
//! ONNX-backed `NliVerifier` adapter (mDeBERTa-v3 XNLI).
//!
//! PR-9b: real implementation. `new` resolves the cache directory from
//! `new` resolves the cache directory from
//! `config.storage.model_dir/nli/<sanitized-model-id>/` (matching the
//! fastembed adapter's pattern of `model_dir/fastembed/`) and stamps it
//! on `self`. The (potentially network-bound) model + tokenizer download
@@ -10,9 +10,9 @@
//! a model load on every CLI invocation.
//!
//! Per design §2.2.2 (Lazy init), §2.2.3 (truncation = `OnlyFirst`,
//! premise truncates, hypothesis preserved). PR-9c-1 will wire the
//! `[models.nli]` config section; until then the model id is hard-coded
//! to the Xenova mDeBERTa-v3 XNLI multilingual checkpoint.
//! premise truncates, hypothesis preserved). The model id flows from
//! `config.models.nli.model`; `config.models.nli.provider` selects the
//! verifier impl (only `"onnx"` is implemented in v0.18).
use std::path::PathBuf;
use std::sync::OnceLock;
@@ -26,14 +26,10 @@ use tokenizers::{
use crate::{NliScores, NliVerifier};
/// Default HuggingFace model id for the XNLI verifier. PR-9c-1 will
/// replace this constant with a `config.models.nli.model` lookup once
/// the `NliCfg` section lands. The Xenova repo packages the
/// mDeBERTa-v3-base XNLI multilingual checkpoint as ONNX under the
/// `onnx/model.onnx` path; the tokenizer ships at `tokenizer.json`.
const DEFAULT_MODEL_ID: &str = "Xenova/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7";
/// Filename inside the HF repo (NOT a path on disk).
/// Filename inside the HF repo (NOT a path on disk). The Xenova repo
/// packages the mDeBERTa-v3-base XNLI multilingual checkpoint (the
/// default `config.models.nli.model` — see `kebab-config::NliCfg::defaults`)
/// as ONNX under this path; the tokenizer ships at `tokenizer.json`.
const HF_MODEL_FILE: &str = "onnx/model.onnx";
/// Filename inside the HF repo (NOT a path on disk).
const HF_TOKENIZER_FILE: &str = "tokenizer.json";
@@ -75,9 +71,19 @@ impl OnnxNliVerifier {
/// and runs `create_dir_all` so the first `score` call can drop
/// straight into download + load without re-deriving paths.
///
/// PR-9c-1 will swap `DEFAULT_MODEL_ID` for `config.models.nli.model`.
/// Reads `config.models.nli.model` for the HuggingFace model id
/// and `config.models.nli.provider` to select the verifier impl —
/// only `"onnx"` is implemented in v0.18. The defaults live in
/// `kebab-config::NliCfg::defaults` so this path always receives
/// a non-empty model id.
pub fn new(config: &kebab_config::Config) -> Result<Self> {
let model_id = DEFAULT_MODEL_ID.to_string();
let provider = config.models.nli.provider.as_str();
if provider != "onnx" {
anyhow::bail!(
"kebab-nli: unsupported provider {provider:?} (only 'onnx' is implemented in v0.18)"
);
}
let model_id = config.models.nli.model.clone();
// Match kebab-embed-local's two-step expansion: data_dir first,
// then model_dir with `{data_dir}` substituted in.
@@ -235,11 +241,11 @@ impl NliVerifier for OnnxNliVerifier {
.encode((premise, hypothesis), true)
.map_err(|e| anyhow!("kebab-nli: tokenizer.encode failed: {e}"))?;
let ids: Vec<i64> = enc.get_ids().iter().map(|&u| u as i64).collect();
let ids: Vec<i64> = enc.get_ids().iter().map(|&u| i64::from(u)).collect();
let mask: Vec<i64> = enc
.get_attention_mask()
.iter()
.map(|&u| u as i64)
.map(|&u| i64::from(u))
.collect();
let seq_len = ids.len();
@@ -266,8 +272,7 @@ impl NliVerifier for OnnxNliVerifier {
let shape = logits.shape();
if shape != [1, LOGITS_LEN] {
anyhow::bail!(
"kebab-nli: unexpected logits shape {:?}, expected [1, {LOGITS_LEN}]",
shape
"kebab-nli: unexpected logits shape {shape:?}, expected [1, {LOGITS_LEN}]"
);
}
let l = [logits[[0, 0]], logits[[0, 1]], logits[[0, 2]]];
@@ -330,4 +335,74 @@ mod tests {
"unexpected error message: {err}"
);
}
/// Pins that `config.models.nli.model` flows into `OnnxNliVerifier`
/// instead of being silently overridden by a hardcoded constant.
/// `model_id` is a private field, but this test lives in the same
/// module so it can read it directly — the wiring contract is
/// "whatever the user puts in TOML / KEBAB_MODELS_NLI_MODEL is the
/// id the verifier uses".
#[test]
fn new_uses_config_model_id() {
let (_tmp, mut cfg) = tempdir_config();
cfg.models.nli.model = "custom-org/custom-nli-model".to_string();
let v = OnnxNliVerifier::new(&cfg).expect("new should succeed with custom model id");
assert_eq!(v.model_id, "custom-org/custom-nli-model");
// The custom id also flows into the on-disk cache_dir layout
// (sanitized so `/` doesn't escape the namespace).
let s = v.cache_dir.to_string_lossy();
assert!(
s.contains("custom-org_custom-nli-model"),
"cache_dir should embed sanitized custom model id: {s}"
);
}
/// Pins that a non-`"onnx"` provider value errors out at `new` —
/// the field is no longer silently ignored.
#[test]
fn new_rejects_unsupported_provider() {
let (_tmp, mut cfg) = tempdir_config();
cfg.models.nli.provider = "candle".to_string();
let result = OnnxNliVerifier::new(&cfg);
assert!(result.is_err(), "non-onnx provider must error");
let msg = result.err().unwrap().to_string();
assert!(
msg.contains("unsupported provider") && msg.contains("candle"),
"error should name the rejected provider: {msg}"
);
}
// ── sanitize_model_id pure-fn coverage ────────────────────────────────
//
// Three tests pin the behavior of the private `sanitize_model_id`
// helper. These are orthogonal to the H1 executor tests above
// (which cover config-wiring); these cover the transformation
// contract of the sanitizer itself.
#[test]
fn sanitize_model_id_replaces_slash_with_underscore() {
let input = "Xenova/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7";
let expected = "Xenova_mDeBERTa-v3-base-xnli-multilingual-nli-2mil7";
assert_eq!(sanitize_model_id(input), expected);
}
#[test]
fn sanitize_model_id_is_idempotent_on_already_sanitized() {
// Input with no '/' must come back byte-for-byte unchanged.
let input = "Xenova_mDeBERTa-v3-base-xnli-multilingual-nli-2mil7";
assert_eq!(sanitize_model_id(input), input);
}
#[test]
fn sanitize_model_id_leaves_other_chars_untouched() {
// Hyphens, digits, dots, and underscores must all pass through
// unchanged — only '/' is replaced with '_'.
let input = "org_name/model-name_v2.3-alpha";
let got = sanitize_model_id(input);
assert_eq!(got, "org_name_model-name_v2.3-alpha");
assert!(!got.contains('/'), "no slash must remain after sanitize");
assert!(got.contains('-'), "hyphens must be preserved");
assert!(got.contains('.'), "dots must be preserved");
assert!(got.contains('_'), "underscores must be preserved");
}
}

View File

@@ -111,8 +111,7 @@ fn long_premise_truncates_without_panic() {
] {
assert!(
x.is_finite(),
"channel {name} non-finite: {x} (full scores: {:?})",
s
"channel {name} non-finite: {x} (full scores: {s:?})"
);
}
// Softmax invariant — the three channels sum to ~1.

View File

@@ -25,3 +25,6 @@ tracing = { workspace = true }
# default scope, excluding dev-deps) confirms this.
kebab-parse-md = { path = "../kebab-parse-md" }
serde_json = { workspace = true }
[lints]
workspace = true

View File

@@ -27,3 +27,6 @@ tree-sitter-cpp = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
[lints]
workspace = true

View File

@@ -310,7 +310,7 @@ fn build_blocks(
// If there is only glue (no real unit) the single pushed "<top-level>"
// label should be "<module>" — rename it now.
if !has_real_unit {
for (sym, _, _, _) in units.iter_mut() {
for (sym, _, _, _) in &mut units {
if sym == "<top-level>" {
*sym = "<module>".to_string();
}
@@ -329,7 +329,7 @@ fn build_blocks(
lang: Some("c".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,
@@ -704,11 +704,11 @@ void print_result(int v) {
#[test]
fn c_extractor_deterministic_across_runs() {
let src = r#"
let src = r"
struct Node { int val; };
int sum(int a, int b) { return a + b; }
void noop(void) {}
"#;
";
let a = tests_support::extract_c(src, "x/det.c");
for _ in 0..20 {
assert_eq!(

View File

@@ -224,7 +224,7 @@ fn build_blocks_top(
units.push(("<module>".to_string(), 1, total.max(1), false));
}
if !has_real_unit {
for (sym, _, _, _) in units.iter_mut() {
for (sym, _, _, _) in &mut units {
if sym == "<top-level>" {
*sym = "<module>".to_string();
}
@@ -243,7 +243,7 @@ fn build_blocks_top(
lang: Some("cpp".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,
@@ -696,7 +696,7 @@ mod tests {
#[test]
fn namespace_and_class() {
let src = r#"
let src = r"
namespace ns {
class Foo {
public:
@@ -706,7 +706,7 @@ namespace ns {
int operator+(const Foo& o) { return 0; }
};
}
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(s.iter().any(|x| x == "ns::Foo"), "ns::Foo missing: {s:?}");
@@ -718,11 +718,11 @@ namespace ns {
#[test]
fn anonymous_namespace() {
let src = r#"
let src = r"
namespace {
void hidden_fn() {}
}
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(
@@ -733,11 +733,11 @@ namespace {
#[test]
fn nested_namespace_specifier() {
let src = r#"
let src = r"
namespace outer::inner {
void fn_in_nested() {}
}
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(
@@ -748,9 +748,9 @@ namespace outer::inner {
#[test]
fn out_of_class_method_def() {
let src = r#"
let src = r"
void ns::Foo::method() { }
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(
@@ -761,7 +761,7 @@ void ns::Foo::method() { }
#[test]
fn template_declaration() {
let src = r#"
let src = r"
template<typename T>
class Bar {
void tmpl_method() {}
@@ -769,7 +769,7 @@ class Bar {
template<typename T>
void tmpl_free_fn(T x) {}
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(s.iter().any(|x| x == "Bar"), "Bar class missing: {s:?}");
@@ -785,12 +785,12 @@ void tmpl_free_fn(T x) {}
#[test]
fn enum_and_concept() {
let src = r#"
let src = r"
enum class Color { Red, Green };
template<typename T>
concept Printable = requires(T t) { t.print(); };
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(s.iter().any(|x| x == "Color"), "Color missing: {s:?}");
@@ -813,11 +813,11 @@ extern "C" {
#[test]
fn conversion_operator() {
let src = r#"
let src = r"
class Foo {
operator bool() const { return true; }
};
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(
@@ -852,11 +852,11 @@ class Foo {
#[test]
fn ref_returning_operator() {
let src = r#"
let src = r"
class Foo {
Foo& operator=(const Foo& o) { return *this; }
};
"#;
";
let doc = tests_support::extract_cpp(src, "x/foo.cpp");
let s = syms(&doc);
assert!(
@@ -867,14 +867,14 @@ class Foo {
#[test]
fn deterministic_across_runs() {
let src = r#"
let src = r"
namespace ns {
class Foo {
void method() {}
};
}
void free_fn() {}
"#;
";
let a = tests_support::extract_cpp(src, "x/foo.cpp");
for _ in 0..20 {
assert_eq!(tests_support::extract_cpp(src, "x/foo.cpp").blocks, a.blocks);

View File

@@ -315,7 +315,7 @@ fn build_blocks(
// mod-prefix-agnostic.
let has_real_unit = units.iter().any(|(_, _, _, is_real)| *is_real);
if has_real_unit {
for (sym, _, _, is_real) in units.iter_mut() {
for (sym, _, _, is_real) in &mut units {
if !*is_real && sym.ends_with("<module>") {
let pre = &sym[..sym.len() - "<module>".len()];
*sym = format!("{pre}<top-level>");
@@ -335,7 +335,7 @@ fn build_blocks(
lang: Some("go".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,

View File

@@ -248,7 +248,7 @@ fn build_blocks(
// post-pass as 1B / 1C-Go).
let has_real_unit = units.iter().any(|(_, _, _, is_real)| *is_real);
if has_real_unit {
for (sym, _, _, is_real) in units.iter_mut() {
for (sym, _, _, is_real) in &mut units {
if !*is_real && sym.ends_with("<module>") {
let pre = &sym[..sym.len() - "<module>".len()];
*sym = format!("{pre}<top-level>");
@@ -268,7 +268,7 @@ fn build_blocks(
lang: Some("java".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,

View File

@@ -293,7 +293,7 @@ fn build_blocks(
let inner_kind = inner.kind();
match inner_kind {
"function_declaration" | "class_declaration" => {
let name_opt = name_text(&inner, src).map(|s| s.to_string());
let name_opt = name_text(&inner, src).map(std::string::ToString::to_string);
if let Some(name) = name_opt {
glue.retain(|(_, gs, _)| *gs < outer_s);
flush_glue(glue, units, mod_prefix, mod_path);
@@ -332,7 +332,7 @@ fn build_blocks(
| "function_declaration"
| "class"
| "class_declaration" => {
let name_opt = name_text(&value, src).map(|s| s.to_string());
let name_opt = name_text(&value, src).map(std::string::ToString::to_string);
let leaf =
name_opt.as_deref().unwrap_or("default").to_string();
glue.retain(|(_, gs, _)| *gs < outer_s);
@@ -402,7 +402,7 @@ fn build_blocks(
// post-pass as 1A Gap 1 / Python / TS).
let has_real_unit = units.iter().any(|(_, _, _, is_real)| *is_real);
if has_real_unit {
for (sym, _, _, is_real) in units.iter_mut() {
for (sym, _, _, is_real) in &mut units {
if !*is_real && sym.ends_with("<module>") {
let pre = &sym[..sym.len() - "<module>".len()];
*sym = format!("{pre}<top-level>");
@@ -422,7 +422,7 @@ fn build_blocks(
lang: Some("javascript".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,

View File

@@ -290,7 +290,7 @@ fn build_blocks(
// post-pass as 1B / 1C-Go / Java).
let has_real_unit = units.iter().any(|(_, _, _, is_real)| *is_real);
if has_real_unit {
for (sym, _, _, is_real) in units.iter_mut() {
for (sym, _, _, is_real) in &mut units {
if !*is_real && sym.ends_with("<module>") {
let pre = &sym[..sym.len() - "<module>".len()];
*sym = format!("{pre}<top-level>");
@@ -310,7 +310,7 @@ fn build_blocks(
lang: Some("kotlin".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,

View File

@@ -333,7 +333,7 @@ fn build_blocks(
// future-proofed) still demotes correctly.
let has_real_unit = units.iter().any(|(_, _, _, is_real)| *is_real);
if has_real_unit {
for (sym, _, _, is_real) in units.iter_mut() {
for (sym, _, _, is_real) in &mut units {
if !*is_real && sym.ends_with("<module>") {
let pre = &sym[..sym.len() - "<module>".len()];
*sym = format!("{pre}<top-level>");
@@ -353,7 +353,7 @@ fn build_blocks(
lang: Some("python".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,

View File

@@ -336,7 +336,7 @@ fn build_blocks(
// group is `<top-level>`, even a pure mod-decl group.
let has_real_unit = units.iter().any(|(_, _, _, is_real)| *is_real);
if has_real_unit {
for (sym, _, _, is_real) in units.iter_mut() {
for (sym, _, _, is_real) in &mut units {
// Match on the *suffix*: a glue group may now carry a module
// prefix (`inner::<module>`), so demote any `…<module>` to the
// same-prefixed `…<top-level>` rather than only the bare form.
@@ -359,7 +359,7 @@ fn build_blocks(
lang: Some("rust".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,

View File

@@ -326,7 +326,7 @@ fn build_blocks(
| "interface_declaration"
| "type_alias_declaration"
| "enum_declaration" => {
let name_opt = name_text(&inner, src).map(|s| s.to_string());
let name_opt = name_text(&inner, src).map(std::string::ToString::to_string);
if let Some(name) = name_opt {
glue.retain(|(_, gs, _)| *gs < outer_s);
flush_glue(glue, units, mod_prefix, mod_path);
@@ -376,7 +376,7 @@ fn build_blocks(
| "class"
| "class_declaration" => {
let name_opt =
name_text(&value, src).map(|s| s.to_string());
name_text(&value, src).map(std::string::ToString::to_string);
let leaf = name_opt
.as_deref()
.unwrap_or("default")
@@ -461,7 +461,7 @@ fn build_blocks(
// post-pass as 1A Gap 1 / Python).
let has_real_unit = units.iter().any(|(_, _, _, is_real)| *is_real);
if has_real_unit {
for (sym, _, _, is_real) in units.iter_mut() {
for (sym, _, _, is_real) in &mut units {
if !*is_real && sym.ends_with("<module>") {
let pre = &sym[..sym.len() - "<module>".len()];
*sym = format!("{pre}<top-level>");
@@ -481,7 +481,7 @@ fn build_blocks(
lang: Some("typescript".to_string()),
};
let block_id = id_for_block(doc_id, "code", &[], ordinal as u32, &span);
let code = lines[(line_start as usize - 1)..=(line_end as usize - 1)].join("\n");
let code = lines[(line_start as usize - 1)..(line_end as usize)].join("\n");
blocks.push(Block::Code(CodeBlock {
common: CommonBlock {
block_id,

View File

@@ -55,3 +55,6 @@ base64 = { workspace = true }
# at runtime) is preserved.
kebab-llm = { path = "../kebab-llm", features = ["mock"] }
kebab-llm-local = { path = "../kebab-llm-local" }
[lints]
workspace = true

View File

@@ -198,7 +198,7 @@ pub fn apply_caption(
/// language; everything else falls through to English.
fn build_prompt(lang_hint: Option<&str>) -> (String, String) {
match lang_hint {
Some("ko") | Some("kor") => (
Some("ko" | "kor") => (
"이미지를 한 문장으로 객관적으로 설명한다. 추측은 피하고, \
보이는 것만 적는다. 마크다운 / 따옴표 / 부가 설명 없이 \
한 문장만 출력."

View File

@@ -103,7 +103,7 @@ fn ascii_field(exif: &exif::Exif, tag: Tag) -> Option<String> {
fn u32_field(exif: &exif::Exif, tag: Tag) -> Option<u32> {
let f = exif.get_field(tag, In::PRIMARY)?;
match &f.value {
Value::Short(v) => v.first().map(|x| *x as u32),
Value::Short(v) => v.first().map(|x| u32::from(*x)),
Value::Long(v) => v.first().copied(),
_ => None,
}
@@ -177,7 +177,7 @@ fn rational_to_f64(r: &exif::Rational) -> Option<f64> {
if r.denom == 0 {
None
} else {
Some(r.num as f64 / r.denom as f64)
Some(f64::from(r.num) / f64::from(r.denom))
}
}

View File

@@ -162,9 +162,7 @@ mod tests {
let ratio = w as f32 / h as f32;
assert!(
(ratio - 4.0 / 3.0).abs() < 0.02,
"aspect drift: in=4/3 out={}/{}={ratio}",
w,
h
"aspect drift: in=4/3 out={w}/{h}={ratio}"
);
}

View File

@@ -142,7 +142,7 @@ fn splice_exif_into_jpeg(exif_blob: Vec<u8>) -> Vec<u8> {
// + exif_blob.len(). Pre-validated against the 0xFFFF segment limit.
let app1_payload_len = 2 + 6 + exif_blob.len();
assert!(
app1_payload_len <= u16::MAX as usize,
u16::try_from(app1_payload_len).is_ok(),
"EXIF segment too large for a single APP1"
);
out.extend_from_slice(&(app1_payload_len as u16).to_be_bytes());

View File

@@ -80,8 +80,8 @@ fn jpeg_with_exif_gps_captures_whitelisted_tags() {
Some(&Value::String("2024-08-15T12:34:56".into()))
);
assert_eq!(exif.get("orientation"), Some(&Value::Number(1.into())));
let lat = exif.get("gps_lat").and_then(|v| v.as_f64()).expect("gps_lat");
let lon = exif.get("gps_lon").and_then(|v| v.as_f64()).expect("gps_lon");
let lat = exif.get("gps_lat").and_then(serde_json::Value::as_f64).expect("gps_lat");
let lon = exif.get("gps_lon").and_then(serde_json::Value::as_f64).expect("gps_lon");
assert!((lat - 37.5).abs() < 1e-6, "lat={lat}");
assert!((lon - 127.0).abs() < 1e-6, "lon={lon}");
@@ -281,7 +281,7 @@ fn jpeg_with_gps_out_of_range_drops_latitude() {
!exif.contains_key("gps_lat"),
"out-of-range latitude must be dropped"
);
let lon = exif.get("gps_lon").and_then(|v| v.as_f64()).expect("gps_lon");
let lon = exif.get("gps_lon").and_then(serde_json::Value::as_f64).expect("gps_lon");
assert!((lon - 127.0).abs() < 1e-6);
}

View File

@@ -388,7 +388,7 @@ async fn ocr_integration_real_ollama_transcribes_text() {
.expect("blocking task panicked")
.expect("real Ollama OCR must succeed");
eprintln!("integration OCR result: {:?}", text.joined);
let normalized = text.joined.to_lowercase().replace(",", "").replace(".", "");
let normalized = text.joined.to_lowercase().replace(',', "").replace('.', "");
assert!(
normalized.contains("hello") && normalized.contains("world"),
"integration OCR did not capture expected text: {:?}",

View File

@@ -38,3 +38,6 @@ lingua = { version = "1.8", default-features = false, features = [
[dev-dependencies]
serde_json = { workspace = true }
[lints]
workspace = true

View File

@@ -60,18 +60,15 @@ pub fn parse_blocks(
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
parse_blocks_inner(body, body_offset_lines)
}));
match result {
Ok(out) => Ok(out),
Err(_) => {
tracing::warn!("parse_blocks panicked on adversarial input; returning empty");
Ok((
Vec::new(),
vec![Warning {
kind: WarningKind::ExtractFailed,
note: "pulldown-cmark panicked; body discarded".to_string(),
}],
))
}
if let Ok(out) = result { Ok(out) } else {
tracing::warn!("parse_blocks panicked on adversarial input; returning empty");
Ok((
Vec::new(),
vec![Warning {
kind: WarningKind::ExtractFailed,
note: "pulldown-cmark panicked; body discarded".to_string(),
}],
))
}
}
@@ -102,9 +99,7 @@ fn parse_blocks_inner(body: &[u8], body_offset_lines: u32) -> (Vec<ParsedBlock>,
// possibly-inverted spans would be more harmful than dropping output.
if state.overflow_detected {
let at = state
.overflow_at_body_line
.map(|n| n.to_string())
.unwrap_or_else(|| "?".to_string());
.overflow_at_body_line.map_or_else(|| "?".to_string(), |n| n.to_string());
return (
Vec::new(),
vec![Warning {
@@ -339,10 +334,10 @@ impl InlineBuf {
// `Inline::Link.text` field. Code/strong/emph inside a link are
// collapsed to their plain text — `Inline::Link` doesn't model
// formatting inside the link.
let flat = if !text.is_empty() {
text
} else {
let flat = if text.is_empty() {
flatten_inlines_to_text(&kids)
} else {
text
};
self.push_inline(Inline::Link { text: flat, href });
}
@@ -364,10 +359,10 @@ impl InlineBuf {
InlineFrame::Strong(kids) => self.push_inline(Inline::Strong { children: kids }),
InlineFrame::Emph(kids) => self.push_inline(Inline::Emph { children: kids }),
InlineFrame::Link { href, text, kids } => {
let flat = if !text.is_empty() {
text
} else {
let flat = if text.is_empty() {
flatten_inlines_to_text(&kids)
} else {
text
};
self.push_inline(Inline::Link { text: flat, href });
}
@@ -528,20 +523,17 @@ impl<'a> WalkState<'a> {
// inverted span. Without this guard, debug builds panic with
// "attempt to add with overflow" (caught by `catch_unwind`, masking
// the real cause) and release builds wrap to `start > end`.
match (
if let (Some(start), Some(end)) = (
start_body.checked_add(self.body_offset_lines),
end_body.checked_add(self.body_offset_lines),
) {
(Some(start), Some(end)) => SourceSpan::Line { start, end },
_ => {
if !self.overflow_detected {
self.overflow_detected = true;
self.overflow_at_body_line = Some(start_body);
}
SourceSpan::Line {
start: start_body.saturating_add(self.body_offset_lines),
end: end_body.saturating_add(self.body_offset_lines),
}
) { SourceSpan::Line { start, end } } else {
if !self.overflow_detected {
self.overflow_detected = true;
self.overflow_at_body_line = Some(start_body);
}
SourceSpan::Line {
start: start_body.saturating_add(self.body_offset_lines),
end: end_body.saturating_add(self.body_offset_lines),
}
}
}
@@ -677,11 +669,11 @@ impl<'a> WalkState<'a> {
}
Event::Start(Tag::Strong) => {
self.flag_non_image_in_paragraph();
self.with_current_inlines(|buf| buf.open_strong());
self.with_current_inlines(InlineBuf::open_strong);
}
Event::Start(Tag::Emphasis) => {
self.flag_non_image_in_paragraph();
self.with_current_inlines(|buf| buf.open_emph());
self.with_current_inlines(InlineBuf::open_emph);
}
Event::Start(Tag::Link { dest_url, .. }) => {
self.flag_non_image_in_paragraph();
@@ -991,13 +983,13 @@ impl<'a> WalkState<'a> {
}
}
Event::End(TagEnd::Strong) => {
self.with_current_inlines(|buf| buf.close_strong());
self.with_current_inlines(InlineBuf::close_strong);
}
Event::End(TagEnd::Emphasis) => {
self.with_current_inlines(|buf| buf.close_emph());
self.with_current_inlines(InlineBuf::close_emph);
}
Event::End(TagEnd::Link) => {
self.with_current_inlines(|buf| buf.close_link());
self.with_current_inlines(InlineBuf::close_link);
}
Event::End(TagEnd::Image) => {
if let Some(Frame::Paragraph { image_depth, .. }) = self.frames.last_mut() {
@@ -1480,8 +1472,7 @@ mod tests {
inl,
Inline::Text { .. } | Inline::Code { .. } | Inline::Link { .. } | Inline::Strong { .. } | Inline::Emph { .. }
),
"unexpected inline kind: {:?}",
inl
"unexpected inline kind: {inl:?}"
);
}
}
@@ -1503,7 +1494,7 @@ mod tests {
// First item should contain "a" plus a flattened rendering
// of the nested sub-list.
let flat = flatten_inlines_to_text(&items[0]);
assert!(flat.contains("a"), "first item missing 'a': {flat:?}");
assert!(flat.contains('a'), "first item missing 'a': {flat:?}");
assert!(flat.contains("- x"), "first item missing '- x': {flat:?}");
assert!(flat.contains("- y"), "first item missing '- y': {flat:?}");
let flat2 = flatten_inlines_to_text(&items[1]);

View File

@@ -110,7 +110,7 @@ pub fn parse_frontmatter(
}
};
let body_start = span_opt.map(|s| s.end).unwrap_or(0);
let body_start = span_opt.map_or(0, |s| s.end);
let body = &bytes[body_start..];
let metadata = derive_metadata(raw_opt, hints, body, &mut warnings);
@@ -430,30 +430,24 @@ fn derive_metadata(
// ---- source_type ----
let source_type = match raw.source_type.as_deref() {
None => SourceType::Markdown,
Some(s) => match parse_source_type(s) {
Some(st) => st,
None => {
warnings.push(Warning {
kind: WarningKind::MalformedFrontmatter,
note: format!("unknown source_type={s}, defaulted to markdown"),
});
SourceType::Markdown
}
Some(s) => if let Some(st) = parse_source_type(s) { st } else {
warnings.push(Warning {
kind: WarningKind::MalformedFrontmatter,
note: format!("unknown source_type={s}, defaulted to markdown"),
});
SourceType::Markdown
},
};
// ---- trust_level ----
let trust_level = match raw.trust_level.as_deref() {
None => TrustLevel::Primary,
Some(s) => match parse_trust_level(s) {
Some(tl) => tl,
None => {
warnings.push(Warning {
kind: WarningKind::MalformedFrontmatter,
note: format!("unknown trust_level={s}, defaulted to primary"),
});
TrustLevel::Primary
}
Some(s) => if let Some(tl) = parse_trust_level(s) { tl } else {
warnings.push(Warning {
kind: WarningKind::MalformedFrontmatter,
note: format!("unknown trust_level={s}, defaulted to primary"),
});
TrustLevel::Primary
},
};

View File

@@ -24,3 +24,6 @@ lopdf = "0.32"
[dev-dependencies]
blake3 = { workspace = true }
[lints]
workspace = true

View File

@@ -111,7 +111,7 @@ impl Extractor for PdfTextExtractor {
});
let mut blocks: Vec<Block> = Vec::with_capacity(pages.len());
for (&page_num, _) in pages.iter() {
for &page_num in pages.keys() {
let (text, warning) = match page_text::extract_one(&pdf_doc, page_num) {
Ok(t) if !t.trim().is_empty() => (t, None),
Ok(_) => (

View File

@@ -10,3 +10,6 @@ description = "Parser intermediate representations (no parser libs allowed)"
[dependencies]
kebab-core = { path = "../kebab-core" }
serde = { workspace = true }
[lints]
workspace = true

View File

@@ -28,3 +28,6 @@ kebab-llm = { path = "../kebab-llm", features = ["mock"] }
tempfile = { workspace = true }
rusqlite = { workspace = true }
serde_json = { workspace = true }
[lints]
workspace = true

View File

@@ -318,7 +318,7 @@ impl RagPipeline {
});
}
let chunks_returned = u32::try_from(hits.len()).unwrap_or(u32::MAX);
let top_score = hits.first().map(|h| h.retrieval.fusion_score).unwrap_or(0.0);
let top_score = hits.first().map_or(0.0, |h| h.retrieval.fusion_score);
tracing::debug!(
target: "kebab-rag",
@@ -856,7 +856,7 @@ impl RagPipeline {
});
}
let chunks_returned = u32::try_from(pool.len()).unwrap_or(u32::MAX);
let top_score = pool.first().map(|h| h.retrieval.fusion_score).unwrap_or(0.0);
let top_score = pool.first().map_or(0.0, |h| h.retrieval.fusion_score);
// ── 3. Score gate / no chunks ──────────────────────────────────────
// PR-3b-ii: forward the partial hop trace into the refusal so
@@ -1038,7 +1038,13 @@ impl RagPipeline {
"verifier must be Some when nli_threshold > 0.0 \
(kebab-app's open_with_config enforces this invariant)",
);
let (truncated_premise, _was_truncated) = truncate_for_nli(&packed_text, &acc);
let (truncated_premise, was_truncated) = truncate_for_nli(&packed_text);
if was_truncated {
tracing::debug!(
target: "kebab-rag",
"NLI premise truncated to MAX_NLI_PREMISE_CHARS for entailment check"
);
}
match v.score(&truncated_premise, &acc) {
Ok(scores) => {
let passed = scores.entailment >= self.config.rag.nli_threshold;
@@ -1149,7 +1155,7 @@ impl RagPipeline {
refusal_phrase_detected = matched_refusal_phrase,
finish_reason = ?finish_reason,
chunks_used,
hops = answer.hops.as_ref().map(|v| v.len()).unwrap_or(0),
hops = answer.hops.as_ref().map_or(0, std::vec::Vec::len),
"kb-rag: multi-hop ask done"
);
@@ -1388,16 +1394,13 @@ impl RagPipeline {
let chunk_full =
<SqliteStore as kebab_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
.context("kb-rag: docs.get_chunk")?;
let chunk_text = match chunk_full {
Some(c) => c.text,
None => {
tracing::warn!(
target: "kebab-rag",
chunk_id = %hit.chunk_id.0,
"kb-rag: chunk not found in store; skipping"
);
continue;
}
let chunk_text = if let Some(c) = chunk_full { c.text } else {
tracing::warn!(
target: "kebab-rag",
chunk_id = %hit.chunk_id.0,
"kb-rag: chunk not found in store; skipping"
);
continue;
};
let header = format!(
"[#{n}] doc={} heading={} span={}\n",
@@ -1636,9 +1639,9 @@ impl RagPipeline {
// PR-9c-2: NLI refusal still carries the hop trace built
// up to step 8.5 — synthesize ran, so the trace is the
// full decompose+decide chain (terminal Synthesize hop is
// NOT appended for the refusal path; cleanup deferred to
// a follow-up if the user-visible trace shape needs the
// synthesize entry).
// NOT appended for the refusal path). See
// `tasks/HOTFIXES.md` "PR-9 NLI refusal: terminal Synthesize
// hop omitted" for follow-up.
hops: Some(hops),
verification: Some(v),
};
@@ -1800,15 +1803,11 @@ pub(crate) const MULTI_HOP_MAX_SUB_QUERIES_HARD_CAP: usize = 10;
pub const MAX_NLI_PREMISE_CHARS: usize = 4 * 400;
/// p9-fb-41 PR-9c-2: truncate `premise` to fit the NLI input budget
/// while preserving `hypothesis` in full. Returns `(truncated_premise,
/// was_truncated)`. `was_truncated` is informational for tracing —
/// the v0.18 wire doesn't surface it; a v0.19+ extension might.
///
/// `_hypothesis` is currently unused — placeholder for the v0.18.1
/// token-budget version that would carve the budget *around* the
/// hypothesis. Kept on the signature to preserve the contract from
/// spec §2.2.3 / spec §3 PR-9c-2.
pub fn truncate_for_nli(premise: &str, _hypothesis: &str) -> (String, bool) {
/// (`MAX_NLI_PREMISE_CHARS`). Returns `(truncated_premise,
/// was_truncated)`; `was_truncated` is informational so the callsite
/// can log a truncation tracing event (the v0.18 wire doesn't surface
/// it).
pub fn truncate_for_nli(premise: &str) -> (String, bool) {
if premise.chars().count() <= MAX_NLI_PREMISE_CHARS {
(premise.to_string(), false)
} else {
@@ -1999,13 +1998,11 @@ fn strip_markdown_json_fence(s: &str) -> &str {
let after_open = trimmed
.strip_prefix("```json")
.or_else(|| trimmed.strip_prefix("```"))
.map(|rest| rest.trim_start_matches('\n'))
.unwrap_or(trimmed);
.map_or(trimmed, |rest| rest.trim_start_matches('\n'));
let inner = after_open
.trim_end()
.strip_suffix("```")
.map(|rest| rest.trim_end())
.unwrap_or(after_open);
.map_or(after_open, str::trim_end);
inner.trim()
}

View File

@@ -147,7 +147,7 @@ pub fn mk_hit_with_indexed_at(
chunk_id: ChunkId(chunk_id.to_string()),
doc_id: DocumentId(doc_id.to_string()),
doc_path: p.clone(),
heading_path: heading.iter().map(|s| s.to_string()).collect(),
heading_path: heading.iter().map(std::string::ToString::to_string).collect(),
section_label: None,
snippet: "snippet".to_string(),
citation: Citation::Line {

View File

@@ -68,7 +68,7 @@ fn multi_hop_decide_stop_triggers_synthesize() {
// Three LLM calls in order: decompose → decide → synthesize.
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#,
r#"[]"#,
r"[]",
"answer body [#1]",
]));
let lm_handle = lm.clone();
@@ -131,7 +131,7 @@ fn multi_hop_decide_continue_adds_more_chunks() {
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#,
r#"["q2"]"#,
r#"[]"#,
r"[]",
"synthesized [#1] [#2]",
]));
let lm_handle = lm.clone();
@@ -255,7 +255,7 @@ fn multi_hop_pool_chunks_dedup_by_chunk_id() {
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1", "q2"]"#,
r#"[]"#,
r"[]",
"merged answer [#1]",
]));
let lm_handle = lm.clone();
@@ -444,7 +444,7 @@ fn multi_hop_refuse_score_gate_preserves_hops_trace() {
// never runs because we refuse before pack_context.
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#,
r#"[]"#,
r"[]",
]));
let lm_handle = lm.clone();
let lm_dyn: Arc<dyn LanguageModel> = lm;
@@ -594,7 +594,7 @@ fn multi_hop_above_probe_gate_proceeds_to_decompose() {
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#,
r#"[]"#,
r"[]",
"answer [#1]",
]));
let lm_handle = lm.clone();
@@ -631,7 +631,7 @@ fn multi_hop_above_probe_gate_proceeds_to_decompose() {
// `Answer.verification` stays `None` (no verifier attached).
// 4. `multi_hop_nli_model_unavailable_refuses` — verifier returns `Err` →
// refusal with `RefusalReason::NliModelUnavailable` + `verification = None`.
// 5. `multi_hop_truncate_for_nli_preserves_hypothesis` — pure unit test on
// 5. `multi_hop_truncate_for_nli_char_budget` — pure unit test on
// `truncate_for_nli`'s char-budget contract.
/// Helper to build a "valid multi-hop happy-path" scenario where probe +
@@ -649,7 +649,7 @@ fn happy_multi_hop_env() -> (RagEnv, Arc<ScriptedRetriever>, Arc<ScriptedLm>) {
let retriever = Arc::new(ScriptedRetriever::new(vec![hits.clone(), hits]));
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#,
r#"[]"#,
r"[]",
"answer body [#1]",
]));
(env, retriever, lm)
@@ -775,12 +775,11 @@ fn multi_hop_nli_model_unavailable_refuses() {
}
#[test]
fn multi_hop_truncate_for_nli_preserves_hypothesis() {
// Long premise (>1600 chars) gets truncated, short hypothesis is
// passed unchanged (signature placeholder for v0.18.1 token-budget
// version). MAX_NLI_PREMISE_CHARS = 4 * 400 = 1600.
fn multi_hop_truncate_for_nli_char_budget() {
// Long premise (>1600 chars) gets truncated.
// MAX_NLI_PREMISE_CHARS = 4 * 400 = 1600.
let long_premise: String = "a".repeat(2000);
let (truncated, was_truncated) = truncate_for_nli(&long_premise, "short hypothesis");
let (truncated, was_truncated) = truncate_for_nli(&long_premise);
assert!(was_truncated);
assert_eq!(
truncated.chars().count(),
@@ -790,20 +789,20 @@ fn multi_hop_truncate_for_nli_preserves_hypothesis() {
// Short premise (under budget): no truncation, `was_truncated = false`.
let short_premise = "short premise text";
let (passthrough, was_truncated) = truncate_for_nli(short_premise, "anything");
let (passthrough, was_truncated) = truncate_for_nli(short_premise);
assert!(!was_truncated);
assert_eq!(passthrough, short_premise);
// Multi-byte safety: 1600 Korean chars (3 bytes each in UTF-8) fits
// within the char budget even though byte length exceeds 4800.
let kr_short: String = "".repeat(1600);
let (passthrough_kr, was_truncated_kr) = truncate_for_nli(&kr_short, "h");
let (passthrough_kr, was_truncated_kr) = truncate_for_nli(&kr_short);
assert!(!was_truncated_kr, "1600 KR chars == budget, no truncation");
assert_eq!(passthrough_kr.chars().count(), 1600);
// Multi-byte over-budget: truncation must count chars, not bytes.
let kr_long: String = "".repeat(2000);
let (truncated_kr, was_truncated_kr) = truncate_for_nli(&kr_long, "h");
let (truncated_kr, was_truncated_kr) = truncate_for_nli(&kr_long);
assert!(was_truncated_kr);
assert_eq!(
truncated_kr.chars().count(),

View File

@@ -0,0 +1,100 @@
//! Pins the documented facade-invariant panic in `ask_multi_hop`.
//!
//! When `cfg.rag.nli_threshold > 0` but no verifier is attached via
//! `.with_verifier()`, the `expect` at `pipeline.rs` step 8.5 fires
//! with the message "verifier must be Some when nli_threshold > 0.0".
//!
//! This is a **contract test**: it documents the invariant so that a
//! future refactor replacing the `expect` with `bail!` (or a different
//! message) is caught by the test suite, prompting an explicit decision
//! rather than a silent behavior change.
//!
//! The kebab-app facade (`App::open_with_config`) always pairs
//! `nli_threshold > 0` with a constructed `OnnxNliVerifier`, so this
//! panic is unreachable via the normal CLI / MCP / TUI paths — only
//! a direct `RagPipeline::new(...)` caller without `.with_verifier()`
//! can trigger it.
mod common;
use std::sync::Arc;
use common::{RagEnv, ScriptedLm, ScriptedRetriever, id32, mk_hit};
use kebab_core::{LanguageModel, Retriever, SearchMode};
use kebab_rag::{AskOpts, RagPipeline};
/// Minimal multi-hop `AskOpts` mirroring the pattern used in
/// `multi_hop.rs` — lexical mode, deterministic seed, no streaming.
fn multi_hop_opts() -> AskOpts {
AskOpts {
k: 5,
explain: false,
mode: SearchMode::Lexical,
temperature: Some(0.0),
seed: Some(0),
stream_sink: None,
history: Vec::new(),
conversation_id: None,
turn_index: None,
multi_hop: true,
}
}
/// Building the "happy-path" scenario inline: probe retrieve passes
/// the score gate, decompose emits one sub-query, decide signals stop,
/// and synthesize produces a non-empty cited answer. This is the minimal
/// scenario that reaches step 8.5 (NLI gate) in `ask_multi_hop`.
fn setup_happy_pipeline_no_verifier(nli_threshold: f32) -> (RagPipeline, RagEnv) {
let env = RagEnv::new();
let cid = id32("c1");
let did = id32("d1");
env.seed_chunk(&cid, &did, "notes/a.md", "Body text.", &["Intro"]);
let hits = vec![mk_hit(1, &cid, &did, "notes/a.md", 0.85, &["Intro"])];
// Entry 0 = probe retrieve (pre-decompose gate check).
// Entry 1 = decompose-driven retrieve for "q1".
let retriever = Arc::new(ScriptedRetriever::new(vec![hits.clone(), hits]));
let retriever_dyn: Arc<dyn Retriever> = retriever;
// Three LLM calls: decompose → decide (stop) → synthesize.
// Synthesize emits a non-empty answer so step 8.5 is reached.
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#, // decompose
r"[]", // decide: stop signal
"answer body [#1]", // synthesize: non-empty → step 8.5 entered
]));
let lm_dyn: Arc<dyn LanguageModel> = lm;
let mut cfg = env.config.clone();
cfg.rag.nli_threshold = nli_threshold;
// Intentionally NO `.with_verifier()` — this is the condition under test.
let pipeline = RagPipeline::new(cfg, retriever_dyn, lm_dyn, env.sqlite.clone());
(pipeline, env)
}
#[test]
#[should_panic(expected = "verifier must be Some when nli_threshold > 0.0")]
fn ask_multi_hop_panics_when_threshold_positive_but_verifier_none() {
// nli_threshold = 0.5 (gate enabled) but the pipeline has no verifier
// because `.with_verifier()` was never called. The `expect` at
// pipeline.rs step 8.5 fires once synthesize produces a non-empty answer.
let (pipeline, _env) = setup_happy_pipeline_no_verifier(0.5);
// Unwrap is intentional: we're asserting the panic, not an Ok/Err return.
let _ = pipeline.ask("compound", multi_hop_opts());
}
/// Companion: threshold = 0.0 (gate disabled) with no verifier must
/// NOT panic — the `if nli_threshold > 0.0` guard short-circuits the
/// entire step 8.5 block.
#[test]
fn ask_multi_hop_does_not_panic_when_threshold_zero_and_verifier_none() {
let (pipeline, _env) = setup_happy_pipeline_no_verifier(0.0);
let answer = pipeline
.ask("compound", multi_hop_opts())
.expect("threshold = 0.0 skips NLI gate; no panic expected");
// Gate is disabled → verification summary stays None.
assert!(
answer.verification.is_none(),
"nli_threshold = 0.0 must leave Answer.verification = None"
);
}

View File

@@ -0,0 +1,177 @@
//! Tests that the NLI refusal paths emit a `StreamEvent::Final` event
//! into `opts.stream_sink`.
//!
//! Coverage:
//! 1. `nli_verification_fail_emits_final_stream_event_with_refusal` —
//! `MockNliVerifier::fail()` → `RefusalReason::NliVerificationFailed`
//! arrives as the payload of the terminal `StreamEvent::Final`.
//! 2. `nli_model_unavailable_emits_final_stream_event_with_refusal` —
//! `MockNliVerifier::err()` → `RefusalReason::NliModelUnavailable`
//! arrives as the payload of the terminal `StreamEvent::Final`.
//!
//! Note: `ask_multi_hop` does NOT have a separate `_streaming` entrypoint.
//! Streaming is handled by the `stream_sink: Option<Sender<StreamEvent>>`
//! field on `AskOpts`. Both refusal helpers (`refuse_nli_verification` and
//! `refuse_nli_model_unavailable`) fire `sink.send(StreamEvent::Final { … })`
//! before returning — these tests pin that wire shape.
mod common;
use std::sync::Arc;
use std::sync::mpsc;
use common::{MockNliVerifier, RagEnv, ScriptedLm, ScriptedRetriever, id32, mk_hit};
use kebab_core::{LanguageModel, RefusalReason, Retriever, SearchMode};
use kebab_nli::NliVerifier;
use kebab_rag::{AskOpts, RagPipeline, StreamEvent};
// ── shared helpers ─────────────────────────────────────────────────────────
/// Build the minimal happy-path scenario that reaches step 8.5 (NLI gate):
/// probe passes, decompose → one sub-query, decide → stop, synthesize →
/// non-empty answer. Returns the env, scripted retriever, and scripted LM.
fn happy_env_for_stream() -> (RagEnv, Arc<ScriptedRetriever>, Arc<ScriptedLm>) {
let env = RagEnv::new();
let cid = id32("c1");
let did = id32("d1");
env.seed_chunk(&cid, &did, "notes/a.md", "Body text.", &["Intro"]);
let hits = vec![mk_hit(1, &cid, &did, "notes/a.md", 0.85, &["Intro"])];
// Entry 0 = probe, entry 1 = decompose-driven retrieve.
let retriever = Arc::new(ScriptedRetriever::new(vec![hits.clone(), hits]));
let lm = Arc::new(ScriptedLm::new(vec![
r#"["q1"]"#, // decompose
r"[]", // decide: stop
"answer body [#1]", // synthesize: non-empty so NLI gate runs
]));
(env, retriever, lm)
}
/// Multi-hop `AskOpts` with a `stream_sink` wired in so every pipeline
/// stage emits `StreamEvent`s into `tx`.
fn multi_hop_opts_with_sink(tx: mpsc::Sender<StreamEvent>) -> AskOpts {
AskOpts {
k: 5,
explain: false,
mode: SearchMode::Lexical,
temperature: Some(0.0),
seed: Some(0),
stream_sink: Some(tx),
history: Vec::new(),
conversation_id: None,
turn_index: None,
multi_hop: true,
}
}
/// Drain `rx` and return the first `StreamEvent::Final` found, panicking
/// with a clear message if none is present.
fn expect_final_event(rx: mpsc::Receiver<StreamEvent>) -> StreamEvent {
let events: Vec<StreamEvent> = rx.try_iter().collect();
events
.into_iter()
.find(|e| matches!(e, StreamEvent::Final { .. }))
.expect("pipeline must emit at least one StreamEvent::Final")
}
// ── 1. NliVerificationFailed ───────────────────────────────────────────────
#[test]
fn nli_verification_fail_emits_final_stream_event_with_refusal() {
let (env, retriever, lm) = happy_env_for_stream();
let mut cfg = env.config.clone();
cfg.rag.nli_threshold = 0.5; // entailment 0.1 < 0.5 → refusal
let retriever_dyn: Arc<dyn Retriever> = retriever;
let lm_dyn: Arc<dyn LanguageModel> = lm;
let verifier = MockNliVerifier::fail(); // entailment score = 0.1
let verifier_dyn: Arc<dyn NliVerifier> = verifier;
let (tx, rx) = mpsc::channel::<StreamEvent>();
let pipeline = RagPipeline::new(cfg, retriever_dyn, lm_dyn, env.sqlite.clone())
.with_verifier(verifier_dyn);
let answer = pipeline
.ask("compound", multi_hop_opts_with_sink(tx))
.expect("pipeline returns Ok even on NLI refusal");
// Synchronous return value.
assert_eq!(
answer.refusal_reason,
Some(RefusalReason::NliVerificationFailed),
"return value must carry NliVerificationFailed"
);
assert!(!answer.grounded, "NLI refusal must not be grounded");
// Stream wire shape: terminal Final event must carry matching refusal.
let final_event = expect_final_event(rx);
match final_event {
StreamEvent::Final { answer: streamed } => {
assert_eq!(
streamed.refusal_reason,
Some(RefusalReason::NliVerificationFailed),
"Final event's answer must carry NliVerificationFailed"
);
assert!(!streamed.grounded);
// verification summary is stamped even on the refusal path.
let v = streamed
.verification
.expect("NliVerificationFailed carries a VerificationSummary");
assert!(!v.nli_passed);
assert!((v.nli_score - 0.1).abs() < 1e-5, "score: {}", v.nli_score);
}
other => panic!("expected StreamEvent::Final, got {other:?}"),
}
}
// ── 2. NliModelUnavailable ─────────────────────────────────────────────────
#[test]
fn nli_model_unavailable_emits_final_stream_event_with_refusal() {
let (env, retriever, lm) = happy_env_for_stream();
let mut cfg = env.config.clone();
cfg.rag.nli_threshold = 0.5; // gate enabled; verifier will error
let retriever_dyn: Arc<dyn Retriever> = retriever;
let lm_dyn: Arc<dyn LanguageModel> = lm;
let verifier = MockNliVerifier::err(); // returns anyhow::Error
let verifier_dyn: Arc<dyn NliVerifier> = verifier;
let (tx, rx) = mpsc::channel::<StreamEvent>();
let pipeline = RagPipeline::new(cfg, retriever_dyn, lm_dyn, env.sqlite.clone())
.with_verifier(verifier_dyn);
let answer = pipeline
.ask("compound", multi_hop_opts_with_sink(tx))
.expect("pipeline returns Ok even when NLI model is unavailable");
// Synchronous return value.
assert_eq!(
answer.refusal_reason,
Some(RefusalReason::NliModelUnavailable),
"return value must carry NliModelUnavailable"
);
assert!(!answer.grounded);
// verification is None — we can't summarize what didn't happen.
assert!(
answer.verification.is_none(),
"NliModelUnavailable must leave Answer.verification = None"
);
// Stream wire shape: terminal Final event must carry matching refusal.
let final_event = expect_final_event(rx);
match final_event {
StreamEvent::Final { answer: streamed } => {
assert_eq!(
streamed.refusal_reason,
Some(RefusalReason::NliModelUnavailable),
"Final event's answer must carry NliModelUnavailable"
);
assert!(!streamed.grounded);
assert!(
streamed.verification.is_none(),
"NliModelUnavailable: verification must be None in the streamed Final event"
);
}
other => panic!("expected StreamEvent::Final, got {other:?}"),
}
}

View File

@@ -282,8 +282,7 @@ fn streaming_forwards_tokens_to_sink() {
StreamEvent::Token { delta, .. } => Some(delta),
_ => None,
})
.collect::<Vec<_>>()
.join("");
.collect::<String>();
assert_eq!(collected, canned);
}
@@ -522,7 +521,7 @@ fn answer_json_serializes_with_expected_keys() {
let answer = pipeline.ask("what", default_opts()).unwrap();
let v: serde_json::Value = serde_json::to_value(&answer).unwrap();
// Stable top-level key set per `answer.v1` (§2.3).
let keys: Vec<&str> = v.as_object().unwrap().keys().map(|s| s.as_str()).collect();
let keys: Vec<&str> = v.as_object().unwrap().keys().map(std::string::String::as_str).collect();
for needed in [
"answer",
"citations",

View File

@@ -36,3 +36,6 @@ tempfile = { workspace = true }
# The mock-retriever unit tests (the bulk of the hybrid suite) do not
# need either, but the integration / snapshot lane does.
kebab-embed = { path = "../kebab-embed", features = ["mock"] }
[lints]
workspace = true

View File

@@ -601,7 +601,7 @@ mod tests {
let h = HybridRetriever::with_policy(lex, vec, rrf_policy(60), 5);
let out = h.search(&make_query(SearchMode::Hybrid, 5)).unwrap();
let a = out.iter().find(|h| h.chunk_id.0 == "aaaa").unwrap();
let actual = a.retrieval.fusion_score as f64;
let actual = f64::from(a.retrieval.fusion_score);
// Tolerance: the score is computed in f64 and cast to f32 at
// the API boundary, so any discrepancy must fit within f32
// precision. `1e-7` is below `f32::EPSILON` (~1.19e-7), which
@@ -694,7 +694,7 @@ mod tests {
let h = HybridRetriever::with_policy(lex, vec, rrf_policy(60), 4);
let out = h.search(&make_query(SearchMode::Hybrid, 4)).unwrap();
let mut ids: Vec<&str> = out.iter().map(|h| h.chunk_id.0.as_str()).collect();
ids.sort();
ids.sort_unstable();
assert_eq!(ids, vec!["aaaa", "bbbb", "cccc", "dddd"]);
}

View File

@@ -457,7 +457,7 @@ fn run_query(
.prepare(&sql)
.context("kb-search lexical: prepare FTS5 statement")?;
let rows = stmt
.query_map(params_from_iter(params.iter().map(|b| b.as_ref())), row_from_sql)
.query_map(params_from_iter(params.iter().map(std::convert::AsRef::as_ref)), row_from_sql)
.context("kb-search lexical: execute FTS5 query")?;
let mut out: Vec<RawRow> = Vec::new();
for r in rows {

Some files were not shown because too many files have changed in this diff Show More