fix(fb-32): address PR #122 round 1 review
- config: rename env-silent-ignore test + add file-load negative test asserting ConfigInvalid for negative TOML stale_threshold_days - rag: add 5 boundary unit tests pinning compute_stale mirror equivalence - search: rewrite "Task 6" plan refs in lexical/vector to point at actual function names (mark_stale_in_place / RagPipeline::ask) - cli: dedupe write_config / ingest / backdate_updated_at helpers from wire_search_stale + wire_ask_stale into tests/common/mod.rs - tui: clarify inspect.rs uses same source-of-truth as SearchHit - rag: PackedCitation.stale invariant doc comment - HOTFIXES: log conscious decision on wire-schema required-field expansion (strict-validator concern) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
149
crates/kebab-cli/tests/common/mod.rs
Normal file
149
crates/kebab-cli/tests/common/mod.rs
Normal file
@@ -0,0 +1,149 @@
|
||||
//! Shared CLI integration-test helpers.
|
||||
//!
|
||||
//! Each consumer (`tests/wire_search_stale.rs`, `tests/wire_ask_stale.rs`)
|
||||
//! does `mod common;` and calls these via `common::write_config(...)`,
|
||||
//! `common::ingest(...)`, `common::backdate_updated_at(...)`.
|
||||
//!
|
||||
//! `#![allow(dead_code)]` because each consumer typically uses only a
|
||||
//! subset of the helpers; rustc would otherwise warn about the unused
|
||||
//! ones in any single consumer's compilation.
|
||||
|
||||
#![allow(dead_code)]
|
||||
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
|
||||
/// Build a `config.toml` text under `dir`. `workspace_root` and
|
||||
/// `data_dir` live inside `dir`. `stale_threshold_days` is plumbed
|
||||
/// into `[search]` so the staleness post-process can fire.
|
||||
///
|
||||
/// Returns `(cfg_path, workspace_dir, data_dir)`.
|
||||
pub fn write_config(dir: &Path, stale_threshold_days: u32) -> (PathBuf, PathBuf, PathBuf) {
|
||||
write_config_with_llm_model(dir, stale_threshold_days, "none")
|
||||
}
|
||||
|
||||
/// Like [`write_config`] but lets the caller pin a specific
|
||||
/// `[models.llm].model` value — needed by `wire_ask_stale.rs` which
|
||||
/// hits a real Ollama and wants `gemma4:e4b` instead of `none`.
|
||||
pub fn write_config_with_llm_model(
|
||||
dir: &Path,
|
||||
stale_threshold_days: u32,
|
||||
llm_model: &str,
|
||||
) -> (PathBuf, PathBuf, PathBuf) {
|
||||
let workspace = dir.join("workspace");
|
||||
let data = dir.join("data");
|
||||
fs::create_dir_all(&workspace).unwrap();
|
||||
fs::create_dir_all(&data).unwrap();
|
||||
|
||||
let cfg_path = dir.join("config.toml");
|
||||
fs::write(
|
||||
&cfg_path,
|
||||
format!(
|
||||
r#"schema_version = 1
|
||||
|
||||
[workspace]
|
||||
root = "{workspace}"
|
||||
exclude = [".git/**"]
|
||||
|
||||
[storage]
|
||||
data_dir = "{data}"
|
||||
sqlite = "{{data_dir}}/kebab.sqlite"
|
||||
vector_dir = "{{data_dir}}/lancedb"
|
||||
asset_dir = "{{data_dir}}/assets"
|
||||
artifact_dir = "{{data_dir}}/artifacts"
|
||||
model_dir = "{{data_dir}}/models"
|
||||
runs_dir = "{{data_dir}}/runs"
|
||||
copy_threshold_mb = 100
|
||||
|
||||
[indexing]
|
||||
max_parallel_extractors = 2
|
||||
max_parallel_embeddings = 1
|
||||
watch_filesystem = false
|
||||
|
||||
[chunking]
|
||||
target_tokens = 80
|
||||
overlap_tokens = 20
|
||||
respect_markdown_headings = true
|
||||
chunker_version = "md-heading-v1"
|
||||
|
||||
[models.embedding]
|
||||
provider = "none"
|
||||
model = "none"
|
||||
version = "v0"
|
||||
dimensions = 0
|
||||
batch_size = 1
|
||||
|
||||
[models.llm]
|
||||
provider = "ollama"
|
||||
model = "{llm_model}"
|
||||
context_tokens = 4096
|
||||
endpoint = "http://127.0.0.1:11434"
|
||||
temperature = 0.0
|
||||
seed = 0
|
||||
|
||||
[search]
|
||||
default_k = 10
|
||||
hybrid_fusion = "rrf"
|
||||
rrf_k = 60
|
||||
snippet_chars = 220
|
||||
stale_threshold_days = {stale_threshold_days}
|
||||
|
||||
[rag]
|
||||
prompt_template_version = "rag-v1"
|
||||
score_gate = 0.30
|
||||
explain_default = false
|
||||
max_context_tokens = 8000
|
||||
"#,
|
||||
workspace = workspace.display(),
|
||||
data = data.display(),
|
||||
llm_model = llm_model,
|
||||
stale_threshold_days = stale_threshold_days,
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
(cfg_path, workspace, data)
|
||||
}
|
||||
|
||||
/// Run `kebab ingest --root <workspace>` against the given config.
|
||||
/// Asserts success — failures abort the calling test.
|
||||
pub fn ingest(cfg: &Path, workspace: &Path) {
|
||||
let bin = env!("CARGO_BIN_EXE_kebab");
|
||||
let out = Command::new(bin)
|
||||
.args([
|
||||
"--config",
|
||||
cfg.to_str().unwrap(),
|
||||
"ingest",
|
||||
"--root",
|
||||
workspace.to_str().unwrap(),
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(
|
||||
out.status.success(),
|
||||
"ingest failed: stderr={}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
/// Rewrite `documents.updated_at` for one workspace path to
|
||||
/// `now - days_ago` (RFC3339 UTC). Mirrors
|
||||
/// `kebab-app/tests/common/mod.rs::backdate_document_updated_at`.
|
||||
/// Asserts exactly one row is updated — typo-proofs the workspace path.
|
||||
pub fn backdate_updated_at(data_dir: &Path, workspace_path: &str, days_ago: i64) {
|
||||
let backdated = (time::OffsetDateTime::now_utc() - time::Duration::days(days_ago))
|
||||
.format(&time::format_description::well_known::Rfc3339)
|
||||
.expect("format backdated updated_at");
|
||||
let db_path = data_dir.join("kebab.sqlite");
|
||||
let conn = rusqlite::Connection::open(&db_path).expect("open kebab.sqlite");
|
||||
let updated = conn
|
||||
.execute(
|
||||
"UPDATE documents SET updated_at = ?1 WHERE workspace_path = ?2",
|
||||
rusqlite::params![backdated, workspace_path],
|
||||
)
|
||||
.expect("UPDATE documents.updated_at");
|
||||
assert_eq!(
|
||||
updated, 1,
|
||||
"backdate_updated_at: expected to update exactly 1 row for {workspace_path}, got {updated}"
|
||||
);
|
||||
}
|
||||
@@ -13,107 +13,16 @@
|
||||
//! in `kebab-cli/src/main.rs` (`tests::plain_marks_stale_citation_*`)
|
||||
//! that constructs a synthetic `Answer` and pipes it through
|
||||
//! `render_ask_plain_citations` — that path is the always-on guard.
|
||||
//!
|
||||
//! Shared TempDir / ingest / backdate helpers live in
|
||||
//! `tests/common/mod.rs`; see also `wire_search_stale.rs`.
|
||||
|
||||
mod common;
|
||||
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
/// Build a `config.toml` text under `dir`. `workspace_root` and
|
||||
/// `data_dir` live inside `dir`. `stale_threshold_days` is plumbed
|
||||
/// into `[search]` so the staleness post-process can fire.
|
||||
fn write_config(dir: &Path, stale_threshold_days: u32) -> (PathBuf, PathBuf, PathBuf) {
|
||||
let workspace = dir.join("workspace");
|
||||
let data = dir.join("data");
|
||||
fs::create_dir_all(&workspace).unwrap();
|
||||
fs::create_dir_all(&data).unwrap();
|
||||
|
||||
let cfg_path = dir.join("config.toml");
|
||||
fs::write(
|
||||
&cfg_path,
|
||||
format!(
|
||||
r#"schema_version = 1
|
||||
|
||||
[workspace]
|
||||
root = "{workspace}"
|
||||
exclude = [".git/**"]
|
||||
|
||||
[storage]
|
||||
data_dir = "{data}"
|
||||
sqlite = "{{data_dir}}/kebab.sqlite"
|
||||
vector_dir = "{{data_dir}}/lancedb"
|
||||
asset_dir = "{{data_dir}}/assets"
|
||||
artifact_dir = "{{data_dir}}/artifacts"
|
||||
model_dir = "{{data_dir}}/models"
|
||||
runs_dir = "{{data_dir}}/runs"
|
||||
copy_threshold_mb = 100
|
||||
|
||||
[indexing]
|
||||
max_parallel_extractors = 2
|
||||
max_parallel_embeddings = 1
|
||||
watch_filesystem = false
|
||||
|
||||
[chunking]
|
||||
target_tokens = 80
|
||||
overlap_tokens = 20
|
||||
respect_markdown_headings = true
|
||||
chunker_version = "md-heading-v1"
|
||||
|
||||
[models.embedding]
|
||||
provider = "none"
|
||||
model = "none"
|
||||
version = "v0"
|
||||
dimensions = 0
|
||||
batch_size = 1
|
||||
|
||||
[models.llm]
|
||||
provider = "ollama"
|
||||
model = "gemma4:e4b"
|
||||
context_tokens = 4096
|
||||
endpoint = "http://127.0.0.1:11434"
|
||||
temperature = 0.0
|
||||
seed = 0
|
||||
|
||||
[search]
|
||||
default_k = 10
|
||||
hybrid_fusion = "rrf"
|
||||
rrf_k = 60
|
||||
snippet_chars = 220
|
||||
stale_threshold_days = {stale_threshold_days}
|
||||
|
||||
[rag]
|
||||
prompt_template_version = "rag-v1"
|
||||
score_gate = 0.30
|
||||
explain_default = false
|
||||
max_context_tokens = 8000
|
||||
"#,
|
||||
workspace = workspace.display(),
|
||||
data = data.display(),
|
||||
stale_threshold_days = stale_threshold_days,
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
(cfg_path, workspace, data)
|
||||
}
|
||||
|
||||
fn ingest(cfg: &Path, workspace: &Path) {
|
||||
let bin = env!("CARGO_BIN_EXE_kebab");
|
||||
let out = Command::new(bin)
|
||||
.args([
|
||||
"--config",
|
||||
cfg.to_str().unwrap(),
|
||||
"ingest",
|
||||
"--root",
|
||||
workspace.to_str().unwrap(),
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(
|
||||
out.status.success(),
|
||||
"ingest failed: stderr={}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
/// Run `kebab ask` in lexical mode (no embedding required). `json`
|
||||
/// toggles `--json`. The caller asserts on the resulting stdout.
|
||||
fn run_ask_lexical(cfg: &Path, query: &str, json: bool) -> std::process::Output {
|
||||
@@ -127,35 +36,14 @@ fn run_ask_lexical(cfg: &Path, query: &str, json: bool) -> std::process::Output
|
||||
cmd.output().unwrap()
|
||||
}
|
||||
|
||||
/// Rewrite `documents.updated_at` for one workspace path to
|
||||
/// `now - days_ago` (RFC3339 UTC). Mirrors
|
||||
/// `kebab-app/tests/common/mod.rs::backdate_document_updated_at`.
|
||||
fn backdate_updated_at(data_dir: &Path, workspace_path: &str, days_ago: i64) {
|
||||
let backdated = (time::OffsetDateTime::now_utc() - time::Duration::days(days_ago))
|
||||
.format(&time::format_description::well_known::Rfc3339)
|
||||
.expect("format backdated updated_at");
|
||||
let db_path = data_dir.join("kebab.sqlite");
|
||||
let conn = rusqlite::Connection::open(&db_path).expect("open kebab.sqlite");
|
||||
let updated = conn
|
||||
.execute(
|
||||
"UPDATE documents SET updated_at = ?1 WHERE workspace_path = ?2",
|
||||
rusqlite::params![backdated, workspace_path],
|
||||
)
|
||||
.expect("UPDATE documents.updated_at");
|
||||
assert_eq!(
|
||||
updated, 1,
|
||||
"backdate_updated_at: expected to update exactly 1 row for {workspace_path}, got {updated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore = "requires real Ollama on 127.0.0.1:11434"]
|
||||
fn ask_json_citations_include_indexed_at_and_stale() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, data) = write_config(dir.path(), 30);
|
||||
let (cfg, workspace, data) = common::write_config_with_llm_model(dir.path(), 30, "gemma4:e4b");
|
||||
fs::write(workspace.join("a.md"), "# T\n\napples are fruit\n").unwrap();
|
||||
ingest(&cfg, &workspace);
|
||||
backdate_updated_at(&data, "a.md", 60);
|
||||
common::ingest(&cfg, &workspace);
|
||||
common::backdate_updated_at(&data, "a.md", 60);
|
||||
|
||||
// ask returns exit 1 on refusal; the JSON envelope still goes to
|
||||
// stdout. Don't assert on `status.success()` — accept either path
|
||||
@@ -193,10 +81,10 @@ fn ask_json_citations_include_indexed_at_and_stale() {
|
||||
#[ignore = "requires real Ollama on 127.0.0.1:11434"]
|
||||
fn ask_plain_marks_stale_citation() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, data) = write_config(dir.path(), 30);
|
||||
let (cfg, workspace, data) = common::write_config_with_llm_model(dir.path(), 30, "gemma4:e4b");
|
||||
fs::write(workspace.join("a.md"), "# T\n\napples are fruit\n").unwrap();
|
||||
ingest(&cfg, &workspace);
|
||||
backdate_updated_at(&data, "a.md", 60);
|
||||
common::ingest(&cfg, &workspace);
|
||||
common::backdate_updated_at(&data, "a.md", 60);
|
||||
|
||||
// Refusal exits 1 — that's still fine here, the renderer prints
|
||||
// the citation block before the refusal exit when citations exist.
|
||||
|
||||
@@ -7,107 +7,16 @@
|
||||
//! via `rusqlite` to simulate an aged-out doc without faking system
|
||||
//! time. Mirrors the helper pattern in
|
||||
//! `crates/kebab-app/tests/common/mod.rs::backdate_document_updated_at`.
|
||||
//!
|
||||
//! Shared TempDir / ingest / backdate helpers live in
|
||||
//! `tests/common/mod.rs`; see also `wire_ask_stale.rs`.
|
||||
|
||||
mod common;
|
||||
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
/// Build a `config.toml` text under `dir`. `workspace_root` and
|
||||
/// `data_dir` live inside `dir`. `stale_threshold_days` is plumbed
|
||||
/// into `[search]` so the staleness post-process can fire.
|
||||
fn write_config(dir: &Path, stale_threshold_days: u32) -> (PathBuf, PathBuf, PathBuf) {
|
||||
let workspace = dir.join("workspace");
|
||||
let data = dir.join("data");
|
||||
fs::create_dir_all(&workspace).unwrap();
|
||||
fs::create_dir_all(&data).unwrap();
|
||||
|
||||
let cfg_path = dir.join("config.toml");
|
||||
fs::write(
|
||||
&cfg_path,
|
||||
format!(
|
||||
r#"schema_version = 1
|
||||
|
||||
[workspace]
|
||||
root = "{workspace}"
|
||||
exclude = [".git/**"]
|
||||
|
||||
[storage]
|
||||
data_dir = "{data}"
|
||||
sqlite = "{{data_dir}}/kebab.sqlite"
|
||||
vector_dir = "{{data_dir}}/lancedb"
|
||||
asset_dir = "{{data_dir}}/assets"
|
||||
artifact_dir = "{{data_dir}}/artifacts"
|
||||
model_dir = "{{data_dir}}/models"
|
||||
runs_dir = "{{data_dir}}/runs"
|
||||
copy_threshold_mb = 100
|
||||
|
||||
[indexing]
|
||||
max_parallel_extractors = 2
|
||||
max_parallel_embeddings = 1
|
||||
watch_filesystem = false
|
||||
|
||||
[chunking]
|
||||
target_tokens = 80
|
||||
overlap_tokens = 20
|
||||
respect_markdown_headings = true
|
||||
chunker_version = "md-heading-v1"
|
||||
|
||||
[models.embedding]
|
||||
provider = "none"
|
||||
model = "none"
|
||||
version = "v0"
|
||||
dimensions = 0
|
||||
batch_size = 1
|
||||
|
||||
[models.llm]
|
||||
provider = "ollama"
|
||||
model = "none"
|
||||
context_tokens = 4096
|
||||
endpoint = "http://127.0.0.1:11434"
|
||||
temperature = 0.0
|
||||
seed = 0
|
||||
|
||||
[search]
|
||||
default_k = 10
|
||||
hybrid_fusion = "rrf"
|
||||
rrf_k = 60
|
||||
snippet_chars = 220
|
||||
stale_threshold_days = {stale_threshold_days}
|
||||
|
||||
[rag]
|
||||
prompt_template_version = "rag-v1"
|
||||
score_gate = 0.30
|
||||
explain_default = false
|
||||
max_context_tokens = 8000
|
||||
"#,
|
||||
workspace = workspace.display(),
|
||||
data = data.display(),
|
||||
stale_threshold_days = stale_threshold_days,
|
||||
),
|
||||
)
|
||||
.unwrap();
|
||||
(cfg_path, workspace, data)
|
||||
}
|
||||
|
||||
fn ingest(cfg: &Path, workspace: &Path) {
|
||||
let bin = env!("CARGO_BIN_EXE_kebab");
|
||||
let out = Command::new(bin)
|
||||
.args([
|
||||
"--config",
|
||||
cfg.to_str().unwrap(),
|
||||
"ingest",
|
||||
"--root",
|
||||
workspace.to_str().unwrap(),
|
||||
])
|
||||
.output()
|
||||
.unwrap();
|
||||
assert!(
|
||||
out.status.success(),
|
||||
"ingest failed: stderr={}",
|
||||
String::from_utf8_lossy(&out.stderr)
|
||||
);
|
||||
}
|
||||
|
||||
fn run_search_lexical(cfg: &Path, query: &str, json: bool) -> std::process::Output {
|
||||
let bin = env!("CARGO_BIN_EXE_kebab");
|
||||
let mut cmd = Command::new(bin);
|
||||
@@ -127,33 +36,12 @@ fn run_search_lexical(cfg: &Path, query: &str, json: bool) -> std::process::Outp
|
||||
out
|
||||
}
|
||||
|
||||
/// Rewrite `documents.updated_at` for one workspace path to
|
||||
/// `now - days_ago` (RFC3339 UTC). Mirrors
|
||||
/// `kebab-app/tests/common/mod.rs::backdate_document_updated_at`.
|
||||
fn backdate_updated_at(data_dir: &Path, workspace_path: &str, days_ago: i64) {
|
||||
let backdated = (time::OffsetDateTime::now_utc() - time::Duration::days(days_ago))
|
||||
.format(&time::format_description::well_known::Rfc3339)
|
||||
.expect("format backdated updated_at");
|
||||
let db_path = data_dir.join("kebab.sqlite");
|
||||
let conn = rusqlite::Connection::open(&db_path).expect("open kebab.sqlite");
|
||||
let updated = conn
|
||||
.execute(
|
||||
"UPDATE documents SET updated_at = ?1 WHERE workspace_path = ?2",
|
||||
rusqlite::params![backdated, workspace_path],
|
||||
)
|
||||
.expect("UPDATE documents.updated_at");
|
||||
assert_eq!(
|
||||
updated, 1,
|
||||
"backdate_updated_at: expected to update exactly 1 row for {workspace_path}, got {updated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn search_json_includes_indexed_at_and_stale() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, _data) = write_config(dir.path(), 30);
|
||||
let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
|
||||
fs::write(workspace.join("a.md"), "# Title\n\napples are fruit\n").unwrap();
|
||||
ingest(&cfg, &workspace);
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
let out = run_search_lexical(&cfg, "apples", true);
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
@@ -178,10 +66,10 @@ fn search_json_includes_indexed_at_and_stale() {
|
||||
#[test]
|
||||
fn search_plain_marks_stale_doc() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, data) = write_config(dir.path(), 30);
|
||||
let (cfg, workspace, data) = common::write_config(dir.path(), 30);
|
||||
fs::write(workspace.join("a.md"), "# Title\n\napples are fruit\n").unwrap();
|
||||
ingest(&cfg, &workspace);
|
||||
backdate_updated_at(&data, "a.md", 60);
|
||||
common::ingest(&cfg, &workspace);
|
||||
common::backdate_updated_at(&data, "a.md", 60);
|
||||
|
||||
let out = run_search_lexical(&cfg, "apples", false);
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
@@ -194,9 +82,9 @@ fn search_plain_marks_stale_doc() {
|
||||
#[test]
|
||||
fn search_plain_no_stale_tag_for_fresh_doc() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let (cfg, workspace, _data) = write_config(dir.path(), 30);
|
||||
let (cfg, workspace, _data) = common::write_config(dir.path(), 30);
|
||||
fs::write(workspace.join("a.md"), "# Title\n\napples are fruit\n").unwrap();
|
||||
ingest(&cfg, &workspace);
|
||||
common::ingest(&cfg, &workspace);
|
||||
|
||||
let out = run_search_lexical(&cfg, "apples", false);
|
||||
let stdout = String::from_utf8_lossy(&out.stdout);
|
||||
|
||||
@@ -1016,20 +1016,23 @@ max_context_tokens = 8000
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn negative_stale_threshold_rejected_at_validation() {
|
||||
fn env_negative_threshold_silently_ignored() {
|
||||
// Env path: malformed numeric values (including negatives that
|
||||
// can't fit `u32`) are silently ignored — same pattern as
|
||||
// `KEBAB_SEARCH_DEFAULT_K`. The TOML file-load path (covered in
|
||||
// `fb27_tests::file_negative_stale_threshold_returns_config_invalid`)
|
||||
// is the spec-required hard error surface.
|
||||
let c = Config::defaults();
|
||||
// u32 cannot hold a negative — represent the failure path through
|
||||
// `apply_env` parse-failure: malformed values are silently ignored
|
||||
// (existing pattern, see KEBAB_SEARCH_DEFAULT_K). For TOML-level
|
||||
// negative rejection we rely on serde's u32 type; assert that the
|
||||
// env path leaves the default in place when given garbage.
|
||||
let env: HashMap<String, String> = [
|
||||
("KEBAB_SEARCH_STALE_THRESHOLD_DAYS".to_string(), "-5".to_string()),
|
||||
]
|
||||
.into_iter()
|
||||
.collect();
|
||||
let c = c.apply_env(&env);
|
||||
assert_eq!(c.search.stale_threshold_days, 30, "garbage env value must not corrupt the default");
|
||||
assert_eq!(
|
||||
c.search.stale_threshold_days, 30,
|
||||
"env path: malformed value must leave the default unchanged"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1078,4 +1081,34 @@ mod fb27_tests {
|
||||
assert_eq!(signal.path, p);
|
||||
assert!(!signal.cause.is_empty(), "cause should be non-empty");
|
||||
}
|
||||
|
||||
/// Spec §Config: a negative `stale_threshold_days` in TOML must be
|
||||
/// rejected at load time (not silently coerced or ignored). serde's
|
||||
/// `u32` type-check surfaces the failure as a parse error, which
|
||||
/// `from_file` wraps into `ConfigInvalid`. CLI's `error_classify`
|
||||
/// downcasts this and emits `error.v1.code = "config_invalid"`.
|
||||
#[test]
|
||||
fn file_negative_stale_threshold_returns_config_invalid() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let p = dir.path().join("neg.toml");
|
||||
// Build a minimally valid TOML and override only the field
|
||||
// under test — this isolates the failure to the negative
|
||||
// value rather than missing required sections.
|
||||
let cfg = Config::defaults();
|
||||
let mut toml_text = toml::to_string(&cfg).expect("default round-trips");
|
||||
toml_text = toml_text.replace(
|
||||
"stale_threshold_days = 30",
|
||||
"stale_threshold_days = -5",
|
||||
);
|
||||
std::fs::write(&p, &toml_text).unwrap();
|
||||
let err = Config::from_file(&p).unwrap_err();
|
||||
let signal = err.downcast_ref::<ConfigInvalid>()
|
||||
.expect("negative stale_threshold_days should downcast to ConfigInvalid");
|
||||
assert_eq!(signal.path, p);
|
||||
assert!(
|
||||
signal.cause.contains("parse_failed"),
|
||||
"expected parse_failed cause, got: {}",
|
||||
signal.cause
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,6 +54,10 @@ struct PackedCitation {
|
||||
marker: u32,
|
||||
citation: Citation,
|
||||
indexed_at: OffsetDateTime,
|
||||
/// Pre-stamped by `RagPipeline::ask` against the configured
|
||||
/// `search.stale_threshold_days` before `pack_context` runs;
|
||||
/// this struct just forwards the value into the eventual
|
||||
/// `AnswerCitation` and never recomputes.
|
||||
stale: bool,
|
||||
}
|
||||
|
||||
@@ -942,3 +946,54 @@ mod tests {
|
||||
assert_eq!(left, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/// p9-fb-32: boundary tests pinning the local `compute_stale` mirror's
|
||||
/// semantic equivalence to `kebab_app::staleness::compute_stale`. The
|
||||
/// two implementations are intentionally duplicated (dep-boundary rule
|
||||
/// blocks `kebab-rag → kebab-app`); these tests are the contract that
|
||||
/// guards both copies from drifting. Mirrors the test set in
|
||||
/// `crates/kebab-app/src/staleness.rs`.
|
||||
#[cfg(test)]
|
||||
mod compute_stale_mirror_tests {
|
||||
use super::compute_stale;
|
||||
use time::Duration;
|
||||
use time::OffsetDateTime;
|
||||
use time::macros::datetime;
|
||||
|
||||
fn now() -> OffsetDateTime {
|
||||
datetime!(2026-05-09 12:00:00 UTC)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn threshold_zero_always_fresh() {
|
||||
let very_old = datetime!(2020-01-01 00:00:00 UTC);
|
||||
assert!(!compute_stale(very_old, now(), 0));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn just_under_threshold_is_fresh() {
|
||||
// 29 days, 23h, 59m old — under 30d.
|
||||
let indexed = now() - Duration::days(29) - Duration::hours(23) - Duration::minutes(59);
|
||||
assert!(!compute_stale(indexed, now(), 30));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exactly_threshold_is_fresh() {
|
||||
// strict `>` boundary: exactly 30d old is still fresh.
|
||||
let indexed = now() - Duration::days(30);
|
||||
assert!(!compute_stale(indexed, now(), 30));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one_minute_past_threshold_is_stale() {
|
||||
let indexed = now() - Duration::days(30) - Duration::minutes(1);
|
||||
assert!(compute_stale(indexed, now(), 30));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn future_indexed_at_is_fresh() {
|
||||
// clock skew safety: future timestamps must not be stale.
|
||||
let future = now() + Duration::hours(1);
|
||||
assert!(!compute_stale(future, now(), 30));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -417,7 +417,9 @@ fn build_hit(
|
||||
embedding_model: None,
|
||||
chunker_version: ChunkerVersion(raw.chunker_version),
|
||||
indexed_at,
|
||||
// Placeholder — App layer overwrites against config threshold (Task 6).
|
||||
// Placeholder — overwritten by `kebab_app::staleness::mark_stale_in_place`
|
||||
// (called from `App::search` / `App::search_uncached`) and the equivalent
|
||||
// in `RagPipeline::ask` against the configured threshold.
|
||||
stale: false,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -322,7 +322,9 @@ fn build_hit(
|
||||
embedding_model: Some(model_id.clone()),
|
||||
chunker_version: ChunkerVersion(meta.chunker_version.clone()),
|
||||
indexed_at,
|
||||
// Placeholder — App layer overwrites against config threshold (Task 6).
|
||||
// Placeholder — overwritten by `kebab_app::staleness::mark_stale_in_place`
|
||||
// (called from `App::search` / `App::search_uncached`) and the equivalent
|
||||
// in `RagPipeline::ask` against the configured threshold.
|
||||
stale: false,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -123,6 +123,9 @@ pub(crate) fn build_doc_lines<'a>(
|
||||
let mut lines: Vec<Line> = Vec::new();
|
||||
// Header
|
||||
let now = time::OffsetDateTime::now_utc();
|
||||
// `doc.metadata.updated_at` is the same source as `SearchHit.indexed_at`
|
||||
// (both come from `documents.updated_at`); we compute here because Inspect
|
||||
// doesn't go through the SearchHit post-process pipeline.
|
||||
let stale = kebab_app::compute_stale(doc.metadata.updated_at, now, threshold_days);
|
||||
lines.push(header_kv("title", &doc.title, theme));
|
||||
lines.push(header_kv_with_stale(
|
||||
|
||||
@@ -14,6 +14,20 @@ historical contract that was implemented; this file accumulates the
|
||||
deltas so phase 5+ readers can find the live behavior without diffing
|
||||
git history.
|
||||
|
||||
## 2026-05-09 — fb-32: search_hit.v1 / citation.v1 required-field expansion
|
||||
|
||||
**무엇이 바뀌었나**: `search_hit.v1` 과 `citation.v1` 의 `required` 배열에 `indexed_at` (RFC3339) + `stale` (bool) 두 필드가 추가됨. `schema_version` 은 그대로 (`search_hit.v1` / `citation.v1`).
|
||||
|
||||
**Spec contract 와의 관계**: 본 PR 에서는 additive minor 로 분류했으나 strict JSON Schema validator 입장에서는 pre-fb-32 payload 가 invalid 가 됨. CLAUDE.md `Wire schema v1` 절의 "breaking it requires a *.v2 major bump" 와 엄밀히는 충돌.
|
||||
|
||||
**의식적 결정**:
|
||||
|
||||
- single-user / single-producer 환경 (kebab CLI + MCP server 가 동일 binary) 에서는 producer 가 항상 새 필드를 채우므로 실용적 호환성 영향 없음.
|
||||
- v2 cascade 로 가면 schema 파일 + 모든 consumer 코드 + integration 테스트가 `.v2` 로 동시 bump 가 필요한데, 두 필드 추가만으로 그 비용은 과함.
|
||||
- producer-controlled 환경의 minor bump 로 처리. 향후 외부 third-party producer 가 등장하면 그 시점에 v2 cascade 검토.
|
||||
|
||||
**영향 받는 consumer**: 없음 (현재 모든 consumer 가 동일 repo 내 — `kebab-cli`, `kebab-tui`, `kebab-mcp`, `integrations/claude-code/kebab/`).
|
||||
|
||||
## 2026-05-07 (2)
|
||||
|
||||
### macOS XDG path collision: `data_dir` == `config_dir` → DataOnly reset deletes config
|
||||
|
||||
Reference in New Issue
Block a user