style: cargo fmt --all (round 4 ingest log feature follow-up)
Phase C4 executor 의 마지막 `fix(test): clippy + fmt fixes` commit 이 test file 부분만 fmt 적용. workspace 전체 fmt 누락 발견 → cargo fmt --all 적용. 모든 import alphabetical reorder + line wrapping 정합.

추가 untracked artifact 동시 commit:
- docs/superpowers/specs/2026-05-28-v0.20-ingest-log-spec.md (491 lines, ACCEPT)
- docs/superpowers/plans/2026-05-28-v0.20-ingest-log-plan.md (616 lines, ACCEPT)

workspace test: 1370 passed / 0 failed / 50 ignored, ingest_log_smoke green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -29,9 +29,8 @@ fn rust_file_ingests_and_searches_as_code_citation() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
|
||||
assert_eq!(report.errors, 0, "no errors expected: {report:?}");
|
||||
let items = report.items.as_ref().expect("items present");
|
||||
@@ -127,9 +126,8 @@ fn rust_code_search_hit_has_repo() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
assert_eq!(report.errors, 0, "no ingest errors: {report:?}");
|
||||
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("mul"))
|
||||
@@ -147,8 +145,7 @@ fn rust_code_search_hit_has_repo() {
|
||||
.and_then(|n| n.to_str())
|
||||
.map(str::to_owned);
|
||||
assert_eq!(
|
||||
h.repo,
|
||||
expected_repo,
|
||||
h.repo, expected_repo,
|
||||
"SearchHit.repo must match the workspace dir name (detect_repo result)"
|
||||
);
|
||||
// Also sanity-check code_lang is still filled.
|
||||
@@ -177,9 +174,8 @@ fn python_file_ingests_and_searches_as_code_citation() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
|
||||
assert!(report.new >= 1, "python file ingested: {report:?}");
|
||||
|
||||
@@ -254,9 +250,8 @@ fn typescript_file_ingests_and_searches_as_code_citation() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
|
||||
assert!(report.new >= 1, "ts file ingested: {report:?}");
|
||||
|
||||
@@ -331,9 +326,8 @@ fn javascript_file_ingests_and_searches_as_code_citation() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
|
||||
assert!(report.new >= 1, "js file ingested: {report:?}");
|
||||
|
||||
@@ -515,7 +509,11 @@ fn java_file_ingests_and_searches_as_code_citation() {
|
||||
line_start,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(lang.as_deref(), Some("java"), "citation.lang must be 'java'");
|
||||
assert_eq!(
|
||||
lang.as_deref(),
|
||||
Some("java"),
|
||||
"citation.lang must be 'java'"
|
||||
);
|
||||
assert_eq!(
|
||||
symbol.as_deref(),
|
||||
Some("com.foo.Foo.bar"),
|
||||
@@ -586,7 +584,11 @@ fn kotlin_file_ingests_and_searches_as_code_citation() {
|
||||
line_start,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(lang.as_deref(), Some("kotlin"), "citation.lang must be 'kotlin'");
|
||||
assert_eq!(
|
||||
lang.as_deref(),
|
||||
Some("kotlin"),
|
||||
"citation.lang must be 'kotlin'"
|
||||
);
|
||||
assert_eq!(
|
||||
symbol.as_deref(),
|
||||
Some("com.foo.Foo.bar"),
|
||||
@@ -651,8 +653,8 @@ fn tier2_k8s_yaml_ingest_searchable() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
@@ -666,7 +668,11 @@ fn tier2_k8s_yaml_ingest_searchable() {
|
||||
line_start,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(lang.as_deref(), Some("yaml"), "citation.lang must be 'yaml'");
|
||||
assert_eq!(
|
||||
lang.as_deref(),
|
||||
Some("yaml"),
|
||||
"citation.lang must be 'yaml'"
|
||||
);
|
||||
assert_eq!(
|
||||
symbol.as_deref(),
|
||||
Some("Deployment/prod/api"),
|
||||
@@ -730,8 +736,8 @@ fn tier2_dockerfile_ingest_searchable() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
@@ -813,8 +819,8 @@ fn tier2_cargo_toml_ingest_searchable() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
@@ -896,8 +902,8 @@ fn tier3_shell_ingest_searchable() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
@@ -987,8 +993,8 @@ fn tier3_yaml_fallback_picks_up_non_k8s_yaml() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
@@ -1031,14 +1037,9 @@ fn tier3_yaml_fallback_picks_up_non_k8s_yaml() {
|
||||
fn rust_file_re_ingest_is_unchanged() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
std::fs::write(
|
||||
env.workspace_root.join("stable.rs"),
|
||||
"pub fn noop() {}\n",
|
||||
)
|
||||
.unwrap();
|
||||
std::fs::write(env.workspace_root.join("stable.rs"), "pub fn noop() {}\n").unwrap();
|
||||
|
||||
let r1 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let r1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let item1 = r1
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -1049,8 +1050,7 @@ fn rust_file_re_ingest_is_unchanged() {
|
||||
.unwrap();
|
||||
assert_eq!(item1.kind, IngestItemKind::New);
|
||||
|
||||
let r2 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let r2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let item2 = r2
|
||||
.items
|
||||
.unwrap()
|
||||
@@ -1081,9 +1081,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report1 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("first ingest");
|
||||
let report1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("first ingest");
|
||||
let item1 = report1
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -1093,7 +1092,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
|
||||
.expect("docker-compose.yml in first report");
|
||||
assert!(
|
||||
matches!(item1.kind, IngestItemKind::New),
|
||||
"first ingest must be New, got {:?}", item1.kind
|
||||
"first ingest must be New, got {:?}",
|
||||
item1.kind
|
||||
);
|
||||
assert_eq!(
|
||||
item1.chunker_version.as_ref().map(|c| c.0.as_str()),
|
||||
@@ -1101,9 +1101,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
|
||||
"first ingest must use Tier 3 fallback chunker"
|
||||
);
|
||||
|
||||
let report2 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("second ingest");
|
||||
let report2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("second ingest");
|
||||
let item2 = report2
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -1113,7 +1112,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
|
||||
.expect("docker-compose.yml in second report");
|
||||
assert!(
|
||||
matches!(item2.kind, IngestItemKind::Unchanged),
|
||||
"second ingest must be Unchanged, got {:?}", item2.kind
|
||||
"second ingest must be Unchanged, got {:?}",
|
||||
item2.kind
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1163,8 +1163,8 @@ fn tier1_c_ingest_searchable() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
@@ -1247,8 +1247,8 @@ fn tier1_cpp_ingest_searchable() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
|
||||
let h = hits
|
||||
.iter()
|
||||
@@ -1266,7 +1266,9 @@ fn tier1_cpp_ingest_searchable() {
|
||||
// Symbol could be "kebab::chunk::Foo" (class) or "kebab::chunk::Foo::bar"
|
||||
// (method) depending on which chunk ranks first.
|
||||
assert!(
|
||||
symbol.as_deref().is_some_and(|s| s.starts_with("kebab::chunk::Foo")),
|
||||
symbol
|
||||
.as_deref()
|
||||
.is_some_and(|s| s.starts_with("kebab::chunk::Foo")),
|
||||
"C++ symbol must start with namespace::Class prefix, got {symbol:?}"
|
||||
);
|
||||
assert!(*line_start >= 1, "line_start must be >=1");
|
||||
@@ -1335,8 +1337,8 @@ fn tier2_k8s_multi_resource_yaml_ingests_without_collision() {
|
||||
..Default::default()
|
||||
},
|
||||
};
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
|
||||
assert!(
|
||||
hits.len() >= 2,
|
||||
"expected ≥2 hits (Deployment + Service), got {}",
|
||||
@@ -1359,9 +1361,8 @@ fn tier3_shell_reingest_is_unchanged() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let report1 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("first ingest");
|
||||
let report1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("first ingest");
|
||||
let item1 = report1
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -1371,12 +1372,12 @@ fn tier3_shell_reingest_is_unchanged() {
|
||||
.expect("deploy.sh in first report");
|
||||
assert!(
|
||||
matches!(item1.kind, IngestItemKind::New),
|
||||
"first ingest must be New, got {:?}", item1.kind
|
||||
"first ingest must be New, got {:?}",
|
||||
item1.kind
|
||||
);
|
||||
|
||||
let report2 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("second ingest");
|
||||
let report2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("second ingest");
|
||||
let item2 = report2
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -1386,6 +1387,7 @@ fn tier3_shell_reingest_is_unchanged() {
|
||||
.expect("deploy.sh in second report");
|
||||
assert!(
|
||||
matches!(item2.kind, IngestItemKind::Unchanged),
|
||||
"shell reingest must be Unchanged, got {:?}", item2.kind
|
||||
"shell reingest must be Unchanged, got {:?}",
|
||||
item2.kind
|
||||
);
|
||||
}
|
||||
|
||||
@@ -93,8 +93,7 @@ impl TestEnv {
|
||||
/// directly. Caller can invoke this multiple times to simulate
|
||||
/// re-opening the binary after a corpus revision bump.
|
||||
pub fn app(&self) -> kebab_app::App {
|
||||
kebab_app::App::open_with_config(self.config.clone())
|
||||
.expect("App::open_with_config")
|
||||
kebab_app::App::open_with_config(self.config.clone()).expect("App::open_with_config")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,11 @@ fn open(env: &common::TestEnv) -> App {
|
||||
#[test]
|
||||
fn fetch_chunk_returns_target_only_when_no_context() {
|
||||
let env = common::TestEnv::new();
|
||||
common::ingest_md(&env, "a.md", "# Title\n\nFirst paragraph.\n\n## Section\n\nSecond.\n");
|
||||
common::ingest_md(
|
||||
&env,
|
||||
"a.md",
|
||||
"# Title\n\nFirst paragraph.\n\n## Section\n\nSecond.\n",
|
||||
);
|
||||
let app = open(&env);
|
||||
|
||||
// Find a chunk via search to obtain its id.
|
||||
@@ -42,7 +46,8 @@ fn fetch_chunk_with_context_returns_neighbors() {
|
||||
// match. The earlier fixture used 2-char tokens like `A1`/`A3` for
|
||||
// section bodies — those zero-hit under trigram. Use 5-char unique
|
||||
// words per section so the query can pin one chunk deterministically.
|
||||
let body = "# H1\n\napples\n\n# H2\n\nbanana\n\n# H3\n\ncherry\n\n# H4\n\ndurian\n\n# H5\n\nelder\n";
|
||||
let body =
|
||||
"# H1\n\napples\n\n# H2\n\nbanana\n\n# H3\n\ncherry\n\n# H4\n\ndurian\n\n# H5\n\nelder\n";
|
||||
common::ingest_md(&env, "multi.md", body);
|
||||
let app = env.app();
|
||||
|
||||
@@ -110,7 +115,10 @@ fn fetch_doc_returns_serialized_markdown() {
|
||||
.unwrap();
|
||||
assert_eq!(result.kind, FetchKind::Doc);
|
||||
let text = result.text.expect("doc text");
|
||||
assert!(text.contains("Heading One"), "doc text contains heading: {text:?}");
|
||||
assert!(
|
||||
text.contains("Heading One"),
|
||||
"doc text contains heading: {text:?}"
|
||||
);
|
||||
assert!(text.contains("First paragraph"), "doc text contains body");
|
||||
assert!(!result.truncated);
|
||||
}
|
||||
@@ -155,7 +163,11 @@ fn fetch_doc_with_max_tokens_truncates() {
|
||||
.unwrap();
|
||||
assert!(result.truncated);
|
||||
let text = result.text.expect("doc text");
|
||||
assert!(text.chars().count() <= 100, "trimmed text len {}", text.chars().count());
|
||||
assert!(
|
||||
text.chars().count() <= 100,
|
||||
"trimmed text len {}",
|
||||
text.chars().count()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -292,8 +304,7 @@ fn fetch_span_line_start_beyond_total_returns_empty_text() {
|
||||
fn fetch_chunk_context_at_first_chunk_clamps_lower_bound() {
|
||||
let env = common::TestEnv::new();
|
||||
// Multi-chunk markdown so context ±N has neighbors.
|
||||
let body =
|
||||
"# H1\n\nFirst chunk text body.\n\n# H2\n\nSecond chunk.\n\n# H3\n\nThird chunk.\n";
|
||||
let body = "# H1\n\nFirst chunk text body.\n\n# H2\n\nSecond chunk.\n\n# H3\n\nThird chunk.\n";
|
||||
common::ingest_md(&env, "boundary.md", body);
|
||||
let app = env.app();
|
||||
let q = kebab_core::SearchQuery {
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
mod common;
|
||||
|
||||
use common::TestEnv;
|
||||
use kebab_app::ingest_with_config_opts;
|
||||
use kebab_app::IngestOpts;
|
||||
use kebab_app::ingest_with_config_opts;
|
||||
use kebab_core::{DocFilter, DocumentStore, SearchMode, SearchQuery, SourceScope};
|
||||
|
||||
/// Helper: open the store via `TestEnv` and run `list_documents`.
|
||||
@@ -125,17 +125,10 @@ fn include_scope_narrowing_does_not_purge() {
|
||||
include: vec!["**/*.rs".to_string()],
|
||||
exclude: env.config.workspace.exclude.clone(),
|
||||
};
|
||||
let first = ingest_with_config_opts(
|
||||
env.config.clone(),
|
||||
wide_scope,
|
||||
false,
|
||||
IngestOpts::default(),
|
||||
)
|
||||
.expect("first ingest (wide) must succeed");
|
||||
assert!(
|
||||
first.new >= 2,
|
||||
"expected at least 2 new docs: {first:?}"
|
||||
);
|
||||
let first =
|
||||
ingest_with_config_opts(env.config.clone(), wide_scope, false, IngestOpts::default())
|
||||
.expect("first ingest (wide) must succeed");
|
||||
assert!(first.new >= 2, "expected at least 2 new docs: {first:?}");
|
||||
assert_eq!(
|
||||
first.purged_deleted_files, 0,
|
||||
"no purges on first ingest: {first:?}"
|
||||
|
||||
@@ -24,8 +24,7 @@ use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
/// inspectable in stored DB rows.
|
||||
fn write_red_png(root: &Path, name: &str) -> std::path::PathBuf {
|
||||
use image::{ImageBuffer, Rgb};
|
||||
let img: ImageBuffer<Rgb<u8>, _> =
|
||||
ImageBuffer::from_fn(100, 50, |_, _| Rgb([255, 0, 0]));
|
||||
let img: ImageBuffer<Rgb<u8>, _> = ImageBuffer::from_fn(100, 50, |_, _| Rgb([255, 0, 0]));
|
||||
let path = root.join(name);
|
||||
img.save(&path).expect("write PNG fixture");
|
||||
path
|
||||
@@ -80,7 +79,12 @@ async fn ingest_image_with_ocr_produces_chunk_containing_ocr_text() {
|
||||
|
||||
// Counters: scanned should include the PNG; new ≥ 1 (markdown
|
||||
// fixtures from the workspace tree may also count).
|
||||
assert!(report.scanned >= 1, "scanned={}, items={:?}", report.scanned, report.items);
|
||||
assert!(
|
||||
report.scanned >= 1,
|
||||
"scanned={}, items={:?}",
|
||||
report.scanned,
|
||||
report.items
|
||||
);
|
||||
assert_eq!(report.errors, 0, "no errors on lenient OCR path");
|
||||
|
||||
// Locate the image doc in the report items.
|
||||
@@ -94,7 +98,11 @@ async fn ingest_image_with_ocr_produces_chunk_containing_ocr_text() {
|
||||
kebab_core::IngestItemKind::New,
|
||||
"image asset must be classified New on first ingest"
|
||||
);
|
||||
assert_eq!(img_item.chunk_count, Some(1), "image emits exactly one chunk");
|
||||
assert_eq!(
|
||||
img_item.chunk_count,
|
||||
Some(1),
|
||||
"image emits exactly one chunk"
|
||||
);
|
||||
|
||||
// Inspect the stored chunk text via kb-app's inspect_chunk facade.
|
||||
let doc_id = img_item.doc_id.clone().expect("image doc id");
|
||||
@@ -117,10 +125,12 @@ async fn ingest_image_with_ocr_produces_chunk_containing_ocr_text() {
|
||||
|
||||
// Sanity: the doc was actually persisted into SQLite (kb-app's
|
||||
// list_docs facade reads the same store the chunker writes to).
|
||||
let summaries = kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())
|
||||
.expect("list_docs");
|
||||
let summaries =
|
||||
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default()).expect("list_docs");
|
||||
assert!(
|
||||
summaries.iter().any(|s| s.doc_path.0.ends_with("diagram.png")),
|
||||
summaries
|
||||
.iter()
|
||||
.any(|s| s.doc_path.0.ends_with("diagram.png")),
|
||||
"image doc must appear in list_docs"
|
||||
);
|
||||
|
||||
@@ -171,8 +181,7 @@ async fn ingest_image_with_ocr_and_caption_populates_both_fields() {
|
||||
.iter()
|
||||
.find(|i| i.doc_path.0.ends_with("diagram.png"))
|
||||
.unwrap();
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap())
|
||||
.unwrap();
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap()).unwrap();
|
||||
let block = match &doc.blocks[0] {
|
||||
kebab_core::Block::ImageRef(b) => b,
|
||||
_ => unreachable!(),
|
||||
@@ -267,8 +276,7 @@ async fn image_indexed_with_filename_when_ocr_and_caption_disabled() {
|
||||
let cfg_clone = cfg.clone();
|
||||
let scope = env.scope();
|
||||
let report = spawn_blocking(move || {
|
||||
kebab_app::ingest_with_config(cfg_clone, scope, false)
|
||||
.expect("ingest with no OCR/caption")
|
||||
kebab_app::ingest_with_config(cfg_clone, scope, false).expect("ingest with no OCR/caption")
|
||||
})
|
||||
.await
|
||||
.expect("task");
|
||||
@@ -282,8 +290,7 @@ async fn image_indexed_with_filename_when_ocr_and_caption_disabled() {
|
||||
.find(|i| i.doc_path.0.ends_with("raw.png"))
|
||||
.unwrap();
|
||||
assert_eq!(img_item.chunk_count, Some(1), "image emits one chunk");
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap())
|
||||
.unwrap();
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap()).unwrap();
|
||||
let block = match &doc.blocks[0] {
|
||||
kebab_core::Block::ImageRef(b) => b,
|
||||
_ => unreachable!(),
|
||||
@@ -392,16 +399,12 @@ async fn re_ingest_image_produces_unchanged_with_same_doc_id() {
|
||||
let scope1 = scope.clone();
|
||||
let scope2 = scope.clone();
|
||||
|
||||
let r1 = spawn_blocking(move || {
|
||||
kebab_app::ingest_with_config(cfg1, scope1, false).unwrap()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let r2 = spawn_blocking(move || {
|
||||
kebab_app::ingest_with_config(cfg2, scope2, false).unwrap()
|
||||
})
|
||||
.await
|
||||
.unwrap();
|
||||
let r1 = spawn_blocking(move || kebab_app::ingest_with_config(cfg1, scope1, false).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
let r2 = spawn_blocking(move || kebab_app::ingest_with_config(cfg2, scope2, false).unwrap())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let id1 = r1
|
||||
.items
|
||||
|
||||
@@ -21,11 +21,16 @@ fn second_ingest_of_unchanged_corpus_marks_all_unchanged() {
|
||||
// First ingest — populates the DB. Use the legacy entry so the
|
||||
// assertions cover the "previously ingested" set without needing
|
||||
// IngestOpts::default() to behave identically.
|
||||
let first =
|
||||
ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let first = ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(first.errors, 0, "first ingest must not error: {first:?}");
|
||||
assert!(first.new >= 1, "first ingest must create new docs: {first:?}");
|
||||
assert_eq!(first.unchanged, 0, "first ingest cannot have unchanged: {first:?}");
|
||||
assert!(
|
||||
first.new >= 1,
|
||||
"first ingest must create new docs: {first:?}"
|
||||
);
|
||||
assert_eq!(
|
||||
first.unchanged, 0,
|
||||
"first ingest cannot have unchanged: {first:?}"
|
||||
);
|
||||
|
||||
let scanned = first.scanned;
|
||||
|
||||
@@ -38,9 +43,15 @@ fn second_ingest_of_unchanged_corpus_marks_all_unchanged() {
|
||||
IngestOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(second.scanned, scanned, "second scanned matches first: {second:?}");
|
||||
assert_eq!(
|
||||
second.scanned, scanned,
|
||||
"second scanned matches first: {second:?}"
|
||||
);
|
||||
assert_eq!(second.new, 0, "no new docs on re-ingest: {second:?}");
|
||||
assert_eq!(second.updated, 0, "nothing should be marked updated: {second:?}");
|
||||
assert_eq!(
|
||||
second.updated, 0,
|
||||
"nothing should be marked updated: {second:?}"
|
||||
);
|
||||
assert_eq!(
|
||||
second.unchanged, scanned,
|
||||
"every doc must be Unchanged: {second:?}"
|
||||
@@ -52,10 +63,12 @@ fn second_ingest_of_unchanged_corpus_marks_all_unchanged() {
|
||||
fn force_reingest_bypasses_skip() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
let first =
|
||||
ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let first = ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(first.errors, 0, "first ingest must not error: {first:?}");
|
||||
assert!(first.new >= 1, "first ingest must create new docs: {first:?}");
|
||||
assert!(
|
||||
first.new >= 1,
|
||||
"first ingest must create new docs: {first:?}"
|
||||
);
|
||||
let scanned = first.scanned;
|
||||
|
||||
let second = ingest_with_config_opts(
|
||||
|
||||
@@ -107,13 +107,9 @@ fn cancel_none_is_uncancellable_default() {
|
||||
// ingest_with_config_progress (no cancel) runs to completion.
|
||||
let env = TestEnv::lexical_only();
|
||||
let (tx, rx) = mpsc::channel::<IngestEvent>();
|
||||
let report = kebab_app::ingest_with_config_progress(
|
||||
env.config.clone(),
|
||||
env.scope(),
|
||||
true,
|
||||
Some(tx),
|
||||
)
|
||||
.unwrap();
|
||||
let report =
|
||||
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), true, Some(tx))
|
||||
.unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
|
||||
@@ -107,5 +107,8 @@ fn ingest_file_errors_on_unsupported_extension() {
|
||||
|
||||
let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err();
|
||||
assert!(err.to_string().contains("unsupported extension"), "{err}");
|
||||
assert!(err.to_string().contains(".docx") || err.to_string().contains("docx"), "{err}");
|
||||
assert!(
|
||||
err.to_string().contains(".docx") || err.to_string().contains("docx"),
|
||||
"{err}"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -8,8 +8,7 @@ use common::TestEnv;
|
||||
#[test]
|
||||
fn ingest_then_list_inspects_round_trip() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
|
||||
// The fixture has 3 markdown files; first ingest should label them
|
||||
// all as New.
|
||||
@@ -27,17 +26,14 @@ fn ingest_then_list_inspects_round_trip() {
|
||||
}
|
||||
|
||||
// list_docs returns the 3 docs.
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
let docs =
|
||||
kebab_app::list_docs_with_config(env.config.clone(), kebab_core::DocFilter::default())
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3, "docs: {docs:?}");
|
||||
|
||||
// inspect_doc round-trips one of them.
|
||||
let any_doc_id = docs[0].doc_id.clone();
|
||||
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
|
||||
.unwrap();
|
||||
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id).unwrap();
|
||||
assert_eq!(canonical.doc_id, any_doc_id);
|
||||
assert!(!canonical.blocks.is_empty(), "blocks empty");
|
||||
}
|
||||
@@ -46,12 +42,10 @@ fn ingest_then_list_inspects_round_trip() {
|
||||
fn ingest_idempotent_on_second_run() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
let r1 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let r1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(r1.new, 3);
|
||||
|
||||
let r2 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let r2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
// Same files re-ingested — p9-fb-23 task 7 introduced the early-skip
|
||||
// path: when checksum + parser/chunker/embedding versions all match,
|
||||
// the second run reports `Unchanged` rather than `Updated`. Pre-p9-fb-23
|
||||
@@ -63,19 +57,16 @@ fn ingest_idempotent_on_second_run() {
|
||||
assert_eq!(r2.unchanged, 3, "second run unchanged: {r2:?}");
|
||||
|
||||
// list_docs still has 3 docs (no duplicates).
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
let docs =
|
||||
kebab_app::list_docs_with_config(env.config.clone(), kebab_core::DocFilter::default())
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingest_summary_only_drops_items() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
assert!(report.items.is_none(), "summary-only should null items");
|
||||
}
|
||||
@@ -87,12 +78,10 @@ fn ingest_records_ingest_runs_row_with_aggregate_counts() {
|
||||
// of every run. `summary_only=true` writes `items_json=NULL`; the
|
||||
// counts MUST still be present.
|
||||
let env = TestEnv::lexical_only();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
.unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
|
||||
let db_path = std::path::PathBuf::from(&env.config.storage.data_dir)
|
||||
.join("kebab.sqlite");
|
||||
let db_path = std::path::PathBuf::from(&env.config.storage.data_dir).join("kebab.sqlite");
|
||||
let conn = rusqlite::Connection::open(&db_path).expect("open kebab.sqlite");
|
||||
let (scanned, new_c, updated, skipped, errors, items_json): (
|
||||
i64,
|
||||
@@ -141,25 +130,18 @@ fn ingest_provider_none_skips_lance() {
|
||||
// tree shape (no `<data_dir>/lancedb` directory, or no `*.lance`
|
||||
// tables under it).
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 0, "lexical-only run must not error");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
let lance_dir = std::path::PathBuf::from(&env.config.storage.data_dir)
|
||||
.join("lancedb");
|
||||
let lance_dir = std::path::PathBuf::from(&env.config.storage.data_dir).join("lancedb");
|
||||
if lance_dir.exists() {
|
||||
// If the dir was created (e.g., by an earlier consumer touching
|
||||
// the path), it MUST contain no `.lance` tables.
|
||||
let mut had_lance_table = false;
|
||||
for entry in std::fs::read_dir(&lance_dir).expect("read lance_dir") {
|
||||
let entry = entry.unwrap();
|
||||
if entry
|
||||
.path()
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
== Some("lance")
|
||||
{
|
||||
if entry.path().extension().and_then(|s| s.to_str()) == Some("lance") {
|
||||
had_lance_table = true;
|
||||
break;
|
||||
}
|
||||
@@ -189,8 +171,7 @@ fn list_docs_filters_by_tags_any() {
|
||||
tags_any: vec!["rust".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
let rust_docs =
|
||||
kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
|
||||
let rust_docs = kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
|
||||
// intro.md and notes/cargo.md both tag "rust".
|
||||
assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}");
|
||||
}
|
||||
@@ -198,8 +179,9 @@ fn list_docs_filters_by_tags_any() {
|
||||
#[test]
|
||||
fn inspect_doc_not_found_returns_actionable_error() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bogus =
|
||||
kebab_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
|
||||
let bogus = kebab_core::DocumentId(
|
||||
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
|
||||
);
|
||||
let err = kebab_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
@@ -218,8 +200,7 @@ fn inspect_chunk_not_found_returns_actionable_error() {
|
||||
let bogus = kebab_core::ChunkId(
|
||||
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
|
||||
);
|
||||
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus)
|
||||
.unwrap_err();
|
||||
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus).unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("not found"), "got: {msg}");
|
||||
}
|
||||
@@ -251,22 +232,18 @@ fn ingest_with_config_opts_default_matches_legacy_behaviour() {
|
||||
#[test]
|
||||
fn ingest_stamps_chunker_version_on_document() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert!(report.new >= 1, "expected at least one new doc: {report:?}");
|
||||
assert_eq!(report.errors, 0, "no errors expected: {report:?}");
|
||||
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
let docs =
|
||||
kebab_app::list_docs_with_config(env.config.clone(), kebab_core::DocFilter::default())
|
||||
.unwrap();
|
||||
assert!(!docs.is_empty(), "no docs after ingest");
|
||||
|
||||
for doc_entry in &docs {
|
||||
let canonical =
|
||||
kebab_app::inspect_doc_with_config(env.config.clone(), &doc_entry.doc_id)
|
||||
.unwrap();
|
||||
kebab_app::inspect_doc_with_config(env.config.clone(), &doc_entry.doc_id).unwrap();
|
||||
assert!(
|
||||
canonical.last_chunker_version.is_some(),
|
||||
"last_chunker_version must be stamped for doc {}: got {:?}",
|
||||
|
||||
@@ -17,8 +17,7 @@ use std::sync::atomic::AtomicBool;
|
||||
use common::TestEnv;
|
||||
|
||||
fn ollama_endpoint() -> String {
|
||||
std::env::var("KEBAB_PDF_OCR_ENDPOINT")
|
||||
.unwrap_or_else(|_| "http://localhost:11434".to_string())
|
||||
std::env::var("KEBAB_PDF_OCR_ENDPOINT").unwrap_or_else(|_| "http://localhost:11434".to_string())
|
||||
}
|
||||
|
||||
fn make_ocr_env_real() -> TestEnv {
|
||||
@@ -43,8 +42,8 @@ fn make_ocr_env_real() -> TestEnv {
|
||||
fn ingest_with_mock_ocr_yields_pdf_ocr_summary() {
|
||||
let env = make_ocr_env_real();
|
||||
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest");
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).expect("ingest");
|
||||
|
||||
assert!(report.new >= 1, "at least one PDF ingested: {report:?}");
|
||||
|
||||
@@ -72,15 +71,13 @@ fn ingest_with_mock_ocr_yields_pdf_ocr_summary() {
|
||||
fn ocr_text_indexed_and_searchable() {
|
||||
let env = make_ocr_env_real();
|
||||
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest");
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).expect("ingest");
|
||||
|
||||
// Search for a Korean morpheme expected to appear in qwen2.5vl:3b OCR
|
||||
// output of the PoC ground-truth page. "다음" is a high-frequency token
|
||||
// in page1.txt truth file.
|
||||
let query = common::lexical_query("다음");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), query).expect("search");
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), query).expect("search");
|
||||
|
||||
assert!(
|
||||
!hits.is_empty(),
|
||||
|
||||
@@ -13,13 +13,9 @@ use kebab_core::IngestItemKind;
|
||||
fn run_with_progress() -> Vec<IngestEvent> {
|
||||
let env = TestEnv::lexical_only();
|
||||
let (tx, rx) = mpsc::channel::<IngestEvent>();
|
||||
let report = kebab_app::ingest_with_config_progress(
|
||||
env.config.clone(),
|
||||
env.scope(),
|
||||
false,
|
||||
Some(tx),
|
||||
)
|
||||
.unwrap();
|
||||
let report =
|
||||
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), false, Some(tx))
|
||||
.unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
@@ -116,13 +112,9 @@ fn ingest_with_config_progress_none_matches_ingest_with_config() {
|
||||
// `ingest_with_config_progress(..., None)` must produce identical
|
||||
// reports modulo wall-clock duration.
|
||||
let env = TestEnv::lexical_only();
|
||||
let r_none = kebab_app::ingest_with_config_progress(
|
||||
env.config.clone(),
|
||||
env.scope(),
|
||||
true,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
let r_none =
|
||||
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), true, None)
|
||||
.unwrap();
|
||||
assert_eq!(r_none.scanned, 3);
|
||||
assert_eq!(r_none.new, 3);
|
||||
}
|
||||
@@ -134,13 +126,9 @@ fn dropped_receiver_does_not_panic_or_fail_ingest() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let (tx, rx) = mpsc::channel::<IngestEvent>();
|
||||
drop(rx);
|
||||
let report = kebab_app::ingest_with_config_progress(
|
||||
env.config.clone(),
|
||||
env.scope(),
|
||||
true,
|
||||
Some(tx),
|
||||
)
|
||||
.unwrap();
|
||||
let report =
|
||||
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), true, Some(tx))
|
||||
.unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
}
|
||||
|
||||
@@ -185,13 +173,8 @@ fn pdf_ocr_progress_emits_started_finished_events() {
|
||||
};
|
||||
|
||||
let (tx, rx) = mpsc::channel::<IngestEvent>();
|
||||
let _report = kebab_app::ingest_with_config_progress(
|
||||
config,
|
||||
scope,
|
||||
false,
|
||||
Some(tx),
|
||||
)
|
||||
.expect("ingest_with_config_progress");
|
||||
let _report = kebab_app::ingest_with_config_progress(config, scope, false, Some(tx))
|
||||
.expect("ingest_with_config_progress");
|
||||
|
||||
let events: Vec<_> = rx.iter().collect();
|
||||
|
||||
@@ -204,7 +187,16 @@ fn pdf_ocr_progress_emits_started_finished_events() {
|
||||
.filter(|e| matches!(e, IngestEvent::PdfOcrFinished { .. }))
|
||||
.count();
|
||||
|
||||
assert!(started_count >= 1, "PdfOcrStarted 가 ≥ 1 emit 됨 (got {started_count})");
|
||||
assert!(finished_count >= 1, "PdfOcrFinished 가 ≥ 1 emit 됨 (got {finished_count})");
|
||||
assert_eq!(started_count, finished_count, "Started 와 Finished 의 count 일치");
|
||||
assert!(
|
||||
started_count >= 1,
|
||||
"PdfOcrStarted 가 ≥ 1 emit 됨 (got {started_count})"
|
||||
);
|
||||
assert!(
|
||||
finished_count >= 1,
|
||||
"PdfOcrFinished 가 ≥ 1 emit 됨 (got {finished_count})"
|
||||
);
|
||||
assert_eq!(
|
||||
started_count, finished_count,
|
||||
"Started 와 Finished 의 count 일치"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -29,12 +29,14 @@ fn ingest_stdin_writes_frontmatter_and_reports_new() {
|
||||
"## Body content\n\nMore.",
|
||||
"Article X",
|
||||
Some("https://example.com/x"),
|
||||
).unwrap();
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(report.new, 1, "{report:?}");
|
||||
|
||||
// _external/ contains exactly one .md file with frontmatter.
|
||||
let ext_dir = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
|
||||
let entries: Vec<_> = fs::read_dir(&ext_dir).unwrap()
|
||||
let entries: Vec<_> = fs::read_dir(&ext_dir)
|
||||
.unwrap()
|
||||
.filter_map(std::result::Result::ok)
|
||||
.collect();
|
||||
assert_eq!(entries.len(), 1);
|
||||
@@ -50,16 +52,13 @@ fn ingest_stdin_without_source_uri() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let cfg = fresh_cfg(dir.path());
|
||||
|
||||
let report = kebab_app::ingest_stdin_with_config(
|
||||
cfg.clone(),
|
||||
"## Body",
|
||||
"Title",
|
||||
None,
|
||||
).unwrap();
|
||||
let report =
|
||||
kebab_app::ingest_stdin_with_config(cfg.clone(), "## Body", "Title", None).unwrap();
|
||||
assert_eq!(report.new, 1);
|
||||
|
||||
let ext_dir = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
|
||||
let entries: Vec<_> = fs::read_dir(&ext_dir).unwrap()
|
||||
let entries: Vec<_> = fs::read_dir(&ext_dir)
|
||||
.unwrap()
|
||||
.filter_map(std::result::Result::ok)
|
||||
.collect();
|
||||
let content = fs::read_to_string(entries[0].path()).unwrap();
|
||||
|
||||
@@ -17,9 +17,8 @@ fn init_workspace_header_lists_supported_extensions() {
|
||||
}
|
||||
kebab_app::init_workspace(true).expect("init_workspace");
|
||||
let cfg_path = kebab_config::Config::xdg_config_path();
|
||||
let body = std::fs::read_to_string(&cfg_path).unwrap_or_else(|e| {
|
||||
panic!("read config at {}: {e}", cfg_path.display())
|
||||
});
|
||||
let body = std::fs::read_to_string(&cfg_path)
|
||||
.unwrap_or_else(|e| panic!("read config at {}: {e}", cfg_path.display()));
|
||||
assert!(
|
||||
body.contains("처리 가능한 형식"),
|
||||
"header lists supported types section: body=\n{body}"
|
||||
|
||||
@@ -9,9 +9,8 @@ use std::sync::atomic::AtomicBool;
|
||||
use common::mock_ocr::MockOcrEngine;
|
||||
use kebab_app::pdf_ocr_apply::{PdfOcrOpts, apply_ocr_to_pdf_pages};
|
||||
use kebab_core::{
|
||||
AssetStorage, Block, CanonicalDocument, Checksum, ExtractConfig, ExtractContext,
|
||||
Extractor, Inline, Lang, MediaType, RawAsset, SourceSpan,
|
||||
SourceUri, WorkspacePath, id_for_asset,
|
||||
AssetStorage, Block, CanonicalDocument, Checksum, ExtractConfig, ExtractContext, Extractor,
|
||||
Inline, Lang, MediaType, RawAsset, SourceSpan, SourceUri, WorkspacePath, id_for_asset,
|
||||
};
|
||||
use kebab_parse_pdf::PdfTextExtractor;
|
||||
use time::OffsetDateTime;
|
||||
@@ -258,8 +257,8 @@ fn f6_flatedecode_skipped_with_warning() {
|
||||
// Test 7: F7 CCITTFax → skip + warning (verifier M-4 split)
|
||||
#[test]
|
||||
fn f7_ccittfax_skipped_with_warning() {
|
||||
let bytes = std::fs::read("../kebab-parse-pdf/tests/fixtures/ccitt.pdf")
|
||||
.expect("F7 fixture missing");
|
||||
let bytes =
|
||||
std::fs::read("../kebab-parse-pdf/tests/fixtures/ccitt.pdf").expect("F7 fixture missing");
|
||||
let mut canonical = canonical_with_empty_block(); // page-1 block from F1
|
||||
let engine = MockOcrEngine::single("SHOULD_NOT_BE_CALLED", false);
|
||||
let opts = default_opts(true);
|
||||
|
||||
@@ -46,17 +46,13 @@ fn build_text_pdf(pages: &[Option<&str>]) -> Vec<u8> {
|
||||
operations: vec![
|
||||
Operation::new("BT", vec![]),
|
||||
Operation::new("Tf", vec!["F1".into(), 24.into()]),
|
||||
Operation::new(
|
||||
"Td",
|
||||
vec![Object::Integer(100), Object::Integer(700)],
|
||||
),
|
||||
Operation::new("Td", vec![Object::Integer(100), Object::Integer(700)]),
|
||||
Operation::new("Tj", vec![Object::string_literal(*text)]),
|
||||
Operation::new("ET", vec![]),
|
||||
],
|
||||
};
|
||||
let stream_data = content.encode().expect("content encode");
|
||||
let content_id =
|
||||
doc.add_object(Stream::new(dictionary! {}, stream_data));
|
||||
let content_id = doc.add_object(Stream::new(dictionary! {}, stream_data));
|
||||
page_dict.set("Contents", content_id);
|
||||
}
|
||||
let page_id = doc.add_object(page_dict);
|
||||
@@ -76,8 +72,7 @@ fn build_text_pdf(pages: &[Option<&str>]) -> Vec<u8> {
|
||||
Object::Integer(842),
|
||||
],
|
||||
};
|
||||
doc.objects
|
||||
.insert(pages_id, Object::Dictionary(pages_dict));
|
||||
doc.objects.insert(pages_id, Object::Dictionary(pages_dict));
|
||||
|
||||
let catalog_id = doc.add_object(dictionary! {
|
||||
"Type" => "Catalog",
|
||||
@@ -146,9 +141,8 @@ fn ingest_3_page_pdf_produces_one_doc_and_per_page_chunks() {
|
||||
write_pdf(&env.workspace_root, "three.pdf", &bytes);
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false)
|
||||
.expect("PDF ingest must succeed");
|
||||
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false)
|
||||
.expect("PDF ingest must succeed");
|
||||
|
||||
assert_eq!(report.errors, 0);
|
||||
let items = report.items.as_ref().expect("items present");
|
||||
@@ -157,8 +151,16 @@ fn ingest_3_page_pdf_produces_one_doc_and_per_page_chunks() {
|
||||
.find(|i| i.doc_path.0.ends_with("three.pdf"))
|
||||
.expect("PDF item present");
|
||||
assert_eq!(pdf_item.kind, IngestItemKind::New);
|
||||
assert_eq!(pdf_item.block_count, Some(3), "one Block::Paragraph per page");
|
||||
assert_eq!(pdf_item.chunk_count, Some(3), "one chunk per non-empty page");
|
||||
assert_eq!(
|
||||
pdf_item.block_count,
|
||||
Some(3),
|
||||
"one Block::Paragraph per page"
|
||||
);
|
||||
assert_eq!(
|
||||
pdf_item.chunk_count,
|
||||
Some(3),
|
||||
"one chunk per non-empty page"
|
||||
);
|
||||
assert_eq!(
|
||||
pdf_item.parser_version.as_ref().map(|p| p.0.as_str()),
|
||||
Some("pdf-text-v1")
|
||||
@@ -169,11 +171,8 @@ fn ingest_3_page_pdf_produces_one_doc_and_per_page_chunks() {
|
||||
);
|
||||
|
||||
// Inspect the stored doc to confirm SourceSpan::Page round-trip.
|
||||
let doc = kebab_app::inspect_doc_with_config(
|
||||
cfg,
|
||||
pdf_item.doc_id.as_ref().unwrap(),
|
||||
)
|
||||
.expect("inspect_doc returns the PDF document");
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, pdf_item.doc_id.as_ref().unwrap())
|
||||
.expect("inspect_doc returns the PDF document");
|
||||
assert_eq!(doc.blocks.len(), 3);
|
||||
for (i, block) in doc.blocks.iter().enumerate() {
|
||||
let want_page = (i as u32) + 1;
|
||||
@@ -202,8 +201,7 @@ fn re_ingest_identical_pdf_produces_unchanged_with_same_doc_id() {
|
||||
write_pdf(&env.workspace_root, "stable.pdf", &bytes);
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report1 =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let report1 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let item1 = report1
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -214,8 +212,7 @@ fn re_ingest_identical_pdf_produces_unchanged_with_same_doc_id() {
|
||||
.unwrap();
|
||||
assert_eq!(item1.kind, IngestItemKind::New);
|
||||
|
||||
let report2 =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let report2 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let item2 = report2
|
||||
.items
|
||||
.unwrap()
|
||||
@@ -239,8 +236,7 @@ fn re_ingest_edited_pdf_produces_new_doc_id() {
|
||||
std::fs::write(&path, &bytes_v1).unwrap();
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report_v1 =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let report_v1 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let id_v1 = report_v1
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -252,12 +248,10 @@ fn re_ingest_edited_pdf_produces_new_doc_id() {
|
||||
.clone()
|
||||
.unwrap();
|
||||
|
||||
let bytes_v2 =
|
||||
build_text_pdf(&[Some("VERSION TWO entirely different body content.")]);
|
||||
let bytes_v2 = build_text_pdf(&[Some("VERSION TWO entirely different body content.")]);
|
||||
std::fs::write(&path, &bytes_v2).unwrap();
|
||||
|
||||
let report_v2 =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let report_v2 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let item_v2 = report_v2
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -282,9 +276,11 @@ fn encrypted_pdf_fails_with_qpdf_hint() {
|
||||
write_pdf(&env.workspace_root, "secret.pdf", &bytes);
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 1, "encrypted PDF must increment errors exactly once");
|
||||
let report = kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
|
||||
assert_eq!(
|
||||
report.errors, 1,
|
||||
"encrypted PDF must increment errors exactly once"
|
||||
);
|
||||
let items = report.items.as_ref().unwrap();
|
||||
let pdf_item = items
|
||||
.iter()
|
||||
@@ -310,9 +306,11 @@ fn corrupt_pdf_fails_without_storing() {
|
||||
write_pdf(&env.workspace_root, "corrupt.pdf", &bytes);
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 1, "corrupt PDF must increment errors exactly once");
|
||||
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(
|
||||
report.errors, 1,
|
||||
"corrupt PDF must increment errors exactly once"
|
||||
);
|
||||
let items = report.items.as_ref().unwrap();
|
||||
let pdf_item = items
|
||||
.iter()
|
||||
@@ -322,11 +320,8 @@ fn corrupt_pdf_fails_without_storing() {
|
||||
|
||||
// Confirm the doc was NOT stored — list_docs returns nothing for
|
||||
// this path.
|
||||
let summaries = kebab_app::list_docs_with_config(
|
||||
cfg,
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
let summaries =
|
||||
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default()).unwrap();
|
||||
assert!(
|
||||
!summaries
|
||||
.iter()
|
||||
@@ -341,14 +336,15 @@ fn corrupt_pdf_fails_without_storing() {
|
||||
#[test]
|
||||
fn mixed_page_pdf_stores_asset_with_scanned_candidate_warning() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bytes =
|
||||
build_text_pdf(&[Some("first page"), None, Some("third page")]);
|
||||
let bytes = build_text_pdf(&[Some("first page"), None, Some("third page")]);
|
||||
write_pdf(&env.workspace_root, "mixed.pdf", &bytes);
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 0, "scanned candidate is a Warning, not Error");
|
||||
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(
|
||||
report.errors, 0,
|
||||
"scanned candidate is a Warning, not Error"
|
||||
);
|
||||
let pdf_item = report
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -368,11 +364,7 @@ fn mixed_page_pdf_stores_asset_with_scanned_candidate_warning() {
|
||||
"pdf-page-v1.1 emits 0 chunks for the empty page; total = 2"
|
||||
);
|
||||
|
||||
let doc = kebab_app::inspect_doc_with_config(
|
||||
cfg,
|
||||
pdf_item.doc_id.as_ref().unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, pdf_item.doc_id.as_ref().unwrap()).unwrap();
|
||||
let warnings: Vec<_> = doc
|
||||
.provenance
|
||||
.events
|
||||
@@ -419,8 +411,7 @@ fn ingest_report_arithmetic_invariant_holds_with_corrupt_pdf() {
|
||||
write_pdf(&env.workspace_root, "broken.pdf", &corrupt_pdf());
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
|
||||
let report = kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
|
||||
let total = report.new + report.updated + report.skipped + report.errors;
|
||||
assert_eq!(
|
||||
report.scanned, total,
|
||||
@@ -441,14 +432,12 @@ fn long_pdf_round_trips_through_lexical_pipeline() {
|
||||
let pages: Vec<String> = (1..=50)
|
||||
.map(|i| format!("Page {i} body — lorem ipsum dolor sit amet."))
|
||||
.collect();
|
||||
let page_refs: Vec<Option<&str>> =
|
||||
pages.iter().map(|s| Some(s.as_str())).collect();
|
||||
let page_refs: Vec<Option<&str>> = pages.iter().map(|s| Some(s.as_str())).collect();
|
||||
let bytes = build_text_pdf(&page_refs);
|
||||
write_pdf(&env.workspace_root, "long.pdf", &bytes);
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 0);
|
||||
let pdf_item = report
|
||||
.items
|
||||
@@ -466,8 +455,7 @@ fn long_pdf_round_trips_through_lexical_pipeline() {
|
||||
|
||||
// Round-trip: list_docs sees the long PDF.
|
||||
let summaries =
|
||||
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())
|
||||
.unwrap();
|
||||
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default()).unwrap();
|
||||
assert!(summaries.iter().any(|s| s.doc_path.0.ends_with("long.pdf")));
|
||||
}
|
||||
|
||||
@@ -476,13 +464,11 @@ fn long_pdf_round_trips_through_lexical_pipeline() {
|
||||
#[test]
|
||||
fn inspect_doc_surfaces_page_spans() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bytes =
|
||||
build_text_pdf(&[Some("alpha body"), Some("beta body"), Some("gamma body")]);
|
||||
let bytes = build_text_pdf(&[Some("alpha body"), Some("beta body"), Some("gamma body")]);
|
||||
write_pdf(&env.workspace_root, "inspect.pdf", &bytes);
|
||||
let cfg = cfg_with_pdf(&env);
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
|
||||
let pdf_item = report
|
||||
.items
|
||||
.as_ref()
|
||||
@@ -490,19 +476,12 @@ fn inspect_doc_surfaces_page_spans() {
|
||||
.iter()
|
||||
.find(|i| i.doc_path.0.ends_with("inspect.pdf"))
|
||||
.unwrap();
|
||||
let doc = kebab_app::inspect_doc_with_config(
|
||||
cfg,
|
||||
pdf_item.doc_id.as_ref().unwrap(),
|
||||
)
|
||||
.unwrap();
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, pdf_item.doc_id.as_ref().unwrap()).unwrap();
|
||||
assert_eq!(doc.parser_version.0, "pdf-text-v1");
|
||||
assert_eq!(doc.blocks.len(), 3);
|
||||
for block in &doc.blocks {
|
||||
match block {
|
||||
Block::Paragraph(p) => assert!(matches!(
|
||||
p.common.source_span,
|
||||
SourceSpan::Page { .. }
|
||||
)),
|
||||
Block::Paragraph(p) => assert!(matches!(p.common.source_span, SourceSpan::Page { .. })),
|
||||
other => panic!("expected Paragraph, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,19 +78,15 @@ fn reset_orphans_only_purges_out_of_scope_docs() {
|
||||
narrow_cfg.workspace.exclude = vec!["b.rs".to_string(), "c.rs".to_string()];
|
||||
|
||||
// Run orphans-only reset.
|
||||
let report = execute(ResetScope::OrphansOnly, &narrow_cfg)
|
||||
.expect("orphans-only reset must succeed");
|
||||
let report =
|
||||
execute(ResetScope::OrphansOnly, &narrow_cfg).expect("orphans-only reset must succeed");
|
||||
|
||||
assert_eq!(
|
||||
report.orphans_purged, 2,
|
||||
"expected 2 orphans purged (b.rs + c.rs): {report:?}"
|
||||
);
|
||||
|
||||
let mut purged: Vec<String> = report
|
||||
.purged_paths
|
||||
.iter()
|
||||
.map(|p| p.0.clone())
|
||||
.collect();
|
||||
let mut purged: Vec<String> = report.purged_paths.iter().map(|p| p.0.clone()).collect();
|
||||
purged.sort();
|
||||
assert_eq!(
|
||||
purged,
|
||||
|
||||
@@ -37,8 +37,14 @@ fn schema_models_active_arrays_empty_on_empty_corpus() {
|
||||
drop(store);
|
||||
|
||||
let s = schema_with_config(&cfg).unwrap();
|
||||
assert!(s.models.active_parsers.is_empty(), "empty corpus → no parsers");
|
||||
assert!(s.models.active_chunkers.is_empty(), "empty corpus → no chunkers");
|
||||
assert!(
|
||||
s.models.active_parsers.is_empty(),
|
||||
"empty corpus → no parsers"
|
||||
);
|
||||
assert!(
|
||||
s.models.active_chunkers.is_empty(),
|
||||
"empty corpus → no chunkers"
|
||||
);
|
||||
// backward compat: 기존 단일 field 는 markdown default 보존.
|
||||
assert_eq!(s.models.parser_version, kebab_parse_md::PARSER_VERSION);
|
||||
}
|
||||
@@ -55,10 +61,19 @@ fn schema_emits_active_parsers_and_chunkers_array_after_ingest() {
|
||||
kebab_app::ingest_with_config(cfg.clone(), scope, false).unwrap();
|
||||
|
||||
let s = schema_with_config(&cfg).unwrap();
|
||||
assert!(!s.models.active_parsers.is_empty(), "active_parsers populated after ingest");
|
||||
assert!(!s.models.active_chunkers.is_empty(), "active_chunkers populated after ingest");
|
||||
assert!(
|
||||
!s.models.active_parsers.is_empty(),
|
||||
"active_parsers populated after ingest"
|
||||
);
|
||||
assert!(
|
||||
!s.models.active_chunkers.is_empty(),
|
||||
"active_chunkers populated after ingest"
|
||||
);
|
||||
// active arrays must be sorted (ORDER BY in SQL).
|
||||
let mut sorted = s.models.active_parsers.clone();
|
||||
sorted.sort();
|
||||
assert_eq!(s.models.active_parsers, sorted, "active_parsers must be sorted");
|
||||
assert_eq!(
|
||||
s.models.active_parsers, sorted,
|
||||
"active_parsers must be sorted"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -27,7 +27,10 @@ fn search_with_opts_no_budget_matches_search() {
|
||||
|
||||
assert_eq!(resp.hits.len(), baseline.len());
|
||||
assert!(!resp.truncated);
|
||||
assert!(resp.next_cursor.is_none(), "k=5 against 1 doc → no next page");
|
||||
assert!(
|
||||
resp.next_cursor.is_none(),
|
||||
"k=5 against 1 doc → no next page"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -62,7 +65,11 @@ fn budget_truncates_snippets_when_below_threshold() {
|
||||
fn cursor_paginates_to_next_page() {
|
||||
let env = common::TestEnv::new();
|
||||
for i in 0..6 {
|
||||
common::ingest_md(&env, &format!("d{i}.md"), &format!("# T{i}\n\nrust topic {i}\n"));
|
||||
common::ingest_md(
|
||||
&env,
|
||||
&format!("d{i}.md"),
|
||||
&format!("# T{i}\n\nrust topic {i}\n"),
|
||||
);
|
||||
}
|
||||
let app = env.app();
|
||||
|
||||
@@ -88,7 +95,10 @@ fn cursor_paginates_to_next_page() {
|
||||
page1.hits.iter().map(|h| h.chunk_id.0.clone()).collect();
|
||||
let p2_ids: std::collections::HashSet<_> =
|
||||
page2.hits.iter().map(|h| h.chunk_id.0.clone()).collect();
|
||||
assert!(p1_ids.is_disjoint(&p2_ids), "page 2 must not repeat page 1 hits");
|
||||
assert!(
|
||||
p1_ids.is_disjoint(&p2_ids),
|
||||
"page 2 must not repeat page 1 hits"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -75,11 +75,9 @@ fn lexical_multi_token_korean_query_hits() {
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
.expect("ingest must succeed");
|
||||
|
||||
let hits = kebab_app::search_with_config(
|
||||
env.config.clone(),
|
||||
common::lexical_query("해시 충돌"),
|
||||
)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), common::lexical_query("해시 충돌"))
|
||||
.expect("search must succeed");
|
||||
|
||||
assert!(
|
||||
!hits.is_empty(),
|
||||
@@ -113,11 +111,9 @@ fn lexical_mixed_korean_english_multi_token_query_hits() {
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
.expect("ingest must succeed");
|
||||
|
||||
let hits = kebab_app::search_with_config(
|
||||
env.config.clone(),
|
||||
common::lexical_query("Rust 충돌은"),
|
||||
)
|
||||
.expect("search must succeed");
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), common::lexical_query("Rust 충돌은"))
|
||||
.expect("search must succeed");
|
||||
|
||||
assert!(
|
||||
!hits.is_empty(),
|
||||
|
||||
@@ -35,8 +35,8 @@ fn lexical_search_returns_hits_after_ingest() {
|
||||
fn lexical_search_empty_query_returns_empty() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), common::lexical_query(" "))
|
||||
.unwrap();
|
||||
let hits =
|
||||
kebab_app::search_with_config(env.config.clone(), common::lexical_query(" ")).unwrap();
|
||||
assert!(hits.is_empty(), "blank query must short-circuit empty");
|
||||
}
|
||||
|
||||
@@ -107,17 +107,17 @@ fn search_uncached_returns_same_hits_as_cached() {
|
||||
#[test]
|
||||
fn first_ingest_bumps_corpus_revision() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let store_before =
|
||||
kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
|
||||
let store_before = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
|
||||
store_before.run_migrations().unwrap();
|
||||
assert_eq!(store_before.corpus_revision(), 0, "fresh store seeds 0");
|
||||
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert!(report.new + report.updated > 0, "first ingest must commit ≥1 doc");
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert!(
|
||||
report.new + report.updated > 0,
|
||||
"first ingest must commit ≥1 doc"
|
||||
);
|
||||
|
||||
let store_after =
|
||||
kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
|
||||
let store_after = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
|
||||
assert!(
|
||||
store_after.corpus_revision() >= 1,
|
||||
"ingest commit must bump corpus_revision (got {})",
|
||||
|
||||
@@ -29,7 +29,9 @@ fn fresh_doc_is_not_stale_with_default_threshold() {
|
||||
assert!(
|
||||
hits.iter().all(|h| !h.stale),
|
||||
"freshly-ingested doc must not be stale at default 30d threshold: {:?}",
|
||||
hits.iter().map(|h| (h.doc_path.0.clone(), h.stale)).collect::<Vec<_>>()
|
||||
hits.iter()
|
||||
.map(|h| (h.doc_path.0.clone(), h.stale))
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -50,7 +52,9 @@ fn threshold_zero_disables_staleness() {
|
||||
assert!(
|
||||
hits.iter().all(|h| !h.stale),
|
||||
"threshold=0 disables staleness even for year-old docs: {:?}",
|
||||
hits.iter().map(|h| (h.doc_path.0.clone(), h.stale)).collect::<Vec<_>>()
|
||||
hits.iter()
|
||||
.map(|h| (h.doc_path.0.clone(), h.stale))
|
||||
.collect::<Vec<_>>()
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,8 @@ use common::TestEnv;
|
||||
fn require_avx_or_panic() {
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
{
|
||||
assert!(std::is_x86_feature_detected!("avx"),
|
||||
assert!(
|
||||
std::is_x86_feature_detected!("avx"),
|
||||
"kb-app vector integration test requires AVX-capable hardware; \
|
||||
host CPU lacks AVX. Run on an AVX-capable machine."
|
||||
);
|
||||
@@ -28,8 +29,7 @@ fn ingest_then_hybrid_search_returns_hits() {
|
||||
require_avx_or_panic();
|
||||
|
||||
let env = TestEnv::with_embeddings();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
@@ -55,8 +55,7 @@ fn ingest_then_vector_search_carries_embedding_model() {
|
||||
require_avx_or_panic();
|
||||
|
||||
let env = TestEnv::with_embeddings();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
|
||||
@@ -13,11 +13,7 @@ fn unsupported_extension_skip_carries_warning_and_is_aggregated() {
|
||||
std::fs::write(workspace_root.join("legacy.docx"), b"unsupported").unwrap();
|
||||
std::fs::write(workspace_root.join("Makefile"), b"unsupported").unwrap();
|
||||
|
||||
let report = kebab_app::ingest_with_config(
|
||||
env.config.clone(),
|
||||
env.scope(),
|
||||
false,
|
||||
).unwrap();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
|
||||
let items = report.items.as_ref().expect("items array populated");
|
||||
let docx_item = items
|
||||
@@ -39,5 +35,8 @@ fn unsupported_extension_skip_carries_warning_and_is_aggregated() {
|
||||
vec!["unsupported media type: <no-ext>".to_string()],
|
||||
);
|
||||
assert_eq!(report.skipped_by_extension.get("docx").copied(), Some(1));
|
||||
assert_eq!(report.skipped_by_extension.get("<no-ext>").copied(), Some(1));
|
||||
assert_eq!(
|
||||
report.skipped_by_extension.get("<no-ext>").copied(),
|
||||
Some(1)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -44,8 +44,8 @@ fn twin_files_fetch_span_uses_correct_asset() {
|
||||
std::fs::write(dir_b.join("note.md"), content).unwrap();
|
||||
|
||||
// Ingest all files (fixture workspace + our two new twins).
|
||||
let report = ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("ingest must succeed");
|
||||
let report =
|
||||
ingest_with_config(env.config.clone(), env.scope(), false).expect("ingest must succeed");
|
||||
assert_eq!(report.errors, 0, "no ingest errors; report={report:?}");
|
||||
|
||||
// Both twin paths must appear as New in the report.
|
||||
@@ -53,8 +53,7 @@ fn twin_files_fetch_span_uses_correct_asset() {
|
||||
let twin_items: Vec<_> = items
|
||||
.iter()
|
||||
.filter(|i| {
|
||||
i.doc_path.0.ends_with("src_a/note.md")
|
||||
|| i.doc_path.0.ends_with("src_b/note.md")
|
||||
i.doc_path.0.ends_with("src_a/note.md") || i.doc_path.0.ends_with("src_b/note.md")
|
||||
})
|
||||
.collect();
|
||||
assert_eq!(
|
||||
@@ -149,7 +148,10 @@ fn twin_files_fetch_span_uses_correct_asset() {
|
||||
// at either twin, making one twin's span fetch behave incorrectly.
|
||||
let report2 = ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("second ingest must succeed");
|
||||
assert_eq!(report2.errors, 0, "no ingest errors on second run; report={report2:?}");
|
||||
assert_eq!(
|
||||
report2.errors, 0,
|
||||
"no ingest errors on second run; report={report2:?}"
|
||||
);
|
||||
|
||||
// Re-open app after second ingest and verify span still works on both.
|
||||
let app2 = env.app();
|
||||
|
||||
@@ -43,9 +43,7 @@ fn twin_files_second_ingest_is_unchanged() {
|
||||
let items = first.items.as_ref().expect("items must be present");
|
||||
let twin_items: Vec<_> = items
|
||||
.iter()
|
||||
.filter(|i| {
|
||||
i.doc_path.0.ends_with("__init__.py")
|
||||
})
|
||||
.filter(|i| i.doc_path.0.ends_with("__init__.py"))
|
||||
.collect();
|
||||
assert_eq!(
|
||||
twin_items.len(),
|
||||
@@ -63,8 +61,14 @@ fn twin_files_second_ingest_is_unchanged() {
|
||||
// Second ingest — same files, same content → both must be Unchanged.
|
||||
let second = ingest_with_config(env.config.clone(), env.scope(), false)
|
||||
.expect("second ingest must succeed");
|
||||
assert_eq!(second.errors, 0, "second ingest: no errors; report={second:?}");
|
||||
assert_eq!(second.new, 0, "second ingest: no new docs; report={second:?}");
|
||||
assert_eq!(
|
||||
second.errors, 0,
|
||||
"second ingest: no errors; report={second:?}"
|
||||
);
|
||||
assert_eq!(
|
||||
second.new, 0,
|
||||
"second ingest: no new docs; report={second:?}"
|
||||
);
|
||||
assert_eq!(
|
||||
second.updated, 0,
|
||||
"second ingest: no updated docs (twin-file bug would set this to 2); report={second:?}"
|
||||
|
||||
Reference in New Issue
Block a user