style: cargo fmt --all (round 4 ingest log feature follow-up)

Phase C4 executor 의 마지막 `fix(test): clippy + fmt fixes` commit 이
test file 에만 fmt 를 적용해 workspace 전체 fmt 가 누락된 것을 발견 →
`cargo fmt --all` 적용. 모든 import 의 alphabetical reorder 와 line wrapping 을 rustfmt 기준에 맞게 정리.

추가 untracked artifact 동시 commit:
- docs/superpowers/specs/2026-05-28-v0.20-ingest-log-spec.md (491 line, ACCEPT)
- docs/superpowers/plans/2026-05-28-v0.20-ingest-log-plan.md (616 line, ACCEPT)

workspace test: 1370 passed / 0 failed / 50 ignored, ingest_log_smoke green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-28 04:18:40 +00:00
parent 445b096215
commit 685007789a
235 changed files with 6520 additions and 3955 deletions

View File

@@ -29,9 +29,8 @@ fn rust_file_ingests_and_searches_as_code_citation() {
)
.unwrap();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
assert_eq!(report.errors, 0, "no errors expected: {report:?}");
let items = report.items.as_ref().expect("items present");
@@ -127,9 +126,8 @@ fn rust_code_search_hit_has_repo() {
)
.unwrap();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
assert_eq!(report.errors, 0, "no ingest errors: {report:?}");
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query("mul"))
@@ -147,8 +145,7 @@ fn rust_code_search_hit_has_repo() {
.and_then(|n| n.to_str())
.map(str::to_owned);
assert_eq!(
h.repo,
expected_repo,
h.repo, expected_repo,
"SearchHit.repo must match the workspace dir name (detect_repo result)"
);
// Also sanity-check code_lang is still filled.
@@ -177,9 +174,8 @@ fn python_file_ingests_and_searches_as_code_citation() {
)
.unwrap();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
assert!(report.new >= 1, "python file ingested: {report:?}");
@@ -254,9 +250,8 @@ fn typescript_file_ingests_and_searches_as_code_citation() {
)
.unwrap();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
assert!(report.new >= 1, "ts file ingested: {report:?}");
@@ -331,9 +326,8 @@ fn javascript_file_ingests_and_searches_as_code_citation() {
)
.unwrap();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
assert!(report.new >= 1, "js file ingested: {report:?}");
@@ -515,7 +509,11 @@ fn java_file_ingests_and_searches_as_code_citation() {
line_start,
..
} => {
assert_eq!(lang.as_deref(), Some("java"), "citation.lang must be 'java'");
assert_eq!(
lang.as_deref(),
Some("java"),
"citation.lang must be 'java'"
);
assert_eq!(
symbol.as_deref(),
Some("com.foo.Foo.bar"),
@@ -586,7 +584,11 @@ fn kotlin_file_ingests_and_searches_as_code_citation() {
line_start,
..
} => {
assert_eq!(lang.as_deref(), Some("kotlin"), "citation.lang must be 'kotlin'");
assert_eq!(
lang.as_deref(),
Some("kotlin"),
"citation.lang must be 'kotlin'"
);
assert_eq!(
symbol.as_deref(),
Some("com.foo.Foo.bar"),
@@ -651,8 +653,8 @@ fn tier2_k8s_yaml_ingest_searchable() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
let h = hits
.iter()
@@ -666,7 +668,11 @@ fn tier2_k8s_yaml_ingest_searchable() {
line_start,
..
} => {
assert_eq!(lang.as_deref(), Some("yaml"), "citation.lang must be 'yaml'");
assert_eq!(
lang.as_deref(),
Some("yaml"),
"citation.lang must be 'yaml'"
);
assert_eq!(
symbol.as_deref(),
Some("Deployment/prod/api"),
@@ -730,8 +736,8 @@ fn tier2_dockerfile_ingest_searchable() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
let h = hits
.iter()
@@ -813,8 +819,8 @@ fn tier2_cargo_toml_ingest_searchable() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
let h = hits
.iter()
@@ -896,8 +902,8 @@ fn tier3_shell_ingest_searchable() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
let h = hits
.iter()
@@ -987,8 +993,8 @@ fn tier3_yaml_fallback_picks_up_non_k8s_yaml() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
let h = hits
.iter()
@@ -1031,14 +1037,9 @@ fn tier3_yaml_fallback_picks_up_non_k8s_yaml() {
fn rust_file_re_ingest_is_unchanged() {
let env = TestEnv::lexical_only();
std::fs::write(
env.workspace_root.join("stable.rs"),
"pub fn noop() {}\n",
)
.unwrap();
std::fs::write(env.workspace_root.join("stable.rs"), "pub fn noop() {}\n").unwrap();
let r1 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let r1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let item1 = r1
.items
.as_ref()
@@ -1049,8 +1050,7 @@ fn rust_file_re_ingest_is_unchanged() {
.unwrap();
assert_eq!(item1.kind, IngestItemKind::New);
let r2 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let r2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let item2 = r2
.items
.unwrap()
@@ -1081,9 +1081,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
)
.unwrap();
let report1 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("first ingest");
let report1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("first ingest");
let item1 = report1
.items
.as_ref()
@@ -1093,7 +1092,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
.expect("docker-compose.yml in first report");
assert!(
matches!(item1.kind, IngestItemKind::New),
"first ingest must be New, got {:?}", item1.kind
"first ingest must be New, got {:?}",
item1.kind
);
assert_eq!(
item1.chunker_version.as_ref().map(|c| c.0.as_str()),
@@ -1101,9 +1101,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
"first ingest must use Tier 3 fallback chunker"
);
let report2 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("second ingest");
let report2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("second ingest");
let item2 = report2
.items
.as_ref()
@@ -1113,7 +1112,8 @@ fn tier3_yaml_fallback_reingest_is_unchanged() {
.expect("docker-compose.yml in second report");
assert!(
matches!(item2.kind, IngestItemKind::Unchanged),
"second ingest must be Unchanged, got {:?}", item2.kind
"second ingest must be Unchanged, got {:?}",
item2.kind
);
}
@@ -1163,8 +1163,8 @@ fn tier1_c_ingest_searchable() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
let h = hits
.iter()
@@ -1247,8 +1247,8 @@ fn tier1_cpp_ingest_searchable() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
let h = hits
.iter()
@@ -1266,7 +1266,9 @@ fn tier1_cpp_ingest_searchable() {
// Symbol could be "kebab::chunk::Foo" (class) or "kebab::chunk::Foo::bar"
// (method) depending on which chunk ranks first.
assert!(
symbol.as_deref().is_some_and(|s| s.starts_with("kebab::chunk::Foo")),
symbol
.as_deref()
.is_some_and(|s| s.starts_with("kebab::chunk::Foo")),
"C++ symbol must start with namespace::Class prefix, got {symbol:?}"
);
assert!(*line_start >= 1, "line_start must be >=1");
@@ -1335,8 +1337,8 @@ fn tier2_k8s_multi_resource_yaml_ingests_without_collision() {
..Default::default()
},
};
let hits = kebab_app::search_with_config(env.config.clone(), query)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search must succeed");
assert!(
hits.len() >= 2,
"expected ≥2 hits (Deployment + Service), got {}",
@@ -1359,9 +1361,8 @@ fn tier3_shell_reingest_is_unchanged() {
)
.unwrap();
let report1 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("first ingest");
let report1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("first ingest");
let item1 = report1
.items
.as_ref()
@@ -1371,12 +1372,12 @@ fn tier3_shell_reingest_is_unchanged() {
.expect("deploy.sh in first report");
assert!(
matches!(item1.kind, IngestItemKind::New),
"first ingest must be New, got {:?}", item1.kind
"first ingest must be New, got {:?}",
item1.kind
);
let report2 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("second ingest");
let report2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("second ingest");
let item2 = report2
.items
.as_ref()
@@ -1386,6 +1387,7 @@ fn tier3_shell_reingest_is_unchanged() {
.expect("deploy.sh in second report");
assert!(
matches!(item2.kind, IngestItemKind::Unchanged),
"shell reingest must be Unchanged, got {:?}", item2.kind
"shell reingest must be Unchanged, got {:?}",
item2.kind
);
}

View File

@@ -93,8 +93,7 @@ impl TestEnv {
/// directly. Caller can invoke this multiple times to simulate
/// re-opening the binary after a corpus revision bump.
pub fn app(&self) -> kebab_app::App {
kebab_app::App::open_with_config(self.config.clone())
.expect("App::open_with_config")
kebab_app::App::open_with_config(self.config.clone()).expect("App::open_with_config")
}
}

View File

@@ -12,7 +12,11 @@ fn open(env: &common::TestEnv) -> App {
#[test]
fn fetch_chunk_returns_target_only_when_no_context() {
let env = common::TestEnv::new();
common::ingest_md(&env, "a.md", "# Title\n\nFirst paragraph.\n\n## Section\n\nSecond.\n");
common::ingest_md(
&env,
"a.md",
"# Title\n\nFirst paragraph.\n\n## Section\n\nSecond.\n",
);
let app = open(&env);
// Find a chunk via search to obtain its id.
@@ -42,7 +46,8 @@ fn fetch_chunk_with_context_returns_neighbors() {
// match. The earlier fixture used 2-char tokens like `A1`/`A3` for
// section bodies — those zero-hit under trigram. Use 5-char unique
// words per section so the query can pin one chunk deterministically.
let body = "# H1\n\napples\n\n# H2\n\nbanana\n\n# H3\n\ncherry\n\n# H4\n\ndurian\n\n# H5\n\nelder\n";
let body =
"# H1\n\napples\n\n# H2\n\nbanana\n\n# H3\n\ncherry\n\n# H4\n\ndurian\n\n# H5\n\nelder\n";
common::ingest_md(&env, "multi.md", body);
let app = env.app();
@@ -110,7 +115,10 @@ fn fetch_doc_returns_serialized_markdown() {
.unwrap();
assert_eq!(result.kind, FetchKind::Doc);
let text = result.text.expect("doc text");
assert!(text.contains("Heading One"), "doc text contains heading: {text:?}");
assert!(
text.contains("Heading One"),
"doc text contains heading: {text:?}"
);
assert!(text.contains("First paragraph"), "doc text contains body");
assert!(!result.truncated);
}
@@ -155,7 +163,11 @@ fn fetch_doc_with_max_tokens_truncates() {
.unwrap();
assert!(result.truncated);
let text = result.text.expect("doc text");
assert!(text.chars().count() <= 100, "trimmed text len {}", text.chars().count());
assert!(
text.chars().count() <= 100,
"trimmed text len {}",
text.chars().count()
);
}
#[test]
@@ -292,8 +304,7 @@ fn fetch_span_line_start_beyond_total_returns_empty_text() {
fn fetch_chunk_context_at_first_chunk_clamps_lower_bound() {
let env = common::TestEnv::new();
// Multi-chunk markdown so context ±N has neighbors.
let body =
"# H1\n\nFirst chunk text body.\n\n# H2\n\nSecond chunk.\n\n# H3\n\nThird chunk.\n";
let body = "# H1\n\nFirst chunk text body.\n\n# H2\n\nSecond chunk.\n\n# H3\n\nThird chunk.\n";
common::ingest_md(&env, "boundary.md", body);
let app = env.app();
let q = kebab_core::SearchQuery {

View File

@@ -16,8 +16,8 @@
mod common;
use common::TestEnv;
use kebab_app::ingest_with_config_opts;
use kebab_app::IngestOpts;
use kebab_app::ingest_with_config_opts;
use kebab_core::{DocFilter, DocumentStore, SearchMode, SearchQuery, SourceScope};
/// Helper: open the store via `TestEnv` and run `list_documents`.
@@ -125,17 +125,10 @@ fn include_scope_narrowing_does_not_purge() {
include: vec!["**/*.rs".to_string()],
exclude: env.config.workspace.exclude.clone(),
};
let first = ingest_with_config_opts(
env.config.clone(),
wide_scope,
false,
IngestOpts::default(),
)
.expect("first ingest (wide) must succeed");
assert!(
first.new >= 2,
"expected at least 2 new docs: {first:?}"
);
let first =
ingest_with_config_opts(env.config.clone(), wide_scope, false, IngestOpts::default())
.expect("first ingest (wide) must succeed");
assert!(first.new >= 2, "expected at least 2 new docs: {first:?}");
assert_eq!(
first.purged_deleted_files, 0,
"no purges on first ingest: {first:?}"

View File

@@ -24,8 +24,7 @@ use wiremock::{Mock, MockServer, ResponseTemplate};
/// inspectable in stored DB rows.
fn write_red_png(root: &Path, name: &str) -> std::path::PathBuf {
use image::{ImageBuffer, Rgb};
let img: ImageBuffer<Rgb<u8>, _> =
ImageBuffer::from_fn(100, 50, |_, _| Rgb([255, 0, 0]));
let img: ImageBuffer<Rgb<u8>, _> = ImageBuffer::from_fn(100, 50, |_, _| Rgb([255, 0, 0]));
let path = root.join(name);
img.save(&path).expect("write PNG fixture");
path
@@ -80,7 +79,12 @@ async fn ingest_image_with_ocr_produces_chunk_containing_ocr_text() {
// Counters: scanned should include the PNG; new ≥ 1 (markdown
// fixtures from the workspace tree may also count).
assert!(report.scanned >= 1, "scanned={}, items={:?}", report.scanned, report.items);
assert!(
report.scanned >= 1,
"scanned={}, items={:?}",
report.scanned,
report.items
);
assert_eq!(report.errors, 0, "no errors on lenient OCR path");
// Locate the image doc in the report items.
@@ -94,7 +98,11 @@ async fn ingest_image_with_ocr_produces_chunk_containing_ocr_text() {
kebab_core::IngestItemKind::New,
"image asset must be classified New on first ingest"
);
assert_eq!(img_item.chunk_count, Some(1), "image emits exactly one chunk");
assert_eq!(
img_item.chunk_count,
Some(1),
"image emits exactly one chunk"
);
// Inspect the stored chunk text via kb-app's inspect_chunk facade.
let doc_id = img_item.doc_id.clone().expect("image doc id");
@@ -117,10 +125,12 @@ async fn ingest_image_with_ocr_produces_chunk_containing_ocr_text() {
// Sanity: the doc was actually persisted into SQLite (kb-app's
// list_docs facade reads the same store the chunker writes to).
let summaries = kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())
.expect("list_docs");
let summaries =
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default()).expect("list_docs");
assert!(
summaries.iter().any(|s| s.doc_path.0.ends_with("diagram.png")),
summaries
.iter()
.any(|s| s.doc_path.0.ends_with("diagram.png")),
"image doc must appear in list_docs"
);
@@ -171,8 +181,7 @@ async fn ingest_image_with_ocr_and_caption_populates_both_fields() {
.iter()
.find(|i| i.doc_path.0.ends_with("diagram.png"))
.unwrap();
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap())
.unwrap();
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap()).unwrap();
let block = match &doc.blocks[0] {
kebab_core::Block::ImageRef(b) => b,
_ => unreachable!(),
@@ -267,8 +276,7 @@ async fn image_indexed_with_filename_when_ocr_and_caption_disabled() {
let cfg_clone = cfg.clone();
let scope = env.scope();
let report = spawn_blocking(move || {
kebab_app::ingest_with_config(cfg_clone, scope, false)
.expect("ingest with no OCR/caption")
kebab_app::ingest_with_config(cfg_clone, scope, false).expect("ingest with no OCR/caption")
})
.await
.expect("task");
@@ -282,8 +290,7 @@ async fn image_indexed_with_filename_when_ocr_and_caption_disabled() {
.find(|i| i.doc_path.0.ends_with("raw.png"))
.unwrap();
assert_eq!(img_item.chunk_count, Some(1), "image emits one chunk");
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap())
.unwrap();
let doc = kebab_app::inspect_doc_with_config(cfg, img_item.doc_id.as_ref().unwrap()).unwrap();
let block = match &doc.blocks[0] {
kebab_core::Block::ImageRef(b) => b,
_ => unreachable!(),
@@ -392,16 +399,12 @@ async fn re_ingest_image_produces_unchanged_with_same_doc_id() {
let scope1 = scope.clone();
let scope2 = scope.clone();
let r1 = spawn_blocking(move || {
kebab_app::ingest_with_config(cfg1, scope1, false).unwrap()
})
.await
.unwrap();
let r2 = spawn_blocking(move || {
kebab_app::ingest_with_config(cfg2, scope2, false).unwrap()
})
.await
.unwrap();
let r1 = spawn_blocking(move || kebab_app::ingest_with_config(cfg1, scope1, false).unwrap())
.await
.unwrap();
let r2 = spawn_blocking(move || kebab_app::ingest_with_config(cfg2, scope2, false).unwrap())
.await
.unwrap();
let id1 = r1
.items

View File

@@ -21,11 +21,16 @@ fn second_ingest_of_unchanged_corpus_marks_all_unchanged() {
// First ingest — populates the DB. Use the legacy entry so the
// assertions cover the "previously ingested" set without needing
// IngestOpts::default() to behave identically.
let first =
ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let first = ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(first.errors, 0, "first ingest must not error: {first:?}");
assert!(first.new >= 1, "first ingest must create new docs: {first:?}");
assert_eq!(first.unchanged, 0, "first ingest cannot have unchanged: {first:?}");
assert!(
first.new >= 1,
"first ingest must create new docs: {first:?}"
);
assert_eq!(
first.unchanged, 0,
"first ingest cannot have unchanged: {first:?}"
);
let scanned = first.scanned;
@@ -38,9 +43,15 @@ fn second_ingest_of_unchanged_corpus_marks_all_unchanged() {
IngestOpts::default(),
)
.unwrap();
assert_eq!(second.scanned, scanned, "second scanned matches first: {second:?}");
assert_eq!(
second.scanned, scanned,
"second scanned matches first: {second:?}"
);
assert_eq!(second.new, 0, "no new docs on re-ingest: {second:?}");
assert_eq!(second.updated, 0, "nothing should be marked updated: {second:?}");
assert_eq!(
second.updated, 0,
"nothing should be marked updated: {second:?}"
);
assert_eq!(
second.unchanged, scanned,
"every doc must be Unchanged: {second:?}"
@@ -52,10 +63,12 @@ fn second_ingest_of_unchanged_corpus_marks_all_unchanged() {
fn force_reingest_bypasses_skip() {
let env = TestEnv::lexical_only();
let first =
ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let first = ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(first.errors, 0, "first ingest must not error: {first:?}");
assert!(first.new >= 1, "first ingest must create new docs: {first:?}");
assert!(
first.new >= 1,
"first ingest must create new docs: {first:?}"
);
let scanned = first.scanned;
let second = ingest_with_config_opts(

View File

@@ -107,13 +107,9 @@ fn cancel_none_is_uncancellable_default() {
// ingest_with_config_progress (no cancel) runs to completion.
let env = TestEnv::lexical_only();
let (tx, rx) = mpsc::channel::<IngestEvent>();
let report = kebab_app::ingest_with_config_progress(
env.config.clone(),
env.scope(),
true,
Some(tx),
)
.unwrap();
let report =
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), true, Some(tx))
.unwrap();
assert_eq!(report.scanned, 3);
assert_eq!(report.new, 3);

View File

@@ -107,5 +107,8 @@ fn ingest_file_errors_on_unsupported_extension() {
let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err();
assert!(err.to_string().contains("unsupported extension"), "{err}");
assert!(err.to_string().contains(".docx") || err.to_string().contains("docx"), "{err}");
assert!(
err.to_string().contains(".docx") || err.to_string().contains("docx"),
"{err}"
);
}

View File

@@ -8,8 +8,7 @@ use common::TestEnv;
#[test]
fn ingest_then_list_inspects_round_trip() {
let env = TestEnv::lexical_only();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
// The fixture has 3 markdown files; first ingest should label them
// all as New.
@@ -27,17 +26,14 @@ fn ingest_then_list_inspects_round_trip() {
}
// list_docs returns the 3 docs.
let docs = kebab_app::list_docs_with_config(
env.config.clone(),
kebab_core::DocFilter::default(),
)
.unwrap();
let docs =
kebab_app::list_docs_with_config(env.config.clone(), kebab_core::DocFilter::default())
.unwrap();
assert_eq!(docs.len(), 3, "docs: {docs:?}");
// inspect_doc round-trips one of them.
let any_doc_id = docs[0].doc_id.clone();
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
.unwrap();
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id).unwrap();
assert_eq!(canonical.doc_id, any_doc_id);
assert!(!canonical.blocks.is_empty(), "blocks empty");
}
@@ -46,12 +42,10 @@ fn ingest_then_list_inspects_round_trip() {
fn ingest_idempotent_on_second_run() {
let env = TestEnv::lexical_only();
let r1 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let r1 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(r1.new, 3);
let r2 =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let r2 = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
// Same files re-ingested — p9-fb-23 task 7 introduced the early-skip
// path: when checksum + parser/chunker/embedding versions all match,
// the second run reports `Unchanged` rather than `Updated`. Pre-p9-fb-23
@@ -63,19 +57,16 @@ fn ingest_idempotent_on_second_run() {
assert_eq!(r2.unchanged, 3, "second run unchanged: {r2:?}");
// list_docs still has 3 docs (no duplicates).
let docs = kebab_app::list_docs_with_config(
env.config.clone(),
kebab_core::DocFilter::default(),
)
.unwrap();
let docs =
kebab_app::list_docs_with_config(env.config.clone(), kebab_core::DocFilter::default())
.unwrap();
assert_eq!(docs.len(), 3);
}
#[test]
fn ingest_summary_only_drops_items() {
let env = TestEnv::lexical_only();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.scanned, 3);
assert!(report.items.is_none(), "summary-only should null items");
}
@@ -87,12 +78,10 @@ fn ingest_records_ingest_runs_row_with_aggregate_counts() {
// of every run. `summary_only=true` writes `items_json=NULL`; the
// counts MUST still be present.
let env = TestEnv::lexical_only();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
.unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.scanned, 3);
let db_path = std::path::PathBuf::from(&env.config.storage.data_dir)
.join("kebab.sqlite");
let db_path = std::path::PathBuf::from(&env.config.storage.data_dir).join("kebab.sqlite");
let conn = rusqlite::Connection::open(&db_path).expect("open kebab.sqlite");
let (scanned, new_c, updated, skipped, errors, items_json): (
i64,
@@ -141,25 +130,18 @@ fn ingest_provider_none_skips_lance() {
// tree shape (no `<data_dir>/lancedb` directory, or no `*.lance`
// tables under it).
let env = TestEnv::lexical_only();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(report.errors, 0, "lexical-only run must not error");
assert_eq!(report.new, 3);
let lance_dir = std::path::PathBuf::from(&env.config.storage.data_dir)
.join("lancedb");
let lance_dir = std::path::PathBuf::from(&env.config.storage.data_dir).join("lancedb");
if lance_dir.exists() {
// If the dir was created (e.g., by an earlier consumer touching
// the path), it MUST contain no `.lance` tables.
let mut had_lance_table = false;
for entry in std::fs::read_dir(&lance_dir).expect("read lance_dir") {
let entry = entry.unwrap();
if entry
.path()
.extension()
.and_then(|s| s.to_str())
== Some("lance")
{
if entry.path().extension().and_then(|s| s.to_str()) == Some("lance") {
had_lance_table = true;
break;
}
@@ -189,8 +171,7 @@ fn list_docs_filters_by_tags_any() {
tags_any: vec!["rust".to_string()],
..Default::default()
};
let rust_docs =
kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
let rust_docs = kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
// intro.md and notes/cargo.md both tag "rust".
assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}");
}
@@ -198,8 +179,9 @@ fn list_docs_filters_by_tags_any() {
#[test]
fn inspect_doc_not_found_returns_actionable_error() {
let env = TestEnv::lexical_only();
let bogus =
kebab_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
let bogus = kebab_core::DocumentId(
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
);
let err = kebab_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
let msg = format!("{err:#}");
assert!(
@@ -218,8 +200,7 @@ fn inspect_chunk_not_found_returns_actionable_error() {
let bogus = kebab_core::ChunkId(
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
);
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus)
.unwrap_err();
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus).unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("not found"), "got: {msg}");
}
@@ -251,22 +232,18 @@ fn ingest_with_config_opts_default_matches_legacy_behaviour() {
#[test]
fn ingest_stamps_chunker_version_on_document() {
let env = TestEnv::lexical_only();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert!(report.new >= 1, "expected at least one new doc: {report:?}");
assert_eq!(report.errors, 0, "no errors expected: {report:?}");
let docs = kebab_app::list_docs_with_config(
env.config.clone(),
kebab_core::DocFilter::default(),
)
.unwrap();
let docs =
kebab_app::list_docs_with_config(env.config.clone(), kebab_core::DocFilter::default())
.unwrap();
assert!(!docs.is_empty(), "no docs after ingest");
for doc_entry in &docs {
let canonical =
kebab_app::inspect_doc_with_config(env.config.clone(), &doc_entry.doc_id)
.unwrap();
kebab_app::inspect_doc_with_config(env.config.clone(), &doc_entry.doc_id).unwrap();
assert!(
canonical.last_chunker_version.is_some(),
"last_chunker_version must be stamped for doc {}: got {:?}",

View File

@@ -17,8 +17,7 @@ use std::sync::atomic::AtomicBool;
use common::TestEnv;
fn ollama_endpoint() -> String {
std::env::var("KEBAB_PDF_OCR_ENDPOINT")
.unwrap_or_else(|_| "http://localhost:11434".to_string())
std::env::var("KEBAB_PDF_OCR_ENDPOINT").unwrap_or_else(|_| "http://localhost:11434".to_string())
}
fn make_ocr_env_real() -> TestEnv {
@@ -43,8 +42,8 @@ fn make_ocr_env_real() -> TestEnv {
fn ingest_with_mock_ocr_yields_pdf_ocr_summary() {
let env = make_ocr_env_real();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest");
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).expect("ingest");
assert!(report.new >= 1, "at least one PDF ingested: {report:?}");
@@ -72,15 +71,13 @@ fn ingest_with_mock_ocr_yields_pdf_ocr_summary() {
fn ocr_text_indexed_and_searchable() {
let env = make_ocr_env_real();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest");
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).expect("ingest");
// Search for a Korean morpheme expected to appear in qwen2.5vl:3b OCR
// output of the PoC ground-truth page. "다음" is a high-frequency token
// in page1.txt truth file.
let query = common::lexical_query("다음");
let hits =
kebab_app::search_with_config(env.config.clone(), query).expect("search");
let hits = kebab_app::search_with_config(env.config.clone(), query).expect("search");
assert!(
!hits.is_empty(),

View File

@@ -13,13 +13,9 @@ use kebab_core::IngestItemKind;
fn run_with_progress() -> Vec<IngestEvent> {
let env = TestEnv::lexical_only();
let (tx, rx) = mpsc::channel::<IngestEvent>();
let report = kebab_app::ingest_with_config_progress(
env.config.clone(),
env.scope(),
false,
Some(tx),
)
.unwrap();
let report =
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), false, Some(tx))
.unwrap();
assert_eq!(report.scanned, 3);
assert_eq!(report.new, 3);
@@ -116,13 +112,9 @@ fn ingest_with_config_progress_none_matches_ingest_with_config() {
// `ingest_with_config_progress(..., None)` must produce identical
// reports modulo wall-clock duration.
let env = TestEnv::lexical_only();
let r_none = kebab_app::ingest_with_config_progress(
env.config.clone(),
env.scope(),
true,
None,
)
.unwrap();
let r_none =
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), true, None)
.unwrap();
assert_eq!(r_none.scanned, 3);
assert_eq!(r_none.new, 3);
}
@@ -134,13 +126,9 @@ fn dropped_receiver_does_not_panic_or_fail_ingest() {
let env = TestEnv::lexical_only();
let (tx, rx) = mpsc::channel::<IngestEvent>();
drop(rx);
let report = kebab_app::ingest_with_config_progress(
env.config.clone(),
env.scope(),
true,
Some(tx),
)
.unwrap();
let report =
kebab_app::ingest_with_config_progress(env.config.clone(), env.scope(), true, Some(tx))
.unwrap();
assert_eq!(report.scanned, 3);
}
@@ -185,13 +173,8 @@ fn pdf_ocr_progress_emits_started_finished_events() {
};
let (tx, rx) = mpsc::channel::<IngestEvent>();
let _report = kebab_app::ingest_with_config_progress(
config,
scope,
false,
Some(tx),
)
.expect("ingest_with_config_progress");
let _report = kebab_app::ingest_with_config_progress(config, scope, false, Some(tx))
.expect("ingest_with_config_progress");
let events: Vec<_> = rx.iter().collect();
@@ -204,7 +187,16 @@ fn pdf_ocr_progress_emits_started_finished_events() {
.filter(|e| matches!(e, IngestEvent::PdfOcrFinished { .. }))
.count();
assert!(started_count >= 1, "PdfOcrStarted 가 ≥ 1 emit 됨 (got {started_count})");
assert!(finished_count >= 1, "PdfOcrFinished 가 ≥ 1 emit 됨 (got {finished_count})");
assert_eq!(started_count, finished_count, "Started 와 Finished 의 count 일치");
assert!(
started_count >= 1,
"PdfOcrStarted 가 ≥ 1 emit 됨 (got {started_count})"
);
assert!(
finished_count >= 1,
"PdfOcrFinished 가 ≥ 1 emit 됨 (got {finished_count})"
);
assert_eq!(
started_count, finished_count,
"Started 와 Finished 의 count 일치"
);
}

View File

@@ -29,12 +29,14 @@ fn ingest_stdin_writes_frontmatter_and_reports_new() {
"## Body content\n\nMore.",
"Article X",
Some("https://example.com/x"),
).unwrap();
)
.unwrap();
assert_eq!(report.new, 1, "{report:?}");
// _external/ contains exactly one .md file with frontmatter.
let ext_dir = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
let entries: Vec<_> = fs::read_dir(&ext_dir).unwrap()
let entries: Vec<_> = fs::read_dir(&ext_dir)
.unwrap()
.filter_map(std::result::Result::ok)
.collect();
assert_eq!(entries.len(), 1);
@@ -50,16 +52,13 @@ fn ingest_stdin_without_source_uri() {
let dir = tempfile::tempdir().unwrap();
let cfg = fresh_cfg(dir.path());
let report = kebab_app::ingest_stdin_with_config(
cfg.clone(),
"## Body",
"Title",
None,
).unwrap();
let report =
kebab_app::ingest_stdin_with_config(cfg.clone(), "## Body", "Title", None).unwrap();
assert_eq!(report.new, 1);
let ext_dir = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
let entries: Vec<_> = fs::read_dir(&ext_dir).unwrap()
let entries: Vec<_> = fs::read_dir(&ext_dir)
.unwrap()
.filter_map(std::result::Result::ok)
.collect();
let content = fs::read_to_string(entries[0].path()).unwrap();

View File

@@ -17,9 +17,8 @@ fn init_workspace_header_lists_supported_extensions() {
}
kebab_app::init_workspace(true).expect("init_workspace");
let cfg_path = kebab_config::Config::xdg_config_path();
let body = std::fs::read_to_string(&cfg_path).unwrap_or_else(|e| {
panic!("read config at {}: {e}", cfg_path.display())
});
let body = std::fs::read_to_string(&cfg_path)
.unwrap_or_else(|e| panic!("read config at {}: {e}", cfg_path.display()));
assert!(
body.contains("처리 가능한 형식"),
"header lists supported types section: body=\n{body}"

View File

@@ -9,9 +9,8 @@ use std::sync::atomic::AtomicBool;
use common::mock_ocr::MockOcrEngine;
use kebab_app::pdf_ocr_apply::{PdfOcrOpts, apply_ocr_to_pdf_pages};
use kebab_core::{
AssetStorage, Block, CanonicalDocument, Checksum, ExtractConfig, ExtractContext,
Extractor, Inline, Lang, MediaType, RawAsset, SourceSpan,
SourceUri, WorkspacePath, id_for_asset,
AssetStorage, Block, CanonicalDocument, Checksum, ExtractConfig, ExtractContext, Extractor,
Inline, Lang, MediaType, RawAsset, SourceSpan, SourceUri, WorkspacePath, id_for_asset,
};
use kebab_parse_pdf::PdfTextExtractor;
use time::OffsetDateTime;
@@ -258,8 +257,8 @@ fn f6_flatedecode_skipped_with_warning() {
// Test 7: F7 CCITTFax → skip + warning (verifier M-4 split)
#[test]
fn f7_ccittfax_skipped_with_warning() {
let bytes = std::fs::read("../kebab-parse-pdf/tests/fixtures/ccitt.pdf")
.expect("F7 fixture missing");
let bytes =
std::fs::read("../kebab-parse-pdf/tests/fixtures/ccitt.pdf").expect("F7 fixture missing");
let mut canonical = canonical_with_empty_block(); // page-1 block from F1
let engine = MockOcrEngine::single("SHOULD_NOT_BE_CALLED", false);
let opts = default_opts(true);

View File

@@ -46,17 +46,13 @@ fn build_text_pdf(pages: &[Option<&str>]) -> Vec<u8> {
operations: vec![
Operation::new("BT", vec![]),
Operation::new("Tf", vec!["F1".into(), 24.into()]),
Operation::new(
"Td",
vec![Object::Integer(100), Object::Integer(700)],
),
Operation::new("Td", vec![Object::Integer(100), Object::Integer(700)]),
Operation::new("Tj", vec![Object::string_literal(*text)]),
Operation::new("ET", vec![]),
],
};
let stream_data = content.encode().expect("content encode");
let content_id =
doc.add_object(Stream::new(dictionary! {}, stream_data));
let content_id = doc.add_object(Stream::new(dictionary! {}, stream_data));
page_dict.set("Contents", content_id);
}
let page_id = doc.add_object(page_dict);
@@ -76,8 +72,7 @@ fn build_text_pdf(pages: &[Option<&str>]) -> Vec<u8> {
Object::Integer(842),
],
};
doc.objects
.insert(pages_id, Object::Dictionary(pages_dict));
doc.objects.insert(pages_id, Object::Dictionary(pages_dict));
let catalog_id = doc.add_object(dictionary! {
"Type" => "Catalog",
@@ -146,9 +141,8 @@ fn ingest_3_page_pdf_produces_one_doc_and_per_page_chunks() {
write_pdf(&env.workspace_root, "three.pdf", &bytes);
let cfg = cfg_with_pdf(&env);
let report =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false)
.expect("PDF ingest must succeed");
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false)
.expect("PDF ingest must succeed");
assert_eq!(report.errors, 0);
let items = report.items.as_ref().expect("items present");
@@ -157,8 +151,16 @@ fn ingest_3_page_pdf_produces_one_doc_and_per_page_chunks() {
.find(|i| i.doc_path.0.ends_with("three.pdf"))
.expect("PDF item present");
assert_eq!(pdf_item.kind, IngestItemKind::New);
assert_eq!(pdf_item.block_count, Some(3), "one Block::Paragraph per page");
assert_eq!(pdf_item.chunk_count, Some(3), "one chunk per non-empty page");
assert_eq!(
pdf_item.block_count,
Some(3),
"one Block::Paragraph per page"
);
assert_eq!(
pdf_item.chunk_count,
Some(3),
"one chunk per non-empty page"
);
assert_eq!(
pdf_item.parser_version.as_ref().map(|p| p.0.as_str()),
Some("pdf-text-v1")
@@ -169,11 +171,8 @@ fn ingest_3_page_pdf_produces_one_doc_and_per_page_chunks() {
);
// Inspect the stored doc to confirm SourceSpan::Page round-trip.
let doc = kebab_app::inspect_doc_with_config(
cfg,
pdf_item.doc_id.as_ref().unwrap(),
)
.expect("inspect_doc returns the PDF document");
let doc = kebab_app::inspect_doc_with_config(cfg, pdf_item.doc_id.as_ref().unwrap())
.expect("inspect_doc returns the PDF document");
assert_eq!(doc.blocks.len(), 3);
for (i, block) in doc.blocks.iter().enumerate() {
let want_page = (i as u32) + 1;
@@ -202,8 +201,7 @@ fn re_ingest_identical_pdf_produces_unchanged_with_same_doc_id() {
write_pdf(&env.workspace_root, "stable.pdf", &bytes);
let cfg = cfg_with_pdf(&env);
let report1 =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let report1 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let item1 = report1
.items
.as_ref()
@@ -214,8 +212,7 @@ fn re_ingest_identical_pdf_produces_unchanged_with_same_doc_id() {
.unwrap();
assert_eq!(item1.kind, IngestItemKind::New);
let report2 =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let report2 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let item2 = report2
.items
.unwrap()
@@ -239,8 +236,7 @@ fn re_ingest_edited_pdf_produces_new_doc_id() {
std::fs::write(&path, &bytes_v1).unwrap();
let cfg = cfg_with_pdf(&env);
let report_v1 =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let report_v1 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let id_v1 = report_v1
.items
.as_ref()
@@ -252,12 +248,10 @@ fn re_ingest_edited_pdf_produces_new_doc_id() {
.clone()
.unwrap();
let bytes_v2 =
build_text_pdf(&[Some("VERSION TWO entirely different body content.")]);
let bytes_v2 = build_text_pdf(&[Some("VERSION TWO entirely different body content.")]);
std::fs::write(&path, &bytes_v2).unwrap();
let report_v2 =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let report_v2 = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let item_v2 = report_v2
.items
.as_ref()
@@ -282,9 +276,11 @@ fn encrypted_pdf_fails_with_qpdf_hint() {
write_pdf(&env.workspace_root, "secret.pdf", &bytes);
let cfg = cfg_with_pdf(&env);
let report =
kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
assert_eq!(report.errors, 1, "encrypted PDF must increment errors exactly once");
let report = kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
assert_eq!(
report.errors, 1,
"encrypted PDF must increment errors exactly once"
);
let items = report.items.as_ref().unwrap();
let pdf_item = items
.iter()
@@ -310,9 +306,11 @@ fn corrupt_pdf_fails_without_storing() {
write_pdf(&env.workspace_root, "corrupt.pdf", &bytes);
let cfg = cfg_with_pdf(&env);
let report =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
assert_eq!(report.errors, 1, "corrupt PDF must increment errors exactly once");
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
assert_eq!(
report.errors, 1,
"corrupt PDF must increment errors exactly once"
);
let items = report.items.as_ref().unwrap();
let pdf_item = items
.iter()
@@ -322,11 +320,8 @@ fn corrupt_pdf_fails_without_storing() {
// Confirm the doc was NOT stored — list_docs returns nothing for
// this path.
let summaries = kebab_app::list_docs_with_config(
cfg,
kebab_core::DocFilter::default(),
)
.unwrap();
let summaries =
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default()).unwrap();
assert!(
!summaries
.iter()
@@ -341,14 +336,15 @@ fn corrupt_pdf_fails_without_storing() {
#[test]
fn mixed_page_pdf_stores_asset_with_scanned_candidate_warning() {
let env = TestEnv::lexical_only();
let bytes =
build_text_pdf(&[Some("first page"), None, Some("third page")]);
let bytes = build_text_pdf(&[Some("first page"), None, Some("third page")]);
write_pdf(&env.workspace_root, "mixed.pdf", &bytes);
let cfg = cfg_with_pdf(&env);
let report =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
assert_eq!(report.errors, 0, "scanned candidate is a Warning, not Error");
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
assert_eq!(
report.errors, 0,
"scanned candidate is a Warning, not Error"
);
let pdf_item = report
.items
.as_ref()
@@ -368,11 +364,7 @@ fn mixed_page_pdf_stores_asset_with_scanned_candidate_warning() {
"pdf-page-v1.1 emits 0 chunks for the empty page; total = 2"
);
let doc = kebab_app::inspect_doc_with_config(
cfg,
pdf_item.doc_id.as_ref().unwrap(),
)
.unwrap();
let doc = kebab_app::inspect_doc_with_config(cfg, pdf_item.doc_id.as_ref().unwrap()).unwrap();
let warnings: Vec<_> = doc
.provenance
.events
@@ -419,8 +411,7 @@ fn ingest_report_arithmetic_invariant_holds_with_corrupt_pdf() {
write_pdf(&env.workspace_root, "broken.pdf", &corrupt_pdf());
let cfg = cfg_with_pdf(&env);
let report =
kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
let report = kebab_app::ingest_with_config(cfg, env.scope(), false).unwrap();
let total = report.new + report.updated + report.skipped + report.errors;
assert_eq!(
report.scanned, total,
@@ -441,14 +432,12 @@ fn long_pdf_round_trips_through_lexical_pipeline() {
let pages: Vec<String> = (1..=50)
.map(|i| format!("Page {i} body — lorem ipsum dolor sit amet."))
.collect();
let page_refs: Vec<Option<&str>> =
pages.iter().map(|s| Some(s.as_str())).collect();
let page_refs: Vec<Option<&str>> = pages.iter().map(|s| Some(s.as_str())).collect();
let bytes = build_text_pdf(&page_refs);
write_pdf(&env.workspace_root, "long.pdf", &bytes);
let cfg = cfg_with_pdf(&env);
let report =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
assert_eq!(report.errors, 0);
let pdf_item = report
.items
@@ -466,8 +455,7 @@ fn long_pdf_round_trips_through_lexical_pipeline() {
// Round-trip: list_docs sees the long PDF.
let summaries =
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())
.unwrap();
kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default()).unwrap();
assert!(summaries.iter().any(|s| s.doc_path.0.ends_with("long.pdf")));
}
@@ -476,13 +464,11 @@ fn long_pdf_round_trips_through_lexical_pipeline() {
#[test]
fn inspect_doc_surfaces_page_spans() {
let env = TestEnv::lexical_only();
let bytes =
build_text_pdf(&[Some("alpha body"), Some("beta body"), Some("gamma body")]);
let bytes = build_text_pdf(&[Some("alpha body"), Some("beta body"), Some("gamma body")]);
write_pdf(&env.workspace_root, "inspect.pdf", &bytes);
let cfg = cfg_with_pdf(&env);
let report =
kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let report = kebab_app::ingest_with_config(cfg.clone(), env.scope(), false).unwrap();
let pdf_item = report
.items
.as_ref()
@@ -490,19 +476,12 @@ fn inspect_doc_surfaces_page_spans() {
.iter()
.find(|i| i.doc_path.0.ends_with("inspect.pdf"))
.unwrap();
let doc = kebab_app::inspect_doc_with_config(
cfg,
pdf_item.doc_id.as_ref().unwrap(),
)
.unwrap();
let doc = kebab_app::inspect_doc_with_config(cfg, pdf_item.doc_id.as_ref().unwrap()).unwrap();
assert_eq!(doc.parser_version.0, "pdf-text-v1");
assert_eq!(doc.blocks.len(), 3);
for block in &doc.blocks {
match block {
Block::Paragraph(p) => assert!(matches!(
p.common.source_span,
SourceSpan::Page { .. }
)),
Block::Paragraph(p) => assert!(matches!(p.common.source_span, SourceSpan::Page { .. })),
other => panic!("expected Paragraph, got {other:?}"),
}
}

View File

@@ -78,19 +78,15 @@ fn reset_orphans_only_purges_out_of_scope_docs() {
narrow_cfg.workspace.exclude = vec!["b.rs".to_string(), "c.rs".to_string()];
// Run orphans-only reset.
let report = execute(ResetScope::OrphansOnly, &narrow_cfg)
.expect("orphans-only reset must succeed");
let report =
execute(ResetScope::OrphansOnly, &narrow_cfg).expect("orphans-only reset must succeed");
assert_eq!(
report.orphans_purged, 2,
"expected 2 orphans purged (b.rs + c.rs): {report:?}"
);
let mut purged: Vec<String> = report
.purged_paths
.iter()
.map(|p| p.0.clone())
.collect();
let mut purged: Vec<String> = report.purged_paths.iter().map(|p| p.0.clone()).collect();
purged.sort();
assert_eq!(
purged,

View File

@@ -37,8 +37,14 @@ fn schema_models_active_arrays_empty_on_empty_corpus() {
drop(store);
let s = schema_with_config(&cfg).unwrap();
assert!(s.models.active_parsers.is_empty(), "empty corpus → no parsers");
assert!(s.models.active_chunkers.is_empty(), "empty corpus → no chunkers");
assert!(
s.models.active_parsers.is_empty(),
"empty corpus → no parsers"
);
assert!(
s.models.active_chunkers.is_empty(),
"empty corpus → no chunkers"
);
// backward compat: 기존 단일 field 는 markdown default 보존.
assert_eq!(s.models.parser_version, kebab_parse_md::PARSER_VERSION);
}
@@ -55,10 +61,19 @@ fn schema_emits_active_parsers_and_chunkers_array_after_ingest() {
kebab_app::ingest_with_config(cfg.clone(), scope, false).unwrap();
let s = schema_with_config(&cfg).unwrap();
assert!(!s.models.active_parsers.is_empty(), "active_parsers populated after ingest");
assert!(!s.models.active_chunkers.is_empty(), "active_chunkers populated after ingest");
assert!(
!s.models.active_parsers.is_empty(),
"active_parsers populated after ingest"
);
assert!(
!s.models.active_chunkers.is_empty(),
"active_chunkers populated after ingest"
);
// active arrays must be sorted (ORDER BY in SQL).
let mut sorted = s.models.active_parsers.clone();
sorted.sort();
assert_eq!(s.models.active_parsers, sorted, "active_parsers must be sorted");
assert_eq!(
s.models.active_parsers, sorted,
"active_parsers must be sorted"
);
}

View File

@@ -27,7 +27,10 @@ fn search_with_opts_no_budget_matches_search() {
assert_eq!(resp.hits.len(), baseline.len());
assert!(!resp.truncated);
assert!(resp.next_cursor.is_none(), "k=5 against 1 doc → no next page");
assert!(
resp.next_cursor.is_none(),
"k=5 against 1 doc → no next page"
);
}
#[test]
@@ -62,7 +65,11 @@ fn budget_truncates_snippets_when_below_threshold() {
fn cursor_paginates_to_next_page() {
let env = common::TestEnv::new();
for i in 0..6 {
common::ingest_md(&env, &format!("d{i}.md"), &format!("# T{i}\n\nrust topic {i}\n"));
common::ingest_md(
&env,
&format!("d{i}.md"),
&format!("# T{i}\n\nrust topic {i}\n"),
);
}
let app = env.app();
@@ -88,7 +95,10 @@ fn cursor_paginates_to_next_page() {
page1.hits.iter().map(|h| h.chunk_id.0.clone()).collect();
let p2_ids: std::collections::HashSet<_> =
page2.hits.iter().map(|h| h.chunk_id.0.clone()).collect();
assert!(p1_ids.is_disjoint(&p2_ids), "page 2 must not repeat page 1 hits");
assert!(
p1_ids.is_disjoint(&p2_ids),
"page 2 must not repeat page 1 hits"
);
}
#[test]

View File

@@ -75,11 +75,9 @@ fn lexical_multi_token_korean_query_hits() {
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
.expect("ingest must succeed");
let hits = kebab_app::search_with_config(
env.config.clone(),
common::lexical_query("해시 충돌"),
)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), common::lexical_query("해시 충돌"))
.expect("search must succeed");
assert!(
!hits.is_empty(),
@@ -113,11 +111,9 @@ fn lexical_mixed_korean_english_multi_token_query_hits() {
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
.expect("ingest must succeed");
let hits = kebab_app::search_with_config(
env.config.clone(),
common::lexical_query("Rust 충돌은"),
)
.expect("search must succeed");
let hits =
kebab_app::search_with_config(env.config.clone(), common::lexical_query("Rust 충돌은"))
.expect("search must succeed");
assert!(
!hits.is_empty(),

View File

@@ -35,8 +35,8 @@ fn lexical_search_returns_hits_after_ingest() {
fn lexical_search_empty_query_returns_empty() {
let env = TestEnv::lexical_only();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let hits = kebab_app::search_with_config(env.config.clone(), common::lexical_query(" "))
.unwrap();
let hits =
kebab_app::search_with_config(env.config.clone(), common::lexical_query(" ")).unwrap();
assert!(hits.is_empty(), "blank query must short-circuit empty");
}
@@ -107,17 +107,17 @@ fn search_uncached_returns_same_hits_as_cached() {
#[test]
fn first_ingest_bumps_corpus_revision() {
let env = TestEnv::lexical_only();
let store_before =
kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
let store_before = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
store_before.run_migrations().unwrap();
assert_eq!(store_before.corpus_revision(), 0, "fresh store seeds 0");
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert!(report.new + report.updated > 0, "first ingest must commit ≥1 doc");
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert!(
report.new + report.updated > 0,
"first ingest must commit ≥1 doc"
);
let store_after =
kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
let store_after = kebab_store_sqlite::SqliteStore::open(&env.config).unwrap();
assert!(
store_after.corpus_revision() >= 1,
"ingest commit must bump corpus_revision (got {})",

View File

@@ -29,7 +29,9 @@ fn fresh_doc_is_not_stale_with_default_threshold() {
assert!(
hits.iter().all(|h| !h.stale),
"freshly-ingested doc must not be stale at default 30d threshold: {:?}",
hits.iter().map(|h| (h.doc_path.0.clone(), h.stale)).collect::<Vec<_>>()
hits.iter()
.map(|h| (h.doc_path.0.clone(), h.stale))
.collect::<Vec<_>>()
);
}
@@ -50,7 +52,9 @@ fn threshold_zero_disables_staleness() {
assert!(
hits.iter().all(|h| !h.stale),
"threshold=0 disables staleness even for year-old docs: {:?}",
hits.iter().map(|h| (h.doc_path.0.clone(), h.stale)).collect::<Vec<_>>()
hits.iter()
.map(|h| (h.doc_path.0.clone(), h.stale))
.collect::<Vec<_>>()
);
}

View File

@@ -14,7 +14,8 @@ use common::TestEnv;
fn require_avx_or_panic() {
#[cfg(target_arch = "x86_64")]
{
assert!(std::is_x86_feature_detected!("avx"),
assert!(
std::is_x86_feature_detected!("avx"),
"kb-app vector integration test requires AVX-capable hardware; \
host CPU lacks AVX. Run on an AVX-capable machine."
);
@@ -28,8 +29,7 @@ fn ingest_then_hybrid_search_returns_hits() {
require_avx_or_panic();
let env = TestEnv::with_embeddings();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
assert_eq!(report.new, 3);
@@ -55,8 +55,7 @@ fn ingest_then_vector_search_carries_embedding_model() {
require_avx_or_panic();
let env = TestEnv::with_embeddings();
let report =
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
assert_eq!(report.new, 3);

View File

@@ -13,11 +13,7 @@ fn unsupported_extension_skip_carries_warning_and_is_aggregated() {
std::fs::write(workspace_root.join("legacy.docx"), b"unsupported").unwrap();
std::fs::write(workspace_root.join("Makefile"), b"unsupported").unwrap();
let report = kebab_app::ingest_with_config(
env.config.clone(),
env.scope(),
false,
).unwrap();
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
let items = report.items.as_ref().expect("items array populated");
let docx_item = items
@@ -39,5 +35,8 @@ fn unsupported_extension_skip_carries_warning_and_is_aggregated() {
vec!["unsupported media type: <no-ext>".to_string()],
);
assert_eq!(report.skipped_by_extension.get("docx").copied(), Some(1));
assert_eq!(report.skipped_by_extension.get("<no-ext>").copied(), Some(1));
assert_eq!(
report.skipped_by_extension.get("<no-ext>").copied(),
Some(1)
);
}

View File

@@ -44,8 +44,8 @@ fn twin_files_fetch_span_uses_correct_asset() {
std::fs::write(dir_b.join("note.md"), content).unwrap();
// Ingest all files (fixture workspace + our two new twins).
let report = ingest_with_config(env.config.clone(), env.scope(), false)
.expect("ingest must succeed");
let report =
ingest_with_config(env.config.clone(), env.scope(), false).expect("ingest must succeed");
assert_eq!(report.errors, 0, "no ingest errors; report={report:?}");
// Both twin paths must appear as New in the report.
@@ -53,8 +53,7 @@ fn twin_files_fetch_span_uses_correct_asset() {
let twin_items: Vec<_> = items
.iter()
.filter(|i| {
i.doc_path.0.ends_with("src_a/note.md")
|| i.doc_path.0.ends_with("src_b/note.md")
i.doc_path.0.ends_with("src_a/note.md") || i.doc_path.0.ends_with("src_b/note.md")
})
.collect();
assert_eq!(
@@ -149,7 +148,10 @@ fn twin_files_fetch_span_uses_correct_asset() {
// at either twin, making one twin's span fetch behave incorrectly.
let report2 = ingest_with_config(env.config.clone(), env.scope(), false)
.expect("second ingest must succeed");
assert_eq!(report2.errors, 0, "no ingest errors on second run; report={report2:?}");
assert_eq!(
report2.errors, 0,
"no ingest errors on second run; report={report2:?}"
);
// Re-open app after second ingest and verify span still works on both.
let app2 = env.app();

View File

@@ -43,9 +43,7 @@ fn twin_files_second_ingest_is_unchanged() {
let items = first.items.as_ref().expect("items must be present");
let twin_items: Vec<_> = items
.iter()
.filter(|i| {
i.doc_path.0.ends_with("__init__.py")
})
.filter(|i| i.doc_path.0.ends_with("__init__.py"))
.collect();
assert_eq!(
twin_items.len(),
@@ -63,8 +61,14 @@ fn twin_files_second_ingest_is_unchanged() {
// Second ingest — same files, same content → both must be Unchanged.
let second = ingest_with_config(env.config.clone(), env.scope(), false)
.expect("second ingest must succeed");
assert_eq!(second.errors, 0, "second ingest: no errors; report={second:?}");
assert_eq!(second.new, 0, "second ingest: no new docs; report={second:?}");
assert_eq!(
second.errors, 0,
"second ingest: no errors; report={second:?}"
);
assert_eq!(
second.new, 0,
"second ingest: no new docs; report={second:?}"
);
assert_eq!(
second.updated, 0,
"second ingest: no updated docs (twin-file bug would set this to 2); report={second:?}"