refactor(rename): kb crates → kebab — Cargo packages, folders, Rust modules
프로젝트 이름 `kb` → `kebab` rename 의 첫 단계. - workspace `Cargo.toml`: members `crates/kb-*` → `crates/kebab-*`, repository URL `altair823/kb` → `altair823/kebab`. - 18 crate 폴더 rename via `git mv` (history 보존). - 각 crate `Cargo.toml`: `name = "kb-*"` → `"kebab-*"`, path deps `../kb-*` → `../kebab-*`. - 모든 `.rs`: `kb_<id>` snake-case 모듈 path 18 개 (`kb_core`, `kb_config`, `kb_app`, `kb_cli`, `kb_eval`, `kb_search`, `kb_chunk`, `kb_normalize`, `kb_source_fs`, `kb_parse_md`, `kb_parse_types`, `kb_store_sqlite`, `kb_store_vector`, `kb_embed`, `kb_embed_local`, `kb_llm`, `kb_llm_local`, `kb_rag`) → `kebab_<id>` 일괄 sed (단어 경계 \\b 사용해 영어 문장 안의 "kb" 약어 미오염). CLI binary 이름 (`[[bin]] name = "kb"`), 환경변수 `KB_*`, XDG paths, tracing target, 그리고 docs sweep 은 다음 commit 에서. ## 검증 - `cargo check --workspace` clean — 모든 crate 빌드 통과 후 commit. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
220
crates/kebab-app/tests/ingest_lexical.rs
Normal file
220
crates/kebab-app/tests/ingest_lexical.rs
Normal file
@@ -0,0 +1,220 @@
|
||||
//! Integration tests for `kb-app::ingest` + `list_docs` + `inspect_*`
|
||||
//! along the lexical-only path (no embeddings → no AVX requirement).
|
||||
|
||||
mod common;
|
||||
|
||||
use common::TestEnv;
|
||||
|
||||
#[test]
|
||||
fn ingest_then_list_inspects_round_trip() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
|
||||
// The fixture has 3 markdown files; first ingest should label them
|
||||
// all as New.
|
||||
assert_eq!(report.scanned, 3, "scanned: {report:?}");
|
||||
assert_eq!(report.new, 3, "new: {report:?}");
|
||||
assert_eq!(report.updated, 0, "updated: {report:?}");
|
||||
assert_eq!(report.errors, 0, "errors: {report:?}");
|
||||
let items = report.items.as_ref().expect("items present");
|
||||
assert_eq!(items.len(), 3);
|
||||
for it in items {
|
||||
assert!(it.error.is_none(), "per-item error: {it:?}");
|
||||
assert!(it.doc_id.is_some());
|
||||
// Each fixture file emits ≥1 chunk.
|
||||
assert!(it.chunk_count.unwrap_or(0) >= 1, "chunks: {it:?}");
|
||||
}
|
||||
|
||||
// list_docs returns the 3 docs.
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3, "docs: {docs:?}");
|
||||
|
||||
// inspect_doc round-trips one of them.
|
||||
let any_doc_id = docs[0].doc_id.clone();
|
||||
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
|
||||
.unwrap();
|
||||
assert_eq!(canonical.doc_id, any_doc_id);
|
||||
assert!(!canonical.blocks.is_empty(), "blocks empty");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingest_idempotent_on_second_run() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
let r1 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(r1.new, 3);
|
||||
|
||||
let r2 =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
// Same files re-ingested — labelled Updated, not duplicated.
|
||||
assert_eq!(r2.scanned, 3, "second scan: {r2:?}");
|
||||
assert_eq!(r2.new, 0, "second run new should be 0: {r2:?}");
|
||||
assert_eq!(r2.updated, 3, "second run updated: {r2:?}");
|
||||
|
||||
// list_docs still has 3 docs (no duplicates).
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingest_summary_only_drops_items() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
assert!(report.items.is_none(), "summary-only should null items");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingest_records_ingest_runs_row_with_aggregate_counts() {
|
||||
// The ingest_runs table is the §5.7 sibling of `jobs`: dedicated
|
||||
// count columns (`scanned`, `new_count`, …) populated at the end
|
||||
// of every run. `summary_only=true` writes `items_json=NULL`; the
|
||||
// counts MUST still be present.
|
||||
let env = TestEnv::lexical_only();
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
.unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
|
||||
let db_path = std::path::PathBuf::from(&env.config.storage.data_dir)
|
||||
.join("kb.sqlite");
|
||||
let conn = rusqlite::Connection::open(&db_path).expect("open kb.sqlite");
|
||||
let (scanned, new_c, updated, skipped, errors, items_json): (
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
i64,
|
||||
Option<String>,
|
||||
) = conn
|
||||
.query_row(
|
||||
"SELECT scanned, new_count, updated_count, skipped_count,
|
||||
error_count, items_json
|
||||
FROM ingest_runs
|
||||
ORDER BY started_at DESC
|
||||
LIMIT 1",
|
||||
[],
|
||||
|r| {
|
||||
Ok((
|
||||
r.get(0)?,
|
||||
r.get(1)?,
|
||||
r.get(2)?,
|
||||
r.get(3)?,
|
||||
r.get(4)?,
|
||||
r.get(5)?,
|
||||
))
|
||||
},
|
||||
)
|
||||
.expect("ingest_runs row present");
|
||||
assert_eq!(scanned, 3);
|
||||
assert_eq!(new_c, 3);
|
||||
assert_eq!(updated, 0);
|
||||
assert_eq!(skipped, 0);
|
||||
assert_eq!(errors, 0);
|
||||
assert!(
|
||||
items_json.is_none(),
|
||||
"summary_only=true must store items_json=NULL: {items_json:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingest_provider_none_skips_lance() {
|
||||
// `provider="none"` must short-circuit the embedder + vector store
|
||||
// build entirely, so the LanceDB directory MUST NOT be created on
|
||||
// disk during ingest. `IngestReport` currently has no
|
||||
// `embeddings_indexed` field, so we assert via the on-disk lance
|
||||
// tree shape (no `<data_dir>/lancedb` directory, or no `*.lance`
|
||||
// tables under it).
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 0, "lexical-only run must not error");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
let lance_dir = std::path::PathBuf::from(&env.config.storage.data_dir)
|
||||
.join("lancedb");
|
||||
if lance_dir.exists() {
|
||||
// If the dir was created (e.g., by an earlier consumer touching
|
||||
// the path), it MUST contain no `.lance` tables.
|
||||
let mut had_lance_table = false;
|
||||
for entry in std::fs::read_dir(&lance_dir).expect("read lance_dir") {
|
||||
let entry = entry.unwrap();
|
||||
if entry
|
||||
.path()
|
||||
.extension()
|
||||
.and_then(|s| s.to_str())
|
||||
== Some("lance")
|
||||
{
|
||||
had_lance_table = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert!(
|
||||
!had_lance_table,
|
||||
"provider=none must not produce any *.lance table under {}",
|
||||
lance_dir.display()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn list_docs_filters_by_tags_any() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
let filter = kebab_core::DocFilter {
|
||||
tags_any: vec!["python".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
let docs = kebab_app::list_docs_with_config(env.config.clone(), filter).unwrap();
|
||||
assert_eq!(docs.len(), 1, "expected only the python doc: {docs:?}");
|
||||
assert!(docs[0].tags.contains(&"python".to_string()));
|
||||
|
||||
let rust_filter = kebab_core::DocFilter {
|
||||
tags_any: vec!["rust".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
let rust_docs =
|
||||
kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
|
||||
// intro.md and notes/cargo.md both tag "rust".
|
||||
assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inspect_doc_not_found_returns_actionable_error() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bogus =
|
||||
kebab_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
|
||||
let err = kebab_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
msg.contains("not found"),
|
||||
"error must mention not-found: {msg}"
|
||||
);
|
||||
assert!(
|
||||
msg.contains("kb list docs") || msg.contains("list"),
|
||||
"error must hint at `kb list docs`: {msg}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn inspect_chunk_not_found_returns_actionable_error() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bogus = kebab_core::ChunkId(
|
||||
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
|
||||
);
|
||||
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus)
|
||||
.unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("not found"), "got: {msg}");
|
||||
}
|
||||
Reference in New Issue
Block a user