refactor(rename): kb crates → kebab — Cargo packages, folders, Rust modules

프로젝트 이름 `kb` → `kebab` rename 의 첫 단계.

- workspace `Cargo.toml`: members `crates/kb-*` → `crates/kebab-*`,
  repository URL `altair823/kb` → `altair823/kebab`.
- 18 crate 폴더 rename via `git mv` (history 보존).
- 각 crate `Cargo.toml`: `name = "kb-*"` → `"kebab-*"`, path deps
  `../kb-*` → `../kebab-*`.
- 모든 `.rs`: `kb_<id>` snake-case 모듈 path 18 개 (`kb_core`,
  `kb_config`, `kb_app`, `kb_cli`, `kb_eval`, `kb_search`, `kb_chunk`,
  `kb_normalize`, `kb_source_fs`, `kb_parse_md`, `kb_parse_types`,
  `kb_store_sqlite`, `kb_store_vector`, `kb_embed`, `kb_embed_local`,
  `kb_llm`, `kb_llm_local`, `kb_rag`) → `kebab_<id>` 일괄 sed (단어
  경계 `\b` 를 사용해 영어 문장 안의 "kb" 약어는 치환 대상에서 제외).

CLI binary 이름 (`[[bin]] name = "kb"`), 환경변수 `KB_*`, XDG paths,
tracing target, 그리고 docs sweep 은 이 commit 의 범위에 포함하지 않고
다음 commit 에서 처리한다.

## 검증

- `cargo check --workspace` clean — 모든 crate 빌드 통과 후 commit.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-02 03:28:08 +00:00
parent 2aecbf3d9f
commit 911fb49550
143 changed files with 727 additions and 727 deletions

158
Cargo.lock generated
View File

@@ -3366,27 +3366,27 @@ dependencies = [
]
[[package]]
name = "kb-app"
name = "kebab-app"
version = "0.1.0"
dependencies = [
"anyhow",
"blake3",
"dirs 5.0.1",
"kb-chunk",
"kb-config",
"kb-core",
"kb-embed",
"kb-embed-local",
"kb-llm",
"kb-llm-local",
"kb-normalize",
"kb-parse-md",
"kb-parse-types",
"kb-rag",
"kb-search",
"kb-source-fs",
"kb-store-sqlite",
"kb-store-vector",
"kebab-chunk",
"kebab-config",
"kebab-core",
"kebab-embed",
"kebab-embed-local",
"kebab-llm",
"kebab-llm-local",
"kebab-normalize",
"kebab-parse-md",
"kebab-parse-types",
"kebab-rag",
"kebab-search",
"kebab-source-fs",
"kebab-store-sqlite",
"kebab-store-vector",
"rusqlite",
"serde",
"serde_json",
@@ -3399,14 +3399,14 @@ dependencies = [
]
[[package]]
name = "kb-chunk"
name = "kebab-chunk"
version = "0.1.0"
dependencies = [
"anyhow",
"blake3",
"kb-core",
"kb-normalize",
"kb-parse-md",
"kebab-core",
"kebab-normalize",
"kebab-parse-md",
"serde_json",
"serde_json_canonicalizer",
"time",
@@ -3414,32 +3414,32 @@ dependencies = [
]
[[package]]
name = "kb-cli"
name = "kebab-cli"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"kb-app",
"kb-config",
"kb-core",
"kb-eval",
"kebab-app",
"kebab-config",
"kebab-core",
"kebab-eval",
"serde_json",
]
[[package]]
name = "kb-config"
name = "kebab-config"
version = "0.1.0"
dependencies = [
"anyhow",
"dirs 5.0.1",
"kb-core",
"kebab-core",
"serde",
"serde_json",
"toml",
]
[[package]]
name = "kb-core"
name = "kebab-core"
version = "0.1.0"
dependencies = [
"anyhow",
@@ -3453,13 +3453,13 @@ dependencies = [
]
[[package]]
name = "kb-embed"
name = "kebab-embed"
version = "0.1.0"
dependencies = [
"anyhow",
"blake3",
"kb-config",
"kb-core",
"kebab-config",
"kebab-core",
"proptest",
"serde",
"thiserror 2.0.18",
@@ -3467,27 +3467,27 @@ dependencies = [
]
[[package]]
name = "kb-embed-local"
name = "kebab-embed-local"
version = "0.1.0"
dependencies = [
"anyhow",
"fastembed",
"kb-config",
"kb-embed",
"kebab-config",
"kebab-embed",
"serde_json",
"tempfile",
"tracing",
]
[[package]]
name = "kb-eval"
name = "kebab-eval"
version = "0.1.0"
dependencies = [
"anyhow",
"kb-app",
"kb-config",
"kb-core",
"kb-store-sqlite",
"kebab-app",
"kebab-config",
"kebab-core",
"kebab-store-sqlite",
"rusqlite",
"serde",
"serde_json",
@@ -3499,22 +3499,22 @@ dependencies = [
]
[[package]]
name = "kb-llm"
name = "kebab-llm"
version = "0.1.0"
dependencies = [
"anyhow",
"kb-core",
"kebab-core",
"proptest",
]
[[package]]
name = "kb-llm-local"
name = "kebab-llm-local"
version = "0.1.0"
dependencies = [
"anyhow",
"kb-config",
"kb-core",
"kb-llm",
"kebab-config",
"kebab-core",
"kebab-llm",
"reqwest",
"serde",
"serde_json",
@@ -3525,13 +3525,13 @@ dependencies = [
]
[[package]]
name = "kb-normalize"
name = "kebab-normalize"
version = "0.1.0"
dependencies = [
"anyhow",
"kb-core",
"kb-parse-md",
"kb-parse-types",
"kebab-core",
"kebab-parse-md",
"kebab-parse-types",
"serde",
"serde_json",
"time",
@@ -3540,12 +3540,12 @@ dependencies = [
]
[[package]]
name = "kb-parse-md"
name = "kebab-parse-md"
version = "0.1.0"
dependencies = [
"anyhow",
"kb-core",
"kb-parse-types",
"kebab-core",
"kebab-parse-types",
"lingua",
"pulldown-cmark",
"serde",
@@ -3557,24 +3557,24 @@ dependencies = [
]
[[package]]
name = "kb-parse-types"
name = "kebab-parse-types"
version = "0.1.0"
dependencies = [
"kb-core",
"kebab-core",
"serde",
]
[[package]]
name = "kb-rag"
name = "kebab-rag"
version = "0.1.0"
dependencies = [
"anyhow",
"blake3",
"kb-config",
"kb-core",
"kb-llm",
"kb-search",
"kb-store-sqlite",
"kebab-config",
"kebab-core",
"kebab-llm",
"kebab-search",
"kebab-store-sqlite",
"regex",
"rusqlite",
"serde",
@@ -3586,16 +3586,16 @@ dependencies = [
]
[[package]]
name = "kb-search"
name = "kebab-search"
version = "0.1.0"
dependencies = [
"anyhow",
"globset",
"kb-config",
"kb-core",
"kb-embed",
"kb-store-sqlite",
"kb-store-vector",
"kebab-config",
"kebab-core",
"kebab-embed",
"kebab-store-sqlite",
"kebab-store-vector",
"rusqlite",
"serde_json",
"tempfile",
@@ -3604,14 +3604,14 @@ dependencies = [
]
[[package]]
name = "kb-source-fs"
name = "kebab-source-fs"
version = "0.1.0"
dependencies = [
"anyhow",
"blake3",
"ignore",
"kb-config",
"kb-core",
"kebab-config",
"kebab-core",
"serde",
"serde_json",
"tempfile",
@@ -3621,17 +3621,17 @@ dependencies = [
]
[[package]]
name = "kb-store-sqlite"
name = "kebab-store-sqlite"
version = "0.1.0"
dependencies = [
"anyhow",
"blake3",
"globset",
"kb-chunk",
"kb-config",
"kb-core",
"kb-normalize",
"kb-parse-md",
"kebab-chunk",
"kebab-config",
"kebab-core",
"kebab-normalize",
"kebab-parse-md",
"refinery",
"rusqlite",
"serde_json",
@@ -3642,7 +3642,7 @@ dependencies = [
]
[[package]]
name = "kb-store-vector"
name = "kebab-store-vector"
version = "0.1.0"
dependencies = [
"anyhow",
@@ -3651,9 +3651,9 @@ dependencies = [
"arrow-schema",
"blake3",
"futures",
"kb-config",
"kb-core",
"kb-store-sqlite",
"kebab-config",
"kebab-core",
"kebab-store-sqlite",
"lancedb",
"rusqlite",
"serde",

View File

@@ -1,31 +1,31 @@
[workspace]
resolver = "3"
members = [
"crates/kb-core",
"crates/kb-parse-types",
"crates/kb-config",
"crates/kb-source-fs",
"crates/kb-parse-md",
"crates/kb-normalize",
"crates/kb-chunk",
"crates/kb-store-sqlite",
"crates/kb-store-vector",
"crates/kb-search",
"crates/kb-embed",
"crates/kb-embed-local",
"crates/kb-llm",
"crates/kb-llm-local",
"crates/kb-rag",
"crates/kb-app",
"crates/kb-cli",
"crates/kb-eval",
"crates/kebab-core",
"crates/kebab-parse-types",
"crates/kebab-config",
"crates/kebab-source-fs",
"crates/kebab-parse-md",
"crates/kebab-normalize",
"crates/kebab-chunk",
"crates/kebab-store-sqlite",
"crates/kebab-store-vector",
"crates/kebab-search",
"crates/kebab-embed",
"crates/kebab-embed-local",
"crates/kebab-llm",
"crates/kebab-llm-local",
"crates/kebab-rag",
"crates/kebab-app",
"crates/kebab-cli",
"crates/kebab-eval",
]
[workspace.package]
edition = "2024"
rust-version = "1.85"
license = "MIT OR Apache-2.0"
repository = "https://github.com/altair823/kb"
repository = "https://github.com/altair823/kebab"
version = "0.1.0"
[workspace.dependencies]

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-app"
name = "kebab-app"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,21 +8,21 @@ repository = { workspace = true }
description = "Facade — orchestrates components for kb-cli/tui/desktop"
[dependencies]
kb-core = { path = "../kb-core" }
kb-config = { path = "../kb-config" }
kb-source-fs = { path = "../kb-source-fs" }
kb-parse-md = { path = "../kb-parse-md" }
kb-parse-types = { path = "../kb-parse-types" }
kb-normalize = { path = "../kb-normalize" }
kb-chunk = { path = "../kb-chunk" }
kb-store-sqlite = { path = "../kb-store-sqlite" }
kb-store-vector = { path = "../kb-store-vector" }
kb-search = { path = "../kb-search" }
kb-embed = { path = "../kb-embed" }
kb-embed-local = { path = "../kb-embed-local" }
kb-llm = { path = "../kb-llm" }
kb-llm-local = { path = "../kb-llm-local" }
kb-rag = { path = "../kb-rag" }
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
kebab-source-fs = { path = "../kebab-source-fs" }
kebab-parse-md = { path = "../kebab-parse-md" }
kebab-parse-types = { path = "../kebab-parse-types" }
kebab-normalize = { path = "../kebab-normalize" }
kebab-chunk = { path = "../kebab-chunk" }
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
kebab-store-vector = { path = "../kebab-store-vector" }
kebab-search = { path = "../kebab-search" }
kebab-embed = { path = "../kebab-embed" }
kebab-embed-local = { path = "../kebab-embed-local" }
kebab-llm = { path = "../kebab-llm" }
kebab-llm-local = { path = "../kebab-llm-local" }
kebab-rag = { path = "../kebab-rag" }
anyhow = { workspace = true }
blake3 = { workspace = true }
serde = { workspace = true }

View File

@@ -37,16 +37,16 @@ use std::sync::{Arc, OnceLock};
use anyhow::{Context, Result, anyhow};
use kb_core::{
use kebab_core::{
Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode,
SearchQuery, VectorStore,
};
use kb_embed_local::FastembedEmbedder;
use kb_llm_local::OllamaLanguageModel;
use kb_rag::{AskOpts, RagPipeline};
use kb_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
use kb_store_sqlite::SqliteStore;
use kb_store_vector::LanceVectorStore;
use kebab_embed_local::FastembedEmbedder;
use kebab_llm_local::OllamaLanguageModel;
use kebab_rag::{AskOpts, RagPipeline};
use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
use kebab_store_sqlite::SqliteStore;
use kebab_store_vector::LanceVectorStore;
/// Facade state — see module docs for lifetime rules.
///
@@ -55,7 +55,7 @@ use kb_store_vector::LanceVectorStore;
/// ask calls. The OnceLock-backed `embedder` / `vector` fields ensure
/// the cold-start cost is paid exactly once per instance.
pub struct App {
pub(crate) config: kb_config::Config,
pub(crate) config: kebab_config::Config,
pub(crate) sqlite: Arc<SqliteStore>,
/// Memoized embedder — built lazily on first `embedder()` call when
/// embeddings are enabled. `OnceLock` keeps the struct `Sync` and
@@ -80,7 +80,7 @@ impl App {
/// Downstream `LanceVectorStore::new` (called by [`Self::vector`])
/// internally drives a `tokio::Runtime::block_on`, which panics if
/// invoked from inside another tokio runtime.
pub fn open_with_config(config: kb_config::Config) -> Result<Self> {
pub fn open_with_config(config: kebab_config::Config) -> Result<Self> {
let sqlite = SqliteStore::open(&config).context("kb-app: open SqliteStore")?;
sqlite
.run_migrations()
@@ -286,7 +286,7 @@ impl App {
/// the active config. This token surfaces in `SearchHit.index_version`
/// and on snapshot tests; including the chunker version pins it to
/// the chunking policy in effect.
fn lexical_index_version(config: &kb_config::Config) -> IndexVersion {
fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion {
IndexVersion(format!("lex:{}", config.chunking.chunker_version))
}

View File

@@ -23,7 +23,7 @@
//! ## Config seam (`*_with_config`)
//!
//! Each public free function has a `#[doc(hidden)] pub fn *_with_config`
//! companion that takes a fully-resolved [`kb_config::Config`] directly.
//! companion that takes a fully-resolved [`kebab_config::Config`] directly.
//! Three callers go through it: (1) the top-level free functions
//! themselves, after `load_config()`; (2) `kb-cli` when the user passes
//! `--config <path>` (CLI builds the Config via
@@ -39,16 +39,16 @@ use std::sync::Arc;
use anyhow::{Context, anyhow};
use serde::{Deserialize, Serialize};
use kb_chunk::MdHeadingV1Chunker;
use kb_core::{
use kebab_chunk::MdHeadingV1Chunker;
use kebab_core::{
Answer, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
EmbeddingKind, IngestReport, ParserVersion, RawAsset, SearchHit, SearchQuery,
SourceConnector, SourceScope, SourceUri, VectorRecord, VectorStore,
};
use kb_normalize::build_canonical_document;
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kb_source_fs::FsSourceConnector;
use kebab_normalize::build_canonical_document;
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kebab_source_fs::FsSourceConnector;
mod app;
pub mod doctor_signal;
@@ -65,11 +65,11 @@ const KB_PARSE_MD_VERSION: &str = "pulldown-cmark-0.x";
/// Caller-supplied knobs for one [`ask`] invocation.
///
/// Re-exported from [`kb_rag::AskOpts`] (P4-3 owns the type) so kb-cli's
/// `use kb_app::AskOpts` keeps working without churn. The struct gained
/// Re-exported from [`kebab_rag::AskOpts`] (P4-3 owns the type) so kb-cli's
/// `use kebab_app::AskOpts` keeps working without churn. The struct gained
/// a `stream_sink` field in P4-3; non-streaming callers (kb-cli today)
/// pass `stream_sink: None`.
pub use kb_rag::AskOpts;
pub use kebab_rag::AskOpts;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct DoctorReport {
@@ -90,10 +90,10 @@ pub struct DoctorCheck {
/// Create XDG dirs and write a starter `config.toml`. Idempotent unless
/// `force=true` (which overwrites an existing config).
pub fn init_workspace(force: bool) -> anyhow::Result<()> {
let cfg_path = kb_config::Config::xdg_config_path();
let data_dir = kb_config::Config::xdg_data_dir();
let cache_dir = kb_config::Config::xdg_cache_dir();
let state_dir = kb_config::Config::xdg_state_dir();
let cfg_path = kebab_config::Config::xdg_config_path();
let data_dir = kebab_config::Config::xdg_data_dir();
let cache_dir = kebab_config::Config::xdg_cache_dir();
let state_dir = kebab_config::Config::xdg_state_dir();
for d in [
cfg_path.parent().map(PathBuf::from).unwrap_or_default(),
@@ -107,11 +107,11 @@ pub fn init_workspace(force: bool) -> anyhow::Result<()> {
}
}
let workspace_root = expand_tilde(&kb_config::Config::defaults().workspace.root);
let workspace_root = expand_tilde(&kebab_config::Config::defaults().workspace.root);
std::fs::create_dir_all(&workspace_root)?;
if !cfg_path.exists() || force {
let cfg = kb_config::Config::defaults();
let cfg = kebab_config::Config::defaults();
let toml_text = toml::to_string_pretty(&cfg)?;
std::fs::write(&cfg_path, toml_text)?;
}
@@ -141,8 +141,8 @@ fn expand_tilde(s: &str) -> PathBuf {
/// Callers that already have a Config in hand (CLI honoring `--config`,
/// integration tests, TUI session) should bypass this and call the
/// matching `*_with_config` helper directly.
fn load_config() -> anyhow::Result<kb_config::Config> {
kb_config::Config::load(None)
fn load_config() -> anyhow::Result<kebab_config::Config> {
kebab_config::Config::load(None)
}
// ── ingest ────────────────────────────────────────────────────────────────
@@ -154,11 +154,11 @@ pub fn ingest(scope: SourceScope, summary_only: bool) -> anyhow::Result<IngestRe
/// Config-explicit variant — bypasses [`load_config`] when the
/// caller (kb-cli with `--config`, integration tests, TUI session)
/// already has a [`kb_config::Config`] in hand. The public free
/// already has a [`kebab_config::Config`] in hand. The public free
/// function [`ingest`] wraps this with the XDG-default load.
#[doc(hidden)]
pub fn ingest_with_config(
config: kb_config::Config,
config: kebab_config::Config,
scope: SourceScope,
summary_only: bool,
) -> anyhow::Result<IngestReport> {
@@ -205,13 +205,13 @@ pub fn ingest_with_config(
let started_at = time::OffsetDateTime::now_utc();
let mut items: Vec<kb_core::IngestItem> = Vec::new();
let mut items: Vec<kebab_core::IngestItem> = Vec::new();
let mut new_count: u32 = 0;
let mut updated_count: u32 = 0;
let mut skipped_count: u32 = 0;
let mut error_count: u32 = 0;
// Aggregate counts surfaced into `ingest_runs` (and tracing). Not
// exposed on `IngestReport` today — `kb_core::IngestReport` is a
// exposed on `IngestReport` today — `kebab_core::IngestReport` is a
// wire-stable struct without these fields — but persisting them
// means audit tooling and `kb jobs` (P+) can recover the totals
// without re-walking the DB.
@@ -242,8 +242,8 @@ pub fn ingest_with_config(
"kb-app::ingest: per-file fatal"
);
error_count = error_count.saturating_add(1);
kb_core::IngestItem {
kind: kb_core::IngestItemKind::Error,
kebab_core::IngestItem {
kind: kebab_core::IngestItemKind::Error,
doc_id: None,
doc_path: asset.workspace_path.clone(),
asset_id: Some(asset.asset_id.clone()),
@@ -259,7 +259,7 @@ pub fn ingest_with_config(
};
match item.kind {
kb_core::IngestItemKind::New => {
kebab_core::IngestItemKind::New => {
new_count = new_count.saturating_add(1);
let n = item.chunk_count.unwrap_or(0);
chunks_indexed = chunks_indexed.saturating_add(n);
@@ -267,7 +267,7 @@ pub fn ingest_with_config(
embeddings_indexed = embeddings_indexed.saturating_add(n);
}
}
kb_core::IngestItemKind::Updated => {
kebab_core::IngestItemKind::Updated => {
updated_count = updated_count.saturating_add(1);
let n = item.chunk_count.unwrap_or(0);
chunks_indexed = chunks_indexed.saturating_add(n);
@@ -275,10 +275,10 @@ pub fn ingest_with_config(
embeddings_indexed = embeddings_indexed.saturating_add(n);
}
}
kb_core::IngestItemKind::Skipped => {
kebab_core::IngestItemKind::Skipped => {
skipped_count = skipped_count.saturating_add(1)
}
kb_core::IngestItemKind::Error => {
kebab_core::IngestItemKind::Error => {
error_count = error_count.saturating_add(1)
}
}
@@ -293,9 +293,9 @@ pub fn ingest_with_config(
"scope": scope,
"summary_only": summary_only,
});
let job_id_res = <SqliteStoreAlias as kb_core::JobRepo>::create(
let job_id_res = <SqliteStoreAlias as kebab_core::JobRepo>::create(
&app.sqlite,
kb_core::JobKind::Ingest,
kebab_core::JobKind::Ingest,
payload,
);
match job_id_res {
@@ -312,7 +312,7 @@ pub fn ingest_with_config(
"chunks_indexed": chunks_indexed,
"embeddings_indexed": embeddings_indexed,
});
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::update_progress(
if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::update_progress(
&app.sqlite,
&jid,
progress,
@@ -323,10 +323,10 @@ pub fn ingest_with_config(
"kb-app::ingest: JobRepo::update_progress failed"
);
}
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::finish(
if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::finish(
&app.sqlite,
&jid,
kb_core::JobStatus::Succeeded,
kebab_core::JobStatus::Succeeded,
None,
) {
tracing::warn!(
@@ -370,7 +370,7 @@ pub fn ingest_with_config(
}
};
let run_id = mint_ingest_run_id(&scope_json, started_at);
let row = kb_store_sqlite::IngestRunRow {
let row = kebab_store_sqlite::IngestRunRow {
run_id: &run_id,
scope_json: &scope_json,
scanned: scanned_count,
@@ -432,7 +432,7 @@ fn mint_ingest_run_id(scope_json: &str, at: time::OffsetDateTime) -> String {
/// vs `JobRepo`) on the same store. Plain `app.sqlite.create(...)`
/// would pick one based on inherent vs trait methods; we go through
/// `<… as JobRepo>` to be explicit.
type SqliteStoreAlias = kb_store_sqlite::SqliteStore;
type SqliteStoreAlias = kebab_store_sqlite::SqliteStore;
/// Process a single asset: read bytes, parse, normalize, chunk,
/// persist, embed. Per-asset failures bubble up to the caller for
@@ -444,18 +444,18 @@ fn ingest_one_asset(
parser_version: &ParserVersion,
chunk_policy: &ChunkPolicy,
embedder: Option<&Arc<dyn Embedder + Send + Sync>>,
vector_store: Option<&Arc<kb_store_vector::LanceVectorStore>>,
vector_store: Option<&Arc<kebab_store_vector::LanceVectorStore>>,
existing_doc_ids: &std::collections::HashSet<String>,
) -> anyhow::Result<kb_core::IngestItem> {
) -> anyhow::Result<kebab_core::IngestItem> {
tracing::debug!(
target: "kb-app::ingest",
path = %asset.workspace_path.0,
"processing asset"
);
// Only handle Markdown for now; other media types are P6+ work.
if asset.media_type != kb_core::MediaType::Markdown {
return Ok(kb_core::IngestItem {
kind: kb_core::IngestItemKind::Skipped,
if asset.media_type != kebab_core::MediaType::Markdown {
return Ok(kebab_core::IngestItem {
kind: kebab_core::IngestItemKind::Skipped,
doc_id: None,
doc_path: asset.workspace_path.clone(),
asset_id: Some(asset.asset_id.clone()),
@@ -472,8 +472,8 @@ fn ingest_one_asset(
let path = match &asset.source_uri {
SourceUri::File(p) => p.clone(),
SourceUri::Kb(_) => {
return Ok(kb_core::IngestItem {
kind: kb_core::IngestItemKind::Skipped,
return Ok(kebab_core::IngestItem {
kind: kebab_core::IngestItemKind::Skipped,
doc_id: None,
doc_path: asset.workspace_path.clone(),
asset_id: Some(asset.asset_id.clone()),
@@ -569,7 +569,7 @@ fn ingest_one_asset(
.iter()
.zip(vectors)
.map(|(c, v)| VectorRecord {
embedding_id: kb_core::id_for_embedding(
embedding_id: kebab_core::id_for_embedding(
&c.chunk_id,
&model_id,
&model_version,
@@ -592,12 +592,12 @@ fn ingest_one_asset(
}
let kind = if existing_doc_ids.contains(&canonical.doc_id.0) {
kb_core::IngestItemKind::Updated
kebab_core::IngestItemKind::Updated
} else {
kb_core::IngestItemKind::New
kebab_core::IngestItemKind::New
};
Ok(kb_core::IngestItem {
Ok(kebab_core::IngestItem {
kind,
doc_id: Some(canonical.doc_id.clone()),
doc_path: asset.workspace_path.clone(),
@@ -613,7 +613,7 @@ fn ingest_one_asset(
}
/// Convenience: end byte of the frontmatter region (or 0 when absent).
fn fm_span_end(span: Option<kb_parse_md::FrontmatterSpan>) -> usize {
fn fm_span_end(span: Option<kebab_parse_md::FrontmatterSpan>) -> usize {
span.map(|s| s.end).unwrap_or(0)
}
@@ -640,7 +640,7 @@ fn build_body_hints(asset: &RawAsset) -> BodyHints {
}
/// Build a `ChunkPolicy` from the active config.
fn chunk_policy_from_config(config: &kb_config::Config) -> ChunkPolicy {
fn chunk_policy_from_config(config: &kebab_config::Config) -> ChunkPolicy {
ChunkPolicy {
target_tokens: config.chunking.target_tokens,
overlap_tokens: config.chunking.overlap_tokens,
@@ -660,7 +660,7 @@ pub fn list_docs(filter: DocFilter) -> anyhow::Result<Vec<DocSummary>> {
/// ([`list_docs`]), not this.
#[doc(hidden)]
pub fn list_docs_with_config(
config: kb_config::Config,
config: kebab_config::Config,
filter: DocFilter,
) -> anyhow::Result<Vec<DocSummary>> {
let app = App::open_with_config(config)?;
@@ -676,7 +676,7 @@ pub fn inspect_doc(id: &DocumentId) -> anyhow::Result<CanonicalDocument> {
/// ([`inspect_doc`]), not this.
#[doc(hidden)]
pub fn inspect_doc_with_config(
config: kb_config::Config,
config: kebab_config::Config,
id: &DocumentId,
) -> anyhow::Result<CanonicalDocument> {
let app = App::open_with_config(config)?;
@@ -694,7 +694,7 @@ pub fn inspect_chunk(id: &ChunkId) -> anyhow::Result<Chunk> {
/// ([`inspect_chunk`]), not this.
#[doc(hidden)]
pub fn inspect_chunk_with_config(
config: kb_config::Config,
config: kebab_config::Config,
id: &ChunkId,
) -> anyhow::Result<Chunk> {
let app = App::open_with_config(config)?;
@@ -716,7 +716,7 @@ pub fn search(query: SearchQuery) -> anyhow::Result<Vec<SearchHit>> {
/// directly to amortize the embedder / vector-store cold start.
#[doc(hidden)]
pub fn search_with_config(
config: kb_config::Config,
config: kebab_config::Config,
query: SearchQuery,
) -> anyhow::Result<Vec<SearchHit>> {
App::open_with_config(config)?.search(query)
@@ -740,7 +740,7 @@ pub fn ask(query: &str, opts: AskOpts) -> anyhow::Result<Answer> {
/// [`App::ask`].
#[doc(hidden)]
pub fn ask_with_config(
config: kb_config::Config,
config: kebab_config::Config,
query: &str,
opts: AskOpts,
) -> anyhow::Result<Answer> {
@@ -761,10 +761,10 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
// override first, else XDG default. Report whichever was probed.
let cfg_path: PathBuf = match config_path {
Some(p) => p.to_path_buf(),
None => kb_config::Config::xdg_config_path(),
None => kebab_config::Config::xdg_config_path(),
};
let (config_ok, config_detail, loaded_cfg) = if cfg_path.exists() {
match kb_config::Config::from_file(&cfg_path) {
match kebab_config::Config::from_file(&cfg_path) {
Ok(c) => (true, cfg_path.display().to_string(), Some(c)),
Err(e) => (false, format!("{} ({e})", cfg_path.display()), None),
}
@@ -804,7 +804,7 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
let merged = c.clone().apply_env(&env);
expand_tilde(&merged.storage.data_dir)
}
None => kb_config::Config::xdg_data_dir(),
None => kebab_config::Config::xdg_data_dir(),
};
let writable = (|| -> anyhow::Result<()> {
std::fs::create_dir_all(&data_dir)?;

View File

@@ -19,7 +19,7 @@ pub enum LogLevel {
/// — a second call is a no-op (the second `try_init` is dropped silently
/// but the guard is still returned so the caller can keep it alive).
pub fn init(level: LogLevel) -> Result<WorkerGuard> {
let log_dir = kb_config::Config::xdg_state_dir().join("logs");
let log_dir = kebab_config::Config::xdg_state_dir().join("logs");
std::fs::create_dir_all(&log_dir)?;
let file_appender = tracing_appender::rolling::daily(&log_dir, "kb.log");

View File

@@ -21,12 +21,12 @@ use common::TestEnv;
#[ignore = "requires real Ollama on 127.0.0.1:11434"]
fn ask_lexical_smoke() {
let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let opts = kb_app::AskOpts {
let opts = kebab_app::AskOpts {
k: 5,
explain: false,
mode: kb_core::SearchMode::Lexical,
mode: kebab_core::SearchMode::Lexical,
temperature: Some(0.0),
seed: Some(0),
stream_sink: None,
@@ -34,10 +34,10 @@ fn ask_lexical_smoke() {
// The fixture workspace contains "ownership" content; the model's
// citation behavior depends on its training, so we don't assert on
// grounded — only that the call returns a structurally-valid Answer.
let answer = kb_app::ask_with_config(env.config.clone(), "ownership", opts)
let answer = kebab_app::ask_with_config(env.config.clone(), "ownership", opts)
.expect("ask returns Ok with a real Ollama backend");
// retrieval summary always populated, regardless of grounded path.
assert_eq!(answer.retrieval.mode, kb_core::SearchMode::Lexical);
assert_eq!(answer.retrieval.mode, kebab_core::SearchMode::Lexical);
assert!(answer.retrieval.k >= 5);
assert!(answer.retrieval.trace_id.0.starts_with("ret_"));
}

View File

@@ -12,7 +12,7 @@
use std::path::{Path, PathBuf};
use kb_config::Config;
use kebab_config::Config;
use tempfile::TempDir;
/// Test environment: owns a `TempDir` and exposes a `Config` whose
@@ -72,8 +72,8 @@ impl TestEnv {
}
}
pub fn scope(&self) -> kb_core::SourceScope {
kb_core::SourceScope {
pub fn scope(&self) -> kebab_core::SourceScope {
kebab_core::SourceScope {
root: self.workspace_root.clone(),
include: self.config.workspace.include.clone(),
exclude: self.config.workspace.exclude.clone(),

View File

@@ -9,7 +9,7 @@ use common::TestEnv;
fn ingest_then_list_inspects_round_trip() {
let env = TestEnv::lexical_only();
let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
// The fixture has 3 markdown files; first ingest should label them
// all as New.
@@ -27,16 +27,16 @@ fn ingest_then_list_inspects_round_trip() {
}
// list_docs returns the 3 docs.
let docs = kb_app::list_docs_with_config(
let docs = kebab_app::list_docs_with_config(
env.config.clone(),
kb_core::DocFilter::default(),
kebab_core::DocFilter::default(),
)
.unwrap();
assert_eq!(docs.len(), 3, "docs: {docs:?}");
// inspect_doc round-trips one of them.
let any_doc_id = docs[0].doc_id.clone();
let canonical = kb_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
.unwrap();
assert_eq!(canonical.doc_id, any_doc_id);
assert!(!canonical.blocks.is_empty(), "blocks empty");
@@ -47,20 +47,20 @@ fn ingest_idempotent_on_second_run() {
let env = TestEnv::lexical_only();
let r1 =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(r1.new, 3);
let r2 =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
// Same files re-ingested — labelled Updated, not duplicated.
assert_eq!(r2.scanned, 3, "second scan: {r2:?}");
assert_eq!(r2.new, 0, "second run new should be 0: {r2:?}");
assert_eq!(r2.updated, 3, "second run updated: {r2:?}");
// list_docs still has 3 docs (no duplicates).
let docs = kb_app::list_docs_with_config(
let docs = kebab_app::list_docs_with_config(
env.config.clone(),
kb_core::DocFilter::default(),
kebab_core::DocFilter::default(),
)
.unwrap();
assert_eq!(docs.len(), 3);
@@ -70,7 +70,7 @@ fn ingest_idempotent_on_second_run() {
fn ingest_summary_only_drops_items() {
let env = TestEnv::lexical_only();
let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.scanned, 3);
assert!(report.items.is_none(), "summary-only should null items");
}
@@ -82,7 +82,7 @@ fn ingest_records_ingest_runs_row_with_aggregate_counts() {
// of every run. `summary_only=true` writes `items_json=NULL`; the
// counts MUST still be present.
let env = TestEnv::lexical_only();
let report = kb_app::ingest_with_config(env.config.clone(), env.scope(), true)
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
.unwrap();
assert_eq!(report.scanned, 3);
@@ -137,7 +137,7 @@ fn ingest_provider_none_skips_lance() {
// tables under it).
let env = TestEnv::lexical_only();
let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(report.errors, 0, "lexical-only run must not error");
assert_eq!(report.new, 3);
@@ -170,22 +170,22 @@ fn ingest_provider_none_skips_lance() {
#[test]
fn list_docs_filters_by_tags_any() {
let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let filter = kb_core::DocFilter {
let filter = kebab_core::DocFilter {
tags_any: vec!["python".to_string()],
..Default::default()
};
let docs = kb_app::list_docs_with_config(env.config.clone(), filter).unwrap();
let docs = kebab_app::list_docs_with_config(env.config.clone(), filter).unwrap();
assert_eq!(docs.len(), 1, "expected only the python doc: {docs:?}");
assert!(docs[0].tags.contains(&"python".to_string()));
let rust_filter = kb_core::DocFilter {
let rust_filter = kebab_core::DocFilter {
tags_any: vec!["rust".to_string()],
..Default::default()
};
let rust_docs =
kb_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
// intro.md and notes/cargo.md both tag "rust".
assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}");
}
@@ -194,8 +194,8 @@ fn list_docs_filters_by_tags_any() {
fn inspect_doc_not_found_returns_actionable_error() {
let env = TestEnv::lexical_only();
let bogus =
kb_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
let err = kb_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
kebab_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
let err = kebab_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
let msg = format!("{err:#}");
assert!(
msg.contains("not found"),
@@ -210,10 +210,10 @@ fn inspect_doc_not_found_returns_actionable_error() {
#[test]
fn inspect_chunk_not_found_returns_actionable_error() {
let env = TestEnv::lexical_only();
let bogus = kb_core::ChunkId(
let bogus = kebab_core::ChunkId(
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
);
let err = kb_app::inspect_chunk_with_config(env.config.clone(), &bogus)
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus)
.unwrap_err();
let msg = format!("{err:#}");
assert!(msg.contains("not found"), "got: {msg}");

View File

@@ -5,24 +5,24 @@ mod common;
use common::TestEnv;
fn lexical_query(text: &str) -> kb_core::SearchQuery {
kb_core::SearchQuery {
fn lexical_query(text: &str) -> kebab_core::SearchQuery {
kebab_core::SearchQuery {
text: text.to_string(),
mode: kb_core::SearchMode::Lexical,
mode: kebab_core::SearchMode::Lexical,
k: 10,
filters: kb_core::SearchFilters::default(),
filters: kebab_core::SearchFilters::default(),
}
}
#[test]
fn lexical_search_returns_hits_after_ingest() {
let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
// "Ownership" appears as a heading + paragraph in intro.md and
// matches FTS5 default tokenizer easily.
let hits =
kb_app::search_with_config(env.config.clone(), lexical_query("ownership"))
kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
.unwrap();
assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'");
@@ -34,7 +34,7 @@ fn lexical_search_returns_hits_after_ingest() {
);
assert_eq!(
h.retrieval.method,
kb_core::SearchMode::Lexical,
kebab_core::SearchMode::Lexical,
"method label should be Lexical"
);
}
@@ -43,8 +43,8 @@ fn lexical_search_returns_hits_after_ingest() {
#[test]
fn lexical_search_empty_query_returns_empty() {
let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let hits = kb_app::search_with_config(env.config.clone(), lexical_query(" "))
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query(" "))
.unwrap();
assert!(hits.is_empty(), "blank query must short-circuit empty");
}
@@ -52,15 +52,15 @@ fn lexical_search_empty_query_returns_empty() {
#[test]
fn vector_mode_with_provider_none_errors_clearly() {
let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let q = kb_core::SearchQuery {
let q = kebab_core::SearchQuery {
text: "ownership".to_string(),
mode: kb_core::SearchMode::Vector,
mode: kebab_core::SearchMode::Vector,
k: 10,
filters: kb_core::SearchFilters::default(),
filters: kebab_core::SearchFilters::default(),
};
let err = kb_app::search_with_config(env.config.clone(), q).unwrap_err();
let err = kebab_app::search_with_config(env.config.clone(), q).unwrap_err();
let msg = format!("{err:#}");
assert!(
msg.contains("embeddings disabled") || msg.contains("disabled"),

View File

@@ -31,21 +31,21 @@ fn ingest_then_hybrid_search_returns_hits() {
let env = TestEnv::with_embeddings();
let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
assert_eq!(report.new, 3);
let q = kb_core::SearchQuery {
let q = kebab_core::SearchQuery {
text: "ownership".to_string(),
mode: kb_core::SearchMode::Hybrid,
mode: kebab_core::SearchMode::Hybrid,
k: 10,
filters: kb_core::SearchFilters::default(),
filters: kebab_core::SearchFilters::default(),
};
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap();
let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
assert!(!hits.is_empty(), "expected hybrid hits for 'ownership'");
let methods: Vec<_> = hits.iter().map(|h| h.retrieval.method).collect();
assert!(
methods.iter().all(|m| *m == kb_core::SearchMode::Hybrid),
methods.iter().all(|m| *m == kebab_core::SearchMode::Hybrid),
"every hit must report method=Hybrid: {methods:?}"
);
}
@@ -58,22 +58,22 @@ fn ingest_then_vector_search_carries_embedding_model() {
let env = TestEnv::with_embeddings();
let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
assert_eq!(report.new, 3);
let q = kb_core::SearchQuery {
let q = kebab_core::SearchQuery {
text: "ownership".to_string(),
mode: kb_core::SearchMode::Vector,
mode: kebab_core::SearchMode::Vector,
k: 10,
filters: kb_core::SearchFilters::default(),
filters: kebab_core::SearchFilters::default(),
};
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap();
let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
assert!(!hits.is_empty(), "expected vector hits for 'ownership'");
// Vector mode dispatches through `VectorRetriever` and MUST stamp
// each hit with the configured embedding_model id.
let expected = kb_core::EmbeddingModelId(env.config.models.embedding.model.clone());
let expected = kebab_core::EmbeddingModelId(env.config.models.embedding.model.clone());
for h in &hits {
assert_eq!(
h.embedding_model,
@@ -82,7 +82,7 @@ fn ingest_then_vector_search_carries_embedding_model() {
);
assert_eq!(
h.retrieval.method,
kb_core::SearchMode::Vector,
kebab_core::SearchMode::Vector,
"vector-mode hit must report method=Vector"
);
}

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-chunk"
name = "kebab-chunk"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,7 +8,7 @@ repository = { workspace = true }
description = "Chunkers that turn kb-core::CanonicalDocument into kb-core::Chunk batches (§3.5, §4.2, §7.2)"
[dependencies]
kb-core = { path = "../kb-core" }
kebab-core = { path = "../kebab-core" }
serde_json_canonicalizer = "0.3"
blake3 = { workspace = true }
anyhow = { workspace = true }
@@ -20,7 +20,7 @@ tracing = { workspace = true }
# regular deps per design §8 (chunker consumes CanonicalDocument from kb-core
# only); `cargo tree -p kb-chunk --depth 1` (default scope, excludes dev-deps)
# confirms this.
kb-parse-md = { path = "../kb-parse-md" }
kb-normalize = { path = "../kb-normalize" }
kebab-parse-md = { path = "../kebab-parse-md" }
kebab-normalize = { path = "../kebab-normalize" }
serde_json = { workspace = true }
time = { workspace = true }

View File

@@ -1,4 +1,4 @@
//! `kb-chunk` — chunkers that emit [`kb_core::Chunk`] batches.
//! `kb-chunk` — chunkers that emit [`kebab_core::Chunk`] batches.
//!
//! Per design §3.5 (Chunk), §4.2 (chunk_id recipe), §7.2 (`Chunker`
//! trait), §0 Q3/§14 (chunking priority).

View File

@@ -1,6 +1,6 @@
//! `md-heading-v1` — heading-aware Markdown chunker.
use kb_core::{
use kebab_core::{
Block, BlockId, CanonicalDocument, Chunk, ChunkPolicy, Chunker,
ChunkerVersion, DocumentId, SourceSpan, id_for_chunk,
};
@@ -24,7 +24,7 @@ const POLICY_HASH_HEX_LEN: usize = 16;
/// Heading-aware Markdown chunker.
///
/// Implements [`kb_core::Chunker`] for Markdown-derived
/// Implements [`kebab_core::Chunker`] for Markdown-derived
/// [`CanonicalDocument`]s.
///
/// **Behavior contract** (design §0 / §14, in priority order):
@@ -409,7 +409,7 @@ fn estimate_block_tokens(b: &Block) -> usize {
}
/// Borrow the `CommonBlock` of any [`Block`] variant.
fn common(b: &Block) -> &kb_core::CommonBlock {
fn common(b: &Block) -> &kebab_core::CommonBlock {
match b {
Block::Heading(h) => &h.common,
Block::Paragraph(t) | Block::Quote(t) => &t.common,
@@ -424,7 +424,7 @@ fn common(b: &Block) -> &kb_core::CommonBlock {
#[cfg(test)]
mod tests {
use super::*;
use kb_core::{
use kebab_core::{
AssetId, CodeBlock, CommonBlock, HeadingBlock, ImageRefBlock, Lang,
Metadata, Provenance, SourceType, TableBlock, TextBlock, TrustLevel,
WorkspacePath, id_for_block,
@@ -433,7 +433,7 @@ mod tests {
fn make_doc(blocks: Vec<Block>) -> CanonicalDocument {
CanonicalDocument {
doc_id: kb_core::DocumentId("d".repeat(32)),
doc_id: kebab_core::DocumentId("d".repeat(32)),
source_asset_id: AssetId("a".repeat(32)),
workspace_path: WorkspacePath::new("notes/test.md".into()).unwrap(),
title: "Test".into(),
@@ -450,14 +450,14 @@ mod tests {
user: Default::default(),
},
provenance: Provenance { events: vec![] },
parser_version: kb_core::ParserVersion("test-parser-0".into()),
parser_version: kebab_core::ParserVersion("test-parser-0".into()),
schema_version: 1,
doc_version: 1,
}
}
fn doc_id() -> kb_core::DocumentId {
kb_core::DocumentId("d".repeat(32))
fn doc_id() -> kebab_core::DocumentId {
kebab_core::DocumentId("d".repeat(32))
}
fn span(start: u32, end: u32) -> SourceSpan {

View File

@@ -13,13 +13,13 @@
use std::path::PathBuf;
use kb_chunk::MdHeadingV1Chunker;
use kb_core::{
use kebab_chunk::MdHeadingV1Chunker;
use kebab_core::{
AssetId, AssetStorage, Checksum, ChunkPolicy, ChunkerVersion, Chunker, MediaType,
ParserVersion, RawAsset, SourceUri, WorkspacePath,
};
use kb_normalize::build_canonical_document;
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kebab_normalize::build_canonical_document;
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use serde_json::Value;
use time::OffsetDateTime;

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-cli"
name = "kebab-cli"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -12,9 +12,9 @@ name = "kb"
path = "src/main.rs"
[dependencies]
kb-core = { path = "../kb-core" }
kb-config = { path = "../kb-config" }
kb-app = { path = "../kb-app" }
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
kebab-app = { path = "../kebab-app" }
# kb-eval re-exports `compute_aggregate` / `compare_runs` /
# `render_report_md` (P5-2). The DoD calls for these to be reached
# "via kb-app", but kb-eval already depends on kb-app (P5-1 runner
@@ -22,7 +22,7 @@ kb-app = { path = "../kb-app" }
# require kb-app → kb-eval, forming a cycle. We therefore wire
# kb-cli → kb-eval directly; documented in
# `tasks/p5/p5-2-metrics-compare.md`.
kb-eval = { path = "../kb-eval" }
kebab-eval = { path = "../kebab-eval" }
anyhow = { workspace = true }
serde_json = { workspace = true }
clap = { version = "4", features = ["derive"] }

View File

@@ -6,7 +6,7 @@ use std::process::ExitCode;
use clap::{Parser, Subcommand};
use kb_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
mod wire;
@@ -170,12 +170,12 @@ enum ModeFlag {
Hybrid,
}
impl From<ModeFlag> for kb_core::SearchMode {
impl From<ModeFlag> for kebab_core::SearchMode {
fn from(m: ModeFlag) -> Self {
match m {
ModeFlag::Lexical => kb_core::SearchMode::Lexical,
ModeFlag::Vector => kb_core::SearchMode::Vector,
ModeFlag::Hybrid => kb_core::SearchMode::Hybrid,
ModeFlag::Lexical => kebab_core::SearchMode::Lexical,
ModeFlag::Vector => kebab_core::SearchMode::Vector,
ModeFlag::Hybrid => kebab_core::SearchMode::Hybrid,
}
}
}
@@ -183,15 +183,15 @@ impl From<ModeFlag> for kb_core::SearchMode {
fn main() -> ExitCode {
let cli = Cli::parse();
let level = if cli.debug {
kb_app::logging::LogLevel::Debug
kebab_app::logging::LogLevel::Debug
} else if cli.verbose {
kb_app::logging::LogLevel::Verbose
kebab_app::logging::LogLevel::Verbose
} else {
kb_app::logging::LogLevel::Default
kebab_app::logging::LogLevel::Default
};
// Fail-soft: if logging init errors (e.g. XDG state dir is read-only),
// proceed without a guard rather than crashing — `kb` is still usable.
let _log_guard = kb_app::logging::init(level).ok();
let _log_guard = kebab_app::logging::init(level).ok();
match run(&cli) {
Ok(()) => ExitCode::from(0),
Err(e) => {
@@ -227,14 +227,14 @@ fn exit_code(err: &anyhow::Error) -> u8 {
fn run(cli: &Cli) -> anyhow::Result<()> {
match &cli.command {
Cmd::Init { force } => {
kb_app::init_workspace(*force)?;
kebab_app::init_workspace(*force)?;
if !cli.json {
println!(
"created {}",
kb_config::Config::xdg_config_path().display()
kebab_config::Config::xdg_config_path().display()
);
println!("created {}", kb_config::Config::xdg_data_dir().display());
println!("created {}", kb_config::Config::xdg_state_dir().display());
println!("created {}", kebab_config::Config::xdg_data_dir().display());
println!("created {}", kebab_config::Config::xdg_state_dir().display());
println!("hint edit the config above, then `kb ingest`");
}
Ok(())
@@ -244,13 +244,13 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
root,
summary_only,
} => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let scope = kb_core::SourceScope {
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let scope = kebab_core::SourceScope {
root: root.clone().unwrap_or_else(|| PathBuf::from(&cfg.workspace.root)),
include: cfg.workspace.include.clone(),
exclude: cfg.workspace.exclude.clone(),
};
let report = kb_app::ingest_with_config(cfg, scope, *summary_only)?;
let report = kebab_app::ingest_with_config(cfg, scope, *summary_only)?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
} else {
@@ -269,8 +269,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Cmd::List { what } => match what {
ListWhat::Docs => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let docs = kb_app::list_docs_with_config(cfg, kb_core::DocFilter::default())?;
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let docs = kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?);
} else {
@@ -284,9 +284,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Cmd::Inspect { what } => match what {
InspectWhat::Doc { id } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let doc_id: kb_core::DocumentId = id.parse()?;
let doc = kb_app::inspect_doc_with_config(cfg, &doc_id)?;
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let doc_id: kebab_core::DocumentId = id.parse()?;
let doc = kebab_app::inspect_doc_with_config(cfg, &doc_id)?;
// Inspect doc emits a `CanonicalDocument` — there's no §2
// wire schema for it (P1-5 will decide whether this also
// becomes a tagged wrapper or stays as the raw domain
@@ -296,9 +296,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Ok(())
}
InspectWhat::Chunk { id } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let chunk_id: kb_core::ChunkId = id.parse()?;
let chunk = kb_app::inspect_chunk_with_config(cfg, &chunk_id)?;
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let chunk_id: kebab_core::ChunkId = id.parse()?;
let chunk = kebab_app::inspect_chunk_with_config(cfg, &chunk_id)?;
println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?);
Ok(())
}
@@ -310,14 +310,14 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
mode,
explain: _,
} => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let q = kb_core::SearchQuery {
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let q = kebab_core::SearchQuery {
text: query.clone(),
mode: (*mode).into(),
k: *k,
filters: kb_core::SearchFilters::default(),
filters: kebab_core::SearchFilters::default(),
};
let hits = kb_app::search_with_config(cfg, q)?;
let hits = kebab_app::search_with_config(cfg, q)?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
} else {
@@ -351,8 +351,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
temperature,
seed,
} => {
let cfg = kb_config::Config::load(cli.config.as_deref())?;
let opts = kb_app::AskOpts {
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let opts = kebab_app::AskOpts {
k: *k,
explain: *explain,
mode: (*mode).into(),
@@ -363,7 +363,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
// wires up a real `mpsc::Sender` here.
stream_sink: None,
};
let ans = kb_app::ask_with_config(cfg, query, opts)?;
let ans = kebab_app::ask_with_config(cfg, query, opts)?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?);
} else {
@@ -377,7 +377,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
}
Cmd::Doctor => {
let report = kb_app::doctor_with_config_path(cli.config.as_deref())?;
let report = kebab_app::doctor_with_config_path(cli.config.as_deref())?;
if cli.json {
println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?);
} else {
@@ -409,7 +409,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
temperature,
seed,
} => {
let opts = kb_eval::EvalRunOpts {
let opts = kebab_eval::EvalRunOpts {
suite: suite.clone(),
mode: (*mode).into(),
with_rag: *with_rag,
@@ -417,7 +417,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
temperature: *temperature,
seed: *seed,
};
let run = kb_eval::run_eval(&opts)?;
let run = kebab_eval::run_eval(&opts)?;
if cli.json {
println!("{}", serde_json::to_string_pretty(&run)?);
} else {
@@ -430,8 +430,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
}
EvalWhat::Aggregate { run_id } => {
let agg = kb_eval::compute_aggregate(run_id)?;
kb_eval::store_aggregate(run_id, &agg)?;
let agg = kebab_eval::compute_aggregate(run_id)?;
kebab_eval::store_aggregate(run_id, &agg)?;
if cli.json {
println!("{}", serde_json::to_string_pretty(&agg)?);
} else {
@@ -450,20 +450,20 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
strict_chunker_version,
write_report,
} => {
let cfg = kb_config::Config::load(None)?;
let opts = kb_eval::CompareOpts {
let cfg = kebab_config::Config::load(None)?;
let opts = kebab_eval::CompareOpts {
strict_chunker_version: *strict_chunker_version,
};
let report = kb_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
let md = kb_eval::render_report_md(&report);
let report = kebab_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
let md = kebab_eval::render_report_md(&report);
if cli.json {
println!("{}", serde_json::to_string_pretty(&report)?);
} else {
print!("{md}");
}
if *write_report {
let resolved_data_dir = kb_config::expand_path(&cfg.storage.data_dir, "");
let runs_dir = kb_config::expand_path(
let resolved_data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
let runs_dir = kebab_config::expand_path(
&cfg.storage.runs_dir,
&resolved_data_dir.to_string_lossy(),
);

View File

@@ -17,8 +17,8 @@
use serde_json::Value;
use kb_app::DoctorReport;
use kb_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
use kebab_app::DoctorReport;
use kebab_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
/// Insert `schema_version` into an object-shaped `Value`. Helper for the
/// "serialize, then tag" pattern used by all the per-type wrappers below.
@@ -132,7 +132,7 @@ mod tests {
#[test]
fn ingest_wrapper_tags_schema_version() {
use kb_core::SourceScope;
use kebab_core::SourceScope;
let r = IngestReport {
scope: SourceScope {
root: std::path::PathBuf::from("/tmp"),

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-config"
name = "kebab-config"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -9,7 +9,7 @@ description = "Config schema + XDG path resolution"
[dependencies]
# kb-core::CoreError reserved for P1-* config errors
kb-core = { path = "../kb-core" }
kebab-core = { path = "../kebab-core" }
anyhow = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-core"
name = "kebab-core"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-embed-local"
name = "kebab-embed-local"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Local fastembed-rs adapter implementing kb_core::Embedder (multilingual-e5-small default)"
[dependencies]
kb-config = { path = "../kb-config" }
kb-embed = { path = "../kb-embed" }
kebab-config = { path = "../kebab-config" }
kebab-embed = { path = "../kebab-embed" }
# Default features bring `ort-download-binaries` (bundled ONNX runtime)
# and `hf-hub-native-tls` (first-run model download). No extra features
# needed for the multilingual-e5-small path.

View File

@@ -1,5 +1,5 @@
//! `kb-embed-local` — `FastembedEmbedder`, a local ONNX-backed
//! [`Embedder`](kb_embed::Embedder) implementation.
//! [`Embedder`](kebab_embed::Embedder) implementation.
//!
//! Wraps [`fastembed::TextEmbedding`] for the default `multilingual-e5-small`
//! (384-dim) model. Honors `config.models.embedding.batch_size` and applies
@@ -26,8 +26,8 @@ use std::sync::Mutex;
use anyhow::{Context, Result};
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
use kb_config::expand_path;
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
use kebab_config::expand_path;
use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
/// Subdirectory under `config.storage.model_dir` where the fastembed
/// adapter writes / reads ONNX + tokenizer files. Hard-coded per task
@@ -58,9 +58,9 @@ impl FastembedEmbedder {
/// `config.models.embedding.dimensions` matches the model's actual
/// dim BEFORE returning, so a mismatch fails at construction (not on
/// first `embed`).
pub fn new(config: &kb_config::Config) -> Result<Self> {
pub fn new(config: &kebab_config::Config) -> Result<Self> {
// 1. Resolve `{data_dir}/models/fastembed/` from the config
// templates. Goes through the shared `kb_config::expand_path`
// templates. Goes through the shared `kebab_config::expand_path`
// so every crate resolves storage paths identically.
let data_dir = expand_path(&config.storage.data_dir, "");
let model_dir = expand_path(&config.storage.model_dir, &data_dir.to_string_lossy());
@@ -224,7 +224,7 @@ pub(crate) fn check_dim(model_dim: usize, cfg_dim: usize) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use kb_embed::EmbeddingInput;
use kebab_embed::EmbeddingInput;
// ── check_dim ────────────────────────────────────────────────────
//

View File

@@ -22,16 +22,16 @@ use std::hash::{Hash, Hasher};
use std::sync::OnceLock;
use std::time::Instant;
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind};
use kb_embed_local::FastembedEmbedder;
use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind};
use kebab_embed_local::FastembedEmbedder;
/// Build a `Config` whose `data_dir` lives in a per-process temp dir so
/// the test never writes into the developer's real `~/.local/share/kb`.
/// Returns the `Config` and the `TempDir` guard (caller keeps the guard
/// alive for the test duration).
fn test_config() -> (kb_config::Config, tempfile::TempDir) {
fn test_config() -> (kebab_config::Config, tempfile::TempDir) {
let tmp = tempfile::tempdir().expect("create tempdir");
let mut cfg = kb_config::Config::defaults();
let mut cfg = kebab_config::Config::defaults();
cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned();
// model_dir keeps its default `{data_dir}/models` template; the
// adapter resolves it itself.
@@ -141,12 +141,12 @@ fn output_vectors_are_l2_normalized() {
},
];
let out = emb.embed(&inputs).expect("embed");
// Per `kb_embed::assert_unit_norm` docs: `5e-4` is the safe bound at
// Per `kebab_embed::assert_unit_norm` docs: `5e-4` is the safe bound at
// 384 dims (f32::EPSILON × √384 ≈ 2.3e-6, but ONNX kernels add
// their own per-component noise; 1e-3 is very generous and matches
// the spec's `± 1e-3`).
kb_embed::assert_unit_norm(&out, 1e-3);
kb_embed::assert_vector_shape(&out, 384);
kebab_embed::assert_unit_norm(&out, 1e-3);
kebab_embed::assert_vector_shape(&out, 384);
}
// ─── determinism ──────────────────────────────────────────────────────

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-embed"
name = "kebab-embed"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Embedder trait re-exports + opt-in deterministic MockEmbedder for downstream tests"
[dependencies]
kb-core = { path = "../kb-core" }
kb-config = { path = "../kb-config" }
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
serde = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }

View File

@@ -1,9 +1,9 @@
//! `kb-embed` — thin re-export crate for the [`Embedder`] trait surface.
//!
//! This crate exists so downstream code (`kb-store-vector`, `kb-search`,
//! adapters in p3-2) can `use kb_embed::Embedder` and stay stable across
//! adapters in p3-2) can `use kebab_embed::Embedder` and stay stable across
//! kb-core reorganizations. It defines **no new types**; everything is a
//! re-export of [`kb_core`].
//! re-export of [`kebab_core`].
//!
//! ## Mock implementation
//!
@@ -19,7 +19,7 @@
// Per spec §7.2 — these are the only public-surface types this crate offers.
// Adding new types is forbidden by the task contract.
pub use kb_core::{
pub use kebab_core::{
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
};

View File

@@ -38,7 +38,7 @@
//! * Different `text` → different output with overwhelming probability.
//! * All output components are finite (`is_finite()`).
use kb_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
use kebab_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
/// Deterministic test double. See module docs for the hashing recipe.
pub struct MockEmbedder {

View File

@@ -4,7 +4,7 @@
#![cfg(feature = "mock")]
use kb_embed::{
use kebab_embed::{
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, MockEmbedder,
assert_unit_norm, assert_vector_shape,
};

View File

@@ -5,7 +5,7 @@
//! Runs under both `cargo test -p kb-embed` and
//! `cargo test -p kb-embed --features mock`.
use kb_embed::{
use kebab_embed::{
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
assert_vector_shape,
};

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-eval"
name = "kebab-eval"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -9,10 +9,10 @@ description = "Golden-fixture eval runner: load YAML, drive kb-app search/ask,
[dependencies]
# Allowed deps per p5-1 spec — domain types + facade only.
kb-core = { path = "../kb-core" }
kb-config = { path = "../kb-config" }
kb-app = { path = "../kb-app" }
kb-store-sqlite = { path = "../kb-store-sqlite" }
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
kebab-app = { path = "../kebab-app" }
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
serde = { workspace = true }
serde_json = { workspace = true }
serde_yaml = { workspace = true }

View File

@@ -14,9 +14,9 @@ use std::fmt::Write as _;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use kb_config::Config;
use kb_core::{ChunkId, DocumentId};
use kb_store_sqlite::SqliteStore;
use kebab_config::Config;
use kebab_core::{ChunkId, DocumentId};
use kebab_store_sqlite::SqliteStore;
use crate::loader::load_golden_set;
use crate::metrics::{
@@ -300,7 +300,7 @@ fn extract_chunker_version(snapshot_json: &str) -> Option<String> {
}
fn parse_results(
rows: &[kb_store_sqlite::EvalQueryResultRecord],
rows: &[kebab_store_sqlite::EvalQueryResultRecord],
) -> Result<HashMap<String, QueryResult>> {
let mut out = HashMap::with_capacity(rows.len());
for row in rows {
@@ -456,9 +456,9 @@ mod tests {
let g = GoldenQuery {
id: "q1".into(),
query: "q".into(),
lang: kb_core::Lang(String::new()),
lang: kebab_core::Lang(String::new()),
expected_doc_ids: vec![],
expected_chunk_ids: vec![kb_core::ChunkId("c1".into())],
expected_chunk_ids: vec![kebab_core::ChunkId("c1".into())],
must_contain: vec![],
forbidden: vec![],
difficulty: None,

View File

@@ -1,7 +1,7 @@
//! `kb-eval` — golden-fixture eval runner (P5-1).
//!
//! Loads `fixtures/golden_queries.yaml`, runs each entry through the
//! [`kb_app`] facade (lexical / vector / hybrid + optional RAG), and
//! [`kebab_app`] facade (lexical / vector / hybrid + optional RAG), and
//! persists results into `eval_runs` / `eval_query_results` plus
//! `runs_dir/<run_id>/per_query.jsonl` (design §5.7, §6.3).
//!

View File

@@ -6,7 +6,7 @@
//! tests that don't have a SQLite store handy.
//! - [`load_golden_set_validated`] — additionally verifies every
//! `expected_doc_id` / `expected_chunk_id` exists in the SQLite DB
//! the supplied [`kb_config::Config`] points at. Used by
//! the supplied [`kebab_config::Config`] points at. Used by
//! [`crate::run_eval`] in production so a stale golden set fails
//! fast at run start.
@@ -14,7 +14,7 @@ use std::collections::{BTreeSet, HashSet};
use std::path::Path;
use anyhow::{Context, Result, anyhow};
use kb_store_sqlite::SqliteStore;
use kebab_store_sqlite::SqliteStore;
use crate::types::GoldenQuery;
@@ -43,11 +43,11 @@ pub fn load_golden_set(path: &Path) -> Result<Vec<GoldenQuery>> {
/// Currently used only by the in-module tests below; production code
/// inlines `load_golden_set` + `validate_against_db` in
/// [`crate::run_eval_with_config`] so the validation can run against
/// an already-opened [`kb_config::Config`] without re-parsing YAML.
/// an already-opened [`kebab_config::Config`] without re-parsing YAML.
#[cfg(test)]
pub(crate) fn load_golden_set_validated(
yaml_path: &Path,
cfg: &kb_config::Config,
cfg: &kebab_config::Config,
) -> Result<Vec<GoldenQuery>> {
let queries = load_golden_set(yaml_path)?;
validate_against_db(&queries, cfg)?;
@@ -73,7 +73,7 @@ fn check_unique_ids(queries: &[GoldenQuery]) -> Result<()> {
/// Read every doc_id / chunk_id referenced by `queries` and confirm
/// SQLite has rows for them. Builds a sorted, deduplicated error
/// message listing every missing ID.
pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kb_config::Config) -> Result<()> {
pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kebab_config::Config) -> Result<()> {
// Short-circuit when there is nothing to validate — saves opening
// SQLite for golden sets that omit expected_*_ids entirely.
let needs_check = queries
@@ -140,8 +140,8 @@ mod tests {
//! `tests/loader.rs`; only the validated-variant cases need to sit
//! next to the function so they can see the `pub(crate)` symbol.
use super::*;
use kb_config::Config;
use kb_store_sqlite::SqliteStore;
use kebab_config::Config;
use kebab_store_sqlite::SqliteStore;
use rusqlite::params;
use std::fs;
use tempfile::tempdir;

View File

@@ -13,9 +13,9 @@ use std::path::PathBuf;
use anyhow::{Context, Result};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use kb_config::Config;
use kb_core::{ChunkId, Citation, DocumentId};
use kb_store_sqlite::SqliteStore;
use kebab_config::Config;
use kebab_core::{ChunkId, Citation, DocumentId};
use kebab_store_sqlite::SqliteStore;
use crate::loader::load_golden_set;
use crate::types::{GoldenQuery, QueryResult};
@@ -175,7 +175,7 @@ fn load_golden_for_metrics() -> Result<Vec<GoldenQuery>> {
/// `tasks/p5/p5-2-metrics-compare.md`), this will need to take one.
pub(crate) fn aggregate_from_rows(
queries: &[GoldenQuery],
rows: &[kb_store_sqlite::EvalQueryResultRecord],
rows: &[kebab_store_sqlite::EvalQueryResultRecord],
) -> Result<AggregateMetrics> {
let golden_by_id: HashMap<&str, &GoldenQuery> =
queries.iter().map(|q| (q.id.as_str(), q)).collect();
@@ -395,14 +395,14 @@ fn ratio_or_zero(num: u32, denom: u32) -> f32 {
#[cfg(test)]
mod tests {
use super::*;
use kb_core::{
use kebab_core::{
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, RetrievalDetail, SearchHit,
SearchMode,
};
use kb_core::asset::WorkspacePath;
use kb_core::media::Lang;
use kb_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId};
use kb_core::versions::PromptTemplateVersion;
use kebab_core::asset::WorkspacePath;
use kebab_core::media::Lang;
use kebab_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId};
use kebab_core::versions::PromptTemplateVersion;
use time::OffsetDateTime;
fn gq(id: &str, expected_chunks: &[&str], expected_docs: &[&str]) -> GoldenQuery {
@@ -460,9 +460,9 @@ mod tests {
}
fn record(id: &str, hits: Vec<SearchHit>, error: Option<String>, answer: Option<Answer>)
-> kb_store_sqlite::EvalQueryResultRecord
-> kebab_store_sqlite::EvalQueryResultRecord
{
kb_store_sqlite::EvalQueryResultRecord {
kebab_store_sqlite::EvalQueryResultRecord {
query_id: id.into(),
result_json: serde_json::to_string(&qr(id, hits, error, answer)).unwrap(),
}

View File

@@ -6,10 +6,10 @@ use std::path::PathBuf;
use std::time::Instant;
use anyhow::{Context, Result};
use kb_app::App;
use kb_config::expand_path;
use kb_core::{SearchFilters, SearchQuery};
use kb_store_sqlite::{EvalRunRow, SqliteStore};
use kebab_app::App;
use kebab_config::expand_path;
use kebab_core::{SearchFilters, SearchQuery};
use kebab_store_sqlite::{EvalRunRow, SqliteStore};
use time::OffsetDateTime;
use crate::loader::{load_golden_set, validate_against_db};
@@ -25,18 +25,18 @@ fn elapsed_ms_u32(start: Instant) -> u32 {
}
/// Run the golden suite end-to-end against the active XDG-loaded
/// [`kb_config::Config`]. Wraps [`run_eval_with_config`] with
/// [`kebab_config::Config`]. Wraps [`run_eval_with_config`] with
/// `Config::load(None)`.
pub fn run_eval(opts: &EvalRunOpts) -> Result<EvalRun> {
let cfg = kb_config::Config::load(None).context("load Config for run_eval")?;
let cfg = kebab_config::Config::load(None).context("load Config for run_eval")?;
run_eval_with_config(&cfg, opts)
}
/// Run the golden suite end-to-end against an explicit
/// [`kb_config::Config`]. Used by integration tests (TempDir-backed
/// [`kebab_config::Config`]. Used by integration tests (TempDir-backed
/// data_dir) and any future caller that wants to drive the runner
/// against a non-default config.
pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> {
pub fn run_eval_with_config(cfg: &kebab_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> {
let started = Instant::now();
// ── 1. Load golden set ────────────────────────────────────────────────
@@ -167,7 +167,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
// call did not already error out (we want one error per query, not
// a duplicated one).
let answer = if opts.with_rag && error.is_none() {
let ask_opts = kb_app::AskOpts {
let ask_opts = kebab_app::AskOpts {
k: opts.k,
explain: true,
mode: opts.mode,
@@ -206,7 +206,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
/// stable run-time property of the config alone. P5-2 may compose it
/// from `embedding.{model,version,dimensions}` if it needs the field
/// for compare reports.
fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> {
fn build_config_snapshot(cfg: &kebab_config::Config) -> Result<serde_json::Value> {
let cfg_value = serde_json::to_value(cfg).context("serialize Config")?;
Ok(serde_json::json!({
"config": cfg_value,
@@ -234,7 +234,7 @@ fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> {
/// `run_id` collision would already have failed the `eval_runs`
/// PRIMARY KEY upstream).
fn write_per_query_jsonl(
cfg: &kb_config::Config,
cfg: &kebab_config::Config,
run_id: &str,
per_query: &[QueryResult],
) -> Result<()> {

View File

@@ -4,7 +4,7 @@
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use kb_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode};
use kebab_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode};
/// One golden query loaded from `fixtures/golden_queries.yaml`.
///
@@ -41,10 +41,10 @@ pub struct EvalRunOpts {
/// Suite label persisted into `eval_runs.suite`. The shipped
/// fixture is `"golden"`; other suites can reuse the same runner.
pub suite: String,
/// Retrieval mode forwarded to every `kb_app::search` /
/// `kb_app::ask` call inside the run.
/// Retrieval mode forwarded to every `kebab_app::search` /
/// `kebab_app::ask` call inside the run.
pub mode: SearchMode,
/// When `true`, also call `kb_app::ask` per query and record the
/// When `true`, also call `kebab_app::ask` per query and record the
/// resulting `Answer` on the `QueryResult`.
pub with_rag: bool,
/// Top-k forwarded to retrieval (and `AskOpts.k` when `with_rag`).

View File

@@ -8,7 +8,7 @@
use std::fs;
use kb_eval::load_golden_set;
use kebab_eval::load_golden_set;
use tempfile::tempdir;
// ── 1. parser accepts well-formed YAML with optional fields ──────────────────

View File

@@ -9,17 +9,17 @@
use std::fs;
use std::path::PathBuf;
use kb_config::Config;
use kb_core::{
use kebab_config::Config;
use kebab_core::{
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, Lang,
RetrievalDetail, SearchHit, SearchMode,
asset::WorkspacePath,
};
use kb_eval::{
use kebab_eval::{
AggregateMetrics, CompareOpts, CompareReport, ComparisonKind, GoldenQuery, QueryResult,
compare_runs_with_config, compute_aggregate_with_config, store_aggregate_with_config,
};
use kb_store_sqlite::{EvalRunRow, SqliteStore};
use kebab_store_sqlite::{EvalRunRow, SqliteStore};
use tempfile::TempDir;
use time::OffsetDateTime;
@@ -259,7 +259,7 @@ fn compare_runs_classifies_win_loss_draw_regression() {
drop(store);
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
let by_id: std::collections::HashMap<&str, &kb_eval::QueryComparison> =
let by_id: std::collections::HashMap<&str, &kebab_eval::QueryComparison> =
report.per_query.iter().map(|c| (c.query_id.as_str(), c)).collect();
assert_eq!(by_id["q-001"].kind, ComparisonKind::Loss);
assert_eq!(by_id["q-002"].kind, ComparisonKind::Win);
@@ -414,7 +414,7 @@ fn render_report_md_is_human_readable() {
drop(store);
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
let md = kb_eval::render_report_md(&report);
let md = kebab_eval::render_report_md(&report);
assert!(md.starts_with("# Eval compare:"), "md = {md}");
assert!(md.contains("hit@1"));
assert!(md.contains("MRR"));

View File

@@ -1,6 +1,6 @@
//! Runner integration tests for `kb-eval` (P5-1).
//!
//! Drives [`kb_eval::run_eval_with_config`] end-to-end against a
//! Drives [`kebab_eval::run_eval_with_config`] end-to-end against a
//! TempDir-backed config:
//!
//! - tiny seeded SQLite corpus (3 docs / 3 chunks) used as the
@@ -17,10 +17,10 @@ use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Mutex;
use kb_config::Config;
use kb_core::SearchMode;
use kb_eval::{EvalRunOpts, QueryResult, run_eval_with_config};
use kb_store_sqlite::SqliteStore;
use kebab_config::Config;
use kebab_core::SearchMode;
use kebab_eval::{EvalRunOpts, QueryResult, run_eval_with_config};
use kebab_store_sqlite::SqliteStore;
use rusqlite::params;
use tempfile::TempDir;
@@ -110,7 +110,7 @@ fn seed_corpus(store: &SqliteStore) {
// Build the FTS index so lexical search returns hits. Reuses the
// same connection guard rather than reopening — the SAVEPOINT
// protocol nests correctly under the existing read_conn lock.
kb_store_sqlite::rebuild_chunks_fts(&conn).unwrap();
kebab_store_sqlite::rebuild_chunks_fts(&conn).unwrap();
drop(conn);
}

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-llm-local"
name = "kebab-llm-local"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,9 +8,9 @@ repository = { workspace = true }
description = "Ollama HTTP adapter implementing kb_core::LanguageModel via reqwest::blocking"
[dependencies]
kb-core = { path = "../kb-core" }
kb-config = { path = "../kb-config" }
kb-llm = { path = "../kb-llm" }
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
kebab-llm = { path = "../kebab-llm" }
# `default-features = false` drops the `default-tls` (native-tls / openssl)
# feature so we don't pull in a system OpenSSL; we explicitly pin rustls.
# Note: `default-features = false` does NOT drop tokio — reqwest 0.12's

View File

@@ -1,5 +1,5 @@
//! `kb-llm-local` — Ollama HTTP adapter implementing
//! [`kb_core::LanguageModel`] over the local `POST /api/generate` endpoint.
//! [`kebab_core::LanguageModel`] over the local `POST /api/generate` endpoint.
//!
//! ## Why a separate crate
//!
@@ -39,11 +39,11 @@ mod ollama;
pub use error::LlmError;
pub use ollama::OllamaLanguageModel;
// Re-export the trait surface so adapter consumers can `use kb_llm_local::*`
// Re-export the trait surface so adapter consumers can `use kebab_llm_local::*`
// without also depending on `kb-llm` directly. These are the same symbols
// `kb-llm` re-exports from `kb-core`; this crate adds **no new types** to
// the trait surface (`LlmError` and `OllamaLanguageModel` are
// implementation-side only).
pub use kb_llm::{
pub use kebab_llm::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
};

View File

@@ -41,7 +41,7 @@
use std::io::{BufRead, BufReader};
use std::time::Duration;
use kb_core::{
use kebab_core::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
};
use serde::{Deserialize, Serialize};
@@ -68,7 +68,7 @@ pub struct OllamaLanguageModel {
}
impl OllamaLanguageModel {
/// Build an adapter from a workspace [`kb_config::Config`]. Reads
/// Build an adapter from a workspace [`kebab_config::Config`]. Reads
/// `config.models.llm.{provider, model, endpoint, context_tokens,
/// temperature, seed}`.
///
@@ -76,7 +76,7 @@ impl OllamaLanguageModel {
/// expected to have validated `provider == "ollama"`; this constructor
/// trusts the config and would happily build for an unknown provider.
/// (Provider routing is the App layer's job, not the adapter's.)
pub fn new(config: &kb_config::Config) -> anyhow::Result<Self> {
pub fn new(config: &kebab_config::Config) -> anyhow::Result<Self> {
let llm = &config.models.llm;
let client = reqwest::blocking::Client::builder()
.timeout(REQUEST_TIMEOUT)
@@ -292,7 +292,7 @@ impl Iterator for OllamaStream {
// pipelines that expect a terminal frame still terminate.
self.done = true;
tracing::warn!(
target: "kb_llm_local",
target: "kebab_llm_local",
"ollama stream ended without a `done: true` frame; synthesizing Aborted",
);
return Some(Ok(TokenChunk::Done {
@@ -361,14 +361,14 @@ impl Iterator for OllamaStream {
};
let prompt_tokens = line.prompt_eval_count.unwrap_or_else(|| {
tracing::warn!(
target: "kb_llm_local",
target: "kebab_llm_local",
"ollama done frame missing prompt_eval_count; defaulting to 0",
);
0
});
let completion_tokens = line.eval_count.unwrap_or_else(|| {
tracing::warn!(
target: "kb_llm_local",
target: "kebab_llm_local",
"ollama done frame missing eval_count; defaulting to 0",
);
0

View File

@@ -2,8 +2,8 @@
//! relevant config fields and exposes them via the trait surface, all
//! without touching the network (per design §7.2 lazy-connect contract).
use kb_config::Config;
use kb_llm_local::{LanguageModel, OllamaLanguageModel};
use kebab_config::Config;
use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
#[test]
fn construction_with_default_config_returns_expected_model_ref() {

View File

@@ -11,9 +11,9 @@
//! These hit `http://127.0.0.1:11434` directly and require an actual model
//! pulled locally. CI runs default (non-ignored) tests only.
use kb_config::Config;
use kb_core::{GenerateRequest, TokenChunk};
use kb_llm_local::{LanguageModel, OllamaLanguageModel};
use kebab_config::Config;
use kebab_core::{GenerateRequest, TokenChunk};
use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
#[test]
#[ignore = "requires a local Ollama daemon + pulled model"]

View File

@@ -10,9 +10,9 @@
//! error mapping, finish-reason mapping, missing-counter degradation, and
//! determinism semantics.
use kb_config::Config;
use kb_core::{FinishReason, GenerateRequest, TokenChunk};
use kb_llm_local::{LanguageModel, LlmError, OllamaLanguageModel};
use kebab_config::Config;
use kebab_core::{FinishReason, GenerateRequest, TokenChunk};
use kebab_llm_local::{LanguageModel, LlmError, OllamaLanguageModel};
use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate};

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-llm"
name = "kebab-llm"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,7 +8,7 @@ repository = { workspace = true }
description = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests"
[dependencies]
kb-core = { path = "../kb-core" }
kebab-core = { path = "../kebab-core" }
anyhow = { workspace = true }
[features]

View File

@@ -1,8 +1,8 @@
//! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface.
//!
//! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can
//! `use kb_llm::LanguageModel` and stay stable across kb-core reorganizations.
//! It defines **no new types**; everything is a re-export of [`kb_core`].
//! `use kebab_llm::LanguageModel` and stay stable across kb-core reorganizations.
//! It defines **no new types**; everything is a re-export of [`kebab_core`].
//!
//! ## Mock implementation
//!
@@ -20,7 +20,7 @@
// Per spec §7.2 — these are the only public-surface types this crate offers.
// Adding new types is forbidden by the task contract.
pub use kb_core::{
pub use kebab_core::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
};

View File

@@ -36,7 +36,7 @@
//! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever
//! the constructor was given — the mock does not count.
use kb_core::{
use kebab_core::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
};

View File

@@ -4,7 +4,7 @@
#![cfg(feature = "mock")]
use kb_llm::{
use kebab_llm::{
FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage,
assert_finish_chunk,
};

View File

@@ -5,7 +5,7 @@
//! Runs under both `cargo test -p kb-llm` and
//! `cargo test -p kb-llm --features mock`.
use kb_llm::{
use kebab_llm::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
assert_finish_chunk,
};

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-normalize"
name = "kebab-normalize"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Lift parser output (kb-parse-types) into kb-core::CanonicalDocument with deterministic IDs (§3.4, §4.2, §4.3)"
[dependencies]
kb-core = { path = "../kb-core" }
kb-parse-types = { path = "../kb-parse-types" }
kebab-core = { path = "../kebab-core" }
kebab-parse-types = { path = "../kebab-parse-types" }
serde = { workspace = true }
serde_json = { workspace = true }
unicode-normalization = "0.1"
@@ -23,5 +23,5 @@ tracing = { workspace = true }
# Forbidden as a regular dep per design §8 (kb-normalize must not depend
# on any specific parser); `cargo tree -p kb-normalize --depth 1` (the
# default scope, excluding dev-deps) confirms this.
kb-parse-md = { path = "../kb-parse-md" }
kebab-parse-md = { path = "../kebab-parse-md" }
serde_json = { workspace = true }

View File

@@ -1,5 +1,5 @@
//! `kb-normalize` — lift parser output (`kb-parse-types`) into a
//! [`kb_core::CanonicalDocument`] with deterministic IDs.
//! [`kebab_core::CanonicalDocument`] with deterministic IDs.
//!
//! Per design §3.4 (CanonicalDocument / Block), §4.2 (ID recipe), §4.3
//! (ordinal rule), §3.6 (Provenance), §8 (module boundaries).
@@ -20,16 +20,16 @@
use std::collections::HashMap;
use anyhow::Result;
use kb_core::{
use kebab_core::{
Block, BlockId, CanonicalDocument, CodeBlock, CommonBlock, DocumentId, HeadingBlock,
ImageRefBlock, Inline, Lang, ListBlock, Metadata, ParserVersion, Provenance, ProvenanceEvent,
ProvenanceKind, RawAsset, TableBlock, TextBlock,
};
use kb_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind};
use kebab_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind};
use time::OffsetDateTime;
use unicode_normalization::UnicodeNormalization;
pub use kb_core::{id_for_block, id_for_doc};
pub use kebab_core::{id_for_block, id_for_doc};
/// Build a [`CanonicalDocument`] from the raw asset, frontmatter
/// metadata, parser blocks, parser version, and any warnings.
@@ -38,7 +38,7 @@ pub use kb_core::{id_for_block, id_for_doc};
///
/// * `doc_id = id_for_doc(workspace_path, asset_id, parser_version)` —
/// `workspace_path` is consumed verbatim from `asset` (already NFC +
/// POSIX per `kb_core::normalize::to_posix`).
/// POSIX per `kebab_core::normalize::to_posix`).
/// * `block_id = id_for_block(doc_id, kind, heading_path, ordinal,
/// source_span)` — `ordinal` is **0-based, scoped to (heading_path,
/// block_kind), in document order** per §4.3.
@@ -329,7 +329,7 @@ fn flatten_inline(i: &Inline, out: &mut String) {
#[cfg(test)]
mod tests {
use super::*;
use kb_core::{
use kebab_core::{
AssetId, AssetStorage, Checksum, MediaType, SourceSpan, SourceType, SourceUri,
TrustLevel, WorkspacePath, normalize::to_posix,
};
@@ -386,7 +386,7 @@ mod tests {
let h1_b = vec!["B".to_string()];
vec![
ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph,
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_a.clone(),
source_span: SourceSpan::Line { start: 1, end: 1 },
payload: ParsedPayload::Paragraph {
@@ -395,7 +395,7 @@ mod tests {
},
},
ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph,
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_a.clone(),
source_span: SourceSpan::Line { start: 2, end: 2 },
payload: ParsedPayload::Paragraph {
@@ -404,7 +404,7 @@ mod tests {
},
},
ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph,
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_a.clone(),
source_span: SourceSpan::Line { start: 3, end: 3 },
payload: ParsedPayload::Paragraph {
@@ -413,7 +413,7 @@ mod tests {
},
},
ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Code,
kind: kebab_parse_types::ParsedBlockKind::Code,
heading_path: h1_a,
source_span: SourceSpan::Line { start: 4, end: 5 },
payload: ParsedPayload::Code {
@@ -422,7 +422,7 @@ mod tests {
},
},
ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph,
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_b,
source_span: SourceSpan::Line { start: 6, end: 6 },
payload: ParsedPayload::Paragraph {
@@ -715,7 +715,7 @@ mod tests {
fn audio_ref_block_skipped_with_warning() {
let span = SourceSpan::Line { start: 1, end: 1 };
let blocks = vec![ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::AudioRef,
kind: kebab_parse_types::ParsedBlockKind::AudioRef,
heading_path: vec![],
source_span: span,
payload: ParsedPayload::AudioRef {
@@ -759,7 +759,7 @@ mod tests {
let nfd_heading = "\u{1100}\u{1161}".to_string(); // 가 (NFD)
let nfc_heading = "\u{AC00}".to_string(); // 가 (NFC)
let mk_block = |heading: String| ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph,
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: vec![heading],
source_span: span.clone(),
payload: ParsedPayload::Paragraph {

View File

@@ -15,12 +15,12 @@
use std::path::PathBuf;
use kb_core::{
use kebab_core::{
AssetId, AssetStorage, Checksum, MediaType, ParserVersion, RawAsset, SourceUri,
WorkspacePath,
};
use kb_normalize::build_canonical_document;
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kebab_normalize::build_canonical_document;
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use serde_json::Value;
use time::OffsetDateTime;

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-parse-md"
name = "kebab-parse-md"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Markdown frontmatter and block parsing into kb-core::Metadata / kb-parse-types intermediates"
[dependencies]
kb-core = { path = "../kb-core" }
kb-parse-types = { path = "../kb-parse-types" }
kebab-core = { path = "../kebab-core" }
kebab-parse-types = { path = "../kebab-parse-types" }
anyhow = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }

View File

@@ -1,10 +1,10 @@
//! Markdown body → flat `Vec<kb_parse_types::ParsedBlock>` (§3.4 / §3.7b).
//! Markdown body → flat `Vec<kebab_parse_types::ParsedBlock>` (§3.4 / §3.7b).
//!
//! Uses `pulldown-cmark` (with GFM tables enabled at runtime via
//! `Options::ENABLE_TABLES`) to walk the body once and emit a flat list of
//! parsed blocks. Heading paths are computed by tracking the most-recent
//! heading text at each level. Source spans are reported as
//! [`kb_core::SourceSpan::Line`] in 1-indexed file-line coordinates by
//! [`kebab_core::SourceSpan::Line`] in 1-indexed file-line coordinates by
//! converting `pulldown-cmark`'s byte offsets to line numbers and adding the
//! caller-supplied `body_offset_lines`.
//!
@@ -19,10 +19,10 @@
//!
//! ## Inline filter
//!
//! [`kb_core::Inline`] only models `Text | Code | Link | Strong | Emph`.
//! [`kebab_core::Inline`] only models `Text | Code | Link | Strong | Emph`.
//! Inline images, footnotes, hard breaks, etc. are dropped silently per
//! design §3.4. Block-level `![alt](src)` (an image as the sole content of a
//! paragraph) is lifted to [`kb_parse_types::ParsedPayload::ImageRef`].
//! paragraph) is lifted to [`kebab_parse_types::ParsedPayload::ImageRef`].
//!
//! ## CRLF
//!
@@ -33,8 +33,8 @@
use std::ops::Range;
use kb_core::{Inline, SourceSpan};
use kb_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind};
use kebab_core::{Inline, SourceSpan};
use kebab_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind};
use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
/// Parse a Markdown body into a flat `Vec<ParsedBlock>` plus any warnings.
@@ -1595,7 +1595,7 @@ mod tests {
let (blocks, _) = parse(body, 1);
assert_eq!(blocks.len(), 1, "expected single list block");
match &blocks[0].kind {
kb_parse_types::ParsedBlockKind::List => {}
kebab_parse_types::ParsedBlockKind::List => {}
other => panic!("expected list, got {other:?}"),
}
}

View File

@@ -1,4 +1,4 @@
//! Markdown frontmatter parsing → `kb_core::Metadata`.
//! Markdown frontmatter parsing → `kebab_core::Metadata`.
//!
//! Implements the contract pinned in design §0 Q9 (frontmatter derive table)
//! and §3.6 (Metadata shape). Produces structured warnings via
@@ -18,8 +18,8 @@
use std::ops::Range;
use std::sync::OnceLock;
use kb_core::{Metadata, SourceType, TrustLevel};
use kb_parse_types::{Warning, WarningKind};
use kebab_core::{Metadata, SourceType, TrustLevel};
use kebab_parse_types::{Warning, WarningKind};
use lingua::{IsoCode639_1, Language, LanguageDetector, LanguageDetectorBuilder};
use serde::Deserialize;
use serde_json::{Map, Value};
@@ -59,7 +59,7 @@ pub struct FrontmatterSpan {
}
/// Parse the frontmatter (if any) from a Markdown byte slice into a
/// `kb_core::Metadata`, applying the §0 Q9 derive table for missing fields.
/// `kebab_core::Metadata`, applying the §0 Q9 derive table for missing fields.
///
/// On a malformed frontmatter the function still returns `Ok` — the
/// frontmatter contents are discarded and the caller is told via a
@@ -589,7 +589,7 @@ fn iso_code(lang: Language) -> &'static str {
#[cfg(test)]
mod tests {
use super::*;
use kb_core::{
use kebab_core::{
AssetId, WorkspacePath,
ids::id_for_doc,
versions::ParserVersion,

View File

@@ -10,13 +10,13 @@
//! env-var pattern. Migrating kb-parse-md to the env-var style is out of
//! scope; both styles are intentional for now.
//!
//! Following the kb_core::Inline schema migration (struct-variant shape),
//! Following the kebab_core::Inline schema migration (struct-variant shape),
//! `ParsedBlock` now serializes directly through serde — no projection
//! shim is required. Inlines surface as structured objects, e.g.
//! `[{"kind":"text","text":"…"},{"kind":"code","code":"…"}]`.
use kb_parse_md::parse_blocks;
use kb_parse_types::{ParsedBlock, Warning};
use kebab_parse_md::parse_blocks;
use kebab_parse_types::{ParsedBlock, Warning};
use serde::Serialize;
use serde_json::Value;
use std::fs;

View File

@@ -5,7 +5,7 @@
//! and therefore stable; lingua autodetect over our fixtures is also
//! stable for the language set we configured.
use kb_parse_md::{BodyHints, parse_frontmatter};
use kebab_parse_md::{BodyHints, parse_frontmatter};
use serde::Serialize;
use serde_json::Value;
use std::fs;
@@ -18,9 +18,9 @@ use time::macros::datetime;
/// snapshot focuses on the §0 Q9 derive contract.
#[derive(Serialize)]
struct Snapshot {
metadata: kb_core::Metadata,
metadata: kebab_core::Metadata,
span_present: bool,
warnings: Vec<kb_parse_types::Warning>,
warnings: Vec<kebab_parse_types::Warning>,
}
fn fixtures_dir() -> PathBuf {

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-parse-types"
name = "kebab-parse-types"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,5 +8,5 @@ repository = { workspace = true }
description = "Parser intermediate representations (no parser libs allowed)"
[dependencies]
kb-core = { path = "../kb-core" }
kebab-core = { path = "../kebab-core" }
serde = { workspace = true }

View File

@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
pub struct ParsedBlock {
pub kind: ParsedBlockKind,
pub heading_path: Vec<String>,
pub source_span: kb_core::SourceSpan,
pub source_span: kebab_core::SourceSpan,
pub payload: ParsedPayload,
}
@@ -36,11 +36,11 @@ pub enum ParsedPayload {
},
Paragraph {
text: String,
inlines: Vec<kb_core::Inline>,
inlines: Vec<kebab_core::Inline>,
},
List {
ordered: bool,
items: Vec<Vec<kb_core::Inline>>,
items: Vec<Vec<kebab_core::Inline>>,
},
Code {
lang: Option<String>,
@@ -52,7 +52,7 @@ pub enum ParsedPayload {
},
Quote {
text: String,
inlines: Vec<kb_core::Inline>,
inlines: Vec<kebab_core::Inline>,
},
ImageRef {
src: String,

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-rag"
name = "kebab-rag"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,11 +8,11 @@ repository = { workspace = true }
description = "RAG pipeline: retrieve → gate → pack → generate → cite-validate"
[dependencies]
kb-core = { path = "../kb-core" }
kb-config = { path = "../kb-config" }
kb-search = { path = "../kb-search" }
kb-llm = { path = "../kb-llm" }
kb-store-sqlite = { path = "../kb-store-sqlite" }
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
kebab-search = { path = "../kebab-search" }
kebab-llm = { path = "../kebab-llm" }
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
serde = { workspace = true }
serde_json = { workspace = true }
regex = { workspace = true }
@@ -23,7 +23,7 @@ anyhow = { workspace = true }
blake3 = { workspace = true }
[dev-dependencies]
kb-llm = { path = "../kb-llm", features = ["mock"] }
kebab-llm = { path = "../kebab-llm", features = ["mock"] }
tempfile = { workspace = true }
rusqlite = { workspace = true }
serde_json = { workspace = true }

View File

@@ -18,7 +18,7 @@
//! reachable via `Retriever`), `kb-embed*` (only via `Retriever`),
//! `kb-llm-local` (only via `LanguageModel`), `kb-tui`, `kb-desktop`.
pub use kb_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason};
pub use kebab_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason};
mod pipeline;

View File

@@ -33,13 +33,13 @@
use std::sync::Arc;
use anyhow::{Context, Result};
use kb_core::{
use kebab_core::{
Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason,
GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters,
SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId,
};
use kb_core::versions::PromptTemplateVersion;
use kb_store_sqlite::SqliteStore;
use kebab_core::versions::PromptTemplateVersion;
use kebab_store_sqlite::SqliteStore;
use regex::Regex;
use std::sync::OnceLock;
use time::OffsetDateTime;
@@ -86,7 +86,7 @@ pub struct AskOpts {
/// Single-threaded RAG orchestrator. See module docs for the stage list.
pub struct RagPipeline {
config: kb_config::Config,
config: kebab_config::Config,
retriever: Arc<dyn Retriever>,
llm: Arc<dyn LanguageModel>,
docs: Arc<SqliteStore>,
@@ -98,7 +98,7 @@ impl RagPipeline {
/// `Arc`'d trait objects (kb-app builds them from config; tests
/// inject mocks).
pub fn new(
config: kb_config::Config,
config: kebab_config::Config,
retriever: Arc<dyn Retriever>,
llm: Arc<dyn LanguageModel>,
docs: Arc<SqliteStore>,
@@ -380,7 +380,7 @@ impl RagPipeline {
for hit in hits {
let chunk_full =
<SqliteStore as kb_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
<SqliteStore as kebab_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
.context("kb-rag: docs.get_chunk")?;
let chunk_text = match chunk_full {
Some(c) => c.text,
@@ -542,7 +542,7 @@ impl RagPipeline {
/// paths attach the configured embedding model so `kb explain` can
/// later identify which embedder shaped the retrieval (even on
/// refusals — see `refuse_score_gate`).
fn embedding_ref_for(mode: SearchMode, cfg: &kb_config::Config) -> Option<ModelRef> {
fn embedding_ref_for(mode: SearchMode, cfg: &kebab_config::Config) -> Option<ModelRef> {
match mode {
SearchMode::Lexical => None,
SearchMode::Vector | SearchMode::Hybrid => Some(ModelRef {

View File

@@ -14,12 +14,12 @@
use std::sync::Arc;
use kb_config::Config;
use kb_core::{
use kebab_config::Config;
use kebab_core::{
ChunkerVersion, ChunkId, Citation, DocumentId, IndexVersion, RetrievalDetail,
Retriever, SearchHit, SearchMode, SearchQuery, WorkspacePath,
};
use kb_store_sqlite::SqliteStore;
use kebab_store_sqlite::SqliteStore;
use rusqlite::params;
use tempfile::TempDir;
@@ -176,7 +176,7 @@ impl Retriever for MockRetriever {
}
}
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect.
/// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
pub fn id32(prefix: &str) -> String {
let mut s = prefix.to_string();
while s.len() < 32 {

View File

@@ -10,11 +10,11 @@ use std::sync::Arc;
use std::sync::atomic::Ordering;
use common::{MockRetriever, RagEnv, id32, mk_hit};
use kb_core::{
use kebab_core::{
FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage,
};
use kb_llm::MockLanguageModel;
use kb_rag::{AskOpts, RagPipeline, RefusalReason};
use kebab_llm::MockLanguageModel;
use kebab_rag::{AskOpts, RagPipeline, RefusalReason};
/// LM ID used everywhere — kept short so snapshots stay stable.
const TEST_LM_ID: &str = "mock-lm";
@@ -49,7 +49,7 @@ impl CountingLm {
}
impl LanguageModel for CountingLm {
fn model_ref(&self) -> kb_core::ModelRef {
fn model_ref(&self) -> kebab_core::ModelRef {
self.inner.model_ref()
}
fn context_tokens(&self) -> usize {
@@ -57,7 +57,7 @@ impl LanguageModel for CountingLm {
}
fn generate_stream(
&self,
req: kb_core::GenerateRequest,
req: kebab_core::GenerateRequest,
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
self.calls.fetch_add(1, Ordering::SeqCst);
self.inner.generate_stream(req)

View File

@@ -1,5 +1,5 @@
[package]
name = "kb-search"
name = "kebab-search"
version = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
@@ -8,17 +8,17 @@ repository = { workspace = true }
description = "Retriever implementations for kb (P2-2 lexical FTS5; P3 vector / hybrid will follow)"
[dependencies]
kb-core = { path = "../kb-core" }
kb-config = { path = "../kb-config" }
kb-store-sqlite = { path = "../kb-store-sqlite" }
kebab-core = { path = "../kebab-core" }
kebab-config = { path = "../kebab-config" }
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
# P3-4 hybrid retriever wraps a `dyn VectorStore` (typically backed by
# `kb-store-vector::LanceVectorStore`) and a `dyn Embedder` (any P3-2
# adapter). Listed as a runtime dep so callers can construct
# `VectorRetriever::new` against the trait objects without a concrete
# adapter — the concrete adapter (`kb-embed-local`) stays out of this
# crate per the spec's Forbidden deps list.
kb-store-vector = { path = "../kb-store-vector" }
kb-embed = { path = "../kb-embed" }
kebab-store-vector = { path = "../kebab-store-vector" }
kebab-embed = { path = "../kebab-embed" }
rusqlite = { workspace = true }
globset = { workspace = true }
serde_json = { workspace = true }
@@ -32,4 +32,4 @@ tempfile = { workspace = true }
# feature) and stand up a real `LanceVectorStore` on a tmp directory.
# The mock-retriever unit tests (the bulk of the hybrid suite) do not
# need either, but the integration / snapshot lane does.
kb-embed = { path = "../kb-embed", features = ["mock"] }
kebab-embed = { path = "../kebab-embed", features = ["mock"] }

View File

@@ -1,4 +1,4 @@
//! Shared helpers for building `kb_core::Citation` values from a
//! Shared helpers for building `kebab_core::Citation` values from a
//! chunk's first `SourceSpan`.
//!
//! Both the lexical and vector retrievers join against the same
@@ -9,7 +9,7 @@
//! §1.6). Living here means a future PDF / image / audio extractor can
//! enrich the mapping in one place rather than two.
use kb_core::{Citation, SourceSpan, WorkspacePath};
use kebab_core::{Citation, SourceSpan, WorkspacePath};
/// Build a `Citation` from the chunk's first `SourceSpan`. P1 markdown
/// only emits `Line`, so the other variants are mostly defensive — we

View File

@@ -20,7 +20,7 @@ use std::collections::HashMap;
use std::sync::Arc;
use anyhow::Result;
use kb_core::{
use kebab_core::{
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
};
@@ -75,7 +75,7 @@ impl HybridRetriever {
/// retrievers. Reads `config.search.hybrid_fusion` (only `"rrf"`
/// is recognised today) and `config.search.rrf_k`.
pub fn new(
config: &kb_config::Config,
config: &kebab_config::Config,
lexical: Arc<dyn Retriever>,
vector: Arc<dyn Retriever>,
) -> Self {
@@ -335,7 +335,7 @@ fn parse_fusion(name: &str, k_rrf: u32) -> FusionPolicy {
#[cfg(test)]
mod tests {
use super::*;
use kb_core::{
use kebab_core::{
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, SearchFilters,
SearchHit, SearchMode, WorkspacePath,
};

View File

@@ -1,7 +1,7 @@
//! Lexical (FTS5 + bm25) retriever — design §3.7 / §1.5 / §2.2 / §6.4.
//!
//! Owns the SQL pattern documented in `tasks/p2/p2-2-lexical-retriever.md`
//! and constructs `kb_core::SearchHit` values directly from the joined
//! and constructs `kebab_core::SearchHit` values directly from the joined
//! `chunks_fts` / `chunks` / `documents` rows. Reads only — never mutates
//! the underlying SQLite file.
@@ -9,12 +9,12 @@ use std::sync::Arc;
use anyhow::{Context, Result};
use globset::GlobMatcher;
use kb_core::{
use kebab_core::{
ChunkId, ChunkerVersion, DocumentId, IndexVersion, RetrievalDetail, Retriever,
SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
WorkspacePath,
};
use kb_store_sqlite::SqliteStore;
use kebab_store_sqlite::SqliteStore;
use rusqlite::{params_from_iter, Connection, Row, ToSql};
use crate::citation_helper::citation_from_first_span;
@@ -57,7 +57,7 @@ impl LexicalRetriever {
/// Construct with default settings derived from `kebab-config`'s defaults.
/// Snippet width is computed from `Config::defaults().search.snippet_chars`.
pub fn new(store: Arc<SqliteStore>, index_version: IndexVersion) -> Self {
let cfg = kb_config::Config::defaults();
let cfg = kebab_config::Config::defaults();
Self::with_settings(store, index_version, cfg.search.snippet_chars)
}
@@ -297,7 +297,7 @@ fn run_query(
params.push(Box::new(lang.0.clone()));
}
if let Some(trust_min) = &filters.trust_min {
// Mirror `kb_store_sqlite::documents::list_documents` ranking:
// Mirror `kebab_store_sqlite::documents::list_documents` ranking:
// Generated < Secondary < Primary. Doing the rank in SQL
// (rather than post-filtering) keeps the row stream short
// when the workspace contains many low-trust docs.
@@ -523,7 +523,7 @@ mod tests {
#[test]
fn build_citation_line_round_trip() {
use kb_core::Citation;
use kebab_core::Citation;
let p = WorkspacePath::new("a/b.md".to_string()).unwrap();
let span = SourceSpan::Line { start: 7, end: 12 };
let c = citation_from_first_span("c1", p.clone(), Some("S1".to_string()), Some(&span));
@@ -545,7 +545,7 @@ mod tests {
#[test]
fn build_citation_page_forwards_section() {
use kb_core::Citation;
use kebab_core::Citation;
let p = WorkspacePath::new("doc.pdf".to_string()).unwrap();
let span = SourceSpan::Page {
page: 4,
@@ -568,7 +568,7 @@ mod tests {
#[test]
fn build_citation_none_falls_back_to_line_one() {
use kb_core::Citation;
use kebab_core::Citation;
let p = WorkspacePath::new("x.md".to_string()).unwrap();
let c = citation_from_first_span("c1", p, None, None);
match c {

View File

@@ -1,4 +1,4 @@
//! `kb-search` — `kb_core::Retriever` implementations.
//! `kebab-search` — `kebab_core::Retriever` implementations.
//!
//! - [`LexicalRetriever`] (P2-2): SQLite-FTS5 + bm25 backed retriever
//! for `SearchMode::Lexical`.

View File

@@ -1,7 +1,7 @@
//! Vector retriever — design §3.7 / §7.2 / §1.6.
//!
//! Wraps a `dyn VectorStore` + `dyn Embedder` + the SQLite metadata
//! store into a `kb_core::Retriever`. The vector store knows how to
//! store into a `kebab_core::Retriever`. The vector store knows how to
//! find the nearest chunks by cosine on the embedding column; SQLite
//! owns the human-readable metadata (heading_path / section_label /
//! source_spans / chunker_version / workspace_path) needed for
@@ -19,12 +19,12 @@ use std::collections::HashMap;
use std::sync::Arc;
use anyhow::{Context, Result};
use kb_core::{
use kebab_core::{
ChunkId, ChunkerVersion, DocumentId, Embedder, EmbeddingInput, EmbeddingKind,
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
SourceSpan, VectorHit, VectorStore, WorkspacePath,
};
use kb_store_sqlite::SqliteStore;
use kebab_store_sqlite::SqliteStore;
use rusqlite::params_from_iter;
use crate::citation_helper::citation_from_first_span;
@@ -67,7 +67,7 @@ impl VectorRetriever {
sqlite: Arc<SqliteStore>,
index_version: IndexVersion,
) -> Self {
let cfg = kb_config::Config::defaults();
let cfg = kebab_config::Config::defaults();
Self::with_settings(store, embed, sqlite, index_version, cfg.search.snippet_chars)
}
@@ -268,7 +268,7 @@ fn build_hit(
meta: &ChunkMeta,
rank: u32,
index_version: &IndexVersion,
model_id: &kb_core::EmbeddingModelId,
model_id: &kebab_core::EmbeddingModelId,
snippet_chars: usize,
) -> Result<SearchHit> {
let heading_path: Vec<String> = serde_json::from_str(&meta.heading_path_json)

View File

@@ -16,15 +16,15 @@
use std::sync::Arc;
use kb_config::Config;
use kb_core::{
use kebab_config::Config;
use kebab_core::{
ChunkId, DocumentId, EmbeddingId, EmbeddingInput, EmbeddingKind,
EmbeddingModelId, EmbeddingVersion, IndexVersion, VectorRecord, VectorStore,
};
use kb_embed::{Embedder, MockEmbedder};
use kb_search::{LexicalRetriever, VectorRetriever};
use kb_store_sqlite::SqliteStore;
use kb_store_vector::LanceVectorStore;
use kebab_embed::{Embedder, MockEmbedder};
use kebab_search::{LexicalRetriever, VectorRetriever};
use kebab_store_sqlite::SqliteStore;
use kebab_store_vector::LanceVectorStore;
use rusqlite::params;
use tempfile::TempDir;
@@ -205,7 +205,7 @@ impl HybridEnv {
}
}
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect.
/// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
pub fn id32(prefix: &str) -> String {
let mut s = prefix.to_string();
while s.len() < 32 {

Some files were not shown because too many files have changed in this diff Show More