refactor(rename): kb crates → kebab — Cargo packages, folders, Rust modules
프로젝트 이름 `kb` → `kebab` rename 의 첫 단계.

- workspace `Cargo.toml`: members `crates/kb-*` → `crates/kebab-*`, repository URL `altair823/kb` → `altair823/kebab`.
- 18 crate 폴더 rename via `git mv` (history 보존).
- 각 crate `Cargo.toml`: `name = "kb-*"` → `"kebab-*"`, path deps `../kb-*` → `../kebab-*`.
- 모든 `.rs`: `kb_<id>` snake-case 모듈 path 18 개 (`kb_core`, `kb_config`, `kb_app`, `kb_cli`, `kb_eval`, `kb_search`, `kb_chunk`, `kb_normalize`, `kb_source_fs`, `kb_parse_md`, `kb_parse_types`, `kb_store_sqlite`, `kb_store_vector`, `kb_embed`, `kb_embed_local`, `kb_llm`, `kb_llm_local`, `kb_rag`) → `kebab_<id>` 일괄 sed (단어 경계 `\b` 사용해 영어 문장 안의 "kb" 약어 미오염).

CLI binary 이름 (`[[bin]] name = "kb"`), 환경변수 `KB_*`, XDG paths, tracing target, 그리고 docs sweep 은 다음 commit 에서.

## 검증

- `cargo check --workspace` clean — 모든 crate 빌드 통과 후 commit.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
158
Cargo.lock
generated
158
Cargo.lock
generated
@@ -3366,27 +3366,27 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-app"
|
||||
name = "kebab-app"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"dirs 5.0.1",
|
||||
"kb-chunk",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-embed",
|
||||
"kb-embed-local",
|
||||
"kb-llm",
|
||||
"kb-llm-local",
|
||||
"kb-normalize",
|
||||
"kb-parse-md",
|
||||
"kb-parse-types",
|
||||
"kb-rag",
|
||||
"kb-search",
|
||||
"kb-source-fs",
|
||||
"kb-store-sqlite",
|
||||
"kb-store-vector",
|
||||
"kebab-chunk",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-embed",
|
||||
"kebab-embed-local",
|
||||
"kebab-llm",
|
||||
"kebab-llm-local",
|
||||
"kebab-normalize",
|
||||
"kebab-parse-md",
|
||||
"kebab-parse-types",
|
||||
"kebab-rag",
|
||||
"kebab-search",
|
||||
"kebab-source-fs",
|
||||
"kebab-store-sqlite",
|
||||
"kebab-store-vector",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -3399,14 +3399,14 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-chunk"
|
||||
name = "kebab-chunk"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"kb-core",
|
||||
"kb-normalize",
|
||||
"kb-parse-md",
|
||||
"kebab-core",
|
||||
"kebab-normalize",
|
||||
"kebab-parse-md",
|
||||
"serde_json",
|
||||
"serde_json_canonicalizer",
|
||||
"time",
|
||||
@@ -3414,32 +3414,32 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-cli"
|
||||
name = "kebab-cli"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"kb-app",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-eval",
|
||||
"kebab-app",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-eval",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-config"
|
||||
name = "kebab-config"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"dirs 5.0.1",
|
||||
"kb-core",
|
||||
"kebab-core",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"toml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-core"
|
||||
name = "kebab-core"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
@@ -3453,13 +3453,13 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-embed"
|
||||
name = "kebab-embed"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"proptest",
|
||||
"serde",
|
||||
"thiserror 2.0.18",
|
||||
@@ -3467,27 +3467,27 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-embed-local"
|
||||
name = "kebab-embed-local"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"fastembed",
|
||||
"kb-config",
|
||||
"kb-embed",
|
||||
"kebab-config",
|
||||
"kebab-embed",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-eval"
|
||||
name = "kebab-eval"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-app",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-store-sqlite",
|
||||
"kebab-app",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-store-sqlite",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -3499,22 +3499,22 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-llm"
|
||||
name = "kebab-llm"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-core",
|
||||
"kebab-core",
|
||||
"proptest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-llm-local"
|
||||
name = "kebab-llm-local"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-llm",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-llm",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -3525,13 +3525,13 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-normalize"
|
||||
name = "kebab-normalize"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-core",
|
||||
"kb-parse-md",
|
||||
"kb-parse-types",
|
||||
"kebab-core",
|
||||
"kebab-parse-md",
|
||||
"kebab-parse-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"time",
|
||||
@@ -3540,12 +3540,12 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-parse-md"
|
||||
name = "kebab-parse-md"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-core",
|
||||
"kb-parse-types",
|
||||
"kebab-core",
|
||||
"kebab-parse-types",
|
||||
"lingua",
|
||||
"pulldown-cmark",
|
||||
"serde",
|
||||
@@ -3557,24 +3557,24 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-parse-types"
|
||||
name = "kebab-parse-types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"kb-core",
|
||||
"kebab-core",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-rag"
|
||||
name = "kebab-rag"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-llm",
|
||||
"kb-search",
|
||||
"kb-store-sqlite",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-llm",
|
||||
"kebab-search",
|
||||
"kebab-store-sqlite",
|
||||
"regex",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
@@ -3586,16 +3586,16 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-search"
|
||||
name = "kebab-search"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"globset",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-embed",
|
||||
"kb-store-sqlite",
|
||||
"kb-store-vector",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-embed",
|
||||
"kebab-store-sqlite",
|
||||
"kebab-store-vector",
|
||||
"rusqlite",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
@@ -3604,14 +3604,14 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-source-fs"
|
||||
name = "kebab-source-fs"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"ignore",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
@@ -3621,17 +3621,17 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-store-sqlite"
|
||||
name = "kebab-store-sqlite"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"globset",
|
||||
"kb-chunk",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-normalize",
|
||||
"kb-parse-md",
|
||||
"kebab-chunk",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-normalize",
|
||||
"kebab-parse-md",
|
||||
"refinery",
|
||||
"rusqlite",
|
||||
"serde_json",
|
||||
@@ -3642,7 +3642,7 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-store-vector"
|
||||
name = "kebab-store-vector"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
@@ -3651,9 +3651,9 @@ dependencies = [
|
||||
"arrow-schema",
|
||||
"blake3",
|
||||
"futures",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-store-sqlite",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-store-sqlite",
|
||||
"lancedb",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
|
||||
38
Cargo.toml
38
Cargo.toml
@@ -1,31 +1,31 @@
|
||||
[workspace]
|
||||
resolver = "3"
|
||||
members = [
|
||||
"crates/kb-core",
|
||||
"crates/kb-parse-types",
|
||||
"crates/kb-config",
|
||||
"crates/kb-source-fs",
|
||||
"crates/kb-parse-md",
|
||||
"crates/kb-normalize",
|
||||
"crates/kb-chunk",
|
||||
"crates/kb-store-sqlite",
|
||||
"crates/kb-store-vector",
|
||||
"crates/kb-search",
|
||||
"crates/kb-embed",
|
||||
"crates/kb-embed-local",
|
||||
"crates/kb-llm",
|
||||
"crates/kb-llm-local",
|
||||
"crates/kb-rag",
|
||||
"crates/kb-app",
|
||||
"crates/kb-cli",
|
||||
"crates/kb-eval",
|
||||
"crates/kebab-core",
|
||||
"crates/kebab-parse-types",
|
||||
"crates/kebab-config",
|
||||
"crates/kebab-source-fs",
|
||||
"crates/kebab-parse-md",
|
||||
"crates/kebab-normalize",
|
||||
"crates/kebab-chunk",
|
||||
"crates/kebab-store-sqlite",
|
||||
"crates/kebab-store-vector",
|
||||
"crates/kebab-search",
|
||||
"crates/kebab-embed",
|
||||
"crates/kebab-embed-local",
|
||||
"crates/kebab-llm",
|
||||
"crates/kebab-llm-local",
|
||||
"crates/kebab-rag",
|
||||
"crates/kebab-app",
|
||||
"crates/kebab-cli",
|
||||
"crates/kebab-eval",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
rust-version = "1.85"
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/altair823/kb"
|
||||
repository = "https://github.com/altair823/kebab"
|
||||
version = "0.1.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-app"
|
||||
name = "kebab-app"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,21 +8,21 @@ repository = { workspace = true }
|
||||
description = "Facade — orchestrates components for kb-cli/tui/desktop"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-source-fs = { path = "../kb-source-fs" }
|
||||
kb-parse-md = { path = "../kb-parse-md" }
|
||||
kb-parse-types = { path = "../kb-parse-types" }
|
||||
kb-normalize = { path = "../kb-normalize" }
|
||||
kb-chunk = { path = "../kb-chunk" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kb-store-vector = { path = "../kb-store-vector" }
|
||||
kb-search = { path = "../kb-search" }
|
||||
kb-embed = { path = "../kb-embed" }
|
||||
kb-embed-local = { path = "../kb-embed-local" }
|
||||
kb-llm = { path = "../kb-llm" }
|
||||
kb-llm-local = { path = "../kb-llm-local" }
|
||||
kb-rag = { path = "../kb-rag" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-source-fs = { path = "../kebab-source-fs" }
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
kebab-parse-types = { path = "../kebab-parse-types" }
|
||||
kebab-normalize = { path = "../kebab-normalize" }
|
||||
kebab-chunk = { path = "../kebab-chunk" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
kebab-store-vector = { path = "../kebab-store-vector" }
|
||||
kebab-search = { path = "../kebab-search" }
|
||||
kebab-embed = { path = "../kebab-embed" }
|
||||
kebab-embed-local = { path = "../kebab-embed-local" }
|
||||
kebab-llm = { path = "../kebab-llm" }
|
||||
kebab-llm-local = { path = "../kebab-llm-local" }
|
||||
kebab-rag = { path = "../kebab-rag" }
|
||||
anyhow = { workspace = true }
|
||||
blake3 = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
@@ -37,16 +37,16 @@ use std::sync::{Arc, OnceLock};
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode,
|
||||
SearchQuery, VectorStore,
|
||||
};
|
||||
use kb_embed_local::FastembedEmbedder;
|
||||
use kb_llm_local::OllamaLanguageModel;
|
||||
use kb_rag::{AskOpts, RagPipeline};
|
||||
use kb_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kb_store_vector::LanceVectorStore;
|
||||
use kebab_embed_local::FastembedEmbedder;
|
||||
use kebab_llm_local::OllamaLanguageModel;
|
||||
use kebab_rag::{AskOpts, RagPipeline};
|
||||
use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use kebab_store_vector::LanceVectorStore;
|
||||
|
||||
/// Facade state — see module docs for lifetime rules.
|
||||
///
|
||||
@@ -55,7 +55,7 @@ use kb_store_vector::LanceVectorStore;
|
||||
/// ask calls. The OnceLock-backed `embedder` / `vector` fields ensure
|
||||
/// the cold-start cost is paid exactly once per instance.
|
||||
pub struct App {
|
||||
pub(crate) config: kb_config::Config,
|
||||
pub(crate) config: kebab_config::Config,
|
||||
pub(crate) sqlite: Arc<SqliteStore>,
|
||||
/// Memoized embedder — built lazily on first `embedder()` call when
|
||||
/// embeddings are enabled. `OnceLock` keeps the struct `Sync` and
|
||||
@@ -80,7 +80,7 @@ impl App {
|
||||
/// Downstream `LanceVectorStore::new` (called by [`Self::vector`])
|
||||
/// internally drives a `tokio::Runtime::block_on`, which panics if
|
||||
/// invoked from inside another tokio runtime.
|
||||
pub fn open_with_config(config: kb_config::Config) -> Result<Self> {
|
||||
pub fn open_with_config(config: kebab_config::Config) -> Result<Self> {
|
||||
let sqlite = SqliteStore::open(&config).context("kb-app: open SqliteStore")?;
|
||||
sqlite
|
||||
.run_migrations()
|
||||
@@ -286,7 +286,7 @@ impl App {
|
||||
/// the active config. This token surfaces in `SearchHit.index_version`
|
||||
/// and on snapshot tests; including the chunker version pins it to
|
||||
/// the chunking policy in effect.
|
||||
fn lexical_index_version(config: &kb_config::Config) -> IndexVersion {
|
||||
fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion {
|
||||
IndexVersion(format!("lex:{}", config.chunking.chunker_version))
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
//! ## Config seam (`*_with_config`)
|
||||
//!
|
||||
//! Each public free function has a `#[doc(hidden)] pub fn *_with_config`
|
||||
//! companion that takes a fully-resolved [`kb_config::Config`] directly.
|
||||
//! companion that takes a fully-resolved [`kebab_config::Config`] directly.
|
||||
//! Three callers go through it: (1) the top-level free functions
|
||||
//! themselves, after `load_config()`; (2) `kb-cli` when the user passes
|
||||
//! `--config <path>` (CLI builds the Config via
|
||||
@@ -39,16 +39,16 @@ use std::sync::Arc;
|
||||
use anyhow::{Context, anyhow};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use kb_chunk::MdHeadingV1Chunker;
|
||||
use kb_core::{
|
||||
use kebab_chunk::MdHeadingV1Chunker;
|
||||
use kebab_core::{
|
||||
Answer, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
|
||||
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
|
||||
EmbeddingKind, IngestReport, ParserVersion, RawAsset, SearchHit, SearchQuery,
|
||||
SourceConnector, SourceScope, SourceUri, VectorRecord, VectorStore,
|
||||
};
|
||||
use kb_normalize::build_canonical_document;
|
||||
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kb_source_fs::FsSourceConnector;
|
||||
use kebab_normalize::build_canonical_document;
|
||||
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_source_fs::FsSourceConnector;
|
||||
|
||||
mod app;
|
||||
pub mod doctor_signal;
|
||||
@@ -65,11 +65,11 @@ const KB_PARSE_MD_VERSION: &str = "pulldown-cmark-0.x";
|
||||
|
||||
/// Caller-supplied knobs for one [`ask`] invocation.
|
||||
///
|
||||
/// Re-exported from [`kb_rag::AskOpts`] (P4-3 owns the type) so kb-cli's
|
||||
/// `use kb_app::AskOpts` keeps working without churn. The struct gained
|
||||
/// Re-exported from [`kebab_rag::AskOpts`] (P4-3 owns the type) so kb-cli's
|
||||
/// `use kebab_app::AskOpts` keeps working without churn. The struct gained
|
||||
/// a `stream_sink` field in P4-3; non-streaming callers (kb-cli today)
|
||||
/// pass `stream_sink: None`.
|
||||
pub use kb_rag::AskOpts;
|
||||
pub use kebab_rag::AskOpts;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct DoctorReport {
|
||||
@@ -90,10 +90,10 @@ pub struct DoctorCheck {
|
||||
/// Create XDG dirs and write a starter `config.toml`. Idempotent unless
|
||||
/// `force=true` (which overwrites an existing config).
|
||||
pub fn init_workspace(force: bool) -> anyhow::Result<()> {
|
||||
let cfg_path = kb_config::Config::xdg_config_path();
|
||||
let data_dir = kb_config::Config::xdg_data_dir();
|
||||
let cache_dir = kb_config::Config::xdg_cache_dir();
|
||||
let state_dir = kb_config::Config::xdg_state_dir();
|
||||
let cfg_path = kebab_config::Config::xdg_config_path();
|
||||
let data_dir = kebab_config::Config::xdg_data_dir();
|
||||
let cache_dir = kebab_config::Config::xdg_cache_dir();
|
||||
let state_dir = kebab_config::Config::xdg_state_dir();
|
||||
|
||||
for d in [
|
||||
cfg_path.parent().map(PathBuf::from).unwrap_or_default(),
|
||||
@@ -107,11 +107,11 @@ pub fn init_workspace(force: bool) -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let workspace_root = expand_tilde(&kb_config::Config::defaults().workspace.root);
|
||||
let workspace_root = expand_tilde(&kebab_config::Config::defaults().workspace.root);
|
||||
std::fs::create_dir_all(&workspace_root)?;
|
||||
|
||||
if !cfg_path.exists() || force {
|
||||
let cfg = kb_config::Config::defaults();
|
||||
let cfg = kebab_config::Config::defaults();
|
||||
let toml_text = toml::to_string_pretty(&cfg)?;
|
||||
std::fs::write(&cfg_path, toml_text)?;
|
||||
}
|
||||
@@ -141,8 +141,8 @@ fn expand_tilde(s: &str) -> PathBuf {
|
||||
/// Callers that already have a Config in hand (CLI honoring `--config`,
|
||||
/// integration tests, TUI session) should bypass this and call the
|
||||
/// matching `*_with_config` helper directly.
|
||||
fn load_config() -> anyhow::Result<kb_config::Config> {
|
||||
kb_config::Config::load(None)
|
||||
fn load_config() -> anyhow::Result<kebab_config::Config> {
|
||||
kebab_config::Config::load(None)
|
||||
}
|
||||
|
||||
// ── ingest ────────────────────────────────────────────────────────────────
|
||||
@@ -154,11 +154,11 @@ pub fn ingest(scope: SourceScope, summary_only: bool) -> anyhow::Result<IngestRe
|
||||
|
||||
/// Config-explicit variant — bypasses [`load_config`] when the
|
||||
/// caller (kb-cli with `--config`, integration tests, TUI session)
|
||||
/// already has a [`kb_config::Config`] in hand. The public free
|
||||
/// already has a [`kebab_config::Config`] in hand. The public free
|
||||
/// function [`ingest`] wraps this with the XDG-default load.
|
||||
#[doc(hidden)]
|
||||
pub fn ingest_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
scope: SourceScope,
|
||||
summary_only: bool,
|
||||
) -> anyhow::Result<IngestReport> {
|
||||
@@ -205,13 +205,13 @@ pub fn ingest_with_config(
|
||||
|
||||
let started_at = time::OffsetDateTime::now_utc();
|
||||
|
||||
let mut items: Vec<kb_core::IngestItem> = Vec::new();
|
||||
let mut items: Vec<kebab_core::IngestItem> = Vec::new();
|
||||
let mut new_count: u32 = 0;
|
||||
let mut updated_count: u32 = 0;
|
||||
let mut skipped_count: u32 = 0;
|
||||
let mut error_count: u32 = 0;
|
||||
// Aggregate counts surfaced into `ingest_runs` (and tracing). Not
|
||||
// exposed on `IngestReport` today — `kb_core::IngestReport` is a
|
||||
// exposed on `IngestReport` today — `kebab_core::IngestReport` is a
|
||||
// wire-stable struct without these fields — but persisting them
|
||||
// means audit tooling and `kb jobs` (P+) can recover the totals
|
||||
// without re-walking the DB.
|
||||
@@ -242,8 +242,8 @@ pub fn ingest_with_config(
|
||||
"kb-app::ingest: per-file fatal"
|
||||
);
|
||||
error_count = error_count.saturating_add(1);
|
||||
kb_core::IngestItem {
|
||||
kind: kb_core::IngestItemKind::Error,
|
||||
kebab_core::IngestItem {
|
||||
kind: kebab_core::IngestItemKind::Error,
|
||||
doc_id: None,
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
asset_id: Some(asset.asset_id.clone()),
|
||||
@@ -259,7 +259,7 @@ pub fn ingest_with_config(
|
||||
};
|
||||
|
||||
match item.kind {
|
||||
kb_core::IngestItemKind::New => {
|
||||
kebab_core::IngestItemKind::New => {
|
||||
new_count = new_count.saturating_add(1);
|
||||
let n = item.chunk_count.unwrap_or(0);
|
||||
chunks_indexed = chunks_indexed.saturating_add(n);
|
||||
@@ -267,7 +267,7 @@ pub fn ingest_with_config(
|
||||
embeddings_indexed = embeddings_indexed.saturating_add(n);
|
||||
}
|
||||
}
|
||||
kb_core::IngestItemKind::Updated => {
|
||||
kebab_core::IngestItemKind::Updated => {
|
||||
updated_count = updated_count.saturating_add(1);
|
||||
let n = item.chunk_count.unwrap_or(0);
|
||||
chunks_indexed = chunks_indexed.saturating_add(n);
|
||||
@@ -275,10 +275,10 @@ pub fn ingest_with_config(
|
||||
embeddings_indexed = embeddings_indexed.saturating_add(n);
|
||||
}
|
||||
}
|
||||
kb_core::IngestItemKind::Skipped => {
|
||||
kebab_core::IngestItemKind::Skipped => {
|
||||
skipped_count = skipped_count.saturating_add(1)
|
||||
}
|
||||
kb_core::IngestItemKind::Error => {
|
||||
kebab_core::IngestItemKind::Error => {
|
||||
error_count = error_count.saturating_add(1)
|
||||
}
|
||||
}
|
||||
@@ -293,9 +293,9 @@ pub fn ingest_with_config(
|
||||
"scope": scope,
|
||||
"summary_only": summary_only,
|
||||
});
|
||||
let job_id_res = <SqliteStoreAlias as kb_core::JobRepo>::create(
|
||||
let job_id_res = <SqliteStoreAlias as kebab_core::JobRepo>::create(
|
||||
&app.sqlite,
|
||||
kb_core::JobKind::Ingest,
|
||||
kebab_core::JobKind::Ingest,
|
||||
payload,
|
||||
);
|
||||
match job_id_res {
|
||||
@@ -312,7 +312,7 @@ pub fn ingest_with_config(
|
||||
"chunks_indexed": chunks_indexed,
|
||||
"embeddings_indexed": embeddings_indexed,
|
||||
});
|
||||
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::update_progress(
|
||||
if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::update_progress(
|
||||
&app.sqlite,
|
||||
&jid,
|
||||
progress,
|
||||
@@ -323,10 +323,10 @@ pub fn ingest_with_config(
|
||||
"kb-app::ingest: JobRepo::update_progress failed"
|
||||
);
|
||||
}
|
||||
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::finish(
|
||||
if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::finish(
|
||||
&app.sqlite,
|
||||
&jid,
|
||||
kb_core::JobStatus::Succeeded,
|
||||
kebab_core::JobStatus::Succeeded,
|
||||
None,
|
||||
) {
|
||||
tracing::warn!(
|
||||
@@ -370,7 +370,7 @@ pub fn ingest_with_config(
|
||||
}
|
||||
};
|
||||
let run_id = mint_ingest_run_id(&scope_json, started_at);
|
||||
let row = kb_store_sqlite::IngestRunRow {
|
||||
let row = kebab_store_sqlite::IngestRunRow {
|
||||
run_id: &run_id,
|
||||
scope_json: &scope_json,
|
||||
scanned: scanned_count,
|
||||
@@ -432,7 +432,7 @@ fn mint_ingest_run_id(scope_json: &str, at: time::OffsetDateTime) -> String {
|
||||
/// vs `JobRepo`) on the same store. Plain `app.sqlite.create(...)`
|
||||
/// would pick one based on inherent vs trait methods; we go through
|
||||
/// `<… as JobRepo>` to be explicit.
|
||||
type SqliteStoreAlias = kb_store_sqlite::SqliteStore;
|
||||
type SqliteStoreAlias = kebab_store_sqlite::SqliteStore;
|
||||
|
||||
/// Process a single asset: read bytes, parse, normalize, chunk,
|
||||
/// persist, embed. Per-asset failures bubble up to the caller for
|
||||
@@ -444,18 +444,18 @@ fn ingest_one_asset(
|
||||
parser_version: &ParserVersion,
|
||||
chunk_policy: &ChunkPolicy,
|
||||
embedder: Option<&Arc<dyn Embedder + Send + Sync>>,
|
||||
vector_store: Option<&Arc<kb_store_vector::LanceVectorStore>>,
|
||||
vector_store: Option<&Arc<kebab_store_vector::LanceVectorStore>>,
|
||||
existing_doc_ids: &std::collections::HashSet<String>,
|
||||
) -> anyhow::Result<kb_core::IngestItem> {
|
||||
) -> anyhow::Result<kebab_core::IngestItem> {
|
||||
tracing::debug!(
|
||||
target: "kb-app::ingest",
|
||||
path = %asset.workspace_path.0,
|
||||
"processing asset"
|
||||
);
|
||||
// Only handle Markdown for now; other media types are P6+ work.
|
||||
if asset.media_type != kb_core::MediaType::Markdown {
|
||||
return Ok(kb_core::IngestItem {
|
||||
kind: kb_core::IngestItemKind::Skipped,
|
||||
if asset.media_type != kebab_core::MediaType::Markdown {
|
||||
return Ok(kebab_core::IngestItem {
|
||||
kind: kebab_core::IngestItemKind::Skipped,
|
||||
doc_id: None,
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
asset_id: Some(asset.asset_id.clone()),
|
||||
@@ -472,8 +472,8 @@ fn ingest_one_asset(
|
||||
let path = match &asset.source_uri {
|
||||
SourceUri::File(p) => p.clone(),
|
||||
SourceUri::Kb(_) => {
|
||||
return Ok(kb_core::IngestItem {
|
||||
kind: kb_core::IngestItemKind::Skipped,
|
||||
return Ok(kebab_core::IngestItem {
|
||||
kind: kebab_core::IngestItemKind::Skipped,
|
||||
doc_id: None,
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
asset_id: Some(asset.asset_id.clone()),
|
||||
@@ -569,7 +569,7 @@ fn ingest_one_asset(
|
||||
.iter()
|
||||
.zip(vectors)
|
||||
.map(|(c, v)| VectorRecord {
|
||||
embedding_id: kb_core::id_for_embedding(
|
||||
embedding_id: kebab_core::id_for_embedding(
|
||||
&c.chunk_id,
|
||||
&model_id,
|
||||
&model_version,
|
||||
@@ -592,12 +592,12 @@ fn ingest_one_asset(
|
||||
}
|
||||
|
||||
let kind = if existing_doc_ids.contains(&canonical.doc_id.0) {
|
||||
kb_core::IngestItemKind::Updated
|
||||
kebab_core::IngestItemKind::Updated
|
||||
} else {
|
||||
kb_core::IngestItemKind::New
|
||||
kebab_core::IngestItemKind::New
|
||||
};
|
||||
|
||||
Ok(kb_core::IngestItem {
|
||||
Ok(kebab_core::IngestItem {
|
||||
kind,
|
||||
doc_id: Some(canonical.doc_id.clone()),
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
@@ -613,7 +613,7 @@ fn ingest_one_asset(
|
||||
}
|
||||
|
||||
/// Convenience: end byte of the frontmatter region (or 0 when absent).
|
||||
fn fm_span_end(span: Option<kb_parse_md::FrontmatterSpan>) -> usize {
|
||||
fn fm_span_end(span: Option<kebab_parse_md::FrontmatterSpan>) -> usize {
|
||||
span.map(|s| s.end).unwrap_or(0)
|
||||
}
|
||||
|
||||
@@ -640,7 +640,7 @@ fn build_body_hints(asset: &RawAsset) -> BodyHints {
|
||||
}
|
||||
|
||||
/// Build a `ChunkPolicy` from the active config.
|
||||
fn chunk_policy_from_config(config: &kb_config::Config) -> ChunkPolicy {
|
||||
fn chunk_policy_from_config(config: &kebab_config::Config) -> ChunkPolicy {
|
||||
ChunkPolicy {
|
||||
target_tokens: config.chunking.target_tokens,
|
||||
overlap_tokens: config.chunking.overlap_tokens,
|
||||
@@ -660,7 +660,7 @@ pub fn list_docs(filter: DocFilter) -> anyhow::Result<Vec<DocSummary>> {
|
||||
/// ([`list_docs`]), not this.
|
||||
#[doc(hidden)]
|
||||
pub fn list_docs_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
filter: DocFilter,
|
||||
) -> anyhow::Result<Vec<DocSummary>> {
|
||||
let app = App::open_with_config(config)?;
|
||||
@@ -676,7 +676,7 @@ pub fn inspect_doc(id: &DocumentId) -> anyhow::Result<CanonicalDocument> {
|
||||
/// ([`inspect_doc`]), not this.
|
||||
#[doc(hidden)]
|
||||
pub fn inspect_doc_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
id: &DocumentId,
|
||||
) -> anyhow::Result<CanonicalDocument> {
|
||||
let app = App::open_with_config(config)?;
|
||||
@@ -694,7 +694,7 @@ pub fn inspect_chunk(id: &ChunkId) -> anyhow::Result<Chunk> {
|
||||
/// ([`inspect_chunk`]), not this.
|
||||
#[doc(hidden)]
|
||||
pub fn inspect_chunk_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
id: &ChunkId,
|
||||
) -> anyhow::Result<Chunk> {
|
||||
let app = App::open_with_config(config)?;
|
||||
@@ -716,7 +716,7 @@ pub fn search(query: SearchQuery) -> anyhow::Result<Vec<SearchHit>> {
|
||||
/// directly to amortize the embedder / vector-store cold start.
|
||||
#[doc(hidden)]
|
||||
pub fn search_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
query: SearchQuery,
|
||||
) -> anyhow::Result<Vec<SearchHit>> {
|
||||
App::open_with_config(config)?.search(query)
|
||||
@@ -740,7 +740,7 @@ pub fn ask(query: &str, opts: AskOpts) -> anyhow::Result<Answer> {
|
||||
/// [`App::ask`].
|
||||
#[doc(hidden)]
|
||||
pub fn ask_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
query: &str,
|
||||
opts: AskOpts,
|
||||
) -> anyhow::Result<Answer> {
|
||||
@@ -761,10 +761,10 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
|
||||
// override first, else XDG default. Report whichever was probed.
|
||||
let cfg_path: PathBuf = match config_path {
|
||||
Some(p) => p.to_path_buf(),
|
||||
None => kb_config::Config::xdg_config_path(),
|
||||
None => kebab_config::Config::xdg_config_path(),
|
||||
};
|
||||
let (config_ok, config_detail, loaded_cfg) = if cfg_path.exists() {
|
||||
match kb_config::Config::from_file(&cfg_path) {
|
||||
match kebab_config::Config::from_file(&cfg_path) {
|
||||
Ok(c) => (true, cfg_path.display().to_string(), Some(c)),
|
||||
Err(e) => (false, format!("{} ({e})", cfg_path.display()), None),
|
||||
}
|
||||
@@ -804,7 +804,7 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
|
||||
let merged = c.clone().apply_env(&env);
|
||||
expand_tilde(&merged.storage.data_dir)
|
||||
}
|
||||
None => kb_config::Config::xdg_data_dir(),
|
||||
None => kebab_config::Config::xdg_data_dir(),
|
||||
};
|
||||
let writable = (|| -> anyhow::Result<()> {
|
||||
std::fs::create_dir_all(&data_dir)?;
|
||||
@@ -19,7 +19,7 @@ pub enum LogLevel {
|
||||
/// — a second call is a no-op (the second `try_init` is dropped silently
|
||||
/// but the guard is still returned so the caller can keep it alive).
|
||||
pub fn init(level: LogLevel) -> Result<WorkerGuard> {
|
||||
let log_dir = kb_config::Config::xdg_state_dir().join("logs");
|
||||
let log_dir = kebab_config::Config::xdg_state_dir().join("logs");
|
||||
std::fs::create_dir_all(&log_dir)?;
|
||||
|
||||
let file_appender = tracing_appender::rolling::daily(&log_dir, "kb.log");
|
||||
@@ -21,12 +21,12 @@ use common::TestEnv;
|
||||
#[ignore = "requires real Ollama on 127.0.0.1:11434"]
|
||||
fn ask_lexical_smoke() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
let opts = kb_app::AskOpts {
|
||||
let opts = kebab_app::AskOpts {
|
||||
k: 5,
|
||||
explain: false,
|
||||
mode: kb_core::SearchMode::Lexical,
|
||||
mode: kebab_core::SearchMode::Lexical,
|
||||
temperature: Some(0.0),
|
||||
seed: Some(0),
|
||||
stream_sink: None,
|
||||
@@ -34,10 +34,10 @@ fn ask_lexical_smoke() {
|
||||
// The fixture workspace contains "ownership" content; the model's
|
||||
// citation behavior depends on its training, so we don't assert on
|
||||
// grounded — only that the call returns a structurally-valid Answer.
|
||||
let answer = kb_app::ask_with_config(env.config.clone(), "ownership", opts)
|
||||
let answer = kebab_app::ask_with_config(env.config.clone(), "ownership", opts)
|
||||
.expect("ask returns Ok with a real Ollama backend");
|
||||
// retrieval summary always populated, regardless of grounded path.
|
||||
assert_eq!(answer.retrieval.mode, kb_core::SearchMode::Lexical);
|
||||
assert_eq!(answer.retrieval.mode, kebab_core::SearchMode::Lexical);
|
||||
assert!(answer.retrieval.k >= 5);
|
||||
assert!(answer.retrieval.trace_id.0.starts_with("ret_"));
|
||||
}
|
||||
@@ -12,7 +12,7 @@
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use kb_config::Config;
|
||||
use kebab_config::Config;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Test environment: owns a `TempDir` and exposes a `Config` whose
|
||||
@@ -72,8 +72,8 @@ impl TestEnv {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scope(&self) -> kb_core::SourceScope {
|
||||
kb_core::SourceScope {
|
||||
pub fn scope(&self) -> kebab_core::SourceScope {
|
||||
kebab_core::SourceScope {
|
||||
root: self.workspace_root.clone(),
|
||||
include: self.config.workspace.include.clone(),
|
||||
exclude: self.config.workspace.exclude.clone(),
|
||||
@@ -9,7 +9,7 @@ use common::TestEnv;
|
||||
fn ingest_then_list_inspects_round_trip() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
|
||||
// The fixture has 3 markdown files; first ingest should label them
|
||||
// all as New.
|
||||
@@ -27,16 +27,16 @@ fn ingest_then_list_inspects_round_trip() {
|
||||
}
|
||||
|
||||
// list_docs returns the 3 docs.
|
||||
let docs = kb_app::list_docs_with_config(
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kb_core::DocFilter::default(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3, "docs: {docs:?}");
|
||||
|
||||
// inspect_doc round-trips one of them.
|
||||
let any_doc_id = docs[0].doc_id.clone();
|
||||
let canonical = kb_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
|
||||
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
|
||||
.unwrap();
|
||||
assert_eq!(canonical.doc_id, any_doc_id);
|
||||
assert!(!canonical.blocks.is_empty(), "blocks empty");
|
||||
@@ -47,20 +47,20 @@ fn ingest_idempotent_on_second_run() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
let r1 =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(r1.new, 3);
|
||||
|
||||
let r2 =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
// Same files re-ingested — labelled Updated, not duplicated.
|
||||
assert_eq!(r2.scanned, 3, "second scan: {r2:?}");
|
||||
assert_eq!(r2.new, 0, "second run new should be 0: {r2:?}");
|
||||
assert_eq!(r2.updated, 3, "second run updated: {r2:?}");
|
||||
|
||||
// list_docs still has 3 docs (no duplicates).
|
||||
let docs = kb_app::list_docs_with_config(
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kb_core::DocFilter::default(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3);
|
||||
@@ -70,7 +70,7 @@ fn ingest_idempotent_on_second_run() {
|
||||
fn ingest_summary_only_drops_items() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
assert!(report.items.is_none(), "summary-only should null items");
|
||||
}
|
||||
@@ -82,7 +82,7 @@ fn ingest_records_ingest_runs_row_with_aggregate_counts() {
|
||||
// of every run. `summary_only=true` writes `items_json=NULL`; the
|
||||
// counts MUST still be present.
|
||||
let env = TestEnv::lexical_only();
|
||||
let report = kb_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
.unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
|
||||
@@ -137,7 +137,7 @@ fn ingest_provider_none_skips_lance() {
|
||||
// tables under it).
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 0, "lexical-only run must not error");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
@@ -170,22 +170,22 @@ fn ingest_provider_none_skips_lance() {
|
||||
#[test]
|
||||
fn list_docs_filters_by_tags_any() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
let filter = kb_core::DocFilter {
|
||||
let filter = kebab_core::DocFilter {
|
||||
tags_any: vec!["python".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
let docs = kb_app::list_docs_with_config(env.config.clone(), filter).unwrap();
|
||||
let docs = kebab_app::list_docs_with_config(env.config.clone(), filter).unwrap();
|
||||
assert_eq!(docs.len(), 1, "expected only the python doc: {docs:?}");
|
||||
assert!(docs[0].tags.contains(&"python".to_string()));
|
||||
|
||||
let rust_filter = kb_core::DocFilter {
|
||||
let rust_filter = kebab_core::DocFilter {
|
||||
tags_any: vec!["rust".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
let rust_docs =
|
||||
kb_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
|
||||
kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
|
||||
// intro.md and notes/cargo.md both tag "rust".
|
||||
assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}");
|
||||
}
|
||||
@@ -194,8 +194,8 @@ fn list_docs_filters_by_tags_any() {
|
||||
fn inspect_doc_not_found_returns_actionable_error() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bogus =
|
||||
kb_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
|
||||
let err = kb_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
|
||||
kebab_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
|
||||
let err = kebab_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
msg.contains("not found"),
|
||||
@@ -210,10 +210,10 @@ fn inspect_doc_not_found_returns_actionable_error() {
|
||||
#[test]
|
||||
fn inspect_chunk_not_found_returns_actionable_error() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bogus = kb_core::ChunkId(
|
||||
let bogus = kebab_core::ChunkId(
|
||||
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
|
||||
);
|
||||
let err = kb_app::inspect_chunk_with_config(env.config.clone(), &bogus)
|
||||
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus)
|
||||
.unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("not found"), "got: {msg}");
|
||||
@@ -5,24 +5,24 @@ mod common;
|
||||
|
||||
use common::TestEnv;
|
||||
|
||||
fn lexical_query(text: &str) -> kb_core::SearchQuery {
|
||||
kb_core::SearchQuery {
|
||||
fn lexical_query(text: &str) -> kebab_core::SearchQuery {
|
||||
kebab_core::SearchQuery {
|
||||
text: text.to_string(),
|
||||
mode: kb_core::SearchMode::Lexical,
|
||||
mode: kebab_core::SearchMode::Lexical,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lexical_search_returns_hits_after_ingest() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
// "Ownership" appears as a heading + paragraph in intro.md and
|
||||
// matches FTS5 default tokenizer easily.
|
||||
let hits =
|
||||
kb_app::search_with_config(env.config.clone(), lexical_query("ownership"))
|
||||
kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
|
||||
.unwrap();
|
||||
assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'");
|
||||
|
||||
@@ -34,7 +34,7 @@ fn lexical_search_returns_hits_after_ingest() {
|
||||
);
|
||||
assert_eq!(
|
||||
h.retrieval.method,
|
||||
kb_core::SearchMode::Lexical,
|
||||
kebab_core::SearchMode::Lexical,
|
||||
"method label should be Lexical"
|
||||
);
|
||||
}
|
||||
@@ -43,8 +43,8 @@ fn lexical_search_returns_hits_after_ingest() {
|
||||
#[test]
|
||||
fn lexical_search_empty_query_returns_empty() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let hits = kb_app::search_with_config(env.config.clone(), lexical_query(" "))
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query(" "))
|
||||
.unwrap();
|
||||
assert!(hits.is_empty(), "blank query must short-circuit empty");
|
||||
}
|
||||
@@ -52,15 +52,15 @@ fn lexical_search_empty_query_returns_empty() {
|
||||
#[test]
|
||||
fn vector_mode_with_provider_none_errors_clearly() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
let q = kb_core::SearchQuery {
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: "ownership".to_string(),
|
||||
mode: kb_core::SearchMode::Vector,
|
||||
mode: kebab_core::SearchMode::Vector,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let err = kb_app::search_with_config(env.config.clone(), q).unwrap_err();
|
||||
let err = kebab_app::search_with_config(env.config.clone(), q).unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
msg.contains("embeddings disabled") || msg.contains("disabled"),
|
||||
@@ -31,21 +31,21 @@ fn ingest_then_hybrid_search_returns_hits() {
|
||||
|
||||
let env = TestEnv::with_embeddings();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
let q = kb_core::SearchQuery {
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: "ownership".to_string(),
|
||||
mode: kb_core::SearchMode::Hybrid,
|
||||
mode: kebab_core::SearchMode::Hybrid,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
assert!(!hits.is_empty(), "expected hybrid hits for 'ownership'");
|
||||
let methods: Vec<_> = hits.iter().map(|h| h.retrieval.method).collect();
|
||||
assert!(
|
||||
methods.iter().all(|m| *m == kb_core::SearchMode::Hybrid),
|
||||
methods.iter().all(|m| *m == kebab_core::SearchMode::Hybrid),
|
||||
"every hit must report method=Hybrid: {methods:?}"
|
||||
);
|
||||
}
|
||||
@@ -58,22 +58,22 @@ fn ingest_then_vector_search_carries_embedding_model() {
|
||||
|
||||
let env = TestEnv::with_embeddings();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
let q = kb_core::SearchQuery {
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: "ownership".to_string(),
|
||||
mode: kb_core::SearchMode::Vector,
|
||||
mode: kebab_core::SearchMode::Vector,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
assert!(!hits.is_empty(), "expected vector hits for 'ownership'");
|
||||
|
||||
// Vector mode dispatches through `VectorRetriever` and MUST stamp
|
||||
// each hit with the configured embedding_model id.
|
||||
let expected = kb_core::EmbeddingModelId(env.config.models.embedding.model.clone());
|
||||
let expected = kebab_core::EmbeddingModelId(env.config.models.embedding.model.clone());
|
||||
for h in &hits {
|
||||
assert_eq!(
|
||||
h.embedding_model,
|
||||
@@ -82,7 +82,7 @@ fn ingest_then_vector_search_carries_embedding_model() {
|
||||
);
|
||||
assert_eq!(
|
||||
h.retrieval.method,
|
||||
kb_core::SearchMode::Vector,
|
||||
kebab_core::SearchMode::Vector,
|
||||
"vector-mode hit must report method=Vector"
|
||||
);
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-chunk"
|
||||
name = "kebab-chunk"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,7 +8,7 @@ repository = { workspace = true }
|
||||
description = "Chunkers that turn kb-core::CanonicalDocument into kb-core::Chunk batches (§3.5, §4.2, §7.2)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
serde_json_canonicalizer = "0.3"
|
||||
blake3 = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
@@ -20,7 +20,7 @@ tracing = { workspace = true }
|
||||
# regular deps per design §8 (chunker consumes CanonicalDocument from kb-core
|
||||
# only); `cargo tree -p kb-chunk --depth 1` (default scope, excludes dev-deps)
|
||||
# confirms this.
|
||||
kb-parse-md = { path = "../kb-parse-md" }
|
||||
kb-normalize = { path = "../kb-normalize" }
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
kebab-normalize = { path = "../kebab-normalize" }
|
||||
serde_json = { workspace = true }
|
||||
time = { workspace = true }
|
||||
@@ -1,4 +1,4 @@
|
||||
//! `kb-chunk` — chunkers that emit [`kb_core::Chunk`] batches.
|
||||
//! `kb-chunk` — chunkers that emit [`kebab_core::Chunk`] batches.
|
||||
//!
|
||||
//! Per design §3.5 (Chunk), §4.2 (chunk_id recipe), §7.2 (`Chunker`
|
||||
//! trait), §0 Q3/§14 (chunking priority).
|
||||
@@ -1,6 +1,6 @@
|
||||
//! `md-heading-v1` — heading-aware Markdown chunker.
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Block, BlockId, CanonicalDocument, Chunk, ChunkPolicy, Chunker,
|
||||
ChunkerVersion, DocumentId, SourceSpan, id_for_chunk,
|
||||
};
|
||||
@@ -24,7 +24,7 @@ const POLICY_HASH_HEX_LEN: usize = 16;
|
||||
|
||||
/// Heading-aware Markdown chunker.
|
||||
///
|
||||
/// Implements [`kb_core::Chunker`] for Markdown-derived
|
||||
/// Implements [`kebab_core::Chunker`] for Markdown-derived
|
||||
/// [`CanonicalDocument`]s.
|
||||
///
|
||||
/// **Behavior contract** (design §0 / §14, in priority order):
|
||||
@@ -409,7 +409,7 @@ fn estimate_block_tokens(b: &Block) -> usize {
|
||||
}
|
||||
|
||||
/// Borrow the `CommonBlock` of any [`Block`] variant.
|
||||
fn common(b: &Block) -> &kb_core::CommonBlock {
|
||||
fn common(b: &Block) -> &kebab_core::CommonBlock {
|
||||
match b {
|
||||
Block::Heading(h) => &h.common,
|
||||
Block::Paragraph(t) | Block::Quote(t) => &t.common,
|
||||
@@ -424,7 +424,7 @@ fn common(b: &Block) -> &kb_core::CommonBlock {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, CodeBlock, CommonBlock, HeadingBlock, ImageRefBlock, Lang,
|
||||
Metadata, Provenance, SourceType, TableBlock, TextBlock, TrustLevel,
|
||||
WorkspacePath, id_for_block,
|
||||
@@ -433,7 +433,7 @@ mod tests {
|
||||
|
||||
fn make_doc(blocks: Vec<Block>) -> CanonicalDocument {
|
||||
CanonicalDocument {
|
||||
doc_id: kb_core::DocumentId("d".repeat(32)),
|
||||
doc_id: kebab_core::DocumentId("d".repeat(32)),
|
||||
source_asset_id: AssetId("a".repeat(32)),
|
||||
workspace_path: WorkspacePath::new("notes/test.md".into()).unwrap(),
|
||||
title: "Test".into(),
|
||||
@@ -450,14 +450,14 @@ mod tests {
|
||||
user: Default::default(),
|
||||
},
|
||||
provenance: Provenance { events: vec![] },
|
||||
parser_version: kb_core::ParserVersion("test-parser-0".into()),
|
||||
parser_version: kebab_core::ParserVersion("test-parser-0".into()),
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_id() -> kb_core::DocumentId {
|
||||
kb_core::DocumentId("d".repeat(32))
|
||||
fn doc_id() -> kebab_core::DocumentId {
|
||||
kebab_core::DocumentId("d".repeat(32))
|
||||
}
|
||||
|
||||
fn span(start: u32, end: u32) -> SourceSpan {
|
||||
@@ -13,13 +13,13 @@
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kb_chunk::MdHeadingV1Chunker;
|
||||
use kb_core::{
|
||||
use kebab_chunk::MdHeadingV1Chunker;
|
||||
use kebab_core::{
|
||||
AssetId, AssetStorage, Checksum, ChunkPolicy, ChunkerVersion, Chunker, MediaType,
|
||||
ParserVersion, RawAsset, SourceUri, WorkspacePath,
|
||||
};
|
||||
use kb_normalize::build_canonical_document;
|
||||
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_normalize::build_canonical_document;
|
||||
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-cli"
|
||||
name = "kebab-cli"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -12,9 +12,9 @@ name = "kb"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-app = { path = "../kb-app" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-app = { path = "../kebab-app" }
|
||||
# kb-eval re-exports `compute_aggregate` / `compare_runs` /
|
||||
# `render_report_md` (P5-2). The DoD calls for these to be reached
|
||||
# "via kb-app", but kb-eval already depends on kb-app (P5-1 runner
|
||||
@@ -22,7 +22,7 @@ kb-app = { path = "../kb-app" }
|
||||
# require kb-app → kb-eval, forming a cycle. We therefore wire
|
||||
# kb-cli → kb-eval directly; documented in
|
||||
# `tasks/p5/p5-2-metrics-compare.md`.
|
||||
kb-eval = { path = "../kb-eval" }
|
||||
kebab-eval = { path = "../kebab-eval" }
|
||||
anyhow = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
@@ -6,7 +6,7 @@ use std::process::ExitCode;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
use kb_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
|
||||
use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
|
||||
|
||||
mod wire;
|
||||
|
||||
@@ -170,12 +170,12 @@ enum ModeFlag {
|
||||
Hybrid,
|
||||
}
|
||||
|
||||
impl From<ModeFlag> for kb_core::SearchMode {
|
||||
impl From<ModeFlag> for kebab_core::SearchMode {
|
||||
fn from(m: ModeFlag) -> Self {
|
||||
match m {
|
||||
ModeFlag::Lexical => kb_core::SearchMode::Lexical,
|
||||
ModeFlag::Vector => kb_core::SearchMode::Vector,
|
||||
ModeFlag::Hybrid => kb_core::SearchMode::Hybrid,
|
||||
ModeFlag::Lexical => kebab_core::SearchMode::Lexical,
|
||||
ModeFlag::Vector => kebab_core::SearchMode::Vector,
|
||||
ModeFlag::Hybrid => kebab_core::SearchMode::Hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -183,15 +183,15 @@ impl From<ModeFlag> for kb_core::SearchMode {
|
||||
fn main() -> ExitCode {
|
||||
let cli = Cli::parse();
|
||||
let level = if cli.debug {
|
||||
kb_app::logging::LogLevel::Debug
|
||||
kebab_app::logging::LogLevel::Debug
|
||||
} else if cli.verbose {
|
||||
kb_app::logging::LogLevel::Verbose
|
||||
kebab_app::logging::LogLevel::Verbose
|
||||
} else {
|
||||
kb_app::logging::LogLevel::Default
|
||||
kebab_app::logging::LogLevel::Default
|
||||
};
|
||||
// Fail-soft: if logging init errors (e.g. XDG state dir is read-only),
|
||||
// proceed without a guard rather than crashing — `kb` is still usable.
|
||||
let _log_guard = kb_app::logging::init(level).ok();
|
||||
let _log_guard = kebab_app::logging::init(level).ok();
|
||||
match run(&cli) {
|
||||
Ok(()) => ExitCode::from(0),
|
||||
Err(e) => {
|
||||
@@ -227,14 +227,14 @@ fn exit_code(err: &anyhow::Error) -> u8 {
|
||||
fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
match &cli.command {
|
||||
Cmd::Init { force } => {
|
||||
kb_app::init_workspace(*force)?;
|
||||
kebab_app::init_workspace(*force)?;
|
||||
if !cli.json {
|
||||
println!(
|
||||
"created {}",
|
||||
kb_config::Config::xdg_config_path().display()
|
||||
kebab_config::Config::xdg_config_path().display()
|
||||
);
|
||||
println!("created {}", kb_config::Config::xdg_data_dir().display());
|
||||
println!("created {}", kb_config::Config::xdg_state_dir().display());
|
||||
println!("created {}", kebab_config::Config::xdg_data_dir().display());
|
||||
println!("created {}", kebab_config::Config::xdg_state_dir().display());
|
||||
println!("hint edit the config above, then `kb ingest`");
|
||||
}
|
||||
Ok(())
|
||||
@@ -244,13 +244,13 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
root,
|
||||
summary_only,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let scope = kb_core::SourceScope {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let scope = kebab_core::SourceScope {
|
||||
root: root.clone().unwrap_or_else(|| PathBuf::from(&cfg.workspace.root)),
|
||||
include: cfg.workspace.include.clone(),
|
||||
exclude: cfg.workspace.exclude.clone(),
|
||||
};
|
||||
let report = kb_app::ingest_with_config(cfg, scope, *summary_only)?;
|
||||
let report = kebab_app::ingest_with_config(cfg, scope, *summary_only)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
|
||||
} else {
|
||||
@@ -269,8 +269,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
|
||||
Cmd::List { what } => match what {
|
||||
ListWhat::Docs => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let docs = kb_app::list_docs_with_config(cfg, kb_core::DocFilter::default())?;
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let docs = kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?);
|
||||
} else {
|
||||
@@ -284,9 +284,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
|
||||
Cmd::Inspect { what } => match what {
|
||||
InspectWhat::Doc { id } => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let doc_id: kb_core::DocumentId = id.parse()?;
|
||||
let doc = kb_app::inspect_doc_with_config(cfg, &doc_id)?;
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let doc_id: kebab_core::DocumentId = id.parse()?;
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, &doc_id)?;
|
||||
// Inspect doc emits a `CanonicalDocument` — there's no §2
|
||||
// wire schema for it (P1-5 will decide whether this also
|
||||
// becomes a tagged wrapper or stays as the raw domain
|
||||
@@ -296,9 +296,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
InspectWhat::Chunk { id } => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let chunk_id: kb_core::ChunkId = id.parse()?;
|
||||
let chunk = kb_app::inspect_chunk_with_config(cfg, &chunk_id)?;
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let chunk_id: kebab_core::ChunkId = id.parse()?;
|
||||
let chunk = kebab_app::inspect_chunk_with_config(cfg, &chunk_id)?;
|
||||
println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?);
|
||||
Ok(())
|
||||
}
|
||||
@@ -310,14 +310,14 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
mode,
|
||||
explain: _,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let q = kb_core::SearchQuery {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: query.clone(),
|
||||
mode: (*mode).into(),
|
||||
k: *k,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let hits = kb_app::search_with_config(cfg, q)?;
|
||||
let hits = kebab_app::search_with_config(cfg, q)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
|
||||
} else {
|
||||
@@ -351,8 +351,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
temperature,
|
||||
seed,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let opts = kb_app::AskOpts {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let opts = kebab_app::AskOpts {
|
||||
k: *k,
|
||||
explain: *explain,
|
||||
mode: (*mode).into(),
|
||||
@@ -363,7 +363,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
// wires up a real `mpsc::Sender` here.
|
||||
stream_sink: None,
|
||||
};
|
||||
let ans = kb_app::ask_with_config(cfg, query, opts)?;
|
||||
let ans = kebab_app::ask_with_config(cfg, query, opts)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?);
|
||||
} else {
|
||||
@@ -377,7 +377,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
Cmd::Doctor => {
|
||||
let report = kb_app::doctor_with_config_path(cli.config.as_deref())?;
|
||||
let report = kebab_app::doctor_with_config_path(cli.config.as_deref())?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?);
|
||||
} else {
|
||||
@@ -409,7 +409,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
temperature,
|
||||
seed,
|
||||
} => {
|
||||
let opts = kb_eval::EvalRunOpts {
|
||||
let opts = kebab_eval::EvalRunOpts {
|
||||
suite: suite.clone(),
|
||||
mode: (*mode).into(),
|
||||
with_rag: *with_rag,
|
||||
@@ -417,7 +417,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
temperature: *temperature,
|
||||
seed: *seed,
|
||||
};
|
||||
let run = kb_eval::run_eval(&opts)?;
|
||||
let run = kebab_eval::run_eval(&opts)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string_pretty(&run)?);
|
||||
} else {
|
||||
@@ -430,8 +430,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
EvalWhat::Aggregate { run_id } => {
|
||||
let agg = kb_eval::compute_aggregate(run_id)?;
|
||||
kb_eval::store_aggregate(run_id, &agg)?;
|
||||
let agg = kebab_eval::compute_aggregate(run_id)?;
|
||||
kebab_eval::store_aggregate(run_id, &agg)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string_pretty(&agg)?);
|
||||
} else {
|
||||
@@ -450,20 +450,20 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
strict_chunker_version,
|
||||
write_report,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(None)?;
|
||||
let opts = kb_eval::CompareOpts {
|
||||
let cfg = kebab_config::Config::load(None)?;
|
||||
let opts = kebab_eval::CompareOpts {
|
||||
strict_chunker_version: *strict_chunker_version,
|
||||
};
|
||||
let report = kb_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
|
||||
let md = kb_eval::render_report_md(&report);
|
||||
let report = kebab_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
|
||||
let md = kebab_eval::render_report_md(&report);
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string_pretty(&report)?);
|
||||
} else {
|
||||
print!("{md}");
|
||||
}
|
||||
if *write_report {
|
||||
let resolved_data_dir = kb_config::expand_path(&cfg.storage.data_dir, "");
|
||||
let runs_dir = kb_config::expand_path(
|
||||
let resolved_data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
|
||||
let runs_dir = kebab_config::expand_path(
|
||||
&cfg.storage.runs_dir,
|
||||
&resolved_data_dir.to_string_lossy(),
|
||||
);
|
||||
@@ -17,8 +17,8 @@
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use kb_app::DoctorReport;
|
||||
use kb_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
|
||||
use kebab_app::DoctorReport;
|
||||
use kebab_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
|
||||
|
||||
/// Insert `schema_version` into an object-shaped `Value`. Helper for the
|
||||
/// "serialize, then tag" pattern used by all the per-type wrappers below.
|
||||
@@ -132,7 +132,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn ingest_wrapper_tags_schema_version() {
|
||||
use kb_core::SourceScope;
|
||||
use kebab_core::SourceScope;
|
||||
let r = IngestReport {
|
||||
scope: SourceScope {
|
||||
root: std::path::PathBuf::from("/tmp"),
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-config"
|
||||
name = "kebab-config"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -9,7 +9,7 @@ description = "Config schema + XDG path resolution"
|
||||
|
||||
[dependencies]
|
||||
# kb-core::CoreError reserved for P1-* config errors
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
anyhow = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-core"
|
||||
name = "kebab-core"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-embed-local"
|
||||
name = "kebab-embed-local"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Local fastembed-rs adapter implementing kb_core::Embedder (multilingual-e5-small default)"
|
||||
|
||||
[dependencies]
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-embed = { path = "../kb-embed" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-embed = { path = "../kebab-embed" }
|
||||
# Default features bring `ort-download-binaries` (bundled ONNX runtime)
|
||||
# and `hf-hub-native-tls` (first-run model download). No extra features
|
||||
# needed for the multilingual-e5-small path.
|
||||
@@ -1,5 +1,5 @@
|
||||
//! `kb-embed-local` — `FastembedEmbedder`, a local ONNX-backed
|
||||
//! [`Embedder`](kb_embed::Embedder) implementation.
|
||||
//! [`Embedder`](kebab_embed::Embedder) implementation.
|
||||
//!
|
||||
//! Wraps [`fastembed::TextEmbedding`] for the default `multilingual-e5-small`
|
||||
//! (384-dim) model. Honors `config.models.embedding.batch_size` and applies
|
||||
@@ -26,8 +26,8 @@ use std::sync::Mutex;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
|
||||
use kb_config::expand_path;
|
||||
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
use kebab_config::expand_path;
|
||||
use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
|
||||
/// Subdirectory under `config.storage.model_dir` where the fastembed
|
||||
/// adapter writes / reads ONNX + tokenizer files. Hard-coded per task
|
||||
@@ -58,9 +58,9 @@ impl FastembedEmbedder {
|
||||
/// `config.models.embedding.dimensions` matches the model's actual
|
||||
/// dim BEFORE returning, so a mismatch fails at construction (not on
|
||||
/// first `embed`).
|
||||
pub fn new(config: &kb_config::Config) -> Result<Self> {
|
||||
pub fn new(config: &kebab_config::Config) -> Result<Self> {
|
||||
// 1. Resolve `{data_dir}/models/fastembed/` from the config
|
||||
// templates. Goes through the shared `kb_config::expand_path`
|
||||
// templates. Goes through the shared `kebab_config::expand_path`
|
||||
// so every crate resolves storage paths identically.
|
||||
let data_dir = expand_path(&config.storage.data_dir, "");
|
||||
let model_dir = expand_path(&config.storage.model_dir, &data_dir.to_string_lossy());
|
||||
@@ -224,7 +224,7 @@ pub(crate) fn check_dim(model_dim: usize, cfg_dim: usize) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_embed::EmbeddingInput;
|
||||
use kebab_embed::EmbeddingInput;
|
||||
|
||||
// ── check_dim ────────────────────────────────────────────────────
|
||||
//
|
||||
@@ -22,16 +22,16 @@ use std::hash::{Hash, Hasher};
|
||||
use std::sync::OnceLock;
|
||||
use std::time::Instant;
|
||||
|
||||
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind};
|
||||
use kb_embed_local::FastembedEmbedder;
|
||||
use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind};
|
||||
use kebab_embed_local::FastembedEmbedder;
|
||||
|
||||
/// Build a `Config` whose `data_dir` lives in a per-process temp dir so
|
||||
/// the test never writes into the developer's real `~/.local/share/kb`.
|
||||
/// Returns the `Config` and the `TempDir` guard (caller keeps the guard
|
||||
/// alive for the test duration).
|
||||
fn test_config() -> (kb_config::Config, tempfile::TempDir) {
|
||||
fn test_config() -> (kebab_config::Config, tempfile::TempDir) {
|
||||
let tmp = tempfile::tempdir().expect("create tempdir");
|
||||
let mut cfg = kb_config::Config::defaults();
|
||||
let mut cfg = kebab_config::Config::defaults();
|
||||
cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned();
|
||||
// model_dir keeps its default `{data_dir}/models` template; the
|
||||
// adapter resolves it itself.
|
||||
@@ -141,12 +141,12 @@ fn output_vectors_are_l2_normalized() {
|
||||
},
|
||||
];
|
||||
let out = emb.embed(&inputs).expect("embed");
|
||||
// Per `kb_embed::assert_unit_norm` docs: `5e-4` is the safe bound at
|
||||
// Per `kebab_embed::assert_unit_norm` docs: `5e-4` is the safe bound at
|
||||
// 384 dims (f32::EPSILON × √384 ≈ 2.3e-6, but ONNX kernels add
|
||||
// their own per-component noise; 1e-3 is very generous and matches
|
||||
// the spec's `± 1e-3`).
|
||||
kb_embed::assert_unit_norm(&out, 1e-3);
|
||||
kb_embed::assert_vector_shape(&out, 384);
|
||||
kebab_embed::assert_unit_norm(&out, 1e-3);
|
||||
kebab_embed::assert_vector_shape(&out, 384);
|
||||
}
|
||||
|
||||
// ─── determinism ──────────────────────────────────────────────────────
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-embed"
|
||||
name = "kebab-embed"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Embedder trait re-exports + opt-in deterministic MockEmbedder for downstream tests"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
serde = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
@@ -1,9 +1,9 @@
|
||||
//! `kb-embed` — thin re-export crate for the [`Embedder`] trait surface.
|
||||
//!
|
||||
//! This crate exists so downstream code (`kb-store-vector`, `kb-search`,
|
||||
//! adapters in p3-2) can `use kb_embed::Embedder` and stay stable across
|
||||
//! adapters in p3-2) can `use kebab_embed::Embedder` and stay stable across
|
||||
//! kb-core reorganizations. It defines **no new types**; everything is a
|
||||
//! re-export of [`kb_core`].
|
||||
//! re-export of [`kebab_core`].
|
||||
//!
|
||||
//! ## Mock implementation
|
||||
//!
|
||||
@@ -19,7 +19,7 @@
|
||||
// Per spec §7.2 — these are the only public-surface types this crate offers.
|
||||
// Adding new types is forbidden by the task contract.
|
||||
|
||||
pub use kb_core::{
|
||||
pub use kebab_core::{
|
||||
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
|
||||
};
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
//! * Different `text` → different output with overwhelming probability.
|
||||
//! * All output components are finite (`is_finite()`).
|
||||
|
||||
use kb_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
use kebab_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
|
||||
/// Deterministic test double. See module docs for the hashing recipe.
|
||||
pub struct MockEmbedder {
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
#![cfg(feature = "mock")]
|
||||
|
||||
use kb_embed::{
|
||||
use kebab_embed::{
|
||||
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, MockEmbedder,
|
||||
assert_unit_norm, assert_vector_shape,
|
||||
};
|
||||
@@ -5,7 +5,7 @@
|
||||
//! Runs under both `cargo test -p kb-embed` and
|
||||
//! `cargo test -p kb-embed --features mock`.
|
||||
|
||||
use kb_embed::{
|
||||
use kebab_embed::{
|
||||
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
|
||||
assert_vector_shape,
|
||||
};
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-eval"
|
||||
name = "kebab-eval"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -9,10 +9,10 @@ description = "Golden-fixture eval runner: load YAML, drive kb-app search/ask,
|
||||
|
||||
[dependencies]
|
||||
# Allowed deps per p5-1 spec — domain types + facade only.
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-app = { path = "../kb-app" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-app = { path = "../kebab-app" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
serde_yaml = { workspace = true }
|
||||
@@ -14,9 +14,9 @@ use std::fmt::Write as _;
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{ChunkId, DocumentId};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{ChunkId, DocumentId};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
|
||||
use crate::loader::load_golden_set;
|
||||
use crate::metrics::{
|
||||
@@ -300,7 +300,7 @@ fn extract_chunker_version(snapshot_json: &str) -> Option<String> {
|
||||
}
|
||||
|
||||
fn parse_results(
|
||||
rows: &[kb_store_sqlite::EvalQueryResultRecord],
|
||||
rows: &[kebab_store_sqlite::EvalQueryResultRecord],
|
||||
) -> Result<HashMap<String, QueryResult>> {
|
||||
let mut out = HashMap::with_capacity(rows.len());
|
||||
for row in rows {
|
||||
@@ -456,9 +456,9 @@ mod tests {
|
||||
let g = GoldenQuery {
|
||||
id: "q1".into(),
|
||||
query: "q".into(),
|
||||
lang: kb_core::Lang(String::new()),
|
||||
lang: kebab_core::Lang(String::new()),
|
||||
expected_doc_ids: vec![],
|
||||
expected_chunk_ids: vec![kb_core::ChunkId("c1".into())],
|
||||
expected_chunk_ids: vec![kebab_core::ChunkId("c1".into())],
|
||||
must_contain: vec![],
|
||||
forbidden: vec![],
|
||||
difficulty: None,
|
||||
@@ -1,7 +1,7 @@
|
||||
//! `kb-eval` — golden-fixture eval runner (P5-1).
|
||||
//!
|
||||
//! Loads `fixtures/golden_queries.yaml`, runs each entry through the
|
||||
//! [`kb_app`] facade (lexical / vector / hybrid + optional RAG), and
|
||||
//! [`kebab_app`] facade (lexical / vector / hybrid + optional RAG), and
|
||||
//! persists results into `eval_runs` / `eval_query_results` plus
|
||||
//! `runs_dir/<run_id>/per_query.jsonl` (design §5.7, §6.3).
|
||||
//!
|
||||
@@ -6,7 +6,7 @@
|
||||
//! tests that don't have a SQLite store handy.
|
||||
//! - [`load_golden_set_validated`] — additionally verifies every
|
||||
//! `expected_doc_id` / `expected_chunk_id` exists in the SQLite DB
|
||||
//! the supplied [`kb_config::Config`] points at. Used by
|
||||
//! the supplied [`kebab_config::Config`] points at. Used by
|
||||
//! [`crate::run_eval`] in production so a stale golden set fails
|
||||
//! fast at run start.
|
||||
|
||||
@@ -14,7 +14,7 @@ use std::collections::{BTreeSet, HashSet};
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
|
||||
use crate::types::GoldenQuery;
|
||||
|
||||
@@ -43,11 +43,11 @@ pub fn load_golden_set(path: &Path) -> Result<Vec<GoldenQuery>> {
|
||||
/// Currently used only by the in-module tests below; production code
|
||||
/// inlines `load_golden_set` + `validate_against_db` in
|
||||
/// [`crate::run_eval_with_config`] so the validation can run against
|
||||
/// an already-opened [`kb_config::Config`] without re-parsing YAML.
|
||||
/// an already-opened [`kebab_config::Config`] without re-parsing YAML.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn load_golden_set_validated(
|
||||
yaml_path: &Path,
|
||||
cfg: &kb_config::Config,
|
||||
cfg: &kebab_config::Config,
|
||||
) -> Result<Vec<GoldenQuery>> {
|
||||
let queries = load_golden_set(yaml_path)?;
|
||||
validate_against_db(&queries, cfg)?;
|
||||
@@ -73,7 +73,7 @@ fn check_unique_ids(queries: &[GoldenQuery]) -> Result<()> {
|
||||
/// Read every doc_id / chunk_id referenced by `queries` and confirm
|
||||
/// SQLite has rows for them. Builds a sorted, deduplicated error
|
||||
/// message listing every missing ID.
|
||||
pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kb_config::Config) -> Result<()> {
|
||||
pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kebab_config::Config) -> Result<()> {
|
||||
// Short-circuit when there is nothing to validate — saves opening
|
||||
// SQLite for golden sets that omit expected_*_ids entirely.
|
||||
let needs_check = queries
|
||||
@@ -140,8 +140,8 @@ mod tests {
|
||||
//! `tests/loader.rs`; only the validated-variant cases need to sit
|
||||
//! next to the function so they can see the `pub(crate)` symbol.
|
||||
use super::*;
|
||||
use kb_config::Config;
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params;
|
||||
use std::fs;
|
||||
use tempfile::tempdir;
|
||||
@@ -13,9 +13,9 @@ use std::path::PathBuf;
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{ChunkId, Citation, DocumentId};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{ChunkId, Citation, DocumentId};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
|
||||
use crate::loader::load_golden_set;
|
||||
use crate::types::{GoldenQuery, QueryResult};
|
||||
@@ -175,7 +175,7 @@ fn load_golden_for_metrics() -> Result<Vec<GoldenQuery>> {
|
||||
/// `tasks/p5/p5-2-metrics-compare.md`), this will need to take one.
|
||||
pub(crate) fn aggregate_from_rows(
|
||||
queries: &[GoldenQuery],
|
||||
rows: &[kb_store_sqlite::EvalQueryResultRecord],
|
||||
rows: &[kebab_store_sqlite::EvalQueryResultRecord],
|
||||
) -> Result<AggregateMetrics> {
|
||||
let golden_by_id: HashMap<&str, &GoldenQuery> =
|
||||
queries.iter().map(|q| (q.id.as_str(), q)).collect();
|
||||
@@ -395,14 +395,14 @@ fn ratio_or_zero(num: u32, denom: u32) -> f32 {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, RetrievalDetail, SearchHit,
|
||||
SearchMode,
|
||||
};
|
||||
use kb_core::asset::WorkspacePath;
|
||||
use kb_core::media::Lang;
|
||||
use kb_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId};
|
||||
use kb_core::versions::PromptTemplateVersion;
|
||||
use kebab_core::asset::WorkspacePath;
|
||||
use kebab_core::media::Lang;
|
||||
use kebab_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId};
|
||||
use kebab_core::versions::PromptTemplateVersion;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
fn gq(id: &str, expected_chunks: &[&str], expected_docs: &[&str]) -> GoldenQuery {
|
||||
@@ -460,9 +460,9 @@ mod tests {
|
||||
}
|
||||
|
||||
fn record(id: &str, hits: Vec<SearchHit>, error: Option<String>, answer: Option<Answer>)
|
||||
-> kb_store_sqlite::EvalQueryResultRecord
|
||||
-> kebab_store_sqlite::EvalQueryResultRecord
|
||||
{
|
||||
kb_store_sqlite::EvalQueryResultRecord {
|
||||
kebab_store_sqlite::EvalQueryResultRecord {
|
||||
query_id: id.into(),
|
||||
result_json: serde_json::to_string(&qr(id, hits, error, answer)).unwrap(),
|
||||
}
|
||||
@@ -6,10 +6,10 @@ use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use kb_app::App;
|
||||
use kb_config::expand_path;
|
||||
use kb_core::{SearchFilters, SearchQuery};
|
||||
use kb_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use kebab_app::App;
|
||||
use kebab_config::expand_path;
|
||||
use kebab_core::{SearchFilters, SearchQuery};
|
||||
use kebab_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::loader::{load_golden_set, validate_against_db};
|
||||
@@ -25,18 +25,18 @@ fn elapsed_ms_u32(start: Instant) -> u32 {
|
||||
}
|
||||
|
||||
/// Run the golden suite end-to-end against the active XDG-loaded
|
||||
/// [`kb_config::Config`]. Wraps [`run_eval_with_config`] with
|
||||
/// [`kebab_config::Config`]. Wraps [`run_eval_with_config`] with
|
||||
/// `Config::load(None)`.
|
||||
pub fn run_eval(opts: &EvalRunOpts) -> Result<EvalRun> {
|
||||
let cfg = kb_config::Config::load(None).context("load Config for run_eval")?;
|
||||
let cfg = kebab_config::Config::load(None).context("load Config for run_eval")?;
|
||||
run_eval_with_config(&cfg, opts)
|
||||
}
|
||||
|
||||
/// Run the golden suite end-to-end against an explicit
|
||||
/// [`kb_config::Config`]. Used by integration tests (TempDir-backed
|
||||
/// [`kebab_config::Config`]. Used by integration tests (TempDir-backed
|
||||
/// data_dir) and any future caller that wants to drive the runner
|
||||
/// against a non-default config.
|
||||
pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> {
|
||||
pub fn run_eval_with_config(cfg: &kebab_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> {
|
||||
let started = Instant::now();
|
||||
|
||||
// ── 1. Load golden set ────────────────────────────────────────────────
|
||||
@@ -167,7 +167,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
|
||||
// call did not already error out (we want one error per query, not
|
||||
// a duplicated one).
|
||||
let answer = if opts.with_rag && error.is_none() {
|
||||
let ask_opts = kb_app::AskOpts {
|
||||
let ask_opts = kebab_app::AskOpts {
|
||||
k: opts.k,
|
||||
explain: true,
|
||||
mode: opts.mode,
|
||||
@@ -206,7 +206,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
|
||||
/// stable run-time property of the config alone. P5-2 may compose it
|
||||
/// from `embedding.{model,version,dimensions}` if it needs the field
|
||||
/// for compare reports.
|
||||
fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> {
|
||||
fn build_config_snapshot(cfg: &kebab_config::Config) -> Result<serde_json::Value> {
|
||||
let cfg_value = serde_json::to_value(cfg).context("serialize Config")?;
|
||||
Ok(serde_json::json!({
|
||||
"config": cfg_value,
|
||||
@@ -234,7 +234,7 @@ fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> {
|
||||
/// `run_id` collision would already have failed the `eval_runs`
|
||||
/// PRIMARY KEY upstream).
|
||||
fn write_per_query_jsonl(
|
||||
cfg: &kb_config::Config,
|
||||
cfg: &kebab_config::Config,
|
||||
run_id: &str,
|
||||
per_query: &[QueryResult],
|
||||
) -> Result<()> {
|
||||
@@ -4,7 +4,7 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use kb_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode};
|
||||
use kebab_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode};
|
||||
|
||||
/// One golden query loaded from `fixtures/golden_queries.yaml`.
|
||||
///
|
||||
@@ -41,10 +41,10 @@ pub struct EvalRunOpts {
|
||||
/// Suite label persisted into `eval_runs.suite`. The shipped
|
||||
/// fixture is `"golden"`; other suites can reuse the same runner.
|
||||
pub suite: String,
|
||||
/// Retrieval mode forwarded to every `kb_app::search` /
|
||||
/// `kb_app::ask` call inside the run.
|
||||
/// Retrieval mode forwarded to every `kebab_app::search` /
|
||||
/// `kebab_app::ask` call inside the run.
|
||||
pub mode: SearchMode,
|
||||
/// When `true`, also call `kb_app::ask` per query and record the
|
||||
/// When `true`, also call `kebab_app::ask` per query and record the
|
||||
/// resulting `Answer` on the `QueryResult`.
|
||||
pub with_rag: bool,
|
||||
/// Top-k forwarded to retrieval (and `AskOpts.k` when `with_rag`).
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
use std::fs;
|
||||
|
||||
use kb_eval::load_golden_set;
|
||||
use kebab_eval::load_golden_set;
|
||||
use tempfile::tempdir;
|
||||
|
||||
// ── 1. parser accepts well-formed YAML with optional fields ──────────────────
|
||||
@@ -9,17 +9,17 @@
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, Lang,
|
||||
RetrievalDetail, SearchHit, SearchMode,
|
||||
asset::WorkspacePath,
|
||||
};
|
||||
use kb_eval::{
|
||||
use kebab_eval::{
|
||||
AggregateMetrics, CompareOpts, CompareReport, ComparisonKind, GoldenQuery, QueryResult,
|
||||
compare_runs_with_config, compute_aggregate_with_config, store_aggregate_with_config,
|
||||
};
|
||||
use kb_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use kebab_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -259,7 +259,7 @@ fn compare_runs_classifies_win_loss_draw_regression() {
|
||||
drop(store);
|
||||
|
||||
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
|
||||
let by_id: std::collections::HashMap<&str, &kb_eval::QueryComparison> =
|
||||
let by_id: std::collections::HashMap<&str, &kebab_eval::QueryComparison> =
|
||||
report.per_query.iter().map(|c| (c.query_id.as_str(), c)).collect();
|
||||
assert_eq!(by_id["q-001"].kind, ComparisonKind::Loss);
|
||||
assert_eq!(by_id["q-002"].kind, ComparisonKind::Win);
|
||||
@@ -414,7 +414,7 @@ fn render_report_md_is_human_readable() {
|
||||
drop(store);
|
||||
|
||||
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
|
||||
let md = kb_eval::render_report_md(&report);
|
||||
let md = kebab_eval::render_report_md(&report);
|
||||
assert!(md.starts_with("# Eval compare:"), "md = {md}");
|
||||
assert!(md.contains("hit@1"));
|
||||
assert!(md.contains("MRR"));
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Runner integration tests for `kb-eval` (P5-1).
|
||||
//!
|
||||
//! Drives [`kb_eval::run_eval_with_config`] end-to-end against a
|
||||
//! Drives [`kebab_eval::run_eval_with_config`] end-to-end against a
|
||||
//! TempDir-backed config:
|
||||
//!
|
||||
//! - tiny seeded SQLite corpus (3 docs / 3 chunks) used as the
|
||||
@@ -17,10 +17,10 @@ use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::SearchMode;
|
||||
use kb_eval::{EvalRunOpts, QueryResult, run_eval_with_config};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_core::SearchMode;
|
||||
use kebab_eval::{EvalRunOpts, QueryResult, run_eval_with_config};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@@ -110,7 +110,7 @@ fn seed_corpus(store: &SqliteStore) {
|
||||
// Build the FTS index so lexical search returns hits. Reuses the
|
||||
// same connection guard rather than reopening — the SAVEPOINT
|
||||
// protocol nests correctly under the existing read_conn lock.
|
||||
kb_store_sqlite::rebuild_chunks_fts(&conn).unwrap();
|
||||
kebab_store_sqlite::rebuild_chunks_fts(&conn).unwrap();
|
||||
drop(conn);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-llm-local"
|
||||
name = "kebab-llm-local"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,9 +8,9 @@ repository = { workspace = true }
|
||||
description = "Ollama HTTP adapter implementing kb_core::LanguageModel via reqwest::blocking"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-llm = { path = "../kb-llm" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-llm = { path = "../kebab-llm" }
|
||||
# `default-features = false` drops the `default-tls` (native-tls / openssl)
|
||||
# feature so we don't pull in a system OpenSSL; we explicitly pin rustls.
|
||||
# Note: `default-features = false` does NOT drop tokio — reqwest 0.12's
|
||||
@@ -1,5 +1,5 @@
|
||||
//! `kb-llm-local` — Ollama HTTP adapter implementing
|
||||
//! [`kb_core::LanguageModel`] over the local `POST /api/generate` endpoint.
|
||||
//! [`kebab_core::LanguageModel`] over the local `POST /api/generate` endpoint.
|
||||
//!
|
||||
//! ## Why a separate crate
|
||||
//!
|
||||
@@ -39,11 +39,11 @@ mod ollama;
|
||||
pub use error::LlmError;
|
||||
pub use ollama::OllamaLanguageModel;
|
||||
|
||||
// Re-export the trait surface so adapter consumers can `use kb_llm_local::*`
|
||||
// Re-export the trait surface so adapter consumers can `use kebab_llm_local::*`
|
||||
// without also depending on `kb-llm` directly. These are the same symbols
|
||||
// `kb-llm` re-exports from `kb-core`; this crate adds **no new types** to
|
||||
// the trait surface (`LlmError` and `OllamaLanguageModel` are
|
||||
// implementation-side only).
|
||||
pub use kb_llm::{
|
||||
pub use kebab_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
@@ -41,7 +41,7 @@
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::time::Duration;
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -68,7 +68,7 @@ pub struct OllamaLanguageModel {
|
||||
}
|
||||
|
||||
impl OllamaLanguageModel {
|
||||
/// Build an adapter from a workspace [`kb_config::Config`]. Reads
|
||||
/// Build an adapter from a workspace [`kebab_config::Config`]. Reads
|
||||
/// `config.models.llm.{provider, model, endpoint, context_tokens,
|
||||
/// temperature, seed}`.
|
||||
///
|
||||
@@ -76,7 +76,7 @@ impl OllamaLanguageModel {
|
||||
/// expected to have validated `provider == "ollama"`; this constructor
|
||||
/// trusts the config and would happily build for an unknown provider.
|
||||
/// (Provider routing is the App layer's job, not the adapter's.)
|
||||
pub fn new(config: &kb_config::Config) -> anyhow::Result<Self> {
|
||||
pub fn new(config: &kebab_config::Config) -> anyhow::Result<Self> {
|
||||
let llm = &config.models.llm;
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.timeout(REQUEST_TIMEOUT)
|
||||
@@ -292,7 +292,7 @@ impl Iterator for OllamaStream {
|
||||
// pipelines that expect a terminal frame still terminate.
|
||||
self.done = true;
|
||||
tracing::warn!(
|
||||
target: "kb_llm_local",
|
||||
target: "kebab_llm_local",
|
||||
"ollama stream ended without a `done: true` frame; synthesizing Aborted",
|
||||
);
|
||||
return Some(Ok(TokenChunk::Done {
|
||||
@@ -361,14 +361,14 @@ impl Iterator for OllamaStream {
|
||||
};
|
||||
let prompt_tokens = line.prompt_eval_count.unwrap_or_else(|| {
|
||||
tracing::warn!(
|
||||
target: "kb_llm_local",
|
||||
target: "kebab_llm_local",
|
||||
"ollama done frame missing prompt_eval_count; defaulting to 0",
|
||||
);
|
||||
0
|
||||
});
|
||||
let completion_tokens = line.eval_count.unwrap_or_else(|| {
|
||||
tracing::warn!(
|
||||
target: "kb_llm_local",
|
||||
target: "kebab_llm_local",
|
||||
"ollama done frame missing eval_count; defaulting to 0",
|
||||
);
|
||||
0
|
||||
@@ -2,8 +2,8 @@
|
||||
//! relevant config fields and exposes them via the trait surface, all
|
||||
//! without touching the network (per design §7.2 lazy-connect contract).
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
use kebab_config::Config;
|
||||
use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
|
||||
#[test]
|
||||
fn construction_with_default_config_returns_expected_model_ref() {
|
||||
@@ -11,9 +11,9 @@
|
||||
//! These hit `http://127.0.0.1:11434` directly and require an actual model
|
||||
//! pulled locally. CI runs default (non-ignored) tests only.
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{GenerateRequest, TokenChunk};
|
||||
use kb_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{GenerateRequest, TokenChunk};
|
||||
use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
|
||||
#[test]
|
||||
#[ignore = "requires a local Ollama daemon + pulled model"]
|
||||
@@ -10,9 +10,9 @@
|
||||
//! error mapping, finish-reason mapping, missing-counter degradation, and
|
||||
//! determinism semantics.
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{FinishReason, GenerateRequest, TokenChunk};
|
||||
use kb_llm_local::{LanguageModel, LlmError, OllamaLanguageModel};
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{FinishReason, GenerateRequest, TokenChunk};
|
||||
use kebab_llm_local::{LanguageModel, LlmError, OllamaLanguageModel};
|
||||
use wiremock::matchers::{method, path};
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-llm"
|
||||
name = "kebab-llm"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,7 +8,7 @@ repository = { workspace = true }
|
||||
description = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
anyhow = { workspace = true }
|
||||
|
||||
[features]
|
||||
@@ -1,8 +1,8 @@
|
||||
//! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface.
|
||||
//!
|
||||
//! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can
|
||||
//! `use kb_llm::LanguageModel` and stay stable across kb-core reorganizations.
|
||||
//! It defines **no new types**; everything is a re-export of [`kb_core`].
|
||||
//! `use kebab_llm::LanguageModel` and stay stable across kb-core reorganizations.
|
||||
//! It defines **no new types**; everything is a re-export of [`kebab_core`].
|
||||
//!
|
||||
//! ## Mock implementation
|
||||
//!
|
||||
@@ -20,7 +20,7 @@
|
||||
// Per spec §7.2 — these are the only public-surface types this crate offers.
|
||||
// Adding new types is forbidden by the task contract.
|
||||
|
||||
pub use kb_core::{
|
||||
pub use kebab_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
//! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever
|
||||
//! the constructor was given — the mock does not count.
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
#![cfg(feature = "mock")]
|
||||
|
||||
use kb_llm::{
|
||||
use kebab_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage,
|
||||
assert_finish_chunk,
|
||||
};
|
||||
@@ -5,7 +5,7 @@
|
||||
//! Runs under both `cargo test -p kb-llm` and
|
||||
//! `cargo test -p kb-llm --features mock`.
|
||||
|
||||
use kb_llm::{
|
||||
use kebab_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
assert_finish_chunk,
|
||||
};
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-normalize"
|
||||
name = "kebab-normalize"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Lift parser output (kb-parse-types) into kb-core::CanonicalDocument with deterministic IDs (§3.4, §4.2, §4.3)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-parse-types = { path = "../kb-parse-types" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-parse-types = { path = "../kebab-parse-types" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
unicode-normalization = "0.1"
|
||||
@@ -23,5 +23,5 @@ tracing = { workspace = true }
|
||||
# Forbidden as a regular dep per design §8 (kb-normalize must not depend
|
||||
# on any specific parser); `cargo tree -p kb-normalize --depth 1` (the
|
||||
# default scope, excluding dev-deps) confirms this.
|
||||
kb-parse-md = { path = "../kb-parse-md" }
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
serde_json = { workspace = true }
|
||||
@@ -1,5 +1,5 @@
|
||||
//! `kb-normalize` — lift parser output (`kb-parse-types`) into a
|
||||
//! [`kb_core::CanonicalDocument`] with deterministic IDs.
|
||||
//! [`kebab_core::CanonicalDocument`] with deterministic IDs.
|
||||
//!
|
||||
//! Per design §3.4 (CanonicalDocument / Block), §4.2 (ID recipe), §4.3
|
||||
//! (ordinal rule), §3.6 (Provenance), §8 (module boundaries).
|
||||
@@ -20,16 +20,16 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Block, BlockId, CanonicalDocument, CodeBlock, CommonBlock, DocumentId, HeadingBlock,
|
||||
ImageRefBlock, Inline, Lang, ListBlock, Metadata, ParserVersion, Provenance, ProvenanceEvent,
|
||||
ProvenanceKind, RawAsset, TableBlock, TextBlock,
|
||||
};
|
||||
use kb_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind};
|
||||
use kebab_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind};
|
||||
use time::OffsetDateTime;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
pub use kb_core::{id_for_block, id_for_doc};
|
||||
pub use kebab_core::{id_for_block, id_for_doc};
|
||||
|
||||
/// Build a [`CanonicalDocument`] from the raw asset, frontmatter
|
||||
/// metadata, parser blocks, parser version, and any warnings.
|
||||
@@ -38,7 +38,7 @@ pub use kb_core::{id_for_block, id_for_doc};
|
||||
///
|
||||
/// * `doc_id = id_for_doc(workspace_path, asset_id, parser_version)` —
|
||||
/// `workspace_path` is consumed verbatim from `asset` (already NFC +
|
||||
/// POSIX per `kb_core::normalize::to_posix`).
|
||||
/// POSIX per `kebab_core::normalize::to_posix`).
|
||||
/// * `block_id = id_for_block(doc_id, kind, heading_path, ordinal,
|
||||
/// source_span)` — `ordinal` is **0-based, scoped to (heading_path,
|
||||
/// block_kind), in document order** per §4.3.
|
||||
@@ -329,7 +329,7 @@ fn flatten_inline(i: &Inline, out: &mut String) {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, AssetStorage, Checksum, MediaType, SourceSpan, SourceType, SourceUri,
|
||||
TrustLevel, WorkspacePath, normalize::to_posix,
|
||||
};
|
||||
@@ -386,7 +386,7 @@ mod tests {
|
||||
let h1_b = vec!["B".to_string()];
|
||||
vec![
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_a.clone(),
|
||||
source_span: SourceSpan::Line { start: 1, end: 1 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -395,7 +395,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_a.clone(),
|
||||
source_span: SourceSpan::Line { start: 2, end: 2 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -404,7 +404,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_a.clone(),
|
||||
source_span: SourceSpan::Line { start: 3, end: 3 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -413,7 +413,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Code,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Code,
|
||||
heading_path: h1_a,
|
||||
source_span: SourceSpan::Line { start: 4, end: 5 },
|
||||
payload: ParsedPayload::Code {
|
||||
@@ -422,7 +422,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_b,
|
||||
source_span: SourceSpan::Line { start: 6, end: 6 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -715,7 +715,7 @@ mod tests {
|
||||
fn audio_ref_block_skipped_with_warning() {
|
||||
let span = SourceSpan::Line { start: 1, end: 1 };
|
||||
let blocks = vec![ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::AudioRef,
|
||||
kind: kebab_parse_types::ParsedBlockKind::AudioRef,
|
||||
heading_path: vec![],
|
||||
source_span: span,
|
||||
payload: ParsedPayload::AudioRef {
|
||||
@@ -759,7 +759,7 @@ mod tests {
|
||||
let nfd_heading = "\u{1100}\u{1161}".to_string(); // 가 (NFD)
|
||||
let nfc_heading = "\u{AC00}".to_string(); // 가 (NFC)
|
||||
let mk_block = |heading: String| ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: vec![heading],
|
||||
source_span: span.clone(),
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -15,12 +15,12 @@
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, AssetStorage, Checksum, MediaType, ParserVersion, RawAsset, SourceUri,
|
||||
WorkspacePath,
|
||||
};
|
||||
use kb_normalize::build_canonical_document;
|
||||
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_normalize::build_canonical_document;
|
||||
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-parse-md"
|
||||
name = "kebab-parse-md"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Markdown frontmatter and block parsing into kb-core::Metadata / kb-parse-types intermediates"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-parse-types = { path = "../kb-parse-types" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-parse-types = { path = "../kebab-parse-types" }
|
||||
anyhow = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -1,10 +1,10 @@
|
||||
//! Markdown body → flat `Vec<kb_parse_types::ParsedBlock>` (§3.4 / §3.7b).
|
||||
//! Markdown body → flat `Vec<kebab_parse_types::ParsedBlock>` (§3.4 / §3.7b).
|
||||
//!
|
||||
//! Uses `pulldown-cmark` (with GFM tables enabled at runtime via
|
||||
//! `Options::ENABLE_TABLES`) to walk the body once and emit a flat list of
|
||||
//! parsed blocks. Heading paths are computed by tracking the most-recent
|
||||
//! heading text at each level. Source spans are reported as
|
||||
//! [`kb_core::SourceSpan::Line`] in 1-indexed file-line coordinates by
|
||||
//! [`kebab_core::SourceSpan::Line`] in 1-indexed file-line coordinates by
|
||||
//! converting `pulldown-cmark`'s byte offsets to line numbers and adding the
|
||||
//! caller-supplied `body_offset_lines`.
|
||||
//!
|
||||
@@ -19,10 +19,10 @@
|
||||
//!
|
||||
//! ## Inline filter
|
||||
//!
|
||||
//! [`kb_core::Inline`] only models `Text | Code | Link | Strong | Emph`.
|
||||
//! [`kebab_core::Inline`] only models `Text | Code | Link | Strong | Emph`.
|
||||
//! Inline images, footnotes, hard breaks, etc. are dropped silently per
|
||||
//! design §3.4. Block-level `![alt](src)` (an image as the sole content of a
|
||||
//! paragraph) is lifted to [`kb_parse_types::ParsedPayload::ImageRef`].
|
||||
//! paragraph) is lifted to [`kebab_parse_types::ParsedPayload::ImageRef`].
|
||||
//!
|
||||
//! ## CRLF
|
||||
//!
|
||||
@@ -33,8 +33,8 @@
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
use kb_core::{Inline, SourceSpan};
|
||||
use kb_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind};
|
||||
use kebab_core::{Inline, SourceSpan};
|
||||
use kebab_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind};
|
||||
use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
|
||||
|
||||
/// Parse a Markdown body into a flat `Vec<ParsedBlock>` plus any warnings.
|
||||
@@ -1595,7 +1595,7 @@ mod tests {
|
||||
let (blocks, _) = parse(body, 1);
|
||||
assert_eq!(blocks.len(), 1, "expected single list block");
|
||||
match &blocks[0].kind {
|
||||
kb_parse_types::ParsedBlockKind::List => {}
|
||||
kebab_parse_types::ParsedBlockKind::List => {}
|
||||
other => panic!("expected list, got {other:?}"),
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
//! Markdown frontmatter parsing → `kb_core::Metadata`.
|
||||
//! Markdown frontmatter parsing → `kebab_core::Metadata`.
|
||||
//!
|
||||
//! Implements the contract pinned in design §0 Q9 (frontmatter derive table)
|
||||
//! and §3.6 (Metadata shape). Produces structured warnings via
|
||||
@@ -18,8 +18,8 @@
|
||||
use std::ops::Range;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use kb_core::{Metadata, SourceType, TrustLevel};
|
||||
use kb_parse_types::{Warning, WarningKind};
|
||||
use kebab_core::{Metadata, SourceType, TrustLevel};
|
||||
use kebab_parse_types::{Warning, WarningKind};
|
||||
use lingua::{IsoCode639_1, Language, LanguageDetector, LanguageDetectorBuilder};
|
||||
use serde::Deserialize;
|
||||
use serde_json::{Map, Value};
|
||||
@@ -59,7 +59,7 @@ pub struct FrontmatterSpan {
|
||||
}
|
||||
|
||||
/// Parse the frontmatter (if any) from a Markdown byte slice into a
|
||||
/// `kb_core::Metadata`, applying the §0 Q9 derive table for missing fields.
|
||||
/// `kebab_core::Metadata`, applying the §0 Q9 derive table for missing fields.
|
||||
///
|
||||
/// On a malformed frontmatter the function still returns `Ok` — the
|
||||
/// frontmatter contents are discarded and the caller is told via a
|
||||
@@ -589,7 +589,7 @@ fn iso_code(lang: Language) -> &'static str {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, WorkspacePath,
|
||||
ids::id_for_doc,
|
||||
versions::ParserVersion,
|
||||
@@ -10,13 +10,13 @@
|
||||
//! env-var pattern. Migrating kb-parse-md to the env-var style is out of
|
||||
//! scope; both styles are intentional for now.
|
||||
//!
|
||||
//! Following the kb_core::Inline schema migration (struct-variant shape),
|
||||
//! Following the kebab_core::Inline schema migration (struct-variant shape),
|
||||
//! `ParsedBlock` now serializes directly through serde — no projection
|
||||
//! shim is required. Inlines surface as structured objects, e.g.
|
||||
//! `[{"kind":"text","text":"…"},{"kind":"code","code":"…"}]`.
|
||||
|
||||
use kb_parse_md::parse_blocks;
|
||||
use kb_parse_types::{ParsedBlock, Warning};
|
||||
use kebab_parse_md::parse_blocks;
|
||||
use kebab_parse_types::{ParsedBlock, Warning};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
@@ -5,7 +5,7 @@
|
||||
//! and therefore stable; lingua autodetect over our fixtures is also
|
||||
//! stable for the language set we configured.
|
||||
|
||||
use kb_parse_md::{BodyHints, parse_frontmatter};
|
||||
use kebab_parse_md::{BodyHints, parse_frontmatter};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
@@ -18,9 +18,9 @@ use time::macros::datetime;
|
||||
/// snapshot focuses on the §0 Q9 derive contract.
|
||||
#[derive(Serialize)]
|
||||
struct Snapshot {
|
||||
metadata: kb_core::Metadata,
|
||||
metadata: kebab_core::Metadata,
|
||||
span_present: bool,
|
||||
warnings: Vec<kb_parse_types::Warning>,
|
||||
warnings: Vec<kebab_parse_types::Warning>,
|
||||
}
|
||||
|
||||
fn fixtures_dir() -> PathBuf {
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-parse-types"
|
||||
name = "kebab-parse-types"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,5 +8,5 @@ repository = { workspace = true }
|
||||
description = "Parser intermediate representations (no parser libs allowed)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
serde = { workspace = true }
|
||||
@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
|
||||
pub struct ParsedBlock {
|
||||
pub kind: ParsedBlockKind,
|
||||
pub heading_path: Vec<String>,
|
||||
pub source_span: kb_core::SourceSpan,
|
||||
pub source_span: kebab_core::SourceSpan,
|
||||
pub payload: ParsedPayload,
|
||||
}
|
||||
|
||||
@@ -36,11 +36,11 @@ pub enum ParsedPayload {
|
||||
},
|
||||
Paragraph {
|
||||
text: String,
|
||||
inlines: Vec<kb_core::Inline>,
|
||||
inlines: Vec<kebab_core::Inline>,
|
||||
},
|
||||
List {
|
||||
ordered: bool,
|
||||
items: Vec<Vec<kb_core::Inline>>,
|
||||
items: Vec<Vec<kebab_core::Inline>>,
|
||||
},
|
||||
Code {
|
||||
lang: Option<String>,
|
||||
@@ -52,7 +52,7 @@ pub enum ParsedPayload {
|
||||
},
|
||||
Quote {
|
||||
text: String,
|
||||
inlines: Vec<kb_core::Inline>,
|
||||
inlines: Vec<kebab_core::Inline>,
|
||||
},
|
||||
ImageRef {
|
||||
src: String,
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-rag"
|
||||
name = "kebab-rag"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,11 +8,11 @@ repository = { workspace = true }
|
||||
description = "RAG pipeline: retrieve → gate → pack → generate → cite-validate"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-search = { path = "../kb-search" }
|
||||
kb-llm = { path = "../kb-llm" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-search = { path = "../kebab-search" }
|
||||
kebab-llm = { path = "../kebab-llm" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
@@ -23,7 +23,7 @@ anyhow = { workspace = true }
|
||||
blake3 = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
kb-llm = { path = "../kb-llm", features = ["mock"] }
|
||||
kebab-llm = { path = "../kebab-llm", features = ["mock"] }
|
||||
tempfile = { workspace = true }
|
||||
rusqlite = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -18,7 +18,7 @@
|
||||
//! reachable via `Retriever`), `kb-embed*` (only via `Retriever`),
|
||||
//! `kb-llm-local` (only via `LanguageModel`), `kb-tui`, `kb-desktop`.
|
||||
|
||||
pub use kb_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason};
|
||||
pub use kebab_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason};
|
||||
|
||||
mod pipeline;
|
||||
|
||||
@@ -33,13 +33,13 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason,
|
||||
GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters,
|
||||
SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId,
|
||||
};
|
||||
use kb_core::versions::PromptTemplateVersion;
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_core::versions::PromptTemplateVersion;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use regex::Regex;
|
||||
use std::sync::OnceLock;
|
||||
use time::OffsetDateTime;
|
||||
@@ -86,7 +86,7 @@ pub struct AskOpts {
|
||||
|
||||
/// Single-threaded RAG orchestrator. See module docs for the stage list.
|
||||
pub struct RagPipeline {
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
retriever: Arc<dyn Retriever>,
|
||||
llm: Arc<dyn LanguageModel>,
|
||||
docs: Arc<SqliteStore>,
|
||||
@@ -98,7 +98,7 @@ impl RagPipeline {
|
||||
/// `Arc`'d trait objects (kb-app builds them from config; tests
|
||||
/// inject mocks).
|
||||
pub fn new(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
retriever: Arc<dyn Retriever>,
|
||||
llm: Arc<dyn LanguageModel>,
|
||||
docs: Arc<SqliteStore>,
|
||||
@@ -380,7 +380,7 @@ impl RagPipeline {
|
||||
|
||||
for hit in hits {
|
||||
let chunk_full =
|
||||
<SqliteStore as kb_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
|
||||
<SqliteStore as kebab_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
|
||||
.context("kb-rag: docs.get_chunk")?;
|
||||
let chunk_text = match chunk_full {
|
||||
Some(c) => c.text,
|
||||
@@ -542,7 +542,7 @@ impl RagPipeline {
|
||||
/// paths attach the configured embedding model so `kb explain` can
|
||||
/// later identify which embedder shaped the retrieval (even on
|
||||
/// refusals — see `refuse_score_gate`).
|
||||
fn embedding_ref_for(mode: SearchMode, cfg: &kb_config::Config) -> Option<ModelRef> {
|
||||
fn embedding_ref_for(mode: SearchMode, cfg: &kebab_config::Config) -> Option<ModelRef> {
|
||||
match mode {
|
||||
SearchMode::Lexical => None,
|
||||
SearchMode::Vector | SearchMode::Hybrid => Some(ModelRef {
|
||||
@@ -14,12 +14,12 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{
|
||||
ChunkerVersion, ChunkId, Citation, DocumentId, IndexVersion, RetrievalDetail,
|
||||
Retriever, SearchHit, SearchMode, SearchQuery, WorkspacePath,
|
||||
};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@@ -176,7 +176,7 @@ impl Retriever for MockRetriever {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect.
|
||||
/// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
|
||||
pub fn id32(prefix: &str) -> String {
|
||||
let mut s = prefix.to_string();
|
||||
while s.len() < 32 {
|
||||
@@ -10,11 +10,11 @@ use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use common::{MockRetriever, RagEnv, id32, mk_hit};
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage,
|
||||
};
|
||||
use kb_llm::MockLanguageModel;
|
||||
use kb_rag::{AskOpts, RagPipeline, RefusalReason};
|
||||
use kebab_llm::MockLanguageModel;
|
||||
use kebab_rag::{AskOpts, RagPipeline, RefusalReason};
|
||||
|
||||
/// LM ID used everywhere — kept short so snapshots stay stable.
|
||||
const TEST_LM_ID: &str = "mock-lm";
|
||||
@@ -49,7 +49,7 @@ impl CountingLm {
|
||||
}
|
||||
|
||||
impl LanguageModel for CountingLm {
|
||||
fn model_ref(&self) -> kb_core::ModelRef {
|
||||
fn model_ref(&self) -> kebab_core::ModelRef {
|
||||
self.inner.model_ref()
|
||||
}
|
||||
fn context_tokens(&self) -> usize {
|
||||
@@ -57,7 +57,7 @@ impl LanguageModel for CountingLm {
|
||||
}
|
||||
fn generate_stream(
|
||||
&self,
|
||||
req: kb_core::GenerateRequest,
|
||||
req: kebab_core::GenerateRequest,
|
||||
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
|
||||
self.calls.fetch_add(1, Ordering::SeqCst);
|
||||
self.inner.generate_stream(req)
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-search"
|
||||
name = "kebab-search"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,17 +8,17 @@ repository = { workspace = true }
|
||||
description = "Retriever implementations for kb (P2-2 lexical FTS5; P3 vector / hybrid will follow)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
# P3-4 hybrid retriever wraps a `dyn VectorStore` (typically backed by
|
||||
# `kb-store-vector::LanceVectorStore`) and a `dyn Embedder` (any P3-2
|
||||
# adapter). Listed as a runtime dep so callers can construct
|
||||
# `VectorRetriever::new` against the trait objects without a concrete
|
||||
# adapter — the concrete adapter (`kb-embed-local`) stays out of this
|
||||
# crate per the spec's Forbidden deps list.
|
||||
kb-store-vector = { path = "../kb-store-vector" }
|
||||
kb-embed = { path = "../kb-embed" }
|
||||
kebab-store-vector = { path = "../kebab-store-vector" }
|
||||
kebab-embed = { path = "../kebab-embed" }
|
||||
rusqlite = { workspace = true }
|
||||
globset = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -32,4 +32,4 @@ tempfile = { workspace = true }
|
||||
# feature) and stand up a real `LanceVectorStore` on a tmp directory.
|
||||
# The mock-retriever unit tests (the bulk of the hybrid suite) do not
|
||||
# need either, but the integration / snapshot lane does.
|
||||
kb-embed = { path = "../kb-embed", features = ["mock"] }
|
||||
kebab-embed = { path = "../kebab-embed", features = ["mock"] }
|
||||
@@ -1,4 +1,4 @@
|
||||
//! Shared helpers for building `kb_core::Citation` values from a
|
||||
//! Shared helpers for building `kebab_core::Citation` values from a
|
||||
//! chunk's first `SourceSpan`.
|
||||
//!
|
||||
//! Both the lexical and vector retrievers join against the same
|
||||
@@ -9,7 +9,7 @@
|
||||
//! §1.6). Living here means a future PDF / image / audio extractor can
|
||||
//! enrich the mapping in one place rather than two.
|
||||
|
||||
use kb_core::{Citation, SourceSpan, WorkspacePath};
|
||||
use kebab_core::{Citation, SourceSpan, WorkspacePath};
|
||||
|
||||
/// Build a `Citation` from the chunk's first `SourceSpan`. P1 markdown
|
||||
/// only emits `Line`, so the other variants are mostly defensive — we
|
||||
@@ -20,7 +20,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
|
||||
};
|
||||
|
||||
@@ -75,7 +75,7 @@ impl HybridRetriever {
|
||||
/// retrievers. Reads `config.search.hybrid_fusion` (only `"rrf"`
|
||||
/// is recognised today) and `config.search.rrf_k`.
|
||||
pub fn new(
|
||||
config: &kb_config::Config,
|
||||
config: &kebab_config::Config,
|
||||
lexical: Arc<dyn Retriever>,
|
||||
vector: Arc<dyn Retriever>,
|
||||
) -> Self {
|
||||
@@ -335,7 +335,7 @@ fn parse_fusion(name: &str, k_rrf: u32) -> FusionPolicy {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, SearchFilters,
|
||||
SearchHit, SearchMode, WorkspacePath,
|
||||
};
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Lexical (FTS5 + bm25) retriever — design §3.7 / §1.5 / §2.2 / §6.4.
|
||||
//!
|
||||
//! Owns the SQL pattern documented in `tasks/p2/p2-2-lexical-retriever.md`
|
||||
//! and constructs `kb_core::SearchHit` values directly from the joined
|
||||
//! and constructs `kebab_core::SearchHit` values directly from the joined
|
||||
//! `chunks_fts` / `chunks` / `documents` rows. Reads only — never mutates
|
||||
//! the underlying SQLite file.
|
||||
|
||||
@@ -9,12 +9,12 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use globset::GlobMatcher;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, DocumentId, IndexVersion, RetrievalDetail, Retriever,
|
||||
SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
|
||||
WorkspacePath,
|
||||
};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::{params_from_iter, Connection, Row, ToSql};
|
||||
|
||||
use crate::citation_helper::citation_from_first_span;
|
||||
@@ -57,7 +57,7 @@ impl LexicalRetriever {
|
||||
/// Construct with default settings derived from `kb-config`'s defaults.
|
||||
/// Snippet width is computed from `Config::defaults().search.snippet_chars`.
|
||||
pub fn new(store: Arc<SqliteStore>, index_version: IndexVersion) -> Self {
|
||||
let cfg = kb_config::Config::defaults();
|
||||
let cfg = kebab_config::Config::defaults();
|
||||
Self::with_settings(store, index_version, cfg.search.snippet_chars)
|
||||
}
|
||||
|
||||
@@ -297,7 +297,7 @@ fn run_query(
|
||||
params.push(Box::new(lang.0.clone()));
|
||||
}
|
||||
if let Some(trust_min) = &filters.trust_min {
|
||||
// Mirror `kb_store_sqlite::documents::list_documents` ranking:
|
||||
// Mirror `kebab_store_sqlite::documents::list_documents` ranking:
|
||||
// Generated < Secondary < Primary. Doing the rank in SQL
|
||||
// (rather than post-filtering) keeps the row stream short
|
||||
// when the workspace contains many low-trust docs.
|
||||
@@ -523,7 +523,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn build_citation_line_round_trip() {
|
||||
use kb_core::Citation;
|
||||
use kebab_core::Citation;
|
||||
let p = WorkspacePath::new("a/b.md".to_string()).unwrap();
|
||||
let span = SourceSpan::Line { start: 7, end: 12 };
|
||||
let c = citation_from_first_span("c1", p.clone(), Some("S1".to_string()), Some(&span));
|
||||
@@ -545,7 +545,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn build_citation_page_forwards_section() {
|
||||
use kb_core::Citation;
|
||||
use kebab_core::Citation;
|
||||
let p = WorkspacePath::new("doc.pdf".to_string()).unwrap();
|
||||
let span = SourceSpan::Page {
|
||||
page: 4,
|
||||
@@ -568,7 +568,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn build_citation_none_falls_back_to_line_one() {
|
||||
use kb_core::Citation;
|
||||
use kebab_core::Citation;
|
||||
let p = WorkspacePath::new("x.md".to_string()).unwrap();
|
||||
let c = citation_from_first_span("c1", p, None, None);
|
||||
match c {
|
||||
@@ -1,4 +1,4 @@
|
||||
//! `kb-search` — `kb_core::Retriever` implementations.
|
||||
//! `kb-search` — `kebab_core::Retriever` implementations.
|
||||
//!
|
||||
//! - [`LexicalRetriever`] (P2-2): SQLite-FTS5 + bm25 backed retriever
|
||||
//! for `SearchMode::Lexical`.
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Vector retriever — design §3.7 / §7.2 / §1.6.
|
||||
//!
|
||||
//! Wraps a `dyn VectorStore` + `dyn Embedder` + the SQLite metadata
|
||||
//! store into a `kb_core::Retriever`. The vector store knows how to
|
||||
//! store into a `kebab_core::Retriever`. The vector store knows how to
|
||||
//! find the nearest chunks by cosine on the embedding column; SQLite
|
||||
//! owns the human-readable metadata (heading_path / section_label /
|
||||
//! source_spans / chunker_version / workspace_path) needed for
|
||||
@@ -19,12 +19,12 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, DocumentId, Embedder, EmbeddingInput, EmbeddingKind,
|
||||
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
|
||||
SourceSpan, VectorHit, VectorStore, WorkspacePath,
|
||||
};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params_from_iter;
|
||||
|
||||
use crate::citation_helper::citation_from_first_span;
|
||||
@@ -67,7 +67,7 @@ impl VectorRetriever {
|
||||
sqlite: Arc<SqliteStore>,
|
||||
index_version: IndexVersion,
|
||||
) -> Self {
|
||||
let cfg = kb_config::Config::defaults();
|
||||
let cfg = kebab_config::Config::defaults();
|
||||
Self::with_settings(store, embed, sqlite, index_version, cfg.search.snippet_chars)
|
||||
}
|
||||
|
||||
@@ -268,7 +268,7 @@ fn build_hit(
|
||||
meta: &ChunkMeta,
|
||||
rank: u32,
|
||||
index_version: &IndexVersion,
|
||||
model_id: &kb_core::EmbeddingModelId,
|
||||
model_id: &kebab_core::EmbeddingModelId,
|
||||
snippet_chars: usize,
|
||||
) -> Result<SearchHit> {
|
||||
let heading_path: Vec<String> = serde_json::from_str(&meta.heading_path_json)
|
||||
@@ -16,15 +16,15 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{
|
||||
ChunkId, DocumentId, EmbeddingId, EmbeddingInput, EmbeddingKind,
|
||||
EmbeddingModelId, EmbeddingVersion, IndexVersion, VectorRecord, VectorStore,
|
||||
};
|
||||
use kb_embed::{Embedder, MockEmbedder};
|
||||
use kb_search::{LexicalRetriever, VectorRetriever};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kb_store_vector::LanceVectorStore;
|
||||
use kebab_embed::{Embedder, MockEmbedder};
|
||||
use kebab_search::{LexicalRetriever, VectorRetriever};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use kebab_store_vector::LanceVectorStore;
|
||||
use rusqlite::params;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@@ -205,7 +205,7 @@ impl HybridEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect.
|
||||
/// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
|
||||
pub fn id32(prefix: &str) -> String {
|
||||
let mut s = prefix.to_string();
|
||||
while s.len() < 32 {
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user