Merge pull request 'refactor(rename): kb → kebab — 프로젝트 전체 rename' (#29) from refactor/rename-kb-to-kebab into main

Reviewed-on: altair823-org/kb#29
This commit was merged in pull request #29.
This commit is contained in:
2026-05-02 04:03:59 +00:00
200 changed files with 2236 additions and 2236 deletions

158
Cargo.lock generated
View File

@@ -3366,27 +3366,27 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-app" name = "kebab-app"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"blake3", "blake3",
"dirs 5.0.1", "dirs 5.0.1",
"kb-chunk", "kebab-chunk",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-embed", "kebab-embed",
"kb-embed-local", "kebab-embed-local",
"kb-llm", "kebab-llm",
"kb-llm-local", "kebab-llm-local",
"kb-normalize", "kebab-normalize",
"kb-parse-md", "kebab-parse-md",
"kb-parse-types", "kebab-parse-types",
"kb-rag", "kebab-rag",
"kb-search", "kebab-search",
"kb-source-fs", "kebab-source-fs",
"kb-store-sqlite", "kebab-store-sqlite",
"kb-store-vector", "kebab-store-vector",
"rusqlite", "rusqlite",
"serde", "serde",
"serde_json", "serde_json",
@@ -3399,14 +3399,14 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-chunk" name = "kebab-chunk"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"blake3", "blake3",
"kb-core", "kebab-core",
"kb-normalize", "kebab-normalize",
"kb-parse-md", "kebab-parse-md",
"serde_json", "serde_json",
"serde_json_canonicalizer", "serde_json_canonicalizer",
"time", "time",
@@ -3414,32 +3414,32 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-cli" name = "kebab-cli"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
"kb-app", "kebab-app",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-eval", "kebab-eval",
"serde_json", "serde_json",
] ]
[[package]] [[package]]
name = "kb-config" name = "kebab-config"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"dirs 5.0.1", "dirs 5.0.1",
"kb-core", "kebab-core",
"serde", "serde",
"serde_json", "serde_json",
"toml", "toml",
] ]
[[package]] [[package]]
name = "kb-core" name = "kebab-core"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
@@ -3453,13 +3453,13 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-embed" name = "kebab-embed"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"blake3", "blake3",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"proptest", "proptest",
"serde", "serde",
"thiserror 2.0.18", "thiserror 2.0.18",
@@ -3467,27 +3467,27 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-embed-local" name = "kebab-embed-local"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"fastembed", "fastembed",
"kb-config", "kebab-config",
"kb-embed", "kebab-embed",
"serde_json", "serde_json",
"tempfile", "tempfile",
"tracing", "tracing",
] ]
[[package]] [[package]]
name = "kb-eval" name = "kebab-eval"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"kb-app", "kebab-app",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-store-sqlite", "kebab-store-sqlite",
"rusqlite", "rusqlite",
"serde", "serde",
"serde_json", "serde_json",
@@ -3499,22 +3499,22 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-llm" name = "kebab-llm"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"kb-core", "kebab-core",
"proptest", "proptest",
] ]
[[package]] [[package]]
name = "kb-llm-local" name = "kebab-llm-local"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-llm", "kebab-llm",
"reqwest", "reqwest",
"serde", "serde",
"serde_json", "serde_json",
@@ -3525,13 +3525,13 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-normalize" name = "kebab-normalize"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"kb-core", "kebab-core",
"kb-parse-md", "kebab-parse-md",
"kb-parse-types", "kebab-parse-types",
"serde", "serde",
"serde_json", "serde_json",
"time", "time",
@@ -3540,12 +3540,12 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-parse-md" name = "kebab-parse-md"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"kb-core", "kebab-core",
"kb-parse-types", "kebab-parse-types",
"lingua", "lingua",
"pulldown-cmark", "pulldown-cmark",
"serde", "serde",
@@ -3557,24 +3557,24 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-parse-types" name = "kebab-parse-types"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"kb-core", "kebab-core",
"serde", "serde",
] ]
[[package]] [[package]]
name = "kb-rag" name = "kebab-rag"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"blake3", "blake3",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-llm", "kebab-llm",
"kb-search", "kebab-search",
"kb-store-sqlite", "kebab-store-sqlite",
"regex", "regex",
"rusqlite", "rusqlite",
"serde", "serde",
@@ -3586,16 +3586,16 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-search" name = "kebab-search"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"globset", "globset",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-embed", "kebab-embed",
"kb-store-sqlite", "kebab-store-sqlite",
"kb-store-vector", "kebab-store-vector",
"rusqlite", "rusqlite",
"serde_json", "serde_json",
"tempfile", "tempfile",
@@ -3604,14 +3604,14 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-source-fs" name = "kebab-source-fs"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"blake3", "blake3",
"ignore", "ignore",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"serde", "serde",
"serde_json", "serde_json",
"tempfile", "tempfile",
@@ -3621,17 +3621,17 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-store-sqlite" name = "kebab-store-sqlite"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"blake3", "blake3",
"globset", "globset",
"kb-chunk", "kebab-chunk",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-normalize", "kebab-normalize",
"kb-parse-md", "kebab-parse-md",
"refinery", "refinery",
"rusqlite", "rusqlite",
"serde_json", "serde_json",
@@ -3642,7 +3642,7 @@ dependencies = [
] ]
[[package]] [[package]]
name = "kb-store-vector" name = "kebab-store-vector"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
@@ -3651,9 +3651,9 @@ dependencies = [
"arrow-schema", "arrow-schema",
"blake3", "blake3",
"futures", "futures",
"kb-config", "kebab-config",
"kb-core", "kebab-core",
"kb-store-sqlite", "kebab-store-sqlite",
"lancedb", "lancedb",
"rusqlite", "rusqlite",
"serde", "serde",

View File

@@ -1,31 +1,31 @@
[workspace] [workspace]
resolver = "3" resolver = "3"
members = [ members = [
"crates/kb-core", "crates/kebab-core",
"crates/kb-parse-types", "crates/kebab-parse-types",
"crates/kb-config", "crates/kebab-config",
"crates/kb-source-fs", "crates/kebab-source-fs",
"crates/kb-parse-md", "crates/kebab-parse-md",
"crates/kb-normalize", "crates/kebab-normalize",
"crates/kb-chunk", "crates/kebab-chunk",
"crates/kb-store-sqlite", "crates/kebab-store-sqlite",
"crates/kb-store-vector", "crates/kebab-store-vector",
"crates/kb-search", "crates/kebab-search",
"crates/kb-embed", "crates/kebab-embed",
"crates/kb-embed-local", "crates/kebab-embed-local",
"crates/kb-llm", "crates/kebab-llm",
"crates/kb-llm-local", "crates/kebab-llm-local",
"crates/kb-rag", "crates/kebab-rag",
"crates/kb-app", "crates/kebab-app",
"crates/kb-cli", "crates/kebab-cli",
"crates/kb-eval", "crates/kebab-eval",
] ]
[workspace.package] [workspace.package]
edition = "2024" edition = "2024"
rust-version = "1.85" rust-version = "1.85"
license = "MIT OR Apache-2.0" license = "MIT OR Apache-2.0"
repository = "https://github.com/altair823/kb" repository = "https://github.com/altair823/kebab"
version = "0.1.0" version = "0.1.0"
[workspace.dependencies] [workspace.dependencies]

140
README.md
View File

@@ -1,8 +1,8 @@
# kb — Local-first Knowledge Base # kebab — Local-first Knowledge Base
> **상태:** P0~P4 구현 완료 (31 component task 중 17 완료) + 3건 post-merge hotfix 적용. `kb index` / `kb search --mode {lexical,vector,hybrid}` / `kb ask` 모두 실 동작. 다음 단계 = P5 (eval suite). 자세한 진행 상황은 [tasks/INDEX.md](tasks/INDEX.md), 머지 후 발견된 버그와 fix는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md). > **상태:** P0~P4 구현 완료 (31 component task 중 17 완료) + 3건 post-merge hotfix 적용. `kebab index` / `kebab search --mode {lexical,vector,hybrid}` / `kebab ask` 모두 실 동작. 다음 단계 = P5 (eval suite). 자세한 진행 상황은 [tasks/INDEX.md](tasks/INDEX.md), 머지 후 발견된 버그와 fix는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md).
`kb` 는 개인용 로컬 knowledge base + RAG 도구다. Markdown / PDF / 이미지 / 음성을 한 곳에 색인하고, 의미 검색 + citation 포함 LLM 답변을 단일 binary 로 제공한다. 모든 추론은 로컬 (Ollama / fastembed / whisper.cpp) 에서 돌아간다. `kebab` 는 개인용 로컬 knowledge base + RAG 도구다. Markdown / PDF / 이미지 / 음성을 한 곳에 색인하고, 의미 검색 + citation 포함 LLM 답변을 단일 binary 로 제공한다. 모든 추론은 로컬 (Ollama / fastembed / whisper.cpp) 에서 돌아간다.
대상 하드웨어: M4 48GB MacBook 1대, 사용자 1명. 대상 하드웨어: M4 48GB MacBook 1대, 사용자 1명.
@@ -12,14 +12,14 @@
| 명령 | 동작 | 상태 | | 명령 | 동작 | 상태 |
|------|------|------| |------|------|------|
| `kb init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 | ✅ P0 | | `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 | ✅ P0 |
| `kb ingest [<path>]` | Markdown 색인 (idempotent). PDF/이미지/음성은 P6+. | ✅ P3-5 | | `kebab ingest [<path>]` | Markdown 색인 (idempotent). PDF/이미지/음성은 P6+. | ✅ P3-5 |
| `kb search --mode {lexical,vector,hybrid} "<query>"` | 검색 — citation 포함, hybrid는 RRF fusion | ✅ P3-5 | | `kebab search --mode {lexical,vector,hybrid} "<query>"` | 검색 — citation 포함, hybrid는 RRF fusion | ✅ P3-5 |
| `kb list docs` | 색인된 문서 목록 | ✅ P3-5 | | `kebab list docs` | 색인된 문서 목록 | ✅ P3-5 |
| `kb inspect doc <id>` / `kb inspect chunk <id>` | raw record 보기 | ✅ P3-5 | | `kebab inspect doc <id>` / `kebab inspect chunk <id>` | raw record 보기 | ✅ P3-5 |
| `kb ask "<query>"` | RAG 답변 + 근거 인용. 근거 부족 시 거절. Ollama 필요. | ✅ P4-3 | | `kebab ask "<query>"` | RAG 답변 + 근거 인용. 근거 부족 시 거절. Ollama 필요. | ✅ P4-3 |
| `kb doctor` | 설정/모델/DB 헬스 체크 | ✅ P0 | | `kebab doctor` | 설정/모델/DB 헬스 체크 | ✅ P0 |
| `kb eval run / compare` | golden query 회귀 측정 | ⏳ P5 | | `kebab eval run / compare` | golden query 회귀 측정 | ⏳ P5 |
기계 친화 모드: 모든 명령에 `--json` 플래그. 출력은 frozen wire schema v1 (`schema_version` 필드 항상 포함, 예: `ingest_report.v1`, `search_hit.v1`, `answer.v1`, `doctor.v1`). 기계 친화 모드: 모든 명령에 `--json` 플래그. 출력은 frozen wire schema v1 (`schema_version` 필드 항상 포함, 예: `ingest_report.v1`, `search_hit.v1`, `answer.v1`, `doctor.v1`).
@@ -44,35 +44,35 @@
| citation 형식 | URI fragment (`path#L12-L34`, W3C Media Fragments) | | citation 형식 | URI fragment (`path#L12-L34`, W3C Media Fragments) |
| ID 생성 | `blake3(canonical_json(tuple))[..32]` hex | | ID 생성 | `blake3(canonical_json(tuple))[..32]` hex |
| RRF fusion_score | `[0, 1]` 정규화 — `2 / (k_rrf + 1)` 로 나눠 mode 간 비교 가능 (post-merge hotfix) | | RRF fusion_score | `[0, 1]` 정규화 — `2 / (k_rrf + 1)` 로 나눠 mode 간 비교 가능 (post-merge hotfix) |
| layout | XDG (`~/.local/share/kb/`, `~/.config/kb/`, …) | | layout | XDG (`~/.local/share/kebab/`, `~/.config/kebab/`, …) |
전체는 [docs/superpowers/specs/2026-04-27-kb-final-form-design.md](docs/superpowers/specs/2026-04-27-kb-final-form-design.md) 참조. 전체는 [docs/superpowers/specs/2026-04-27-kebab-final-form-design.md](docs/superpowers/specs/2026-04-27-kebab-final-form-design.md) 참조.
--- ---
## 의존성 그래프 ## 의존성 그래프
```text ```text
kb-cli, kb-tui, kb-desktop kebab-cli, kebab-tui, kebab-desktop
└─> kb-app └─> kebab-app
├─> kb-source-fs ├─> kebab-source-fs
├─> kb-parse-md / kb-parse-pdf / kb-parse-image / kb-parse-audio ├─> kebab-parse-md / kebab-parse-pdf / kebab-parse-image / kebab-parse-audio
│ └─> kb-parse-types │ └─> kebab-parse-types
├─> kb-normalize ├─> kebab-normalize
│ └─> kb-parse-types │ └─> kebab-parse-types
├─> kb-chunk ├─> kebab-chunk
├─> kb-store-sqlite ├─> kebab-store-sqlite
├─> kb-store-vector ├─> kebab-store-vector
├─> kb-embed-local (kb-embed trait crate) ├─> kebab-embed-local (kebab-embed trait crate)
├─> kb-search ├─> kebab-search
├─> kb-llm-local (kb-llm trait crate) ├─> kebab-llm-local (kebab-llm trait crate)
├─> kb-rag ├─> kebab-rag
├─> kb-eval ├─> kebab-eval
└─> kb-config └─> kebab-config
└─> kb-core (모두 의존) └─> kebab-core (모두 의존)
``` ```
UI → store/llm/parse 직접 의존 금지. 모든 user-facing 진입은 `kb-app` facade 만 통한다 (design §8). `kb-cli` 가 `--config <path>` flag 를 honor 하려면 `kb_app::*_with_config(cfg, …)` companion 을 통해 Config 을 명시적으로 thread 하는 패턴 — 자세한 이유는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md) 의 `--config` 항목. UI → store/llm/parse 직접 의존 금지. 모든 user-facing 진입은 `kebab-app` facade 만 통한다 (design §8). `kebab-cli` 가 `--config <path>` flag 를 honor 하려면 `kebab_app::*_with_config(cfg, …)` companion 을 통해 Config 을 명시적으로 thread 하는 패턴 — 자세한 이유는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md) 의 `--config` 항목.
--- ---
@@ -80,16 +80,16 @@ UI → store/llm/parse 직접 의존 금지. 모든 user-facing 진입은 `kb-ap
| Phase | 내용 | 핵심 산출 crate | 선행 | 상태 | | Phase | 내용 | 핵심 산출 crate | 선행 | 상태 |
|-------|------|----------------|------|------| |-------|------|----------------|------|------|
| **P0** | Workspace 뼈대 + 도메인 계약 + ID recipe | `kb-core`, `kb-parse-types`, `kb-config`, `kb-app`, `kb-cli` | | ✅ 완료 | | **P0** | Workspace 뼈대 + 도메인 계약 + ID recipe | `kebab-core`, `kebab-parse-types`, `kebab-config`, `kebab-app`, `kebab-cli` | | ✅ 완료 |
| **P1** | Markdown ingestion (walk → parse → chunk → SQLite) | `kb-source-fs`, `kb-parse-md`, `kb-normalize`, `kb-chunk`, `kb-store-sqlite` | P0 | ✅ 완료 | | **P1** | Markdown ingestion (walk → parse → chunk → SQLite) | `kebab-source-fs`, `kebab-parse-md`, `kebab-normalize`, `kebab-chunk`, `kebab-store-sqlite` | P0 | ✅ 완료 |
| **P2** | SQLite FTS5 lexical 검색 + citation | `kb-search` (lexical) | P1 | ✅ 완료 | | **P2** | SQLite FTS5 lexical 검색 + citation | `kebab-search` (lexical) | P1 | ✅ 완료 |
| **P3** | Local embedding + LanceDB + hybrid (RRF) + kb-app wiring | `kb-embed`, `kb-embed-local`, `kb-store-vector`, `kb-search` | P2 | ✅ 완료 | | **P3** | Local embedding + LanceDB + hybrid (RRF) + kebab-app wiring | `kebab-embed`, `kebab-embed-local`, `kebab-store-vector`, `kebab-search` | P2 | ✅ 완료 |
| **P4** | Local LLM + RAG + grounded answer | `kb-llm`, `kb-llm-local`, `kb-rag` | P3 | ✅ 완료 | | **P4** | Local LLM + RAG + grounded answer | `kebab-llm`, `kebab-llm-local`, `kebab-rag` | P3 | ✅ 완료 |
| **P5** | Golden query / regression eval | `kb-eval` | P4 | ⏳ 다음 | | **P5** | Golden query / regression eval | `kebab-eval` | P4 | ⏳ 다음 |
| **P6** | 이미지 ingestion (OCR + caption) | `kb-parse-image` | P5 | ⏳ | | **P6** | 이미지 ingestion (OCR + caption) | `kebab-parse-image` | P5 | ⏳ |
| **P7** | PDF text + page citation | `kb-parse-pdf` | P5 | ⏳ | | **P7** | PDF text + page citation | `kebab-parse-pdf` | P5 | ⏳ |
| **P8** | 음성 transcription + timestamp citation | `kb-parse-audio` | P5 | ⏳ | | **P8** | 음성 transcription + timestamp citation | `kebab-parse-audio` | P5 | ⏳ |
| **P9** | TUI + desktop app | `kb-tui`, `kb-desktop` | P5 | ⏳ | | **P9** | TUI + desktop app | `kebab-tui`, `kebab-desktop` | P5 | ⏳ |
P0~P5 직렬. P6~P9 는 P5 이후 병렬 가능. P0~P5 직렬. P6~P9 는 P5 이후 병렬 가능.
@@ -100,13 +100,13 @@ P0~P5 직렬. P6~P9 P5 이후 병렬 가능.
## 디렉토리 구조 ## 디렉토리 구조
```text ```text
kb/ kebab/
├── README.md # 이 파일 ├── README.md # 이 파일
├── kb_local_rust_report.md # 최초 설계 보고서 (방향성 + 근거) ├── kebab_local_rust_report.md # 최초 설계 보고서 (방향성 + 근거)
├── docs/ ├── docs/
│ ├── superpowers/ │ ├── superpowers/
│ │ ├── specs/ │ │ ├── specs/
│ │ │ └── 2026-04-27-kb-final-form-design.md # frozen design (12 sections) │ │ │ └── 2026-04-27-kebab-final-form-design.md # frozen design (12 sections)
│ │ └── plans/ │ │ └── plans/
│ │ └── 2026-04-27-task-decomposition.md # task 분해 implementation plan │ │ └── 2026-04-27-task-decomposition.md # task 분해 implementation plan
│ ├── SMOKE.md # 로컬 워크스페이스에 직접 돌려보는 절차 │ ├── SMOKE.md # 로컬 워크스페이스에 직접 돌려보는 절차
@@ -127,19 +127,19 @@ kb/
│ ├── p8/p8-1, p8-2 # (2) │ ├── p8/p8-1, p8-2 # (2)
│ └── p9/p9-1 … p9-5 # (5) │ └── p9/p9-1 … p9-5 # (5)
├── crates/ ├── crates/
│ ├── kb-core/ kb-parse-types/ kb-config/ # 도메인 + 설정 (P0) │ ├── kebab-core/ kebab-parse-types/ kebab-config/ # 도메인 + 설정 (P0)
│ ├── kb-source-fs/ # 워크스페이스 walk + checksum (P1-1) │ ├── kebab-source-fs/ # 워크스페이스 walk + checksum (P1-1)
│ ├── kb-parse-md/ # Markdown frontmatter + blocks (P1-2/3) │ ├── kebab-parse-md/ # Markdown frontmatter + blocks (P1-2/3)
│ ├── kb-normalize/ # ParsedBlock → CanonicalDocument (P1-4) │ ├── kebab-normalize/ # ParsedBlock → CanonicalDocument (P1-4)
│ ├── kb-chunk/ # heading-aware chunker (P1-5) │ ├── kebab-chunk/ # heading-aware chunker (P1-5)
│ ├── kb-store-sqlite/ # SQLite + FTS5 (V001/V002/V003) (P1-6, P2-1, P3-3) │ ├── kebab-store-sqlite/ # SQLite + FTS5 (V001/V002/V003) (P1-6, P2-1, P3-3)
│ ├── kb-search/ # Lexical + Vector + Hybrid retriever (P2-2, P3-4) │ ├── kebab-search/ # Lexical + Vector + Hybrid retriever (P2-2, P3-4)
│ ├── kb-embed/ kb-embed-local/ # Embedder trait + fastembed adapter (P3-1, P3-2) │ ├── kebab-embed/ kebab-embed-local/ # Embedder trait + fastembed adapter (P3-1, P3-2)
│ ├── kb-store-vector/ # LanceDB VectorStore (P3-3) │ ├── kebab-store-vector/ # LanceDB VectorStore (P3-3)
│ ├── kb-llm/ kb-llm-local/ # LanguageModel trait + Ollama adapter (P4-1, P4-2) │ ├── kebab-llm/ kebab-llm-local/ # LanguageModel trait + Ollama adapter (P4-1, P4-2)
│ ├── kb-rag/ # RAG pipeline (P4-3) │ ├── kebab-rag/ # RAG pipeline (P4-3)
│ ├── kb-app/ # facade (P0 시그니처 + P3-5 본체) │ ├── kebab-app/ # facade (P0 시그니처 + P3-5 본체)
│ └── kb-cli/ # binary (P0 → 핫픽스로 --config flag wiring 강화) │ └── kebab-cli/ # binary (P0 → 핫픽스로 --config flag wiring 강화)
├── migrations/ # SQLite refinery V001/V002/V003 ├── migrations/ # SQLite refinery V001/V002/V003
└── fixtures/ # 테스트 fixture 트리 └── fixtures/ # 테스트 fixture 트리
``` ```
@@ -153,19 +153,19 @@ kb/
cargo build --release cargo build --release
# 첫 실행 — XDG 경로에 config.toml 생성 # 첫 실행 — XDG 경로에 config.toml 생성
./target/release/kb init ./target/release/kebab init
# config 손보고 # config 손보고
${EDITOR:-vi} ~/.config/kb/config.toml ${EDITOR:-vi} ~/.config/kebab/config.toml
# 색인 # 색인
./target/release/kb ingest ./target/release/kebab ingest
# 검색 # 검색
./target/release/kb search "Markdown chunking 규칙" --mode hybrid ./target/release/kebab search "Markdown chunking 규칙" --mode hybrid
# 질문 (Ollama 필요) # 질문 (Ollama 필요)
./target/release/kb ask "내 KB 설계에서 저장소 전략은?" ./target/release/kebab ask "내 KB 설계에서 저장소 전략은?"
``` ```
워크스페이스를 격리해서 직접 돌려보는 패턴은 [docs/SMOKE.md](docs/SMOKE.md) 참조 — `--config <path>` 로 임시 디렉토리에 격리된 KB 를 만들 수 있다. 워크스페이스를 격리해서 직접 돌려보는 패턴은 [docs/SMOKE.md](docs/SMOKE.md) 참조 — `--config <path>` 로 임시 디렉토리에 격리된 KB 를 만들 수 있다.
@@ -181,17 +181,17 @@ ${EDITOR:-vi} ~/.config/kb/config.toml
- multi-workspace (P+ 후순위) - multi-workspace (P+ 후순위)
- LLM-as-judge eval (rule-based `must_contain` 만) - LLM-as-judge eval (rule-based `must_contain` 만)
- visual embedding (CLIP) — P+ - visual embedding (CLIP) — P+
- desktop app `kb://` protocol handler — P+ - desktop app `kebab://` protocol handler — P+
--- ---
## 외부 AI 통합 ## 외부 AI 통합
`kb` 의 `--json` 모드 + frozen wire schema v1 은 외부 자동화의 stable contract. 가능한 통합: `kebab` 의 `--json` 모드 + frozen wire schema v1 은 외부 자동화의 stable contract. 가능한 통합:
1. **Claude Code / Codex skill** — 얇은 wrapper (`kb search --json` / `kb ask --json` 호출). ~50 lines. 1. **Claude Code / Codex skill** — 얇은 wrapper (`kebab search --json` / `kebab ask --json` 호출). ~50 lines.
2. **MCP server** — `kb-mcp` binary (stdio JSON-RPC) 가 `kb-app` facade 를 1:1 노출. Claude Desktop / Cursor / Zed 등 공유. 2. **MCP server** — `kebab-mcp` binary (stdio JSON-RPC) 가 `kebab-app` facade 를 1:1 노출. Claude Desktop / Cursor / Zed 등 공유.
3. **HTTP wrapper** — `kb serve --bind 127.0.0.1:7711` (P+, local-only 가치 깨므로 신중). 3. **HTTP wrapper** — `kebab serve --bind 127.0.0.1:7711` (P+, local-only 가치 깨므로 신중).
--- ---
@@ -199,7 +199,7 @@ ${EDITOR:-vi} ~/.config/kb/config.toml
이 repo 는 단일 사용자 프로젝트지만 spec 변경 절차는 명문화되어 있다. 이 repo 는 단일 사용자 프로젝트지만 spec 변경 절차는 명문화되어 있다.
1. **frozen design 변경** — `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` 가 단일 contract. 변경 시 영향 받는 component task 모두 동시 갱신 필요. PR 1개로 묶기. 1. **frozen design 변경** — `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` 가 단일 contract. 변경 시 영향 받는 component task 모두 동시 갱신 필요. PR 1개로 묶기.
2. **새 component task 추가** — `tasks/_template.md` 복사 후 `tasks/p<phase>/p<phase>-<n>-<name>.md` 생성. `contract_sections` 에 design doc 섹션 명시. `Allowed/Forbidden dependencies` 는 design §8 module-boundary 표 따름. 2. **새 component task 추가** — `tasks/_template.md` 복사 후 `tasks/p<phase>/p<phase>-<n>-<name>.md` 생성. `contract_sections` 에 design doc 섹션 명시. `Allowed/Forbidden dependencies` 는 design §8 module-boundary 표 따름.
3. **구현** — component task 1개당 sub-agent 1세션 권장. `cargo test -p <crate>` + DoD 체크리스트 통과. PR 으로 머지. 3. **구현** — component task 1개당 sub-agent 1세션 권장. `cargo test -p <crate>` + DoD 체크리스트 통과. PR 으로 머지.
4. **버전 변경** — `parser_version` / `chunker_version` / `embedding_version` 등 변경은 design §9 의 cascade rule 따름. 영향 받는 record 는 재처리 필요. 4. **버전 변경** — `parser_version` / `chunker_version` / `embedding_version` 등 변경은 design §9 의 cascade rule 따름. 영향 받는 record 는 재처리 필요.
@@ -215,8 +215,8 @@ ${EDITOR:-vi} ~/.config/kb/config.toml
## 참고 ## 참고
- 최초 설계 보고서: [kb_local_rust_report.md](kb_local_rust_report.md) - 최초 설계 보고서: [kebab_local_rust_report.md](kebab_local_rust_report.md)
- Frozen design: [docs/superpowers/specs/2026-04-27-kb-final-form-design.md](docs/superpowers/specs/2026-04-27-kb-final-form-design.md) - Frozen design: [docs/superpowers/specs/2026-04-27-kebab-final-form-design.md](docs/superpowers/specs/2026-04-27-kebab-final-form-design.md)
- Task 분해 plan: [docs/superpowers/plans/2026-04-27-task-decomposition.md](docs/superpowers/plans/2026-04-27-task-decomposition.md) - Task 분해 plan: [docs/superpowers/plans/2026-04-27-task-decomposition.md](docs/superpowers/plans/2026-04-27-task-decomposition.md)
- Task 인덱스: [tasks/INDEX.md](tasks/INDEX.md) - Task 인덱스: [tasks/INDEX.md](tasks/INDEX.md)
- Post-merge 핫픽스 로그: [tasks/HOTFIXES.md](tasks/HOTFIXES.md) - Post-merge 핫픽스 로그: [tasks/HOTFIXES.md](tasks/HOTFIXES.md)

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-app" name = "kebab-app"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,21 +8,21 @@ repository = { workspace = true }
description = "Facade — orchestrates components for kb-cli/tui/desktop" description = "Facade — orchestrates components for kb-cli/tui/desktop"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
kb-source-fs = { path = "../kb-source-fs" } kebab-source-fs = { path = "../kebab-source-fs" }
kb-parse-md = { path = "../kb-parse-md" } kebab-parse-md = { path = "../kebab-parse-md" }
kb-parse-types = { path = "../kb-parse-types" } kebab-parse-types = { path = "../kebab-parse-types" }
kb-normalize = { path = "../kb-normalize" } kebab-normalize = { path = "../kebab-normalize" }
kb-chunk = { path = "../kb-chunk" } kebab-chunk = { path = "../kebab-chunk" }
kb-store-sqlite = { path = "../kb-store-sqlite" } kebab-store-sqlite = { path = "../kebab-store-sqlite" }
kb-store-vector = { path = "../kb-store-vector" } kebab-store-vector = { path = "../kebab-store-vector" }
kb-search = { path = "../kb-search" } kebab-search = { path = "../kebab-search" }
kb-embed = { path = "../kb-embed" } kebab-embed = { path = "../kebab-embed" }
kb-embed-local = { path = "../kb-embed-local" } kebab-embed-local = { path = "../kebab-embed-local" }
kb-llm = { path = "../kb-llm" } kebab-llm = { path = "../kebab-llm" }
kb-llm-local = { path = "../kb-llm-local" } kebab-llm-local = { path = "../kebab-llm-local" }
kb-rag = { path = "../kb-rag" } kebab-rag = { path = "../kebab-rag" }
anyhow = { workspace = true } anyhow = { workspace = true }
blake3 = { workspace = true } blake3 = { workspace = true }
serde = { workspace = true } serde = { workspace = true }

View File

@@ -37,16 +37,16 @@ use std::sync::{Arc, OnceLock};
use anyhow::{Context, Result, anyhow}; use anyhow::{Context, Result, anyhow};
use kb_core::{ use kebab_core::{
Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode, Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode,
SearchQuery, VectorStore, SearchQuery, VectorStore,
}; };
use kb_embed_local::FastembedEmbedder; use kebab_embed_local::FastembedEmbedder;
use kb_llm_local::OllamaLanguageModel; use kebab_llm_local::OllamaLanguageModel;
use kb_rag::{AskOpts, RagPipeline}; use kebab_rag::{AskOpts, RagPipeline};
use kb_search::{HybridRetriever, LexicalRetriever, VectorRetriever}; use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use kb_store_vector::LanceVectorStore; use kebab_store_vector::LanceVectorStore;
/// Facade state — see module docs for lifetime rules. /// Facade state — see module docs for lifetime rules.
/// ///
@@ -55,7 +55,7 @@ use kb_store_vector::LanceVectorStore;
/// ask calls. The OnceLock-backed `embedder` / `vector` fields ensure /// ask calls. The OnceLock-backed `embedder` / `vector` fields ensure
/// the cold-start cost is paid exactly once per instance. /// the cold-start cost is paid exactly once per instance.
pub struct App { pub struct App {
pub(crate) config: kb_config::Config, pub(crate) config: kebab_config::Config,
pub(crate) sqlite: Arc<SqliteStore>, pub(crate) sqlite: Arc<SqliteStore>,
/// Memoized embedder — built lazily on first `embedder()` call when /// Memoized embedder — built lazily on first `embedder()` call when
/// embeddings are enabled. `OnceLock` keeps the struct `Sync` and /// embeddings are enabled. `OnceLock` keeps the struct `Sync` and
@@ -80,7 +80,7 @@ impl App {
/// Downstream `LanceVectorStore::new` (called by [`Self::vector`]) /// Downstream `LanceVectorStore::new` (called by [`Self::vector`])
/// internally drives a `tokio::Runtime::block_on`, which panics if /// internally drives a `tokio::Runtime::block_on`, which panics if
/// invoked from inside another tokio runtime. /// invoked from inside another tokio runtime.
pub fn open_with_config(config: kb_config::Config) -> Result<Self> { pub fn open_with_config(config: kebab_config::Config) -> Result<Self> {
let sqlite = SqliteStore::open(&config).context("kb-app: open SqliteStore")?; let sqlite = SqliteStore::open(&config).context("kb-app: open SqliteStore")?;
sqlite sqlite
.run_migrations() .run_migrations()
@@ -286,7 +286,7 @@ impl App {
/// the active config. This token surfaces in `SearchHit.index_version` /// the active config. This token surfaces in `SearchHit.index_version`
/// and on snapshot tests; including the chunker version pins it to /// and on snapshot tests; including the chunker version pins it to
/// the chunking policy in effect. /// the chunking policy in effect.
fn lexical_index_version(config: &kb_config::Config) -> IndexVersion { fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion {
IndexVersion(format!("lex:{}", config.chunking.chunker_version)) IndexVersion(format!("lex:{}", config.chunking.chunker_version))
} }

View File

@@ -23,7 +23,7 @@
//! ## Config seam (`*_with_config`) //! ## Config seam (`*_with_config`)
//! //!
//! Each public free function has a `#[doc(hidden)] pub fn *_with_config` //! Each public free function has a `#[doc(hidden)] pub fn *_with_config`
//! companion that takes a fully-resolved [`kb_config::Config`] directly. //! companion that takes a fully-resolved [`kebab_config::Config`] directly.
//! Three callers go through it: (1) the top-level free functions //! Three callers go through it: (1) the top-level free functions
//! themselves, after `load_config()`; (2) `kb-cli` when the user passes //! themselves, after `load_config()`; (2) `kb-cli` when the user passes
//! `--config <path>` (CLI builds the Config via //! `--config <path>` (CLI builds the Config via
@@ -39,16 +39,16 @@ use std::sync::Arc;
use anyhow::{Context, anyhow}; use anyhow::{Context, anyhow};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use kb_chunk::MdHeadingV1Chunker; use kebab_chunk::MdHeadingV1Chunker;
use kb_core::{ use kebab_core::{
Answer, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker, Answer, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput, DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
EmbeddingKind, IngestReport, ParserVersion, RawAsset, SearchHit, SearchQuery, EmbeddingKind, IngestReport, ParserVersion, RawAsset, SearchHit, SearchQuery,
SourceConnector, SourceScope, SourceUri, VectorRecord, VectorStore, SourceConnector, SourceScope, SourceUri, VectorRecord, VectorStore,
}; };
use kb_normalize::build_canonical_document; use kebab_normalize::build_canonical_document;
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter}; use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use kb_source_fs::FsSourceConnector; use kebab_source_fs::FsSourceConnector;
mod app; mod app;
pub mod doctor_signal; pub mod doctor_signal;
@@ -61,15 +61,15 @@ pub use app::App;
/// Kept in lock-step with the literal used in the `kb-store-sqlite` /// Kept in lock-step with the literal used in the `kb-store-sqlite`
/// idempotency / round-trip tests so the version label written by the /// idempotency / round-trip tests so the version label written by the
/// app and the one used in cross-crate fixtures match. /// app and the one used in cross-crate fixtures match.
const KB_PARSE_MD_VERSION: &str = "pulldown-cmark-0.x"; const KEBAB_PARSE_MD_VERSION: &str = "pulldown-cmark-0.x";
/// Caller-supplied knobs for one [`ask`] invocation. /// Caller-supplied knobs for one [`ask`] invocation.
/// ///
/// Re-exported from [`kb_rag::AskOpts`] (P4-3 owns the type) so kb-cli's /// Re-exported from [`kebab_rag::AskOpts`] (P4-3 owns the type) so kb-cli's
/// `use kb_app::AskOpts` keeps working without churn. The struct gained /// `use kebab_app::AskOpts` keeps working without churn. The struct gained
/// a `stream_sink` field in P4-3; non-streaming callers (kb-cli today) /// a `stream_sink` field in P4-3; non-streaming callers (kb-cli today)
/// pass `stream_sink: None`. /// pass `stream_sink: None`.
pub use kb_rag::AskOpts; pub use kebab_rag::AskOpts;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct DoctorReport { pub struct DoctorReport {
@@ -90,10 +90,10 @@ pub struct DoctorCheck {
/// Create XDG dirs and write a starter `config.toml`. Idempotent unless /// Create XDG dirs and write a starter `config.toml`. Idempotent unless
/// `force=true` (which overwrites an existing config). /// `force=true` (which overwrites an existing config).
pub fn init_workspace(force: bool) -> anyhow::Result<()> { pub fn init_workspace(force: bool) -> anyhow::Result<()> {
let cfg_path = kb_config::Config::xdg_config_path(); let cfg_path = kebab_config::Config::xdg_config_path();
let data_dir = kb_config::Config::xdg_data_dir(); let data_dir = kebab_config::Config::xdg_data_dir();
let cache_dir = kb_config::Config::xdg_cache_dir(); let cache_dir = kebab_config::Config::xdg_cache_dir();
let state_dir = kb_config::Config::xdg_state_dir(); let state_dir = kebab_config::Config::xdg_state_dir();
for d in [ for d in [
cfg_path.parent().map(PathBuf::from).unwrap_or_default(), cfg_path.parent().map(PathBuf::from).unwrap_or_default(),
@@ -107,11 +107,11 @@ pub fn init_workspace(force: bool) -> anyhow::Result<()> {
} }
} }
let workspace_root = expand_tilde(&kb_config::Config::defaults().workspace.root); let workspace_root = expand_tilde(&kebab_config::Config::defaults().workspace.root);
std::fs::create_dir_all(&workspace_root)?; std::fs::create_dir_all(&workspace_root)?;
if !cfg_path.exists() || force { if !cfg_path.exists() || force {
let cfg = kb_config::Config::defaults(); let cfg = kebab_config::Config::defaults();
let toml_text = toml::to_string_pretty(&cfg)?; let toml_text = toml::to_string_pretty(&cfg)?;
std::fs::write(&cfg_path, toml_text)?; std::fs::write(&cfg_path, toml_text)?;
} }
@@ -141,8 +141,8 @@ fn expand_tilde(s: &str) -> PathBuf {
/// Callers that already have a Config in hand (CLI honoring `--config`, /// Callers that already have a Config in hand (CLI honoring `--config`,
/// integration tests, TUI session) should bypass this and call the /// integration tests, TUI session) should bypass this and call the
/// matching `*_with_config` helper directly. /// matching `*_with_config` helper directly.
fn load_config() -> anyhow::Result<kb_config::Config> { fn load_config() -> anyhow::Result<kebab_config::Config> {
kb_config::Config::load(None) kebab_config::Config::load(None)
} }
// ── ingest ──────────────────────────────────────────────────────────────── // ── ingest ────────────────────────────────────────────────────────────────
@@ -154,11 +154,11 @@ pub fn ingest(scope: SourceScope, summary_only: bool) -> anyhow::Result<IngestRe
/// Config-explicit variant — bypasses [`load_config`] when the /// Config-explicit variant — bypasses [`load_config`] when the
/// caller (kb-cli with `--config`, integration tests, TUI session) /// caller (kb-cli with `--config`, integration tests, TUI session)
/// already has a [`kb_config::Config`] in hand. The public free /// already has a [`kebab_config::Config`] in hand. The public free
/// function [`ingest`] wraps this with the XDG-default load. /// function [`ingest`] wraps this with the XDG-default load.
#[doc(hidden)] #[doc(hidden)]
pub fn ingest_with_config( pub fn ingest_with_config(
config: kb_config::Config, config: kebab_config::Config,
scope: SourceScope, scope: SourceScope,
summary_only: bool, summary_only: bool,
) -> anyhow::Result<IngestReport> { ) -> anyhow::Result<IngestReport> {
@@ -187,7 +187,7 @@ pub fn ingest_with_config(
.context("kb-app::ingest: ensure Lance table")?; .context("kb-app::ingest: ensure Lance table")?;
} }
let parser_version = ParserVersion(KB_PARSE_MD_VERSION.to_string()); let parser_version = ParserVersion(KEBAB_PARSE_MD_VERSION.to_string());
let chunk_policy = chunk_policy_from_config(&app.config); let chunk_policy = chunk_policy_from_config(&app.config);
// Pre-load every existing doc_id so we can label `IngestItem.kind` // Pre-load every existing doc_id so we can label `IngestItem.kind`
@@ -205,13 +205,13 @@ pub fn ingest_with_config(
let started_at = time::OffsetDateTime::now_utc(); let started_at = time::OffsetDateTime::now_utc();
let mut items: Vec<kb_core::IngestItem> = Vec::new(); let mut items: Vec<kebab_core::IngestItem> = Vec::new();
let mut new_count: u32 = 0; let mut new_count: u32 = 0;
let mut updated_count: u32 = 0; let mut updated_count: u32 = 0;
let mut skipped_count: u32 = 0; let mut skipped_count: u32 = 0;
let mut error_count: u32 = 0; let mut error_count: u32 = 0;
// Aggregate counts surfaced into `ingest_runs` (and tracing). Not // Aggregate counts surfaced into `ingest_runs` (and tracing). Not
// exposed on `IngestReport` today — `kb_core::IngestReport` is a // exposed on `IngestReport` today — `kebab_core::IngestReport` is a
// wire-stable struct without these fields — but persisting them // wire-stable struct without these fields — but persisting them
// means audit tooling and `kb jobs` (P+) can recover the totals // means audit tooling and `kb jobs` (P+) can recover the totals
// without re-walking the DB. // without re-walking the DB.
@@ -236,14 +236,14 @@ pub fn ingest_with_config(
Ok(i) => i, Ok(i) => i,
Err(e) => { Err(e) => {
tracing::error!( tracing::error!(
target: "kb-app", target: "kebab-app",
path = %asset.workspace_path.0, path = %asset.workspace_path.0,
error = %e, error = %e,
"kb-app::ingest: per-file fatal" "kb-app::ingest: per-file fatal"
); );
error_count = error_count.saturating_add(1); error_count = error_count.saturating_add(1);
kb_core::IngestItem { kebab_core::IngestItem {
kind: kb_core::IngestItemKind::Error, kind: kebab_core::IngestItemKind::Error,
doc_id: None, doc_id: None,
doc_path: asset.workspace_path.clone(), doc_path: asset.workspace_path.clone(),
asset_id: Some(asset.asset_id.clone()), asset_id: Some(asset.asset_id.clone()),
@@ -259,7 +259,7 @@ pub fn ingest_with_config(
}; };
match item.kind { match item.kind {
kb_core::IngestItemKind::New => { kebab_core::IngestItemKind::New => {
new_count = new_count.saturating_add(1); new_count = new_count.saturating_add(1);
let n = item.chunk_count.unwrap_or(0); let n = item.chunk_count.unwrap_or(0);
chunks_indexed = chunks_indexed.saturating_add(n); chunks_indexed = chunks_indexed.saturating_add(n);
@@ -267,7 +267,7 @@ pub fn ingest_with_config(
embeddings_indexed = embeddings_indexed.saturating_add(n); embeddings_indexed = embeddings_indexed.saturating_add(n);
} }
} }
kb_core::IngestItemKind::Updated => { kebab_core::IngestItemKind::Updated => {
updated_count = updated_count.saturating_add(1); updated_count = updated_count.saturating_add(1);
let n = item.chunk_count.unwrap_or(0); let n = item.chunk_count.unwrap_or(0);
chunks_indexed = chunks_indexed.saturating_add(n); chunks_indexed = chunks_indexed.saturating_add(n);
@@ -275,10 +275,10 @@ pub fn ingest_with_config(
embeddings_indexed = embeddings_indexed.saturating_add(n); embeddings_indexed = embeddings_indexed.saturating_add(n);
} }
} }
kb_core::IngestItemKind::Skipped => { kebab_core::IngestItemKind::Skipped => {
skipped_count = skipped_count.saturating_add(1) skipped_count = skipped_count.saturating_add(1)
} }
kb_core::IngestItemKind::Error => { kebab_core::IngestItemKind::Error => {
error_count = error_count.saturating_add(1) error_count = error_count.saturating_add(1)
} }
} }
@@ -293,9 +293,9 @@ pub fn ingest_with_config(
"scope": scope, "scope": scope,
"summary_only": summary_only, "summary_only": summary_only,
}); });
let job_id_res = <SqliteStoreAlias as kb_core::JobRepo>::create( let job_id_res = <SqliteStoreAlias as kebab_core::JobRepo>::create(
&app.sqlite, &app.sqlite,
kb_core::JobKind::Ingest, kebab_core::JobKind::Ingest,
payload, payload,
); );
match job_id_res { match job_id_res {
@@ -312,25 +312,25 @@ pub fn ingest_with_config(
"chunks_indexed": chunks_indexed, "chunks_indexed": chunks_indexed,
"embeddings_indexed": embeddings_indexed, "embeddings_indexed": embeddings_indexed,
}); });
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::update_progress( if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::update_progress(
&app.sqlite, &app.sqlite,
&jid, &jid,
progress, progress,
) { ) {
tracing::warn!( tracing::warn!(
target: "kb-app", target: "kebab-app",
error = %e, error = %e,
"kb-app::ingest: JobRepo::update_progress failed" "kb-app::ingest: JobRepo::update_progress failed"
); );
} }
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::finish( if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::finish(
&app.sqlite, &app.sqlite,
&jid, &jid,
kb_core::JobStatus::Succeeded, kebab_core::JobStatus::Succeeded,
None, None,
) { ) {
tracing::warn!( tracing::warn!(
target: "kb-app", target: "kebab-app",
error = %e, error = %e,
"kb-app::ingest: JobRepo::finish failed" "kb-app::ingest: JobRepo::finish failed"
); );
@@ -338,7 +338,7 @@ pub fn ingest_with_config(
} }
Err(e) => { Err(e) => {
tracing::warn!( tracing::warn!(
target: "kb-app", target: "kebab-app",
error = %e, error = %e,
"kb-app::ingest: JobRepo::create failed; run not recorded in `jobs`" "kb-app::ingest: JobRepo::create failed; run not recorded in `jobs`"
); );
@@ -361,7 +361,7 @@ pub fn ingest_with_config(
Ok(s) => Some(s), Ok(s) => Some(s),
Err(e) => { Err(e) => {
tracing::warn!( tracing::warn!(
target: "kb-app", target: "kebab-app",
error = %e, error = %e,
"kb-app::ingest: failed to serialize items_json; storing NULL" "kb-app::ingest: failed to serialize items_json; storing NULL"
); );
@@ -370,7 +370,7 @@ pub fn ingest_with_config(
} }
}; };
let run_id = mint_ingest_run_id(&scope_json, started_at); let run_id = mint_ingest_run_id(&scope_json, started_at);
let row = kb_store_sqlite::IngestRunRow { let row = kebab_store_sqlite::IngestRunRow {
run_id: &run_id, run_id: &run_id,
scope_json: &scope_json, scope_json: &scope_json,
scanned: scanned_count, scanned: scanned_count,
@@ -385,14 +385,14 @@ pub fn ingest_with_config(
}; };
if let Err(e) = app.sqlite.record_ingest_run(&row) { if let Err(e) = app.sqlite.record_ingest_run(&row) {
tracing::warn!( tracing::warn!(
target: "kb-app", target: "kebab-app",
error = %e, error = %e,
"kb-app::ingest: record_ingest_run failed" "kb-app::ingest: record_ingest_run failed"
); );
} }
tracing::info!( tracing::info!(
target: "kb-app", target: "kebab-app",
scanned = scanned_count, scanned = scanned_count,
new = new_count, new = new_count,
updated = updated_count, updated = updated_count,
@@ -432,7 +432,7 @@ fn mint_ingest_run_id(scope_json: &str, at: time::OffsetDateTime) -> String {
/// vs `JobRepo`) on the same store. Plain `app.sqlite.create(...)` /// vs `JobRepo`) on the same store. Plain `app.sqlite.create(...)`
/// would pick one based on inherent vs trait methods; we go through /// would pick one based on inherent vs trait methods; we go through
/// `<… as JobRepo>` to be explicit. /// `<… as JobRepo>` to be explicit.
type SqliteStoreAlias = kb_store_sqlite::SqliteStore; type SqliteStoreAlias = kebab_store_sqlite::SqliteStore;
/// Process a single asset: read bytes, parse, normalize, chunk, /// Process a single asset: read bytes, parse, normalize, chunk,
/// persist, embed. Per-asset failures bubble up to the caller for /// persist, embed. Per-asset failures bubble up to the caller for
@@ -444,18 +444,18 @@ fn ingest_one_asset(
parser_version: &ParserVersion, parser_version: &ParserVersion,
chunk_policy: &ChunkPolicy, chunk_policy: &ChunkPolicy,
embedder: Option<&Arc<dyn Embedder + Send + Sync>>, embedder: Option<&Arc<dyn Embedder + Send + Sync>>,
vector_store: Option<&Arc<kb_store_vector::LanceVectorStore>>, vector_store: Option<&Arc<kebab_store_vector::LanceVectorStore>>,
existing_doc_ids: &std::collections::HashSet<String>, existing_doc_ids: &std::collections::HashSet<String>,
) -> anyhow::Result<kb_core::IngestItem> { ) -> anyhow::Result<kebab_core::IngestItem> {
tracing::debug!( tracing::debug!(
target: "kb-app::ingest", target: "kebab-app::ingest",
path = %asset.workspace_path.0, path = %asset.workspace_path.0,
"processing asset" "processing asset"
); );
// Only handle Markdown for now; other media types are P6+ work. // Only handle Markdown for now; other media types are P6+ work.
if asset.media_type != kb_core::MediaType::Markdown { if asset.media_type != kebab_core::MediaType::Markdown {
return Ok(kb_core::IngestItem { return Ok(kebab_core::IngestItem {
kind: kb_core::IngestItemKind::Skipped, kind: kebab_core::IngestItemKind::Skipped,
doc_id: None, doc_id: None,
doc_path: asset.workspace_path.clone(), doc_path: asset.workspace_path.clone(),
asset_id: Some(asset.asset_id.clone()), asset_id: Some(asset.asset_id.clone()),
@@ -472,8 +472,8 @@ fn ingest_one_asset(
let path = match &asset.source_uri { let path = match &asset.source_uri {
SourceUri::File(p) => p.clone(), SourceUri::File(p) => p.clone(),
SourceUri::Kb(_) => { SourceUri::Kb(_) => {
return Ok(kb_core::IngestItem { return Ok(kebab_core::IngestItem {
kind: kb_core::IngestItemKind::Skipped, kind: kebab_core::IngestItemKind::Skipped,
doc_id: None, doc_id: None,
doc_path: asset.workspace_path.clone(), doc_path: asset.workspace_path.clone(),
asset_id: Some(asset.asset_id.clone()), asset_id: Some(asset.asset_id.clone()),
@@ -569,7 +569,7 @@ fn ingest_one_asset(
.iter() .iter()
.zip(vectors) .zip(vectors)
.map(|(c, v)| VectorRecord { .map(|(c, v)| VectorRecord {
embedding_id: kb_core::id_for_embedding( embedding_id: kebab_core::id_for_embedding(
&c.chunk_id, &c.chunk_id,
&model_id, &model_id,
&model_version, &model_version,
@@ -592,12 +592,12 @@ fn ingest_one_asset(
} }
let kind = if existing_doc_ids.contains(&canonical.doc_id.0) { let kind = if existing_doc_ids.contains(&canonical.doc_id.0) {
kb_core::IngestItemKind::Updated kebab_core::IngestItemKind::Updated
} else { } else {
kb_core::IngestItemKind::New kebab_core::IngestItemKind::New
}; };
Ok(kb_core::IngestItem { Ok(kebab_core::IngestItem {
kind, kind,
doc_id: Some(canonical.doc_id.clone()), doc_id: Some(canonical.doc_id.clone()),
doc_path: asset.workspace_path.clone(), doc_path: asset.workspace_path.clone(),
@@ -613,7 +613,7 @@ fn ingest_one_asset(
} }
/// Convenience: end byte of the frontmatter region (or 0 when absent). /// Convenience: end byte of the frontmatter region (or 0 when absent).
fn fm_span_end(span: Option<kb_parse_md::FrontmatterSpan>) -> usize { fn fm_span_end(span: Option<kebab_parse_md::FrontmatterSpan>) -> usize {
span.map(|s| s.end).unwrap_or(0) span.map(|s| s.end).unwrap_or(0)
} }
@@ -640,7 +640,7 @@ fn build_body_hints(asset: &RawAsset) -> BodyHints {
} }
/// Build a `ChunkPolicy` from the active config. /// Build a `ChunkPolicy` from the active config.
fn chunk_policy_from_config(config: &kb_config::Config) -> ChunkPolicy { fn chunk_policy_from_config(config: &kebab_config::Config) -> ChunkPolicy {
ChunkPolicy { ChunkPolicy {
target_tokens: config.chunking.target_tokens, target_tokens: config.chunking.target_tokens,
overlap_tokens: config.chunking.overlap_tokens, overlap_tokens: config.chunking.overlap_tokens,
@@ -660,7 +660,7 @@ pub fn list_docs(filter: DocFilter) -> anyhow::Result<Vec<DocSummary>> {
/// ([`list_docs`]), not this. /// ([`list_docs`]), not this.
#[doc(hidden)] #[doc(hidden)]
pub fn list_docs_with_config( pub fn list_docs_with_config(
config: kb_config::Config, config: kebab_config::Config,
filter: DocFilter, filter: DocFilter,
) -> anyhow::Result<Vec<DocSummary>> { ) -> anyhow::Result<Vec<DocSummary>> {
let app = App::open_with_config(config)?; let app = App::open_with_config(config)?;
@@ -676,7 +676,7 @@ pub fn inspect_doc(id: &DocumentId) -> anyhow::Result<CanonicalDocument> {
/// ([`inspect_doc`]), not this. /// ([`inspect_doc`]), not this.
#[doc(hidden)] #[doc(hidden)]
pub fn inspect_doc_with_config( pub fn inspect_doc_with_config(
config: kb_config::Config, config: kebab_config::Config,
id: &DocumentId, id: &DocumentId,
) -> anyhow::Result<CanonicalDocument> { ) -> anyhow::Result<CanonicalDocument> {
let app = App::open_with_config(config)?; let app = App::open_with_config(config)?;
@@ -694,7 +694,7 @@ pub fn inspect_chunk(id: &ChunkId) -> anyhow::Result<Chunk> {
/// ([`inspect_chunk`]), not this. /// ([`inspect_chunk`]), not this.
#[doc(hidden)] #[doc(hidden)]
pub fn inspect_chunk_with_config( pub fn inspect_chunk_with_config(
config: kb_config::Config, config: kebab_config::Config,
id: &ChunkId, id: &ChunkId,
) -> anyhow::Result<Chunk> { ) -> anyhow::Result<Chunk> {
let app = App::open_with_config(config)?; let app = App::open_with_config(config)?;
@@ -716,7 +716,7 @@ pub fn search(query: SearchQuery) -> anyhow::Result<Vec<SearchHit>> {
/// directly to amortize the embedder / vector-store cold start. /// directly to amortize the embedder / vector-store cold start.
#[doc(hidden)] #[doc(hidden)]
pub fn search_with_config( pub fn search_with_config(
config: kb_config::Config, config: kebab_config::Config,
query: SearchQuery, query: SearchQuery,
) -> anyhow::Result<Vec<SearchHit>> { ) -> anyhow::Result<Vec<SearchHit>> {
App::open_with_config(config)?.search(query) App::open_with_config(config)?.search(query)
@@ -740,7 +740,7 @@ pub fn ask(query: &str, opts: AskOpts) -> anyhow::Result<Answer> {
/// [`App::ask`]. /// [`App::ask`].
#[doc(hidden)] #[doc(hidden)]
pub fn ask_with_config( pub fn ask_with_config(
config: kb_config::Config, config: kebab_config::Config,
query: &str, query: &str,
opts: AskOpts, opts: AskOpts,
) -> anyhow::Result<Answer> { ) -> anyhow::Result<Answer> {
@@ -761,10 +761,10 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
// override first, else XDG default. Report whichever was probed. // override first, else XDG default. Report whichever was probed.
let cfg_path: PathBuf = match config_path { let cfg_path: PathBuf = match config_path {
Some(p) => p.to_path_buf(), Some(p) => p.to_path_buf(),
None => kb_config::Config::xdg_config_path(), None => kebab_config::Config::xdg_config_path(),
}; };
let (config_ok, config_detail, loaded_cfg) = if cfg_path.exists() { let (config_ok, config_detail, loaded_cfg) = if cfg_path.exists() {
match kb_config::Config::from_file(&cfg_path) { match kebab_config::Config::from_file(&cfg_path) {
Ok(c) => (true, cfg_path.display().to_string(), Some(c)), Ok(c) => (true, cfg_path.display().to_string(), Some(c)),
Err(e) => (false, format!("{} ({e})", cfg_path.display()), None), Err(e) => (false, format!("{} ({e})", cfg_path.display()), None),
} }
@@ -795,7 +795,7 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
// data_dir_writable — probe the resolved storage.data_dir from the // data_dir_writable — probe the resolved storage.data_dir from the
// loaded config when present, else the XDG default. Apply env // loaded config when present, else the XDG default. Apply env
// overrides so KB_STORAGE_DATA_DIR is respected too. // overrides so KEBAB_STORAGE_DATA_DIR is respected too.
let data_dir = match loaded_cfg.as_ref() { let data_dir = match loaded_cfg.as_ref() {
Some(c) => { Some(c) => {
// Re-apply env overrides on top so the same precedence as // Re-apply env overrides on top so the same precedence as
@@ -804,7 +804,7 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
let merged = c.clone().apply_env(&env); let merged = c.clone().apply_env(&env);
expand_tilde(&merged.storage.data_dir) expand_tilde(&merged.storage.data_dir)
} }
None => kb_config::Config::xdg_data_dir(), None => kebab_config::Config::xdg_data_dir(),
}; };
let writable = (|| -> anyhow::Result<()> { let writable = (|| -> anyhow::Result<()> {
std::fs::create_dir_all(&data_dir)?; std::fs::create_dir_all(&data_dir)?;

View File

@@ -1,6 +1,6 @@
//! Tracing initialization helper for `kb-cli`. //! Tracing initialization helper for `kb-cli`.
//! //!
//! Daily-rolling file appender at `~/.local/state/kb/logs/` per task spec. //! Daily-rolling file appender at `~/.local/state/kebab/logs/` per task spec.
//! Returns a `WorkerGuard` that the caller must keep alive until program //! Returns a `WorkerGuard` that the caller must keep alive until program
//! exit (so buffered log lines flush). //! exit (so buffered log lines flush).
@@ -19,7 +19,7 @@ pub enum LogLevel {
/// — a second call is a no-op (the second `try_init` is dropped silently /// — a second call is a no-op (the second `try_init` is dropped silently
/// but the guard is still returned so the caller can keep it alive). /// but the guard is still returned so the caller can keep it alive).
pub fn init(level: LogLevel) -> Result<WorkerGuard> { pub fn init(level: LogLevel) -> Result<WorkerGuard> {
let log_dir = kb_config::Config::xdg_state_dir().join("logs"); let log_dir = kebab_config::Config::xdg_state_dir().join("logs");
std::fs::create_dir_all(&log_dir)?; std::fs::create_dir_all(&log_dir)?;
let file_appender = tracing_appender::rolling::daily(&log_dir, "kb.log"); let file_appender = tracing_appender::rolling::daily(&log_dir, "kb.log");

View File

@@ -21,12 +21,12 @@ use common::TestEnv;
#[ignore = "requires real Ollama on 127.0.0.1:11434"] #[ignore = "requires real Ollama on 127.0.0.1:11434"]
fn ask_lexical_smoke() { fn ask_lexical_smoke() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let opts = kb_app::AskOpts { let opts = kebab_app::AskOpts {
k: 5, k: 5,
explain: false, explain: false,
mode: kb_core::SearchMode::Lexical, mode: kebab_core::SearchMode::Lexical,
temperature: Some(0.0), temperature: Some(0.0),
seed: Some(0), seed: Some(0),
stream_sink: None, stream_sink: None,
@@ -34,10 +34,10 @@ fn ask_lexical_smoke() {
// The fixture workspace contains "ownership" content; the model's // The fixture workspace contains "ownership" content; the model's
// citation behavior depends on its training, so we don't assert on // citation behavior depends on its training, so we don't assert on
// grounded — only that the call returns a structurally-valid Answer. // grounded — only that the call returns a structurally-valid Answer.
let answer = kb_app::ask_with_config(env.config.clone(), "ownership", opts) let answer = kebab_app::ask_with_config(env.config.clone(), "ownership", opts)
.expect("ask returns Ok with a real Ollama backend"); .expect("ask returns Ok with a real Ollama backend");
// retrieval summary always populated, regardless of grounded path. // retrieval summary always populated, regardless of grounded path.
assert_eq!(answer.retrieval.mode, kb_core::SearchMode::Lexical); assert_eq!(answer.retrieval.mode, kebab_core::SearchMode::Lexical);
assert!(answer.retrieval.k >= 5); assert!(answer.retrieval.k >= 5);
assert!(answer.retrieval.trace_id.0.starts_with("ret_")); assert!(answer.retrieval.trace_id.0.starts_with("ret_"));
} }

View File

@@ -12,7 +12,7 @@
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use kb_config::Config; use kebab_config::Config;
use tempfile::TempDir; use tempfile::TempDir;
/// Test environment: owns a `TempDir` and exposes a `Config` whose /// Test environment: owns a `TempDir` and exposes a `Config` whose
@@ -72,8 +72,8 @@ impl TestEnv {
} }
} }
pub fn scope(&self) -> kb_core::SourceScope { pub fn scope(&self) -> kebab_core::SourceScope {
kb_core::SourceScope { kebab_core::SourceScope {
root: self.workspace_root.clone(), root: self.workspace_root.clone(),
include: self.config.workspace.include.clone(), include: self.config.workspace.include.clone(),
exclude: self.config.workspace.exclude.clone(), exclude: self.config.workspace.exclude.clone(),

View File

@@ -9,7 +9,7 @@ use common::TestEnv;
fn ingest_then_list_inspects_round_trip() { fn ingest_then_list_inspects_round_trip() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
let report = let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
// The fixture has 3 markdown files; first ingest should label them // The fixture has 3 markdown files; first ingest should label them
// all as New. // all as New.
@@ -27,16 +27,16 @@ fn ingest_then_list_inspects_round_trip() {
} }
// list_docs returns the 3 docs. // list_docs returns the 3 docs.
let docs = kb_app::list_docs_with_config( let docs = kebab_app::list_docs_with_config(
env.config.clone(), env.config.clone(),
kb_core::DocFilter::default(), kebab_core::DocFilter::default(),
) )
.unwrap(); .unwrap();
assert_eq!(docs.len(), 3, "docs: {docs:?}"); assert_eq!(docs.len(), 3, "docs: {docs:?}");
// inspect_doc round-trips one of them. // inspect_doc round-trips one of them.
let any_doc_id = docs[0].doc_id.clone(); let any_doc_id = docs[0].doc_id.clone();
let canonical = kb_app::inspect_doc_with_config(env.config.clone(), &any_doc_id) let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
.unwrap(); .unwrap();
assert_eq!(canonical.doc_id, any_doc_id); assert_eq!(canonical.doc_id, any_doc_id);
assert!(!canonical.blocks.is_empty(), "blocks empty"); assert!(!canonical.blocks.is_empty(), "blocks empty");
@@ -47,20 +47,20 @@ fn ingest_idempotent_on_second_run() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
let r1 = let r1 =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(r1.new, 3); assert_eq!(r1.new, 3);
let r2 = let r2 =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
// Same files re-ingested — labelled Updated, not duplicated. // Same files re-ingested — labelled Updated, not duplicated.
assert_eq!(r2.scanned, 3, "second scan: {r2:?}"); assert_eq!(r2.scanned, 3, "second scan: {r2:?}");
assert_eq!(r2.new, 0, "second run new should be 0: {r2:?}"); assert_eq!(r2.new, 0, "second run new should be 0: {r2:?}");
assert_eq!(r2.updated, 3, "second run updated: {r2:?}"); assert_eq!(r2.updated, 3, "second run updated: {r2:?}");
// list_docs still has 3 docs (no duplicates). // list_docs still has 3 docs (no duplicates).
let docs = kb_app::list_docs_with_config( let docs = kebab_app::list_docs_with_config(
env.config.clone(), env.config.clone(),
kb_core::DocFilter::default(), kebab_core::DocFilter::default(),
) )
.unwrap(); .unwrap();
assert_eq!(docs.len(), 3); assert_eq!(docs.len(), 3);
@@ -70,7 +70,7 @@ fn ingest_idempotent_on_second_run() {
fn ingest_summary_only_drops_items() { fn ingest_summary_only_drops_items() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
let report = let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.scanned, 3); assert_eq!(report.scanned, 3);
assert!(report.items.is_none(), "summary-only should null items"); assert!(report.items.is_none(), "summary-only should null items");
} }
@@ -82,13 +82,13 @@ fn ingest_records_ingest_runs_row_with_aggregate_counts() {
// of every run. `summary_only=true` writes `items_json=NULL`; the // of every run. `summary_only=true` writes `items_json=NULL`; the
// counts MUST still be present. // counts MUST still be present.
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
let report = kb_app::ingest_with_config(env.config.clone(), env.scope(), true) let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
.unwrap(); .unwrap();
assert_eq!(report.scanned, 3); assert_eq!(report.scanned, 3);
let db_path = std::path::PathBuf::from(&env.config.storage.data_dir) let db_path = std::path::PathBuf::from(&env.config.storage.data_dir)
.join("kb.sqlite"); .join("kebab.sqlite");
let conn = rusqlite::Connection::open(&db_path).expect("open kb.sqlite"); let conn = rusqlite::Connection::open(&db_path).expect("open kebab.sqlite");
let (scanned, new_c, updated, skipped, errors, items_json): ( let (scanned, new_c, updated, skipped, errors, items_json): (
i64, i64,
i64, i64,
@@ -137,7 +137,7 @@ fn ingest_provider_none_skips_lance() {
// tables under it). // tables under it).
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
let report = let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
assert_eq!(report.errors, 0, "lexical-only run must not error"); assert_eq!(report.errors, 0, "lexical-only run must not error");
assert_eq!(report.new, 3); assert_eq!(report.new, 3);
@@ -170,22 +170,22 @@ fn ingest_provider_none_skips_lance() {
#[test] #[test]
fn list_docs_filters_by_tags_any() { fn list_docs_filters_by_tags_any() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let filter = kb_core::DocFilter { let filter = kebab_core::DocFilter {
tags_any: vec!["python".to_string()], tags_any: vec!["python".to_string()],
..Default::default() ..Default::default()
}; };
let docs = kb_app::list_docs_with_config(env.config.clone(), filter).unwrap(); let docs = kebab_app::list_docs_with_config(env.config.clone(), filter).unwrap();
assert_eq!(docs.len(), 1, "expected only the python doc: {docs:?}"); assert_eq!(docs.len(), 1, "expected only the python doc: {docs:?}");
assert!(docs[0].tags.contains(&"python".to_string())); assert!(docs[0].tags.contains(&"python".to_string()));
let rust_filter = kb_core::DocFilter { let rust_filter = kebab_core::DocFilter {
tags_any: vec!["rust".to_string()], tags_any: vec!["rust".to_string()],
..Default::default() ..Default::default()
}; };
let rust_docs = let rust_docs =
kb_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap(); kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
// intro.md and notes/cargo.md both tag "rust". // intro.md and notes/cargo.md both tag "rust".
assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}"); assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}");
} }
@@ -194,8 +194,8 @@ fn list_docs_filters_by_tags_any() {
fn inspect_doc_not_found_returns_actionable_error() { fn inspect_doc_not_found_returns_actionable_error() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
let bogus = let bogus =
kb_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string()); kebab_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
let err = kb_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err(); let err = kebab_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
let msg = format!("{err:#}"); let msg = format!("{err:#}");
assert!( assert!(
msg.contains("not found"), msg.contains("not found"),
@@ -210,10 +210,10 @@ fn inspect_doc_not_found_returns_actionable_error() {
#[test] #[test]
fn inspect_chunk_not_found_returns_actionable_error() { fn inspect_chunk_not_found_returns_actionable_error() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
let bogus = kb_core::ChunkId( let bogus = kebab_core::ChunkId(
"0000000000000000000000000000000000000000000000000000000000000000".to_string(), "0000000000000000000000000000000000000000000000000000000000000000".to_string(),
); );
let err = kb_app::inspect_chunk_with_config(env.config.clone(), &bogus) let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus)
.unwrap_err(); .unwrap_err();
let msg = format!("{err:#}"); let msg = format!("{err:#}");
assert!(msg.contains("not found"), "got: {msg}"); assert!(msg.contains("not found"), "got: {msg}");

View File

@@ -5,24 +5,24 @@ mod common;
use common::TestEnv; use common::TestEnv;
fn lexical_query(text: &str) -> kb_core::SearchQuery { fn lexical_query(text: &str) -> kebab_core::SearchQuery {
kb_core::SearchQuery { kebab_core::SearchQuery {
text: text.to_string(), text: text.to_string(),
mode: kb_core::SearchMode::Lexical, mode: kebab_core::SearchMode::Lexical,
k: 10, k: 10,
filters: kb_core::SearchFilters::default(), filters: kebab_core::SearchFilters::default(),
} }
} }
#[test] #[test]
fn lexical_search_returns_hits_after_ingest() { fn lexical_search_returns_hits_after_ingest() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
// "Ownership" appears as a heading + paragraph in intro.md and // "Ownership" appears as a heading + paragraph in intro.md and
// matches FTS5 default tokenizer easily. // matches FTS5 default tokenizer easily.
let hits = let hits =
kb_app::search_with_config(env.config.clone(), lexical_query("ownership")) kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
.unwrap(); .unwrap();
assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'"); assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'");
@@ -34,7 +34,7 @@ fn lexical_search_returns_hits_after_ingest() {
); );
assert_eq!( assert_eq!(
h.retrieval.method, h.retrieval.method,
kb_core::SearchMode::Lexical, kebab_core::SearchMode::Lexical,
"method label should be Lexical" "method label should be Lexical"
); );
} }
@@ -43,8 +43,8 @@ fn lexical_search_returns_hits_after_ingest() {
#[test] #[test]
fn lexical_search_empty_query_returns_empty() { fn lexical_search_empty_query_returns_empty() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let hits = kb_app::search_with_config(env.config.clone(), lexical_query(" ")) let hits = kebab_app::search_with_config(env.config.clone(), lexical_query(" "))
.unwrap(); .unwrap();
assert!(hits.is_empty(), "blank query must short-circuit empty"); assert!(hits.is_empty(), "blank query must short-circuit empty");
} }
@@ -52,15 +52,15 @@ fn lexical_search_empty_query_returns_empty() {
#[test] #[test]
fn vector_mode_with_provider_none_errors_clearly() { fn vector_mode_with_provider_none_errors_clearly() {
let env = TestEnv::lexical_only(); let env = TestEnv::lexical_only();
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
let q = kb_core::SearchQuery { let q = kebab_core::SearchQuery {
text: "ownership".to_string(), text: "ownership".to_string(),
mode: kb_core::SearchMode::Vector, mode: kebab_core::SearchMode::Vector,
k: 10, k: 10,
filters: kb_core::SearchFilters::default(), filters: kebab_core::SearchFilters::default(),
}; };
let err = kb_app::search_with_config(env.config.clone(), q).unwrap_err(); let err = kebab_app::search_with_config(env.config.clone(), q).unwrap_err();
let msg = format!("{err:#}"); let msg = format!("{err:#}");
assert!( assert!(
msg.contains("embeddings disabled") || msg.contains("disabled"), msg.contains("embeddings disabled") || msg.contains("disabled"),

View File

@@ -31,21 +31,21 @@ fn ingest_then_hybrid_search_returns_hits() {
let env = TestEnv::with_embeddings(); let env = TestEnv::with_embeddings();
let report = let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.errors, 0, "no per-file errors: {report:?}"); assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
assert_eq!(report.new, 3); assert_eq!(report.new, 3);
let q = kb_core::SearchQuery { let q = kebab_core::SearchQuery {
text: "ownership".to_string(), text: "ownership".to_string(),
mode: kb_core::SearchMode::Hybrid, mode: kebab_core::SearchMode::Hybrid,
k: 10, k: 10,
filters: kb_core::SearchFilters::default(), filters: kebab_core::SearchFilters::default(),
}; };
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap(); let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
assert!(!hits.is_empty(), "expected hybrid hits for 'ownership'"); assert!(!hits.is_empty(), "expected hybrid hits for 'ownership'");
let methods: Vec<_> = hits.iter().map(|h| h.retrieval.method).collect(); let methods: Vec<_> = hits.iter().map(|h| h.retrieval.method).collect();
assert!( assert!(
methods.iter().all(|m| *m == kb_core::SearchMode::Hybrid), methods.iter().all(|m| *m == kebab_core::SearchMode::Hybrid),
"every hit must report method=Hybrid: {methods:?}" "every hit must report method=Hybrid: {methods:?}"
); );
} }
@@ -58,22 +58,22 @@ fn ingest_then_vector_search_carries_embedding_model() {
let env = TestEnv::with_embeddings(); let env = TestEnv::with_embeddings();
let report = let report =
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap(); kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
assert_eq!(report.errors, 0, "no per-file errors: {report:?}"); assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
assert_eq!(report.new, 3); assert_eq!(report.new, 3);
let q = kb_core::SearchQuery { let q = kebab_core::SearchQuery {
text: "ownership".to_string(), text: "ownership".to_string(),
mode: kb_core::SearchMode::Vector, mode: kebab_core::SearchMode::Vector,
k: 10, k: 10,
filters: kb_core::SearchFilters::default(), filters: kebab_core::SearchFilters::default(),
}; };
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap(); let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
assert!(!hits.is_empty(), "expected vector hits for 'ownership'"); assert!(!hits.is_empty(), "expected vector hits for 'ownership'");
// Vector mode dispatches through `VectorRetriever` and MUST stamp // Vector mode dispatches through `VectorRetriever` and MUST stamp
// each hit with the configured embedding_model id. // each hit with the configured embedding_model id.
let expected = kb_core::EmbeddingModelId(env.config.models.embedding.model.clone()); let expected = kebab_core::EmbeddingModelId(env.config.models.embedding.model.clone());
for h in &hits { for h in &hits {
assert_eq!( assert_eq!(
h.embedding_model, h.embedding_model,
@@ -82,7 +82,7 @@ fn ingest_then_vector_search_carries_embedding_model() {
); );
assert_eq!( assert_eq!(
h.retrieval.method, h.retrieval.method,
kb_core::SearchMode::Vector, kebab_core::SearchMode::Vector,
"vector-mode hit must report method=Vector" "vector-mode hit must report method=Vector"
); );
} }

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-chunk" name = "kebab-chunk"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,7 +8,7 @@ repository = { workspace = true }
description = "Chunkers that turn kb-core::CanonicalDocument into kb-core::Chunk batches (§3.5, §4.2, §7.2)" description = "Chunkers that turn kb-core::CanonicalDocument into kb-core::Chunk batches (§3.5, §4.2, §7.2)"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
serde_json_canonicalizer = "0.3" serde_json_canonicalizer = "0.3"
blake3 = { workspace = true } blake3 = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
@@ -20,7 +20,7 @@ tracing = { workspace = true }
# regular deps per design §8 (chunker consumes CanonicalDocument from kb-core # regular deps per design §8 (chunker consumes CanonicalDocument from kb-core
# only); `cargo tree -p kb-chunk --depth 1` (default scope, excludes dev-deps) # only); `cargo tree -p kb-chunk --depth 1` (default scope, excludes dev-deps)
# confirms this. # confirms this.
kb-parse-md = { path = "../kb-parse-md" } kebab-parse-md = { path = "../kebab-parse-md" }
kb-normalize = { path = "../kb-normalize" } kebab-normalize = { path = "../kebab-normalize" }
serde_json = { workspace = true } serde_json = { workspace = true }
time = { workspace = true } time = { workspace = true }

View File

@@ -1,4 +1,4 @@
//! `kb-chunk` — chunkers that emit [`kb_core::Chunk`] batches. //! `kb-chunk` — chunkers that emit [`kebab_core::Chunk`] batches.
//! //!
//! Per design §3.5 (Chunk), §4.2 (chunk_id recipe), §7.2 (`Chunker` //! Per design §3.5 (Chunk), §4.2 (chunk_id recipe), §7.2 (`Chunker`
//! trait), §0 Q3/§14 (chunking priority). //! trait), §0 Q3/§14 (chunking priority).

View File

@@ -1,6 +1,6 @@
//! `md-heading-v1` — heading-aware Markdown chunker. //! `md-heading-v1` — heading-aware Markdown chunker.
use kb_core::{ use kebab_core::{
Block, BlockId, CanonicalDocument, Chunk, ChunkPolicy, Chunker, Block, BlockId, CanonicalDocument, Chunk, ChunkPolicy, Chunker,
ChunkerVersion, DocumentId, SourceSpan, id_for_chunk, ChunkerVersion, DocumentId, SourceSpan, id_for_chunk,
}; };
@@ -24,7 +24,7 @@ const POLICY_HASH_HEX_LEN: usize = 16;
/// Heading-aware Markdown chunker. /// Heading-aware Markdown chunker.
/// ///
/// Implements [`kb_core::Chunker`] for Markdown-derived /// Implements [`kebab_core::Chunker`] for Markdown-derived
/// [`CanonicalDocument`]s. /// [`CanonicalDocument`]s.
/// ///
/// **Behavior contract** (design §0 / §14, in priority order): /// **Behavior contract** (design §0 / §14, in priority order):
@@ -186,7 +186,7 @@ impl Chunker for MdHeadingV1Chunker {
flush(&mut acc, doc, &chunker_version, &policy_hash, &mut out); flush(&mut acc, doc, &chunker_version, &policy_hash, &mut out);
tracing::debug!( tracing::debug!(
target: "kb-chunk", target: "kebab-chunk",
doc_id = %doc.doc_id, doc_id = %doc.doc_id,
chunks = out.len(), chunks = out.len(),
"md-heading-v1 chunked", "md-heading-v1 chunked",
@@ -409,7 +409,7 @@ fn estimate_block_tokens(b: &Block) -> usize {
} }
/// Borrow the `CommonBlock` of any [`Block`] variant. /// Borrow the `CommonBlock` of any [`Block`] variant.
fn common(b: &Block) -> &kb_core::CommonBlock { fn common(b: &Block) -> &kebab_core::CommonBlock {
match b { match b {
Block::Heading(h) => &h.common, Block::Heading(h) => &h.common,
Block::Paragraph(t) | Block::Quote(t) => &t.common, Block::Paragraph(t) | Block::Quote(t) => &t.common,
@@ -424,7 +424,7 @@ fn common(b: &Block) -> &kb_core::CommonBlock {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use kb_core::{ use kebab_core::{
AssetId, CodeBlock, CommonBlock, HeadingBlock, ImageRefBlock, Lang, AssetId, CodeBlock, CommonBlock, HeadingBlock, ImageRefBlock, Lang,
Metadata, Provenance, SourceType, TableBlock, TextBlock, TrustLevel, Metadata, Provenance, SourceType, TableBlock, TextBlock, TrustLevel,
WorkspacePath, id_for_block, WorkspacePath, id_for_block,
@@ -433,7 +433,7 @@ mod tests {
fn make_doc(blocks: Vec<Block>) -> CanonicalDocument { fn make_doc(blocks: Vec<Block>) -> CanonicalDocument {
CanonicalDocument { CanonicalDocument {
doc_id: kb_core::DocumentId("d".repeat(32)), doc_id: kebab_core::DocumentId("d".repeat(32)),
source_asset_id: AssetId("a".repeat(32)), source_asset_id: AssetId("a".repeat(32)),
workspace_path: WorkspacePath::new("notes/test.md".into()).unwrap(), workspace_path: WorkspacePath::new("notes/test.md".into()).unwrap(),
title: "Test".into(), title: "Test".into(),
@@ -450,14 +450,14 @@ mod tests {
user: Default::default(), user: Default::default(),
}, },
provenance: Provenance { events: vec![] }, provenance: Provenance { events: vec![] },
parser_version: kb_core::ParserVersion("test-parser-0".into()), parser_version: kebab_core::ParserVersion("test-parser-0".into()),
schema_version: 1, schema_version: 1,
doc_version: 1, doc_version: 1,
} }
} }
fn doc_id() -> kb_core::DocumentId { fn doc_id() -> kebab_core::DocumentId {
kb_core::DocumentId("d".repeat(32)) kebab_core::DocumentId("d".repeat(32))
} }
fn span(start: u32, end: u32) -> SourceSpan { fn span(start: u32, end: u32) -> SourceSpan {

View File

@@ -13,13 +13,13 @@
use std::path::PathBuf; use std::path::PathBuf;
use kb_chunk::MdHeadingV1Chunker; use kebab_chunk::MdHeadingV1Chunker;
use kb_core::{ use kebab_core::{
AssetId, AssetStorage, Checksum, ChunkPolicy, ChunkerVersion, Chunker, MediaType, AssetId, AssetStorage, Checksum, ChunkPolicy, ChunkerVersion, Chunker, MediaType,
ParserVersion, RawAsset, SourceUri, WorkspacePath, ParserVersion, RawAsset, SourceUri, WorkspacePath,
}; };
use kb_normalize::build_canonical_document; use kebab_normalize::build_canonical_document;
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter}; use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use serde_json::Value; use serde_json::Value;
use time::OffsetDateTime; use time::OffsetDateTime;

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-cli" name = "kebab-cli"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,13 +8,13 @@ repository = { workspace = true }
description = "kb command-line interface" description = "kb command-line interface"
[[bin]] [[bin]]
name = "kb" name = "kebab"
path = "src/main.rs" path = "src/main.rs"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
kb-app = { path = "../kb-app" } kebab-app = { path = "../kebab-app" }
# kb-eval re-exports `compute_aggregate` / `compare_runs` / # kb-eval re-exports `compute_aggregate` / `compare_runs` /
# `render_report_md` (P5-2). The DoD calls for these to be reached # `render_report_md` (P5-2). The DoD calls for these to be reached
# "via kb-app", but kb-eval already depends on kb-app (P5-1 runner # "via kb-app", but kb-eval already depends on kb-app (P5-1 runner
@@ -22,7 +22,7 @@ kb-app = { path = "../kb-app" }
# require kb-app → kb-eval, forming a cycle. We therefore wire # require kb-app → kb-eval, forming a cycle. We therefore wire
# kb-cli → kb-eval directly; documented in # kb-cli → kb-eval directly; documented in
# `tasks/p5/p5-2-metrics-compare.md`. # `tasks/p5/p5-2-metrics-compare.md`.
kb-eval = { path = "../kb-eval" } kebab-eval = { path = "../kebab-eval" }
anyhow = { workspace = true } anyhow = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
clap = { version = "4", features = ["derive"] } clap = { version = "4", features = ["derive"] }

View File

@@ -6,12 +6,12 @@ use std::process::ExitCode;
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use kb_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal}; use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
mod wire; mod wire;
#[derive(Parser, Debug)] #[derive(Parser, Debug)]
#[command(name = "kb", version, about = "personal local knowledge base")] #[command(name = "kebab", version, about = "personal local knowledge base")]
struct Cli { struct Cli {
/// Path to a non-default `config.toml`. /// Path to a non-default `config.toml`.
#[arg(long, global = true)] #[arg(long, global = true)]
@@ -170,12 +170,12 @@ enum ModeFlag {
Hybrid, Hybrid,
} }
impl From<ModeFlag> for kb_core::SearchMode { impl From<ModeFlag> for kebab_core::SearchMode {
fn from(m: ModeFlag) -> Self { fn from(m: ModeFlag) -> Self {
match m { match m {
ModeFlag::Lexical => kb_core::SearchMode::Lexical, ModeFlag::Lexical => kebab_core::SearchMode::Lexical,
ModeFlag::Vector => kb_core::SearchMode::Vector, ModeFlag::Vector => kebab_core::SearchMode::Vector,
ModeFlag::Hybrid => kb_core::SearchMode::Hybrid, ModeFlag::Hybrid => kebab_core::SearchMode::Hybrid,
} }
} }
} }
@@ -183,15 +183,15 @@ impl From<ModeFlag> for kb_core::SearchMode {
fn main() -> ExitCode { fn main() -> ExitCode {
let cli = Cli::parse(); let cli = Cli::parse();
let level = if cli.debug { let level = if cli.debug {
kb_app::logging::LogLevel::Debug kebab_app::logging::LogLevel::Debug
} else if cli.verbose { } else if cli.verbose {
kb_app::logging::LogLevel::Verbose kebab_app::logging::LogLevel::Verbose
} else { } else {
kb_app::logging::LogLevel::Default kebab_app::logging::LogLevel::Default
}; };
// Fail-soft: if logging init errors (e.g. XDG state dir is read-only), // Fail-soft: if logging init errors (e.g. XDG state dir is read-only),
// proceed without a guard rather than crashing — `kb` is still usable. // proceed without a guard rather than crashing — `kb` is still usable.
let _log_guard = kb_app::logging::init(level).ok(); let _log_guard = kebab_app::logging::init(level).ok();
match run(&cli) { match run(&cli) {
Ok(()) => ExitCode::from(0), Ok(()) => ExitCode::from(0),
Err(e) => { Err(e) => {
@@ -227,14 +227,14 @@ fn exit_code(err: &anyhow::Error) -> u8 {
fn run(cli: &Cli) -> anyhow::Result<()> { fn run(cli: &Cli) -> anyhow::Result<()> {
match &cli.command { match &cli.command {
Cmd::Init { force } => { Cmd::Init { force } => {
kb_app::init_workspace(*force)?; kebab_app::init_workspace(*force)?;
if !cli.json { if !cli.json {
println!( println!(
"created {}", "created {}",
kb_config::Config::xdg_config_path().display() kebab_config::Config::xdg_config_path().display()
); );
println!("created {}", kb_config::Config::xdg_data_dir().display()); println!("created {}", kebab_config::Config::xdg_data_dir().display());
println!("created {}", kb_config::Config::xdg_state_dir().display()); println!("created {}", kebab_config::Config::xdg_state_dir().display());
println!("hint edit the config above, then `kb ingest`"); println!("hint edit the config above, then `kb ingest`");
} }
Ok(()) Ok(())
@@ -244,13 +244,13 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
root, root,
summary_only, summary_only,
} => { } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?; let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let scope = kb_core::SourceScope { let scope = kebab_core::SourceScope {
root: root.clone().unwrap_or_else(|| PathBuf::from(&cfg.workspace.root)), root: root.clone().unwrap_or_else(|| PathBuf::from(&cfg.workspace.root)),
include: cfg.workspace.include.clone(), include: cfg.workspace.include.clone(),
exclude: cfg.workspace.exclude.clone(), exclude: cfg.workspace.exclude.clone(),
}; };
let report = kb_app::ingest_with_config(cfg, scope, *summary_only)?; let report = kebab_app::ingest_with_config(cfg, scope, *summary_only)?;
if cli.json { if cli.json {
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?); println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
} else { } else {
@@ -269,8 +269,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Cmd::List { what } => match what { Cmd::List { what } => match what {
ListWhat::Docs => { ListWhat::Docs => {
let cfg = kb_config::Config::load(cli.config.as_deref())?; let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let docs = kb_app::list_docs_with_config(cfg, kb_core::DocFilter::default())?; let docs = kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())?;
if cli.json { if cli.json {
println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?); println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?);
} else { } else {
@@ -284,9 +284,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Cmd::Inspect { what } => match what { Cmd::Inspect { what } => match what {
InspectWhat::Doc { id } => { InspectWhat::Doc { id } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?; let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let doc_id: kb_core::DocumentId = id.parse()?; let doc_id: kebab_core::DocumentId = id.parse()?;
let doc = kb_app::inspect_doc_with_config(cfg, &doc_id)?; let doc = kebab_app::inspect_doc_with_config(cfg, &doc_id)?;
// Inspect doc emits a `CanonicalDocument` — there's no §2 // Inspect doc emits a `CanonicalDocument` — there's no §2
// wire schema for it (P1-5 will decide whether this also // wire schema for it (P1-5 will decide whether this also
// becomes a tagged wrapper or stays as the raw domain // becomes a tagged wrapper or stays as the raw domain
@@ -296,9 +296,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
InspectWhat::Chunk { id } => { InspectWhat::Chunk { id } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?; let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let chunk_id: kb_core::ChunkId = id.parse()?; let chunk_id: kebab_core::ChunkId = id.parse()?;
let chunk = kb_app::inspect_chunk_with_config(cfg, &chunk_id)?; let chunk = kebab_app::inspect_chunk_with_config(cfg, &chunk_id)?;
println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?); println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?);
Ok(()) Ok(())
} }
@@ -310,14 +310,14 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
mode, mode,
explain: _, explain: _,
} => { } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?; let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let q = kb_core::SearchQuery { let q = kebab_core::SearchQuery {
text: query.clone(), text: query.clone(),
mode: (*mode).into(), mode: (*mode).into(),
k: *k, k: *k,
filters: kb_core::SearchFilters::default(), filters: kebab_core::SearchFilters::default(),
}; };
let hits = kb_app::search_with_config(cfg, q)?; let hits = kebab_app::search_with_config(cfg, q)?;
if cli.json { if cli.json {
println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?); println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
} else { } else {
@@ -351,8 +351,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
temperature, temperature,
seed, seed,
} => { } => {
let cfg = kb_config::Config::load(cli.config.as_deref())?; let cfg = kebab_config::Config::load(cli.config.as_deref())?;
let opts = kb_app::AskOpts { let opts = kebab_app::AskOpts {
k: *k, k: *k,
explain: *explain, explain: *explain,
mode: (*mode).into(), mode: (*mode).into(),
@@ -363,7 +363,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
// wires up a real `mpsc::Sender` here. // wires up a real `mpsc::Sender` here.
stream_sink: None, stream_sink: None,
}; };
let ans = kb_app::ask_with_config(cfg, query, opts)?; let ans = kebab_app::ask_with_config(cfg, query, opts)?;
if cli.json { if cli.json {
println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?); println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?);
} else { } else {
@@ -377,7 +377,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
} }
Cmd::Doctor => { Cmd::Doctor => {
let report = kb_app::doctor_with_config_path(cli.config.as_deref())?; let report = kebab_app::doctor_with_config_path(cli.config.as_deref())?;
if cli.json { if cli.json {
println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?); println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?);
} else { } else {
@@ -409,7 +409,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
temperature, temperature,
seed, seed,
} => { } => {
let opts = kb_eval::EvalRunOpts { let opts = kebab_eval::EvalRunOpts {
suite: suite.clone(), suite: suite.clone(),
mode: (*mode).into(), mode: (*mode).into(),
with_rag: *with_rag, with_rag: *with_rag,
@@ -417,7 +417,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
temperature: *temperature, temperature: *temperature,
seed: *seed, seed: *seed,
}; };
let run = kb_eval::run_eval(&opts)?; let run = kebab_eval::run_eval(&opts)?;
if cli.json { if cli.json {
println!("{}", serde_json::to_string_pretty(&run)?); println!("{}", serde_json::to_string_pretty(&run)?);
} else { } else {
@@ -430,8 +430,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
} }
EvalWhat::Aggregate { run_id } => { EvalWhat::Aggregate { run_id } => {
let agg = kb_eval::compute_aggregate(run_id)?; let agg = kebab_eval::compute_aggregate(run_id)?;
kb_eval::store_aggregate(run_id, &agg)?; kebab_eval::store_aggregate(run_id, &agg)?;
if cli.json { if cli.json {
println!("{}", serde_json::to_string_pretty(&agg)?); println!("{}", serde_json::to_string_pretty(&agg)?);
} else { } else {
@@ -450,20 +450,20 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
strict_chunker_version, strict_chunker_version,
write_report, write_report,
} => { } => {
let cfg = kb_config::Config::load(None)?; let cfg = kebab_config::Config::load(None)?;
let opts = kb_eval::CompareOpts { let opts = kebab_eval::CompareOpts {
strict_chunker_version: *strict_chunker_version, strict_chunker_version: *strict_chunker_version,
}; };
let report = kb_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?; let report = kebab_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
let md = kb_eval::render_report_md(&report); let md = kebab_eval::render_report_md(&report);
if cli.json { if cli.json {
println!("{}", serde_json::to_string_pretty(&report)?); println!("{}", serde_json::to_string_pretty(&report)?);
} else { } else {
print!("{md}"); print!("{md}");
} }
if *write_report { if *write_report {
let resolved_data_dir = kb_config::expand_path(&cfg.storage.data_dir, ""); let resolved_data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
let runs_dir = kb_config::expand_path( let runs_dir = kebab_config::expand_path(
&cfg.storage.runs_dir, &cfg.storage.runs_dir,
&resolved_data_dir.to_string_lossy(), &resolved_data_dir.to_string_lossy(),
); );

View File

@@ -17,8 +17,8 @@
use serde_json::Value; use serde_json::Value;
use kb_app::DoctorReport; use kebab_app::DoctorReport;
use kb_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit}; use kebab_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
/// Insert `schema_version` into an object-shaped `Value`. Helper for the /// Insert `schema_version` into an object-shaped `Value`. Helper for the
/// "serialize, then tag" pattern used by all the per-type wrappers below. /// "serialize, then tag" pattern used by all the per-type wrappers below.
@@ -132,7 +132,7 @@ mod tests {
#[test] #[test]
fn ingest_wrapper_tags_schema_version() { fn ingest_wrapper_tags_schema_version() {
use kb_core::SourceScope; use kebab_core::SourceScope;
let r = IngestReport { let r = IngestReport {
scope: SourceScope { scope: SourceScope {
root: std::path::PathBuf::from("/tmp"), root: std::path::PathBuf::from("/tmp"),

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-config" name = "kebab-config"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -9,7 +9,7 @@ description = "Config schema + XDG path resolution"
[dependencies] [dependencies]
# kb-core::CoreError reserved for P1-* config errors # kb-core::CoreError reserved for P1-* config errors
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
anyhow = { workspace = true } anyhow = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }

View File

@@ -1,6 +1,6 @@
//! `kb-config` — `Config` schema and XDG path resolution (§6). //! `kb-config` — `Config` schema and XDG path resolution (§6).
//! //!
//! Layer order (`Config::load`): defaults → file → env (`KB_<SECTION>_<KEY>`). //! Layer order (`Config::load`): defaults → file → env (`KEBAB_<SECTION>_<KEY>`).
//! CLI overrides land later, applied by `kb-cli` after `Config::load`. //! CLI overrides land later, applied by `kb-cli` after `Config::load`.
use std::collections::HashMap; use std::collections::HashMap;
@@ -113,8 +113,8 @@ impl Config {
], ],
}, },
storage: StorageCfg { storage: StorageCfg {
data_dir: "${XDG_DATA_HOME:-~/.local/share}/kb".to_string(), data_dir: "${XDG_DATA_HOME:-~/.local/share}/kebab".to_string(),
sqlite: "{data_dir}/kb.sqlite".to_string(), sqlite: "{data_dir}/kebab.sqlite".to_string(),
vector_dir: "{data_dir}/lancedb".to_string(), vector_dir: "{data_dir}/lancedb".to_string(),
asset_dir: "{data_dir}/assets".to_string(), asset_dir: "{data_dir}/assets".to_string(),
artifact_dir: "{data_dir}/artifacts".to_string(), artifact_dir: "{data_dir}/artifacts".to_string(),
@@ -191,139 +191,139 @@ impl Config {
Ok(cfg) Ok(cfg)
} }
/// Apply `KB_<SECTION>_<KEY>` env overrides. Unknown keys are ignored. /// Apply `KEBAB_<SECTION>_<KEY>` env overrides. Unknown keys are ignored.
/// ///
/// The mapping is an explicit grep-friendly whitelist — one match arm /// The mapping is an explicit grep-friendly whitelist — one match arm
/// per leaf key in `Config`. Booleans accept `1` / `true` / `yes` /// per leaf key in `Config`. Booleans accept `1` / `true` / `yes`
/// (case-insensitive) for true and anything else for false. Numeric /// (case-insensitive) for true and anything else for false. Numeric
/// keys silently keep their prior value if the env value fails to /// keys silently keep their prior value if the env value fails to
/// parse, so a malformed `KB_*` cannot crash startup. /// parse, so a malformed `KEBAB_*` cannot crash startup.
pub fn apply_env(mut self, env: &HashMap<String, String>) -> Self { pub fn apply_env(mut self, env: &HashMap<String, String>) -> Self {
for (k, v) in env { for (k, v) in env {
if !k.starts_with("KB_") { if !k.starts_with("KEBAB_") {
continue; continue;
} }
match k.as_str() { match k.as_str() {
// workspace // workspace
"KB_WORKSPACE_ROOT" => self.workspace.root = v.clone(), "KEBAB_WORKSPACE_ROOT" => self.workspace.root = v.clone(),
// storage // storage
"KB_STORAGE_DATA_DIR" => self.storage.data_dir = v.clone(), "KEBAB_STORAGE_DATA_DIR" => self.storage.data_dir = v.clone(),
"KB_STORAGE_SQLITE" => self.storage.sqlite = v.clone(), "KEBAB_STORAGE_SQLITE" => self.storage.sqlite = v.clone(),
"KB_STORAGE_VECTOR_DIR" => self.storage.vector_dir = v.clone(), "KEBAB_STORAGE_VECTOR_DIR" => self.storage.vector_dir = v.clone(),
"KB_STORAGE_ASSET_DIR" => self.storage.asset_dir = v.clone(), "KEBAB_STORAGE_ASSET_DIR" => self.storage.asset_dir = v.clone(),
"KB_STORAGE_ARTIFACT_DIR" => self.storage.artifact_dir = v.clone(), "KEBAB_STORAGE_ARTIFACT_DIR" => self.storage.artifact_dir = v.clone(),
"KB_STORAGE_MODEL_DIR" => self.storage.model_dir = v.clone(), "KEBAB_STORAGE_MODEL_DIR" => self.storage.model_dir = v.clone(),
"KB_STORAGE_RUNS_DIR" => self.storage.runs_dir = v.clone(), "KEBAB_STORAGE_RUNS_DIR" => self.storage.runs_dir = v.clone(),
"KB_STORAGE_COPY_THRESHOLD_MB" => { "KEBAB_STORAGE_COPY_THRESHOLD_MB" => {
if let Ok(n) = v.parse::<u64>() { if let Ok(n) = v.parse::<u64>() {
self.storage.copy_threshold_mb = n; self.storage.copy_threshold_mb = n;
} }
} }
// indexing // indexing
"KB_INDEXING_MAX_PARALLEL_EXTRACTORS" => { "KEBAB_INDEXING_MAX_PARALLEL_EXTRACTORS" => {
if let Ok(n) = v.parse::<u32>() { if let Ok(n) = v.parse::<u32>() {
self.indexing.max_parallel_extractors = n; self.indexing.max_parallel_extractors = n;
} }
} }
"KB_INDEXING_MAX_PARALLEL_EMBEDDINGS" => { "KEBAB_INDEXING_MAX_PARALLEL_EMBEDDINGS" => {
if let Ok(n) = v.parse::<u32>() { if let Ok(n) = v.parse::<u32>() {
self.indexing.max_parallel_embeddings = n; self.indexing.max_parallel_embeddings = n;
} }
} }
"KB_INDEXING_WATCH_FILESYSTEM" => { "KEBAB_INDEXING_WATCH_FILESYSTEM" => {
self.indexing.watch_filesystem = parse_bool(v); self.indexing.watch_filesystem = parse_bool(v);
} }
// chunking // chunking
"KB_CHUNKING_TARGET_TOKENS" => { "KEBAB_CHUNKING_TARGET_TOKENS" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.chunking.target_tokens = n; self.chunking.target_tokens = n;
} }
} }
"KB_CHUNKING_OVERLAP_TOKENS" => { "KEBAB_CHUNKING_OVERLAP_TOKENS" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.chunking.overlap_tokens = n; self.chunking.overlap_tokens = n;
} }
} }
"KB_CHUNKING_RESPECT_MARKDOWN_HEADINGS" => { "KEBAB_CHUNKING_RESPECT_MARKDOWN_HEADINGS" => {
self.chunking.respect_markdown_headings = parse_bool(v); self.chunking.respect_markdown_headings = parse_bool(v);
} }
"KB_CHUNKING_CHUNKER_VERSION" => self.chunking.chunker_version = v.clone(), "KEBAB_CHUNKING_CHUNKER_VERSION" => self.chunking.chunker_version = v.clone(),
// models.embedding // models.embedding
"KB_MODELS_EMBEDDING_PROVIDER" => self.models.embedding.provider = v.clone(), "KEBAB_MODELS_EMBEDDING_PROVIDER" => self.models.embedding.provider = v.clone(),
"KB_MODELS_EMBEDDING_MODEL" => self.models.embedding.model = v.clone(), "KEBAB_MODELS_EMBEDDING_MODEL" => self.models.embedding.model = v.clone(),
"KB_MODELS_EMBEDDING_VERSION" => self.models.embedding.version = v.clone(), "KEBAB_MODELS_EMBEDDING_VERSION" => self.models.embedding.version = v.clone(),
"KB_MODELS_EMBEDDING_DIMENSIONS" => { "KEBAB_MODELS_EMBEDDING_DIMENSIONS" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.models.embedding.dimensions = n; self.models.embedding.dimensions = n;
} }
} }
"KB_MODELS_EMBEDDING_BATCH_SIZE" => { "KEBAB_MODELS_EMBEDDING_BATCH_SIZE" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.models.embedding.batch_size = n; self.models.embedding.batch_size = n;
} }
} }
// models.llm // models.llm
"KB_MODELS_LLM_PROVIDER" => self.models.llm.provider = v.clone(), "KEBAB_MODELS_LLM_PROVIDER" => self.models.llm.provider = v.clone(),
"KB_MODELS_LLM_MODEL" => self.models.llm.model = v.clone(), "KEBAB_MODELS_LLM_MODEL" => self.models.llm.model = v.clone(),
"KB_MODELS_LLM_CONTEXT_TOKENS" => { "KEBAB_MODELS_LLM_CONTEXT_TOKENS" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.models.llm.context_tokens = n; self.models.llm.context_tokens = n;
} }
} }
"KB_MODELS_LLM_ENDPOINT" => self.models.llm.endpoint = v.clone(), "KEBAB_MODELS_LLM_ENDPOINT" => self.models.llm.endpoint = v.clone(),
"KB_MODELS_LLM_TEMPERATURE" => { "KEBAB_MODELS_LLM_TEMPERATURE" => {
if let Ok(f) = v.parse::<f32>() { if let Ok(f) = v.parse::<f32>() {
self.models.llm.temperature = f; self.models.llm.temperature = f;
} }
} }
"KB_MODELS_LLM_SEED" => { "KEBAB_MODELS_LLM_SEED" => {
if let Ok(n) = v.parse::<u64>() { if let Ok(n) = v.parse::<u64>() {
self.models.llm.seed = n; self.models.llm.seed = n;
} }
} }
// search // search
"KB_SEARCH_DEFAULT_K" => { "KEBAB_SEARCH_DEFAULT_K" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.search.default_k = n; self.search.default_k = n;
} }
} }
"KB_SEARCH_HYBRID_FUSION" => self.search.hybrid_fusion = v.clone(), "KEBAB_SEARCH_HYBRID_FUSION" => self.search.hybrid_fusion = v.clone(),
"KB_SEARCH_RRF_K" => { "KEBAB_SEARCH_RRF_K" => {
if let Ok(n) = v.parse::<u32>() { if let Ok(n) = v.parse::<u32>() {
self.search.rrf_k = n; self.search.rrf_k = n;
} }
} }
"KB_SEARCH_SNIPPET_CHARS" => { "KEBAB_SEARCH_SNIPPET_CHARS" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.search.snippet_chars = n; self.search.snippet_chars = n;
} }
} }
// rag // rag
"KB_RAG_PROMPT_TEMPLATE_VERSION" => { "KEBAB_RAG_PROMPT_TEMPLATE_VERSION" => {
self.rag.prompt_template_version = v.clone(); self.rag.prompt_template_version = v.clone();
} }
"KB_RAG_SCORE_GATE" => { "KEBAB_RAG_SCORE_GATE" => {
if let Ok(f) = v.parse::<f32>() { if let Ok(f) = v.parse::<f32>() {
self.rag.score_gate = f; self.rag.score_gate = f;
} }
} }
"KB_RAG_EXPLAIN_DEFAULT" => { "KEBAB_RAG_EXPLAIN_DEFAULT" => {
self.rag.explain_default = parse_bool(v); self.rag.explain_default = parse_bool(v);
} }
"KB_RAG_MAX_CONTEXT_TOKENS" => { "KEBAB_RAG_MAX_CONTEXT_TOKENS" => {
if let Ok(n) = v.parse::<usize>() { if let Ok(n) = v.parse::<usize>() {
self.rag.max_context_tokens = n; self.rag.max_context_tokens = n;
} }
} }
// Unknown KB_* keys are silently ignored — see // Unknown KEBAB_* keys are silently ignored — see
// `env_unknown_key_is_ignored` test. // `env_unknown_key_is_ignored` test.
_ => {} _ => {}
} }
@@ -331,58 +331,58 @@ impl Config {
self self
} }
/// `~/.config/kb/config.toml` (honors `XDG_CONFIG_HOME`). /// `~/.config/kebab/config.toml` (honors `XDG_CONFIG_HOME`).
pub fn xdg_config_path() -> PathBuf { pub fn xdg_config_path() -> PathBuf {
if let Ok(custom) = std::env::var("XDG_CONFIG_HOME") { if let Ok(custom) = std::env::var("XDG_CONFIG_HOME") {
if !custom.is_empty() { if !custom.is_empty() {
return PathBuf::from(custom).join("kb").join("config.toml"); return PathBuf::from(custom).join("kebab").join("config.toml");
} }
} }
match dirs::config_dir() { match dirs::config_dir() {
Some(d) => d.join("kb").join("config.toml"), Some(d) => d.join("kebab").join("config.toml"),
None => PathBuf::from("./kb/config.toml"), None => PathBuf::from("./kebab/config.toml"),
} }
} }
/// `~/.local/share/kb` (honors `XDG_DATA_HOME`). /// `~/.local/share/kebab` (honors `XDG_DATA_HOME`).
pub fn xdg_data_dir() -> PathBuf { pub fn xdg_data_dir() -> PathBuf {
if let Ok(custom) = std::env::var("XDG_DATA_HOME") { if let Ok(custom) = std::env::var("XDG_DATA_HOME") {
if !custom.is_empty() { if !custom.is_empty() {
return PathBuf::from(custom).join("kb"); return PathBuf::from(custom).join("kebab");
} }
} }
match dirs::data_dir() { match dirs::data_dir() {
Some(d) => d.join("kb"), Some(d) => d.join("kebab"),
None => PathBuf::from("./kb-data"), None => PathBuf::from("./kebab-data"),
} }
} }
/// `~/.cache/kb` (honors `XDG_CACHE_HOME`). /// `~/.cache/kebab` (honors `XDG_CACHE_HOME`).
pub fn xdg_cache_dir() -> PathBuf { pub fn xdg_cache_dir() -> PathBuf {
if let Ok(custom) = std::env::var("XDG_CACHE_HOME") { if let Ok(custom) = std::env::var("XDG_CACHE_HOME") {
if !custom.is_empty() { if !custom.is_empty() {
return PathBuf::from(custom).join("kb"); return PathBuf::from(custom).join("kebab");
} }
} }
match dirs::cache_dir() { match dirs::cache_dir() {
Some(d) => d.join("kb"), Some(d) => d.join("kebab"),
None => PathBuf::from("./kb-cache"), None => PathBuf::from("./kebab-cache"),
} }
} }
/// `~/.local/state/kb` (honors `XDG_STATE_HOME`). /// `~/.local/state/kebab` (honors `XDG_STATE_HOME`).
pub fn xdg_state_dir() -> PathBuf { pub fn xdg_state_dir() -> PathBuf {
if let Ok(custom) = std::env::var("XDG_STATE_HOME") { if let Ok(custom) = std::env::var("XDG_STATE_HOME") {
if !custom.is_empty() { if !custom.is_empty() {
return PathBuf::from(custom).join("kb"); return PathBuf::from(custom).join("kebab");
} }
} }
// `dirs` doesn't expose state_dir on all platforms; fall back to // `dirs` doesn't expose state_dir on all platforms; fall back to
// `$HOME/.local/state/kb` if XDG_STATE_HOME is unset. // `$HOME/.local/state/kebab` if XDG_STATE_HOME is unset.
if let Some(home) = dirs::home_dir() { if let Some(home) = dirs::home_dir() {
return home.join(".local").join("state").join("kb"); return home.join(".local").join("state").join("kebab");
} }
PathBuf::from("./kb-state") PathBuf::from("./kebab-state")
} }
} }
@@ -417,7 +417,7 @@ mod tests {
#[test] #[test]
fn env_override_score_gate() { fn env_override_score_gate() {
let mut env = HashMap::new(); let mut env = HashMap::new();
env.insert("KB_RAG_SCORE_GATE".to_string(), "0.5".to_string()); env.insert("KEBAB_RAG_SCORE_GATE".to_string(), "0.5".to_string());
let c = Config::defaults().apply_env(&env); let c = Config::defaults().apply_env(&env);
assert!((c.rag.score_gate - 0.5).abs() < 1e-6); assert!((c.rag.score_gate - 0.5).abs() < 1e-6);
} }
@@ -425,7 +425,7 @@ mod tests {
#[test] #[test]
fn env_override_search_k() { fn env_override_search_k() {
let mut env = HashMap::new(); let mut env = HashMap::new();
env.insert("KB_SEARCH_DEFAULT_K".to_string(), "25".to_string()); env.insert("KEBAB_SEARCH_DEFAULT_K".to_string(), "25".to_string());
let c = Config::defaults().apply_env(&env); let c = Config::defaults().apply_env(&env);
assert_eq!(c.search.default_k, 25); assert_eq!(c.search.default_k, 25);
} }
@@ -434,7 +434,7 @@ mod tests {
fn env_unknown_key_is_ignored() { fn env_unknown_key_is_ignored() {
let baseline = Config::defaults(); let baseline = Config::defaults();
let mut env = HashMap::new(); let mut env = HashMap::new();
env.insert("KB_NOPE_FOO".to_string(), "garbage".to_string()); env.insert("KEBAB_NOPE_FOO".to_string(), "garbage".to_string());
let c = Config::defaults().apply_env(&env); let c = Config::defaults().apply_env(&env);
assert_eq!(c, baseline); assert_eq!(c, baseline);
} }
@@ -442,7 +442,7 @@ mod tests {
#[test] #[test]
fn env_overrides_chunking_target_tokens() { fn env_overrides_chunking_target_tokens() {
let mut env = HashMap::new(); let mut env = HashMap::new();
env.insert("KB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string()); env.insert("KEBAB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string());
let c = Config::defaults().apply_env(&env); let c = Config::defaults().apply_env(&env);
assert_eq!(c.chunking.target_tokens, 777); assert_eq!(c.chunking.target_tokens, 777);
} }
@@ -451,10 +451,10 @@ mod tests {
fn env_overrides_models_llm_endpoint_and_temperature() { fn env_overrides_models_llm_endpoint_and_temperature() {
let mut env = HashMap::new(); let mut env = HashMap::new();
env.insert( env.insert(
"KB_MODELS_LLM_ENDPOINT".to_string(), "KEBAB_MODELS_LLM_ENDPOINT".to_string(),
"http://10.0.0.1:11434".to_string(), "http://10.0.0.1:11434".to_string(),
); );
env.insert("KB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string()); env.insert("KEBAB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string());
let c = Config::defaults().apply_env(&env); let c = Config::defaults().apply_env(&env);
assert_eq!(c.models.llm.endpoint, "http://10.0.0.1:11434"); assert_eq!(c.models.llm.endpoint, "http://10.0.0.1:11434");
assert!((c.models.llm.temperature - 0.7).abs() < 1e-6); assert!((c.models.llm.temperature - 0.7).abs() < 1e-6);
@@ -464,7 +464,7 @@ mod tests {
fn env_overrides_indexing_watch_filesystem_bool() { fn env_overrides_indexing_watch_filesystem_bool() {
let mut env = HashMap::new(); let mut env = HashMap::new();
env.insert( env.insert(
"KB_INDEXING_WATCH_FILESYSTEM".to_string(), "KEBAB_INDEXING_WATCH_FILESYSTEM".to_string(),
"true".to_string(), "true".to_string(),
); );
let c = Config::defaults().apply_env(&env); let c = Config::defaults().apply_env(&env);
@@ -477,10 +477,10 @@ mod tests {
let prev = std::env::var("XDG_CONFIG_HOME").ok(); let prev = std::env::var("XDG_CONFIG_HOME").ok();
// SAFETY: tests in this module run sequentially; we restore below. // SAFETY: tests in this module run sequentially; we restore below.
unsafe { unsafe {
std::env::set_var("XDG_CONFIG_HOME", "/tmp/kbtest-xdg-config"); std::env::set_var("XDG_CONFIG_HOME", "/tmp/kebabtest-xdg-config");
} }
let p = Config::xdg_config_path(); let p = Config::xdg_config_path();
assert_eq!(p, PathBuf::from("/tmp/kbtest-xdg-config/kb/config.toml")); assert_eq!(p, PathBuf::from("/tmp/kebabtest-xdg-config/kebab/config.toml"));
// SAFETY: scope-local restore. // SAFETY: scope-local restore.
unsafe { unsafe {
match prev { match prev {

View File

@@ -1,7 +1,7 @@
//! Shared path expansion helper. //! Shared path expansion helper.
//! //!
//! `Config::storage.*` fields are stored as raw template strings (e.g. //! `Config::storage.*` fields are stored as raw template strings (e.g.
//! `${XDG_DATA_HOME:-~/.local/share}/kb`, `{data_dir}/runs`). Every //! `${XDG_DATA_HOME:-~/.local/share}/kebab`, `{data_dir}/runs`). Every
//! crate that turns one of those strings into a real filesystem path //! crate that turns one of those strings into a real filesystem path
//! needs to apply the same set of substitutions; this module is the //! needs to apply the same set of substitutions; this module is the
//! single source of truth so the behavior cannot drift. //! single source of truth so the behavior cannot drift.
@@ -133,8 +133,8 @@ mod tests {
// SAFETY: lock held for the duration of this test. // SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/custom/path") }; unsafe { std::env::set_var("XDG_DATA_HOME", "/custom/path") };
let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kb", ""); let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kebab", "");
assert_eq!(p, PathBuf::from("/custom/path/kb")); assert_eq!(p, PathBuf::from("/custom/path/kebab"));
} }
#[test] #[test]
@@ -145,8 +145,8 @@ mod tests {
unsafe { std::env::remove_var("XDG_DATA_HOME") }; unsafe { std::env::remove_var("XDG_DATA_HOME") };
let home = std::env::var("HOME").expect("HOME must be set in tests"); let home = std::env::var("HOME").expect("HOME must be set in tests");
let expected = PathBuf::from(home).join(".local/share/kb"); let expected = PathBuf::from(home).join(".local/share/kebab");
let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kb", ""); let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kebab", "");
assert_eq!(p, expected); assert_eq!(p, expected);
} }
@@ -180,7 +180,7 @@ mod tests {
// SAFETY: lock held for the duration of this test. // SAFETY: lock held for the duration of this test.
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") }; unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };
let p = expand_path("{data_dir}/runs", "/xdg/data/kb"); let p = expand_path("{data_dir}/runs", "/xdg/data/kebab");
assert_eq!(p, PathBuf::from("/xdg/data/kb/runs")); assert_eq!(p, PathBuf::from("/xdg/data/kebab/runs"));
} }
} }

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-core" name = "kebab-core"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }

View File

@@ -4,7 +4,7 @@
//! `kb-*` crate, so every other crate in the workspace can depend on it //! `kb-*` crate, so every other crate in the workspace can depend on it
//! freely. //! freely.
//! //!
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` for //! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` for
//! the canonical type bodies — this crate is the byte-for-byte mirror. //! the canonical type bodies — this crate is the byte-for-byte mirror.
pub mod ids; pub mod ids;

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-embed-local" name = "kebab-embed-local"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Local fastembed-rs adapter implementing kb_core::Embedder (multilingual-e5-small default)" description = "Local fastembed-rs adapter implementing kb_core::Embedder (multilingual-e5-small default)"
[dependencies] [dependencies]
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
kb-embed = { path = "../kb-embed" } kebab-embed = { path = "../kebab-embed" }
# Default features bring `ort-download-binaries` (bundled ONNX runtime) # Default features bring `ort-download-binaries` (bundled ONNX runtime)
# and `hf-hub-native-tls` (first-run model download). No extra features # and `hf-hub-native-tls` (first-run model download). No extra features
# needed for the multilingual-e5-small path. # needed for the multilingual-e5-small path.

View File

@@ -1,5 +1,5 @@
//! `kb-embed-local` — `FastembedEmbedder`, a local ONNX-backed //! `kb-embed-local` — `FastembedEmbedder`, a local ONNX-backed
//! [`Embedder`](kb_embed::Embedder) implementation. //! [`Embedder`](kebab_embed::Embedder) implementation.
//! //!
//! Wraps [`fastembed::TextEmbedding`] for the default `multilingual-e5-small` //! Wraps [`fastembed::TextEmbedding`] for the default `multilingual-e5-small`
//! (384-dim) model. Honors `config.models.embedding.batch_size` and applies //! (384-dim) model. Honors `config.models.embedding.batch_size` and applies
@@ -19,15 +19,15 @@
//! rules `kb-store-sqlite` applies to `data_dir` (`${XDG_DATA_HOME:-…}`, //! rules `kb-store-sqlite` applies to `data_dir` (`${XDG_DATA_HOME:-…}`,
//! leading `~`, `{data_dir}` substitution). //! leading `~`, `{data_dir}` substitution).
//! //!
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` //! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md`
//! §7.2 (Embedder), §6.4 ([models.embedding]), §9 (versioning). //! §7.2 (Embedder), §6.4 ([models.embedding]), §9 (versioning).
use std::sync::Mutex; use std::sync::Mutex;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding}; use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
use kb_config::expand_path; use kebab_config::expand_path;
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion}; use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
/// Subdirectory under `config.storage.model_dir` where the fastembed /// Subdirectory under `config.storage.model_dir` where the fastembed
/// adapter writes / reads ONNX + tokenizer files. Hard-coded per task /// adapter writes / reads ONNX + tokenizer files. Hard-coded per task
@@ -58,9 +58,9 @@ impl FastembedEmbedder {
/// `config.models.embedding.dimensions` matches the model's actual /// `config.models.embedding.dimensions` matches the model's actual
/// dim BEFORE returning, so a mismatch fails at construction (not on /// dim BEFORE returning, so a mismatch fails at construction (not on
/// first `embed`). /// first `embed`).
pub fn new(config: &kb_config::Config) -> Result<Self> { pub fn new(config: &kebab_config::Config) -> Result<Self> {
// 1. Resolve `{data_dir}/models/fastembed/` from the config // 1. Resolve `{data_dir}/models/fastembed/` from the config
// templates. Goes through the shared `kb_config::expand_path` // templates. Goes through the shared `kebab_config::expand_path`
// so every crate resolves storage paths identically. // so every crate resolves storage paths identically.
let data_dir = expand_path(&config.storage.data_dir, ""); let data_dir = expand_path(&config.storage.data_dir, "");
let model_dir = expand_path(&config.storage.model_dir, &data_dir.to_string_lossy()); let model_dir = expand_path(&config.storage.model_dir, &data_dir.to_string_lossy());
@@ -82,7 +82,7 @@ impl FastembedEmbedder {
check_dim(model_info.dim, config.models.embedding.dimensions)?; check_dim(model_info.dim, config.models.embedding.dimensions)?;
tracing::info!( tracing::info!(
target: "kb-embed-local", target: "kebab-embed-local",
cache_dir = %cache_dir.display(), cache_dir = %cache_dir.display(),
model = %config.models.embedding.model, model = %config.models.embedding.model,
dims = model_info.dim, dims = model_info.dim,
@@ -97,7 +97,7 @@ impl FastembedEmbedder {
.with_cache_dir(cache_dir.clone()) .with_cache_dir(cache_dir.clone())
.with_show_download_progress(false); .with_show_download_progress(false);
tracing::info!( tracing::info!(
target: "kb-embed-local", target: "kebab-embed-local",
model = %config.models.embedding.model, model = %config.models.embedding.model,
cache_dir = %cache_dir.display(), cache_dir = %cache_dir.display(),
"loading embedding model (first run will download ~470MB)" "loading embedding model (first run will download ~470MB)"
@@ -106,7 +106,7 @@ impl FastembedEmbedder {
.context("fastembed: TextEmbedding::try_new")?; .context("fastembed: TextEmbedding::try_new")?;
let dimensions = model_info.dim; let dimensions = model_info.dim;
tracing::info!( tracing::info!(
target: "kb-embed-local", target: "kebab-embed-local",
model = %config.models.embedding.model, model = %config.models.embedding.model,
dimensions, dimensions,
"embedding model loaded" "embedding model loaded"
@@ -224,7 +224,7 @@ pub(crate) fn check_dim(model_dim: usize, cfg_dim: usize) -> Result<()> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use kb_embed::EmbeddingInput; use kebab_embed::EmbeddingInput;
// ── check_dim ──────────────────────────────────────────────────── // ── check_dim ────────────────────────────────────────────────────
// //

View File

@@ -22,16 +22,16 @@ use std::hash::{Hash, Hasher};
use std::sync::OnceLock; use std::sync::OnceLock;
use std::time::Instant; use std::time::Instant;
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind}; use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind};
use kb_embed_local::FastembedEmbedder; use kebab_embed_local::FastembedEmbedder;
/// Build a `Config` whose `data_dir` lives in a per-process temp dir so /// Build a `Config` whose `data_dir` lives in a per-process temp dir so
/// the test never writes into the developer's real `~/.local/share/kb`. /// the test never writes into the developer's real `~/.local/share/kebab`.
/// Returns the `Config` and the `TempDir` guard (caller keeps the guard /// Returns the `Config` and the `TempDir` guard (caller keeps the guard
/// alive for the test duration). /// alive for the test duration).
fn test_config() -> (kb_config::Config, tempfile::TempDir) { fn test_config() -> (kebab_config::Config, tempfile::TempDir) {
let tmp = tempfile::tempdir().expect("create tempdir"); let tmp = tempfile::tempdir().expect("create tempdir");
let mut cfg = kb_config::Config::defaults(); let mut cfg = kebab_config::Config::defaults();
cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned(); cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned();
// model_dir keeps its default `{data_dir}/models` template; the // model_dir keeps its default `{data_dir}/models` template; the
// adapter resolves it itself. // adapter resolves it itself.
@@ -141,12 +141,12 @@ fn output_vectors_are_l2_normalized() {
}, },
]; ];
let out = emb.embed(&inputs).expect("embed"); let out = emb.embed(&inputs).expect("embed");
// Per `kb_embed::assert_unit_norm` docs: `5e-4` is the safe bound at // Per `kebab_embed::assert_unit_norm` docs: `5e-4` is the safe bound at
// 384 dims (f32::EPSILON × √384 ≈ 2.3e-6, but ONNX kernels add // 384 dims (f32::EPSILON × √384 ≈ 2.3e-6, but ONNX kernels add
// their own per-component noise; 1e-3 is very generous and matches // their own per-component noise; 1e-3 is very generous and matches
// the spec's `± 1e-3`). // the spec's `± 1e-3`).
kb_embed::assert_unit_norm(&out, 1e-3); kebab_embed::assert_unit_norm(&out, 1e-3);
kb_embed::assert_vector_shape(&out, 384); kebab_embed::assert_vector_shape(&out, 384);
} }
// ─── determinism ────────────────────────────────────────────────────── // ─── determinism ──────────────────────────────────────────────────────

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-embed" name = "kebab-embed"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Embedder trait re-exports + opt-in deterministic MockEmbedder for downstream tests" description = "Embedder trait re-exports + opt-in deterministic MockEmbedder for downstream tests"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
serde = { workspace = true } serde = { workspace = true }
thiserror = { workspace = true } thiserror = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }

View File

@@ -1,9 +1,9 @@
//! `kb-embed` — thin re-export crate for the [`Embedder`] trait surface. //! `kb-embed` — thin re-export crate for the [`Embedder`] trait surface.
//! //!
//! This crate exists so downstream code (`kb-store-vector`, `kb-search`, //! This crate exists so downstream code (`kb-store-vector`, `kb-search`,
//! adapters in p3-2) can `use kb_embed::Embedder` and stay stable across //! adapters in p3-2) can `use kebab_embed::Embedder` and stay stable across
//! kb-core reorganizations. It defines **no new types**; everything is a //! kb-core reorganizations. It defines **no new types**; everything is a
//! re-export of [`kb_core`]. //! re-export of [`kebab_core`].
//! //!
//! ## Mock implementation //! ## Mock implementation
//! //!
@@ -11,7 +11,7 @@
//! deterministic test double. Real adapters (fastembed, candle, ollama-embed) //! deterministic test double. Real adapters (fastembed, candle, ollama-embed)
//! live in p3-2 and MUST NOT be implemented here. //! live in p3-2 and MUST NOT be implemented here.
//! //!
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.1, §7.2, //! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §7.1, §7.2,
//! §11 for the contract. //! §11 for the contract.
// ── Trait re-exports ────────────────────────────────────────────────────── // ── Trait re-exports ──────────────────────────────────────────────────────
@@ -19,7 +19,7 @@
// Per spec §7.2 — these are the only public-surface types this crate offers. // Per spec §7.2 — these are the only public-surface types this crate offers.
// Adding new types is forbidden by the task contract. // Adding new types is forbidden by the task contract.
pub use kb_core::{ pub use kebab_core::{
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
}; };

View File

@@ -38,7 +38,7 @@
//! * Different `text` → different output with overwhelming probability. //! * Different `text` → different output with overwhelming probability.
//! * All output components are finite (`is_finite()`). //! * All output components are finite (`is_finite()`).
use kb_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion}; use kebab_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
/// Deterministic test double. See module docs for the hashing recipe. /// Deterministic test double. See module docs for the hashing recipe.
pub struct MockEmbedder { pub struct MockEmbedder {

View File

@@ -4,7 +4,7 @@
#![cfg(feature = "mock")] #![cfg(feature = "mock")]
use kb_embed::{ use kebab_embed::{
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, MockEmbedder, Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, MockEmbedder,
assert_unit_norm, assert_vector_shape, assert_unit_norm, assert_vector_shape,
}; };

View File

@@ -5,7 +5,7 @@
//! Runs under both `cargo test -p kb-embed` and //! Runs under both `cargo test -p kb-embed` and
//! `cargo test -p kb-embed --features mock`. //! `cargo test -p kb-embed --features mock`.
use kb_embed::{ use kebab_embed::{
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
assert_vector_shape, assert_vector_shape,
}; };

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-eval" name = "kebab-eval"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -9,10 +9,10 @@ description = "Golden-fixture eval runner: load YAML, drive kb-app search/ask,
[dependencies] [dependencies]
# Allowed deps per p5-1 spec — domain types + facade only. # Allowed deps per p5-1 spec — domain types + facade only.
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
kb-app = { path = "../kb-app" } kebab-app = { path = "../kebab-app" }
kb-store-sqlite = { path = "../kb-store-sqlite" } kebab-store-sqlite = { path = "../kebab-store-sqlite" }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
serde_yaml = { workspace = true } serde_yaml = { workspace = true }

View File

@@ -14,9 +14,9 @@ use std::fmt::Write as _;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use kb_config::Config; use kebab_config::Config;
use kb_core::{ChunkId, DocumentId}; use kebab_core::{ChunkId, DocumentId};
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use crate::loader::load_golden_set; use crate::loader::load_golden_set;
use crate::metrics::{ use crate::metrics::{
@@ -300,7 +300,7 @@ fn extract_chunker_version(snapshot_json: &str) -> Option<String> {
} }
fn parse_results( fn parse_results(
rows: &[kb_store_sqlite::EvalQueryResultRecord], rows: &[kebab_store_sqlite::EvalQueryResultRecord],
) -> Result<HashMap<String, QueryResult>> { ) -> Result<HashMap<String, QueryResult>> {
let mut out = HashMap::with_capacity(rows.len()); let mut out = HashMap::with_capacity(rows.len());
for row in rows { for row in rows {
@@ -456,9 +456,9 @@ mod tests {
let g = GoldenQuery { let g = GoldenQuery {
id: "q1".into(), id: "q1".into(),
query: "q".into(), query: "q".into(),
lang: kb_core::Lang(String::new()), lang: kebab_core::Lang(String::new()),
expected_doc_ids: vec![], expected_doc_ids: vec![],
expected_chunk_ids: vec![kb_core::ChunkId("c1".into())], expected_chunk_ids: vec![kebab_core::ChunkId("c1".into())],
must_contain: vec![], must_contain: vec![],
forbidden: vec![], forbidden: vec![],
difficulty: None, difficulty: None,

View File

@@ -1,7 +1,7 @@
//! `kb-eval` — golden-fixture eval runner (P5-1). //! `kb-eval` — golden-fixture eval runner (P5-1).
//! //!
//! Loads `fixtures/golden_queries.yaml`, runs each entry through the //! Loads `fixtures/golden_queries.yaml`, runs each entry through the
//! [`kb_app`] facade (lexical / vector / hybrid + optional RAG), and //! [`kebab_app`] facade (lexical / vector / hybrid + optional RAG), and
//! persists results into `eval_runs` / `eval_query_results` plus //! persists results into `eval_runs` / `eval_query_results` plus
//! `runs_dir/<run_id>/per_query.jsonl` (design §5.7, §6.3). //! `runs_dir/<run_id>/per_query.jsonl` (design §5.7, §6.3).
//! //!

View File

@@ -6,7 +6,7 @@
//! tests that don't have a SQLite store handy. //! tests that don't have a SQLite store handy.
//! - [`load_golden_set_validated`] — additionally verifies every //! - [`load_golden_set_validated`] — additionally verifies every
//! `expected_doc_id` / `expected_chunk_id` exists in the SQLite DB //! `expected_doc_id` / `expected_chunk_id` exists in the SQLite DB
//! the supplied [`kb_config::Config`] points at. Used by //! the supplied [`kebab_config::Config`] points at. Used by
//! [`crate::run_eval`] in production so a stale golden set fails //! [`crate::run_eval`] in production so a stale golden set fails
//! fast at run start. //! fast at run start.
@@ -14,7 +14,7 @@ use std::collections::{BTreeSet, HashSet};
use std::path::Path; use std::path::Path;
use anyhow::{Context, Result, anyhow}; use anyhow::{Context, Result, anyhow};
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use crate::types::GoldenQuery; use crate::types::GoldenQuery;
@@ -43,11 +43,11 @@ pub fn load_golden_set(path: &Path) -> Result<Vec<GoldenQuery>> {
/// Currently used only by the in-module tests below; production code /// Currently used only by the in-module tests below; production code
/// inlines `load_golden_set` + `validate_against_db` in /// inlines `load_golden_set` + `validate_against_db` in
/// [`crate::run_eval_with_config`] so the validation can run against /// [`crate::run_eval_with_config`] so the validation can run against
/// an already-opened [`kb_config::Config`] without re-parsing YAML. /// an already-opened [`kebab_config::Config`] without re-parsing YAML.
#[cfg(test)] #[cfg(test)]
pub(crate) fn load_golden_set_validated( pub(crate) fn load_golden_set_validated(
yaml_path: &Path, yaml_path: &Path,
cfg: &kb_config::Config, cfg: &kebab_config::Config,
) -> Result<Vec<GoldenQuery>> { ) -> Result<Vec<GoldenQuery>> {
let queries = load_golden_set(yaml_path)?; let queries = load_golden_set(yaml_path)?;
validate_against_db(&queries, cfg)?; validate_against_db(&queries, cfg)?;
@@ -73,7 +73,7 @@ fn check_unique_ids(queries: &[GoldenQuery]) -> Result<()> {
/// Read every doc_id / chunk_id referenced by `queries` and confirm /// Read every doc_id / chunk_id referenced by `queries` and confirm
/// SQLite has rows for them. Builds a sorted, deduplicated error /// SQLite has rows for them. Builds a sorted, deduplicated error
/// message listing every missing ID. /// message listing every missing ID.
pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kb_config::Config) -> Result<()> { pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kebab_config::Config) -> Result<()> {
// Short-circuit when there is nothing to validate — saves opening // Short-circuit when there is nothing to validate — saves opening
// SQLite for golden sets that omit expected_*_ids entirely. // SQLite for golden sets that omit expected_*_ids entirely.
let needs_check = queries let needs_check = queries
@@ -140,8 +140,8 @@ mod tests {
//! `tests/loader.rs`; only the validated-variant cases need to sit //! `tests/loader.rs`; only the validated-variant cases need to sit
//! next to the function so they can see the `pub(crate)` symbol. //! next to the function so they can see the `pub(crate)` symbol.
use super::*; use super::*;
use kb_config::Config; use kebab_config::Config;
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use rusqlite::params; use rusqlite::params;
use std::fs; use std::fs;
use tempfile::tempdir; use tempfile::tempdir;

View File

@@ -13,9 +13,9 @@ use std::path::PathBuf;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
use kb_config::Config; use kebab_config::Config;
use kb_core::{ChunkId, Citation, DocumentId}; use kebab_core::{ChunkId, Citation, DocumentId};
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use crate::loader::load_golden_set; use crate::loader::load_golden_set;
use crate::types::{GoldenQuery, QueryResult}; use crate::types::{GoldenQuery, QueryResult};
@@ -40,10 +40,10 @@ const STORAGE_DECIMALS: u32 = 4;
/// (P5-1) used — otherwise `expected_*` / `must_contain` won't line up /// (P5-1) used — otherwise `expected_*` / `must_contain` won't line up
/// with the stored `query_id`s. `pub(crate)` so the runner shares the /// with the stored `query_id`s. `pub(crate)` so the runner shares the
/// exact same name + default rather than duplicating constants. /// exact same name + default rather than duplicating constants.
pub(crate) const KB_EVAL_GOLDEN: &str = "KB_EVAL_GOLDEN"; pub(crate) const KEBAB_EVAL_GOLDEN: &str = "KEBAB_EVAL_GOLDEN";
/// Default golden YAML path (relative to CWD when set). Same /// Default golden YAML path (relative to CWD when set). Same
/// rationale as [`KB_EVAL_GOLDEN`] — single source of truth. /// rationale as [`KEBAB_EVAL_GOLDEN`] — single source of truth.
pub(crate) const DEFAULT_GOLDEN_PATH: &str = "fixtures/golden_queries.yaml"; pub(crate) const DEFAULT_GOLDEN_PATH: &str = "fixtures/golden_queries.yaml";
/// Aggregate metrics for one stored eval run. /// Aggregate metrics for one stored eval run.
@@ -151,7 +151,7 @@ pub fn store_aggregate_with_config(
/// the runner uses, same default path. Pulled into its own helper so /// the runner uses, same default path. Pulled into its own helper so
/// `compare_runs` can share it. /// `compare_runs` can share it.
pub(crate) fn resolve_golden_path() -> PathBuf { pub(crate) fn resolve_golden_path() -> PathBuf {
match std::env::var(KB_EVAL_GOLDEN) { match std::env::var(KEBAB_EVAL_GOLDEN) {
Ok(s) if !s.is_empty() => PathBuf::from(s), Ok(s) if !s.is_empty() => PathBuf::from(s),
_ => PathBuf::from(DEFAULT_GOLDEN_PATH), _ => PathBuf::from(DEFAULT_GOLDEN_PATH),
} }
@@ -161,7 +161,7 @@ fn load_golden_for_metrics() -> Result<Vec<GoldenQuery>> {
let path = resolve_golden_path(); let path = resolve_golden_path();
load_golden_set(&path).with_context(|| { load_golden_set(&path).with_context(|| {
format!( format!(
"load golden set from {} (override via KB_EVAL_GOLDEN)", "load golden set from {} (override via KEBAB_EVAL_GOLDEN)",
path.display() path.display()
) )
}) })
@@ -175,7 +175,7 @@ fn load_golden_for_metrics() -> Result<Vec<GoldenQuery>> {
/// `tasks/p5/p5-2-metrics-compare.md`), this will need to take one. /// `tasks/p5/p5-2-metrics-compare.md`), this will need to take one.
pub(crate) fn aggregate_from_rows( pub(crate) fn aggregate_from_rows(
queries: &[GoldenQuery], queries: &[GoldenQuery],
rows: &[kb_store_sqlite::EvalQueryResultRecord], rows: &[kebab_store_sqlite::EvalQueryResultRecord],
) -> Result<AggregateMetrics> { ) -> Result<AggregateMetrics> {
let golden_by_id: HashMap<&str, &GoldenQuery> = let golden_by_id: HashMap<&str, &GoldenQuery> =
queries.iter().map(|q| (q.id.as_str(), q)).collect(); queries.iter().map(|q| (q.id.as_str(), q)).collect();
@@ -395,14 +395,14 @@ fn ratio_or_zero(num: u32, denom: u32) -> f32 {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use kb_core::{ use kebab_core::{
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, RetrievalDetail, SearchHit, ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, RetrievalDetail, SearchHit,
SearchMode, SearchMode,
}; };
use kb_core::asset::WorkspacePath; use kebab_core::asset::WorkspacePath;
use kb_core::media::Lang; use kebab_core::media::Lang;
use kb_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId}; use kebab_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId};
use kb_core::versions::PromptTemplateVersion; use kebab_core::versions::PromptTemplateVersion;
use time::OffsetDateTime; use time::OffsetDateTime;
fn gq(id: &str, expected_chunks: &[&str], expected_docs: &[&str]) -> GoldenQuery { fn gq(id: &str, expected_chunks: &[&str], expected_docs: &[&str]) -> GoldenQuery {
@@ -460,9 +460,9 @@ mod tests {
} }
fn record(id: &str, hits: Vec<SearchHit>, error: Option<String>, answer: Option<Answer>) fn record(id: &str, hits: Vec<SearchHit>, error: Option<String>, answer: Option<Answer>)
-> kb_store_sqlite::EvalQueryResultRecord -> kebab_store_sqlite::EvalQueryResultRecord
{ {
kb_store_sqlite::EvalQueryResultRecord { kebab_store_sqlite::EvalQueryResultRecord {
query_id: id.into(), query_id: id.into(),
result_json: serde_json::to_string(&qr(id, hits, error, answer)).unwrap(), result_json: serde_json::to_string(&qr(id, hits, error, answer)).unwrap(),
} }

View File

@@ -6,14 +6,14 @@ use std::path::PathBuf;
use std::time::Instant; use std::time::Instant;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use kb_app::App; use kebab_app::App;
use kb_config::expand_path; use kebab_config::expand_path;
use kb_core::{SearchFilters, SearchQuery}; use kebab_core::{SearchFilters, SearchQuery};
use kb_store_sqlite::{EvalRunRow, SqliteStore}; use kebab_store_sqlite::{EvalRunRow, SqliteStore};
use time::OffsetDateTime; use time::OffsetDateTime;
use crate::loader::{load_golden_set, validate_against_db}; use crate::loader::{load_golden_set, validate_against_db};
use crate::metrics::{DEFAULT_GOLDEN_PATH, KB_EVAL_GOLDEN}; use crate::metrics::{DEFAULT_GOLDEN_PATH, KEBAB_EVAL_GOLDEN};
use crate::types::{EvalRun, EvalRunOpts, GoldenQuery, QueryResult}; use crate::types::{EvalRun, EvalRunOpts, GoldenQuery, QueryResult};
/// Convert a wall-clock duration since `start` into milliseconds clamped /// Convert a wall-clock duration since `start` into milliseconds clamped
@@ -25,18 +25,18 @@ fn elapsed_ms_u32(start: Instant) -> u32 {
} }
/// Run the golden suite end-to-end against the active XDG-loaded /// Run the golden suite end-to-end against the active XDG-loaded
/// [`kb_config::Config`]. Wraps [`run_eval_with_config`] with /// [`kebab_config::Config`]. Wraps [`run_eval_with_config`] with
/// `Config::load(None)`. /// `Config::load(None)`.
pub fn run_eval(opts: &EvalRunOpts) -> Result<EvalRun> { pub fn run_eval(opts: &EvalRunOpts) -> Result<EvalRun> {
let cfg = kb_config::Config::load(None).context("load Config for run_eval")?; let cfg = kebab_config::Config::load(None).context("load Config for run_eval")?;
run_eval_with_config(&cfg, opts) run_eval_with_config(&cfg, opts)
} }
/// Run the golden suite end-to-end against an explicit /// Run the golden suite end-to-end against an explicit
/// [`kb_config::Config`]. Used by integration tests (TempDir-backed /// [`kebab_config::Config`]. Used by integration tests (TempDir-backed
/// data_dir) and any future caller that wants to drive the runner /// data_dir) and any future caller that wants to drive the runner
/// against a non-default config. /// against a non-default config.
pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> { pub fn run_eval_with_config(cfg: &kebab_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> {
let started = Instant::now(); let started = Instant::now();
// ── 1. Load golden set ──────────────────────────────────────────────── // ── 1. Load golden set ────────────────────────────────────────────────
@@ -46,7 +46,7 @@ pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Resu
let golden_path = resolve_golden_path(); let golden_path = resolve_golden_path();
let queries = load_golden_set(&golden_path).with_context(|| { let queries = load_golden_set(&golden_path).with_context(|| {
format!( format!(
"load golden set from {} (override via KB_EVAL_GOLDEN)", "load golden set from {} (override via KEBAB_EVAL_GOLDEN)",
golden_path.display() golden_path.display()
) )
})?; })?;
@@ -55,7 +55,7 @@ pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Resu
// ── 2. Mint identifiers + open store ────────────────────────────────── // ── 2. Mint identifiers + open store ──────────────────────────────────
let run_id = mint_run_id(); let run_id = mint_run_id();
let created_at = OffsetDateTime::now_utc(); let created_at = OffsetDateTime::now_utc();
let commit_hash = std::env::var("KB_COMMIT_HASH") let commit_hash = std::env::var("KEBAB_COMMIT_HASH")
.ok() .ok()
.filter(|s| !s.is_empty()); .filter(|s| !s.is_empty());
@@ -110,7 +110,7 @@ pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Resu
let duration_ms = elapsed_ms_u32(started); let duration_ms = elapsed_ms_u32(started);
tracing::info!( tracing::info!(
target: "kb-eval", target: "kebab-eval",
run_id = %run_id, run_id = %run_id,
suite = %opts.suite, suite = %opts.suite,
queries = per_query.len(), queries = per_query.len(),
@@ -136,11 +136,11 @@ fn mint_run_id() -> String {
format!("run_{id}") format!("run_{id}")
} }
/// Resolve the golden YAML path. Honors the `KB_EVAL_GOLDEN` env /// Resolve the golden YAML path. Honors the `KEBAB_EVAL_GOLDEN` env
/// override; otherwise relative to CWD. The path is NOT expanded for /// override; otherwise relative to CWD. The path is NOT expanded for
/// `~` / `${...}` placeholders — direct file paths only. /// `~` / `${...}` placeholders — direct file paths only.
fn resolve_golden_path() -> PathBuf { fn resolve_golden_path() -> PathBuf {
match std::env::var(KB_EVAL_GOLDEN) { match std::env::var(KEBAB_EVAL_GOLDEN) {
Ok(s) if !s.is_empty() => PathBuf::from(s), Ok(s) if !s.is_empty() => PathBuf::from(s),
_ => PathBuf::from(DEFAULT_GOLDEN_PATH), _ => PathBuf::from(DEFAULT_GOLDEN_PATH),
} }
@@ -167,7 +167,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
// call did not already error out (we want one error per query, not // call did not already error out (we want one error per query, not
// a duplicated one). // a duplicated one).
let answer = if opts.with_rag && error.is_none() { let answer = if opts.with_rag && error.is_none() {
let ask_opts = kb_app::AskOpts { let ask_opts = kebab_app::AskOpts {
k: opts.k, k: opts.k,
explain: true, explain: true,
mode: opts.mode, mode: opts.mode,
@@ -206,7 +206,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
/// stable run-time property of the config alone. P5-2 may compose it /// stable run-time property of the config alone. P5-2 may compose it
/// from `embedding.{model,version,dimensions}` if it needs the field /// from `embedding.{model,version,dimensions}` if it needs the field
/// for compare reports. /// for compare reports.
fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> { fn build_config_snapshot(cfg: &kebab_config::Config) -> Result<serde_json::Value> {
let cfg_value = serde_json::to_value(cfg).context("serialize Config")?; let cfg_value = serde_json::to_value(cfg).context("serialize Config")?;
Ok(serde_json::json!({ Ok(serde_json::json!({
"config": cfg_value, "config": cfg_value,
@@ -234,7 +234,7 @@ fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> {
/// `run_id` collision would already have failed the `eval_runs` /// `run_id` collision would already have failed the `eval_runs`
/// PRIMARY KEY upstream). /// PRIMARY KEY upstream).
fn write_per_query_jsonl( fn write_per_query_jsonl(
cfg: &kb_config::Config, cfg: &kebab_config::Config,
run_id: &str, run_id: &str,
per_query: &[QueryResult], per_query: &[QueryResult],
) -> Result<()> { ) -> Result<()> {

View File

@@ -4,7 +4,7 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use time::OffsetDateTime; use time::OffsetDateTime;
use kb_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode}; use kebab_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode};
/// One golden query loaded from `fixtures/golden_queries.yaml`. /// One golden query loaded from `fixtures/golden_queries.yaml`.
/// ///
@@ -41,10 +41,10 @@ pub struct EvalRunOpts {
/// Suite label persisted into `eval_runs.suite`. The shipped /// Suite label persisted into `eval_runs.suite`. The shipped
/// fixture is `"golden"`; other suites can reuse the same runner. /// fixture is `"golden"`; other suites can reuse the same runner.
pub suite: String, pub suite: String,
/// Retrieval mode forwarded to every `kb_app::search` / /// Retrieval mode forwarded to every `kebab_app::search` /
/// `kb_app::ask` call inside the run. /// `kebab_app::ask` call inside the run.
pub mode: SearchMode, pub mode: SearchMode,
/// When `true`, also call `kb_app::ask` per query and record the /// When `true`, also call `kebab_app::ask` per query and record the
/// resulting `Answer` on the `QueryResult`. /// resulting `Answer` on the `QueryResult`.
pub with_rag: bool, pub with_rag: bool,
/// Top-k forwarded to retrieval (and `AskOpts.k` when `with_rag`). /// Top-k forwarded to retrieval (and `AskOpts.k` when `with_rag`).

View File

@@ -8,7 +8,7 @@
use std::fs; use std::fs;
use kb_eval::load_golden_set; use kebab_eval::load_golden_set;
use tempfile::tempdir; use tempfile::tempdir;
// ── 1. parser accepts well-formed YAML with optional fields ────────────────── // ── 1. parser accepts well-formed YAML with optional fields ──────────────────

View File

@@ -9,17 +9,17 @@
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::PathBuf;
use kb_config::Config; use kebab_config::Config;
use kb_core::{ use kebab_core::{
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, Lang, ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, Lang,
RetrievalDetail, SearchHit, SearchMode, RetrievalDetail, SearchHit, SearchMode,
asset::WorkspacePath, asset::WorkspacePath,
}; };
use kb_eval::{ use kebab_eval::{
AggregateMetrics, CompareOpts, CompareReport, ComparisonKind, GoldenQuery, QueryResult, AggregateMetrics, CompareOpts, CompareReport, ComparisonKind, GoldenQuery, QueryResult,
compare_runs_with_config, compute_aggregate_with_config, store_aggregate_with_config, compare_runs_with_config, compute_aggregate_with_config, store_aggregate_with_config,
}; };
use kb_store_sqlite::{EvalRunRow, SqliteStore}; use kebab_store_sqlite::{EvalRunRow, SqliteStore};
use tempfile::TempDir; use tempfile::TempDir;
use time::OffsetDateTime; use time::OffsetDateTime;
@@ -34,7 +34,7 @@ fn cfg_with_data_dir(tmp: &TempDir, golden_yaml: &str) -> Config {
// SAFELY scoped — `set_var` is process-global so callers serialise // SAFELY scoped — `set_var` is process-global so callers serialise
// tests via the `serial_test`-style guard below. // tests via the `serial_test`-style guard below.
unsafe { unsafe {
std::env::set_var("KB_EVAL_GOLDEN", &golden_path); std::env::set_var("KEBAB_EVAL_GOLDEN", &golden_path);
} }
cfg cfg
} }
@@ -127,9 +127,9 @@ fn write_run(
store.record_eval_run_with_results(&row, &results).unwrap(); store.record_eval_run_with_results(&row, &results).unwrap();
} }
/// Each test mutates a process-global env var (`KB_EVAL_GOLDEN`) and /// Each test mutates a process-global env var (`KEBAB_EVAL_GOLDEN`) and
/// expects to see its own write. Take this mutex around the body of /// expects to see its own write. Take this mutex around the body of
/// every test that touches `KB_EVAL_GOLDEN` so two concurrent test /// every test that touches `KEBAB_EVAL_GOLDEN` so two concurrent test
/// threads don't trip over each other's golden YAML. /// threads don't trip over each other's golden YAML.
fn env_guard() -> std::sync::MutexGuard<'static, ()> { fn env_guard() -> std::sync::MutexGuard<'static, ()> {
use std::sync::{Mutex, OnceLock}; use std::sync::{Mutex, OnceLock};
@@ -259,7 +259,7 @@ fn compare_runs_classifies_win_loss_draw_regression() {
drop(store); drop(store);
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap(); let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
let by_id: std::collections::HashMap<&str, &kb_eval::QueryComparison> = let by_id: std::collections::HashMap<&str, &kebab_eval::QueryComparison> =
report.per_query.iter().map(|c| (c.query_id.as_str(), c)).collect(); report.per_query.iter().map(|c| (c.query_id.as_str(), c)).collect();
assert_eq!(by_id["q-001"].kind, ComparisonKind::Loss); assert_eq!(by_id["q-001"].kind, ComparisonKind::Loss);
assert_eq!(by_id["q-002"].kind, ComparisonKind::Win); assert_eq!(by_id["q-002"].kind, ComparisonKind::Win);
@@ -414,7 +414,7 @@ fn render_report_md_is_human_readable() {
drop(store); drop(store);
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap(); let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
let md = kb_eval::render_report_md(&report); let md = kebab_eval::render_report_md(&report);
assert!(md.starts_with("# Eval compare:"), "md = {md}"); assert!(md.starts_with("# Eval compare:"), "md = {md}");
assert!(md.contains("hit@1")); assert!(md.contains("hit@1"));
assert!(md.contains("MRR")); assert!(md.contains("MRR"));

View File

@@ -1,13 +1,13 @@
//! Runner integration tests for `kb-eval` (P5-1). //! Runner integration tests for `kb-eval` (P5-1).
//! //!
//! Drives [`kb_eval::run_eval_with_config`] end-to-end against a //! Drives [`kebab_eval::run_eval_with_config`] end-to-end against a
//! TempDir-backed config: //! TempDir-backed config:
//! //!
//! - tiny seeded SQLite corpus (3 docs / 3 chunks) used as the //! - tiny seeded SQLite corpus (3 docs / 3 chunks) used as the
//! workspace's source-of-truth, //! workspace's source-of-truth,
//! - lexical-only retrieval (`SearchMode::Lexical`) so no embedder is //! - lexical-only retrieval (`SearchMode::Lexical`) so no embedder is
//! required (`models.embedding.provider = "none"`), //! required (`models.embedding.provider = "none"`),
//! - golden YAML pointed at via `KB_EVAL_GOLDEN`. //! - golden YAML pointed at via `KEBAB_EVAL_GOLDEN`.
//! //!
//! Determinism: lexical-only with a fixed seed corpus produces //! Determinism: lexical-only with a fixed seed corpus produces
//! byte-identical `per_query.jsonl` content (modulo `run_id` / //! byte-identical `per_query.jsonl` content (modulo `run_id` /
@@ -17,14 +17,14 @@ use std::fs;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use std::sync::Mutex; use std::sync::Mutex;
use kb_config::Config; use kebab_config::Config;
use kb_core::SearchMode; use kebab_core::SearchMode;
use kb_eval::{EvalRunOpts, QueryResult, run_eval_with_config}; use kebab_eval::{EvalRunOpts, QueryResult, run_eval_with_config};
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use rusqlite::params; use rusqlite::params;
use tempfile::TempDir; use tempfile::TempDir;
/// `KB_EVAL_GOLDEN` is process-global state. Tests touching it must /// `KEBAB_EVAL_GOLDEN` is process-global state. Tests touching it must
/// serialize so they don't trample each other when `cargo test` /// serialize so they don't trample each other when `cargo test`
/// runs them in parallel. /// runs them in parallel.
static GOLDEN_ENV_LOCK: Mutex<()> = Mutex::new(()); static GOLDEN_ENV_LOCK: Mutex<()> = Mutex::new(());
@@ -110,7 +110,7 @@ fn seed_corpus(store: &SqliteStore) {
// Build the FTS index so lexical search returns hits. Reuses the // Build the FTS index so lexical search returns hits. Reuses the
// same connection guard rather than reopening — the SAVEPOINT // same connection guard rather than reopening — the SAVEPOINT
// protocol nests correctly under the existing read_conn lock. // protocol nests correctly under the existing read_conn lock.
kb_store_sqlite::rebuild_chunks_fts(&conn).unwrap(); kebab_store_sqlite::rebuild_chunks_fts(&conn).unwrap();
drop(conn); drop(conn);
} }
@@ -143,19 +143,19 @@ fn lexical_opts() -> EvalRunOpts {
} }
} }
/// Run the eval after pointing `KB_EVAL_GOLDEN` at `yaml`. The env /// Run the eval after pointing `KEBAB_EVAL_GOLDEN` at `yaml`. The env
/// guard must outlive the call so concurrent tests don't reset the /// guard must outlive the call so concurrent tests don't reset the
/// var mid-run. /// var mid-run.
fn run_with_golden<F: FnOnce() -> R, R>(yaml: &Path, f: F) -> R { fn run_with_golden<F: FnOnce() -> R, R>(yaml: &Path, f: F) -> R {
let _g = GOLDEN_ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner()); let _g = GOLDEN_ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
// SAFETY: `KB_EVAL_GOLDEN` is a benign env var; the GOLDEN_ENV_LOCK // SAFETY: `KEBAB_EVAL_GOLDEN` is a benign env var; the GOLDEN_ENV_LOCK
// serializes mutations so concurrent tests don't race. // serializes mutations so concurrent tests don't race.
unsafe { unsafe {
std::env::set_var("KB_EVAL_GOLDEN", yaml); std::env::set_var("KEBAB_EVAL_GOLDEN", yaml);
} }
let out = f(); let out = f();
unsafe { unsafe {
std::env::remove_var("KB_EVAL_GOLDEN"); std::env::remove_var("KEBAB_EVAL_GOLDEN");
} }
out out
} }

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-llm-local" name = "kebab-llm-local"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,9 +8,9 @@ repository = { workspace = true }
description = "Ollama HTTP adapter implementing kb_core::LanguageModel via reqwest::blocking" description = "Ollama HTTP adapter implementing kb_core::LanguageModel via reqwest::blocking"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
kb-llm = { path = "../kb-llm" } kebab-llm = { path = "../kebab-llm" }
# `default-features = false` drops the `default-tls` (native-tls / openssl) # `default-features = false` drops the `default-tls` (native-tls / openssl)
# feature so we don't pull in a system OpenSSL; we explicitly pin rustls. # feature so we don't pull in a system OpenSSL; we explicitly pin rustls.
# Note: `default-features = false` does NOT drop tokio — reqwest 0.12's # Note: `default-features = false` does NOT drop tokio — reqwest 0.12's

View File

@@ -1,5 +1,5 @@
//! `kb-llm-local` — Ollama HTTP adapter implementing //! `kb-llm-local` — Ollama HTTP adapter implementing
//! [`kb_core::LanguageModel`] over the local `POST /api/generate` endpoint. //! [`kebab_core::LanguageModel`] over the local `POST /api/generate` endpoint.
//! //!
//! ## Why a separate crate //! ## Why a separate crate
//! //!
@@ -29,7 +29,7 @@
//! - **Lazy connect.** [`OllamaLanguageModel::new`] does not hit the network; //! - **Lazy connect.** [`OllamaLanguageModel::new`] does not hit the network;
//! the first error surfaces on [`LanguageModel::generate_stream`]. //! the first error surfaces on [`LanguageModel::generate_stream`].
//! //!
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.2, //! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §7.2,
//! §6.4 (`[models.llm]`), §0 Q5 (streaming), §10 (errors), and report §11.2 //! §6.4 (`[models.llm]`), §0 Q5 (streaming), §10 (errors), and report §11.2
//! (Ollama protocol notes). //! (Ollama protocol notes).
@@ -39,11 +39,11 @@ mod ollama;
pub use error::LlmError; pub use error::LlmError;
pub use ollama::OllamaLanguageModel; pub use ollama::OllamaLanguageModel;
// Re-export the trait surface so adapter consumers can `use kb_llm_local::*` // Re-export the trait surface so adapter consumers can `use kebab_llm_local::*`
// without also depending on `kb-llm` directly. These are the same symbols // without also depending on `kb-llm` directly. These are the same symbols
// `kb-llm` re-exports from `kb-core`; this crate adds **no new types** to // `kb-llm` re-exports from `kb-core`; this crate adds **no new types** to
// the trait surface (`LlmError` and `OllamaLanguageModel` are // the trait surface (`LlmError` and `OllamaLanguageModel` are
// implementation-side only). // implementation-side only).
pub use kb_llm::{ pub use kebab_llm::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
}; };

View File

@@ -41,7 +41,7 @@
use std::io::{BufRead, BufReader}; use std::io::{BufRead, BufReader};
use std::time::Duration; use std::time::Duration;
use kb_core::{ use kebab_core::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
}; };
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -68,7 +68,7 @@ pub struct OllamaLanguageModel {
} }
impl OllamaLanguageModel { impl OllamaLanguageModel {
/// Build an adapter from a workspace [`kb_config::Config`]. Reads /// Build an adapter from a workspace [`kebab_config::Config`]. Reads
/// `config.models.llm.{provider, model, endpoint, context_tokens, /// `config.models.llm.{provider, model, endpoint, context_tokens,
/// temperature, seed}`. /// temperature, seed}`.
/// ///
@@ -76,7 +76,7 @@ impl OllamaLanguageModel {
/// expected to have validated `provider == "ollama"`; this constructor /// expected to have validated `provider == "ollama"`; this constructor
/// trusts the config and would happily build for an unknown provider. /// trusts the config and would happily build for an unknown provider.
/// (Provider routing is the App layer's job, not the adapter's.) /// (Provider routing is the App layer's job, not the adapter's.)
pub fn new(config: &kb_config::Config) -> anyhow::Result<Self> { pub fn new(config: &kebab_config::Config) -> anyhow::Result<Self> {
let llm = &config.models.llm; let llm = &config.models.llm;
let client = reqwest::blocking::Client::builder() let client = reqwest::blocking::Client::builder()
.timeout(REQUEST_TIMEOUT) .timeout(REQUEST_TIMEOUT)
@@ -292,7 +292,7 @@ impl Iterator for OllamaStream {
// pipelines that expect a terminal frame still terminate. // pipelines that expect a terminal frame still terminate.
self.done = true; self.done = true;
tracing::warn!( tracing::warn!(
target: "kb_llm_local", target: "kebab_llm_local",
"ollama stream ended without a `done: true` frame; synthesizing Aborted", "ollama stream ended without a `done: true` frame; synthesizing Aborted",
); );
return Some(Ok(TokenChunk::Done { return Some(Ok(TokenChunk::Done {
@@ -361,14 +361,14 @@ impl Iterator for OllamaStream {
}; };
let prompt_tokens = line.prompt_eval_count.unwrap_or_else(|| { let prompt_tokens = line.prompt_eval_count.unwrap_or_else(|| {
tracing::warn!( tracing::warn!(
target: "kb_llm_local", target: "kebab_llm_local",
"ollama done frame missing prompt_eval_count; defaulting to 0", "ollama done frame missing prompt_eval_count; defaulting to 0",
); );
0 0
}); });
let completion_tokens = line.eval_count.unwrap_or_else(|| { let completion_tokens = line.eval_count.unwrap_or_else(|| {
tracing::warn!( tracing::warn!(
target: "kb_llm_local", target: "kebab_llm_local",
"ollama done frame missing eval_count; defaulting to 0", "ollama done frame missing eval_count; defaulting to 0",
); );
0 0

View File

@@ -2,8 +2,8 @@
//! relevant config fields and exposes them via the trait surface, all //! relevant config fields and exposes them via the trait surface, all
//! without touching the network (per design §7.2 lazy-connect contract). //! without touching the network (per design §7.2 lazy-connect contract).
use kb_config::Config; use kebab_config::Config;
use kb_llm_local::{LanguageModel, OllamaLanguageModel}; use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
#[test] #[test]
fn construction_with_default_config_returns_expected_model_ref() { fn construction_with_default_config_returns_expected_model_ref() {

View File

@@ -11,16 +11,16 @@
//! These hit `http://127.0.0.1:11434` directly and require an actual model //! These hit `http://127.0.0.1:11434` directly and require an actual model
//! pulled locally. CI runs default (non-ignored) tests only. //! pulled locally. CI runs default (non-ignored) tests only.
use kb_config::Config; use kebab_config::Config;
use kb_core::{GenerateRequest, TokenChunk}; use kebab_core::{GenerateRequest, TokenChunk};
use kb_llm_local::{LanguageModel, OllamaLanguageModel}; use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
#[test] #[test]
#[ignore = "requires a local Ollama daemon + pulled model"] #[ignore = "requires a local Ollama daemon + pulled model"]
fn real_ollama_streams_non_empty_response() { fn real_ollama_streams_non_empty_response() {
// Use whatever model the workspace defaults select. Override via the // Use whatever model the workspace defaults select. Override via the
// KB_MODELS_LLM_MODEL env var if you want a different one for this run // KEBAB_MODELS_LLM_MODEL env var if you want a different one for this run
// (e.g. `KB_MODELS_LLM_MODEL=qwen2.5:7b-instruct cargo test ... -- --ignored`). // (e.g. `KEBAB_MODELS_LLM_MODEL=qwen2.5:7b-instruct cargo test ... -- --ignored`).
let cfg = Config::load(None).expect("config should load"); let cfg = Config::load(None).expect("config should load");
let llm = OllamaLanguageModel::new(&cfg).unwrap(); let llm = OllamaLanguageModel::new(&cfg).unwrap();

View File

@@ -10,9 +10,9 @@
//! error mapping, finish-reason mapping, missing-counter degradation, and //! error mapping, finish-reason mapping, missing-counter degradation, and
//! determinism semantics. //! determinism semantics.
use kb_config::Config; use kebab_config::Config;
use kb_core::{FinishReason, GenerateRequest, TokenChunk}; use kebab_core::{FinishReason, GenerateRequest, TokenChunk};
use kb_llm_local::{LanguageModel, LlmError, OllamaLanguageModel}; use kebab_llm_local::{LanguageModel, LlmError, OllamaLanguageModel};
use wiremock::matchers::{method, path}; use wiremock::matchers::{method, path};
use wiremock::{Mock, MockServer, ResponseTemplate}; use wiremock::{Mock, MockServer, ResponseTemplate};

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-llm" name = "kebab-llm"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,7 +8,7 @@ repository = { workspace = true }
description = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests" description = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
anyhow = { workspace = true } anyhow = { workspace = true }
[features] [features]

View File

@@ -1,8 +1,8 @@
//! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface. //! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface.
//! //!
//! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can //! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can
//! `use kb_llm::LanguageModel` and stay stable across kb-core reorganizations. //! `use kebab_llm::LanguageModel` and stay stable across kb-core reorganizations.
//! It defines **no new types**; everything is a re-export of [`kb_core`]. //! It defines **no new types**; everything is a re-export of [`kebab_core`].
//! //!
//! ## Mock implementation //! ## Mock implementation
//! //!
@@ -12,7 +12,7 @@
//! from `generate_stream` itself (e.g., connection refused) before any chunk //! from `generate_stream` itself (e.g., connection refused) before any chunk
//! is yielded; the mock never does. //! is yielded; the mock never does.
//! //!
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.1, §7.2, //! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §7.1, §7.2,
//! §0 Q5 (streaming), §3.8 (`ModelRef`) for the contract. //! §0 Q5 (streaming), §3.8 (`ModelRef`) for the contract.
// ── Trait re-exports ────────────────────────────────────────────────────── // ── Trait re-exports ──────────────────────────────────────────────────────
@@ -20,7 +20,7 @@
// Per spec §7.2 — these are the only public-surface types this crate offers. // Per spec §7.2 — these are the only public-surface types this crate offers.
// Adding new types is forbidden by the task contract. // Adding new types is forbidden by the task contract.
pub use kb_core::{ pub use kebab_core::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
}; };

View File

@@ -36,7 +36,7 @@
//! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever //! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever
//! the constructor was given — the mock does not count. //! the constructor was given — the mock does not count.
use kb_core::{ use kebab_core::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
}; };

View File

@@ -4,7 +4,7 @@
#![cfg(feature = "mock")] #![cfg(feature = "mock")]
use kb_llm::{ use kebab_llm::{
FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage, FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage,
assert_finish_chunk, assert_finish_chunk,
}; };

View File

@@ -5,7 +5,7 @@
//! Runs under both `cargo test -p kb-llm` and //! Runs under both `cargo test -p kb-llm` and
//! `cargo test -p kb-llm --features mock`. //! `cargo test -p kb-llm --features mock`.
use kb_llm::{ use kebab_llm::{
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
assert_finish_chunk, assert_finish_chunk,
}; };

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-normalize" name = "kebab-normalize"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Lift parser output (kb-parse-types) into kb-core::CanonicalDocument with deterministic IDs (§3.4, §4.2, §4.3)" description = "Lift parser output (kb-parse-types) into kb-core::CanonicalDocument with deterministic IDs (§3.4, §4.2, §4.3)"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-parse-types = { path = "../kb-parse-types" } kebab-parse-types = { path = "../kebab-parse-types" }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
unicode-normalization = "0.1" unicode-normalization = "0.1"
@@ -23,5 +23,5 @@ tracing = { workspace = true }
# Forbidden as a regular dep per design §8 (kb-normalize must not depend # Forbidden as a regular dep per design §8 (kb-normalize must not depend
# on any specific parser); `cargo tree -p kb-normalize --depth 1` (the # on any specific parser); `cargo tree -p kb-normalize --depth 1` (the
# default scope, excluding dev-deps) confirms this. # default scope, excluding dev-deps) confirms this.
kb-parse-md = { path = "../kb-parse-md" } kebab-parse-md = { path = "../kebab-parse-md" }
serde_json = { workspace = true } serde_json = { workspace = true }

View File

@@ -1,5 +1,5 @@
//! `kb-normalize` — lift parser output (`kb-parse-types`) into a //! `kb-normalize` — lift parser output (`kb-parse-types`) into a
//! [`kb_core::CanonicalDocument`] with deterministic IDs. //! [`kebab_core::CanonicalDocument`] with deterministic IDs.
//! //!
//! Per design §3.4 (CanonicalDocument / Block), §4.2 (ID recipe), §4.3 //! Per design §3.4 (CanonicalDocument / Block), §4.2 (ID recipe), §4.3
//! (ordinal rule), §3.6 (Provenance), §8 (module boundaries). //! (ordinal rule), §3.6 (Provenance), §8 (module boundaries).
@@ -20,16 +20,16 @@
use std::collections::HashMap; use std::collections::HashMap;
use anyhow::Result; use anyhow::Result;
use kb_core::{ use kebab_core::{
Block, BlockId, CanonicalDocument, CodeBlock, CommonBlock, DocumentId, HeadingBlock, Block, BlockId, CanonicalDocument, CodeBlock, CommonBlock, DocumentId, HeadingBlock,
ImageRefBlock, Inline, Lang, ListBlock, Metadata, ParserVersion, Provenance, ProvenanceEvent, ImageRefBlock, Inline, Lang, ListBlock, Metadata, ParserVersion, Provenance, ProvenanceEvent,
ProvenanceKind, RawAsset, TableBlock, TextBlock, ProvenanceKind, RawAsset, TableBlock, TextBlock,
}; };
use kb_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind}; use kebab_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind};
use time::OffsetDateTime; use time::OffsetDateTime;
use unicode_normalization::UnicodeNormalization; use unicode_normalization::UnicodeNormalization;
pub use kb_core::{id_for_block, id_for_doc}; pub use kebab_core::{id_for_block, id_for_doc};
/// Build a [`CanonicalDocument`] from the raw asset, frontmatter /// Build a [`CanonicalDocument`] from the raw asset, frontmatter
/// metadata, parser blocks, parser version, and any warnings. /// metadata, parser blocks, parser version, and any warnings.
@@ -38,7 +38,7 @@ pub use kb_core::{id_for_block, id_for_doc};
/// ///
/// * `doc_id = id_for_doc(workspace_path, asset_id, parser_version)` — /// * `doc_id = id_for_doc(workspace_path, asset_id, parser_version)` —
/// `workspace_path` is consumed verbatim from `asset` (already NFC + /// `workspace_path` is consumed verbatim from `asset` (already NFC +
/// POSIX per `kb_core::normalize::to_posix`). /// POSIX per `kebab_core::normalize::to_posix`).
/// * `block_id = id_for_block(doc_id, kind, heading_path, ordinal, /// * `block_id = id_for_block(doc_id, kind, heading_path, ordinal,
/// source_span)` — `ordinal` is **0-based, scoped to (heading_path, /// source_span)` — `ordinal` is **0-based, scoped to (heading_path,
/// block_kind), in document order** per §4.3. /// block_kind), in document order** per §4.3.
@@ -96,7 +96,7 @@ pub fn build_canonical_document(
.collect(); .collect();
tracing::debug!( tracing::debug!(
target: "kb-normalize", target: "kebab-normalize",
"built canonical document doc_id={} blocks={}", "built canonical document doc_id={} blocks={}",
doc_id.0, doc_id.0,
lifted_blocks.len() lifted_blocks.len()
@@ -329,7 +329,7 @@ fn flatten_inline(i: &Inline, out: &mut String) {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use kb_core::{ use kebab_core::{
AssetId, AssetStorage, Checksum, MediaType, SourceSpan, SourceType, SourceUri, AssetId, AssetStorage, Checksum, MediaType, SourceSpan, SourceType, SourceUri,
TrustLevel, WorkspacePath, normalize::to_posix, TrustLevel, WorkspacePath, normalize::to_posix,
}; };
@@ -386,7 +386,7 @@ mod tests {
let h1_b = vec!["B".to_string()]; let h1_b = vec!["B".to_string()];
vec![ vec![
ParsedBlock { ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph, kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_a.clone(), heading_path: h1_a.clone(),
source_span: SourceSpan::Line { start: 1, end: 1 }, source_span: SourceSpan::Line { start: 1, end: 1 },
payload: ParsedPayload::Paragraph { payload: ParsedPayload::Paragraph {
@@ -395,7 +395,7 @@ mod tests {
}, },
}, },
ParsedBlock { ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph, kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_a.clone(), heading_path: h1_a.clone(),
source_span: SourceSpan::Line { start: 2, end: 2 }, source_span: SourceSpan::Line { start: 2, end: 2 },
payload: ParsedPayload::Paragraph { payload: ParsedPayload::Paragraph {
@@ -404,7 +404,7 @@ mod tests {
}, },
}, },
ParsedBlock { ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph, kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_a.clone(), heading_path: h1_a.clone(),
source_span: SourceSpan::Line { start: 3, end: 3 }, source_span: SourceSpan::Line { start: 3, end: 3 },
payload: ParsedPayload::Paragraph { payload: ParsedPayload::Paragraph {
@@ -413,7 +413,7 @@ mod tests {
}, },
}, },
ParsedBlock { ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Code, kind: kebab_parse_types::ParsedBlockKind::Code,
heading_path: h1_a, heading_path: h1_a,
source_span: SourceSpan::Line { start: 4, end: 5 }, source_span: SourceSpan::Line { start: 4, end: 5 },
payload: ParsedPayload::Code { payload: ParsedPayload::Code {
@@ -422,7 +422,7 @@ mod tests {
}, },
}, },
ParsedBlock { ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph, kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: h1_b, heading_path: h1_b,
source_span: SourceSpan::Line { start: 6, end: 6 }, source_span: SourceSpan::Line { start: 6, end: 6 },
payload: ParsedPayload::Paragraph { payload: ParsedPayload::Paragraph {
@@ -715,7 +715,7 @@ mod tests {
fn audio_ref_block_skipped_with_warning() { fn audio_ref_block_skipped_with_warning() {
let span = SourceSpan::Line { start: 1, end: 1 }; let span = SourceSpan::Line { start: 1, end: 1 };
let blocks = vec![ParsedBlock { let blocks = vec![ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::AudioRef, kind: kebab_parse_types::ParsedBlockKind::AudioRef,
heading_path: vec![], heading_path: vec![],
source_span: span, source_span: span,
payload: ParsedPayload::AudioRef { payload: ParsedPayload::AudioRef {
@@ -759,7 +759,7 @@ mod tests {
let nfd_heading = "\u{1100}\u{1161}".to_string(); // 가 (NFD) let nfd_heading = "\u{1100}\u{1161}".to_string(); // 가 (NFD)
let nfc_heading = "\u{AC00}".to_string(); // 가 (NFC) let nfc_heading = "\u{AC00}".to_string(); // 가 (NFC)
let mk_block = |heading: String| ParsedBlock { let mk_block = |heading: String| ParsedBlock {
kind: kb_parse_types::ParsedBlockKind::Paragraph, kind: kebab_parse_types::ParsedBlockKind::Paragraph,
heading_path: vec![heading], heading_path: vec![heading],
source_span: span.clone(), source_span: span.clone(),
payload: ParsedPayload::Paragraph { payload: ParsedPayload::Paragraph {

View File

@@ -15,12 +15,12 @@
use std::path::PathBuf; use std::path::PathBuf;
use kb_core::{ use kebab_core::{
AssetId, AssetStorage, Checksum, MediaType, ParserVersion, RawAsset, SourceUri, AssetId, AssetStorage, Checksum, MediaType, ParserVersion, RawAsset, SourceUri,
WorkspacePath, WorkspacePath,
}; };
use kb_normalize::build_canonical_document; use kebab_normalize::build_canonical_document;
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter}; use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
use serde_json::Value; use serde_json::Value;
use time::OffsetDateTime; use time::OffsetDateTime;

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-parse-md" name = "kebab-parse-md"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,8 +8,8 @@ repository = { workspace = true }
description = "Markdown frontmatter and block parsing into kb-core::Metadata / kb-parse-types intermediates" description = "Markdown frontmatter and block parsing into kb-core::Metadata / kb-parse-types intermediates"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-parse-types = { path = "../kb-parse-types" } kebab-parse-types = { path = "../kebab-parse-types" }
anyhow = { workspace = true } anyhow = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }

View File

@@ -1,10 +1,10 @@
//! Markdown body → flat `Vec<kb_parse_types::ParsedBlock>` (§3.4 / §3.7b). //! Markdown body → flat `Vec<kebab_parse_types::ParsedBlock>` (§3.4 / §3.7b).
//! //!
//! Uses `pulldown-cmark` (with GFM tables enabled at runtime via //! Uses `pulldown-cmark` (with GFM tables enabled at runtime via
//! `Options::ENABLE_TABLES`) to walk the body once and emit a flat list of //! `Options::ENABLE_TABLES`) to walk the body once and emit a flat list of
//! parsed blocks. Heading paths are computed by tracking the most-recent //! parsed blocks. Heading paths are computed by tracking the most-recent
//! heading text at each level. Source spans are reported as //! heading text at each level. Source spans are reported as
//! [`kb_core::SourceSpan::Line`] in 1-indexed file-line coordinates by //! [`kebab_core::SourceSpan::Line`] in 1-indexed file-line coordinates by
//! converting `pulldown-cmark`'s byte offsets to line numbers and adding the //! converting `pulldown-cmark`'s byte offsets to line numbers and adding the
//! caller-supplied `body_offset_lines`. //! caller-supplied `body_offset_lines`.
//! //!
@@ -19,10 +19,10 @@
//! //!
//! ## Inline filter //! ## Inline filter
//! //!
//! [`kb_core::Inline`] only models `Text | Code | Link | Strong | Emph`. //! [`kebab_core::Inline`] only models `Text | Code | Link | Strong | Emph`.
//! Inline images, footnotes, hard breaks, etc. are dropped silently per //! Inline images, footnotes, hard breaks, etc. are dropped silently per
//! design §3.4. Block-level `![alt](src)` (an image as the sole content of a //! design §3.4. Block-level `![alt](src)` (an image as the sole content of a
//! paragraph) is lifted to [`kb_parse_types::ParsedPayload::ImageRef`]. //! paragraph) is lifted to [`kebab_parse_types::ParsedPayload::ImageRef`].
//! //!
//! ## CRLF //! ## CRLF
//! //!
@@ -33,8 +33,8 @@
use std::ops::Range; use std::ops::Range;
use kb_core::{Inline, SourceSpan}; use kebab_core::{Inline, SourceSpan};
use kb_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind}; use kebab_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind};
use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd}; use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
/// Parse a Markdown body into a flat `Vec<ParsedBlock>` plus any warnings. /// Parse a Markdown body into a flat `Vec<ParsedBlock>` plus any warnings.
@@ -1595,7 +1595,7 @@ mod tests {
let (blocks, _) = parse(body, 1); let (blocks, _) = parse(body, 1);
assert_eq!(blocks.len(), 1, "expected single list block"); assert_eq!(blocks.len(), 1, "expected single list block");
match &blocks[0].kind { match &blocks[0].kind {
kb_parse_types::ParsedBlockKind::List => {} kebab_parse_types::ParsedBlockKind::List => {}
other => panic!("expected list, got {other:?}"), other => panic!("expected list, got {other:?}"),
} }
} }

View File

@@ -1,4 +1,4 @@
//! Markdown frontmatter parsing → `kb_core::Metadata`. //! Markdown frontmatter parsing → `kebab_core::Metadata`.
//! //!
//! Implements the contract pinned in design §0 Q9 (frontmatter derive table) //! Implements the contract pinned in design §0 Q9 (frontmatter derive table)
//! and §3.6 (Metadata shape). Produces structured warnings via //! and §3.6 (Metadata shape). Produces structured warnings via
@@ -18,8 +18,8 @@
use std::ops::Range; use std::ops::Range;
use std::sync::OnceLock; use std::sync::OnceLock;
use kb_core::{Metadata, SourceType, TrustLevel}; use kebab_core::{Metadata, SourceType, TrustLevel};
use kb_parse_types::{Warning, WarningKind}; use kebab_parse_types::{Warning, WarningKind};
use lingua::{IsoCode639_1, Language, LanguageDetector, LanguageDetectorBuilder}; use lingua::{IsoCode639_1, Language, LanguageDetector, LanguageDetectorBuilder};
use serde::Deserialize; use serde::Deserialize;
use serde_json::{Map, Value}; use serde_json::{Map, Value};
@@ -59,7 +59,7 @@ pub struct FrontmatterSpan {
} }
/// Parse the frontmatter (if any) from a Markdown byte slice into a /// Parse the frontmatter (if any) from a Markdown byte slice into a
/// `kb_core::Metadata`, applying the §0 Q9 derive table for missing fields. /// `kebab_core::Metadata`, applying the §0 Q9 derive table for missing fields.
/// ///
/// On a malformed frontmatter the function still returns `Ok` — the /// On a malformed frontmatter the function still returns `Ok` — the
/// frontmatter contents are discarded and the caller is told via a /// frontmatter contents are discarded and the caller is told via a
@@ -589,7 +589,7 @@ fn iso_code(lang: Language) -> &'static str {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use kb_core::{ use kebab_core::{
AssetId, WorkspacePath, AssetId, WorkspacePath,
ids::id_for_doc, ids::id_for_doc,
versions::ParserVersion, versions::ParserVersion,

View File

@@ -10,13 +10,13 @@
//! env-var pattern. Migrating kb-parse-md to the env-var style is out of //! env-var pattern. Migrating kb-parse-md to the env-var style is out of
//! scope; both styles are intentional for now. //! scope; both styles are intentional for now.
//! //!
//! Following the kb_core::Inline schema migration (struct-variant shape), //! Following the kebab_core::Inline schema migration (struct-variant shape),
//! `ParsedBlock` now serializes directly through serde — no projection //! `ParsedBlock` now serializes directly through serde — no projection
//! shim is required. Inlines surface as structured objects, e.g. //! shim is required. Inlines surface as structured objects, e.g.
//! `[{"kind":"text","text":"…"},{"kind":"code","code":"…"}]`. //! `[{"kind":"text","text":"…"},{"kind":"code","code":"…"}]`.
use kb_parse_md::parse_blocks; use kebab_parse_md::parse_blocks;
use kb_parse_types::{ParsedBlock, Warning}; use kebab_parse_types::{ParsedBlock, Warning};
use serde::Serialize; use serde::Serialize;
use serde_json::Value; use serde_json::Value;
use std::fs; use std::fs;

View File

@@ -5,7 +5,7 @@
//! and therefore stable; lingua autodetect over our fixtures is also //! and therefore stable; lingua autodetect over our fixtures is also
//! stable for the language set we configured. //! stable for the language set we configured.
use kb_parse_md::{BodyHints, parse_frontmatter}; use kebab_parse_md::{BodyHints, parse_frontmatter};
use serde::Serialize; use serde::Serialize;
use serde_json::Value; use serde_json::Value;
use std::fs; use std::fs;
@@ -18,9 +18,9 @@ use time::macros::datetime;
/// snapshot focuses on the §0 Q9 derive contract. /// snapshot focuses on the §0 Q9 derive contract.
#[derive(Serialize)] #[derive(Serialize)]
struct Snapshot { struct Snapshot {
metadata: kb_core::Metadata, metadata: kebab_core::Metadata,
span_present: bool, span_present: bool,
warnings: Vec<kb_parse_types::Warning>, warnings: Vec<kebab_parse_types::Warning>,
} }
fn fixtures_dir() -> PathBuf { fn fixtures_dir() -> PathBuf {

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-parse-types" name = "kebab-parse-types"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,5 +8,5 @@ repository = { workspace = true }
description = "Parser intermediate representations (no parser libs allowed)" description = "Parser intermediate representations (no parser libs allowed)"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
serde = { workspace = true } serde = { workspace = true }

View File

@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
pub struct ParsedBlock { pub struct ParsedBlock {
pub kind: ParsedBlockKind, pub kind: ParsedBlockKind,
pub heading_path: Vec<String>, pub heading_path: Vec<String>,
pub source_span: kb_core::SourceSpan, pub source_span: kebab_core::SourceSpan,
pub payload: ParsedPayload, pub payload: ParsedPayload,
} }
@@ -36,11 +36,11 @@ pub enum ParsedPayload {
}, },
Paragraph { Paragraph {
text: String, text: String,
inlines: Vec<kb_core::Inline>, inlines: Vec<kebab_core::Inline>,
}, },
List { List {
ordered: bool, ordered: bool,
items: Vec<Vec<kb_core::Inline>>, items: Vec<Vec<kebab_core::Inline>>,
}, },
Code { Code {
lang: Option<String>, lang: Option<String>,
@@ -52,7 +52,7 @@ pub enum ParsedPayload {
}, },
Quote { Quote {
text: String, text: String,
inlines: Vec<kb_core::Inline>, inlines: Vec<kebab_core::Inline>,
}, },
ImageRef { ImageRef {
src: String, src: String,

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-rag" name = "kebab-rag"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,11 +8,11 @@ repository = { workspace = true }
description = "RAG pipeline: retrieve → gate → pack → generate → cite-validate" description = "RAG pipeline: retrieve → gate → pack → generate → cite-validate"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
kb-search = { path = "../kb-search" } kebab-search = { path = "../kebab-search" }
kb-llm = { path = "../kb-llm" } kebab-llm = { path = "../kebab-llm" }
kb-store-sqlite = { path = "../kb-store-sqlite" } kebab-store-sqlite = { path = "../kebab-store-sqlite" }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
regex = { workspace = true } regex = { workspace = true }
@@ -23,7 +23,7 @@ anyhow = { workspace = true }
blake3 = { workspace = true } blake3 = { workspace = true }
[dev-dependencies] [dev-dependencies]
kb-llm = { path = "../kb-llm", features = ["mock"] } kebab-llm = { path = "../kebab-llm", features = ["mock"] }
tempfile = { workspace = true } tempfile = { workspace = true }
rusqlite = { workspace = true } rusqlite = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }

View File

@@ -18,7 +18,7 @@
//! reachable via `Retriever`), `kb-embed*` (only via `Retriever`), //! reachable via `Retriever`), `kb-embed*` (only via `Retriever`),
//! `kb-llm-local` (only via `LanguageModel`), `kb-tui`, `kb-desktop`. //! `kb-llm-local` (only via `LanguageModel`), `kb-tui`, `kb-desktop`.
pub use kb_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason}; pub use kebab_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason};
mod pipeline; mod pipeline;

View File

@@ -33,13 +33,13 @@
use std::sync::Arc; use std::sync::Arc;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use kb_core::{ use kebab_core::{
Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason, Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason,
GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters, GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters,
SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId, SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId,
}; };
use kb_core::versions::PromptTemplateVersion; use kebab_core::versions::PromptTemplateVersion;
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use regex::Regex; use regex::Regex;
use std::sync::OnceLock; use std::sync::OnceLock;
use time::OffsetDateTime; use time::OffsetDateTime;
@@ -86,7 +86,7 @@ pub struct AskOpts {
/// Single-threaded RAG orchestrator. See module docs for the stage list. /// Single-threaded RAG orchestrator. See module docs for the stage list.
pub struct RagPipeline { pub struct RagPipeline {
config: kb_config::Config, config: kebab_config::Config,
retriever: Arc<dyn Retriever>, retriever: Arc<dyn Retriever>,
llm: Arc<dyn LanguageModel>, llm: Arc<dyn LanguageModel>,
docs: Arc<SqliteStore>, docs: Arc<SqliteStore>,
@@ -98,7 +98,7 @@ impl RagPipeline {
/// `Arc`'d trait objects (kb-app builds them from config; tests /// `Arc`'d trait objects (kb-app builds them from config; tests
/// inject mocks). /// inject mocks).
pub fn new( pub fn new(
config: kb_config::Config, config: kebab_config::Config,
retriever: Arc<dyn Retriever>, retriever: Arc<dyn Retriever>,
llm: Arc<dyn LanguageModel>, llm: Arc<dyn LanguageModel>,
docs: Arc<SqliteStore>, docs: Arc<SqliteStore>,
@@ -135,7 +135,7 @@ impl RagPipeline {
let top_score = hits.first().map(|h| h.retrieval.fusion_score).unwrap_or(0.0); let top_score = hits.first().map(|h| h.retrieval.fusion_score).unwrap_or(0.0);
tracing::debug!( tracing::debug!(
target: "kb-rag", target: "kebab-rag",
chunks_returned, chunks_returned,
top_score, top_score,
mode = ?opts.mode, mode = ?opts.mode,
@@ -161,7 +161,7 @@ impl RagPipeline {
// collapse to the more accurate `NoChunks` refusal here. // collapse to the more accurate `NoChunks` refusal here.
if packed_entries.is_empty() { if packed_entries.is_empty() {
tracing::warn!( tracing::warn!(
target: "kb-rag", target: "kebab-rag",
chunks_returned = hits.len(), chunks_returned = hits.len(),
"kb-rag: all retrieved chunks were unfetchable from the store; \ "kb-rag: all retrieved chunks were unfetchable from the store; \
falling back to NoChunks refusal" falling back to NoChunks refusal"
@@ -324,7 +324,7 @@ impl RagPipeline {
// Drop the moved `finish_reason` early into a tracing breadcrumb; the // Drop the moved `finish_reason` early into a tracing breadcrumb; the
// wire schema does not surface it (per design §3.8). // wire schema does not surface it (per design §3.8).
tracing::debug!( tracing::debug!(
target: "kb-rag", target: "kebab-rag",
grounded = answer.grounded, grounded = answer.grounded,
refusal = ?answer.refusal_reason, refusal = ?answer.refusal_reason,
refusal_phrase_detected = matched_refusal_phrase, refusal_phrase_detected = matched_refusal_phrase,
@@ -354,7 +354,7 @@ impl RagPipeline {
self.docs.put_answer(&answer, query, packed_chunks_json.as_deref()) self.docs.put_answer(&answer, query, packed_chunks_json.as_deref())
{ {
tracing::warn!( tracing::warn!(
target: "kb-rag", target: "kebab-rag",
error = %e, error = %e,
"kb-rag: put_answer failed; in-memory Answer still returned" "kb-rag: put_answer failed; in-memory Answer still returned"
); );
@@ -380,13 +380,13 @@ impl RagPipeline {
for hit in hits { for hit in hits {
let chunk_full = let chunk_full =
<SqliteStore as kb_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id) <SqliteStore as kebab_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
.context("kb-rag: docs.get_chunk")?; .context("kb-rag: docs.get_chunk")?;
let chunk_text = match chunk_full { let chunk_text = match chunk_full {
Some(c) => c.text, Some(c) => c.text,
None => { None => {
tracing::warn!( tracing::warn!(
target: "kb-rag", target: "kebab-rag",
chunk_id = %hit.chunk_id.0, chunk_id = %hit.chunk_id.0,
"kb-rag: chunk not found in store; skipping" "kb-rag: chunk not found in store; skipping"
); );
@@ -454,7 +454,7 @@ impl RagPipeline {
created_at: OffsetDateTime::now_utc(), created_at: OffsetDateTime::now_utc(),
}; };
if let Err(e) = self.docs.put_answer(&answer, query, None) { if let Err(e) = self.docs.put_answer(&answer, query, None) {
tracing::warn!(target: "kb-rag", error = %e, "kb-rag: put_answer (NoChunks) failed"); tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
} }
Ok(answer) Ok(answer)
} }
@@ -529,7 +529,7 @@ impl RagPipeline {
created_at: OffsetDateTime::now_utc(), created_at: OffsetDateTime::now_utc(),
}; };
if let Err(e) = self.docs.put_answer(&answer, query, None) { if let Err(e) = self.docs.put_answer(&answer, query, None) {
tracing::warn!(target: "kb-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed"); tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
} }
Ok(answer) Ok(answer)
} }
@@ -542,7 +542,7 @@ impl RagPipeline {
/// paths attach the configured embedding model so `kb explain` can /// paths attach the configured embedding model so `kb explain` can
/// later identify which embedder shaped the retrieval (even on /// later identify which embedder shaped the retrieval (even on
/// refusals — see `refuse_score_gate`). /// refusals — see `refuse_score_gate`).
fn embedding_ref_for(mode: SearchMode, cfg: &kb_config::Config) -> Option<ModelRef> { fn embedding_ref_for(mode: SearchMode, cfg: &kebab_config::Config) -> Option<ModelRef> {
match mode { match mode {
SearchMode::Lexical => None, SearchMode::Lexical => None,
SearchMode::Vector | SearchMode::Hybrid => Some(ModelRef { SearchMode::Vector | SearchMode::Hybrid => Some(ModelRef {

View File

@@ -14,12 +14,12 @@
use std::sync::Arc; use std::sync::Arc;
use kb_config::Config; use kebab_config::Config;
use kb_core::{ use kebab_core::{
ChunkerVersion, ChunkId, Citation, DocumentId, IndexVersion, RetrievalDetail, ChunkerVersion, ChunkId, Citation, DocumentId, IndexVersion, RetrievalDetail,
Retriever, SearchHit, SearchMode, SearchQuery, WorkspacePath, Retriever, SearchHit, SearchMode, SearchQuery, WorkspacePath,
}; };
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use rusqlite::params; use rusqlite::params;
use tempfile::TempDir; use tempfile::TempDir;
@@ -176,7 +176,7 @@ impl Retriever for MockRetriever {
} }
} }
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect. /// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
pub fn id32(prefix: &str) -> String { pub fn id32(prefix: &str) -> String {
let mut s = prefix.to_string(); let mut s = prefix.to_string();
while s.len() < 32 { while s.len() < 32 {

View File

@@ -10,11 +10,11 @@ use std::sync::Arc;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use common::{MockRetriever, RagEnv, id32, mk_hit}; use common::{MockRetriever, RagEnv, id32, mk_hit};
use kb_core::{ use kebab_core::{
FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage, FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage,
}; };
use kb_llm::MockLanguageModel; use kebab_llm::MockLanguageModel;
use kb_rag::{AskOpts, RagPipeline, RefusalReason}; use kebab_rag::{AskOpts, RagPipeline, RefusalReason};
/// LM ID used everywhere — kept short so snapshots stay stable. /// LM ID used everywhere — kept short so snapshots stay stable.
const TEST_LM_ID: &str = "mock-lm"; const TEST_LM_ID: &str = "mock-lm";
@@ -49,7 +49,7 @@ impl CountingLm {
} }
impl LanguageModel for CountingLm { impl LanguageModel for CountingLm {
fn model_ref(&self) -> kb_core::ModelRef { fn model_ref(&self) -> kebab_core::ModelRef {
self.inner.model_ref() self.inner.model_ref()
} }
fn context_tokens(&self) -> usize { fn context_tokens(&self) -> usize {
@@ -57,7 +57,7 @@ impl LanguageModel for CountingLm {
} }
fn generate_stream( fn generate_stream(
&self, &self,
req: kb_core::GenerateRequest, req: kebab_core::GenerateRequest,
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> { ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
self.calls.fetch_add(1, Ordering::SeqCst); self.calls.fetch_add(1, Ordering::SeqCst);
self.inner.generate_stream(req) self.inner.generate_stream(req)

View File

@@ -1,5 +1,5 @@
[package] [package]
name = "kb-search" name = "kebab-search"
version = { workspace = true } version = { workspace = true }
edition = { workspace = true } edition = { workspace = true }
rust-version = { workspace = true } rust-version = { workspace = true }
@@ -8,17 +8,17 @@ repository = { workspace = true }
description = "Retriever implementations for kb (P2-2 lexical FTS5; P3 vector / hybrid will follow)" description = "Retriever implementations for kb (P2-2 lexical FTS5; P3 vector / hybrid will follow)"
[dependencies] [dependencies]
kb-core = { path = "../kb-core" } kebab-core = { path = "../kebab-core" }
kb-config = { path = "../kb-config" } kebab-config = { path = "../kebab-config" }
kb-store-sqlite = { path = "../kb-store-sqlite" } kebab-store-sqlite = { path = "../kebab-store-sqlite" }
# P3-4 hybrid retriever wraps a `dyn VectorStore` (typically backed by # P3-4 hybrid retriever wraps a `dyn VectorStore` (typically backed by
# `kb-store-vector::LanceVectorStore`) and a `dyn Embedder` (any P3-2 # `kb-store-vector::LanceVectorStore`) and a `dyn Embedder` (any P3-2
# adapter). Listed as a runtime dep so callers can construct # adapter). Listed as a runtime dep so callers can construct
# `VectorRetriever::new` against the trait objects without a concrete # `VectorRetriever::new` against the trait objects without a concrete
# adapter — the concrete adapter (`kb-embed-local`) stays out of this # adapter — the concrete adapter (`kb-embed-local`) stays out of this
# crate per the spec's Forbidden deps list. # crate per the spec's Forbidden deps list.
kb-store-vector = { path = "../kb-store-vector" } kebab-store-vector = { path = "../kebab-store-vector" }
kb-embed = { path = "../kb-embed" } kebab-embed = { path = "../kebab-embed" }
rusqlite = { workspace = true } rusqlite = { workspace = true }
globset = { workspace = true } globset = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
@@ -32,4 +32,4 @@ tempfile = { workspace = true }
# feature) and stand up a real `LanceVectorStore` on a tmp directory. # feature) and stand up a real `LanceVectorStore` on a tmp directory.
# The mock-retriever unit tests (the bulk of the hybrid suite) do not # The mock-retriever unit tests (the bulk of the hybrid suite) do not
# need either, but the integration / snapshot lane does. # need either, but the integration / snapshot lane does.
kb-embed = { path = "../kb-embed", features = ["mock"] } kebab-embed = { path = "../kebab-embed", features = ["mock"] }

View File

@@ -1,4 +1,4 @@
//! Shared helpers for building `kb_core::Citation` values from a //! Shared helpers for building `kebab_core::Citation` values from a
//! chunk's first `SourceSpan`. //! chunk's first `SourceSpan`.
//! //!
//! Both the lexical and vector retrievers join against the same //! Both the lexical and vector retrievers join against the same
@@ -9,7 +9,7 @@
//! §1.6). Living here means a future PDF / image / audio extractor can //! §1.6). Living here means a future PDF / image / audio extractor can
//! enrich the mapping in one place rather than two. //! enrich the mapping in one place rather than two.
use kb_core::{Citation, SourceSpan, WorkspacePath}; use kebab_core::{Citation, SourceSpan, WorkspacePath};
/// Build a `Citation` from the chunk's first `SourceSpan`. P1 markdown /// Build a `Citation` from the chunk's first `SourceSpan`. P1 markdown
/// only emits `Line`, so the other variants are mostly defensive — we /// only emits `Line`, so the other variants are mostly defensive — we

View File

@@ -20,7 +20,7 @@ use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use anyhow::Result; use anyhow::Result;
use kb_core::{ use kebab_core::{
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery, IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
}; };
@@ -75,7 +75,7 @@ impl HybridRetriever {
/// retrievers. Reads `config.search.hybrid_fusion` (only `"rrf"` /// retrievers. Reads `config.search.hybrid_fusion` (only `"rrf"`
/// is recognised today) and `config.search.rrf_k`. /// is recognised today) and `config.search.rrf_k`.
pub fn new( pub fn new(
config: &kb_config::Config, config: &kebab_config::Config,
lexical: Arc<dyn Retriever>, lexical: Arc<dyn Retriever>,
vector: Arc<dyn Retriever>, vector: Arc<dyn Retriever>,
) -> Self { ) -> Self {
@@ -93,7 +93,7 @@ impl HybridRetriever {
let vec_iv = vector.index_version(); let vec_iv = vector.index_version();
if lex_iv.0 != vec_iv.0 { if lex_iv.0 != vec_iv.0 {
tracing::warn!( tracing::warn!(
target: "kb-search", target: "kebab-search",
lexical_index = %lex_iv.0, lexical_index = %lex_iv.0,
vector_index = %vec_iv.0, vector_index = %vec_iv.0,
"kb-search hybrid: lexical and vector index_version differ; consider re-indexing" "kb-search hybrid: lexical and vector index_version differ; consider re-indexing"
@@ -323,7 +323,7 @@ fn parse_fusion(name: &str, k_rrf: u32) -> FusionPolicy {
"rrf" => FusionPolicy::Rrf { k_rrf: k }, "rrf" => FusionPolicy::Rrf { k_rrf: k },
other => { other => {
tracing::warn!( tracing::warn!(
target: "kb-search", target: "kebab-search",
policy = other, policy = other,
"kb-search hybrid: unknown fusion policy; falling back to RRF" "kb-search hybrid: unknown fusion policy; falling back to RRF"
); );
@@ -335,7 +335,7 @@ fn parse_fusion(name: &str, k_rrf: u32) -> FusionPolicy {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use kb_core::{ use kebab_core::{
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, SearchFilters, ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, SearchFilters,
SearchHit, SearchMode, WorkspacePath, SearchHit, SearchMode, WorkspacePath,
}; };

View File

@@ -1,7 +1,7 @@
//! Lexical (FTS5 + bm25) retriever — design §3.7 / §1.5 / §2.2 / §6.4. //! Lexical (FTS5 + bm25) retriever — design §3.7 / §1.5 / §2.2 / §6.4.
//! //!
//! Owns the SQL pattern documented in `tasks/p2/p2-2-lexical-retriever.md` //! Owns the SQL pattern documented in `tasks/p2/p2-2-lexical-retriever.md`
//! and constructs `kb_core::SearchHit` values directly from the joined //! and constructs `kebab_core::SearchHit` values directly from the joined
//! `chunks_fts` / `chunks` / `documents` rows. Reads only — never mutates //! `chunks_fts` / `chunks` / `documents` rows. Reads only — never mutates
//! the underlying SQLite file. //! the underlying SQLite file.
@@ -9,12 +9,12 @@ use std::sync::Arc;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use globset::GlobMatcher; use globset::GlobMatcher;
use kb_core::{ use kebab_core::{
ChunkId, ChunkerVersion, DocumentId, IndexVersion, RetrievalDetail, Retriever, ChunkId, ChunkerVersion, DocumentId, IndexVersion, RetrievalDetail, Retriever,
SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel, SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
WorkspacePath, WorkspacePath,
}; };
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use rusqlite::{params_from_iter, Connection, Row, ToSql}; use rusqlite::{params_from_iter, Connection, Row, ToSql};
use crate::citation_helper::citation_from_first_span; use crate::citation_helper::citation_from_first_span;
@@ -57,7 +57,7 @@ impl LexicalRetriever {
/// Construct with default settings derived from `kb-config`'s defaults. /// Construct with default settings derived from `kb-config`'s defaults.
/// Snippet width is computed from `Config::defaults().search.snippet_chars`. /// Snippet width is computed from `Config::defaults().search.snippet_chars`.
pub fn new(store: Arc<SqliteStore>, index_version: IndexVersion) -> Self { pub fn new(store: Arc<SqliteStore>, index_version: IndexVersion) -> Self {
let cfg = kb_config::Config::defaults(); let cfg = kebab_config::Config::defaults();
Self::with_settings(store, index_version, cfg.search.snippet_chars) Self::with_settings(store, index_version, cfg.search.snippet_chars)
} }
@@ -297,7 +297,7 @@ fn run_query(
params.push(Box::new(lang.0.clone())); params.push(Box::new(lang.0.clone()));
} }
if let Some(trust_min) = &filters.trust_min { if let Some(trust_min) = &filters.trust_min {
// Mirror `kb_store_sqlite::documents::list_documents` ranking: // Mirror `kebab_store_sqlite::documents::list_documents` ranking:
// Generated < Secondary < Primary. Doing the rank in SQL // Generated < Secondary < Primary. Doing the rank in SQL
// (rather than post-filtering) keeps the row stream short // (rather than post-filtering) keeps the row stream short
// when the workspace contains many low-trust docs. // when the workspace contains many low-trust docs.
@@ -523,7 +523,7 @@ mod tests {
#[test] #[test]
fn build_citation_line_round_trip() { fn build_citation_line_round_trip() {
use kb_core::Citation; use kebab_core::Citation;
let p = WorkspacePath::new("a/b.md".to_string()).unwrap(); let p = WorkspacePath::new("a/b.md".to_string()).unwrap();
let span = SourceSpan::Line { start: 7, end: 12 }; let span = SourceSpan::Line { start: 7, end: 12 };
let c = citation_from_first_span("c1", p.clone(), Some("S1".to_string()), Some(&span)); let c = citation_from_first_span("c1", p.clone(), Some("S1".to_string()), Some(&span));
@@ -545,7 +545,7 @@ mod tests {
#[test] #[test]
fn build_citation_page_forwards_section() { fn build_citation_page_forwards_section() {
use kb_core::Citation; use kebab_core::Citation;
let p = WorkspacePath::new("doc.pdf".to_string()).unwrap(); let p = WorkspacePath::new("doc.pdf".to_string()).unwrap();
let span = SourceSpan::Page { let span = SourceSpan::Page {
page: 4, page: 4,
@@ -568,7 +568,7 @@ mod tests {
#[test] #[test]
fn build_citation_none_falls_back_to_line_one() { fn build_citation_none_falls_back_to_line_one() {
use kb_core::Citation; use kebab_core::Citation;
let p = WorkspacePath::new("x.md".to_string()).unwrap(); let p = WorkspacePath::new("x.md".to_string()).unwrap();
let c = citation_from_first_span("c1", p, None, None); let c = citation_from_first_span("c1", p, None, None);
match c { match c {

View File

@@ -1,4 +1,4 @@
//! `kb-search` — `kb_core::Retriever` implementations. //! `kb-search` — `kebab_core::Retriever` implementations.
//! //!
//! - [`LexicalRetriever`] (P2-2): SQLite-FTS5 + bm25 backed retriever //! - [`LexicalRetriever`] (P2-2): SQLite-FTS5 + bm25 backed retriever
//! for `SearchMode::Lexical`. //! for `SearchMode::Lexical`.

View File

@@ -1,7 +1,7 @@
//! Vector retriever — design §3.7 / §7.2 / §1.6. //! Vector retriever — design §3.7 / §7.2 / §1.6.
//! //!
//! Wraps a `dyn VectorStore` + `dyn Embedder` + the SQLite metadata //! Wraps a `dyn VectorStore` + `dyn Embedder` + the SQLite metadata
//! store into a `kb_core::Retriever`. The vector store knows how to //! store into a `kebab_core::Retriever`. The vector store knows how to
//! find the nearest chunks by cosine on the embedding column; SQLite //! find the nearest chunks by cosine on the embedding column; SQLite
//! owns the human-readable metadata (heading_path / section_label / //! owns the human-readable metadata (heading_path / section_label /
//! source_spans / chunker_version / workspace_path) needed for //! source_spans / chunker_version / workspace_path) needed for
@@ -19,12 +19,12 @@ use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use anyhow::{Context, Result}; use anyhow::{Context, Result};
use kb_core::{ use kebab_core::{
ChunkId, ChunkerVersion, DocumentId, Embedder, EmbeddingInput, EmbeddingKind, ChunkId, ChunkerVersion, DocumentId, Embedder, EmbeddingInput, EmbeddingKind,
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery, IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
SourceSpan, VectorHit, VectorStore, WorkspacePath, SourceSpan, VectorHit, VectorStore, WorkspacePath,
}; };
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use rusqlite::params_from_iter; use rusqlite::params_from_iter;
use crate::citation_helper::citation_from_first_span; use crate::citation_helper::citation_from_first_span;
@@ -67,7 +67,7 @@ impl VectorRetriever {
sqlite: Arc<SqliteStore>, sqlite: Arc<SqliteStore>,
index_version: IndexVersion, index_version: IndexVersion,
) -> Self { ) -> Self {
let cfg = kb_config::Config::defaults(); let cfg = kebab_config::Config::defaults();
Self::with_settings(store, embed, sqlite, index_version, cfg.search.snippet_chars) Self::with_settings(store, embed, sqlite, index_version, cfg.search.snippet_chars)
} }
@@ -268,7 +268,7 @@ fn build_hit(
meta: &ChunkMeta, meta: &ChunkMeta,
rank: u32, rank: u32,
index_version: &IndexVersion, index_version: &IndexVersion,
model_id: &kb_core::EmbeddingModelId, model_id: &kebab_core::EmbeddingModelId,
snippet_chars: usize, snippet_chars: usize,
) -> Result<SearchHit> { ) -> Result<SearchHit> {
let heading_path: Vec<String> = serde_json::from_str(&meta.heading_path_json) let heading_path: Vec<String> = serde_json::from_str(&meta.heading_path_json)

View File

@@ -16,15 +16,15 @@
use std::sync::Arc; use std::sync::Arc;
use kb_config::Config; use kebab_config::Config;
use kb_core::{ use kebab_core::{
ChunkId, DocumentId, EmbeddingId, EmbeddingInput, EmbeddingKind, ChunkId, DocumentId, EmbeddingId, EmbeddingInput, EmbeddingKind,
EmbeddingModelId, EmbeddingVersion, IndexVersion, VectorRecord, VectorStore, EmbeddingModelId, EmbeddingVersion, IndexVersion, VectorRecord, VectorStore,
}; };
use kb_embed::{Embedder, MockEmbedder}; use kebab_embed::{Embedder, MockEmbedder};
use kb_search::{LexicalRetriever, VectorRetriever}; use kebab_search::{LexicalRetriever, VectorRetriever};
use kb_store_sqlite::SqliteStore; use kebab_store_sqlite::SqliteStore;
use kb_store_vector::LanceVectorStore; use kebab_store_vector::LanceVectorStore;
use rusqlite::params; use rusqlite::params;
use tempfile::TempDir; use tempfile::TempDir;
@@ -205,7 +205,7 @@ impl HybridEnv {
} }
} }
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect. /// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
pub fn id32(prefix: &str) -> String { pub fn id32(prefix: &str) -> String {
let mut s = prefix.to_string(); let mut s = prefix.to_string();
while s.len() < 32 { while s.len() < 32 {

Some files were not shown because too many files have changed in this diff Show More