Merge pull request 'refactor(rename): kb → kebab — 프로젝트 전체 rename' (#29) from refactor/rename-kb-to-kebab into main
Reviewed-on: altair823-org/kb#29
This commit was merged in pull request #29.
This commit is contained in:
158
Cargo.lock
generated
158
Cargo.lock
generated
@@ -3366,27 +3366,27 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-app"
|
||||
name = "kebab-app"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"dirs 5.0.1",
|
||||
"kb-chunk",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-embed",
|
||||
"kb-embed-local",
|
||||
"kb-llm",
|
||||
"kb-llm-local",
|
||||
"kb-normalize",
|
||||
"kb-parse-md",
|
||||
"kb-parse-types",
|
||||
"kb-rag",
|
||||
"kb-search",
|
||||
"kb-source-fs",
|
||||
"kb-store-sqlite",
|
||||
"kb-store-vector",
|
||||
"kebab-chunk",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-embed",
|
||||
"kebab-embed-local",
|
||||
"kebab-llm",
|
||||
"kebab-llm-local",
|
||||
"kebab-normalize",
|
||||
"kebab-parse-md",
|
||||
"kebab-parse-types",
|
||||
"kebab-rag",
|
||||
"kebab-search",
|
||||
"kebab-source-fs",
|
||||
"kebab-store-sqlite",
|
||||
"kebab-store-vector",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -3399,14 +3399,14 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-chunk"
|
||||
name = "kebab-chunk"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"kb-core",
|
||||
"kb-normalize",
|
||||
"kb-parse-md",
|
||||
"kebab-core",
|
||||
"kebab-normalize",
|
||||
"kebab-parse-md",
|
||||
"serde_json",
|
||||
"serde_json_canonicalizer",
|
||||
"time",
|
||||
@@ -3414,32 +3414,32 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-cli"
|
||||
name = "kebab-cli"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"kb-app",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-eval",
|
||||
"kebab-app",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-eval",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-config"
|
||||
name = "kebab-config"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"dirs 5.0.1",
|
||||
"kb-core",
|
||||
"kebab-core",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"toml",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-core"
|
||||
name = "kebab-core"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
@@ -3453,13 +3453,13 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-embed"
|
||||
name = "kebab-embed"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"proptest",
|
||||
"serde",
|
||||
"thiserror 2.0.18",
|
||||
@@ -3467,27 +3467,27 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-embed-local"
|
||||
name = "kebab-embed-local"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"fastembed",
|
||||
"kb-config",
|
||||
"kb-embed",
|
||||
"kebab-config",
|
||||
"kebab-embed",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-eval"
|
||||
name = "kebab-eval"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-app",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-store-sqlite",
|
||||
"kebab-app",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-store-sqlite",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -3499,22 +3499,22 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-llm"
|
||||
name = "kebab-llm"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-core",
|
||||
"kebab-core",
|
||||
"proptest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-llm-local"
|
||||
name = "kebab-llm-local"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-llm",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-llm",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"serde_json",
|
||||
@@ -3525,13 +3525,13 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-normalize"
|
||||
name = "kebab-normalize"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-core",
|
||||
"kb-parse-md",
|
||||
"kb-parse-types",
|
||||
"kebab-core",
|
||||
"kebab-parse-md",
|
||||
"kebab-parse-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"time",
|
||||
@@ -3540,12 +3540,12 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-parse-md"
|
||||
name = "kebab-parse-md"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-core",
|
||||
"kb-parse-types",
|
||||
"kebab-core",
|
||||
"kebab-parse-types",
|
||||
"lingua",
|
||||
"pulldown-cmark",
|
||||
"serde",
|
||||
@@ -3557,24 +3557,24 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-parse-types"
|
||||
name = "kebab-parse-types"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"kb-core",
|
||||
"kebab-core",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-rag"
|
||||
name = "kebab-rag"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-llm",
|
||||
"kb-search",
|
||||
"kb-store-sqlite",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-llm",
|
||||
"kebab-search",
|
||||
"kebab-store-sqlite",
|
||||
"regex",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
@@ -3586,16 +3586,16 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-search"
|
||||
name = "kebab-search"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"globset",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-embed",
|
||||
"kb-store-sqlite",
|
||||
"kb-store-vector",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-embed",
|
||||
"kebab-store-sqlite",
|
||||
"kebab-store-vector",
|
||||
"rusqlite",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
@@ -3604,14 +3604,14 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-source-fs"
|
||||
name = "kebab-source-fs"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"ignore",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
@@ -3621,17 +3621,17 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-store-sqlite"
|
||||
name = "kebab-store-sqlite"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"blake3",
|
||||
"globset",
|
||||
"kb-chunk",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-normalize",
|
||||
"kb-parse-md",
|
||||
"kebab-chunk",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-normalize",
|
||||
"kebab-parse-md",
|
||||
"refinery",
|
||||
"rusqlite",
|
||||
"serde_json",
|
||||
@@ -3642,7 +3642,7 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-store-vector"
|
||||
name = "kebab-store-vector"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
@@ -3651,9 +3651,9 @@ dependencies = [
|
||||
"arrow-schema",
|
||||
"blake3",
|
||||
"futures",
|
||||
"kb-config",
|
||||
"kb-core",
|
||||
"kb-store-sqlite",
|
||||
"kebab-config",
|
||||
"kebab-core",
|
||||
"kebab-store-sqlite",
|
||||
"lancedb",
|
||||
"rusqlite",
|
||||
"serde",
|
||||
|
||||
38
Cargo.toml
38
Cargo.toml
@@ -1,31 +1,31 @@
|
||||
[workspace]
|
||||
resolver = "3"
|
||||
members = [
|
||||
"crates/kb-core",
|
||||
"crates/kb-parse-types",
|
||||
"crates/kb-config",
|
||||
"crates/kb-source-fs",
|
||||
"crates/kb-parse-md",
|
||||
"crates/kb-normalize",
|
||||
"crates/kb-chunk",
|
||||
"crates/kb-store-sqlite",
|
||||
"crates/kb-store-vector",
|
||||
"crates/kb-search",
|
||||
"crates/kb-embed",
|
||||
"crates/kb-embed-local",
|
||||
"crates/kb-llm",
|
||||
"crates/kb-llm-local",
|
||||
"crates/kb-rag",
|
||||
"crates/kb-app",
|
||||
"crates/kb-cli",
|
||||
"crates/kb-eval",
|
||||
"crates/kebab-core",
|
||||
"crates/kebab-parse-types",
|
||||
"crates/kebab-config",
|
||||
"crates/kebab-source-fs",
|
||||
"crates/kebab-parse-md",
|
||||
"crates/kebab-normalize",
|
||||
"crates/kebab-chunk",
|
||||
"crates/kebab-store-sqlite",
|
||||
"crates/kebab-store-vector",
|
||||
"crates/kebab-search",
|
||||
"crates/kebab-embed",
|
||||
"crates/kebab-embed-local",
|
||||
"crates/kebab-llm",
|
||||
"crates/kebab-llm-local",
|
||||
"crates/kebab-rag",
|
||||
"crates/kebab-app",
|
||||
"crates/kebab-cli",
|
||||
"crates/kebab-eval",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
edition = "2024"
|
||||
rust-version = "1.85"
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/altair823/kb"
|
||||
repository = "https://github.com/altair823/kebab"
|
||||
version = "0.1.0"
|
||||
|
||||
[workspace.dependencies]
|
||||
|
||||
140
README.md
140
README.md
@@ -1,8 +1,8 @@
|
||||
# kb — Local-first Knowledge Base
|
||||
# kebab — Local-first Knowledge Base
|
||||
|
||||
> **상태:** P0–P4 구현 완료 (31 component task 중 17 완료) + 3건 post-merge hotfix 적용. `kb index` / `kb search --mode {lexical,vector,hybrid}` / `kb ask` 모두 실 동작. 다음 단계 = P5 (eval suite). 자세한 진행 상황은 [tasks/INDEX.md](tasks/INDEX.md), 머지 후 발견된 버그와 fix는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md).
|
||||
> **상태:** P0–P4 구현 완료 (31 component task 중 17 완료) + 3건 post-merge hotfix 적용. `kebab index` / `kebab search --mode {lexical,vector,hybrid}` / `kebab ask` 모두 실 동작. 다음 단계 = P5 (eval suite). 자세한 진행 상황은 [tasks/INDEX.md](tasks/INDEX.md), 머지 후 발견된 버그와 fix는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md).
|
||||
|
||||
`kb` 는 개인용 로컬 knowledge base + RAG 도구다. Markdown / PDF / 이미지 / 음성을 한 곳에 색인하고, 의미 검색 + citation 포함 LLM 답변을 단일 binary 로 제공한다. 모든 추론은 로컬 (Ollama / fastembed / whisper.cpp) 에서 돌아간다.
|
||||
`kebab` 는 개인용 로컬 knowledge base + RAG 도구다. Markdown / PDF / 이미지 / 음성을 한 곳에 색인하고, 의미 검색 + citation 포함 LLM 답변을 단일 binary 로 제공한다. 모든 추론은 로컬 (Ollama / fastembed / whisper.cpp) 에서 돌아간다.
|
||||
|
||||
대상 하드웨어: M4 48GB MacBook 1대, 사용자 1명.
|
||||
|
||||
@@ -12,14 +12,14 @@
|
||||
|
||||
| 명령 | 동작 | 상태 |
|
||||
|------|------|------|
|
||||
| `kb init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 | ✅ P0 |
|
||||
| `kb ingest [<path>]` | Markdown 색인 (idempotent). PDF/이미지/음성은 P6+. | ✅ P3-5 |
|
||||
| `kb search --mode {lexical,vector,hybrid} "<query>"` | 검색 — citation 포함, hybrid는 RRF fusion | ✅ P3-5 |
|
||||
| `kb list docs` | 색인된 문서 목록 | ✅ P3-5 |
|
||||
| `kb inspect doc <id>` / `kb inspect chunk <id>` | raw record 보기 | ✅ P3-5 |
|
||||
| `kb ask "<query>"` | RAG 답변 + 근거 인용. 근거 부족 시 거절. Ollama 필요. | ✅ P4-3 |
|
||||
| `kb doctor` | 설정/모델/DB 헬스 체크 | ✅ P0 |
|
||||
| `kb eval run / compare` | golden query 회귀 측정 | ⏳ P5 |
|
||||
| `kebab init` | XDG 경로에 데이터 디렉토리 + config.toml 생성 | ✅ P0 |
|
||||
| `kebab ingest [<path>]` | Markdown 색인 (idempotent). PDF/이미지/음성은 P6+. | ✅ P3-5 |
|
||||
| `kebab search --mode {lexical,vector,hybrid} "<query>"` | 검색 — citation 포함, hybrid는 RRF fusion | ✅ P3-5 |
|
||||
| `kebab list docs` | 색인된 문서 목록 | ✅ P3-5 |
|
||||
| `kebab inspect doc <id>` / `kebab inspect chunk <id>` | raw record 보기 | ✅ P3-5 |
|
||||
| `kebab ask "<query>"` | RAG 답변 + 근거 인용. 근거 부족 시 거절. Ollama 필요. | ✅ P4-3 |
|
||||
| `kebab doctor` | 설정/모델/DB 헬스 체크 | ✅ P0 |
|
||||
| `kebab eval run / compare` | golden query 회귀 측정 | ⏳ P5 |
|
||||
|
||||
기계 친화 모드: 모든 명령에 `--json` 플래그. 출력은 frozen wire schema v1 (`schema_version` 필드 항상 포함, 예: `ingest_report.v1`, `search_hit.v1`, `answer.v1`, `doctor.v1`).
|
||||
|
||||
@@ -44,35 +44,35 @@
|
||||
| citation 형식 | URI fragment (`path#L12-L34`, W3C Media Fragments) |
|
||||
| ID 생성 | `blake3(canonical_json(tuple))[..32]` hex |
|
||||
| RRF fusion_score | `[0, 1]` 정규화 — `2 / (k_rrf + 1)` 로 나눠 mode 간 비교 가능 (post-merge hotfix) |
|
||||
| layout | XDG (`~/.local/share/kb/`, `~/.config/kb/`, …) |
|
||||
| layout | XDG (`~/.local/share/kebab/`, `~/.config/kebab/`, …) |
|
||||
|
||||
전체는 [docs/superpowers/specs/2026-04-27-kb-final-form-design.md](docs/superpowers/specs/2026-04-27-kb-final-form-design.md) 참조.
|
||||
전체는 [docs/superpowers/specs/2026-04-27-kebab-final-form-design.md](docs/superpowers/specs/2026-04-27-kebab-final-form-design.md) 참조.
|
||||
|
||||
---
|
||||
|
||||
## 의존성 그래프
|
||||
|
||||
```text
|
||||
kb-cli, kb-tui, kb-desktop
|
||||
└─> kb-app
|
||||
├─> kb-source-fs
|
||||
├─> kb-parse-md / kb-parse-pdf / kb-parse-image / kb-parse-audio
|
||||
│ └─> kb-parse-types
|
||||
├─> kb-normalize
|
||||
│ └─> kb-parse-types
|
||||
├─> kb-chunk
|
||||
├─> kb-store-sqlite
|
||||
├─> kb-store-vector
|
||||
├─> kb-embed-local (kb-embed trait crate)
|
||||
├─> kb-search
|
||||
├─> kb-llm-local (kb-llm trait crate)
|
||||
├─> kb-rag
|
||||
├─> kb-eval
|
||||
└─> kb-config
|
||||
└─> kb-core (모두 의존)
|
||||
kebab-cli, kebab-tui, kebab-desktop
|
||||
└─> kebab-app
|
||||
├─> kebab-source-fs
|
||||
├─> kebab-parse-md / kebab-parse-pdf / kebab-parse-image / kebab-parse-audio
|
||||
│ └─> kebab-parse-types
|
||||
├─> kebab-normalize
|
||||
│ └─> kebab-parse-types
|
||||
├─> kebab-chunk
|
||||
├─> kebab-store-sqlite
|
||||
├─> kebab-store-vector
|
||||
├─> kebab-embed-local (kebab-embed trait crate)
|
||||
├─> kebab-search
|
||||
├─> kebab-llm-local (kebab-llm trait crate)
|
||||
├─> kebab-rag
|
||||
├─> kebab-eval
|
||||
└─> kebab-config
|
||||
└─> kebab-core (모두 의존)
|
||||
```
|
||||
|
||||
UI → store/llm/parse 직접 의존 금지. 모든 user-facing 진입은 `kb-app` facade 만 통한다 (design §8). `kb-cli` 가 `--config <path>` flag 를 honor 하려면 `kb_app::*_with_config(cfg, …)` companion 을 통해 Config 을 명시적으로 thread 하는 패턴 — 자세한 이유는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md) 의 `--config` 항목.
|
||||
UI → store/llm/parse 직접 의존 금지. 모든 user-facing 진입은 `kebab-app` facade 만 통한다 (design §8). `kebab-cli` 가 `--config <path>` flag 를 honor 하려면 `kebab_app::*_with_config(cfg, …)` companion 을 통해 Config 을 명시적으로 thread 하는 패턴 — 자세한 이유는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md) 의 `--config` 항목.
|
||||
|
||||
---
|
||||
|
||||
@@ -80,16 +80,16 @@ UI → store/llm/parse 직접 의존 금지. 모든 user-facing 진입은 `kb-ap
|
||||
|
||||
| Phase | 내용 | 핵심 산출 crate | 선행 | 상태 |
|
||||
|-------|------|----------------|------|------|
|
||||
| **P0** | Workspace 뼈대 + 도메인 계약 + ID recipe | `kb-core`, `kb-parse-types`, `kb-config`, `kb-app`, `kb-cli` | – | ✅ 완료 |
|
||||
| **P1** | Markdown ingestion (walk → parse → chunk → SQLite) | `kb-source-fs`, `kb-parse-md`, `kb-normalize`, `kb-chunk`, `kb-store-sqlite` | P0 | ✅ 완료 |
|
||||
| **P2** | SQLite FTS5 lexical 검색 + citation | `kb-search` (lexical) | P1 | ✅ 완료 |
|
||||
| **P3** | Local embedding + LanceDB + hybrid (RRF) + kb-app wiring | `kb-embed`, `kb-embed-local`, `kb-store-vector`, `kb-search` | P2 | ✅ 완료 |
|
||||
| **P4** | Local LLM + RAG + grounded answer | `kb-llm`, `kb-llm-local`, `kb-rag` | P3 | ✅ 완료 |
|
||||
| **P5** | Golden query / regression eval | `kb-eval` | P4 | ⏳ 다음 |
|
||||
| **P6** | 이미지 ingestion (OCR + caption) | `kb-parse-image` | P5 | ⏳ |
|
||||
| **P7** | PDF text + page citation | `kb-parse-pdf` | P5 | ⏳ |
|
||||
| **P8** | 음성 transcription + timestamp citation | `kb-parse-audio` | P5 | ⏳ |
|
||||
| **P9** | TUI + desktop app | `kb-tui`, `kb-desktop` | P5 | ⏳ |
|
||||
| **P0** | Workspace 뼈대 + 도메인 계약 + ID recipe | `kebab-core`, `kebab-parse-types`, `kebab-config`, `kebab-app`, `kebab-cli` | – | ✅ 완료 |
|
||||
| **P1** | Markdown ingestion (walk → parse → chunk → SQLite) | `kebab-source-fs`, `kebab-parse-md`, `kebab-normalize`, `kebab-chunk`, `kebab-store-sqlite` | P0 | ✅ 완료 |
|
||||
| **P2** | SQLite FTS5 lexical 검색 + citation | `kebab-search` (lexical) | P1 | ✅ 완료 |
|
||||
| **P3** | Local embedding + LanceDB + hybrid (RRF) + kebab-app wiring | `kebab-embed`, `kebab-embed-local`, `kebab-store-vector`, `kebab-search` | P2 | ✅ 완료 |
|
||||
| **P4** | Local LLM + RAG + grounded answer | `kebab-llm`, `kebab-llm-local`, `kebab-rag` | P3 | ✅ 완료 |
|
||||
| **P5** | Golden query / regression eval | `kebab-eval` | P4 | ⏳ 다음 |
|
||||
| **P6** | 이미지 ingestion (OCR + caption) | `kebab-parse-image` | P5 | ⏳ |
|
||||
| **P7** | PDF text + page citation | `kebab-parse-pdf` | P5 | ⏳ |
|
||||
| **P8** | 음성 transcription + timestamp citation | `kebab-parse-audio` | P5 | ⏳ |
|
||||
| **P9** | TUI + desktop app | `kebab-tui`, `kebab-desktop` | P5 | ⏳ |
|
||||
|
||||
P0~P5 직렬. P6~P9 P5 이후 병렬 가능.
|
||||
|
||||
@@ -100,13 +100,13 @@ P0~P5 직렬. P6~P9 P5 이후 병렬 가능.
|
||||
## 디렉토리 구조
|
||||
|
||||
```text
|
||||
kb/
|
||||
kebab/
|
||||
├── README.md # 이 파일
|
||||
├── kb_local_rust_report.md # 최초 설계 보고서 (방향성 + 근거)
|
||||
├── kebab_local_rust_report.md # 최초 설계 보고서 (방향성 + 근거)
|
||||
├── docs/
|
||||
│ ├── superpowers/
|
||||
│ │ ├── specs/
|
||||
│ │ │ └── 2026-04-27-kb-final-form-design.md # frozen design (12 sections)
|
||||
│ │ │ └── 2026-04-27-kebab-final-form-design.md # frozen design (12 sections)
|
||||
│ │ └── plans/
|
||||
│ │ └── 2026-04-27-task-decomposition.md # task 분해 implementation plan
|
||||
│ ├── SMOKE.md # 로컬 워크스페이스에 직접 돌려보는 절차
|
||||
@@ -127,19 +127,19 @@ kb/
|
||||
│ ├── p8/p8-1, p8-2 # (2)
|
||||
│ └── p9/p9-1 … p9-5 # (5)
|
||||
├── crates/
|
||||
│ ├── kb-core/ kb-parse-types/ kb-config/ # 도메인 + 설정 (P0)
|
||||
│ ├── kb-source-fs/ # 워크스페이스 walk + checksum (P1-1)
|
||||
│ ├── kb-parse-md/ # Markdown frontmatter + blocks (P1-2/3)
|
||||
│ ├── kb-normalize/ # ParsedBlock → CanonicalDocument (P1-4)
|
||||
│ ├── kb-chunk/ # heading-aware chunker (P1-5)
|
||||
│ ├── kb-store-sqlite/ # SQLite + FTS5 (V001/V002/V003) (P1-6, P2-1, P3-3)
|
||||
│ ├── kb-search/ # Lexical + Vector + Hybrid retriever (P2-2, P3-4)
|
||||
│ ├── kb-embed/ kb-embed-local/ # Embedder trait + fastembed adapter (P3-1, P3-2)
|
||||
│ ├── kb-store-vector/ # LanceDB VectorStore (P3-3)
|
||||
│ ├── kb-llm/ kb-llm-local/ # LanguageModel trait + Ollama adapter (P4-1, P4-2)
|
||||
│ ├── kb-rag/ # RAG pipeline (P4-3)
|
||||
│ ├── kb-app/ # facade (P0 시그니처 + P3-5 본체)
|
||||
│ └── kb-cli/ # binary (P0 → 핫픽스로 --config flag wiring 강화)
|
||||
│ ├── kebab-core/ kebab-parse-types/ kebab-config/ # 도메인 + 설정 (P0)
|
||||
│ ├── kebab-source-fs/ # 워크스페이스 walk + checksum (P1-1)
|
||||
│ ├── kebab-parse-md/ # Markdown frontmatter + blocks (P1-2/3)
|
||||
│ ├── kebab-normalize/ # ParsedBlock → CanonicalDocument (P1-4)
|
||||
│ ├── kebab-chunk/ # heading-aware chunker (P1-5)
|
||||
│ ├── kebab-store-sqlite/ # SQLite + FTS5 (V001/V002/V003) (P1-6, P2-1, P3-3)
|
||||
│ ├── kebab-search/ # Lexical + Vector + Hybrid retriever (P2-2, P3-4)
|
||||
│ ├── kebab-embed/ kebab-embed-local/ # Embedder trait + fastembed adapter (P3-1, P3-2)
|
||||
│ ├── kebab-store-vector/ # LanceDB VectorStore (P3-3)
|
||||
│ ├── kebab-llm/ kebab-llm-local/ # LanguageModel trait + Ollama adapter (P4-1, P4-2)
|
||||
│ ├── kebab-rag/ # RAG pipeline (P4-3)
|
||||
│ ├── kebab-app/ # facade (P0 시그니처 + P3-5 본체)
|
||||
│ └── kebab-cli/ # binary (P0 → 핫픽스로 --config flag wiring 강화)
|
||||
├── migrations/ # SQLite refinery V001/V002/V003
|
||||
└── fixtures/ # 테스트 fixture 트리
|
||||
```
|
||||
@@ -153,19 +153,19 @@ kb/
|
||||
cargo build --release
|
||||
|
||||
# 첫 실행 — XDG 경로에 config.toml 생성
|
||||
./target/release/kb init
|
||||
./target/release/kebab init
|
||||
|
||||
# config 손보고
|
||||
${EDITOR:-vi} ~/.config/kb/config.toml
|
||||
${EDITOR:-vi} ~/.config/kebab/config.toml
|
||||
|
||||
# 색인
|
||||
./target/release/kb ingest
|
||||
./target/release/kebab ingest
|
||||
|
||||
# 검색
|
||||
./target/release/kb search "Markdown chunking 규칙" --mode hybrid
|
||||
./target/release/kebab search "Markdown chunking 규칙" --mode hybrid
|
||||
|
||||
# 질문 (Ollama 필요)
|
||||
./target/release/kb ask "내 KB 설계에서 저장소 전략은?"
|
||||
./target/release/kebab ask "내 KB 설계에서 저장소 전략은?"
|
||||
```
|
||||
|
||||
워크스페이스를 격리해서 직접 돌려보는 패턴은 [docs/SMOKE.md](docs/SMOKE.md) 참조 — `--config <path>` 로 임시 디렉토리에 격리된 KB 를 만들 수 있다.
|
||||
@@ -181,17 +181,17 @@ ${EDITOR:-vi} ~/.config/kb/config.toml
|
||||
- multi-workspace (P+ 후순위)
|
||||
- LLM-as-judge eval (rule-based `must_contain` 만)
|
||||
- visual embedding (CLIP) — P+
|
||||
- desktop app `kb://` protocol handler — P+
|
||||
- desktop app `kebab://` protocol handler — P+
|
||||
|
||||
---
|
||||
|
||||
## 외부 AI 통합
|
||||
|
||||
`kb` 의 `--json` 모드 + frozen wire schema v1 은 외부 자동화의 stable contract. 가능한 통합:
|
||||
`kebab` 의 `--json` 모드 + frozen wire schema v1 은 외부 자동화의 stable contract. 가능한 통합:
|
||||
|
||||
1. **Claude Code / Codex skill** — 얇은 wrapper (`kb search --json` / `kb ask --json` 호출). ~50 lines.
|
||||
2. **MCP server** — `kb-mcp` binary (stdio JSON-RPC) 가 `kb-app` facade 를 1:1 노출. Claude Desktop / Cursor / Zed 등 공유.
|
||||
3. **HTTP wrapper** — `kb serve --bind 127.0.0.1:7711` (P+, local-only 가치 깨므로 신중).
|
||||
1. **Claude Code / Codex skill** — 얇은 wrapper (`kebab search --json` / `kebab ask --json` 호출). ~50 lines.
|
||||
2. **MCP server** — `kebab-mcp` binary (stdio JSON-RPC) 가 `kebab-app` facade 를 1:1 노출. Claude Desktop / Cursor / Zed 등 공유.
|
||||
3. **HTTP wrapper** — `kebab serve --bind 127.0.0.1:7711` (P+, local-only 가치 깨므로 신중).
|
||||
|
||||
---
|
||||
|
||||
@@ -199,7 +199,7 @@ ${EDITOR:-vi} ~/.config/kb/config.toml
|
||||
|
||||
이 repo 는 단일 사용자 프로젝트지만 spec 변경 절차는 명문화되어 있다.
|
||||
|
||||
1. **frozen design 변경** — `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` 가 단일 contract. 변경 시 영향 받는 component task 모두 동시 갱신 필요. PR 1개로 묶기.
|
||||
1. **frozen design 변경** — `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` 가 단일 contract. 변경 시 영향 받는 component task 모두 동시 갱신 필요. PR 1개로 묶기.
|
||||
2. **새 component task 추가** — `tasks/_template.md` 복사 후 `tasks/p<phase>/p<phase>-<n>-<name>.md` 생성. `contract_sections` 에 design doc 섹션 명시. `Allowed/Forbidden dependencies` 는 design §8 module-boundary 표 따름.
|
||||
3. **구현** — component task 1개당 sub-agent 1세션 권장. `cargo test -p <crate>` + DoD 체크리스트 통과. PR 으로 머지.
|
||||
4. **버전 변경** — `parser_version` / `chunker_version` / `embedding_version` 등 변경은 design §9 의 cascade rule 따름. 영향 받는 record 는 재처리 필요.
|
||||
@@ -215,8 +215,8 @@ ${EDITOR:-vi} ~/.config/kb/config.toml
|
||||
|
||||
## 참고
|
||||
|
||||
- 최초 설계 보고서: [kb_local_rust_report.md](kb_local_rust_report.md)
|
||||
- Frozen design: [docs/superpowers/specs/2026-04-27-kb-final-form-design.md](docs/superpowers/specs/2026-04-27-kb-final-form-design.md)
|
||||
- 최초 설계 보고서: [kebab_local_rust_report.md](kebab_local_rust_report.md)
|
||||
- Frozen design: [docs/superpowers/specs/2026-04-27-kebab-final-form-design.md](docs/superpowers/specs/2026-04-27-kebab-final-form-design.md)
|
||||
- Task 분해 plan: [docs/superpowers/plans/2026-04-27-task-decomposition.md](docs/superpowers/plans/2026-04-27-task-decomposition.md)
|
||||
- Task 인덱스: [tasks/INDEX.md](tasks/INDEX.md)
|
||||
- Post-merge 핫픽스 로그: [tasks/HOTFIXES.md](tasks/HOTFIXES.md)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-app"
|
||||
name = "kebab-app"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,21 +8,21 @@ repository = { workspace = true }
|
||||
description = "Facade — orchestrates components for kebab-cli/tui/desktop"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-source-fs = { path = "../kb-source-fs" }
|
||||
kb-parse-md = { path = "../kb-parse-md" }
|
||||
kb-parse-types = { path = "../kb-parse-types" }
|
||||
kb-normalize = { path = "../kb-normalize" }
|
||||
kb-chunk = { path = "../kb-chunk" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kb-store-vector = { path = "../kb-store-vector" }
|
||||
kb-search = { path = "../kb-search" }
|
||||
kb-embed = { path = "../kb-embed" }
|
||||
kb-embed-local = { path = "../kb-embed-local" }
|
||||
kb-llm = { path = "../kb-llm" }
|
||||
kb-llm-local = { path = "../kb-llm-local" }
|
||||
kb-rag = { path = "../kb-rag" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-source-fs = { path = "../kebab-source-fs" }
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
kebab-parse-types = { path = "../kebab-parse-types" }
|
||||
kebab-normalize = { path = "../kebab-normalize" }
|
||||
kebab-chunk = { path = "../kebab-chunk" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
kebab-store-vector = { path = "../kebab-store-vector" }
|
||||
kebab-search = { path = "../kebab-search" }
|
||||
kebab-embed = { path = "../kebab-embed" }
|
||||
kebab-embed-local = { path = "../kebab-embed-local" }
|
||||
kebab-llm = { path = "../kebab-llm" }
|
||||
kebab-llm-local = { path = "../kebab-llm-local" }
|
||||
kebab-rag = { path = "../kebab-rag" }
|
||||
anyhow = { workspace = true }
|
||||
blake3 = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
@@ -37,16 +37,16 @@ use std::sync::{Arc, OnceLock};
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Answer, Embedder, IndexVersion, LanguageModel, Retriever, SearchHit, SearchMode,
|
||||
SearchQuery, VectorStore,
|
||||
};
|
||||
use kb_embed_local::FastembedEmbedder;
|
||||
use kb_llm_local::OllamaLanguageModel;
|
||||
use kb_rag::{AskOpts, RagPipeline};
|
||||
use kb_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kb_store_vector::LanceVectorStore;
|
||||
use kebab_embed_local::FastembedEmbedder;
|
||||
use kebab_llm_local::OllamaLanguageModel;
|
||||
use kebab_rag::{AskOpts, RagPipeline};
|
||||
use kebab_search::{HybridRetriever, LexicalRetriever, VectorRetriever};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use kebab_store_vector::LanceVectorStore;
|
||||
|
||||
/// Facade state — see module docs for lifetime rules.
|
||||
///
|
||||
@@ -55,7 +55,7 @@ use kb_store_vector::LanceVectorStore;
|
||||
/// ask calls. The OnceLock-backed `embedder` / `vector` fields ensure
|
||||
/// the cold-start cost is paid exactly once per instance.
|
||||
pub struct App {
|
||||
pub(crate) config: kb_config::Config,
|
||||
pub(crate) config: kebab_config::Config,
|
||||
pub(crate) sqlite: Arc<SqliteStore>,
|
||||
/// Memoized embedder — built lazily on first `embedder()` call when
|
||||
/// embeddings are enabled. `OnceLock` keeps the struct `Sync` and
|
||||
@@ -80,7 +80,7 @@ impl App {
|
||||
/// Downstream `LanceVectorStore::new` (called by [`Self::vector`])
|
||||
/// internally drives a `tokio::Runtime::block_on`, which panics if
|
||||
/// invoked from inside another tokio runtime.
|
||||
pub fn open_with_config(config: kb_config::Config) -> Result<Self> {
|
||||
pub fn open_with_config(config: kebab_config::Config) -> Result<Self> {
|
||||
let sqlite = SqliteStore::open(&config).context("kb-app: open SqliteStore")?;
|
||||
sqlite
|
||||
.run_migrations()
|
||||
@@ -286,7 +286,7 @@ impl App {
|
||||
/// the active config. This token surfaces in `SearchHit.index_version`
|
||||
/// and on snapshot tests; including the chunker version pins it to
|
||||
/// the chunking policy in effect.
|
||||
fn lexical_index_version(config: &kb_config::Config) -> IndexVersion {
|
||||
fn lexical_index_version(config: &kebab_config::Config) -> IndexVersion {
|
||||
IndexVersion(format!("lex:{}", config.chunking.chunker_version))
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
//! ## Config seam (`*_with_config`)
|
||||
//!
|
||||
//! Each public free function has a `#[doc(hidden)] pub fn *_with_config`
|
||||
//! companion that takes a fully-resolved [`kb_config::Config`] directly.
|
||||
//! companion that takes a fully-resolved [`kebab_config::Config`] directly.
|
||||
//! Three callers go through it: (1) the top-level free functions
|
||||
//! themselves, after `load_config()`; (2) `kebab-cli` when the user passes
|
||||
//! `--config <path>` (CLI builds the Config via
|
||||
@@ -39,16 +39,16 @@ use std::sync::Arc;
|
||||
use anyhow::{Context, anyhow};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use kb_chunk::MdHeadingV1Chunker;
|
||||
use kb_core::{
|
||||
use kebab_chunk::MdHeadingV1Chunker;
|
||||
use kebab_core::{
|
||||
Answer, CanonicalDocument, Chunk, ChunkId, ChunkPolicy, ChunkerVersion, Chunker,
|
||||
DocFilter, DocSummary, DocumentId, DocumentStore, Embedder, EmbeddingInput,
|
||||
EmbeddingKind, IngestReport, ParserVersion, RawAsset, SearchHit, SearchQuery,
|
||||
SourceConnector, SourceScope, SourceUri, VectorRecord, VectorStore,
|
||||
};
|
||||
use kb_normalize::build_canonical_document;
|
||||
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kb_source_fs::FsSourceConnector;
|
||||
use kebab_normalize::build_canonical_document;
|
||||
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_source_fs::FsSourceConnector;
|
||||
|
||||
mod app;
|
||||
pub mod doctor_signal;
|
||||
@@ -61,15 +61,15 @@ pub use app::App;
|
||||
/// Kept in lock-step with the literal used in the `kebab-store-sqlite`
|
||||
/// idempotency / round-trip tests so the version label written by the
|
||||
/// app and the one used in cross-crate fixtures match.
|
||||
const KB_PARSE_MD_VERSION: &str = "pulldown-cmark-0.x";
|
||||
const KEBAB_PARSE_MD_VERSION: &str = "pulldown-cmark-0.x";
|
||||
|
||||
/// Caller-supplied knobs for one [`ask`] invocation.
|
||||
///
|
||||
/// Re-exported from [`kb_rag::AskOpts`] (P4-3 owns the type) so kb-cli's
|
||||
/// `use kb_app::AskOpts` keeps working without churn. The struct gained
|
||||
/// Re-exported from [`kebab_rag::AskOpts`] (P4-3 owns the type) so kebab-cli's
|
||||
/// `use kebab_app::AskOpts` keeps working without churn. The struct gained
|
||||
/// a `stream_sink` field in P4-3; non-streaming callers (kebab-cli today)
|
||||
/// pass `stream_sink: None`.
|
||||
pub use kb_rag::AskOpts;
|
||||
pub use kebab_rag::AskOpts;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct DoctorReport {
|
||||
@@ -90,10 +90,10 @@ pub struct DoctorCheck {
|
||||
/// Create XDG dirs and write a starter `config.toml`. Idempotent unless
|
||||
/// `force=true` (which overwrites an existing config).
|
||||
pub fn init_workspace(force: bool) -> anyhow::Result<()> {
|
||||
let cfg_path = kb_config::Config::xdg_config_path();
|
||||
let data_dir = kb_config::Config::xdg_data_dir();
|
||||
let cache_dir = kb_config::Config::xdg_cache_dir();
|
||||
let state_dir = kb_config::Config::xdg_state_dir();
|
||||
let cfg_path = kebab_config::Config::xdg_config_path();
|
||||
let data_dir = kebab_config::Config::xdg_data_dir();
|
||||
let cache_dir = kebab_config::Config::xdg_cache_dir();
|
||||
let state_dir = kebab_config::Config::xdg_state_dir();
|
||||
|
||||
for d in [
|
||||
cfg_path.parent().map(PathBuf::from).unwrap_or_default(),
|
||||
@@ -107,11 +107,11 @@ pub fn init_workspace(force: bool) -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
let workspace_root = expand_tilde(&kb_config::Config::defaults().workspace.root);
|
||||
let workspace_root = expand_tilde(&kebab_config::Config::defaults().workspace.root);
|
||||
std::fs::create_dir_all(&workspace_root)?;
|
||||
|
||||
if !cfg_path.exists() || force {
|
||||
let cfg = kb_config::Config::defaults();
|
||||
let cfg = kebab_config::Config::defaults();
|
||||
let toml_text = toml::to_string_pretty(&cfg)?;
|
||||
std::fs::write(&cfg_path, toml_text)?;
|
||||
}
|
||||
@@ -141,8 +141,8 @@ fn expand_tilde(s: &str) -> PathBuf {
|
||||
/// Callers that already have a Config in hand (CLI honoring `--config`,
|
||||
/// integration tests, TUI session) should bypass this and call the
|
||||
/// matching `*_with_config` helper directly.
|
||||
fn load_config() -> anyhow::Result<kb_config::Config> {
|
||||
kb_config::Config::load(None)
|
||||
fn load_config() -> anyhow::Result<kebab_config::Config> {
|
||||
kebab_config::Config::load(None)
|
||||
}
|
||||
|
||||
// ── ingest ────────────────────────────────────────────────────────────────
|
||||
@@ -154,11 +154,11 @@ pub fn ingest(scope: SourceScope, summary_only: bool) -> anyhow::Result<IngestRe
|
||||
|
||||
/// Config-explicit variant — bypasses [`load_config`] when the
|
||||
/// caller (kb-cli with `--config`, integration tests, TUI session)
|
||||
/// already has a [`kb_config::Config`] in hand. The public free
|
||||
/// already has a [`kebab_config::Config`] in hand. The public free
|
||||
/// function [`ingest`] wraps this with the XDG-default load.
|
||||
#[doc(hidden)]
|
||||
pub fn ingest_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
scope: SourceScope,
|
||||
summary_only: bool,
|
||||
) -> anyhow::Result<IngestReport> {
|
||||
@@ -187,7 +187,7 @@ pub fn ingest_with_config(
|
||||
.context("kb-app::ingest: ensure Lance table")?;
|
||||
}
|
||||
|
||||
let parser_version = ParserVersion(KB_PARSE_MD_VERSION.to_string());
|
||||
let parser_version = ParserVersion(KEBAB_PARSE_MD_VERSION.to_string());
|
||||
let chunk_policy = chunk_policy_from_config(&app.config);
|
||||
|
||||
// Pre-load every existing doc_id so we can label `IngestItem.kind`
|
||||
@@ -205,13 +205,13 @@ pub fn ingest_with_config(
|
||||
|
||||
let started_at = time::OffsetDateTime::now_utc();
|
||||
|
||||
let mut items: Vec<kb_core::IngestItem> = Vec::new();
|
||||
let mut items: Vec<kebab_core::IngestItem> = Vec::new();
|
||||
let mut new_count: u32 = 0;
|
||||
let mut updated_count: u32 = 0;
|
||||
let mut skipped_count: u32 = 0;
|
||||
let mut error_count: u32 = 0;
|
||||
// Aggregate counts surfaced into `ingest_runs` (and tracing). Not
|
||||
// exposed on `IngestReport` today — `kb_core::IngestReport` is a
|
||||
// exposed on `IngestReport` today — `kebab_core::IngestReport` is a
|
||||
// wire-stable struct without these fields — but persisting them
|
||||
// means audit tooling and `kb jobs` (P+) can recover the totals
|
||||
// without re-walking the DB.
|
||||
@@ -236,14 +236,14 @@ pub fn ingest_with_config(
|
||||
Ok(i) => i,
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
target: "kb-app",
|
||||
target: "kebab-app",
|
||||
path = %asset.workspace_path.0,
|
||||
error = %e,
|
||||
"kb-app::ingest: per-file fatal"
|
||||
);
|
||||
error_count = error_count.saturating_add(1);
|
||||
kb_core::IngestItem {
|
||||
kind: kb_core::IngestItemKind::Error,
|
||||
kebab_core::IngestItem {
|
||||
kind: kebab_core::IngestItemKind::Error,
|
||||
doc_id: None,
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
asset_id: Some(asset.asset_id.clone()),
|
||||
@@ -259,7 +259,7 @@ pub fn ingest_with_config(
|
||||
};
|
||||
|
||||
match item.kind {
|
||||
kb_core::IngestItemKind::New => {
|
||||
kebab_core::IngestItemKind::New => {
|
||||
new_count = new_count.saturating_add(1);
|
||||
let n = item.chunk_count.unwrap_or(0);
|
||||
chunks_indexed = chunks_indexed.saturating_add(n);
|
||||
@@ -267,7 +267,7 @@ pub fn ingest_with_config(
|
||||
embeddings_indexed = embeddings_indexed.saturating_add(n);
|
||||
}
|
||||
}
|
||||
kb_core::IngestItemKind::Updated => {
|
||||
kebab_core::IngestItemKind::Updated => {
|
||||
updated_count = updated_count.saturating_add(1);
|
||||
let n = item.chunk_count.unwrap_or(0);
|
||||
chunks_indexed = chunks_indexed.saturating_add(n);
|
||||
@@ -275,10 +275,10 @@ pub fn ingest_with_config(
|
||||
embeddings_indexed = embeddings_indexed.saturating_add(n);
|
||||
}
|
||||
}
|
||||
kb_core::IngestItemKind::Skipped => {
|
||||
kebab_core::IngestItemKind::Skipped => {
|
||||
skipped_count = skipped_count.saturating_add(1)
|
||||
}
|
||||
kb_core::IngestItemKind::Error => {
|
||||
kebab_core::IngestItemKind::Error => {
|
||||
error_count = error_count.saturating_add(1)
|
||||
}
|
||||
}
|
||||
@@ -293,9 +293,9 @@ pub fn ingest_with_config(
|
||||
"scope": scope,
|
||||
"summary_only": summary_only,
|
||||
});
|
||||
let job_id_res = <SqliteStoreAlias as kb_core::JobRepo>::create(
|
||||
let job_id_res = <SqliteStoreAlias as kebab_core::JobRepo>::create(
|
||||
&app.sqlite,
|
||||
kb_core::JobKind::Ingest,
|
||||
kebab_core::JobKind::Ingest,
|
||||
payload,
|
||||
);
|
||||
match job_id_res {
|
||||
@@ -312,25 +312,25 @@ pub fn ingest_with_config(
|
||||
"chunks_indexed": chunks_indexed,
|
||||
"embeddings_indexed": embeddings_indexed,
|
||||
});
|
||||
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::update_progress(
|
||||
if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::update_progress(
|
||||
&app.sqlite,
|
||||
&jid,
|
||||
progress,
|
||||
) {
|
||||
tracing::warn!(
|
||||
target: "kb-app",
|
||||
target: "kebab-app",
|
||||
error = %e,
|
||||
"kb-app::ingest: JobRepo::update_progress failed"
|
||||
);
|
||||
}
|
||||
if let Err(e) = <SqliteStoreAlias as kb_core::JobRepo>::finish(
|
||||
if let Err(e) = <SqliteStoreAlias as kebab_core::JobRepo>::finish(
|
||||
&app.sqlite,
|
||||
&jid,
|
||||
kb_core::JobStatus::Succeeded,
|
||||
kebab_core::JobStatus::Succeeded,
|
||||
None,
|
||||
) {
|
||||
tracing::warn!(
|
||||
target: "kb-app",
|
||||
target: "kebab-app",
|
||||
error = %e,
|
||||
"kb-app::ingest: JobRepo::finish failed"
|
||||
);
|
||||
@@ -338,7 +338,7 @@ pub fn ingest_with_config(
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
target: "kb-app",
|
||||
target: "kebab-app",
|
||||
error = %e,
|
||||
"kb-app::ingest: JobRepo::create failed; run not recorded in `jobs`"
|
||||
);
|
||||
@@ -361,7 +361,7 @@ pub fn ingest_with_config(
|
||||
Ok(s) => Some(s),
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
target: "kb-app",
|
||||
target: "kebab-app",
|
||||
error = %e,
|
||||
"kb-app::ingest: failed to serialize items_json; storing NULL"
|
||||
);
|
||||
@@ -370,7 +370,7 @@ pub fn ingest_with_config(
|
||||
}
|
||||
};
|
||||
let run_id = mint_ingest_run_id(&scope_json, started_at);
|
||||
let row = kb_store_sqlite::IngestRunRow {
|
||||
let row = kebab_store_sqlite::IngestRunRow {
|
||||
run_id: &run_id,
|
||||
scope_json: &scope_json,
|
||||
scanned: scanned_count,
|
||||
@@ -385,14 +385,14 @@ pub fn ingest_with_config(
|
||||
};
|
||||
if let Err(e) = app.sqlite.record_ingest_run(&row) {
|
||||
tracing::warn!(
|
||||
target: "kb-app",
|
||||
target: "kebab-app",
|
||||
error = %e,
|
||||
"kb-app::ingest: record_ingest_run failed"
|
||||
);
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
target: "kb-app",
|
||||
target: "kebab-app",
|
||||
scanned = scanned_count,
|
||||
new = new_count,
|
||||
updated = updated_count,
|
||||
@@ -432,7 +432,7 @@ fn mint_ingest_run_id(scope_json: &str, at: time::OffsetDateTime) -> String {
|
||||
/// vs `JobRepo`) on the same store. Plain `app.sqlite.create(...)`
|
||||
/// would pick one based on inherent vs trait methods; we go through
|
||||
/// `<… as JobRepo>` to be explicit.
|
||||
type SqliteStoreAlias = kb_store_sqlite::SqliteStore;
|
||||
type SqliteStoreAlias = kebab_store_sqlite::SqliteStore;
|
||||
|
||||
/// Process a single asset: read bytes, parse, normalize, chunk,
|
||||
/// persist, embed. Per-asset failures bubble up to the caller for
|
||||
@@ -444,18 +444,18 @@ fn ingest_one_asset(
|
||||
parser_version: &ParserVersion,
|
||||
chunk_policy: &ChunkPolicy,
|
||||
embedder: Option<&Arc<dyn Embedder + Send + Sync>>,
|
||||
vector_store: Option<&Arc<kb_store_vector::LanceVectorStore>>,
|
||||
vector_store: Option<&Arc<kebab_store_vector::LanceVectorStore>>,
|
||||
existing_doc_ids: &std::collections::HashSet<String>,
|
||||
) -> anyhow::Result<kb_core::IngestItem> {
|
||||
) -> anyhow::Result<kebab_core::IngestItem> {
|
||||
tracing::debug!(
|
||||
target: "kb-app::ingest",
|
||||
target: "kebab-app::ingest",
|
||||
path = %asset.workspace_path.0,
|
||||
"processing asset"
|
||||
);
|
||||
// Only handle Markdown for now; other media types are P6+ work.
|
||||
if asset.media_type != kb_core::MediaType::Markdown {
|
||||
return Ok(kb_core::IngestItem {
|
||||
kind: kb_core::IngestItemKind::Skipped,
|
||||
if asset.media_type != kebab_core::MediaType::Markdown {
|
||||
return Ok(kebab_core::IngestItem {
|
||||
kind: kebab_core::IngestItemKind::Skipped,
|
||||
doc_id: None,
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
asset_id: Some(asset.asset_id.clone()),
|
||||
@@ -472,8 +472,8 @@ fn ingest_one_asset(
|
||||
let path = match &asset.source_uri {
|
||||
SourceUri::File(p) => p.clone(),
|
||||
SourceUri::Kb(_) => {
|
||||
return Ok(kb_core::IngestItem {
|
||||
kind: kb_core::IngestItemKind::Skipped,
|
||||
return Ok(kebab_core::IngestItem {
|
||||
kind: kebab_core::IngestItemKind::Skipped,
|
||||
doc_id: None,
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
asset_id: Some(asset.asset_id.clone()),
|
||||
@@ -569,7 +569,7 @@ fn ingest_one_asset(
|
||||
.iter()
|
||||
.zip(vectors)
|
||||
.map(|(c, v)| VectorRecord {
|
||||
embedding_id: kb_core::id_for_embedding(
|
||||
embedding_id: kebab_core::id_for_embedding(
|
||||
&c.chunk_id,
|
||||
&model_id,
|
||||
&model_version,
|
||||
@@ -592,12 +592,12 @@ fn ingest_one_asset(
|
||||
}
|
||||
|
||||
let kind = if existing_doc_ids.contains(&canonical.doc_id.0) {
|
||||
kb_core::IngestItemKind::Updated
|
||||
kebab_core::IngestItemKind::Updated
|
||||
} else {
|
||||
kb_core::IngestItemKind::New
|
||||
kebab_core::IngestItemKind::New
|
||||
};
|
||||
|
||||
Ok(kb_core::IngestItem {
|
||||
Ok(kebab_core::IngestItem {
|
||||
kind,
|
||||
doc_id: Some(canonical.doc_id.clone()),
|
||||
doc_path: asset.workspace_path.clone(),
|
||||
@@ -613,7 +613,7 @@ fn ingest_one_asset(
|
||||
}
|
||||
|
||||
/// Convenience: end byte of the frontmatter region (or 0 when absent).
|
||||
fn fm_span_end(span: Option<kb_parse_md::FrontmatterSpan>) -> usize {
|
||||
fn fm_span_end(span: Option<kebab_parse_md::FrontmatterSpan>) -> usize {
|
||||
span.map(|s| s.end).unwrap_or(0)
|
||||
}
|
||||
|
||||
@@ -640,7 +640,7 @@ fn build_body_hints(asset: &RawAsset) -> BodyHints {
|
||||
}
|
||||
|
||||
/// Build a `ChunkPolicy` from the active config.
|
||||
fn chunk_policy_from_config(config: &kb_config::Config) -> ChunkPolicy {
|
||||
fn chunk_policy_from_config(config: &kebab_config::Config) -> ChunkPolicy {
|
||||
ChunkPolicy {
|
||||
target_tokens: config.chunking.target_tokens,
|
||||
overlap_tokens: config.chunking.overlap_tokens,
|
||||
@@ -660,7 +660,7 @@ pub fn list_docs(filter: DocFilter) -> anyhow::Result<Vec<DocSummary>> {
|
||||
/// ([`list_docs`]), not this.
|
||||
#[doc(hidden)]
|
||||
pub fn list_docs_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
filter: DocFilter,
|
||||
) -> anyhow::Result<Vec<DocSummary>> {
|
||||
let app = App::open_with_config(config)?;
|
||||
@@ -676,7 +676,7 @@ pub fn inspect_doc(id: &DocumentId) -> anyhow::Result<CanonicalDocument> {
|
||||
/// ([`inspect_doc`]), not this.
|
||||
#[doc(hidden)]
|
||||
pub fn inspect_doc_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
id: &DocumentId,
|
||||
) -> anyhow::Result<CanonicalDocument> {
|
||||
let app = App::open_with_config(config)?;
|
||||
@@ -694,7 +694,7 @@ pub fn inspect_chunk(id: &ChunkId) -> anyhow::Result<Chunk> {
|
||||
/// ([`inspect_chunk`]), not this.
|
||||
#[doc(hidden)]
|
||||
pub fn inspect_chunk_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
id: &ChunkId,
|
||||
) -> anyhow::Result<Chunk> {
|
||||
let app = App::open_with_config(config)?;
|
||||
@@ -716,7 +716,7 @@ pub fn search(query: SearchQuery) -> anyhow::Result<Vec<SearchHit>> {
|
||||
/// directly to amortize the embedder / vector-store cold start.
|
||||
#[doc(hidden)]
|
||||
pub fn search_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
query: SearchQuery,
|
||||
) -> anyhow::Result<Vec<SearchHit>> {
|
||||
App::open_with_config(config)?.search(query)
|
||||
@@ -740,7 +740,7 @@ pub fn ask(query: &str, opts: AskOpts) -> anyhow::Result<Answer> {
|
||||
/// [`App::ask`].
|
||||
#[doc(hidden)]
|
||||
pub fn ask_with_config(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
query: &str,
|
||||
opts: AskOpts,
|
||||
) -> anyhow::Result<Answer> {
|
||||
@@ -761,10 +761,10 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
|
||||
// override first, else XDG default. Report whichever was probed.
|
||||
let cfg_path: PathBuf = match config_path {
|
||||
Some(p) => p.to_path_buf(),
|
||||
None => kb_config::Config::xdg_config_path(),
|
||||
None => kebab_config::Config::xdg_config_path(),
|
||||
};
|
||||
let (config_ok, config_detail, loaded_cfg) = if cfg_path.exists() {
|
||||
match kb_config::Config::from_file(&cfg_path) {
|
||||
match kebab_config::Config::from_file(&cfg_path) {
|
||||
Ok(c) => (true, cfg_path.display().to_string(), Some(c)),
|
||||
Err(e) => (false, format!("{} ({e})", cfg_path.display()), None),
|
||||
}
|
||||
@@ -795,7 +795,7 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
|
||||
|
||||
// data_dir_writable — probe the resolved storage.data_dir from the
|
||||
// loaded config when present, else the XDG default. Apply env
|
||||
// overrides so KB_STORAGE_DATA_DIR is respected too.
|
||||
// overrides so KEBAB_STORAGE_DATA_DIR is respected too.
|
||||
let data_dir = match loaded_cfg.as_ref() {
|
||||
Some(c) => {
|
||||
// Re-apply env overrides on top so the same precedence as
|
||||
@@ -804,7 +804,7 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
|
||||
let merged = c.clone().apply_env(&env);
|
||||
expand_tilde(&merged.storage.data_dir)
|
||||
}
|
||||
None => kb_config::Config::xdg_data_dir(),
|
||||
None => kebab_config::Config::xdg_data_dir(),
|
||||
};
|
||||
let writable = (|| -> anyhow::Result<()> {
|
||||
std::fs::create_dir_all(&data_dir)?;
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Tracing initialization helper for `kb-cli`.
|
||||
//!
|
||||
//! Daily-rolling file appender at `~/.local/state/kb/logs/` per task spec.
|
||||
//! Daily-rolling file appender at `~/.local/state/kebab/logs/` per task spec.
|
||||
//! Returns a `WorkerGuard` that the caller must keep alive until program
|
||||
//! exit (so buffered log lines flush).
|
||||
|
||||
@@ -19,7 +19,7 @@ pub enum LogLevel {
|
||||
/// — a second call is a no-op (the second `try_init` is dropped silently
|
||||
/// but the guard is still returned so the caller can keep it alive).
|
||||
pub fn init(level: LogLevel) -> Result<WorkerGuard> {
|
||||
let log_dir = kb_config::Config::xdg_state_dir().join("logs");
|
||||
let log_dir = kebab_config::Config::xdg_state_dir().join("logs");
|
||||
std::fs::create_dir_all(&log_dir)?;
|
||||
|
||||
let file_appender = tracing_appender::rolling::daily(&log_dir, "kb.log");
|
||||
@@ -21,12 +21,12 @@ use common::TestEnv;
|
||||
#[ignore = "requires real Ollama on 127.0.0.1:11434"]
|
||||
fn ask_lexical_smoke() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
let opts = kb_app::AskOpts {
|
||||
let opts = kebab_app::AskOpts {
|
||||
k: 5,
|
||||
explain: false,
|
||||
mode: kb_core::SearchMode::Lexical,
|
||||
mode: kebab_core::SearchMode::Lexical,
|
||||
temperature: Some(0.0),
|
||||
seed: Some(0),
|
||||
stream_sink: None,
|
||||
@@ -34,10 +34,10 @@ fn ask_lexical_smoke() {
|
||||
// The fixture workspace contains "ownership" content; the model's
|
||||
// citation behavior depends on its training, so we don't assert on
|
||||
// grounded — only that the call returns a structurally-valid Answer.
|
||||
let answer = kb_app::ask_with_config(env.config.clone(), "ownership", opts)
|
||||
let answer = kebab_app::ask_with_config(env.config.clone(), "ownership", opts)
|
||||
.expect("ask returns Ok with a real Ollama backend");
|
||||
// retrieval summary always populated, regardless of grounded path.
|
||||
assert_eq!(answer.retrieval.mode, kb_core::SearchMode::Lexical);
|
||||
assert_eq!(answer.retrieval.mode, kebab_core::SearchMode::Lexical);
|
||||
assert!(answer.retrieval.k >= 5);
|
||||
assert!(answer.retrieval.trace_id.0.starts_with("ret_"));
|
||||
}
|
||||
@@ -12,7 +12,7 @@
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use kb_config::Config;
|
||||
use kebab_config::Config;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// Test environment: owns a `TempDir` and exposes a `Config` whose
|
||||
@@ -72,8 +72,8 @@ impl TestEnv {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scope(&self) -> kb_core::SourceScope {
|
||||
kb_core::SourceScope {
|
||||
pub fn scope(&self) -> kebab_core::SourceScope {
|
||||
kebab_core::SourceScope {
|
||||
root: self.workspace_root.clone(),
|
||||
include: self.config.workspace.include.clone(),
|
||||
exclude: self.config.workspace.exclude.clone(),
|
||||
@@ -9,7 +9,7 @@ use common::TestEnv;
|
||||
fn ingest_then_list_inspects_round_trip() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
|
||||
// The fixture has 3 markdown files; first ingest should label them
|
||||
// all as New.
|
||||
@@ -27,16 +27,16 @@ fn ingest_then_list_inspects_round_trip() {
|
||||
}
|
||||
|
||||
// list_docs returns the 3 docs.
|
||||
let docs = kb_app::list_docs_with_config(
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kb_core::DocFilter::default(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3, "docs: {docs:?}");
|
||||
|
||||
// inspect_doc round-trips one of them.
|
||||
let any_doc_id = docs[0].doc_id.clone();
|
||||
let canonical = kb_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
|
||||
let canonical = kebab_app::inspect_doc_with_config(env.config.clone(), &any_doc_id)
|
||||
.unwrap();
|
||||
assert_eq!(canonical.doc_id, any_doc_id);
|
||||
assert!(!canonical.blocks.is_empty(), "blocks empty");
|
||||
@@ -47,20 +47,20 @@ fn ingest_idempotent_on_second_run() {
|
||||
let env = TestEnv::lexical_only();
|
||||
|
||||
let r1 =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(r1.new, 3);
|
||||
|
||||
let r2 =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
// Same files re-ingested — labelled Updated, not duplicated.
|
||||
assert_eq!(r2.scanned, 3, "second scan: {r2:?}");
|
||||
assert_eq!(r2.new, 0, "second run new should be 0: {r2:?}");
|
||||
assert_eq!(r2.updated, 3, "second run updated: {r2:?}");
|
||||
|
||||
// list_docs still has 3 docs (no duplicates).
|
||||
let docs = kb_app::list_docs_with_config(
|
||||
let docs = kebab_app::list_docs_with_config(
|
||||
env.config.clone(),
|
||||
kb_core::DocFilter::default(),
|
||||
kebab_core::DocFilter::default(),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(docs.len(), 3);
|
||||
@@ -70,7 +70,7 @@ fn ingest_idempotent_on_second_run() {
|
||||
fn ingest_summary_only_drops_items() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
assert!(report.items.is_none(), "summary-only should null items");
|
||||
}
|
||||
@@ -82,13 +82,13 @@ fn ingest_records_ingest_runs_row_with_aggregate_counts() {
|
||||
// of every run. `summary_only=true` writes `items_json=NULL`; the
|
||||
// counts MUST still be present.
|
||||
let env = TestEnv::lexical_only();
|
||||
let report = kb_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
let report = kebab_app::ingest_with_config(env.config.clone(), env.scope(), true)
|
||||
.unwrap();
|
||||
assert_eq!(report.scanned, 3);
|
||||
|
||||
let db_path = std::path::PathBuf::from(&env.config.storage.data_dir)
|
||||
.join("kb.sqlite");
|
||||
let conn = rusqlite::Connection::open(&db_path).expect("open kb.sqlite");
|
||||
.join("kebab.sqlite");
|
||||
let conn = rusqlite::Connection::open(&db_path).expect("open kebab.sqlite");
|
||||
let (scanned, new_c, updated, skipped, errors, items_json): (
|
||||
i64,
|
||||
i64,
|
||||
@@ -137,7 +137,7 @@ fn ingest_provider_none_skips_lance() {
|
||||
// tables under it).
|
||||
let env = TestEnv::lexical_only();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), false).unwrap();
|
||||
assert_eq!(report.errors, 0, "lexical-only run must not error");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
@@ -170,22 +170,22 @@ fn ingest_provider_none_skips_lance() {
|
||||
#[test]
|
||||
fn list_docs_filters_by_tags_any() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
let filter = kb_core::DocFilter {
|
||||
let filter = kebab_core::DocFilter {
|
||||
tags_any: vec!["python".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
let docs = kb_app::list_docs_with_config(env.config.clone(), filter).unwrap();
|
||||
let docs = kebab_app::list_docs_with_config(env.config.clone(), filter).unwrap();
|
||||
assert_eq!(docs.len(), 1, "expected only the python doc: {docs:?}");
|
||||
assert!(docs[0].tags.contains(&"python".to_string()));
|
||||
|
||||
let rust_filter = kb_core::DocFilter {
|
||||
let rust_filter = kebab_core::DocFilter {
|
||||
tags_any: vec!["rust".to_string()],
|
||||
..Default::default()
|
||||
};
|
||||
let rust_docs =
|
||||
kb_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
|
||||
kebab_app::list_docs_with_config(env.config.clone(), rust_filter).unwrap();
|
||||
// intro.md and notes/cargo.md both tag "rust".
|
||||
assert_eq!(rust_docs.len(), 2, "expected 2 rust docs: {rust_docs:?}");
|
||||
}
|
||||
@@ -194,8 +194,8 @@ fn list_docs_filters_by_tags_any() {
|
||||
fn inspect_doc_not_found_returns_actionable_error() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bogus =
|
||||
kb_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
|
||||
let err = kb_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
|
||||
kebab_core::DocumentId("0000000000000000000000000000000000000000000000000000000000000000".to_string());
|
||||
let err = kebab_app::inspect_doc_with_config(env.config.clone(), &bogus).unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
msg.contains("not found"),
|
||||
@@ -210,10 +210,10 @@ fn inspect_doc_not_found_returns_actionable_error() {
|
||||
#[test]
|
||||
fn inspect_chunk_not_found_returns_actionable_error() {
|
||||
let env = TestEnv::lexical_only();
|
||||
let bogus = kb_core::ChunkId(
|
||||
let bogus = kebab_core::ChunkId(
|
||||
"0000000000000000000000000000000000000000000000000000000000000000".to_string(),
|
||||
);
|
||||
let err = kb_app::inspect_chunk_with_config(env.config.clone(), &bogus)
|
||||
let err = kebab_app::inspect_chunk_with_config(env.config.clone(), &bogus)
|
||||
.unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(msg.contains("not found"), "got: {msg}");
|
||||
@@ -5,24 +5,24 @@ mod common;
|
||||
|
||||
use common::TestEnv;
|
||||
|
||||
fn lexical_query(text: &str) -> kb_core::SearchQuery {
|
||||
kb_core::SearchQuery {
|
||||
fn lexical_query(text: &str) -> kebab_core::SearchQuery {
|
||||
kebab_core::SearchQuery {
|
||||
text: text.to_string(),
|
||||
mode: kb_core::SearchMode::Lexical,
|
||||
mode: kebab_core::SearchMode::Lexical,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lexical_search_returns_hits_after_ingest() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
// "Ownership" appears as a heading + paragraph in intro.md and
|
||||
// matches FTS5 default tokenizer easily.
|
||||
let hits =
|
||||
kb_app::search_with_config(env.config.clone(), lexical_query("ownership"))
|
||||
kebab_app::search_with_config(env.config.clone(), lexical_query("ownership"))
|
||||
.unwrap();
|
||||
assert!(!hits.is_empty(), "expected ≥1 hit for 'ownership'");
|
||||
|
||||
@@ -34,7 +34,7 @@ fn lexical_search_returns_hits_after_ingest() {
|
||||
);
|
||||
assert_eq!(
|
||||
h.retrieval.method,
|
||||
kb_core::SearchMode::Lexical,
|
||||
kebab_core::SearchMode::Lexical,
|
||||
"method label should be Lexical"
|
||||
);
|
||||
}
|
||||
@@ -43,8 +43,8 @@ fn lexical_search_returns_hits_after_ingest() {
|
||||
#[test]
|
||||
fn lexical_search_empty_query_returns_empty() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let hits = kb_app::search_with_config(env.config.clone(), lexical_query(" "))
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), lexical_query(" "))
|
||||
.unwrap();
|
||||
assert!(hits.is_empty(), "blank query must short-circuit empty");
|
||||
}
|
||||
@@ -52,15 +52,15 @@ fn lexical_search_empty_query_returns_empty() {
|
||||
#[test]
|
||||
fn vector_mode_with_provider_none_errors_clearly() {
|
||||
let env = TestEnv::lexical_only();
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
|
||||
let q = kb_core::SearchQuery {
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: "ownership".to_string(),
|
||||
mode: kb_core::SearchMode::Vector,
|
||||
mode: kebab_core::SearchMode::Vector,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let err = kb_app::search_with_config(env.config.clone(), q).unwrap_err();
|
||||
let err = kebab_app::search_with_config(env.config.clone(), q).unwrap_err();
|
||||
let msg = format!("{err:#}");
|
||||
assert!(
|
||||
msg.contains("embeddings disabled") || msg.contains("disabled"),
|
||||
@@ -31,21 +31,21 @@ fn ingest_then_hybrid_search_returns_hits() {
|
||||
|
||||
let env = TestEnv::with_embeddings();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
let q = kb_core::SearchQuery {
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: "ownership".to_string(),
|
||||
mode: kb_core::SearchMode::Hybrid,
|
||||
mode: kebab_core::SearchMode::Hybrid,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
assert!(!hits.is_empty(), "expected hybrid hits for 'ownership'");
|
||||
let methods: Vec<_> = hits.iter().map(|h| h.retrieval.method).collect();
|
||||
assert!(
|
||||
methods.iter().all(|m| *m == kb_core::SearchMode::Hybrid),
|
||||
methods.iter().all(|m| *m == kebab_core::SearchMode::Hybrid),
|
||||
"every hit must report method=Hybrid: {methods:?}"
|
||||
);
|
||||
}
|
||||
@@ -58,22 +58,22 @@ fn ingest_then_vector_search_carries_embedding_model() {
|
||||
|
||||
let env = TestEnv::with_embeddings();
|
||||
let report =
|
||||
kb_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
kebab_app::ingest_with_config(env.config.clone(), env.scope(), true).unwrap();
|
||||
assert_eq!(report.errors, 0, "no per-file errors: {report:?}");
|
||||
assert_eq!(report.new, 3);
|
||||
|
||||
let q = kb_core::SearchQuery {
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: "ownership".to_string(),
|
||||
mode: kb_core::SearchMode::Vector,
|
||||
mode: kebab_core::SearchMode::Vector,
|
||||
k: 10,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let hits = kb_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
let hits = kebab_app::search_with_config(env.config.clone(), q).unwrap();
|
||||
assert!(!hits.is_empty(), "expected vector hits for 'ownership'");
|
||||
|
||||
// Vector mode dispatches through `VectorRetriever` and MUST stamp
|
||||
// each hit with the configured embedding_model id.
|
||||
let expected = kb_core::EmbeddingModelId(env.config.models.embedding.model.clone());
|
||||
let expected = kebab_core::EmbeddingModelId(env.config.models.embedding.model.clone());
|
||||
for h in &hits {
|
||||
assert_eq!(
|
||||
h.embedding_model,
|
||||
@@ -82,7 +82,7 @@ fn ingest_then_vector_search_carries_embedding_model() {
|
||||
);
|
||||
assert_eq!(
|
||||
h.retrieval.method,
|
||||
kb_core::SearchMode::Vector,
|
||||
kebab_core::SearchMode::Vector,
|
||||
"vector-mode hit must report method=Vector"
|
||||
);
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-chunk"
|
||||
name = "kebab-chunk"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,7 +8,7 @@ repository = { workspace = true }
|
||||
description = "Chunkers that turn kb-core::CanonicalDocument into kb-core::Chunk batches (§3.5, §4.2, §7.2)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
serde_json_canonicalizer = "0.3"
|
||||
blake3 = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
@@ -20,7 +20,7 @@ tracing = { workspace = true }
|
||||
# regular deps per design §8 (chunker consumes CanonicalDocument from kb-core
|
||||
# only); `cargo tree -p kb-chunk --depth 1` (default scope, excludes dev-deps)
|
||||
# confirms this.
|
||||
kb-parse-md = { path = "../kb-parse-md" }
|
||||
kb-normalize = { path = "../kb-normalize" }
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
kebab-normalize = { path = "../kebab-normalize" }
|
||||
serde_json = { workspace = true }
|
||||
time = { workspace = true }
|
||||
@@ -1,4 +1,4 @@
|
||||
//! `kb-chunk` — chunkers that emit [`kb_core::Chunk`] batches.
|
||||
//! `kb-chunk` — chunkers that emit [`kebab_core::Chunk`] batches.
|
||||
//!
|
||||
//! Per design §3.5 (Chunk), §4.2 (chunk_id recipe), §7.2 (`Chunker`
|
||||
//! trait), §0 Q3/§14 (chunking priority).
|
||||
@@ -1,6 +1,6 @@
|
||||
//! `md-heading-v1` — heading-aware Markdown chunker.
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Block, BlockId, CanonicalDocument, Chunk, ChunkPolicy, Chunker,
|
||||
ChunkerVersion, DocumentId, SourceSpan, id_for_chunk,
|
||||
};
|
||||
@@ -24,7 +24,7 @@ const POLICY_HASH_HEX_LEN: usize = 16;
|
||||
|
||||
/// Heading-aware Markdown chunker.
|
||||
///
|
||||
/// Implements [`kb_core::Chunker`] for Markdown-derived
|
||||
/// Implements [`kebab_core::Chunker`] for Markdown-derived
|
||||
/// [`CanonicalDocument`]s.
|
||||
///
|
||||
/// **Behavior contract** (design §0 / §14, in priority order):
|
||||
@@ -186,7 +186,7 @@ impl Chunker for MdHeadingV1Chunker {
|
||||
flush(&mut acc, doc, &chunker_version, &policy_hash, &mut out);
|
||||
|
||||
tracing::debug!(
|
||||
target: "kb-chunk",
|
||||
target: "kebab-chunk",
|
||||
doc_id = %doc.doc_id,
|
||||
chunks = out.len(),
|
||||
"md-heading-v1 chunked",
|
||||
@@ -409,7 +409,7 @@ fn estimate_block_tokens(b: &Block) -> usize {
|
||||
}
|
||||
|
||||
/// Borrow the `CommonBlock` of any [`Block`] variant.
|
||||
fn common(b: &Block) -> &kb_core::CommonBlock {
|
||||
fn common(b: &Block) -> &kebab_core::CommonBlock {
|
||||
match b {
|
||||
Block::Heading(h) => &h.common,
|
||||
Block::Paragraph(t) | Block::Quote(t) => &t.common,
|
||||
@@ -424,7 +424,7 @@ fn common(b: &Block) -> &kb_core::CommonBlock {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, CodeBlock, CommonBlock, HeadingBlock, ImageRefBlock, Lang,
|
||||
Metadata, Provenance, SourceType, TableBlock, TextBlock, TrustLevel,
|
||||
WorkspacePath, id_for_block,
|
||||
@@ -433,7 +433,7 @@ mod tests {
|
||||
|
||||
fn make_doc(blocks: Vec<Block>) -> CanonicalDocument {
|
||||
CanonicalDocument {
|
||||
doc_id: kb_core::DocumentId("d".repeat(32)),
|
||||
doc_id: kebab_core::DocumentId("d".repeat(32)),
|
||||
source_asset_id: AssetId("a".repeat(32)),
|
||||
workspace_path: WorkspacePath::new("notes/test.md".into()).unwrap(),
|
||||
title: "Test".into(),
|
||||
@@ -450,14 +450,14 @@ mod tests {
|
||||
user: Default::default(),
|
||||
},
|
||||
provenance: Provenance { events: vec![] },
|
||||
parser_version: kb_core::ParserVersion("test-parser-0".into()),
|
||||
parser_version: kebab_core::ParserVersion("test-parser-0".into()),
|
||||
schema_version: 1,
|
||||
doc_version: 1,
|
||||
}
|
||||
}
|
||||
|
||||
fn doc_id() -> kb_core::DocumentId {
|
||||
kb_core::DocumentId("d".repeat(32))
|
||||
fn doc_id() -> kebab_core::DocumentId {
|
||||
kebab_core::DocumentId("d".repeat(32))
|
||||
}
|
||||
|
||||
fn span(start: u32, end: u32) -> SourceSpan {
|
||||
@@ -13,13 +13,13 @@
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kb_chunk::MdHeadingV1Chunker;
|
||||
use kb_core::{
|
||||
use kebab_chunk::MdHeadingV1Chunker;
|
||||
use kebab_core::{
|
||||
AssetId, AssetStorage, Checksum, ChunkPolicy, ChunkerVersion, Chunker, MediaType,
|
||||
ParserVersion, RawAsset, SourceUri, WorkspacePath,
|
||||
};
|
||||
use kb_normalize::build_canonical_document;
|
||||
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_normalize::build_canonical_document;
|
||||
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-cli"
|
||||
name = "kebab-cli"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,13 +8,13 @@ repository = { workspace = true }
|
||||
description = "kb command-line interface"
|
||||
|
||||
[[bin]]
|
||||
name = "kb"
|
||||
name = "kebab"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-app = { path = "../kb-app" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-app = { path = "../kebab-app" }
|
||||
# kb-eval re-exports `compute_aggregate` / `compare_runs` /
|
||||
# `render_report_md` (P5-2). The DoD calls for these to be reached
|
||||
# "via kb-app", but kb-eval already depends on kb-app (P5-1 runner
|
||||
@@ -22,7 +22,7 @@ kb-app = { path = "../kb-app" }
|
||||
# require kb-app → kb-eval, forming a cycle. We therefore wire
|
||||
# kb-cli → kb-eval directly; documented in
|
||||
# `tasks/p5/p5-2-metrics-compare.md`.
|
||||
kb-eval = { path = "../kb-eval" }
|
||||
kebab-eval = { path = "../kebab-eval" }
|
||||
anyhow = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
@@ -6,12 +6,12 @@ use std::process::ExitCode;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
use kb_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
|
||||
use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
|
||||
|
||||
mod wire;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(name = "kb", version, about = "personal local knowledge base")]
|
||||
#[command(name = "kebab", version, about = "personal local knowledge base")]
|
||||
struct Cli {
|
||||
/// Path to a non-default `config.toml`.
|
||||
#[arg(long, global = true)]
|
||||
@@ -170,12 +170,12 @@ enum ModeFlag {
|
||||
Hybrid,
|
||||
}
|
||||
|
||||
impl From<ModeFlag> for kb_core::SearchMode {
|
||||
impl From<ModeFlag> for kebab_core::SearchMode {
|
||||
fn from(m: ModeFlag) -> Self {
|
||||
match m {
|
||||
ModeFlag::Lexical => kb_core::SearchMode::Lexical,
|
||||
ModeFlag::Vector => kb_core::SearchMode::Vector,
|
||||
ModeFlag::Hybrid => kb_core::SearchMode::Hybrid,
|
||||
ModeFlag::Lexical => kebab_core::SearchMode::Lexical,
|
||||
ModeFlag::Vector => kebab_core::SearchMode::Vector,
|
||||
ModeFlag::Hybrid => kebab_core::SearchMode::Hybrid,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -183,15 +183,15 @@ impl From<ModeFlag> for kb_core::SearchMode {
|
||||
fn main() -> ExitCode {
|
||||
let cli = Cli::parse();
|
||||
let level = if cli.debug {
|
||||
kb_app::logging::LogLevel::Debug
|
||||
kebab_app::logging::LogLevel::Debug
|
||||
} else if cli.verbose {
|
||||
kb_app::logging::LogLevel::Verbose
|
||||
kebab_app::logging::LogLevel::Verbose
|
||||
} else {
|
||||
kb_app::logging::LogLevel::Default
|
||||
kebab_app::logging::LogLevel::Default
|
||||
};
|
||||
// Fail-soft: if logging init errors (e.g. XDG state dir is read-only),
|
||||
// proceed without a guard rather than crashing — `kb` is still usable.
|
||||
let _log_guard = kb_app::logging::init(level).ok();
|
||||
let _log_guard = kebab_app::logging::init(level).ok();
|
||||
match run(&cli) {
|
||||
Ok(()) => ExitCode::from(0),
|
||||
Err(e) => {
|
||||
@@ -227,14 +227,14 @@ fn exit_code(err: &anyhow::Error) -> u8 {
|
||||
fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
match &cli.command {
|
||||
Cmd::Init { force } => {
|
||||
kb_app::init_workspace(*force)?;
|
||||
kebab_app::init_workspace(*force)?;
|
||||
if !cli.json {
|
||||
println!(
|
||||
"created {}",
|
||||
kb_config::Config::xdg_config_path().display()
|
||||
kebab_config::Config::xdg_config_path().display()
|
||||
);
|
||||
println!("created {}", kb_config::Config::xdg_data_dir().display());
|
||||
println!("created {}", kb_config::Config::xdg_state_dir().display());
|
||||
println!("created {}", kebab_config::Config::xdg_data_dir().display());
|
||||
println!("created {}", kebab_config::Config::xdg_state_dir().display());
|
||||
println!("hint edit the config above, then `kb ingest`");
|
||||
}
|
||||
Ok(())
|
||||
@@ -244,13 +244,13 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
root,
|
||||
summary_only,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let scope = kb_core::SourceScope {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let scope = kebab_core::SourceScope {
|
||||
root: root.clone().unwrap_or_else(|| PathBuf::from(&cfg.workspace.root)),
|
||||
include: cfg.workspace.include.clone(),
|
||||
exclude: cfg.workspace.exclude.clone(),
|
||||
};
|
||||
let report = kb_app::ingest_with_config(cfg, scope, *summary_only)?;
|
||||
let report = kebab_app::ingest_with_config(cfg, scope, *summary_only)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_ingest(&report))?);
|
||||
} else {
|
||||
@@ -269,8 +269,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
|
||||
Cmd::List { what } => match what {
|
||||
ListWhat::Docs => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let docs = kb_app::list_docs_with_config(cfg, kb_core::DocFilter::default())?;
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let docs = kebab_app::list_docs_with_config(cfg, kebab_core::DocFilter::default())?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_doc_summaries(&docs))?);
|
||||
} else {
|
||||
@@ -284,9 +284,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
|
||||
Cmd::Inspect { what } => match what {
|
||||
InspectWhat::Doc { id } => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let doc_id: kb_core::DocumentId = id.parse()?;
|
||||
let doc = kb_app::inspect_doc_with_config(cfg, &doc_id)?;
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let doc_id: kebab_core::DocumentId = id.parse()?;
|
||||
let doc = kebab_app::inspect_doc_with_config(cfg, &doc_id)?;
|
||||
// Inspect doc emits a `CanonicalDocument` — there's no §2
|
||||
// wire schema for it (P1-5 will decide whether this also
|
||||
// becomes a tagged wrapper or stays as the raw domain
|
||||
@@ -296,9 +296,9 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
InspectWhat::Chunk { id } => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let chunk_id: kb_core::ChunkId = id.parse()?;
|
||||
let chunk = kb_app::inspect_chunk_with_config(cfg, &chunk_id)?;
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let chunk_id: kebab_core::ChunkId = id.parse()?;
|
||||
let chunk = kebab_app::inspect_chunk_with_config(cfg, &chunk_id)?;
|
||||
println!("{}", serde_json::to_string(&wire::wire_chunk_inspection(&chunk))?);
|
||||
Ok(())
|
||||
}
|
||||
@@ -310,14 +310,14 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
mode,
|
||||
explain: _,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let q = kb_core::SearchQuery {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let q = kebab_core::SearchQuery {
|
||||
text: query.clone(),
|
||||
mode: (*mode).into(),
|
||||
k: *k,
|
||||
filters: kb_core::SearchFilters::default(),
|
||||
filters: kebab_core::SearchFilters::default(),
|
||||
};
|
||||
let hits = kb_app::search_with_config(cfg, q)?;
|
||||
let hits = kebab_app::search_with_config(cfg, q)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_search_hits(&hits))?);
|
||||
} else {
|
||||
@@ -351,8 +351,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
temperature,
|
||||
seed,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(cli.config.as_deref())?;
|
||||
let opts = kb_app::AskOpts {
|
||||
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
|
||||
let opts = kebab_app::AskOpts {
|
||||
k: *k,
|
||||
explain: *explain,
|
||||
mode: (*mode).into(),
|
||||
@@ -363,7 +363,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
// wires up a real `mpsc::Sender` here.
|
||||
stream_sink: None,
|
||||
};
|
||||
let ans = kb_app::ask_with_config(cfg, query, opts)?;
|
||||
let ans = kebab_app::ask_with_config(cfg, query, opts)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_answer(&ans))?);
|
||||
} else {
|
||||
@@ -377,7 +377,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
Cmd::Doctor => {
|
||||
let report = kb_app::doctor_with_config_path(cli.config.as_deref())?;
|
||||
let report = kebab_app::doctor_with_config_path(cli.config.as_deref())?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string(&wire::wire_doctor(&report))?);
|
||||
} else {
|
||||
@@ -409,7 +409,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
temperature,
|
||||
seed,
|
||||
} => {
|
||||
let opts = kb_eval::EvalRunOpts {
|
||||
let opts = kebab_eval::EvalRunOpts {
|
||||
suite: suite.clone(),
|
||||
mode: (*mode).into(),
|
||||
with_rag: *with_rag,
|
||||
@@ -417,7 +417,7 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
temperature: *temperature,
|
||||
seed: *seed,
|
||||
};
|
||||
let run = kb_eval::run_eval(&opts)?;
|
||||
let run = kebab_eval::run_eval(&opts)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string_pretty(&run)?);
|
||||
} else {
|
||||
@@ -430,8 +430,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
EvalWhat::Aggregate { run_id } => {
|
||||
let agg = kb_eval::compute_aggregate(run_id)?;
|
||||
kb_eval::store_aggregate(run_id, &agg)?;
|
||||
let agg = kebab_eval::compute_aggregate(run_id)?;
|
||||
kebab_eval::store_aggregate(run_id, &agg)?;
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string_pretty(&agg)?);
|
||||
} else {
|
||||
@@ -450,20 +450,20 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
strict_chunker_version,
|
||||
write_report,
|
||||
} => {
|
||||
let cfg = kb_config::Config::load(None)?;
|
||||
let opts = kb_eval::CompareOpts {
|
||||
let cfg = kebab_config::Config::load(None)?;
|
||||
let opts = kebab_eval::CompareOpts {
|
||||
strict_chunker_version: *strict_chunker_version,
|
||||
};
|
||||
let report = kb_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
|
||||
let md = kb_eval::render_report_md(&report);
|
||||
let report = kebab_eval::compare_runs_with_config(&cfg, run_a, run_b, &opts)?;
|
||||
let md = kebab_eval::render_report_md(&report);
|
||||
if cli.json {
|
||||
println!("{}", serde_json::to_string_pretty(&report)?);
|
||||
} else {
|
||||
print!("{md}");
|
||||
}
|
||||
if *write_report {
|
||||
let resolved_data_dir = kb_config::expand_path(&cfg.storage.data_dir, "");
|
||||
let runs_dir = kb_config::expand_path(
|
||||
let resolved_data_dir = kebab_config::expand_path(&cfg.storage.data_dir, "");
|
||||
let runs_dir = kebab_config::expand_path(
|
||||
&cfg.storage.runs_dir,
|
||||
&resolved_data_dir.to_string_lossy(),
|
||||
);
|
||||
@@ -17,8 +17,8 @@
|
||||
|
||||
use serde_json::Value;
|
||||
|
||||
use kb_app::DoctorReport;
|
||||
use kb_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
|
||||
use kebab_app::DoctorReport;
|
||||
use kebab_core::{Answer, Chunk, DocSummary, IngestReport, SearchHit};
|
||||
|
||||
/// Insert `schema_version` into an object-shaped `Value`. Helper for the
|
||||
/// "serialize, then tag" pattern used by all the per-type wrappers below.
|
||||
@@ -132,7 +132,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn ingest_wrapper_tags_schema_version() {
|
||||
use kb_core::SourceScope;
|
||||
use kebab_core::SourceScope;
|
||||
let r = IngestReport {
|
||||
scope: SourceScope {
|
||||
root: std::path::PathBuf::from("/tmp"),
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-config"
|
||||
name = "kebab-config"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -9,7 +9,7 @@ description = "Config schema + XDG path resolution"
|
||||
|
||||
[dependencies]
|
||||
# kb-core::CoreError reserved for P1-* config errors
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
anyhow = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -1,6 +1,6 @@
|
||||
//! `kb-config` — `Config` schema and XDG path resolution (§6).
|
||||
//!
|
||||
//! Layer order (`Config::load`): defaults → file → env (`KB_<SECTION>_<KEY>`).
|
||||
//! Layer order (`Config::load`): defaults → file → env (`KEBAB_<SECTION>_<KEY>`).
|
||||
//! CLI overrides land later, applied by `kb-cli` after `Config::load`.
|
||||
|
||||
use std::collections::HashMap;
|
||||
@@ -113,8 +113,8 @@ impl Config {
|
||||
],
|
||||
},
|
||||
storage: StorageCfg {
|
||||
data_dir: "${XDG_DATA_HOME:-~/.local/share}/kb".to_string(),
|
||||
sqlite: "{data_dir}/kb.sqlite".to_string(),
|
||||
data_dir: "${XDG_DATA_HOME:-~/.local/share}/kebab".to_string(),
|
||||
sqlite: "{data_dir}/kebab.sqlite".to_string(),
|
||||
vector_dir: "{data_dir}/lancedb".to_string(),
|
||||
asset_dir: "{data_dir}/assets".to_string(),
|
||||
artifact_dir: "{data_dir}/artifacts".to_string(),
|
||||
@@ -191,139 +191,139 @@ impl Config {
|
||||
Ok(cfg)
|
||||
}
|
||||
|
||||
/// Apply `KB_<SECTION>_<KEY>` env overrides. Unknown keys are ignored.
|
||||
/// Apply `KEBAB_<SECTION>_<KEY>` env overrides. Unknown keys are ignored.
|
||||
///
|
||||
/// The mapping is an explicit grep-friendly whitelist — one match arm
|
||||
/// per leaf key in `Config`. Booleans accept `1` / `true` / `yes`
|
||||
/// (case-insensitive) for true and anything else for false. Numeric
|
||||
/// keys silently keep their prior value if the env value fails to
|
||||
/// parse, so a malformed `KB_*` cannot crash startup.
|
||||
/// parse, so a malformed `KEBAB_*` cannot crash startup.
|
||||
pub fn apply_env(mut self, env: &HashMap<String, String>) -> Self {
|
||||
for (k, v) in env {
|
||||
if !k.starts_with("KB_") {
|
||||
if !k.starts_with("KEBAB_") {
|
||||
continue;
|
||||
}
|
||||
match k.as_str() {
|
||||
// workspace
|
||||
"KB_WORKSPACE_ROOT" => self.workspace.root = v.clone(),
|
||||
"KEBAB_WORKSPACE_ROOT" => self.workspace.root = v.clone(),
|
||||
|
||||
// storage
|
||||
"KB_STORAGE_DATA_DIR" => self.storage.data_dir = v.clone(),
|
||||
"KB_STORAGE_SQLITE" => self.storage.sqlite = v.clone(),
|
||||
"KB_STORAGE_VECTOR_DIR" => self.storage.vector_dir = v.clone(),
|
||||
"KB_STORAGE_ASSET_DIR" => self.storage.asset_dir = v.clone(),
|
||||
"KB_STORAGE_ARTIFACT_DIR" => self.storage.artifact_dir = v.clone(),
|
||||
"KB_STORAGE_MODEL_DIR" => self.storage.model_dir = v.clone(),
|
||||
"KB_STORAGE_RUNS_DIR" => self.storage.runs_dir = v.clone(),
|
||||
"KB_STORAGE_COPY_THRESHOLD_MB" => {
|
||||
"KEBAB_STORAGE_DATA_DIR" => self.storage.data_dir = v.clone(),
|
||||
"KEBAB_STORAGE_SQLITE" => self.storage.sqlite = v.clone(),
|
||||
"KEBAB_STORAGE_VECTOR_DIR" => self.storage.vector_dir = v.clone(),
|
||||
"KEBAB_STORAGE_ASSET_DIR" => self.storage.asset_dir = v.clone(),
|
||||
"KEBAB_STORAGE_ARTIFACT_DIR" => self.storage.artifact_dir = v.clone(),
|
||||
"KEBAB_STORAGE_MODEL_DIR" => self.storage.model_dir = v.clone(),
|
||||
"KEBAB_STORAGE_RUNS_DIR" => self.storage.runs_dir = v.clone(),
|
||||
"KEBAB_STORAGE_COPY_THRESHOLD_MB" => {
|
||||
if let Ok(n) = v.parse::<u64>() {
|
||||
self.storage.copy_threshold_mb = n;
|
||||
}
|
||||
}
|
||||
|
||||
// indexing
|
||||
"KB_INDEXING_MAX_PARALLEL_EXTRACTORS" => {
|
||||
"KEBAB_INDEXING_MAX_PARALLEL_EXTRACTORS" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.indexing.max_parallel_extractors = n;
|
||||
}
|
||||
}
|
||||
"KB_INDEXING_MAX_PARALLEL_EMBEDDINGS" => {
|
||||
"KEBAB_INDEXING_MAX_PARALLEL_EMBEDDINGS" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.indexing.max_parallel_embeddings = n;
|
||||
}
|
||||
}
|
||||
"KB_INDEXING_WATCH_FILESYSTEM" => {
|
||||
"KEBAB_INDEXING_WATCH_FILESYSTEM" => {
|
||||
self.indexing.watch_filesystem = parse_bool(v);
|
||||
}
|
||||
|
||||
// chunking
|
||||
"KB_CHUNKING_TARGET_TOKENS" => {
|
||||
"KEBAB_CHUNKING_TARGET_TOKENS" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.chunking.target_tokens = n;
|
||||
}
|
||||
}
|
||||
"KB_CHUNKING_OVERLAP_TOKENS" => {
|
||||
"KEBAB_CHUNKING_OVERLAP_TOKENS" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.chunking.overlap_tokens = n;
|
||||
}
|
||||
}
|
||||
"KB_CHUNKING_RESPECT_MARKDOWN_HEADINGS" => {
|
||||
"KEBAB_CHUNKING_RESPECT_MARKDOWN_HEADINGS" => {
|
||||
self.chunking.respect_markdown_headings = parse_bool(v);
|
||||
}
|
||||
"KB_CHUNKING_CHUNKER_VERSION" => self.chunking.chunker_version = v.clone(),
|
||||
"KEBAB_CHUNKING_CHUNKER_VERSION" => self.chunking.chunker_version = v.clone(),
|
||||
|
||||
// models.embedding
|
||||
"KB_MODELS_EMBEDDING_PROVIDER" => self.models.embedding.provider = v.clone(),
|
||||
"KB_MODELS_EMBEDDING_MODEL" => self.models.embedding.model = v.clone(),
|
||||
"KB_MODELS_EMBEDDING_VERSION" => self.models.embedding.version = v.clone(),
|
||||
"KB_MODELS_EMBEDDING_DIMENSIONS" => {
|
||||
"KEBAB_MODELS_EMBEDDING_PROVIDER" => self.models.embedding.provider = v.clone(),
|
||||
"KEBAB_MODELS_EMBEDDING_MODEL" => self.models.embedding.model = v.clone(),
|
||||
"KEBAB_MODELS_EMBEDDING_VERSION" => self.models.embedding.version = v.clone(),
|
||||
"KEBAB_MODELS_EMBEDDING_DIMENSIONS" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.models.embedding.dimensions = n;
|
||||
}
|
||||
}
|
||||
"KB_MODELS_EMBEDDING_BATCH_SIZE" => {
|
||||
"KEBAB_MODELS_EMBEDDING_BATCH_SIZE" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.models.embedding.batch_size = n;
|
||||
}
|
||||
}
|
||||
|
||||
// models.llm
|
||||
"KB_MODELS_LLM_PROVIDER" => self.models.llm.provider = v.clone(),
|
||||
"KB_MODELS_LLM_MODEL" => self.models.llm.model = v.clone(),
|
||||
"KB_MODELS_LLM_CONTEXT_TOKENS" => {
|
||||
"KEBAB_MODELS_LLM_PROVIDER" => self.models.llm.provider = v.clone(),
|
||||
"KEBAB_MODELS_LLM_MODEL" => self.models.llm.model = v.clone(),
|
||||
"KEBAB_MODELS_LLM_CONTEXT_TOKENS" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.models.llm.context_tokens = n;
|
||||
}
|
||||
}
|
||||
"KB_MODELS_LLM_ENDPOINT" => self.models.llm.endpoint = v.clone(),
|
||||
"KB_MODELS_LLM_TEMPERATURE" => {
|
||||
"KEBAB_MODELS_LLM_ENDPOINT" => self.models.llm.endpoint = v.clone(),
|
||||
"KEBAB_MODELS_LLM_TEMPERATURE" => {
|
||||
if let Ok(f) = v.parse::<f32>() {
|
||||
self.models.llm.temperature = f;
|
||||
}
|
||||
}
|
||||
"KB_MODELS_LLM_SEED" => {
|
||||
"KEBAB_MODELS_LLM_SEED" => {
|
||||
if let Ok(n) = v.parse::<u64>() {
|
||||
self.models.llm.seed = n;
|
||||
}
|
||||
}
|
||||
|
||||
// search
|
||||
"KB_SEARCH_DEFAULT_K" => {
|
||||
"KEBAB_SEARCH_DEFAULT_K" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.search.default_k = n;
|
||||
}
|
||||
}
|
||||
"KB_SEARCH_HYBRID_FUSION" => self.search.hybrid_fusion = v.clone(),
|
||||
"KB_SEARCH_RRF_K" => {
|
||||
"KEBAB_SEARCH_HYBRID_FUSION" => self.search.hybrid_fusion = v.clone(),
|
||||
"KEBAB_SEARCH_RRF_K" => {
|
||||
if let Ok(n) = v.parse::<u32>() {
|
||||
self.search.rrf_k = n;
|
||||
}
|
||||
}
|
||||
"KB_SEARCH_SNIPPET_CHARS" => {
|
||||
"KEBAB_SEARCH_SNIPPET_CHARS" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.search.snippet_chars = n;
|
||||
}
|
||||
}
|
||||
|
||||
// rag
|
||||
"KB_RAG_PROMPT_TEMPLATE_VERSION" => {
|
||||
"KEBAB_RAG_PROMPT_TEMPLATE_VERSION" => {
|
||||
self.rag.prompt_template_version = v.clone();
|
||||
}
|
||||
"KB_RAG_SCORE_GATE" => {
|
||||
"KEBAB_RAG_SCORE_GATE" => {
|
||||
if let Ok(f) = v.parse::<f32>() {
|
||||
self.rag.score_gate = f;
|
||||
}
|
||||
}
|
||||
"KB_RAG_EXPLAIN_DEFAULT" => {
|
||||
"KEBAB_RAG_EXPLAIN_DEFAULT" => {
|
||||
self.rag.explain_default = parse_bool(v);
|
||||
}
|
||||
"KB_RAG_MAX_CONTEXT_TOKENS" => {
|
||||
"KEBAB_RAG_MAX_CONTEXT_TOKENS" => {
|
||||
if let Ok(n) = v.parse::<usize>() {
|
||||
self.rag.max_context_tokens = n;
|
||||
}
|
||||
}
|
||||
|
||||
// Unknown KB_* keys are silently ignored — see
|
||||
// Unknown KEBAB_* keys are silently ignored — see
|
||||
// `env_unknown_key_is_ignored` test.
|
||||
_ => {}
|
||||
}
|
||||
@@ -331,58 +331,58 @@ impl Config {
|
||||
self
|
||||
}
|
||||
|
||||
/// `~/.config/kb/config.toml` (honors `XDG_CONFIG_HOME`).
|
||||
/// `~/.config/kebab/config.toml` (honors `XDG_CONFIG_HOME`).
|
||||
pub fn xdg_config_path() -> PathBuf {
|
||||
if let Ok(custom) = std::env::var("XDG_CONFIG_HOME") {
|
||||
if !custom.is_empty() {
|
||||
return PathBuf::from(custom).join("kb").join("config.toml");
|
||||
return PathBuf::from(custom).join("kebab").join("config.toml");
|
||||
}
|
||||
}
|
||||
match dirs::config_dir() {
|
||||
Some(d) => d.join("kb").join("config.toml"),
|
||||
None => PathBuf::from("./kb/config.toml"),
|
||||
Some(d) => d.join("kebab").join("config.toml"),
|
||||
None => PathBuf::from("./kebab/config.toml"),
|
||||
}
|
||||
}
|
||||
|
||||
/// `~/.local/share/kb` (honors `XDG_DATA_HOME`).
|
||||
/// `~/.local/share/kebab` (honors `XDG_DATA_HOME`).
|
||||
pub fn xdg_data_dir() -> PathBuf {
|
||||
if let Ok(custom) = std::env::var("XDG_DATA_HOME") {
|
||||
if !custom.is_empty() {
|
||||
return PathBuf::from(custom).join("kb");
|
||||
return PathBuf::from(custom).join("kebab");
|
||||
}
|
||||
}
|
||||
match dirs::data_dir() {
|
||||
Some(d) => d.join("kb"),
|
||||
None => PathBuf::from("./kb-data"),
|
||||
Some(d) => d.join("kebab"),
|
||||
None => PathBuf::from("./kebab-data"),
|
||||
}
|
||||
}
|
||||
|
||||
/// `~/.cache/kb` (honors `XDG_CACHE_HOME`).
|
||||
/// `~/.cache/kebab` (honors `XDG_CACHE_HOME`).
|
||||
pub fn xdg_cache_dir() -> PathBuf {
|
||||
if let Ok(custom) = std::env::var("XDG_CACHE_HOME") {
|
||||
if !custom.is_empty() {
|
||||
return PathBuf::from(custom).join("kb");
|
||||
return PathBuf::from(custom).join("kebab");
|
||||
}
|
||||
}
|
||||
match dirs::cache_dir() {
|
||||
Some(d) => d.join("kb"),
|
||||
None => PathBuf::from("./kb-cache"),
|
||||
Some(d) => d.join("kebab"),
|
||||
None => PathBuf::from("./kebab-cache"),
|
||||
}
|
||||
}
|
||||
|
||||
/// `~/.local/state/kb` (honors `XDG_STATE_HOME`).
|
||||
/// `~/.local/state/kebab` (honors `XDG_STATE_HOME`).
|
||||
pub fn xdg_state_dir() -> PathBuf {
|
||||
if let Ok(custom) = std::env::var("XDG_STATE_HOME") {
|
||||
if !custom.is_empty() {
|
||||
return PathBuf::from(custom).join("kb");
|
||||
return PathBuf::from(custom).join("kebab");
|
||||
}
|
||||
}
|
||||
// `dirs` doesn't expose state_dir on all platforms; fall back to
|
||||
// `$HOME/.local/state/kb` if XDG_STATE_HOME is unset.
|
||||
// `$HOME/.local/state/kebab` if XDG_STATE_HOME is unset.
|
||||
if let Some(home) = dirs::home_dir() {
|
||||
return home.join(".local").join("state").join("kb");
|
||||
return home.join(".local").join("state").join("kebab");
|
||||
}
|
||||
PathBuf::from("./kb-state")
|
||||
PathBuf::from("./kebab-state")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -417,7 +417,7 @@ mod tests {
|
||||
#[test]
|
||||
fn env_override_score_gate() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert("KB_RAG_SCORE_GATE".to_string(), "0.5".to_string());
|
||||
env.insert("KEBAB_RAG_SCORE_GATE".to_string(), "0.5".to_string());
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert!((c.rag.score_gate - 0.5).abs() < 1e-6);
|
||||
}
|
||||
@@ -425,7 +425,7 @@ mod tests {
|
||||
#[test]
|
||||
fn env_override_search_k() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert("KB_SEARCH_DEFAULT_K".to_string(), "25".to_string());
|
||||
env.insert("KEBAB_SEARCH_DEFAULT_K".to_string(), "25".to_string());
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert_eq!(c.search.default_k, 25);
|
||||
}
|
||||
@@ -434,7 +434,7 @@ mod tests {
|
||||
fn env_unknown_key_is_ignored() {
|
||||
let baseline = Config::defaults();
|
||||
let mut env = HashMap::new();
|
||||
env.insert("KB_NOPE_FOO".to_string(), "garbage".to_string());
|
||||
env.insert("KEBAB_NOPE_FOO".to_string(), "garbage".to_string());
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert_eq!(c, baseline);
|
||||
}
|
||||
@@ -442,7 +442,7 @@ mod tests {
|
||||
#[test]
|
||||
fn env_overrides_chunking_target_tokens() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert("KB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string());
|
||||
env.insert("KEBAB_CHUNKING_TARGET_TOKENS".to_string(), "777".to_string());
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert_eq!(c.chunking.target_tokens, 777);
|
||||
}
|
||||
@@ -451,10 +451,10 @@ mod tests {
|
||||
fn env_overrides_models_llm_endpoint_and_temperature() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert(
|
||||
"KB_MODELS_LLM_ENDPOINT".to_string(),
|
||||
"KEBAB_MODELS_LLM_ENDPOINT".to_string(),
|
||||
"http://10.0.0.1:11434".to_string(),
|
||||
);
|
||||
env.insert("KB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string());
|
||||
env.insert("KEBAB_MODELS_LLM_TEMPERATURE".to_string(), "0.7".to_string());
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
assert_eq!(c.models.llm.endpoint, "http://10.0.0.1:11434");
|
||||
assert!((c.models.llm.temperature - 0.7).abs() < 1e-6);
|
||||
@@ -464,7 +464,7 @@ mod tests {
|
||||
fn env_overrides_indexing_watch_filesystem_bool() {
|
||||
let mut env = HashMap::new();
|
||||
env.insert(
|
||||
"KB_INDEXING_WATCH_FILESYSTEM".to_string(),
|
||||
"KEBAB_INDEXING_WATCH_FILESYSTEM".to_string(),
|
||||
"true".to_string(),
|
||||
);
|
||||
let c = Config::defaults().apply_env(&env);
|
||||
@@ -477,10 +477,10 @@ mod tests {
|
||||
let prev = std::env::var("XDG_CONFIG_HOME").ok();
|
||||
// SAFETY: tests in this module run sequentially; we restore below.
|
||||
unsafe {
|
||||
std::env::set_var("XDG_CONFIG_HOME", "/tmp/kbtest-xdg-config");
|
||||
std::env::set_var("XDG_CONFIG_HOME", "/tmp/kebabtest-xdg-config");
|
||||
}
|
||||
let p = Config::xdg_config_path();
|
||||
assert_eq!(p, PathBuf::from("/tmp/kbtest-xdg-config/kb/config.toml"));
|
||||
assert_eq!(p, PathBuf::from("/tmp/kebabtest-xdg-config/kebab/config.toml"));
|
||||
// SAFETY: scope-local restore.
|
||||
unsafe {
|
||||
match prev {
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Shared path expansion helper.
|
||||
//!
|
||||
//! `Config::storage.*` fields are stored as raw template strings (e.g.
|
||||
//! `${XDG_DATA_HOME:-~/.local/share}/kb`, `{data_dir}/runs`). Every
|
||||
//! `${XDG_DATA_HOME:-~/.local/share}/kebab`, `{data_dir}/runs`). Every
|
||||
//! crate that turns one of those strings into a real filesystem path
|
||||
//! needs to apply the same set of substitutions; this module is the
|
||||
//! single source of truth so the behavior cannot drift.
|
||||
@@ -133,8 +133,8 @@ mod tests {
|
||||
// SAFETY: lock held for the duration of this test.
|
||||
unsafe { std::env::set_var("XDG_DATA_HOME", "/custom/path") };
|
||||
|
||||
let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kb", "");
|
||||
assert_eq!(p, PathBuf::from("/custom/path/kb"));
|
||||
let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kebab", "");
|
||||
assert_eq!(p, PathBuf::from("/custom/path/kebab"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -145,8 +145,8 @@ mod tests {
|
||||
unsafe { std::env::remove_var("XDG_DATA_HOME") };
|
||||
|
||||
let home = std::env::var("HOME").expect("HOME must be set in tests");
|
||||
let expected = PathBuf::from(home).join(".local/share/kb");
|
||||
let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kb", "");
|
||||
let expected = PathBuf::from(home).join(".local/share/kebab");
|
||||
let p = expand_path("${XDG_DATA_HOME:-~/.local/share}/kebab", "");
|
||||
assert_eq!(p, expected);
|
||||
}
|
||||
|
||||
@@ -180,7 +180,7 @@ mod tests {
|
||||
// SAFETY: lock held for the duration of this test.
|
||||
unsafe { std::env::set_var("XDG_DATA_HOME", "/xdg/data") };
|
||||
|
||||
let p = expand_path("{data_dir}/runs", "/xdg/data/kb");
|
||||
assert_eq!(p, PathBuf::from("/xdg/data/kb/runs"));
|
||||
let p = expand_path("{data_dir}/runs", "/xdg/data/kebab");
|
||||
assert_eq!(p, PathBuf::from("/xdg/data/kebab/runs"));
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-core"
|
||||
name = "kebab-core"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -4,7 +4,7 @@
|
||||
//! `kb-*` crate, so every other crate in the workspace can depend on it
|
||||
//! freely.
|
||||
//!
|
||||
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` for
|
||||
//! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` for
|
||||
//! the canonical type bodies — this crate is the byte-for-byte mirror.
|
||||
|
||||
pub mod ids;
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-embed-local"
|
||||
name = "kebab-embed-local"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Local fastembed-rs adapter implementing kb_core::Embedder (multilingual-e5-small default)"
|
||||
|
||||
[dependencies]
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-embed = { path = "../kb-embed" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-embed = { path = "../kebab-embed" }
|
||||
# Default features bring `ort-download-binaries` (bundled ONNX runtime)
|
||||
# and `hf-hub-native-tls` (first-run model download). No extra features
|
||||
# needed for the multilingual-e5-small path.
|
||||
@@ -1,5 +1,5 @@
|
||||
//! `kb-embed-local` — `FastembedEmbedder`, a local ONNX-backed
|
||||
//! [`Embedder`](kb_embed::Embedder) implementation.
|
||||
//! [`Embedder`](kebab_embed::Embedder) implementation.
|
||||
//!
|
||||
//! Wraps [`fastembed::TextEmbedding`] for the default `multilingual-e5-small`
|
||||
//! (384-dim) model. Honors `config.models.embedding.batch_size` and applies
|
||||
@@ -19,15 +19,15 @@
|
||||
//! rules `kb-store-sqlite` applies to `data_dir` (`${XDG_DATA_HOME:-…}`,
|
||||
//! leading `~`, `{data_dir}` substitution).
|
||||
//!
|
||||
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md`
|
||||
//! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md`
|
||||
//! §7.2 (Embedder), §6.4 ([models.embedding]), §9 (versioning).
|
||||
|
||||
use std::sync::Mutex;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use fastembed::{EmbeddingModel, InitOptions, TextEmbedding};
|
||||
use kb_config::expand_path;
|
||||
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
use kebab_config::expand_path;
|
||||
use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
|
||||
/// Subdirectory under `config.storage.model_dir` where the fastembed
|
||||
/// adapter writes / reads ONNX + tokenizer files. Hard-coded per task
|
||||
@@ -58,9 +58,9 @@ impl FastembedEmbedder {
|
||||
/// `config.models.embedding.dimensions` matches the model's actual
|
||||
/// dim BEFORE returning, so a mismatch fails at construction (not on
|
||||
/// first `embed`).
|
||||
pub fn new(config: &kb_config::Config) -> Result<Self> {
|
||||
pub fn new(config: &kebab_config::Config) -> Result<Self> {
|
||||
// 1. Resolve `{data_dir}/models/fastembed/` from the config
|
||||
// templates. Goes through the shared `kb_config::expand_path`
|
||||
// templates. Goes through the shared `kebab_config::expand_path`
|
||||
// so every crate resolves storage paths identically.
|
||||
let data_dir = expand_path(&config.storage.data_dir, "");
|
||||
let model_dir = expand_path(&config.storage.model_dir, &data_dir.to_string_lossy());
|
||||
@@ -82,7 +82,7 @@ impl FastembedEmbedder {
|
||||
check_dim(model_info.dim, config.models.embedding.dimensions)?;
|
||||
|
||||
tracing::info!(
|
||||
target: "kb-embed-local",
|
||||
target: "kebab-embed-local",
|
||||
cache_dir = %cache_dir.display(),
|
||||
model = %config.models.embedding.model,
|
||||
dims = model_info.dim,
|
||||
@@ -97,7 +97,7 @@ impl FastembedEmbedder {
|
||||
.with_cache_dir(cache_dir.clone())
|
||||
.with_show_download_progress(false);
|
||||
tracing::info!(
|
||||
target: "kb-embed-local",
|
||||
target: "kebab-embed-local",
|
||||
model = %config.models.embedding.model,
|
||||
cache_dir = %cache_dir.display(),
|
||||
"loading embedding model (first run will download ~470MB)"
|
||||
@@ -106,7 +106,7 @@ impl FastembedEmbedder {
|
||||
.context("fastembed: TextEmbedding::try_new")?;
|
||||
let dimensions = model_info.dim;
|
||||
tracing::info!(
|
||||
target: "kb-embed-local",
|
||||
target: "kebab-embed-local",
|
||||
model = %config.models.embedding.model,
|
||||
dimensions,
|
||||
"embedding model loaded"
|
||||
@@ -224,7 +224,7 @@ pub(crate) fn check_dim(model_dim: usize, cfg_dim: usize) -> Result<()> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_embed::EmbeddingInput;
|
||||
use kebab_embed::EmbeddingInput;
|
||||
|
||||
// ── check_dim ────────────────────────────────────────────────────
|
||||
//
|
||||
@@ -22,16 +22,16 @@ use std::hash::{Hash, Hasher};
|
||||
use std::sync::OnceLock;
|
||||
use std::time::Instant;
|
||||
|
||||
use kb_embed::{Embedder, EmbeddingInput, EmbeddingKind};
|
||||
use kb_embed_local::FastembedEmbedder;
|
||||
use kebab_embed::{Embedder, EmbeddingInput, EmbeddingKind};
|
||||
use kebab_embed_local::FastembedEmbedder;
|
||||
|
||||
/// Build a `Config` whose `data_dir` lives in a per-process temp dir so
|
||||
/// the test never writes into the developer's real `~/.local/share/kb`.
|
||||
/// the test never writes into the developer's real `~/.local/share/kebab`.
|
||||
/// Returns the `Config` and the `TempDir` guard (caller keeps the guard
|
||||
/// alive for the test duration).
|
||||
fn test_config() -> (kb_config::Config, tempfile::TempDir) {
|
||||
fn test_config() -> (kebab_config::Config, tempfile::TempDir) {
|
||||
let tmp = tempfile::tempdir().expect("create tempdir");
|
||||
let mut cfg = kb_config::Config::defaults();
|
||||
let mut cfg = kebab_config::Config::defaults();
|
||||
cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned();
|
||||
// model_dir keeps its default `{data_dir}/models` template; the
|
||||
// adapter resolves it itself.
|
||||
@@ -141,12 +141,12 @@ fn output_vectors_are_l2_normalized() {
|
||||
},
|
||||
];
|
||||
let out = emb.embed(&inputs).expect("embed");
|
||||
// Per `kb_embed::assert_unit_norm` docs: `5e-4` is the safe bound at
|
||||
// Per `kebab_embed::assert_unit_norm` docs: `5e-4` is the safe bound at
|
||||
// 384 dims (f32::EPSILON × √384 ≈ 2.3e-6, but ONNX kernels add
|
||||
// their own per-component noise; 1e-3 is very generous and matches
|
||||
// the spec's `± 1e-3`).
|
||||
kb_embed::assert_unit_norm(&out, 1e-3);
|
||||
kb_embed::assert_vector_shape(&out, 384);
|
||||
kebab_embed::assert_unit_norm(&out, 1e-3);
|
||||
kebab_embed::assert_vector_shape(&out, 384);
|
||||
}
|
||||
|
||||
// ─── determinism ──────────────────────────────────────────────────────
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-embed"
|
||||
name = "kebab-embed"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Embedder trait re-exports + opt-in deterministic MockEmbedder for downstream tests"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
serde = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
@@ -1,9 +1,9 @@
|
||||
//! `kb-embed` — thin re-export crate for the [`Embedder`] trait surface.
|
||||
//!
|
||||
//! This crate exists so downstream code (`kb-store-vector`, `kb-search`,
|
||||
//! adapters in p3-2) can `use kb_embed::Embedder` and stay stable across
|
||||
//! adapters in p3-2) can `use kebab_embed::Embedder` and stay stable across
|
||||
//! kb-core reorganizations. It defines **no new types**; everything is a
|
||||
//! re-export of [`kb_core`].
|
||||
//! re-export of [`kebab_core`].
|
||||
//!
|
||||
//! ## Mock implementation
|
||||
//!
|
||||
@@ -11,7 +11,7 @@
|
||||
//! deterministic test double. Real adapters (fastembed, candle, ollama-embed)
|
||||
//! live in p3-2 and MUST NOT be implemented here.
|
||||
//!
|
||||
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.1, §7.2,
|
||||
//! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §7.1, §7.2,
|
||||
//! §11 for the contract.
|
||||
|
||||
// ── Trait re-exports ──────────────────────────────────────────────────────
|
||||
@@ -19,7 +19,7 @@
|
||||
// Per spec §7.2 — these are the only public-surface types this crate offers.
|
||||
// Adding new types is forbidden by the task contract.
|
||||
|
||||
pub use kb_core::{
|
||||
pub use kebab_core::{
|
||||
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
|
||||
};
|
||||
|
||||
@@ -38,7 +38,7 @@
|
||||
//! * Different `text` → different output with overwhelming probability.
|
||||
//! * All output components are finite (`is_finite()`).
|
||||
|
||||
use kb_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
use kebab_core::{Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion};
|
||||
|
||||
/// Deterministic test double. See module docs for the hashing recipe.
|
||||
pub struct MockEmbedder {
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
#![cfg(feature = "mock")]
|
||||
|
||||
use kb_embed::{
|
||||
use kebab_embed::{
|
||||
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion, MockEmbedder,
|
||||
assert_unit_norm, assert_vector_shape,
|
||||
};
|
||||
@@ -5,7 +5,7 @@
|
||||
//! Runs under both `cargo test -p kb-embed` and
|
||||
//! `cargo test -p kb-embed --features mock`.
|
||||
|
||||
use kb_embed::{
|
||||
use kebab_embed::{
|
||||
Embedder, EmbeddingInput, EmbeddingKind, EmbeddingModelId, EmbeddingVersion,
|
||||
assert_vector_shape,
|
||||
};
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-eval"
|
||||
name = "kebab-eval"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -9,10 +9,10 @@ description = "Golden-fixture eval runner: load YAML, drive kb-app search/ask,
|
||||
|
||||
[dependencies]
|
||||
# Allowed deps per p5-1 spec — domain types + facade only.
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-app = { path = "../kb-app" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-app = { path = "../kebab-app" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
serde_yaml = { workspace = true }
|
||||
@@ -14,9 +14,9 @@ use std::fmt::Write as _;
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{ChunkId, DocumentId};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{ChunkId, DocumentId};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
|
||||
use crate::loader::load_golden_set;
|
||||
use crate::metrics::{
|
||||
@@ -300,7 +300,7 @@ fn extract_chunker_version(snapshot_json: &str) -> Option<String> {
|
||||
}
|
||||
|
||||
fn parse_results(
|
||||
rows: &[kb_store_sqlite::EvalQueryResultRecord],
|
||||
rows: &[kebab_store_sqlite::EvalQueryResultRecord],
|
||||
) -> Result<HashMap<String, QueryResult>> {
|
||||
let mut out = HashMap::with_capacity(rows.len());
|
||||
for row in rows {
|
||||
@@ -456,9 +456,9 @@ mod tests {
|
||||
let g = GoldenQuery {
|
||||
id: "q1".into(),
|
||||
query: "q".into(),
|
||||
lang: kb_core::Lang(String::new()),
|
||||
lang: kebab_core::Lang(String::new()),
|
||||
expected_doc_ids: vec![],
|
||||
expected_chunk_ids: vec![kb_core::ChunkId("c1".into())],
|
||||
expected_chunk_ids: vec![kebab_core::ChunkId("c1".into())],
|
||||
must_contain: vec![],
|
||||
forbidden: vec![],
|
||||
difficulty: None,
|
||||
@@ -1,7 +1,7 @@
|
||||
//! `kb-eval` — golden-fixture eval runner (P5-1).
|
||||
//!
|
||||
//! Loads `fixtures/golden_queries.yaml`, runs each entry through the
|
||||
//! [`kb_app`] facade (lexical / vector / hybrid + optional RAG), and
|
||||
//! [`kebab_app`] facade (lexical / vector / hybrid + optional RAG), and
|
||||
//! persists results into `eval_runs` / `eval_query_results` plus
|
||||
//! `runs_dir/<run_id>/per_query.jsonl` (design §5.7, §6.3).
|
||||
//!
|
||||
@@ -6,7 +6,7 @@
|
||||
//! tests that don't have a SQLite store handy.
|
||||
//! - [`load_golden_set_validated`] — additionally verifies every
|
||||
//! `expected_doc_id` / `expected_chunk_id` exists in the SQLite DB
|
||||
//! the supplied [`kb_config::Config`] points at. Used by
|
||||
//! the supplied [`kebab_config::Config`] points at. Used by
|
||||
//! [`crate::run_eval`] in production so a stale golden set fails
|
||||
//! fast at run start.
|
||||
|
||||
@@ -14,7 +14,7 @@ use std::collections::{BTreeSet, HashSet};
|
||||
use std::path::Path;
|
||||
|
||||
use anyhow::{Context, Result, anyhow};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
|
||||
use crate::types::GoldenQuery;
|
||||
|
||||
@@ -43,11 +43,11 @@ pub fn load_golden_set(path: &Path) -> Result<Vec<GoldenQuery>> {
|
||||
/// Currently used only by the in-module tests below; production code
|
||||
/// inlines `load_golden_set` + `validate_against_db` in
|
||||
/// [`crate::run_eval_with_config`] so the validation can run against
|
||||
/// an already-opened [`kb_config::Config`] without re-parsing YAML.
|
||||
/// an already-opened [`kebab_config::Config`] without re-parsing YAML.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn load_golden_set_validated(
|
||||
yaml_path: &Path,
|
||||
cfg: &kb_config::Config,
|
||||
cfg: &kebab_config::Config,
|
||||
) -> Result<Vec<GoldenQuery>> {
|
||||
let queries = load_golden_set(yaml_path)?;
|
||||
validate_against_db(&queries, cfg)?;
|
||||
@@ -73,7 +73,7 @@ fn check_unique_ids(queries: &[GoldenQuery]) -> Result<()> {
|
||||
/// Read every doc_id / chunk_id referenced by `queries` and confirm
|
||||
/// SQLite has rows for them. Builds a sorted, deduplicated error
|
||||
/// message listing every missing ID.
|
||||
pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kb_config::Config) -> Result<()> {
|
||||
pub(crate) fn validate_against_db(queries: &[GoldenQuery], cfg: &kebab_config::Config) -> Result<()> {
|
||||
// Short-circuit when there is nothing to validate — saves opening
|
||||
// SQLite for golden sets that omit expected_*_ids entirely.
|
||||
let needs_check = queries
|
||||
@@ -140,8 +140,8 @@ mod tests {
|
||||
//! `tests/loader.rs`; only the validated-variant cases need to sit
|
||||
//! next to the function so they can see the `pub(crate)` symbol.
|
||||
use super::*;
|
||||
use kb_config::Config;
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params;
|
||||
use std::fs;
|
||||
use tempfile::tempdir;
|
||||
@@ -13,9 +13,9 @@ use std::path::PathBuf;
|
||||
use anyhow::{Context, Result};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{ChunkId, Citation, DocumentId};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{ChunkId, Citation, DocumentId};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
|
||||
use crate::loader::load_golden_set;
|
||||
use crate::types::{GoldenQuery, QueryResult};
|
||||
@@ -40,10 +40,10 @@ const STORAGE_DECIMALS: u32 = 4;
|
||||
/// (P5-1) used — otherwise `expected_*` / `must_contain` won't line up
|
||||
/// with the stored `query_id`s. `pub(crate)` so the runner shares the
|
||||
/// exact same name + default rather than duplicating constants.
|
||||
pub(crate) const KB_EVAL_GOLDEN: &str = "KB_EVAL_GOLDEN";
|
||||
pub(crate) const KEBAB_EVAL_GOLDEN: &str = "KEBAB_EVAL_GOLDEN";
|
||||
|
||||
/// Default golden YAML path (relative to CWD when set). Same
|
||||
/// rationale as [`KB_EVAL_GOLDEN`] — single source of truth.
|
||||
/// rationale as [`KEBAB_EVAL_GOLDEN`] — single source of truth.
|
||||
pub(crate) const DEFAULT_GOLDEN_PATH: &str = "fixtures/golden_queries.yaml";
|
||||
|
||||
/// Aggregate metrics for one stored eval run.
|
||||
@@ -151,7 +151,7 @@ pub fn store_aggregate_with_config(
|
||||
/// the runner uses, same default path. Pulled into its own helper so
|
||||
/// `compare_runs` can share it.
|
||||
pub(crate) fn resolve_golden_path() -> PathBuf {
|
||||
match std::env::var(KB_EVAL_GOLDEN) {
|
||||
match std::env::var(KEBAB_EVAL_GOLDEN) {
|
||||
Ok(s) if !s.is_empty() => PathBuf::from(s),
|
||||
_ => PathBuf::from(DEFAULT_GOLDEN_PATH),
|
||||
}
|
||||
@@ -161,7 +161,7 @@ fn load_golden_for_metrics() -> Result<Vec<GoldenQuery>> {
|
||||
let path = resolve_golden_path();
|
||||
load_golden_set(&path).with_context(|| {
|
||||
format!(
|
||||
"load golden set from {} (override via KB_EVAL_GOLDEN)",
|
||||
"load golden set from {} (override via KEBAB_EVAL_GOLDEN)",
|
||||
path.display()
|
||||
)
|
||||
})
|
||||
@@ -175,7 +175,7 @@ fn load_golden_for_metrics() -> Result<Vec<GoldenQuery>> {
|
||||
/// `tasks/p5/p5-2-metrics-compare.md`), this will need to take one.
|
||||
pub(crate) fn aggregate_from_rows(
|
||||
queries: &[GoldenQuery],
|
||||
rows: &[kb_store_sqlite::EvalQueryResultRecord],
|
||||
rows: &[kebab_store_sqlite::EvalQueryResultRecord],
|
||||
) -> Result<AggregateMetrics> {
|
||||
let golden_by_id: HashMap<&str, &GoldenQuery> =
|
||||
queries.iter().map(|q| (q.id.as_str(), q)).collect();
|
||||
@@ -395,14 +395,14 @@ fn ratio_or_zero(num: u32, denom: u32) -> f32 {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, RetrievalDetail, SearchHit,
|
||||
SearchMode,
|
||||
};
|
||||
use kb_core::asset::WorkspacePath;
|
||||
use kb_core::media::Lang;
|
||||
use kb_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId};
|
||||
use kb_core::versions::PromptTemplateVersion;
|
||||
use kebab_core::asset::WorkspacePath;
|
||||
use kebab_core::media::Lang;
|
||||
use kebab_core::answer::{Answer, AnswerCitation, AnswerRetrievalSummary, ModelRef, TokenUsage, TraceId};
|
||||
use kebab_core::versions::PromptTemplateVersion;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
fn gq(id: &str, expected_chunks: &[&str], expected_docs: &[&str]) -> GoldenQuery {
|
||||
@@ -460,9 +460,9 @@ mod tests {
|
||||
}
|
||||
|
||||
fn record(id: &str, hits: Vec<SearchHit>, error: Option<String>, answer: Option<Answer>)
|
||||
-> kb_store_sqlite::EvalQueryResultRecord
|
||||
-> kebab_store_sqlite::EvalQueryResultRecord
|
||||
{
|
||||
kb_store_sqlite::EvalQueryResultRecord {
|
||||
kebab_store_sqlite::EvalQueryResultRecord {
|
||||
query_id: id.into(),
|
||||
result_json: serde_json::to_string(&qr(id, hits, error, answer)).unwrap(),
|
||||
}
|
||||
@@ -6,14 +6,14 @@ use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use kb_app::App;
|
||||
use kb_config::expand_path;
|
||||
use kb_core::{SearchFilters, SearchQuery};
|
||||
use kb_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use kebab_app::App;
|
||||
use kebab_config::expand_path;
|
||||
use kebab_core::{SearchFilters, SearchQuery};
|
||||
use kebab_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::loader::{load_golden_set, validate_against_db};
|
||||
use crate::metrics::{DEFAULT_GOLDEN_PATH, KB_EVAL_GOLDEN};
|
||||
use crate::metrics::{DEFAULT_GOLDEN_PATH, KEBAB_EVAL_GOLDEN};
|
||||
use crate::types::{EvalRun, EvalRunOpts, GoldenQuery, QueryResult};
|
||||
|
||||
/// Convert a wall-clock duration since `start` into milliseconds clamped
|
||||
@@ -25,18 +25,18 @@ fn elapsed_ms_u32(start: Instant) -> u32 {
|
||||
}
|
||||
|
||||
/// Run the golden suite end-to-end against the active XDG-loaded
|
||||
/// [`kb_config::Config`]. Wraps [`run_eval_with_config`] with
|
||||
/// [`kebab_config::Config`]. Wraps [`run_eval_with_config`] with
|
||||
/// `Config::load(None)`.
|
||||
pub fn run_eval(opts: &EvalRunOpts) -> Result<EvalRun> {
|
||||
let cfg = kb_config::Config::load(None).context("load Config for run_eval")?;
|
||||
let cfg = kebab_config::Config::load(None).context("load Config for run_eval")?;
|
||||
run_eval_with_config(&cfg, opts)
|
||||
}
|
||||
|
||||
/// Run the golden suite end-to-end against an explicit
|
||||
/// [`kb_config::Config`]. Used by integration tests (TempDir-backed
|
||||
/// [`kebab_config::Config`]. Used by integration tests (TempDir-backed
|
||||
/// data_dir) and any future caller that wants to drive the runner
|
||||
/// against a non-default config.
|
||||
pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> {
|
||||
pub fn run_eval_with_config(cfg: &kebab_config::Config, opts: &EvalRunOpts) -> Result<EvalRun> {
|
||||
let started = Instant::now();
|
||||
|
||||
// ── 1. Load golden set ────────────────────────────────────────────────
|
||||
@@ -46,7 +46,7 @@ pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Resu
|
||||
let golden_path = resolve_golden_path();
|
||||
let queries = load_golden_set(&golden_path).with_context(|| {
|
||||
format!(
|
||||
"load golden set from {} (override via KB_EVAL_GOLDEN)",
|
||||
"load golden set from {} (override via KEBAB_EVAL_GOLDEN)",
|
||||
golden_path.display()
|
||||
)
|
||||
})?;
|
||||
@@ -55,7 +55,7 @@ pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Resu
|
||||
// ── 2. Mint identifiers + open store ──────────────────────────────────
|
||||
let run_id = mint_run_id();
|
||||
let created_at = OffsetDateTime::now_utc();
|
||||
let commit_hash = std::env::var("KB_COMMIT_HASH")
|
||||
let commit_hash = std::env::var("KEBAB_COMMIT_HASH")
|
||||
.ok()
|
||||
.filter(|s| !s.is_empty());
|
||||
|
||||
@@ -110,7 +110,7 @@ pub fn run_eval_with_config(cfg: &kb_config::Config, opts: &EvalRunOpts) -> Resu
|
||||
|
||||
let duration_ms = elapsed_ms_u32(started);
|
||||
tracing::info!(
|
||||
target: "kb-eval",
|
||||
target: "kebab-eval",
|
||||
run_id = %run_id,
|
||||
suite = %opts.suite,
|
||||
queries = per_query.len(),
|
||||
@@ -136,11 +136,11 @@ fn mint_run_id() -> String {
|
||||
format!("run_{id}")
|
||||
}
|
||||
|
||||
/// Resolve the golden YAML path. Honors the `KB_EVAL_GOLDEN` env
|
||||
/// Resolve the golden YAML path. Honors the `KEBAB_EVAL_GOLDEN` env
|
||||
/// override; otherwise relative to CWD. The path is NOT expanded for
|
||||
/// `~` / `${...}` placeholders — direct file paths only.
|
||||
fn resolve_golden_path() -> PathBuf {
|
||||
match std::env::var(KB_EVAL_GOLDEN) {
|
||||
match std::env::var(KEBAB_EVAL_GOLDEN) {
|
||||
Ok(s) if !s.is_empty() => PathBuf::from(s),
|
||||
_ => PathBuf::from(DEFAULT_GOLDEN_PATH),
|
||||
}
|
||||
@@ -167,7 +167,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
|
||||
// call did not already error out (we want one error per query, not
|
||||
// a duplicated one).
|
||||
let answer = if opts.with_rag && error.is_none() {
|
||||
let ask_opts = kb_app::AskOpts {
|
||||
let ask_opts = kebab_app::AskOpts {
|
||||
k: opts.k,
|
||||
explain: true,
|
||||
mode: opts.mode,
|
||||
@@ -206,7 +206,7 @@ fn execute_query(app: &App, gq: &GoldenQuery, opts: &EvalRunOpts) -> QueryResult
|
||||
/// stable run-time property of the config alone. P5-2 may compose it
|
||||
/// from `embedding.{model,version,dimensions}` if it needs the field
|
||||
/// for compare reports.
|
||||
fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> {
|
||||
fn build_config_snapshot(cfg: &kebab_config::Config) -> Result<serde_json::Value> {
|
||||
let cfg_value = serde_json::to_value(cfg).context("serialize Config")?;
|
||||
Ok(serde_json::json!({
|
||||
"config": cfg_value,
|
||||
@@ -234,7 +234,7 @@ fn build_config_snapshot(cfg: &kb_config::Config) -> Result<serde_json::Value> {
|
||||
/// `run_id` collision would already have failed the `eval_runs`
|
||||
/// PRIMARY KEY upstream).
|
||||
fn write_per_query_jsonl(
|
||||
cfg: &kb_config::Config,
|
||||
cfg: &kebab_config::Config,
|
||||
run_id: &str,
|
||||
per_query: &[QueryResult],
|
||||
) -> Result<()> {
|
||||
@@ -4,7 +4,7 @@
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use kb_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode};
|
||||
use kebab_core::{Answer, ChunkId, DocumentId, Lang, SearchHit, SearchMode};
|
||||
|
||||
/// One golden query loaded from `fixtures/golden_queries.yaml`.
|
||||
///
|
||||
@@ -41,10 +41,10 @@ pub struct EvalRunOpts {
|
||||
/// Suite label persisted into `eval_runs.suite`. The shipped
|
||||
/// fixture is `"golden"`; other suites can reuse the same runner.
|
||||
pub suite: String,
|
||||
/// Retrieval mode forwarded to every `kb_app::search` /
|
||||
/// `kb_app::ask` call inside the run.
|
||||
/// Retrieval mode forwarded to every `kebab_app::search` /
|
||||
/// `kebab_app::ask` call inside the run.
|
||||
pub mode: SearchMode,
|
||||
/// When `true`, also call `kb_app::ask` per query and record the
|
||||
/// When `true`, also call `kebab_app::ask` per query and record the
|
||||
/// resulting `Answer` on the `QueryResult`.
|
||||
pub with_rag: bool,
|
||||
/// Top-k forwarded to retrieval (and `AskOpts.k` when `with_rag`).
|
||||
@@ -8,7 +8,7 @@
|
||||
|
||||
use std::fs;
|
||||
|
||||
use kb_eval::load_golden_set;
|
||||
use kebab_eval::load_golden_set;
|
||||
use tempfile::tempdir;
|
||||
|
||||
// ── 1. parser accepts well-formed YAML with optional fields ──────────────────
|
||||
@@ -9,17 +9,17 @@
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, Lang,
|
||||
RetrievalDetail, SearchHit, SearchMode,
|
||||
asset::WorkspacePath,
|
||||
};
|
||||
use kb_eval::{
|
||||
use kebab_eval::{
|
||||
AggregateMetrics, CompareOpts, CompareReport, ComparisonKind, GoldenQuery, QueryResult,
|
||||
compare_runs_with_config, compute_aggregate_with_config, store_aggregate_with_config,
|
||||
};
|
||||
use kb_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use kebab_store_sqlite::{EvalRunRow, SqliteStore};
|
||||
use tempfile::TempDir;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -34,7 +34,7 @@ fn cfg_with_data_dir(tmp: &TempDir, golden_yaml: &str) -> Config {
|
||||
// SAFELY scoped — `set_var` is process-global so callers serialise
|
||||
// tests via the `serial_test`-style guard below.
|
||||
unsafe {
|
||||
std::env::set_var("KB_EVAL_GOLDEN", &golden_path);
|
||||
std::env::set_var("KEBAB_EVAL_GOLDEN", &golden_path);
|
||||
}
|
||||
cfg
|
||||
}
|
||||
@@ -127,9 +127,9 @@ fn write_run(
|
||||
store.record_eval_run_with_results(&row, &results).unwrap();
|
||||
}
|
||||
|
||||
/// Each test mutates a process-global env var (`KB_EVAL_GOLDEN`) and
|
||||
/// Each test mutates a process-global env var (`KEBAB_EVAL_GOLDEN`) and
|
||||
/// expects to see its own write. Take this mutex around the body of
|
||||
/// every test that touches `KB_EVAL_GOLDEN` so two concurrent test
|
||||
/// every test that touches `KEBAB_EVAL_GOLDEN` so two concurrent test
|
||||
/// threads don't trip over each other's golden YAML.
|
||||
fn env_guard() -> std::sync::MutexGuard<'static, ()> {
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
@@ -259,7 +259,7 @@ fn compare_runs_classifies_win_loss_draw_regression() {
|
||||
drop(store);
|
||||
|
||||
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
|
||||
let by_id: std::collections::HashMap<&str, &kb_eval::QueryComparison> =
|
||||
let by_id: std::collections::HashMap<&str, &kebab_eval::QueryComparison> =
|
||||
report.per_query.iter().map(|c| (c.query_id.as_str(), c)).collect();
|
||||
assert_eq!(by_id["q-001"].kind, ComparisonKind::Loss);
|
||||
assert_eq!(by_id["q-002"].kind, ComparisonKind::Win);
|
||||
@@ -414,7 +414,7 @@ fn render_report_md_is_human_readable() {
|
||||
drop(store);
|
||||
|
||||
let report = compare_runs_with_config(&cfg, "run_a", "run_b", &CompareOpts::default()).unwrap();
|
||||
let md = kb_eval::render_report_md(&report);
|
||||
let md = kebab_eval::render_report_md(&report);
|
||||
assert!(md.starts_with("# Eval compare:"), "md = {md}");
|
||||
assert!(md.contains("hit@1"));
|
||||
assert!(md.contains("MRR"));
|
||||
@@ -1,13 +1,13 @@
|
||||
//! Runner integration tests for `kb-eval` (P5-1).
|
||||
//!
|
||||
//! Drives [`kb_eval::run_eval_with_config`] end-to-end against a
|
||||
//! Drives [`kebab_eval::run_eval_with_config`] end-to-end against a
|
||||
//! TempDir-backed config:
|
||||
//!
|
||||
//! - tiny seeded SQLite corpus (3 docs / 3 chunks) used as the
|
||||
//! workspace's source-of-truth,
|
||||
//! - lexical-only retrieval (`SearchMode::Lexical`) so no embedder is
|
||||
//! required (`models.embedding.provider = "none"`),
|
||||
//! - golden YAML pointed at via `KB_EVAL_GOLDEN`.
|
||||
//! - golden YAML pointed at via `KEBAB_EVAL_GOLDEN`.
|
||||
//!
|
||||
//! Determinism: lexical-only with a fixed seed corpus produces
|
||||
//! byte-identical `per_query.jsonl` content (modulo `run_id` /
|
||||
@@ -17,14 +17,14 @@ use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Mutex;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::SearchMode;
|
||||
use kb_eval::{EvalRunOpts, QueryResult, run_eval_with_config};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_config::Config;
|
||||
use kebab_core::SearchMode;
|
||||
use kebab_eval::{EvalRunOpts, QueryResult, run_eval_with_config};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params;
|
||||
use tempfile::TempDir;
|
||||
|
||||
/// `KB_EVAL_GOLDEN` is process-global state. Tests touching it must
|
||||
/// `KEBAB_EVAL_GOLDEN` is process-global state. Tests touching it must
|
||||
/// serialize so they don't trample each other when `cargo test`
|
||||
/// runs them in parallel.
|
||||
static GOLDEN_ENV_LOCK: Mutex<()> = Mutex::new(());
|
||||
@@ -110,7 +110,7 @@ fn seed_corpus(store: &SqliteStore) {
|
||||
// Build the FTS index so lexical search returns hits. Reuses the
|
||||
// same connection guard rather than reopening — the SAVEPOINT
|
||||
// protocol nests correctly under the existing read_conn lock.
|
||||
kb_store_sqlite::rebuild_chunks_fts(&conn).unwrap();
|
||||
kebab_store_sqlite::rebuild_chunks_fts(&conn).unwrap();
|
||||
drop(conn);
|
||||
}
|
||||
|
||||
@@ -143,19 +143,19 @@ fn lexical_opts() -> EvalRunOpts {
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the eval after pointing `KB_EVAL_GOLDEN` at `yaml`. The env
|
||||
/// Run the eval after pointing `KEBAB_EVAL_GOLDEN` at `yaml`. The env
|
||||
/// guard must outlive the call so concurrent tests don't reset the
|
||||
/// var mid-run.
|
||||
fn run_with_golden<F: FnOnce() -> R, R>(yaml: &Path, f: F) -> R {
|
||||
let _g = GOLDEN_ENV_LOCK.lock().unwrap_or_else(|p| p.into_inner());
|
||||
// SAFETY: `KB_EVAL_GOLDEN` is a benign env var; the GOLDEN_ENV_LOCK
|
||||
// SAFETY: `KEBAB_EVAL_GOLDEN` is a benign env var; the GOLDEN_ENV_LOCK
|
||||
// serializes mutations so concurrent tests don't race.
|
||||
unsafe {
|
||||
std::env::set_var("KB_EVAL_GOLDEN", yaml);
|
||||
std::env::set_var("KEBAB_EVAL_GOLDEN", yaml);
|
||||
}
|
||||
let out = f();
|
||||
unsafe {
|
||||
std::env::remove_var("KB_EVAL_GOLDEN");
|
||||
std::env::remove_var("KEBAB_EVAL_GOLDEN");
|
||||
}
|
||||
out
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-llm-local"
|
||||
name = "kebab-llm-local"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,9 +8,9 @@ repository = { workspace = true }
|
||||
description = "Ollama HTTP adapter implementing kb_core::LanguageModel via reqwest::blocking"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-llm = { path = "../kb-llm" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-llm = { path = "../kebab-llm" }
|
||||
# `default-features = false` drops the `default-tls` (native-tls / openssl)
|
||||
# feature so we don't pull in a system OpenSSL; we explicitly pin rustls.
|
||||
# Note: `default-features = false` does NOT drop tokio — reqwest 0.12's
|
||||
@@ -1,5 +1,5 @@
|
||||
//! `kb-llm-local` — Ollama HTTP adapter implementing
|
||||
//! [`kb_core::LanguageModel`] over the local `POST /api/generate` endpoint.
|
||||
//! [`kebab_core::LanguageModel`] over the local `POST /api/generate` endpoint.
|
||||
//!
|
||||
//! ## Why a separate crate
|
||||
//!
|
||||
@@ -29,7 +29,7 @@
|
||||
//! - **Lazy connect.** [`OllamaLanguageModel::new`] does not hit the network;
|
||||
//! the first error surfaces on [`LanguageModel::generate_stream`].
|
||||
//!
|
||||
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.2,
|
||||
//! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §7.2,
|
||||
//! §6.4 (`[models.llm]`), §0 Q5 (streaming), §10 (errors), and report §11.2
|
||||
//! (Ollama protocol notes).
|
||||
|
||||
@@ -39,11 +39,11 @@ mod ollama;
|
||||
pub use error::LlmError;
|
||||
pub use ollama::OllamaLanguageModel;
|
||||
|
||||
// Re-export the trait surface so adapter consumers can `use kb_llm_local::*`
|
||||
// Re-export the trait surface so adapter consumers can `use kebab_llm_local::*`
|
||||
// without also depending on `kb-llm` directly. These are the same symbols
|
||||
// `kb-llm` re-exports from `kb-core`; this crate adds **no new types** to
|
||||
// the trait surface (`LlmError` and `OllamaLanguageModel` are
|
||||
// implementation-side only).
|
||||
pub use kb_llm::{
|
||||
pub use kebab_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
@@ -41,7 +41,7 @@
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::time::Duration;
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -68,7 +68,7 @@ pub struct OllamaLanguageModel {
|
||||
}
|
||||
|
||||
impl OllamaLanguageModel {
|
||||
/// Build an adapter from a workspace [`kb_config::Config`]. Reads
|
||||
/// Build an adapter from a workspace [`kebab_config::Config`]. Reads
|
||||
/// `config.models.llm.{provider, model, endpoint, context_tokens,
|
||||
/// temperature, seed}`.
|
||||
///
|
||||
@@ -76,7 +76,7 @@ impl OllamaLanguageModel {
|
||||
/// expected to have validated `provider == "ollama"`; this constructor
|
||||
/// trusts the config and would happily build for an unknown provider.
|
||||
/// (Provider routing is the App layer's job, not the adapter's.)
|
||||
pub fn new(config: &kb_config::Config) -> anyhow::Result<Self> {
|
||||
pub fn new(config: &kebab_config::Config) -> anyhow::Result<Self> {
|
||||
let llm = &config.models.llm;
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.timeout(REQUEST_TIMEOUT)
|
||||
@@ -292,7 +292,7 @@ impl Iterator for OllamaStream {
|
||||
// pipelines that expect a terminal frame still terminate.
|
||||
self.done = true;
|
||||
tracing::warn!(
|
||||
target: "kb_llm_local",
|
||||
target: "kebab_llm_local",
|
||||
"ollama stream ended without a `done: true` frame; synthesizing Aborted",
|
||||
);
|
||||
return Some(Ok(TokenChunk::Done {
|
||||
@@ -361,14 +361,14 @@ impl Iterator for OllamaStream {
|
||||
};
|
||||
let prompt_tokens = line.prompt_eval_count.unwrap_or_else(|| {
|
||||
tracing::warn!(
|
||||
target: "kb_llm_local",
|
||||
target: "kebab_llm_local",
|
||||
"ollama done frame missing prompt_eval_count; defaulting to 0",
|
||||
);
|
||||
0
|
||||
});
|
||||
let completion_tokens = line.eval_count.unwrap_or_else(|| {
|
||||
tracing::warn!(
|
||||
target: "kb_llm_local",
|
||||
target: "kebab_llm_local",
|
||||
"ollama done frame missing eval_count; defaulting to 0",
|
||||
);
|
||||
0
|
||||
@@ -2,8 +2,8 @@
|
||||
//! relevant config fields and exposes them via the trait surface, all
|
||||
//! without touching the network (per design §7.2 lazy-connect contract).
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
use kebab_config::Config;
|
||||
use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
|
||||
#[test]
|
||||
fn construction_with_default_config_returns_expected_model_ref() {
|
||||
@@ -11,16 +11,16 @@
|
||||
//! These hit `http://127.0.0.1:11434` directly and require an actual model
|
||||
//! pulled locally. CI runs default (non-ignored) tests only.
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{GenerateRequest, TokenChunk};
|
||||
use kb_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{GenerateRequest, TokenChunk};
|
||||
use kebab_llm_local::{LanguageModel, OllamaLanguageModel};
|
||||
|
||||
#[test]
|
||||
#[ignore = "requires a local Ollama daemon + pulled model"]
|
||||
fn real_ollama_streams_non_empty_response() {
|
||||
// Use whatever model the workspace defaults select. Override via the
|
||||
// KB_MODELS_LLM_MODEL env var if you want a different one for this run
|
||||
// (e.g. `KB_MODELS_LLM_MODEL=qwen2.5:7b-instruct cargo test ... -- --ignored`).
|
||||
// KEBAB_MODELS_LLM_MODEL env var if you want a different one for this run
|
||||
// (e.g. `KEBAB_MODELS_LLM_MODEL=qwen2.5:7b-instruct cargo test ... -- --ignored`).
|
||||
let cfg = Config::load(None).expect("config should load");
|
||||
let llm = OllamaLanguageModel::new(&cfg).unwrap();
|
||||
|
||||
@@ -10,9 +10,9 @@
|
||||
//! error mapping, finish-reason mapping, missing-counter degradation, and
|
||||
//! determinism semantics.
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{FinishReason, GenerateRequest, TokenChunk};
|
||||
use kb_llm_local::{LanguageModel, LlmError, OllamaLanguageModel};
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{FinishReason, GenerateRequest, TokenChunk};
|
||||
use kebab_llm_local::{LanguageModel, LlmError, OllamaLanguageModel};
|
||||
use wiremock::matchers::{method, path};
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-llm"
|
||||
name = "kebab-llm"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,7 +8,7 @@ repository = { workspace = true }
|
||||
description = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
anyhow = { workspace = true }
|
||||
|
||||
[features]
|
||||
@@ -1,8 +1,8 @@
|
||||
//! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface.
|
||||
//!
|
||||
//! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can
|
||||
//! `use kb_llm::LanguageModel` and stay stable across kb-core reorganizations.
|
||||
//! It defines **no new types**; everything is a re-export of [`kb_core`].
|
||||
//! `use kebab_llm::LanguageModel` and stay stable across kb-core reorganizations.
|
||||
//! It defines **no new types**; everything is a re-export of [`kebab_core`].
|
||||
//!
|
||||
//! ## Mock implementation
|
||||
//!
|
||||
@@ -12,7 +12,7 @@
|
||||
//! from `generate_stream` itself (e.g., connection refused) before any chunk
|
||||
//! is yielded; the mock never does.
|
||||
//!
|
||||
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.1, §7.2,
|
||||
//! See `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §7.1, §7.2,
|
||||
//! §0 Q5 (streaming), §3.8 (`ModelRef`) for the contract.
|
||||
|
||||
// ── Trait re-exports ──────────────────────────────────────────────────────
|
||||
@@ -20,7 +20,7 @@
|
||||
// Per spec §7.2 — these are the only public-surface types this crate offers.
|
||||
// Adding new types is forbidden by the task contract.
|
||||
|
||||
pub use kb_core::{
|
||||
pub use kebab_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
//! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever
|
||||
//! the constructor was given — the mock does not count.
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
#![cfg(feature = "mock")]
|
||||
|
||||
use kb_llm::{
|
||||
use kebab_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage,
|
||||
assert_finish_chunk,
|
||||
};
|
||||
@@ -5,7 +5,7 @@
|
||||
//! Runs under both `cargo test -p kb-llm` and
|
||||
//! `cargo test -p kb-llm --features mock`.
|
||||
|
||||
use kb_llm::{
|
||||
use kebab_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
assert_finish_chunk,
|
||||
};
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-normalize"
|
||||
name = "kebab-normalize"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Lift parser output (kb-parse-types) into kb-core::CanonicalDocument with deterministic IDs (§3.4, §4.2, §4.3)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-parse-types = { path = "../kb-parse-types" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-parse-types = { path = "../kebab-parse-types" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
unicode-normalization = "0.1"
|
||||
@@ -23,5 +23,5 @@ tracing = { workspace = true }
|
||||
# Forbidden as a regular dep per design §8 (kb-normalize must not depend
|
||||
# on any specific parser); `cargo tree -p kb-normalize --depth 1` (the
|
||||
# default scope, excluding dev-deps) confirms this.
|
||||
kb-parse-md = { path = "../kb-parse-md" }
|
||||
kebab-parse-md = { path = "../kebab-parse-md" }
|
||||
serde_json = { workspace = true }
|
||||
@@ -1,5 +1,5 @@
|
||||
//! `kb-normalize` — lift parser output (`kb-parse-types`) into a
|
||||
//! [`kb_core::CanonicalDocument`] with deterministic IDs.
|
||||
//! [`kebab_core::CanonicalDocument`] with deterministic IDs.
|
||||
//!
|
||||
//! Per design §3.4 (CanonicalDocument / Block), §4.2 (ID recipe), §4.3
|
||||
//! (ordinal rule), §3.6 (Provenance), §8 (module boundaries).
|
||||
@@ -20,16 +20,16 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::Result;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Block, BlockId, CanonicalDocument, CodeBlock, CommonBlock, DocumentId, HeadingBlock,
|
||||
ImageRefBlock, Inline, Lang, ListBlock, Metadata, ParserVersion, Provenance, ProvenanceEvent,
|
||||
ProvenanceKind, RawAsset, TableBlock, TextBlock,
|
||||
};
|
||||
use kb_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind};
|
||||
use kebab_parse_types::{ParsedBlock, ParsedPayload, Warning, WarningKind};
|
||||
use time::OffsetDateTime;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
pub use kb_core::{id_for_block, id_for_doc};
|
||||
pub use kebab_core::{id_for_block, id_for_doc};
|
||||
|
||||
/// Build a [`CanonicalDocument`] from the raw asset, frontmatter
|
||||
/// metadata, parser blocks, parser version, and any warnings.
|
||||
@@ -38,7 +38,7 @@ pub use kb_core::{id_for_block, id_for_doc};
|
||||
///
|
||||
/// * `doc_id = id_for_doc(workspace_path, asset_id, parser_version)` —
|
||||
/// `workspace_path` is consumed verbatim from `asset` (already NFC +
|
||||
/// POSIX per `kb_core::normalize::to_posix`).
|
||||
/// POSIX per `kebab_core::normalize::to_posix`).
|
||||
/// * `block_id = id_for_block(doc_id, kind, heading_path, ordinal,
|
||||
/// source_span)` — `ordinal` is **0-based, scoped to (heading_path,
|
||||
/// block_kind), in document order** per §4.3.
|
||||
@@ -96,7 +96,7 @@ pub fn build_canonical_document(
|
||||
.collect();
|
||||
|
||||
tracing::debug!(
|
||||
target: "kb-normalize",
|
||||
target: "kebab-normalize",
|
||||
"built canonical document doc_id={} blocks={}",
|
||||
doc_id.0,
|
||||
lifted_blocks.len()
|
||||
@@ -329,7 +329,7 @@ fn flatten_inline(i: &Inline, out: &mut String) {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, AssetStorage, Checksum, MediaType, SourceSpan, SourceType, SourceUri,
|
||||
TrustLevel, WorkspacePath, normalize::to_posix,
|
||||
};
|
||||
@@ -386,7 +386,7 @@ mod tests {
|
||||
let h1_b = vec!["B".to_string()];
|
||||
vec![
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_a.clone(),
|
||||
source_span: SourceSpan::Line { start: 1, end: 1 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -395,7 +395,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_a.clone(),
|
||||
source_span: SourceSpan::Line { start: 2, end: 2 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -404,7 +404,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_a.clone(),
|
||||
source_span: SourceSpan::Line { start: 3, end: 3 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -413,7 +413,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Code,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Code,
|
||||
heading_path: h1_a,
|
||||
source_span: SourceSpan::Line { start: 4, end: 5 },
|
||||
payload: ParsedPayload::Code {
|
||||
@@ -422,7 +422,7 @@ mod tests {
|
||||
},
|
||||
},
|
||||
ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: h1_b,
|
||||
source_span: SourceSpan::Line { start: 6, end: 6 },
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -715,7 +715,7 @@ mod tests {
|
||||
fn audio_ref_block_skipped_with_warning() {
|
||||
let span = SourceSpan::Line { start: 1, end: 1 };
|
||||
let blocks = vec![ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::AudioRef,
|
||||
kind: kebab_parse_types::ParsedBlockKind::AudioRef,
|
||||
heading_path: vec![],
|
||||
source_span: span,
|
||||
payload: ParsedPayload::AudioRef {
|
||||
@@ -759,7 +759,7 @@ mod tests {
|
||||
let nfd_heading = "\u{1100}\u{1161}".to_string(); // 가 (NFD)
|
||||
let nfc_heading = "\u{AC00}".to_string(); // 가 (NFC)
|
||||
let mk_block = |heading: String| ParsedBlock {
|
||||
kind: kb_parse_types::ParsedBlockKind::Paragraph,
|
||||
kind: kebab_parse_types::ParsedBlockKind::Paragraph,
|
||||
heading_path: vec![heading],
|
||||
source_span: span.clone(),
|
||||
payload: ParsedPayload::Paragraph {
|
||||
@@ -15,12 +15,12 @@
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, AssetStorage, Checksum, MediaType, ParserVersion, RawAsset, SourceUri,
|
||||
WorkspacePath,
|
||||
};
|
||||
use kb_normalize::build_canonical_document;
|
||||
use kb_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use kebab_normalize::build_canonical_document;
|
||||
use kebab_parse_md::{BodyHints, parse_blocks, parse_frontmatter};
|
||||
use serde_json::Value;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-parse-md"
|
||||
name = "kebab-parse-md"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,8 +8,8 @@ repository = { workspace = true }
|
||||
description = "Markdown frontmatter and block parsing into kb-core::Metadata / kb-parse-types intermediates"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-parse-types = { path = "../kb-parse-types" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-parse-types = { path = "../kebab-parse-types" }
|
||||
anyhow = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -1,10 +1,10 @@
|
||||
//! Markdown body → flat `Vec<kb_parse_types::ParsedBlock>` (§3.4 / §3.7b).
|
||||
//! Markdown body → flat `Vec<kebab_parse_types::ParsedBlock>` (§3.4 / §3.7b).
|
||||
//!
|
||||
//! Uses `pulldown-cmark` (with GFM tables enabled at runtime via
|
||||
//! `Options::ENABLE_TABLES`) to walk the body once and emit a flat list of
|
||||
//! parsed blocks. Heading paths are computed by tracking the most-recent
|
||||
//! heading text at each level. Source spans are reported as
|
||||
//! [`kb_core::SourceSpan::Line`] in 1-indexed file-line coordinates by
|
||||
//! [`kebab_core::SourceSpan::Line`] in 1-indexed file-line coordinates by
|
||||
//! converting `pulldown-cmark`'s byte offsets to line numbers and adding the
|
||||
//! caller-supplied `body_offset_lines`.
|
||||
//!
|
||||
@@ -19,10 +19,10 @@
|
||||
//!
|
||||
//! ## Inline filter
|
||||
//!
|
||||
//! [`kb_core::Inline`] only models `Text | Code | Link | Strong | Emph`.
|
||||
//! [`kebab_core::Inline`] only models `Text | Code | Link | Strong | Emph`.
|
||||
//! Inline images, footnotes, hard breaks, etc. are dropped silently per
|
||||
//! design §3.4. Block-level `` (an image as the sole content of a
|
||||
//! paragraph) is lifted to [`kb_parse_types::ParsedPayload::ImageRef`].
|
||||
//! paragraph) is lifted to [`kebab_parse_types::ParsedPayload::ImageRef`].
|
||||
//!
|
||||
//! ## CRLF
|
||||
//!
|
||||
@@ -33,8 +33,8 @@
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
use kb_core::{Inline, SourceSpan};
|
||||
use kb_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind};
|
||||
use kebab_core::{Inline, SourceSpan};
|
||||
use kebab_parse_types::{ParsedBlock, ParsedBlockKind, ParsedPayload, Warning, WarningKind};
|
||||
use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
|
||||
|
||||
/// Parse a Markdown body into a flat `Vec<ParsedBlock>` plus any warnings.
|
||||
@@ -1595,7 +1595,7 @@ mod tests {
|
||||
let (blocks, _) = parse(body, 1);
|
||||
assert_eq!(blocks.len(), 1, "expected single list block");
|
||||
match &blocks[0].kind {
|
||||
kb_parse_types::ParsedBlockKind::List => {}
|
||||
kebab_parse_types::ParsedBlockKind::List => {}
|
||||
other => panic!("expected list, got {other:?}"),
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
//! Markdown frontmatter parsing → `kb_core::Metadata`.
|
||||
//! Markdown frontmatter parsing → `kebab_core::Metadata`.
|
||||
//!
|
||||
//! Implements the contract pinned in design §0 Q9 (frontmatter derive table)
|
||||
//! and §3.6 (Metadata shape). Produces structured warnings via
|
||||
@@ -18,8 +18,8 @@
|
||||
use std::ops::Range;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use kb_core::{Metadata, SourceType, TrustLevel};
|
||||
use kb_parse_types::{Warning, WarningKind};
|
||||
use kebab_core::{Metadata, SourceType, TrustLevel};
|
||||
use kebab_parse_types::{Warning, WarningKind};
|
||||
use lingua::{IsoCode639_1, Language, LanguageDetector, LanguageDetectorBuilder};
|
||||
use serde::Deserialize;
|
||||
use serde_json::{Map, Value};
|
||||
@@ -59,7 +59,7 @@ pub struct FrontmatterSpan {
|
||||
}
|
||||
|
||||
/// Parse the frontmatter (if any) from a Markdown byte slice into a
|
||||
/// `kb_core::Metadata`, applying the §0 Q9 derive table for missing fields.
|
||||
/// `kebab_core::Metadata`, applying the §0 Q9 derive table for missing fields.
|
||||
///
|
||||
/// On a malformed frontmatter the function still returns `Ok` — the
|
||||
/// frontmatter contents are discarded and the caller is told via a
|
||||
@@ -589,7 +589,7 @@ fn iso_code(lang: Language) -> &'static str {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
AssetId, WorkspacePath,
|
||||
ids::id_for_doc,
|
||||
versions::ParserVersion,
|
||||
@@ -10,13 +10,13 @@
|
||||
//! env-var pattern. Migrating kb-parse-md to the env-var style is out of
|
||||
//! scope; both styles are intentional for now.
|
||||
//!
|
||||
//! Following the kb_core::Inline schema migration (struct-variant shape),
|
||||
//! Following the kebab_core::Inline schema migration (struct-variant shape),
|
||||
//! `ParsedBlock` now serializes directly through serde — no projection
|
||||
//! shim is required. Inlines surface as structured objects, e.g.
|
||||
//! `[{"kind":"text","text":"…"},{"kind":"code","code":"…"}]`.
|
||||
|
||||
use kb_parse_md::parse_blocks;
|
||||
use kb_parse_types::{ParsedBlock, Warning};
|
||||
use kebab_parse_md::parse_blocks;
|
||||
use kebab_parse_types::{ParsedBlock, Warning};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
@@ -5,7 +5,7 @@
|
||||
//! and therefore stable; lingua autodetect over our fixtures is also
|
||||
//! stable for the language set we configured.
|
||||
|
||||
use kb_parse_md::{BodyHints, parse_frontmatter};
|
||||
use kebab_parse_md::{BodyHints, parse_frontmatter};
|
||||
use serde::Serialize;
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
@@ -18,9 +18,9 @@ use time::macros::datetime;
|
||||
/// snapshot focuses on the §0 Q9 derive contract.
|
||||
#[derive(Serialize)]
|
||||
struct Snapshot {
|
||||
metadata: kb_core::Metadata,
|
||||
metadata: kebab_core::Metadata,
|
||||
span_present: bool,
|
||||
warnings: Vec<kb_parse_types::Warning>,
|
||||
warnings: Vec<kebab_parse_types::Warning>,
|
||||
}
|
||||
|
||||
fn fixtures_dir() -> PathBuf {
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-parse-types"
|
||||
name = "kebab-parse-types"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,5 +8,5 @@ repository = { workspace = true }
|
||||
description = "Parser intermediate representations (no parser libs allowed)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
serde = { workspace = true }
|
||||
@@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize};
|
||||
pub struct ParsedBlock {
|
||||
pub kind: ParsedBlockKind,
|
||||
pub heading_path: Vec<String>,
|
||||
pub source_span: kb_core::SourceSpan,
|
||||
pub source_span: kebab_core::SourceSpan,
|
||||
pub payload: ParsedPayload,
|
||||
}
|
||||
|
||||
@@ -36,11 +36,11 @@ pub enum ParsedPayload {
|
||||
},
|
||||
Paragraph {
|
||||
text: String,
|
||||
inlines: Vec<kb_core::Inline>,
|
||||
inlines: Vec<kebab_core::Inline>,
|
||||
},
|
||||
List {
|
||||
ordered: bool,
|
||||
items: Vec<Vec<kb_core::Inline>>,
|
||||
items: Vec<Vec<kebab_core::Inline>>,
|
||||
},
|
||||
Code {
|
||||
lang: Option<String>,
|
||||
@@ -52,7 +52,7 @@ pub enum ParsedPayload {
|
||||
},
|
||||
Quote {
|
||||
text: String,
|
||||
inlines: Vec<kb_core::Inline>,
|
||||
inlines: Vec<kebab_core::Inline>,
|
||||
},
|
||||
ImageRef {
|
||||
src: String,
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-rag"
|
||||
name = "kebab-rag"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,11 +8,11 @@ repository = { workspace = true }
|
||||
description = "RAG pipeline: retrieve → gate → pack → generate → cite-validate"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-search = { path = "../kb-search" }
|
||||
kb-llm = { path = "../kb-llm" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-search = { path = "../kebab-search" }
|
||||
kebab-llm = { path = "../kebab-llm" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
@@ -23,7 +23,7 @@ anyhow = { workspace = true }
|
||||
blake3 = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
kb-llm = { path = "../kb-llm", features = ["mock"] }
|
||||
kebab-llm = { path = "../kebab-llm", features = ["mock"] }
|
||||
tempfile = { workspace = true }
|
||||
rusqlite = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -18,7 +18,7 @@
|
||||
//! reachable via `Retriever`), `kb-embed*` (only via `Retriever`),
|
||||
//! `kb-llm-local` (only via `LanguageModel`), `kb-tui`, `kb-desktop`.
|
||||
|
||||
pub use kb_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason};
|
||||
pub use kebab_core::{Answer, AnswerCitation, AnswerRetrievalSummary, RefusalReason};
|
||||
|
||||
mod pipeline;
|
||||
|
||||
@@ -33,13 +33,13 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
Answer, AnswerCitation, AnswerRetrievalSummary, Citation, FinishReason,
|
||||
GenerateRequest, LanguageModel, ModelRef, RefusalReason, Retriever, SearchFilters,
|
||||
SearchHit, SearchMode, SearchQuery, TokenChunk, TokenUsage, TraceId,
|
||||
};
|
||||
use kb_core::versions::PromptTemplateVersion;
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_core::versions::PromptTemplateVersion;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use regex::Regex;
|
||||
use std::sync::OnceLock;
|
||||
use time::OffsetDateTime;
|
||||
@@ -86,7 +86,7 @@ pub struct AskOpts {
|
||||
|
||||
/// Single-threaded RAG orchestrator. See module docs for the stage list.
|
||||
pub struct RagPipeline {
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
retriever: Arc<dyn Retriever>,
|
||||
llm: Arc<dyn LanguageModel>,
|
||||
docs: Arc<SqliteStore>,
|
||||
@@ -98,7 +98,7 @@ impl RagPipeline {
|
||||
/// `Arc`'d trait objects (kb-app builds them from config; tests
|
||||
/// inject mocks).
|
||||
pub fn new(
|
||||
config: kb_config::Config,
|
||||
config: kebab_config::Config,
|
||||
retriever: Arc<dyn Retriever>,
|
||||
llm: Arc<dyn LanguageModel>,
|
||||
docs: Arc<SqliteStore>,
|
||||
@@ -135,7 +135,7 @@ impl RagPipeline {
|
||||
let top_score = hits.first().map(|h| h.retrieval.fusion_score).unwrap_or(0.0);
|
||||
|
||||
tracing::debug!(
|
||||
target: "kb-rag",
|
||||
target: "kebab-rag",
|
||||
chunks_returned,
|
||||
top_score,
|
||||
mode = ?opts.mode,
|
||||
@@ -161,7 +161,7 @@ impl RagPipeline {
|
||||
// collapse to the more accurate `NoChunks` refusal here.
|
||||
if packed_entries.is_empty() {
|
||||
tracing::warn!(
|
||||
target: "kb-rag",
|
||||
target: "kebab-rag",
|
||||
chunks_returned = hits.len(),
|
||||
"kb-rag: all retrieved chunks were unfetchable from the store; \
|
||||
falling back to NoChunks refusal"
|
||||
@@ -324,7 +324,7 @@ impl RagPipeline {
|
||||
// Drop the moved `finish_reason` early into a tracing breadcrumb; the
|
||||
// wire schema does not surface it (per design §3.8).
|
||||
tracing::debug!(
|
||||
target: "kb-rag",
|
||||
target: "kebab-rag",
|
||||
grounded = answer.grounded,
|
||||
refusal = ?answer.refusal_reason,
|
||||
refusal_phrase_detected = matched_refusal_phrase,
|
||||
@@ -354,7 +354,7 @@ impl RagPipeline {
|
||||
self.docs.put_answer(&answer, query, packed_chunks_json.as_deref())
|
||||
{
|
||||
tracing::warn!(
|
||||
target: "kb-rag",
|
||||
target: "kebab-rag",
|
||||
error = %e,
|
||||
"kb-rag: put_answer failed; in-memory Answer still returned"
|
||||
);
|
||||
@@ -380,13 +380,13 @@ impl RagPipeline {
|
||||
|
||||
for hit in hits {
|
||||
let chunk_full =
|
||||
<SqliteStore as kb_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
|
||||
<SqliteStore as kebab_core::DocumentStore>::get_chunk(&self.docs, &hit.chunk_id)
|
||||
.context("kb-rag: docs.get_chunk")?;
|
||||
let chunk_text = match chunk_full {
|
||||
Some(c) => c.text,
|
||||
None => {
|
||||
tracing::warn!(
|
||||
target: "kb-rag",
|
||||
target: "kebab-rag",
|
||||
chunk_id = %hit.chunk_id.0,
|
||||
"kb-rag: chunk not found in store; skipping"
|
||||
);
|
||||
@@ -454,7 +454,7 @@ impl RagPipeline {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
};
|
||||
if let Err(e) = self.docs.put_answer(&answer, query, None) {
|
||||
tracing::warn!(target: "kb-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
|
||||
tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (NoChunks) failed");
|
||||
}
|
||||
Ok(answer)
|
||||
}
|
||||
@@ -529,7 +529,7 @@ impl RagPipeline {
|
||||
created_at: OffsetDateTime::now_utc(),
|
||||
};
|
||||
if let Err(e) = self.docs.put_answer(&answer, query, None) {
|
||||
tracing::warn!(target: "kb-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
|
||||
tracing::warn!(target: "kebab-rag", error = %e, "kb-rag: put_answer (ScoreGate) failed");
|
||||
}
|
||||
Ok(answer)
|
||||
}
|
||||
@@ -542,7 +542,7 @@ impl RagPipeline {
|
||||
/// paths attach the configured embedding model so `kb explain` can
|
||||
/// later identify which embedder shaped the retrieval (even on
|
||||
/// refusals — see `refuse_score_gate`).
|
||||
fn embedding_ref_for(mode: SearchMode, cfg: &kb_config::Config) -> Option<ModelRef> {
|
||||
fn embedding_ref_for(mode: SearchMode, cfg: &kebab_config::Config) -> Option<ModelRef> {
|
||||
match mode {
|
||||
SearchMode::Lexical => None,
|
||||
SearchMode::Vector | SearchMode::Hybrid => Some(ModelRef {
|
||||
@@ -14,12 +14,12 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{
|
||||
ChunkerVersion, ChunkId, Citation, DocumentId, IndexVersion, RetrievalDetail,
|
||||
Retriever, SearchHit, SearchMode, SearchQuery, WorkspacePath,
|
||||
};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@@ -176,7 +176,7 @@ impl Retriever for MockRetriever {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect.
|
||||
/// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
|
||||
pub fn id32(prefix: &str) -> String {
|
||||
let mut s = prefix.to_string();
|
||||
while s.len() < 32 {
|
||||
@@ -10,11 +10,11 @@ use std::sync::Arc;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use common::{MockRetriever, RagEnv, id32, mk_hit};
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
FinishReason, LanguageModel, Retriever, SearchMode, TokenChunk, TokenUsage,
|
||||
};
|
||||
use kb_llm::MockLanguageModel;
|
||||
use kb_rag::{AskOpts, RagPipeline, RefusalReason};
|
||||
use kebab_llm::MockLanguageModel;
|
||||
use kebab_rag::{AskOpts, RagPipeline, RefusalReason};
|
||||
|
||||
/// LM ID used everywhere — kept short so snapshots stay stable.
|
||||
const TEST_LM_ID: &str = "mock-lm";
|
||||
@@ -49,7 +49,7 @@ impl CountingLm {
|
||||
}
|
||||
|
||||
impl LanguageModel for CountingLm {
|
||||
fn model_ref(&self) -> kb_core::ModelRef {
|
||||
fn model_ref(&self) -> kebab_core::ModelRef {
|
||||
self.inner.model_ref()
|
||||
}
|
||||
fn context_tokens(&self) -> usize {
|
||||
@@ -57,7 +57,7 @@ impl LanguageModel for CountingLm {
|
||||
}
|
||||
fn generate_stream(
|
||||
&self,
|
||||
req: kb_core::GenerateRequest,
|
||||
req: kebab_core::GenerateRequest,
|
||||
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
|
||||
self.calls.fetch_add(1, Ordering::SeqCst);
|
||||
self.inner.generate_stream(req)
|
||||
@@ -1,5 +1,5 @@
|
||||
[package]
|
||||
name = "kb-search"
|
||||
name = "kebab-search"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
@@ -8,17 +8,17 @@ repository = { workspace = true }
|
||||
description = "Retriever implementations for kb (P2-2 lexical FTS5; P3 vector / hybrid will follow)"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
kb-config = { path = "../kb-config" }
|
||||
kb-store-sqlite = { path = "../kb-store-sqlite" }
|
||||
kebab-core = { path = "../kebab-core" }
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
kebab-store-sqlite = { path = "../kebab-store-sqlite" }
|
||||
# P3-4 hybrid retriever wraps a `dyn VectorStore` (typically backed by
|
||||
# `kb-store-vector::LanceVectorStore`) and a `dyn Embedder` (any P3-2
|
||||
# adapter). Listed as a runtime dep so callers can construct
|
||||
# `VectorRetriever::new` against the trait objects without a concrete
|
||||
# adapter — the concrete adapter (`kb-embed-local`) stays out of this
|
||||
# crate per the spec's Forbidden deps list.
|
||||
kb-store-vector = { path = "../kb-store-vector" }
|
||||
kb-embed = { path = "../kb-embed" }
|
||||
kebab-store-vector = { path = "../kebab-store-vector" }
|
||||
kebab-embed = { path = "../kebab-embed" }
|
||||
rusqlite = { workspace = true }
|
||||
globset = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
@@ -32,4 +32,4 @@ tempfile = { workspace = true }
|
||||
# feature) and stand up a real `LanceVectorStore` on a tmp directory.
|
||||
# The mock-retriever unit tests (the bulk of the hybrid suite) do not
|
||||
# need either, but the integration / snapshot lane does.
|
||||
kb-embed = { path = "../kb-embed", features = ["mock"] }
|
||||
kebab-embed = { path = "../kebab-embed", features = ["mock"] }
|
||||
@@ -1,4 +1,4 @@
|
||||
//! Shared helpers for building `kb_core::Citation` values from a
|
||||
//! Shared helpers for building `kebab_core::Citation` values from a
|
||||
//! chunk's first `SourceSpan`.
|
||||
//!
|
||||
//! Both the lexical and vector retrievers join against the same
|
||||
@@ -9,7 +9,7 @@
|
||||
//! §1.6). Living here means a future PDF / image / audio extractor can
|
||||
//! enrich the mapping in one place rather than two.
|
||||
|
||||
use kb_core::{Citation, SourceSpan, WorkspacePath};
|
||||
use kebab_core::{Citation, SourceSpan, WorkspacePath};
|
||||
|
||||
/// Build a `Citation` from the chunk's first `SourceSpan`. P1 markdown
|
||||
/// only emits `Line`, so the other variants are mostly defensive — we
|
||||
@@ -20,7 +20,7 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
|
||||
};
|
||||
|
||||
@@ -75,7 +75,7 @@ impl HybridRetriever {
|
||||
/// retrievers. Reads `config.search.hybrid_fusion` (only `"rrf"`
|
||||
/// is recognised today) and `config.search.rrf_k`.
|
||||
pub fn new(
|
||||
config: &kb_config::Config,
|
||||
config: &kebab_config::Config,
|
||||
lexical: Arc<dyn Retriever>,
|
||||
vector: Arc<dyn Retriever>,
|
||||
) -> Self {
|
||||
@@ -93,7 +93,7 @@ impl HybridRetriever {
|
||||
let vec_iv = vector.index_version();
|
||||
if lex_iv.0 != vec_iv.0 {
|
||||
tracing::warn!(
|
||||
target: "kb-search",
|
||||
target: "kebab-search",
|
||||
lexical_index = %lex_iv.0,
|
||||
vector_index = %vec_iv.0,
|
||||
"kb-search hybrid: lexical and vector index_version differ; consider re-indexing"
|
||||
@@ -323,7 +323,7 @@ fn parse_fusion(name: &str, k_rrf: u32) -> FusionPolicy {
|
||||
"rrf" => FusionPolicy::Rrf { k_rrf: k },
|
||||
other => {
|
||||
tracing::warn!(
|
||||
target: "kb-search",
|
||||
target: "kebab-search",
|
||||
policy = other,
|
||||
"kb-search hybrid: unknown fusion policy; falling back to RRF"
|
||||
);
|
||||
@@ -335,7 +335,7 @@ fn parse_fusion(name: &str, k_rrf: u32) -> FusionPolicy {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, Citation, DocumentId, IndexVersion, SearchFilters,
|
||||
SearchHit, SearchMode, WorkspacePath,
|
||||
};
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Lexical (FTS5 + bm25) retriever — design §3.7 / §1.5 / §2.2 / §6.4.
|
||||
//!
|
||||
//! Owns the SQL pattern documented in `tasks/p2/p2-2-lexical-retriever.md`
|
||||
//! and constructs `kb_core::SearchHit` values directly from the joined
|
||||
//! and constructs `kebab_core::SearchHit` values directly from the joined
|
||||
//! `chunks_fts` / `chunks` / `documents` rows. Reads only — never mutates
|
||||
//! the underlying SQLite file.
|
||||
|
||||
@@ -9,12 +9,12 @@ use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use globset::GlobMatcher;
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, DocumentId, IndexVersion, RetrievalDetail, Retriever,
|
||||
SearchFilters, SearchHit, SearchMode, SearchQuery, SourceSpan, TrustLevel,
|
||||
WorkspacePath,
|
||||
};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::{params_from_iter, Connection, Row, ToSql};
|
||||
|
||||
use crate::citation_helper::citation_from_first_span;
|
||||
@@ -57,7 +57,7 @@ impl LexicalRetriever {
|
||||
/// Construct with default settings derived from `kb-config`'s defaults.
|
||||
/// Snippet width is computed from `Config::defaults().search.snippet_chars`.
|
||||
pub fn new(store: Arc<SqliteStore>, index_version: IndexVersion) -> Self {
|
||||
let cfg = kb_config::Config::defaults();
|
||||
let cfg = kebab_config::Config::defaults();
|
||||
Self::with_settings(store, index_version, cfg.search.snippet_chars)
|
||||
}
|
||||
|
||||
@@ -297,7 +297,7 @@ fn run_query(
|
||||
params.push(Box::new(lang.0.clone()));
|
||||
}
|
||||
if let Some(trust_min) = &filters.trust_min {
|
||||
// Mirror `kb_store_sqlite::documents::list_documents` ranking:
|
||||
// Mirror `kebab_store_sqlite::documents::list_documents` ranking:
|
||||
// Generated < Secondary < Primary. Doing the rank in SQL
|
||||
// (rather than post-filtering) keeps the row stream short
|
||||
// when the workspace contains many low-trust docs.
|
||||
@@ -523,7 +523,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn build_citation_line_round_trip() {
|
||||
use kb_core::Citation;
|
||||
use kebab_core::Citation;
|
||||
let p = WorkspacePath::new("a/b.md".to_string()).unwrap();
|
||||
let span = SourceSpan::Line { start: 7, end: 12 };
|
||||
let c = citation_from_first_span("c1", p.clone(), Some("S1".to_string()), Some(&span));
|
||||
@@ -545,7 +545,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn build_citation_page_forwards_section() {
|
||||
use kb_core::Citation;
|
||||
use kebab_core::Citation;
|
||||
let p = WorkspacePath::new("doc.pdf".to_string()).unwrap();
|
||||
let span = SourceSpan::Page {
|
||||
page: 4,
|
||||
@@ -568,7 +568,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn build_citation_none_falls_back_to_line_one() {
|
||||
use kb_core::Citation;
|
||||
use kebab_core::Citation;
|
||||
let p = WorkspacePath::new("x.md".to_string()).unwrap();
|
||||
let c = citation_from_first_span("c1", p, None, None);
|
||||
match c {
|
||||
@@ -1,4 +1,4 @@
|
||||
//! `kb-search` — `kb_core::Retriever` implementations.
|
||||
//! `kb-search` — `kebab_core::Retriever` implementations.
|
||||
//!
|
||||
//! - [`LexicalRetriever`] (P2-2): SQLite-FTS5 + bm25 backed retriever
|
||||
//! for `SearchMode::Lexical`.
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Vector retriever — design §3.7 / §7.2 / §1.6.
|
||||
//!
|
||||
//! Wraps a `dyn VectorStore` + `dyn Embedder` + the SQLite metadata
|
||||
//! store into a `kb_core::Retriever`. The vector store knows how to
|
||||
//! store into a `kebab_core::Retriever`. The vector store knows how to
|
||||
//! find the nearest chunks by cosine on the embedding column; SQLite
|
||||
//! owns the human-readable metadata (heading_path / section_label /
|
||||
//! source_spans / chunker_version / workspace_path) needed for
|
||||
@@ -19,12 +19,12 @@ use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, Result};
|
||||
use kb_core::{
|
||||
use kebab_core::{
|
||||
ChunkId, ChunkerVersion, DocumentId, Embedder, EmbeddingInput, EmbeddingKind,
|
||||
IndexVersion, RetrievalDetail, Retriever, SearchHit, SearchMode, SearchQuery,
|
||||
SourceSpan, VectorHit, VectorStore, WorkspacePath,
|
||||
};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use rusqlite::params_from_iter;
|
||||
|
||||
use crate::citation_helper::citation_from_first_span;
|
||||
@@ -67,7 +67,7 @@ impl VectorRetriever {
|
||||
sqlite: Arc<SqliteStore>,
|
||||
index_version: IndexVersion,
|
||||
) -> Self {
|
||||
let cfg = kb_config::Config::defaults();
|
||||
let cfg = kebab_config::Config::defaults();
|
||||
Self::with_settings(store, embed, sqlite, index_version, cfg.search.snippet_chars)
|
||||
}
|
||||
|
||||
@@ -268,7 +268,7 @@ fn build_hit(
|
||||
meta: &ChunkMeta,
|
||||
rank: u32,
|
||||
index_version: &IndexVersion,
|
||||
model_id: &kb_core::EmbeddingModelId,
|
||||
model_id: &kebab_core::EmbeddingModelId,
|
||||
snippet_chars: usize,
|
||||
) -> Result<SearchHit> {
|
||||
let heading_path: Vec<String> = serde_json::from_str(&meta.heading_path_json)
|
||||
@@ -16,15 +16,15 @@
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
use kb_config::Config;
|
||||
use kb_core::{
|
||||
use kebab_config::Config;
|
||||
use kebab_core::{
|
||||
ChunkId, DocumentId, EmbeddingId, EmbeddingInput, EmbeddingKind,
|
||||
EmbeddingModelId, EmbeddingVersion, IndexVersion, VectorRecord, VectorStore,
|
||||
};
|
||||
use kb_embed::{Embedder, MockEmbedder};
|
||||
use kb_search::{LexicalRetriever, VectorRetriever};
|
||||
use kb_store_sqlite::SqliteStore;
|
||||
use kb_store_vector::LanceVectorStore;
|
||||
use kebab_embed::{Embedder, MockEmbedder};
|
||||
use kebab_search::{LexicalRetriever, VectorRetriever};
|
||||
use kebab_store_sqlite::SqliteStore;
|
||||
use kebab_store_vector::LanceVectorStore;
|
||||
use rusqlite::params;
|
||||
use tempfile::TempDir;
|
||||
|
||||
@@ -205,7 +205,7 @@ impl HybridEnv {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pad a short prefix to the 32-hex shape `kb_core` newtypes expect.
|
||||
/// Pad a short prefix to the 32-hex shape `kebab_core` newtypes expect.
|
||||
pub fn id32(prefix: &str) -> String {
|
||||
let mut s = prefix.to_string();
|
||||
while s.len() < 32 {
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user