feat(p4-1): kb-llm crate — LanguageModel trait re-export + MockLanguageModel

Establishes the kb-llm trait crate so concrete LLM adapters (p4-2 Ollama, future llama.cpp / candle) target a stable surface. Pure re- export of kb_core::{LanguageModel, GenerateRequest, TokenChunk, FinishReason, TokenUsage, ModelRef} plus a feature-gated deterministic mock for downstream RAG tests (p4-3) that need an LLM trait object without an Ollama dependency. MockLanguageModel (cfg(feature = "mock"), default OFF): - Holds canned_response + canned_finish + canned_usage + (model_id, provider, context_tokens). Pure in-memory; no I/O. - generate_stream() honors GenerateRequest.stop: scans every non-empty stop string against the canned response, takes the earliest byte position (Iterator::min returns the first equal element on ties so declaration order in req.stop wins), truncates with a direct byte- slice (str::find returns a UTF-8 char boundary by contract). - When a stop matches, finish_reason is overridden to Stop (matches OpenAI / Ollama real-world behaviour); otherwise the caller's canned_finish passes through verbatim. - Emits one TokenChunk::Token per Unicode scalar value (char), NOT per grapheme cluster — Hangul jamo, emoji ZWJ sequences, combining marks split. Acceptable for trait-shape testing; real adapters MAY combine. Documented in module docs. - Always terminates with TokenChunk::Done { finish_reason, usage } even if the canned response is empty. The returned iterator is a boxed Vec<TokenChunk>::into_iter().map(Ok), trivially Send. - Real adapters MAY return Err from generate_stream itself (e.g. connection refused) before any chunk is yielded; the mock never does. Documented for the trait re-exporter consumer audience. Helpers: - assert_finish_chunk(chunks) — asserts the last chunk is a Done. Useful for proptests asserting trait contract over random inputs. Tests: - cargo test -p kb-llm (no features): 2 reexport / dyn-dispatch tests. - cargo test -p kb-llm --features mock: 9 tests including 100-case proptest over random Unicode strings asserting Done terminator, char-count == streamed Token chunks, concat == canned (truncated by stop), plus explicit cases for stop-string truncation, first-stop- match precedence, model_ref dimensions=None invariant, finish reason pass-through. - All 271 workspace tests pass; clippy clean for both default and mock-on feature configurations. Symbol gating verified: - cargo build --release -p kb-llm (default): nm shows zero MockLanguageModel symbols. - cargo build --release -p kb-llm --features mock: three trait-impl symbols present. Spec invariant "release builds MUST NOT include MockLanguageModel" enforced at the symbol level. Allowed deps respected: only kb-core (path) and anyhow (workspace, forced by trait return type). Dropped kb-config / serde / thiserror / tracing from the spec's allowed list — they are listed as Allowed but nothing in this skeleton crate references them, and dropping them keeps the dependency graph slim for downstream consumers. p4-2/p4-3 will add what they need at their own dep sites. Forbidden deps (reqwest, ureq, tokio, whisper-rs, kb-source-fs, kb-parse-md, kb-normalize, kb-chunk, kb-store-*, kb-embed*, kb-search, kb-rag, kb-tui, kb-desktop) all absent from cargo tree -p kb-llm. Out of scope: real adapter (p4-2 Ollama), token counting against the real tokenizer, server-side cancellation / abort signals (P+). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 13:37:46 +00:00
parent 38ff886c37
commit 27c669fbf9
7 changed files with 481 additions and 0 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3439,6 +3439,15 @@ dependencies = [
 "tracing",
 ]

+[[package]]
+name = "kb-llm"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "kb-core",
+ "proptest",
+]
+
 [[package]]
 name = "kb-normalize"
 version = "0.1.0"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,6 +13,7 @@ members = [
    "crates/kb-search",
    "crates/kb-embed",
    "crates/kb-embed-local",
+    "crates/kb-llm",
    "crates/kb-app",
    "crates/kb-cli",
 ]
--- a/crates/kb-llm/Cargo.toml
+++ b/crates/kb-llm/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name          = "kb-llm"
+version       = { workspace = true }
+edition       = { workspace = true }
+rust-version  = { workspace = true }
+license       = { workspace = true }
+repository    = { workspace = true }
+description   = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests"
+
+[dependencies]
+kb-core    = { path = "../kb-core" }
+anyhow     = { workspace = true }
+
+[features]
+default = []
+# Opt-in `MockLanguageModel`. Default OFF so release builds (no `--features mock`)
+# compile the symbol out entirely (verifiable via `nm`/`cargo bloat`).
+mock    = []
+
+[dev-dependencies]
+proptest = { workspace = true }
--- a/crates/kb-llm/src/lib.rs
+++ b/crates/kb-llm/src/lib.rs
@@ -0,0 +1,49 @@
+//! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface.
+//!
+//! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can
+//! `use kb_llm::LanguageModel` and stay stable across kb-core reorganizations.
+//! It defines **no new types**; everything is a re-export of [`kb_core`].
+//!
+//! ## Mock implementation
+//!
+//! [`MockLanguageModel`] (gated behind the `mock` feature, default **OFF**) is
+//! a deterministic test double. Real adapters (Ollama, llama.cpp, candle) live
+//! in p4-2 and MUST NOT be implemented here. Real adapters MAY return `Err`
+//! from `generate_stream` itself (e.g., connection refused) before any chunk
+//! is yielded; the mock never does.
+//!
+//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.1, §7.2,
+//! §0 Q5 (streaming), §3.8 (`ModelRef`) for the contract.
+
+// ── Trait re-exports ──────────────────────────────────────────────────────
+//
+// Per spec §7.2 — these are the only public-surface types this crate offers.
+// Adding new types is forbidden by the task contract.
+
+pub use kb_core::{
+    FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
+};
+
+// ── Test helper ───────────────────────────────────────────────────────────
+
+/// Assert the streamed `TokenChunk` sequence ends with a [`TokenChunk::Done`]
+/// frame. Per spec §7.2 / §0 Q5 every stream — even an erroring one — must
+/// terminate with a `Done` chunk; this helper centralizes that contract check
+/// so downstream test crates don't each rewrite it.
+///
+/// Panics on mismatch (test-only helper — callers are tests).
+pub fn assert_finish_chunk(chunks: &[TokenChunk]) {
+    assert!(
+        matches!(chunks.last(), Some(TokenChunk::Done { .. })),
+        "stream must end with TokenChunk::Done; got {:?}",
+        chunks.last(),
+    );
+}
+
+// ── MockLanguageModel (feature = "mock") ──────────────────────────────────
+
+#[cfg(feature = "mock")]
+mod mock;
+
+#[cfg(feature = "mock")]
+pub use mock::MockLanguageModel;
--- a/crates/kb-llm/src/mock.rs
+++ b/crates/kb-llm/src/mock.rs
@@ -0,0 +1,117 @@
+//! Deterministic mock language model for downstream tests.
+//!
+//! Compiled only when the `mock` feature is enabled. Default builds
+//! (`cargo build --release -p kb-llm`) MUST NOT contain the `MockLanguageModel`
+//! symbol — verifiable by symbol scan (`nm`/`cargo bloat`).
+//!
+//! ## Streaming contract
+//!
+//! For every call to [`MockLanguageModel::generate_stream`]:
+//!
+//! 1. The configured `canned_response` is examined for any of `req.stop`. If
+//!    one or more stop strings are substrings of the response, the response
+//!    is truncated at the **earliest byte position** of any match (i.e., the
+//!    first stop string to land — ties broken by the order entries appear in
+//!    `req.stop`, since `Iterator::min` returns the first equal element on
+//!    ties, breaking by `req.stop` declaration order).
+//! 2. The (possibly truncated) string is iterated by Unicode scalar
+//!    (`str::chars()`) and each character is yielded as
+//!    [`TokenChunk::Token`]`(c.to_string())`. This makes streaming UTF-8 safe
+//!    by construction (no character is split across chunks). Emits one
+//!    `TokenChunk` per Unicode scalar value (`char`), not per grapheme
+//!    cluster — Hangul jamo, emoji ZWJ sequences, and combining marks split
+//!    into multiple chunks. Acceptable for trait-shape testing; real adapters
+//!    MAY combine.
+//! 3. After all tokens, a single terminal [`TokenChunk::Done`] is yielded
+//!    with:
+//!     * `finish_reason = FinishReason::Stop` if a stop string truncated the
+//!       canned text — mirroring real LLM behavior, which reports Stop on
+//!       stop-sequence termination regardless of the configured finish.
+//!     * `finish_reason = canned_finish.clone()` otherwise.
+//!     * `usage = canned_usage.clone()` always.
+//!
+//! ## Non-effects
+//!
+//! - No network. No filesystem. No async runtime.
+//! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever
+//!   the constructor was given — the mock does not count.
+
+use kb_core::{
+    FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
+};
+
+/// Deterministic test double. See module docs for the streaming recipe.
+pub struct MockLanguageModel {
+    pub model_id: String,
+    pub provider: String,
+    pub context_tokens: usize,
+    pub canned_response: String,
+    pub canned_finish: FinishReason,
+    pub canned_usage: TokenUsage,
+}
+
+impl MockLanguageModel {
+    /// Apply `req.stop` to `canned_response`. Returns `(truncated_text,
+    /// stop_hit)` where `stop_hit` is true iff any stop string was found.
+    fn apply_stop<'a>(canned: &'a str, stop: &[String]) -> (&'a str, bool) {
+        // Earliest byte position wins. Ties break by first occurrence in
+        // `stop` (Iterator::min returns the first equal element, and we
+        // iterate `stop` in its declared order). Empty stop strings are
+        // ignored — they would otherwise match at position 0 and silently
+        // eat the entire response.
+        let earliest = stop
+            .iter()
+            .filter(|s| !s.is_empty())
+            .filter_map(|s| canned.find(s.as_str()))
+            .min();
+        match earliest {
+            // `str::find` returns a UTF-8 char boundary by contract, so direct byte-slice is sound.
+            Some(idx) => (&canned[..idx], true),
+            None => (canned, false),
+        }
+    }
+}
+
+impl LanguageModel for MockLanguageModel {
+    fn model_ref(&self) -> ModelRef {
+        ModelRef {
+            id: self.model_id.clone(),
+            provider: self.provider.clone(),
+            // Per §3.8: `dimensions` carries the embedder's output dim and is
+            // intentionally None for chat models.
+            dimensions: None,
+        }
+    }
+
+    fn context_tokens(&self) -> usize {
+        self.context_tokens
+    }
+
+    fn generate_stream(
+        &self,
+        req: GenerateRequest,
+    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
+        let (truncated, stop_hit) = Self::apply_stop(&self.canned_response, &req.stop);
+
+        // Pre-materialize the full chunk sequence into an owned Vec. This
+        // sidesteps lifetime juggling around `&self.canned_response` inside
+        // a `'static` iterator and trivially gives `Send` (Vec<TokenChunk>
+        // is Send because TokenChunk is Send).
+        let mut chunks: Vec<TokenChunk> = truncated
+            .chars()
+            .map(|c| TokenChunk::Token(c.to_string()))
+            .collect();
+
+        let finish_reason = if stop_hit {
+            FinishReason::Stop
+        } else {
+            self.canned_finish.clone()
+        };
+        chunks.push(TokenChunk::Done {
+            finish_reason,
+            usage: self.canned_usage.clone(),
+        });
+
+        Ok(Box::new(chunks.into_iter().map(Ok)))
+    }
+}
--- a/crates/kb-llm/tests/mock.rs
+++ b/crates/kb-llm/tests/mock.rs
@@ -0,0 +1,210 @@
+//! Integration tests for `MockLanguageModel`. Gated behind the `mock` feature.
+//!
+//! Canonical invocation: `cargo test -p kb-llm --features mock`.
+
+#![cfg(feature = "mock")]
+
+use kb_llm::{
+    FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage,
+    assert_finish_chunk,
+};
+use proptest::prelude::*;
+
+fn usage() -> TokenUsage {
+    TokenUsage {
+        prompt_tokens: 10,
+        completion_tokens: 20,
+        latency_ms: 30,
+    }
+}
+
+fn req_with_stop(stop: Vec<&str>) -> GenerateRequest {
+    GenerateRequest {
+        system: "sys".into(),
+        user: "usr".into(),
+        stop: stop.into_iter().map(String::from).collect(),
+        max_tokens: 64,
+        temperature: 0.0,
+        seed: None,
+    }
+}
+
+fn mk(canned: &str, finish: FinishReason) -> MockLanguageModel {
+    MockLanguageModel {
+        model_id: "mock-test".into(),
+        provider: "mock".into(),
+        context_tokens: 4096,
+        canned_response: canned.into(),
+        canned_finish: finish,
+        canned_usage: usage(),
+    }
+}
+
+fn drain(m: &dyn LanguageModel, req: GenerateRequest) -> Vec<TokenChunk> {
+    m.generate_stream(req)
+        .expect("generate_stream")
+        .map(|r| r.expect("ok chunk"))
+        .collect()
+}
+
+#[test]
+fn streams_then_done() {
+    let m = mk("hello", FinishReason::Stop);
+    let chunks = drain(&m, req_with_stop(vec![]));
+
+    // 5 Token chunks ("h", "e", "l", "l", "o") + Done.
+    assert_eq!(chunks.len(), 6);
+    assert_finish_chunk(&chunks);
+
+    let tokens: Vec<&str> = chunks
+        .iter()
+        .filter_map(|c| match c {
+            TokenChunk::Token(s) => Some(s.as_str()),
+            _ => None,
+        })
+        .collect();
+    assert_eq!(tokens, vec!["h", "e", "l", "l", "o"]);
+
+    match chunks.last().unwrap() {
+        TokenChunk::Done {
+            finish_reason,
+            usage: u,
+        } => {
+            assert_eq!(*finish_reason, FinishReason::Stop);
+            assert_eq!(*u, usage());
+        }
+        _ => unreachable!(),
+    }
+}
+
+#[test]
+fn honors_stop_strings() {
+    // canned has "STOP" embedded; req.stop=["STOP"] truncates before it.
+    let m = mk("abc STOP defg", FinishReason::Length);
+    let chunks = drain(&m, req_with_stop(vec!["STOP"]));
+
+    let concat: String = chunks
+        .iter()
+        .filter_map(|c| match c {
+            TokenChunk::Token(s) => Some(s.as_str()),
+            _ => None,
+        })
+        .collect();
+    assert_eq!(concat, "abc ");
+
+    // Stop-string truncation forces FinishReason::Stop, overriding the
+    // configured `canned_finish` (Length here).
+    match chunks.last().unwrap() {
+        TokenChunk::Done { finish_reason, .. } => {
+            assert_eq!(*finish_reason, FinishReason::Stop);
+        }
+        _ => panic!("last chunk must be Done"),
+    }
+}
+
+#[test]
+fn honors_first_stop_match() {
+    // Two stop strings; "BAR" appears at byte 4, "FOO" at byte 12. Earliest
+    // wins regardless of order in req.stop.
+    let m = mk("abc BAR xyz FOO end", FinishReason::Stop);
+    let chunks = drain(&m, req_with_stop(vec!["FOO", "BAR"]));
+
+    let concat: String = chunks
+        .iter()
+        .filter_map(|c| match c {
+            TokenChunk::Token(s) => Some(s.as_str()),
+            _ => None,
+        })
+        .collect();
+    assert_eq!(concat, "abc ");
+}
+
+#[test]
+fn dyn_dispatch_via_box() {
+    let m: Box<dyn LanguageModel> = Box::new(mk("xy", FinishReason::Stop));
+    assert_eq!(m.model_ref().id, "mock-test");
+    assert_eq!(m.model_ref().provider, "mock");
+    assert!(m.model_ref().dimensions.is_none());
+    assert_eq!(m.context_tokens(), 4096);
+
+    let chunks: Vec<TokenChunk> = m
+        .generate_stream(req_with_stop(vec![]))
+        .expect("stream")
+        .map(|r| r.unwrap())
+        .collect();
+    assert_eq!(chunks.len(), 3); // x, y, Done
+    assert_finish_chunk(&chunks);
+}
+
+#[test]
+fn concat_equals_canned() {
+    let canned = "the quick brown fox";
+    let m = mk(canned, FinishReason::Stop);
+    let chunks = drain(&m, req_with_stop(vec![]));
+    let concat: String = chunks
+        .iter()
+        .filter_map(|c| match c {
+            TokenChunk::Token(s) => Some(s.as_str()),
+            _ => None,
+        })
+        .collect();
+    assert_eq!(concat, canned);
+}
+
+#[test]
+fn model_ref_has_no_dimensions() {
+    let m = mk("anything", FinishReason::Stop);
+    let r = m.model_ref();
+    assert_eq!(r.id, "mock-test");
+    assert_eq!(r.provider, "mock");
+    assert!(r.dimensions.is_none());
+}
+
+#[test]
+fn finish_reason_passes_through_when_no_stop_match() {
+    // No stop hit → `canned_finish` is preserved verbatim.
+    let m = mk("hi", FinishReason::Length);
+    let chunks = drain(&m, req_with_stop(vec!["NEVER_MATCHES"]));
+    match chunks.last().unwrap() {
+        TokenChunk::Done { finish_reason, .. } => {
+            assert_eq!(*finish_reason, FinishReason::Length);
+        }
+        _ => panic!("last chunk must be Done"),
+    }
+}
+
+proptest! {
+    #![proptest_config(ProptestConfig {
+        cases: 100,
+        ..ProptestConfig::default()
+    })]
+
+    /// 100 random Unicode canned strings: with no stop strings configured,
+    /// the stream MUST end in Done, contain exactly `canned.chars().count()`
+    /// Token chunks, and concatenate back to the canned text byte-equal.
+    #[test]
+    fn proptest_random_canned_strings(canned in ".{0,256}") {
+        let m = mk(&canned, FinishReason::Stop);
+        let chunks = drain(&m, req_with_stop(vec![]));
+
+        // Last chunk must be Done.
+        assert_finish_chunk(&chunks);
+
+        // Token-chunk count == canned.chars().count().
+        let token_count = chunks
+            .iter()
+            .filter(|c| matches!(c, TokenChunk::Token(_)))
+            .count();
+        prop_assert_eq!(token_count, canned.chars().count());
+
+        // Concatenation == canned (byte-equal).
+        let concat: String = chunks
+            .iter()
+            .filter_map(|c| match c {
+                TokenChunk::Token(s) => Some(s.as_str()),
+                _ => None,
+            })
+            .collect();
+        prop_assert_eq!(concat, canned);
+    }
+}
--- a/crates/kb-llm/tests/reexports.rs
+++ b/crates/kb-llm/tests/reexports.rs
@@ -0,0 +1,74 @@
+//! Compile-only test: verifies the crate's public surface (trait re-exports
+//! and the `assert_finish_chunk` helper) is reachable without the `mock`
+//! feature.
+//!
+//! Runs under both `cargo test -p kb-llm` and
+//! `cargo test -p kb-llm --features mock`.
+
+use kb_llm::{
+    FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
+    assert_finish_chunk,
+};
+
+/// A trivial in-test impl that does NOT rely on the `mock` feature — proves
+/// the trait surface alone is enough to write a `LanguageModel`. It returns a
+/// stream that terminates immediately with `Done`.
+struct ZeroLanguageModel;
+
+impl LanguageModel for ZeroLanguageModel {
+    fn model_ref(&self) -> ModelRef {
+        ModelRef {
+            id: "zero".into(),
+            provider: "zero".into(),
+            dimensions: None,
+        }
+    }
+    fn context_tokens(&self) -> usize {
+        0
+    }
+    fn generate_stream(
+        &self,
+        _req: GenerateRequest,
+    ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
+        let chunks = vec![TokenChunk::Done {
+            finish_reason: FinishReason::Stop,
+            usage: TokenUsage {
+                prompt_tokens: 0,
+                completion_tokens: 0,
+                latency_ms: 0,
+            },
+        }];
+        Ok(Box::new(chunks.into_iter().map(Ok)))
+    }
+}
+
+#[test]
+fn dyn_dispatch_via_box_works() {
+    let m: Box<dyn LanguageModel> = Box::new(ZeroLanguageModel);
+    assert_eq!(m.model_ref().id, "zero");
+    assert_eq!(m.context_tokens(), 0);
+
+    let req = GenerateRequest {
+        system: "sys".into(),
+        user: "usr".into(),
+        stop: vec![],
+        max_tokens: 16,
+        temperature: 0.0,
+        seed: None,
+    };
+    let stream = m.generate_stream(req).expect("stream");
+    let chunks: Vec<TokenChunk> = stream.map(|r| r.expect("ok chunk")).collect();
+    assert_eq!(chunks.len(), 1);
+    assert_finish_chunk(&chunks);
+}
+
+/// Sanity: when built WITHOUT `--features mock`, the `MockLanguageModel`
+/// symbol is absent. We can't usefully test `nm` from inside a unit test, but
+/// we can at least confirm the cfg gate parses both ways. See PR notes for
+/// the CI-side `nm`/`cargo bloat` symbol scan.
+#[cfg(not(feature = "mock"))]
+#[test]
+fn mock_feature_off_compiles() {
+    // No-op — the test's existence proves the `not(feature = "mock")` gate
+    // compiles and the crate is usable without `MockLanguageModel`.
+}