diff --git a/Cargo.lock b/Cargo.lock index f9e1952..407e23b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3439,6 +3439,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "kb-llm" +version = "0.1.0" +dependencies = [ + "anyhow", + "kb-core", + "proptest", +] + [[package]] name = "kb-normalize" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index da6b4f4..8145bae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,7 @@ members = [ "crates/kb-search", "crates/kb-embed", "crates/kb-embed-local", + "crates/kb-llm", "crates/kb-app", "crates/kb-cli", ] diff --git a/crates/kb-llm/Cargo.toml b/crates/kb-llm/Cargo.toml new file mode 100644 index 0000000..f70ab76 --- /dev/null +++ b/crates/kb-llm/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "kb-llm" +version = { workspace = true } +edition = { workspace = true } +rust-version = { workspace = true } +license = { workspace = true } +repository = { workspace = true } +description = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests" + +[dependencies] +kb-core = { path = "../kb-core" } +anyhow = { workspace = true } + +[features] +default = [] +# Opt-in `MockLanguageModel`. Default OFF so release builds (no `--features mock`) +# compile the symbol out entirely (verifiable via `nm`/`cargo bloat`). +mock = [] + +[dev-dependencies] +proptest = { workspace = true } diff --git a/crates/kb-llm/src/lib.rs b/crates/kb-llm/src/lib.rs new file mode 100644 index 0000000..d2d7ccd --- /dev/null +++ b/crates/kb-llm/src/lib.rs @@ -0,0 +1,49 @@ +//! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface. +//! +//! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can +//! `use kb_llm::LanguageModel` and stay stable across kb-core reorganizations. +//! By default it defines **no new types**; every type is a re-export of [`kb_core`]. +//! +//! ## Mock implementation +//! +//! [`MockLanguageModel`] (gated behind the `mock` feature, default **OFF**) is +//! 
a deterministic test double. Real adapters (Ollama, llama.cpp, candle) live +//! in p4-2 and MUST NOT be implemented here. Real adapters MAY return `Err` +//! from `generate_stream` itself (e.g., connection refused) before any chunk +//! is yielded; the mock never does. +//! +//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.1, §7.2, +//! §0 Q5 (streaming), §3.8 (`ModelRef`) for the contract. + +// ── Trait re-exports ────────────────────────────────────────────────────── +// +// Per spec §7.2 — the only public types in default builds (`mock` adds one more). +// Adding new types is forbidden by the task contract. + +pub use kb_core::{ + FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, +}; + +// ── Test helper ─────────────────────────────────────────────────────────── + +/// Assert the streamed `TokenChunk` sequence ends with a [`TokenChunk::Done`] +/// frame. Per spec §7.2 / §0 Q5 every stream — even an erroring one — must +/// terminate with a `Done` chunk; this helper centralizes that contract check +/// so downstream test crates don't each rewrite it. +/// +/// Panics on mismatch or empty input (meant for tests, but not `cfg`-gated). +pub fn assert_finish_chunk(chunks: &[TokenChunk]) { + assert!( + matches!(chunks.last(), Some(TokenChunk::Done { .. })), + "stream must end with TokenChunk::Done; got {:?}", + chunks.last(), + ); +} + +// ── MockLanguageModel (feature = "mock") ────────────────────────────────── + +#[cfg(feature = "mock")] +mod mock; + +#[cfg(feature = "mock")] +pub use mock::MockLanguageModel; diff --git a/crates/kb-llm/src/mock.rs b/crates/kb-llm/src/mock.rs new file mode 100644 index 0000000..e84c90d --- /dev/null +++ b/crates/kb-llm/src/mock.rs @@ -0,0 +1,117 @@ +//! Deterministic mock language model for downstream tests. +//! +//! Compiled only when the `mock` feature is enabled. Default builds +//! (`cargo build --release -p kb-llm`) MUST NOT contain the `MockLanguageModel` +//! 
symbol — verifiable by symbol scan (`nm`/`cargo bloat`). +//! +//! ## Streaming contract +//! +//! For every call to [`MockLanguageModel::generate_stream`]: +//! +//! 1. The configured `canned_response` is examined for any of `req.stop`. If +//! one or more stop strings are substrings of the response, the response +//! is truncated at the **earliest byte position** of any match (i.e., the +//! first stop string to land). Empty stop strings are ignored. When two +//! stop strings match at the same position the truncation point is +//! identical, so the order of `req.stop` never affects the output. +//! 2. The (possibly truncated) string is iterated by Unicode scalar +//! (`str::chars()`) and each character is yielded as +//! [`TokenChunk::Token`]`(c.to_string())`. This makes streaming UTF-8 safe +//! by construction (no character is split across chunks). Emits one +//! `TokenChunk` per Unicode scalar value (`char`), not per grapheme +//! cluster — Hangul jamo, emoji ZWJ sequences, and combining marks split +//! into multiple chunks. Acceptable for trait-shape testing; real adapters +//! MAY combine. +//! 3. After all tokens, a single terminal [`TokenChunk::Done`] is yielded +//! with: +//! * `finish_reason = FinishReason::Stop` if a stop string truncated the +//! canned text — mirroring real LLM behavior, which reports Stop on +//! stop-sequence termination regardless of the configured finish. +//! * `finish_reason = canned_finish.clone()` otherwise. +//! * `usage = canned_usage.clone()` always. +//! +//! ## Non-effects +//! +//! - No network. No filesystem. No async runtime. +//! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever +//! the `canned_usage` field holds — the mock does not count. + +use kb_core::{ + FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, +}; + +/// Deterministic test double. See module docs for the streaming recipe. 
+pub struct MockLanguageModel { + pub model_id: String, + pub provider: String, + pub context_tokens: usize, + pub canned_response: String, + pub canned_finish: FinishReason, + pub canned_usage: TokenUsage, +} + +impl MockLanguageModel { + /// Apply `req.stop` to `canned_response`. Returns `(truncated_text, + /// stop_hit)` where `stop_hit` is true iff any non-empty stop string was found. + fn apply_stop<'a>(canned: &'a str, stop: &[String]) -> (&'a str, bool) { + // Earliest byte position wins. Two stop strings matching at the same + // position yield the same truncation point, so the result does not + // depend on the order of `stop`. Empty stop strings are + // ignored — they would otherwise match at position 0 and silently + // eat the entire response. + let earliest = stop + .iter() + .filter(|s| !s.is_empty()) + .filter_map(|s| canned.find(s.as_str())) + .min(); + match earliest { + // `str::find` returns a UTF-8 char boundary by contract, so direct byte-slice is sound. + Some(idx) => (&canned[..idx], true), + None => (canned, false), + } + } +} + +impl LanguageModel for MockLanguageModel { + fn model_ref(&self) -> ModelRef { + ModelRef { + id: self.model_id.clone(), + provider: self.provider.clone(), + // Per §3.8: `dimensions` carries the embedder's output dim and is + // intentionally None for chat models. + dimensions: None, + } + } + + fn context_tokens(&self) -> usize { + self.context_tokens + } + + fn generate_stream( + &self, + req: GenerateRequest, + ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> { + let (truncated, stop_hit) = Self::apply_stop(&self.canned_response, &req.stop); + + // Pre-materialize the full chunk sequence into an owned Vec. This + // sidesteps lifetime juggling around `&self.canned_response` inside + // a `'static` iterator and trivially gives `Send` (Vec<TokenChunk> + // is Send because TokenChunk is Send). 
+ let mut chunks: Vec<TokenChunk> = truncated + .chars() + .map(|c| TokenChunk::Token(c.to_string())) + .collect(); + + let finish_reason = if stop_hit { + FinishReason::Stop + } else { + self.canned_finish.clone() + }; + chunks.push(TokenChunk::Done { + finish_reason, + usage: self.canned_usage.clone(), + }); + + Ok(Box::new(chunks.into_iter().map(Ok))) + } +} diff --git a/crates/kb-llm/tests/mock.rs b/crates/kb-llm/tests/mock.rs new file mode 100644 index 0000000..c086cda --- /dev/null +++ b/crates/kb-llm/tests/mock.rs @@ -0,0 +1,210 @@ +//! Integration tests for `MockLanguageModel`. Gated behind the `mock` feature. +//! +//! Canonical invocation: `cargo test -p kb-llm --features mock`. + +#![cfg(feature = "mock")] + +use kb_llm::{ + FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage, + assert_finish_chunk, +}; +use proptest::prelude::*; + +fn usage() -> TokenUsage { + TokenUsage { + prompt_tokens: 10, + completion_tokens: 20, + latency_ms: 30, + } +} + +fn req_with_stop(stop: Vec<&str>) -> GenerateRequest { + GenerateRequest { + system: "sys".into(), + user: "usr".into(), + stop: stop.into_iter().map(String::from).collect(), + max_tokens: 64, + temperature: 0.0, + seed: None, + } +} + +fn mk(canned: &str, finish: FinishReason) -> MockLanguageModel { + MockLanguageModel { + model_id: "mock-test".into(), + provider: "mock".into(), + context_tokens: 4096, + canned_response: canned.into(), + canned_finish: finish, + canned_usage: usage(), + } +} + +fn drain(m: &dyn LanguageModel, req: GenerateRequest) -> Vec<TokenChunk> { + m.generate_stream(req) + .expect("generate_stream") + .map(|r| r.expect("ok chunk")) + .collect() +} + +#[test] +fn streams_then_done() { + let m = mk("hello", FinishReason::Stop); + let chunks = drain(&m, req_with_stop(vec![])); + + // 5 Token chunks ("h", "e", "l", "l", "o") + Done. 
+ assert_eq!(chunks.len(), 6); + assert_finish_chunk(&chunks); + + let tokens: Vec<&str> = chunks + .iter() + .filter_map(|c| match c { + TokenChunk::Token(s) => Some(s.as_str()), + _ => None, + }) + .collect(); + assert_eq!(tokens, vec!["h", "e", "l", "l", "o"]); + + match chunks.last().unwrap() { + TokenChunk::Done { + finish_reason, + usage: u, + } => { + assert_eq!(*finish_reason, FinishReason::Stop); + assert_eq!(*u, usage()); + } + _ => unreachable!(), + } +} + +#[test] +fn honors_stop_strings() { + // canned has "STOP" embedded; req.stop=["STOP"] truncates before it. + let m = mk("abc STOP defg", FinishReason::Length); + let chunks = drain(&m, req_with_stop(vec!["STOP"])); + + let concat: String = chunks + .iter() + .filter_map(|c| match c { + TokenChunk::Token(s) => Some(s.as_str()), + _ => None, + }) + .collect(); + assert_eq!(concat, "abc "); + + // Stop-string truncation forces FinishReason::Stop, overriding the + // configured `canned_finish` (Length here). + match chunks.last().unwrap() { + TokenChunk::Done { finish_reason, .. } => { + assert_eq!(*finish_reason, FinishReason::Stop); + } + _ => panic!("last chunk must be Done"), + } +} + +#[test] +fn honors_first_stop_match() { + // Two stop strings; "BAR" appears at byte 4, "FOO" at byte 12. Earliest + // wins regardless of order in req.stop. 
+ let m = mk("abc BAR xyz FOO end", FinishReason::Stop); + let chunks = drain(&m, req_with_stop(vec!["FOO", "BAR"])); + + let concat: String = chunks + .iter() + .filter_map(|c| match c { + TokenChunk::Token(s) => Some(s.as_str()), + _ => None, + }) + .collect(); + assert_eq!(concat, "abc "); +} + +#[test] +fn dyn_dispatch_via_box() { + let m: Box<dyn LanguageModel> = Box::new(mk("xy", FinishReason::Stop)); + assert_eq!(m.model_ref().id, "mock-test"); + assert_eq!(m.model_ref().provider, "mock"); + assert!(m.model_ref().dimensions.is_none()); + assert_eq!(m.context_tokens(), 4096); + + let chunks: Vec<TokenChunk> = m + .generate_stream(req_with_stop(vec![])) + .expect("stream") + .map(|r| r.unwrap()) + .collect(); + assert_eq!(chunks.len(), 3); // x, y, Done + assert_finish_chunk(&chunks); +} + +#[test] +fn concat_equals_canned() { + let canned = "the quick brown fox"; + let m = mk(canned, FinishReason::Stop); + let chunks = drain(&m, req_with_stop(vec![])); + let concat: String = chunks + .iter() + .filter_map(|c| match c { + TokenChunk::Token(s) => Some(s.as_str()), + _ => None, + }) + .collect(); + assert_eq!(concat, canned); +} + +#[test] +fn model_ref_has_no_dimensions() { + let m = mk("anything", FinishReason::Stop); + let r = m.model_ref(); + assert_eq!(r.id, "mock-test"); + assert_eq!(r.provider, "mock"); + assert!(r.dimensions.is_none()); +} + +#[test] +fn finish_reason_passes_through_when_no_stop_match() { + // No stop hit → `canned_finish` is preserved verbatim. + let m = mk("hi", FinishReason::Length); + let chunks = drain(&m, req_with_stop(vec!["NEVER_MATCHES"])); + match chunks.last().unwrap() { + TokenChunk::Done { finish_reason, .. } => { + assert_eq!(*finish_reason, FinishReason::Length); + } + _ => panic!("last chunk must be Done"), + } +} + +proptest! 
{ + #![proptest_config(ProptestConfig { + cases: 100, + ..ProptestConfig::default() + })] + + /// 100 random Unicode canned strings: with no stop strings configured, + /// the stream MUST end in Done, contain exactly `canned.chars().count()` + /// Token chunks, and concatenate back to the canned text byte-equal. + #[test] + fn proptest_random_canned_strings(canned in ".{0,256}") { + let m = mk(&canned, FinishReason::Stop); + let chunks = drain(&m, req_with_stop(vec![])); + + // Last chunk must be Done. + assert_finish_chunk(&chunks); + + // Token-chunk count == canned.chars().count(). + let token_count = chunks + .iter() + .filter(|c| matches!(c, TokenChunk::Token(_))) + .count(); + prop_assert_eq!(token_count, canned.chars().count()); + + // Concatenation == canned (byte-equal). + let concat: String = chunks + .iter() + .filter_map(|c| match c { + TokenChunk::Token(s) => Some(s.as_str()), + _ => None, + }) + .collect(); + prop_assert_eq!(concat, canned); + } +} diff --git a/crates/kb-llm/tests/reexports.rs b/crates/kb-llm/tests/reexports.rs new file mode 100644 index 0000000..88f5db2 --- /dev/null +++ b/crates/kb-llm/tests/reexports.rs @@ -0,0 +1,74 @@ +//! Compile-only test: verifies the crate's public surface (trait re-exports +//! and the `assert_finish_chunk` helper) is reachable without the `mock` +//! feature. +//! +//! Runs under both `cargo test -p kb-llm` and +//! `cargo test -p kb-llm --features mock`. + +use kb_llm::{ + FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage, + assert_finish_chunk, +}; + +/// A trivial in-test impl that does NOT rely on the `mock` feature — proves +/// the trait surface alone is enough to write a `LanguageModel`. It returns a +/// stream that terminates immediately with `Done`. 
+struct ZeroLanguageModel; + +impl LanguageModel for ZeroLanguageModel { + fn model_ref(&self) -> ModelRef { + ModelRef { + id: "zero".into(), + provider: "zero".into(), + dimensions: None, + } + } + fn context_tokens(&self) -> usize { + 0 + } + fn generate_stream( + &self, + _req: GenerateRequest, + ) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> { + let chunks = vec![TokenChunk::Done { + finish_reason: FinishReason::Stop, + usage: TokenUsage { + prompt_tokens: 0, + completion_tokens: 0, + latency_ms: 0, + }, + }]; + Ok(Box::new(chunks.into_iter().map(Ok))) + } +} + +#[test] +fn dyn_dispatch_via_box_works() { + let m: Box<dyn LanguageModel> = Box::new(ZeroLanguageModel); + assert_eq!(m.model_ref().id, "zero"); + assert_eq!(m.context_tokens(), 0); + + let req = GenerateRequest { + system: "sys".into(), + user: "usr".into(), + stop: vec![], + max_tokens: 16, + temperature: 0.0, + seed: None, + }; + let stream = m.generate_stream(req).expect("stream"); + let chunks: Vec<TokenChunk> = stream.map(|r| r.expect("ok chunk")).collect(); + assert_eq!(chunks.len(), 1); + assert_finish_chunk(&chunks); +} + +/// Sanity: when built WITHOUT `--features mock`, the `MockLanguageModel` +/// symbol is absent. We can't usefully test `nm` from inside a unit test, but +/// we can at least confirm the cfg gate parses both ways. See PR notes for +/// the CI-side `nm`/`cargo bloat` symbol scan. +#[cfg(not(feature = "mock"))] +#[test] +fn mock_feature_off_compiles() { + // No-op — the test's existence proves the `not(feature = "mock")` gate + // compiles and the crate is usable without `MockLanguageModel`. +}