Merge pull request 'feat(p4-1): llm-trait — kb-llm 크레이트 + MockLanguageModel' (#21) from feat/p4-1-llm-trait into main
Reviewed-on: altair823-org/kb#21
This commit was merged in pull request #21.
This commit is contained in:
9
Cargo.lock
generated
9
Cargo.lock
generated
@@ -3439,6 +3439,15 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-llm"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kb-core",
|
||||
"proptest",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kb-normalize"
|
||||
version = "0.1.0"
|
||||
|
||||
@@ -13,6 +13,7 @@ members = [
|
||||
"crates/kb-search",
|
||||
"crates/kb-embed",
|
||||
"crates/kb-embed-local",
|
||||
"crates/kb-llm",
|
||||
"crates/kb-app",
|
||||
"crates/kb-cli",
|
||||
]
|
||||
|
||||
21
crates/kb-llm/Cargo.toml
Normal file
21
crates/kb-llm/Cargo.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[package]
|
||||
name = "kb-llm"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
license = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
description = "LanguageModel trait re-export + feature-gated MockLanguageModel for downstream tests"
|
||||
|
||||
[dependencies]
|
||||
kb-core = { path = "../kb-core" }
|
||||
anyhow = { workspace = true }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
# Opt-in `MockLanguageModel`. Default OFF so release builds (no `--features mock`)
|
||||
# compile the symbol out entirely (verifiable via `nm`/`cargo bloat`).
|
||||
mock = []
|
||||
|
||||
[dev-dependencies]
|
||||
proptest = { workspace = true }
|
||||
49
crates/kb-llm/src/lib.rs
Normal file
49
crates/kb-llm/src/lib.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
//! `kb-llm` — thin re-export crate for the [`LanguageModel`] trait surface.
|
||||
//!
|
||||
//! This crate exists so downstream code (`kb-rag`, adapters in p4-2) can
|
||||
//! `use kb_llm::LanguageModel` and stay stable across kb-core reorganizations.
|
||||
//! In default builds it defines **no new types**: apart from the
//! `assert_finish_chunk` helper, everything is a re-export of [`kb_core`].
|
||||
//!
|
||||
//! ## Mock implementation
|
||||
//!
|
||||
//! [`MockLanguageModel`] (gated behind the `mock` feature, default **OFF**) is
|
||||
//! a deterministic test double. Real adapters (Ollama, llama.cpp, candle) live
|
||||
//! in p4-2 and MUST NOT be implemented here. Real adapters MAY return `Err`
|
||||
//! from `generate_stream` itself (e.g., connection refused) before any chunk
|
||||
//! is yielded; the mock never does.
|
||||
//!
|
||||
//! See `docs/superpowers/specs/2026-04-27-kb-final-form-design.md` §7.1, §7.2,
|
||||
//! §0 Q5 (streaming), §3.8 (`ModelRef`) for the contract.
|
||||
|
||||
// ── Trait re-exports ──────────────────────────────────────────────────────
|
||||
//
|
||||
// Per spec §7.2 — these are the only public-surface types this crate offers.
|
||||
// Adding new types is forbidden by the task contract.
|
||||
|
||||
pub use kb_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
|
||||
// ── Test helper ───────────────────────────────────────────────────────────
|
||||
|
||||
/// Assert the streamed `TokenChunk` sequence ends with a [`TokenChunk::Done`]
|
||||
/// frame. Per spec §7.2 / §0 Q5 every stream — even an erroring one — must
|
||||
/// terminate with a `Done` chunk; this helper centralizes that contract check
|
||||
/// so downstream test crates don't each rewrite it.
|
||||
///
|
||||
/// Panics on mismatch (test-only helper — callers are tests).
|
||||
pub fn assert_finish_chunk(chunks: &[TokenChunk]) {
|
||||
assert!(
|
||||
matches!(chunks.last(), Some(TokenChunk::Done { .. })),
|
||||
"stream must end with TokenChunk::Done; got {:?}",
|
||||
chunks.last(),
|
||||
);
|
||||
}
|
||||
|
||||
// ── MockLanguageModel (feature = "mock") ──────────────────────────────────
|
||||
|
||||
#[cfg(feature = "mock")]
|
||||
mod mock;
|
||||
|
||||
#[cfg(feature = "mock")]
|
||||
pub use mock::MockLanguageModel;
|
||||
117
crates/kb-llm/src/mock.rs
Normal file
117
crates/kb-llm/src/mock.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
//! Deterministic mock language model for downstream tests.
|
||||
//!
|
||||
//! Compiled only when the `mock` feature is enabled. Default builds
|
||||
//! (`cargo build --release -p kb-llm`) MUST NOT contain the `MockLanguageModel`
|
||||
//! symbol — verifiable by symbol scan (`nm`/`cargo bloat`).
|
||||
//!
|
||||
//! ## Streaming contract
|
||||
//!
|
||||
//! For every call to [`MockLanguageModel::generate_stream`]:
|
||||
//!
|
||||
//! 1. The configured `canned_response` is examined for any of `req.stop`. If
|
||||
//! one or more stop strings are substrings of the response, the response
|
||||
//! is truncated at the **earliest byte position** of any match (i.e., the
//! first stop string to land). Two stop strings that match at the same
//! position produce the same cut, so the order of entries in `req.stop`
//! never changes the output.
|
||||
//! 2. The (possibly truncated) string is iterated by Unicode scalar
|
||||
//! (`str::chars()`) and each character is yielded as
|
||||
//! [`TokenChunk::Token`]`(c.to_string())`. This makes streaming UTF-8 safe
|
||||
//! by construction (no character is split across chunks). Emits one
|
||||
//! `TokenChunk` per Unicode scalar value (`char`), not per grapheme
|
||||
//! cluster — Hangul jamo, emoji ZWJ sequences, and combining marks split
|
||||
//! into multiple chunks. Acceptable for trait-shape testing; real adapters
|
||||
//! MAY combine.
|
||||
//! 3. After all tokens, a single terminal [`TokenChunk::Done`] is yielded
|
||||
//! with:
|
||||
//! * `finish_reason = FinishReason::Stop` if a stop string truncated the
|
||||
//! canned text — mirroring real LLM behavior, which reports Stop on
|
||||
//! stop-sequence termination regardless of the configured finish.
|
||||
//! * `finish_reason = canned_finish.clone()` otherwise.
|
||||
//! * `usage = canned_usage.clone()` always.
|
||||
//!
|
||||
//! ## Non-effects
|
||||
//!
|
||||
//! - No network. No filesystem. No async runtime.
|
||||
//! - No tokenizer. `usage.prompt_tokens` / `completion_tokens` are whatever
|
||||
//! the constructor was given — the mock does not count.
|
||||
|
||||
use kb_core::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
};
|
||||
|
||||
/// Deterministic test double. See module docs for the streaming recipe.
|
||||
pub struct MockLanguageModel {
|
||||
pub model_id: String,
|
||||
pub provider: String,
|
||||
pub context_tokens: usize,
|
||||
pub canned_response: String,
|
||||
pub canned_finish: FinishReason,
|
||||
pub canned_usage: TokenUsage,
|
||||
}
|
||||
|
||||
impl MockLanguageModel {
|
||||
/// Apply `req.stop` to `canned_response`. Returns `(truncated_text,
|
||||
/// stop_hit)` where `stop_hit` is true iff any stop string was found.
|
||||
fn apply_stop<'a>(canned: &'a str, stop: &[String]) -> (&'a str, bool) {
|
||||
// Earliest byte position wins. Ties break by first occurrence in
|
||||
// `stop` (Iterator::min returns the first equal element, and we
|
||||
// iterate `stop` in its declared order). Empty stop strings are
|
||||
// ignored — they would otherwise match at position 0 and silently
|
||||
// eat the entire response.
|
||||
let earliest = stop
|
||||
.iter()
|
||||
.filter(|s| !s.is_empty())
|
||||
.filter_map(|s| canned.find(s.as_str()))
|
||||
.min();
|
||||
match earliest {
|
||||
// `str::find` returns a UTF-8 char boundary by contract, so direct byte-slice is sound.
|
||||
Some(idx) => (&canned[..idx], true),
|
||||
None => (canned, false),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LanguageModel for MockLanguageModel {
|
||||
fn model_ref(&self) -> ModelRef {
|
||||
ModelRef {
|
||||
id: self.model_id.clone(),
|
||||
provider: self.provider.clone(),
|
||||
// Per §3.8: `dimensions` carries the embedder's output dim and is
|
||||
// intentionally None for chat models.
|
||||
dimensions: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn context_tokens(&self) -> usize {
|
||||
self.context_tokens
|
||||
}
|
||||
|
||||
fn generate_stream(
|
||||
&self,
|
||||
req: GenerateRequest,
|
||||
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
|
||||
let (truncated, stop_hit) = Self::apply_stop(&self.canned_response, &req.stop);
|
||||
|
||||
// Pre-materialize the full chunk sequence into an owned Vec. This
|
||||
// sidesteps lifetime juggling around `&self.canned_response` inside
|
||||
// a `'static` iterator and trivially gives `Send` (Vec<TokenChunk>
|
||||
// is Send because TokenChunk is Send).
|
||||
let mut chunks: Vec<TokenChunk> = truncated
|
||||
.chars()
|
||||
.map(|c| TokenChunk::Token(c.to_string()))
|
||||
.collect();
|
||||
|
||||
let finish_reason = if stop_hit {
|
||||
FinishReason::Stop
|
||||
} else {
|
||||
self.canned_finish.clone()
|
||||
};
|
||||
chunks.push(TokenChunk::Done {
|
||||
finish_reason,
|
||||
usage: self.canned_usage.clone(),
|
||||
});
|
||||
|
||||
Ok(Box::new(chunks.into_iter().map(Ok)))
|
||||
}
|
||||
}
|
||||
210
crates/kb-llm/tests/mock.rs
Normal file
210
crates/kb-llm/tests/mock.rs
Normal file
@@ -0,0 +1,210 @@
|
||||
//! Integration tests for `MockLanguageModel`. Gated behind the `mock` feature.
|
||||
//!
|
||||
//! Canonical invocation: `cargo test -p kb-llm --features mock`.
|
||||
|
||||
#![cfg(feature = "mock")]
|
||||
|
||||
use kb_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, MockLanguageModel, TokenChunk, TokenUsage,
|
||||
assert_finish_chunk,
|
||||
};
|
||||
use proptest::prelude::*;
|
||||
|
||||
fn usage() -> TokenUsage {
|
||||
TokenUsage {
|
||||
prompt_tokens: 10,
|
||||
completion_tokens: 20,
|
||||
latency_ms: 30,
|
||||
}
|
||||
}
|
||||
|
||||
fn req_with_stop(stop: Vec<&str>) -> GenerateRequest {
|
||||
GenerateRequest {
|
||||
system: "sys".into(),
|
||||
user: "usr".into(),
|
||||
stop: stop.into_iter().map(String::from).collect(),
|
||||
max_tokens: 64,
|
||||
temperature: 0.0,
|
||||
seed: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn mk(canned: &str, finish: FinishReason) -> MockLanguageModel {
|
||||
MockLanguageModel {
|
||||
model_id: "mock-test".into(),
|
||||
provider: "mock".into(),
|
||||
context_tokens: 4096,
|
||||
canned_response: canned.into(),
|
||||
canned_finish: finish,
|
||||
canned_usage: usage(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Runs `generate_stream` and collects every chunk, panicking if the call
/// itself or any yielded item is an `Err`.
fn drain(m: &dyn LanguageModel, req: GenerateRequest) -> Vec<TokenChunk> {
    let stream = m.generate_stream(req).expect("generate_stream");
    let mut collected = Vec::new();
    for item in stream {
        collected.push(item.expect("ok chunk"));
    }
    collected
}
|
||||
|
||||
/// With no stop strings, every character is streamed as its own token and
/// the terminal chunk carries the configured finish reason and usage.
#[test]
fn streams_then_done() {
    let model = mk("hello", FinishReason::Stop);
    let chunks = drain(&model, req_with_stop(vec![]));

    // One Token per character of "hello", then the terminal Done.
    assert_eq!(chunks.len(), 6);
    assert_finish_chunk(&chunks);

    let mut tokens: Vec<&str> = Vec::new();
    for chunk in &chunks {
        if let TokenChunk::Token(text) = chunk {
            tokens.push(text.as_str());
        }
    }
    assert_eq!(tokens, vec!["h", "e", "l", "l", "o"]);

    match chunks.last().unwrap() {
        TokenChunk::Done {
            finish_reason,
            usage: reported,
        } => {
            assert_eq!(*finish_reason, FinishReason::Stop);
            assert_eq!(*reported, usage());
        }
        _ => unreachable!(),
    }
}
|
||||
|
||||
/// A stop string found in the canned text truncates the output before it
/// and forces `FinishReason::Stop`.
#[test]
fn honors_stop_strings() {
    // The canned text embeds "STOP", so only the part before it is streamed.
    let model = mk("abc STOP defg", FinishReason::Length);
    let chunks = drain(&model, req_with_stop(vec!["STOP"]));

    let mut streamed = String::new();
    for chunk in &chunks {
        if let TokenChunk::Token(piece) = chunk {
            streamed.push_str(piece.as_str());
        }
    }
    assert_eq!(streamed, "abc ");

    // The mock was configured with Length, yet a stop hit must report Stop.
    match chunks.last().unwrap() {
        TokenChunk::Done { finish_reason, .. } => {
            assert_eq!(*finish_reason, FinishReason::Stop)
        }
        _ => panic!("last chunk must be Done"),
    }
}
|
||||
|
||||
/// With several stop strings present in the canned text, the one occurring
/// earliest decides the cut, whatever its position in `req.stop`.
#[test]
fn honors_first_stop_match() {
    // "BAR" starts at byte 4 and "FOO" at byte 12, so the output must end
    // right before "BAR". Both declaration orders are exercised so that the
    // "regardless of order" claim is actually checked.
    let m = mk("abc BAR xyz FOO end", FinishReason::Stop);
    let orders = vec![vec!["FOO", "BAR"], vec!["BAR", "FOO"]];

    for stop in orders {
        let label = format!("{:?}", stop);
        let chunks = drain(&m, req_with_stop(stop));

        let concat: String = chunks
            .iter()
            .filter_map(|c| match c {
                TokenChunk::Token(s) => Some(s.as_str()),
                _ => None,
            })
            .collect();
        assert_eq!(concat, "abc ", "unexpected output for stop order {}", label);

        // Truncation must not drop the terminal frame.
        assert_finish_chunk(&chunks);
    }
}
|
||||
|
||||
/// The mock is usable through a `Box<dyn LanguageModel>` trait object.
#[test]
fn dyn_dispatch_via_box() {
    let boxed: Box<dyn LanguageModel> = Box::new(mk("xy", FinishReason::Stop));

    let model_ref = boxed.model_ref();
    assert_eq!(model_ref.id, "mock-test");
    assert_eq!(model_ref.provider, "mock");
    assert!(model_ref.dimensions.is_none());
    assert_eq!(boxed.context_tokens(), 4096);

    let stream = boxed.generate_stream(req_with_stop(vec![])).expect("stream");
    let mut chunks: Vec<TokenChunk> = Vec::new();
    for item in stream {
        chunks.push(item.unwrap());
    }

    // Two tokens ("x", "y") and the terminal Done.
    assert_eq!(chunks.len(), 3);
    assert_finish_chunk(&chunks);
}
|
||||
|
||||
/// Without stop strings, joining all streamed tokens reproduces the canned
/// text exactly.
#[test]
fn concat_equals_canned() {
    let canned = "the quick brown fox";
    let chunks = drain(&mk(canned, FinishReason::Stop), req_with_stop(vec![]));

    let mut rebuilt = String::new();
    for chunk in &chunks {
        if let TokenChunk::Token(piece) = chunk {
            rebuilt.push_str(piece.as_str());
        }
    }
    assert_eq!(rebuilt, canned);
}
|
||||
|
||||
/// `model_ref()` echoes the configured id and provider and leaves
/// `dimensions` unset.
#[test]
fn model_ref_has_no_dimensions() {
    let model_ref = mk("anything", FinishReason::Stop).model_ref();
    assert_eq!(model_ref.id, "mock-test");
    assert_eq!(model_ref.provider, "mock");
    assert!(model_ref.dimensions.is_none());
}
|
||||
|
||||
/// When no stop string matches, the terminal chunk reports the configured
/// `canned_finish` untouched.
#[test]
fn finish_reason_passes_through_when_no_stop_match() {
    let chunks = drain(
        &mk("hi", FinishReason::Length),
        req_with_stop(vec!["NEVER_MATCHES"]),
    );

    let last = chunks.last().unwrap();
    if let TokenChunk::Done { finish_reason, .. } = last {
        assert_eq!(*finish_reason, FinishReason::Length);
    } else {
        panic!("last chunk must be Done");
    }
}
|
||||
|
||||
proptest! {
    #![proptest_config(ProptestConfig {
        cases: 100,
        ..ProptestConfig::default()
    })]

    /// Property over 100 generated canned strings with no stop strings set:
    /// the stream ends in Done, holds exactly `canned.chars().count()` Token
    /// chunks, and those tokens join back into the canned text byte-for-byte.
    #[test]
    fn proptest_random_canned_strings(canned in ".{0,256}") {
        let model = mk(&canned, FinishReason::Stop);
        let chunks = drain(&model, req_with_stop(vec![]));

        // Terminal frame check.
        assert_finish_chunk(&chunks);

        // Single pass: count the tokens and rebuild the text together.
        let mut token_count = 0usize;
        let mut rebuilt = String::new();
        for chunk in &chunks {
            if let TokenChunk::Token(piece) = chunk {
                token_count += 1;
                rebuilt.push_str(piece.as_str());
            }
        }

        prop_assert_eq!(token_count, canned.chars().count());
        prop_assert_eq!(rebuilt, canned);
    }
}
|
||||
74
crates/kb-llm/tests/reexports.rs
Normal file
74
crates/kb-llm/tests/reexports.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
//! Compile-only test: verifies the crate's public surface (trait re-exports
|
||||
//! and the `assert_finish_chunk` helper) is reachable without the `mock`
|
||||
//! feature.
|
||||
//!
|
||||
//! Runs under both `cargo test -p kb-llm` and
|
||||
//! `cargo test -p kb-llm --features mock`.
|
||||
|
||||
use kb_llm::{
|
||||
FinishReason, GenerateRequest, LanguageModel, ModelRef, TokenChunk, TokenUsage,
|
||||
assert_finish_chunk,
|
||||
};
|
||||
|
||||
/// A trivial in-test impl that does NOT rely on the `mock` feature — proves
|
||||
/// the trait surface alone is enough to write a `LanguageModel`. It returns a
|
||||
/// stream that terminates immediately with `Done`.
|
||||
struct ZeroLanguageModel;
|
||||
|
||||
impl LanguageModel for ZeroLanguageModel {
|
||||
fn model_ref(&self) -> ModelRef {
|
||||
ModelRef {
|
||||
id: "zero".into(),
|
||||
provider: "zero".into(),
|
||||
dimensions: None,
|
||||
}
|
||||
}
|
||||
fn context_tokens(&self) -> usize {
|
||||
0
|
||||
}
|
||||
fn generate_stream(
|
||||
&self,
|
||||
_req: GenerateRequest,
|
||||
) -> anyhow::Result<Box<dyn Iterator<Item = anyhow::Result<TokenChunk>> + Send>> {
|
||||
let chunks = vec![TokenChunk::Done {
|
||||
finish_reason: FinishReason::Stop,
|
||||
usage: TokenUsage {
|
||||
prompt_tokens: 0,
|
||||
completion_tokens: 0,
|
||||
latency_ms: 0,
|
||||
},
|
||||
}];
|
||||
Ok(Box::new(chunks.into_iter().map(Ok)))
|
||||
}
|
||||
}
|
||||
|
||||
/// A hand-written `LanguageModel` can be boxed as a trait object and driven
/// end to end using only the re-exported types.
#[test]
fn dyn_dispatch_via_box_works() {
    let model: Box<dyn LanguageModel> = Box::new(ZeroLanguageModel);
    assert_eq!(model.model_ref().id, "zero");
    assert_eq!(model.context_tokens(), 0);

    let chunks: Vec<TokenChunk> = model
        .generate_stream(GenerateRequest {
            system: "sys".into(),
            user: "usr".into(),
            stop: vec![],
            max_tokens: 16,
            temperature: 0.0,
            seed: None,
        })
        .expect("stream")
        .map(|item| item.expect("ok chunk"))
        .collect();

    assert_eq!(chunks.len(), 1);
    assert_finish_chunk(&chunks);
}
|
||||
|
||||
/// Placeholder that exists only in builds WITHOUT `--features mock`.
///
/// A symbol scan (`nm`) cannot usefully run from inside a test, so this only
/// confirms that the `not(feature = "mock")` gate compiles. The actual
/// absence of the `MockLanguageModel` symbol is checked by the CI-side
/// `nm`/`cargo bloat` scan described in the PR notes.
#[cfg(not(feature = "mock"))]
#[test]
fn mock_feature_off_compiles() {
    // Intentionally empty: compiling this function is the whole check, and it
    // shows the crate is usable without `MockLanguageModel`.
}
|
||||
Reference in New Issue
Block a user