feat(nli): fb-41 PR-9a — kebab-nli crate skeleton + workspace deps
- 신규 crate kebab-nli (trait + impl 동일 crate, v0.18 scope = ONNX adapter 1개).
- NliVerifier trait + NliScores struct (XNLI 3-channel: entailment/neutral/contradiction).
- private softmax3 (log-sum-exp 안전).
- OnnxNliVerifier placeholder (PR-9b 가 ONNX inference + model download 추가).
- workspace.dependencies 추가: ort 2.0-rc.9, tokenizers 0.21 (default-features=false, onig), hf-hub 0.4, ndarray 0.16.
Pre-flight (PR-9 design contract 의 gate):
- HF Xenova/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7 model.onnx + tokenizer.json → HTTP/2 302 (HF S3 routing, file 존재).
- tokenizers --no-default-features -F onig 의 standalone repro: SentencePiece mDeBERTa tokenizer.json 로드 OK (KR 9 tokens / EN 11 tokens 정상 encode).
- Cargo features 결정 trace: tokenizers = { default-features = false, features = ["onig"] } lock.
Tests: 6 unit (softmax3 정규화 + 불변성 + XNLI logits 변환 + faithfulness + new + score stub) — 통과.
Verification: cargo test -p kebab-nli -j 1 (6/6) + cargo clippy -p kebab-nli --all-targets -j 1 -- -D warnings clean.
Workspace: cargo test --workspace -j 1 — pre-existing kebab-mcp::tools_call_ask_multi_hop 1 fail (main baseline 동일 fail, PR-9a 무관 — ingest fixture/Ollama 의존 flaky).
Wire 영향: 없음 (crate 도입만).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
10
Cargo.lock
generated
10
Cargo.lock
generated
@@ -4327,6 +4327,16 @@ dependencies = [
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kebab-nli"
|
||||
version = "0.17.2"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"kebab-config",
|
||||
"serde",
|
||||
"tempfile",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kebab-normalize"
|
||||
version = "0.17.2"
|
||||
|
||||
14
Cargo.toml
14
Cargo.toml
@@ -24,6 +24,7 @@ members = [
|
||||
"crates/kebab-tui",
|
||||
"crates/kebab-mcp",
|
||||
"crates/kebab-parse-code",
|
||||
"crates/kebab-nli",
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
@@ -102,6 +103,19 @@ tree-sitter-kotlin-ng = "1.1.0" # bare tree-sitter-kotlin requires ts <0.23;
|
||||
# C/C++ family grammars for code ingest (kebab-parse-code, p10-1D).
|
||||
tree-sitter-c = "0.24.2"
|
||||
tree-sitter-cpp = "0.23.4"
|
||||
# fb-41 PR-9 (kebab-nli): mDeBERTa-v3 XNLI verifier deps. Versions match
|
||||
# the fastembed 4.9 transitive set so the ONNX Runtime + tokenizer stack
|
||||
# stays single-versioned across the workspace. ort `default-features=false`
|
||||
# drops the bundled binary downloader (fastembed already provides one);
|
||||
# tokenizers `default-features=false, onig` drops the C++-backed `esaxx_fast`
|
||||
# default feature but keeps the `onig` regex backend, so the build doesn't need
|
||||
# libstdc++ headers (PR-9a pre-flight: SentencePiece tokenizer.json loads + KR/EN encode).
|
||||
# hf-hub uses `ureq + rustls-tls` to stay aligned with kebab-embed-local's
|
||||
# pure-Rust TLS stack.
|
||||
ort = { version = "=2.0.0-rc.9", default-features = false, features = ["ndarray"] }
|
||||
tokenizers = { version = "0.21", default-features = false, features = ["onig"] }
|
||||
hf-hub = { version = "0.4", default-features = false, features = ["ureq", "rustls-tls"] }
|
||||
ndarray = "0.16"
|
||||
|
||||
# Disk-footprint trim for dev / test builds. Codegen, opt-level, and
|
||||
# behavior are unchanged — only DWARF debug info is reduced (line
|
||||
|
||||
20
crates/kebab-nli/Cargo.toml
Normal file
20
crates/kebab-nli/Cargo.toml
Normal file
@@ -0,0 +1,20 @@
|
||||
[package]
|
||||
name = "kebab-nli"
|
||||
version = { workspace = true }
|
||||
edition = { workspace = true }
|
||||
rust-version = { workspace = true }
|
||||
license = { workspace = true }
|
||||
repository = { workspace = true }
|
||||
description = "fb-41: NLI-based post-synthesis verification (XNLI mDeBERTa-v3). PR-9a = trait + scaffolding; ONNX inference lands in PR-9b."
|
||||
|
||||
[dependencies]
|
||||
# PR-9a scope: kebab-config for the OnnxNliVerifier::new(&Config) signature
|
||||
# the rag crate will call once PR-9d wires verification into ask_multi_hop.
|
||||
# ort / tokenizers / hf-hub / ndarray are intentionally NOT depended on here
|
||||
# — they sit in workspace.dependencies until PR-9b adds the real adapter.
|
||||
kebab-config = { path = "../kebab-config" }
|
||||
anyhow = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = { workspace = true }
|
||||
114
crates/kebab-nli/src/lib.rs
Normal file
114
crates/kebab-nli/src/lib.rs
Normal file
@@ -0,0 +1,114 @@
|
||||
//! `kebab-nli` — NLI-based post-synthesis verification for multi-hop RAG.
|
||||
//!
|
||||
//! fb-41 introduces an mDeBERTa-v3 XNLI verifier that runs on
|
||||
//! `(packed_chunks, generated_answer)` after synthesize. If
|
||||
//! `NliScores::faithfulness()` < threshold the rag crate refuses the answer
|
||||
//! with `NliVerificationFailed`. PR-9a (this file) is the trait surface +
|
||||
//! scaffolding only — `OnnxNliVerifier::score` returns a stub error until
|
||||
//! PR-9b adds the real ONNX inference path.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub mod onnx;
|
||||
|
||||
pub use onnx::OnnxNliVerifier;
|
||||
|
||||
/// Three-channel XNLI output. Channel order matches the standard XNLI
|
||||
/// `id2label` mapping `[entailment, neutral, contradiction]` shipped with
|
||||
/// the Xenova mDeBERTa-v3 model.
|
||||
#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NliScores {
|
||||
pub entailment: f32,
|
||||
pub neutral: f32,
|
||||
pub contradiction: f32,
|
||||
}
|
||||
|
||||
impl NliScores {
|
||||
/// Faithfulness score = entailment channel. The rag crate compares this
|
||||
/// against `rag.nli_faithfulness_min` to decide whether to refuse.
|
||||
pub fn faithfulness(&self) -> f32 {
|
||||
self.entailment
|
||||
}
|
||||
|
||||
/// Wrap raw XNLI logits (`[entailment, neutral, contradiction]`) into
|
||||
/// a normalised `NliScores`. Applies a numerically-stable softmax3.
|
||||
pub fn from_xnli_logits(logits: [f32; 3]) -> Self {
|
||||
let probs = softmax3(logits);
|
||||
Self {
|
||||
entailment: probs[0],
|
||||
neutral: probs[1],
|
||||
contradiction: probs[2],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Abstract NLI verifier. `score` is called with `(premise = packed chunks,
|
||||
/// hypothesis = generated answer)` — the standard NLI direction (premise
|
||||
/// entails hypothesis ⇒ answer is grounded in retrieved evidence).
|
||||
pub trait NliVerifier: Send + Sync {
|
||||
fn score(&self, premise: &str, hypothesis: &str) -> anyhow::Result<NliScores>;
|
||||
}
|
||||
|
||||
/// Three-way softmax that shifts every logit by the largest one before
/// exponentiating, so the biggest exponent is always `exp(0)` and large
/// logits cannot overflow.
///
/// Private on purpose: external callers are expected to go through
/// `NliScores::from_xnli_logits`.
fn softmax3(logits: [f32; 3]) -> [f32; 3] {
    // Largest logit, folded left to right.
    let peak = logits[0].max(logits[1]).max(logits[2]);
    // Shifted exponentials, one per channel.
    let weights = logits.map(|logit| (logit - peak).exp());
    // Sum in channel order so rounding is the same on every call.
    let total = weights[0] + weights[1] + weights[2];
    weights.map(|weight| weight / total)
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Absolute-tolerance comparison for `f32` values.
    fn approx_eq(a: f32, b: f32, eps: f32) -> bool {
        let gap = (a - b).abs();
        gap <= eps
    }

    /// Outputs are strictly positive, sum to one and keep the logit order.
    #[test]
    fn softmax3_normalises_to_unit() {
        let [low, mid, high] = softmax3([1.0, 2.0, 3.0]);
        for prob in [low, mid, high] {
            assert!(prob > 0.0);
        }
        assert!(approx_eq(low + mid + high, 1.0, 1e-6));
        // A larger logit must map to a larger probability.
        assert!(low < mid && mid < high);
    }

    /// Adding the same constant to every logit must not move the output.
    #[test]
    fn softmax3_is_invariant_to_constant_shift() {
        let a = softmax3([1.0, 2.0, 3.0]);
        let b = softmax3([101.0, 102.0, 103.0]);
        for (i, (lhs, rhs)) in a.iter().zip(b.iter()).enumerate() {
            assert!(
                approx_eq(*lhs, *rhs, 1e-6),
                "channel {i} drifted: a={a:?} b={b:?}"
            );
        }
    }

    /// A dominant entailment logit yields the dominant entailment score.
    #[test]
    fn nli_scores_from_xnli_logits_orders_correctly() {
        let NliScores {
            entailment,
            neutral,
            contradiction,
        } = NliScores::from_xnli_logits([5.0, 1.0, 0.5]);
        assert!(entailment > neutral);
        assert!(entailment > contradiction);
        let total = entailment + neutral + contradiction;
        assert!(approx_eq(total, 1.0, 1e-6));
    }

    /// `faithfulness` reads the entailment field and nothing else.
    #[test]
    fn faithfulness_returns_entailment_channel() {
        let expected = 0.7;
        let s = NliScores {
            entailment: expected,
            neutral: 0.2,
            contradiction: 0.1,
        };
        assert!(approx_eq(s.faithfulness(), expected, f32::EPSILON));
    }
}
|
||||
58
crates/kebab-nli/src/onnx.rs
Normal file
58
crates/kebab-nli/src/onnx.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
//! ONNX-backed `NliVerifier` adapter (mDeBERTa-v3 XNLI).
|
||||
//!
|
||||
//! PR-9a: scaffolding only. `new` succeeds against the default `Config`
|
||||
//! and `score` returns an explicit `"PR-9a stub"` error so any caller that
|
||||
//! wires this up before PR-9b lands gets a loud failure instead of silent
|
||||
//! all-zero scores. PR-9b will add ort `Session` + `Tokenizer` lazy init
|
||||
//! and real inference.
|
||||
|
||||
use crate::{NliScores, NliVerifier};
|
||||
|
||||
/// NLI verifier intended to run mDeBERTa-v3 XNLI through ONNX Runtime.
///
/// In the PR-9a scaffolding the type carries no state. The real fields
/// (`model_id`, `cache_dir`, `session: OnceLock<ort::Session>`,
/// `tokenizer: OnceLock<tokenizers::Tokenizer>`) are planned for PR-9b.
pub struct OnnxNliVerifier {
    /// Zero-sized private field, so the struct cannot be built with a
    /// struct literal from outside this module.
    _private: (),
}
|
||||
|
||||
impl OnnxNliVerifier {
|
||||
/// Construct a verifier from the user's `Config`. PR-9a always returns
|
||||
/// `Ok` because the real model + tokenizer download is deferred to
|
||||
/// PR-9b's first `score` call.
|
||||
pub fn new(_config: &kebab_config::Config) -> anyhow::Result<Self> {
|
||||
Ok(Self { _private: () })
|
||||
}
|
||||
}
|
||||
|
||||
impl NliVerifier for OnnxNliVerifier {
|
||||
fn score(&self, _premise: &str, _hypothesis: &str) -> anyhow::Result<NliScores> {
|
||||
anyhow::bail!("PR-9a stub — ONNX inference lands in PR-9b")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use kebab_config::Config;

    /// Construction against the default configuration must not fail.
    #[test]
    fn new_succeeds_on_default_config() {
        // The underscore-prefixed binding keeps the value alive without
        // tripping the unused-variable lint.
        let _verifier = OnnxNliVerifier::new(&Config::defaults())
            .expect("new should succeed on default config");
    }

    /// The skeleton `score` must fail with the recognisable stub message.
    #[test]
    fn score_returns_err_in_skeleton() {
        let verifier = OnnxNliVerifier::new(&Config::defaults()).unwrap();
        let err = verifier
            .score("a", "b")
            .expect_err("PR-9a stub must error");
        let message = err.to_string();
        assert!(
            message.contains("PR-9a stub"),
            "unexpected error message: {err}"
        );
    }
}
|
||||
Reference in New Issue
Block a user