From 1eb0bbecb3832e0cb95e273c39c47cb2ba0d8f4c Mon Sep 17 00:00:00 2001 From: altair823 Date: Mon, 25 May 2026 21:22:38 +0000 Subject: [PATCH] =?UTF-8?q?feat(nli):=20fb-41=20PR-9a=20=E2=80=94=20kebab-?= =?UTF-8?q?nli=20crate=20skeleton=20+=20workspace=20deps?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 신규 crate kebab-nli (trait + impl 동일 crate, v0.18 scope = ONNX adapter 1개). - NliVerifier trait + NliScores struct (XNLI 3-channel: entailment/neutral/contradiction). - private softmax3 (log-sum-exp 안전). - OnnxNliVerifier placeholder (PR-9b 가 ONNX inference + model download 추가). - workspace.dependencies 추가: ort 2.0-rc.9, tokenizers 0.21 (default-features=false, onig), hf-hub 0.4, ndarray 0.16. Pre-flight (PR-9 design contract 의 gate): - HF Xenova/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7 model.onnx + tokenizer.json → HTTP/2 302 (HF S3 routing, file 존재). - tokenizers --no-default-features -F onig 의 standalone repro: SentencePiece mDeBERTa tokenizer.json 로드 OK (KR 9 tokens / EN 11 tokens 정상 encode). - Cargo features 결정 trace: tokenizers = { default-features = false, features = ["onig"] } lock. Tests: 6 unit (softmax3 정규화 + 불변성 + XNLI logits 변환 + faithfulness + new + score stub) — 통과. Verification: cargo test -p kebab-nli -j 1 (6/6) + cargo clippy -p kebab-nli --all-targets -j 1 -- -D warnings clean. Workspace: cargo test --workspace -j 1 — pre-existing kebab-mcp::tools_call_ask_multi_hop 1 fail (main baseline 동일 fail, PR-9a 무관 — ingest fixture/Ollama 의존 flaky). Wire 영향: 없음 (crate 도입만). 
Co-Authored-By: Claude Opus 4.7 (1M context)
---
 Cargo.lock                   |  10 +++
 Cargo.toml                   |  14 +++++
 crates/kebab-nli/Cargo.toml  |  20 ++++++
 crates/kebab-nli/src/lib.rs  | 114 +++++++++++++++++++++++++++++++++++
 crates/kebab-nli/src/onnx.rs |  58 ++++++++++++++++++
 5 files changed, 216 insertions(+)
 create mode 100644 crates/kebab-nli/Cargo.toml
 create mode 100644 crates/kebab-nli/src/lib.rs
 create mode 100644 crates/kebab-nli/src/onnx.rs

diff --git a/Cargo.lock b/Cargo.lock
index 6d6b211..91c7e4a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4327,6 +4327,16 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "kebab-nli"
+version = "0.17.2"
+dependencies = [
+ "anyhow",
+ "kebab-config",
+ "serde",
+ "tempfile",
+]
+
 [[package]]
 name = "kebab-normalize"
 version = "0.17.2"
diff --git a/Cargo.toml b/Cargo.toml
index d2d3d01..0d332f5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ members = [
     "crates/kebab-tui",
     "crates/kebab-mcp",
     "crates/kebab-parse-code",
+    "crates/kebab-nli",
 ]
 
 [workspace.package]
@@ -102,6 +103,19 @@ tree-sitter-kotlin-ng = "1.1.0" # bare tree-sitter-kotlin requires ts <0.23;
 # C/C++ family grammars for code ingest (kebab-parse-code, p10-1D).
 tree-sitter-c = "0.24.2"
 tree-sitter-cpp = "0.23.4"
+# fb-41 PR-9 (kebab-nli): mDeBERTa-v3 XNLI verifier deps. Versions match
+# the fastembed 4.9 transitive set so the ONNX Runtime + tokenizer stack
+# stays single-versioned across the workspace. ort `default-features=false`
+# drops the bundled binary downloader (fastembed already provides one);
+# tokenizers `default-features=false, onig` drops the default C++-backed
+# `esaxx_fast` feature (and `progressbar`) but keeps the `onig` regex backend,
+# so the build doesn't need libstdc++ headers (verified via PR-9a pre-flight:
+# SentencePiece tokenizer.json loads + KR/EN encode). hf-hub uses
+# `ureq + rustls-tls` to match kebab-embed-local's pure-Rust TLS stack.
+ort = { version = "=2.0.0-rc.9", default-features = false, features = ["ndarray"] }
+tokenizers = { version = "0.21", default-features = false, features = ["onig"] }
+hf-hub = { version = "0.4", default-features = false, features = ["ureq", "rustls-tls"] }
+ndarray = "0.16"
 
 # Disk-footprint trim for dev / test builds. Codegen, opt-level, and
 # behavior are unchanged — only DWARF debug info is reduced (line
diff --git a/crates/kebab-nli/Cargo.toml b/crates/kebab-nli/Cargo.toml
new file mode 100644
index 0000000..7336062
--- /dev/null
+++ b/crates/kebab-nli/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "kebab-nli"
+version = { workspace = true }
+edition = { workspace = true }
+rust-version = { workspace = true }
+license = { workspace = true }
+repository = { workspace = true }
+description = "fb-41: NLI-based post-synthesis verification (XNLI mDeBERTa-v3). PR-9a = trait + scaffolding; ONNX inference lands in PR-9b."
+
+[dependencies]
+# PR-9a scope: kebab-config for the OnnxNliVerifier::new(&Config) signature
+# the rag crate will call once PR-9d wires verification into ask_multi_hop.
+# ort / tokenizers / hf-hub / ndarray are intentionally NOT depended on here
+# — they sit in workspace.dependencies until PR-9b adds the real adapter.
+kebab-config = { path = "../kebab-config" }
+anyhow = { workspace = true }
+serde = { workspace = true }
+
+[dev-dependencies]
+tempfile = { workspace = true }
diff --git a/crates/kebab-nli/src/lib.rs b/crates/kebab-nli/src/lib.rs
new file mode 100644
index 0000000..6bc4b3b
--- /dev/null
+++ b/crates/kebab-nli/src/lib.rs
@@ -0,0 +1,114 @@
+//! `kebab-nli` — NLI-based post-synthesis verification for multi-hop RAG.
+//!
+//! fb-41 introduces a mDeBERTa-v3 XNLI verifier that runs on
+//! `(packed_chunks, generated_answer)` after synthesize. If
+//! `NliScores::faithfulness()` < threshold the rag crate refuses the answer
+//! with `NliVerificationFailed`. PR-9a (this file) is the trait surface +
+//! scaffolding only — `OnnxNliVerifier::score` returns a stub error until
+//! PR-9b adds the real ONNX inference path.
+
+use serde::{Deserialize, Serialize};
+
+pub mod onnx;
+
+pub use onnx::OnnxNliVerifier;
+
+/// Three-channel XNLI output. Channel order matches the standard XNLI
+/// `id2label` mapping `[entailment, neutral, contradiction]` shipped with
+/// the Xenova mDeBERTa-v3 model.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct NliScores {
+    pub entailment: f32,
+    pub neutral: f32,
+    pub contradiction: f32,
+}
+
+impl NliScores {
+    /// Faithfulness score = entailment channel. The rag crate compares this
+    /// against `rag.nli_faithfulness_min` to decide whether to refuse.
+    pub fn faithfulness(&self) -> f32 {
+        self.entailment
+    }
+
+    /// Wrap raw XNLI logits (`[entailment, neutral, contradiction]`) into
+    /// a normalised `NliScores`. Applies a numerically-stable softmax3.
+    pub fn from_xnli_logits(logits: [f32; 3]) -> Self {
+        let probs = softmax3(logits);
+        Self {
+            entailment: probs[0],
+            neutral: probs[1],
+            contradiction: probs[2],
+        }
+    }
+}
+
+/// Abstract NLI verifier. `score` is called with `(premise = packed chunks,
+/// hypothesis = generated answer)` — the standard NLI direction (premise
+/// entails hypothesis ⇒ answer is grounded in retrieved evidence).
+pub trait NliVerifier: Send + Sync {
+    fn score(&self, premise: &str, hypothesis: &str) -> anyhow::Result<NliScores>;
+}
+
+/// Numerically stable 3-way softmax (subtract max for log-sum-exp safety).
+/// Private — call sites should go through `NliScores::from_xnli_logits`.
+fn softmax3(logits: [f32; 3]) -> [f32; 3] {
+    let max = logits[0].max(logits[1]).max(logits[2]);
+    let e0 = (logits[0] - max).exp();
+    let e1 = (logits[1] - max).exp();
+    let e2 = (logits[2] - max).exp();
+    let sum = e0 + e1 + e2;
+    [e0 / sum, e1 / sum, e2 / sum]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn approx_eq(a: f32, b: f32, eps: f32) -> bool {
+        (a - b).abs() <= eps
+    }
+
+    #[test]
+    fn softmax3_normalises_to_unit() {
+        let p = softmax3([1.0, 2.0, 3.0]);
+        assert!(p.iter().all(|x| *x > 0.0));
+        assert!(approx_eq(p[0] + p[1] + p[2], 1.0, 1e-6));
+        // Monotonic: larger logit ⇒ larger probability.
+        assert!(p[0] < p[1] && p[1] < p[2]);
+    }
+
+    #[test]
+    fn softmax3_is_invariant_to_constant_shift() {
+        let a = softmax3([1.0, 2.0, 3.0]);
+        let b = softmax3([101.0, 102.0, 103.0]);
+        for i in 0..3 {
+            assert!(
+                approx_eq(a[i], b[i], 1e-6),
+                "channel {i} drifted: a={a:?} b={b:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn nli_scores_from_xnli_logits_orders_correctly() {
+        // entailment dominates ⇒ entailment is the max probability channel.
+        let s = NliScores::from_xnli_logits([5.0, 1.0, 0.5]);
+        assert!(s.entailment > s.neutral);
+        assert!(s.entailment > s.contradiction);
+        assert!(approx_eq(
+            s.entailment + s.neutral + s.contradiction,
+            1.0,
+            1e-6
+        ));
+    }
+
+    #[test]
+    fn faithfulness_returns_entailment_channel() {
+        let s = NliScores {
+            entailment: 0.7,
+            neutral: 0.2,
+            contradiction: 0.1,
+        };
+        assert!(approx_eq(s.faithfulness(), 0.7, f32::EPSILON));
+    }
+}
diff --git a/crates/kebab-nli/src/onnx.rs b/crates/kebab-nli/src/onnx.rs
new file mode 100644
index 0000000..da1f025
--- /dev/null
+++ b/crates/kebab-nli/src/onnx.rs
@@ -0,0 +1,58 @@
+//! ONNX-backed `NliVerifier` adapter (mDeBERTa-v3 XNLI).
+//!
+//! PR-9a: scaffolding only. `new` succeeds against the default `Config`
+//! and `score` returns an explicit `"PR-9a stub"` error so any caller that
+//! wires this up before PR-9b lands gets a loud failure instead of silent
+//! all-zero scores. PR-9b will add ort `Session` + `Tokenizer` lazy init
+//! and real inference.
+
+use crate::{NliScores, NliVerifier};
+
+/// ONNX-runtime mDeBERTa-v3 XNLI verifier.
+///
+/// PR-9a scaffolding holds no state — fields land in PR-9b
+/// (`model_id`, `cache_dir`, `session: OnceLock<Session>`,
+/// `tokenizer: OnceLock<Tokenizer>`).
+pub struct OnnxNliVerifier {
+    _private: (),
+}
+
+impl OnnxNliVerifier {
+    /// Construct a verifier from the user's `Config`. PR-9a always returns
+    /// `Ok` because the real model + tokenizer download is deferred to
+    /// PR-9b's first `score` call.
+    pub fn new(_config: &kebab_config::Config) -> anyhow::Result<Self> {
+        Ok(Self { _private: () })
+    }
+}
+
+impl NliVerifier for OnnxNliVerifier {
+    fn score(&self, _premise: &str, _hypothesis: &str) -> anyhow::Result<NliScores> {
+        anyhow::bail!("PR-9a stub — ONNX inference lands in PR-9b")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use kebab_config::Config;
+
+    #[test]
+    fn new_succeeds_on_default_config() {
+        let cfg = Config::defaults();
+        let v = OnnxNliVerifier::new(&cfg).expect("new should succeed on default config");
+        // Silence unused-binding lint without weakening the assertion.
+        let _ = &v;
+    }
+
+    #[test]
+    fn score_returns_err_in_skeleton() {
+        let cfg = Config::defaults();
+        let v = OnnxNliVerifier::new(&cfg).unwrap();
+        let err = v.score("a", "b").expect_err("PR-9a stub must error");
+        assert!(
+            err.to_string().contains("PR-9a stub"),
+            "unexpected error message: {err}"
+        );
+    }
+}