From 6ffbe0a5a327df49801967c20a42f88060bd6e58 Mon Sep 17 00:00:00 2001 From: altair823 Date: Mon, 25 May 2026 22:22:30 +0000 Subject: [PATCH] =?UTF-8?q?chore(nli):=20PR=20#177=20=ED=9A=8C=EC=B0=A8=20?= =?UTF-8?q?1=20=EB=A6=AC=EB=B7=B0=20=EB=B0=98=EC=98=81=20(N1=20cache-hit?= =?UTF-8?q?=20probe=20+=20N2=20test=20pollution)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - N1: fetch 의 cache-hit 검사 경로가 실제로는 download 트리거 (ApiRepo::get 가 cache miss 시 download 후 path 반환). log 의 "NLI artifact cache hit" 가 *방금 download 한 직후* 출력 — misleading. hf_hub::Cache::new(cache_dir).repo(repo).get(filename).is_some() 로 변경 — Cache::get 은 fs lookup only, 네트워크 안 탐. actual download 횟수는 변화 없음 (1번), log accuracy 만 개선. - N2: new_succeeds_on_default_config / score_empty_hypothesis_returns_err 가 XDG 실 디렉토리 (`~/.local/share/kebab/models/nli/...`) 를 create_dir_all → test pollution. tempdir_config() 헬퍼 추가 — TempDir 으로 storage.data_dir override, model_dir 는 `{data_dir}/models` 그대로 두어 expand_path 의 substitution 검증도 유지. cargo test -p kebab-nli -j 1 → 6 passed / 0 failed (unit) + 5 ignored (integration, manual). cargo clippy -p kebab-nli --all-targets -j 1 -- -D warnings clean. inference.rs 미수정 → manual --ignored smoke 결과 (5/5 PASS) 그대로 유효. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-nli/src/onnx.rs | 71 ++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/crates/kebab-nli/src/onnx.rs b/crates/kebab-nli/src/onnx.rs index 17b1e4f..ec49002 100644 --- a/crates/kebab-nli/src/onnx.rs +++ b/crates/kebab-nli/src/onnx.rs @@ -133,29 +133,15 @@ impl OnnxNliVerifier { /// so a user reading kebab logs can see which artifact source the /// pipeline picked. fn fetch(&self, filename: &str) -> Result { - let api = hf_hub::api::sync::ApiBuilder::new() - .with_cache_dir(self.cache_dir.clone()) - .build() - .with_context(|| { - format!( - "kebab-nli: hf-hub ApiBuilder::build failed (cache_dir={})", - self.cache_dir.display() - ) - })?; - let repo = api.model(self.model_id.clone()); - - // `ApiRepo::get` returns the local path if cached, otherwise - // downloads. We can't tell after the fact whether the file - // was already cached without an extra `Cache::repo::get` - // probe, so do that probe first to emit the right log line. - let cache_path = api - .repo(hf_hub::Repo::new( - self.model_id.clone(), - hf_hub::RepoType::Model, - )) + // Round-1 review N1 fix: `Api::get` triggers download on miss, + // so we can't use it as a hit probe. `Cache::get` is fs-only — + // returns Some(path) if cached, None otherwise. No network. + let repo = hf_hub::Repo::new(self.model_id.clone(), hf_hub::RepoType::Model); + let cached = hf_hub::Cache::new(self.cache_dir.clone()) + .repo(repo.clone()) .get(filename) - .ok(); - if cache_path.is_some() { + .is_some(); + if cached { tracing::info!( target: "kebab-nli", model_id = %self.model_id, @@ -172,13 +158,24 @@ impl OnnxNliVerifier { ); } - repo.get(filename).with_context(|| { - format!( - "kebab-nli: hf-hub fetch failed for {filename} (model_id={}, cache_dir={})", - self.model_id, - self.cache_dir.display() - ) - }) + let api = hf_hub::api::sync::ApiBuilder::new() + .with_cache_dir(self.cache_dir.clone()) + .build() + .with_context(|| { + format!( + "kebab-nli: hf-hub ApiBuilder::build failed (cache_dir={})", + self.cache_dir.display() + ) + })?; + api.model(self.model_id.clone()) + .get(filename) + .with_context(|| { + format!( + "kebab-nli: hf-hub fetch failed for {filename} (model_id={}, cache_dir={})", + self.model_id, + self.cache_dir.display() + ) + }) } fn load_session(&self) -> Result { @@ -290,10 +287,22 @@ fn sanitize_model_id(s: &str) -> String { mod tests { use super::*; use kebab_config::Config; + use tempfile::TempDir; + + /// Round-1 review N2 fix: redirect Config.storage.{data,model}_dir + /// into a tempdir so unit tests don't litter the user's XDG dirs + /// with empty `nli/` subdirs. + fn tempdir_config() -> (TempDir, Config) { + let tmp = TempDir::new().expect("tempdir"); + let mut cfg = Config::defaults(); + cfg.storage.data_dir = tmp.path().to_string_lossy().into_owned(); + cfg.storage.model_dir = "{data_dir}/models".to_string(); + (tmp, cfg) + } #[test] fn new_succeeds_on_default_config() { - let cfg = Config::defaults(); + let (_tmp, cfg) = tempdir_config(); let v = OnnxNliVerifier::new(&cfg).expect("new should succeed on default config"); // cache_dir must include the sanitized model id (no '/'). let s = v.cache_dir.to_string_lossy(); @@ -313,7 +322,7 @@ mod tests { /// Replaces PR-9a's `score_returns_err_in_skeleton` (stub-only). #[test] fn score_empty_hypothesis_returns_err() { - let cfg = Config::defaults(); + let (_tmp, cfg) = tempdir_config(); let v = OnnxNliVerifier::new(&cfg).unwrap(); let err = v.score("anything", "").expect_err("empty hypothesis must error"); assert!(