🏗️ refactor(fb-27): apply round 1 review nits
- schema.rs: extract `SCHEMA_V1_ID` const + re-export via kebab-app::lib.rs. wire.rs::wire_schema 의 2 literal 도 import 해서 single source of truth. - schema.rs::collect_models: parser_version 가 markdown 만 surface 함을 주석으로 명시 (PDF/image extractor 의 자체 version 은 SchemaV1.models 가 multi-medium map 으로 진화 시 surface). - main.rs::print_schema_text: 헤더 줄 끝의 `\n` 제거 + `println!()` 추가 — 다른 section 들과 패턴 일관. - error_classify.rs::llm_unreachable_classifies: timeout 50ms → 500ms (10x headroom) + 접근 방식 + 한계 주석 추가. - HOTFIXES: open_existing 의 RW flag + 주석-only enforcement 갭을 Known-limitation 에 명시. Round 1 review summary: #104 (comment) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -65,7 +65,7 @@ pub mod schema;
|
||||
pub use app::App;
|
||||
pub use ingest_progress::{AggregateCounts, IngestEvent, render_skipped_breakdown};
|
||||
pub use reset::{ResetReport, ResetScope};
|
||||
pub use schema::{Capabilities, Models, SchemaV1, Stats, WireBlock, schema_with_config};
|
||||
pub use schema::{Capabilities, Models, SCHEMA_V1_ID, SchemaV1, Stats, WireBlock, schema_with_config};
|
||||
|
||||
/// p9-fb-25: sentinel for files without an extension in
|
||||
/// `IngestReport.skipped_by_extension` keys + `IngestItem.warnings`
|
||||
|
||||
@@ -54,6 +54,10 @@ pub struct Stats {
|
||||
|
||||
const KEBAB_VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
|
||||
/// Wire schema id for [`SchemaV1`]. Single source of truth — `kebab-cli`
|
||||
/// re-uses this via `kebab_app::schema::SCHEMA_V1_ID` when wrapping.
|
||||
pub const SCHEMA_V1_ID: &str = "schema.v1";
|
||||
|
||||
// Authoritative list of wire schemas this binary emits. Keep in sync with
|
||||
// `docs/wire-schema/v1/*.schema.json` and `kebab-cli::wire::wire_*` helpers.
|
||||
const WIRE_SCHEMAS: &[&str] = &[
|
||||
@@ -83,7 +87,7 @@ pub fn schema_with_config(cfg: &Config) -> anyhow::Result<SchemaV1> {
|
||||
let stats = collect_stats(&store)?;
|
||||
let models = collect_models(cfg, &store);
|
||||
Ok(SchemaV1 {
|
||||
schema_version: "schema.v1".to_string(),
|
||||
schema_version: SCHEMA_V1_ID.to_string(),
|
||||
kebab_version: KEBAB_VERSION.to_string(),
|
||||
wire: WireBlock {
|
||||
schemas: WIRE_SCHEMAS.iter().map(|s| (*s).to_string()).collect(),
|
||||
@@ -131,6 +135,9 @@ fn collect_stats(store: &kebab_store_sqlite::SqliteStore) -> anyhow::Result<Stat
|
||||
|
||||
fn collect_models(cfg: &Config, store: &kebab_store_sqlite::SqliteStore) -> Models {
|
||||
Models {
|
||||
// markdown parser only — pdf-page-v1 (P7) / image extractors (P6)
|
||||
// maintain their own versions; surface those when SchemaV1.models
|
||||
// becomes a multi-medium map (P+).
|
||||
parser_version: kebab_parse_md::PARSER_VERSION.to_string(),
|
||||
chunker_version: cfg.chunking.chunker_version.clone(),
|
||||
// EmbeddingModelCfg uses `.model` (not `.id`) — adapt from plan.
|
||||
|
||||
@@ -133,9 +133,13 @@ mod tests {
|
||||
#[test]
|
||||
fn llm_unreachable_classifies_to_model_unreachable() {
|
||||
// We cannot construct a reqwest::Error from scratch (private constructor).
|
||||
// Use a real network call with a guaranteed-unroutable endpoint:
|
||||
// Approach: send a real request to a guaranteed-unroutable endpoint
|
||||
// (port 1 is reserved + connect-refused on all conformant TCP stacks).
|
||||
// 500ms timeout chosen as headroom over 50ms baseline — heavily loaded
|
||||
// CI may hit timeout race instead of connect-refused, but either way
|
||||
// the resulting LlmError::Unreachable maps to "model_unreachable".
|
||||
let client = reqwest::blocking::Client::builder()
|
||||
.timeout(std::time::Duration::from_millis(50))
|
||||
.timeout(std::time::Duration::from_millis(500))
|
||||
.build().unwrap();
|
||||
let err = client.get("http://127.0.0.1:1").send().unwrap_err();
|
||||
let llm = LlmError::Unreachable {
|
||||
|
||||
@@ -744,7 +744,8 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
|
||||
}
|
||||
|
||||
fn print_schema_text(s: &kebab_app::SchemaV1) {
|
||||
println!("kebab v{}\n", s.kebab_version);
|
||||
println!("kebab v{}", s.kebab_version);
|
||||
println!();
|
||||
|
||||
println!("wire schemas");
|
||||
println!(" {}", s.wire.schemas.join(", "));
|
||||
|
||||
@@ -144,12 +144,12 @@ pub fn wire_schema(s: &kebab_app::SchemaV1) -> Value {
|
||||
if let Value::Object(ref map) = v {
|
||||
if matches!(
|
||||
map.get("schema_version"),
|
||||
Some(Value::String(s)) if s == "schema.v1"
|
||||
Some(Value::String(s)) if s == kebab_app::SCHEMA_V1_ID
|
||||
) {
|
||||
return v;
|
||||
}
|
||||
}
|
||||
tag_object(v, "schema.v1")
|
||||
tag_object(v, kebab_app::SCHEMA_V1_ID)
|
||||
}
|
||||
|
||||
/// Wrap an [`crate::error_classify::ErrorV1`] as `error.v1`.
|
||||
|
||||
@@ -46,6 +46,7 @@ git history.
|
||||
- `LlmError::Stream` / `Malformed` 가 `code: "generic"` fallback — 후속 task 에서 `stream_aborted` / `malformed_response` 같은 dedicated code 도입 검토 (design §10.1 future-extensions 절 참조).
|
||||
- `not_indexed.details` 가 `{ expected, found }` 만 emit (spec literal 의 `{ data_dir, expected, found }` 아님 — `expected` 가 full DB path 라 data_dir 은 caller 에서 derive 해야 함, NotIndexed signal 자체는 path 한 개만 carry).
|
||||
- README 의 wire schema 목록과 CLAUDE.md 의 wire schema 목록이 fb-27 머지 시점에 약간 일치 안 함 (CLAUDE.md 가 `eval_run.v1`/`eval_compare.v1`/`list_docs.v1` 포함, 실제 docs/wire-schema/v1/ 에 해당 파일 없음). 별 follow-up 에서 doc / 실제 wire 동기화 sweep 진행.
|
||||
- `SqliteStore::open_existing` 가 `SQLITE_OPEN_READ_WRITE` 로 열고 doc 으로만 "callers should not issue mutations" 명시 — 컴파일러 enforcement 없음. 후속 PR 에서 `apply_pragmas` 의 WAL 라인을 분리한 `apply_read_pragmas` + `SQLITE_OPEN_READ_ONLY` 변형 도입 검토 (WAL mode 는 DB 헤더에 영속이라 RO 도 동작 가능).
|
||||
|
||||
**Amends**:
|
||||
- design §10 (capability matrix subsection 추가).
|
||||
|
||||
Reference in New Issue
Block a user