Files
kebab/docs/wire-schema/v1/search_hit.schema.json
altair823 be79bdb83d docs(#7): distinguish vector-store vs FTS5 index_version (Todo #7)
schema.schema.json models.index_version: vector store (LanceDB) version 임을 명시.
search_hit.schema.json index_version: lexical (FTS5) version 임을 명시.
search_hit.schema.json retrieval: 내부 필드 목록 + hybrid 전용 fusion 설명 추가 (hunk 공유).
README kebab schema 행: index_version 두 곳의 의미가 다름을 주의 표기 추가.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 02:45:04 +00:00

50 lines
2.5 KiB
JSON

{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://kb.local/wire/v1/search_hit.schema.json",
"title": "SearchHit v1",
"description": "Stub schema — declares the schema_version label and the required top-level fields per design §2.2.",
"type": "object",
"required": [
"schema_version",
"rank",
"score",
"chunk_id",
"doc_id",
"doc_path",
"heading_path",
"snippet",
"citation",
"retrieval",
"index_version",
"chunker_version",
"indexed_at",
"stale"
],
"properties": {
"schema_version": { "const": "search_hit.v1" },
"rank": { "type": "integer", "minimum": 1 },
"score": { "type": "number", "description": "canonical ranking score. 의미는 `score_kind` 가 선언 (rrf/bm25/cosine). single-mode 에서는 fusion 미실행 → `retrieval.fusion_score` 와 동일." },
"score_kind": {
"type": "string",
"enum": ["rrf", "bm25", "cosine"],
"description": "p9-fb-38: kind of `score` value. `rrf` = RRF normalized [0,1] (hybrid mode); `bm25` = raw BM25 score (lexical-only); `cosine` = raw cosine similarity (vector-only). Older clients that omit this field can treat absence as `rrf` (the historical default)."
},
"chunk_id": { "type": "string" },
"doc_id": { "type": "string" },
"doc_path": { "type": "string" },
"heading_path": { "type": "array", "items": { "type": "string" } },
"section_label": { "type": ["string", "null"] },
"snippet": { "type": "string" },
"snippet_full_text": { "type": "boolean" },
"citation": { "type": "object" },
"retrieval": { "type": "object", "description": "retrieval detail. `fusion_score` / `lexical_score` / `vector_score` / `lexical_rank` / `vector_rank` 가 여기 안에 있다 (top-level 아님). hybrid 에서만 `fusion_score` 가 RRF normalized 값." },
"index_version": { "type": "string", "description": "v0.20.2 (Todo #7): lexical (FTS5) index version (예 \"fts5-v009-korean-morphological\"). schema.v1 의 `models.index_version` (vector store / LanceDB, 예 \"v1\") 과는 다른 의미." },
"embedding_model": { "type": ["string", "null"] },
"chunker_version": { "type": "string" },
"indexed_at": { "type": "string", "format": "date-time" },
"stale": { "type": "boolean" },
"repo": { "type": ["string", "null"] },
"code_lang": { "type": ["string", "null"] }
}
}