feat(search): provenance 출처 필터 — [[workspace.sources]] 멀티소스 + --source/--source-type
혼합 출처 KB(위키+jira 등)에서 색인은 전부 하되 질의 시 출처로 좁히는 provenance 레버. 전역 trust 곱셈가중(weighted-RRF)은 A/B 에서 반증(θ=0.85 만으로 incident MRR 0.918→0.340 절벽, 점수 압축) — 필터가 see-saw 없는 올바른 레버. - config [[workspace.sources]] (각 id/root/exclude/trust_level/source_type); 단일 root 는 implicit `default` source 로 정규화. validate: id 유일·비어있지 않음. - config schema v3→v4 (step_3_to_4, root→[[workspace.sources]] id=default 미러, 멱등) - V014 documents.source_id 컬럼+인덱스 (additive, DEFAULT 'default', 재색인 0) - Metadata.source_id + BodyHints trust precedence(frontmatter > source 기본값 > Primary) - ingest: --root 미지정 시 resolved_sources() 순회 + doc 마다 source_id/trust stamp - 검색 SearchFilters.source_type/source_id → lexical + vector 두 site (IN, OR) - CLI kebab search --source <id> / --source-type <type> (repeatable/comma-sep) 도그푸딩(620 doc, jira400+wiki220): --source wiki 로 개념 질의 MRR 0.780→0.810, --source jira 로 incident 0.918→0.975. trust precedence 실측(jira=secondary 기본값). version bump 0.28.0 → 0.29.0 (신규 CLI flag + config 키 + V014 migration → minor). follow-up: MCP search 필터 미노출 · kebab list source_id 미표시 · RAG provenance 라벨. 자세한 내용: tasks/HOTFIXES.md (2026-06-21), docs/release-notes/v0.29.0-draft.md. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Claude-Session: https://claude.ai/code/session_012Mc6W1fgsrbFKTsqA6P8La
This commit is contained in:
@@ -12,6 +12,12 @@ mod paths;
|
||||
pub mod migrate;
|
||||
pub use paths::{expand_path, expand_path_with_base};
|
||||
|
||||
/// Implicit source id used when a single-root `[workspace]` config (no
|
||||
/// `[[workspace.sources]]`) is normalized into the multi-source model, and
|
||||
/// the `DEFAULT` value of the `documents.source_id` column. Kept in sync
|
||||
/// with the `DEFAULT 'default'` of the V014 `documents.source_id` migration.
|
||||
pub const DEFAULT_SOURCE_ID: &str = "default";
|
||||
|
||||
/// f32 의 shortest round-trip(Display)을 f64 로 재파싱해 직렬화한다.
|
||||
/// `0.3_f32` 가 `0.30000001192092896` 으로 새지 않고 `0.3` 으로 출력되게 한다.
|
||||
/// 마이그레이션 시 toml_edit relocation 의 무손실 비교를 깨지 않도록, 그리고
|
||||
@@ -88,8 +94,67 @@ pub struct Config {
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct WorkspaceCfg {
|
||||
pub root: String,
|
||||
/// Single-root workspace (legacy / common case). `Option` so that a
|
||||
/// config that declares only `[[workspace.sources]]` (no bare `root`)
|
||||
/// parses — and, symmetrically, a legacy single-`root` config (no
|
||||
/// `sources`) still parses unchanged. [`Config::resolved_sources`]
/// reconciles the two into a single non-empty source list
/// (`id = "default"` synthesized from `root`).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub root: Option<String>,
|
||||
pub exclude: Vec<String>,
|
||||
/// `[[workspace.sources]]`: named multi-source declaration. When empty
|
||||
/// and `root` is set, the load path normalizes to a single implicit
|
||||
/// `default` source. Each entry stamps its `id` onto every document it
|
||||
/// ingests and supplies per-source `trust_level` / `source_type`
|
||||
/// defaults (frontmatter still wins per the §0 Q9 derive table).
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub sources: Vec<SourceCfg>,
|
||||
}
|
||||
|
||||
/// One named source under `[[workspace.sources]]`.
|
||||
///
|
||||
/// `trust_level` / `source_type` are the **source-level defaults**: they
|
||||
/// apply when a document's frontmatter does not specify the field. The
|
||||
/// precedence is `frontmatter > source default > hardcoded`
|
||||
/// (`TrustLevel::Primary` / `SourceType::Markdown`) — implemented in the
|
||||
/// markdown derive via `BodyHints::fallback_trust_level`.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SourceCfg {
|
||||
/// Stable identifier stamped onto `documents.source_id` for every
|
||||
/// document ingested from this source. Must be unique and non-empty
|
||||
/// across the workspace (enforced in [`Config::validate`]).
|
||||
pub id: String,
|
||||
/// Root directory to walk for this source. Accepts the same
|
||||
/// absolute / `~` / `${VAR}` / relative(=config-dir-based) forms as
|
||||
/// the legacy `workspace.root`.
|
||||
pub root: String,
|
||||
/// Per-source denylist globs, merged on top of `workspace.exclude`.
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub exclude: Vec<String>,
|
||||
/// Per-source default `trust_level` (frontmatter overrides it).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub trust_level: Option<kebab_core::TrustLevel>,
|
||||
/// Per-source default `source_type` (frontmatter overrides it).
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
pub source_type: Option<kebab_core::SourceType>,
|
||||
}
|
||||
|
||||
/// A source with its `root` resolved to an absolute path and its `exclude`
|
||||
/// merged with `workspace.exclude`. Produced by [`Config::resolved_sources`]
|
||||
/// — the single entry point the ingest pipeline iterates over.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct ResolvedSource {
|
||||
/// Stamped onto `documents.source_id`.
|
||||
pub id: String,
|
||||
/// Absolute walk root (tilde / `${VAR}` / relative-to-config resolved).
|
||||
pub root: PathBuf,
|
||||
/// `workspace.exclude` ∪ per-source `exclude`.
|
||||
pub exclude: Vec<String>,
|
||||
/// Per-source default trust level (None → fall back to `Primary`).
|
||||
pub trust_level: Option<kebab_core::TrustLevel>,
|
||||
/// Per-source default source type (None → fall back to `Markdown`).
|
||||
pub source_type: Option<kebab_core::SourceType>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
@@ -782,12 +847,13 @@ impl Config {
|
||||
Self {
|
||||
schema_version: crate::migrate::CURRENT_SCHEMA_VERSION,
|
||||
workspace: WorkspaceCfg {
|
||||
root: "~/KnowledgeBase".to_string(),
|
||||
root: Some("~/KnowledgeBase".to_string()),
|
||||
exclude: vec![
|
||||
".git/**".to_string(),
|
||||
"node_modules/**".to_string(),
|
||||
".obsidian/**".to_string(),
|
||||
],
|
||||
sources: vec![],
|
||||
},
|
||||
storage: StorageCfg {
|
||||
data_dir: "${XDG_DATA_HOME:-~/.local/share}/kebab".to_string(),
|
||||
@@ -906,7 +972,78 @@ impl Config {
|
||||
PathBuf::from(".")
|
||||
})
|
||||
});
|
||||
paths::expand_path_with_base(&self.workspace.root, "", &base)
|
||||
paths::expand_path_with_base(&self.primary_root_raw(), "", &base)
|
||||
}
|
||||
|
||||
/// The raw (unexpanded) string for the *primary* workspace root, used by
|
||||
/// [`resolve_workspace_root`](Self::resolve_workspace_root) and any
|
||||
/// single-root code path. Order: first `[[workspace.sources]]` entry's
|
||||
/// `root` → bare `workspace.root` → `~/KnowledgeBase` default. This keeps
|
||||
/// every pre-existing single-root call site working when only `sources`
|
||||
/// is declared.
|
||||
fn primary_root_raw(&self) -> String {
|
||||
if let Some(s) = self.workspace.sources.first() {
|
||||
return s.root.clone();
|
||||
}
|
||||
self.workspace
|
||||
.root
|
||||
.clone()
|
||||
.unwrap_or_else(|| "~/KnowledgeBase".to_string())
|
||||
}
|
||||
|
||||
/// The base directory for resolving relative source roots: the config
|
||||
/// file's directory when loaded from disk, else the current dir (mirrors
|
||||
/// [`resolve_workspace_root`](Self::resolve_workspace_root)).
|
||||
fn root_resolution_base(&self) -> PathBuf {
|
||||
self.source_dir.clone().unwrap_or_else(|| {
|
||||
std::env::current_dir().unwrap_or_else(|e| {
|
||||
tracing::warn!(
|
||||
target: "kebab-config",
|
||||
error = %e,
|
||||
"current_dir() failed; falling back to '.' for source root resolution"
|
||||
);
|
||||
PathBuf::from(".")
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
/// Normalized, resolved list of sources to ingest. Always non-empty:
|
||||
///
|
||||
/// - If `[[workspace.sources]]` is declared, each entry is returned with
|
||||
/// its `root` expanded and `exclude` merged with `workspace.exclude`.
|
||||
/// - Otherwise a single implicit source `id = "default"` is synthesized
|
||||
/// from `workspace.root` (the legacy single-root path).
|
||||
///
|
||||
/// This is the single entry point the ingest pipeline iterates over, so
|
||||
/// single-root and multi-source configs share one code path.
|
||||
pub fn resolved_sources(&self) -> Vec<ResolvedSource> {
|
||||
let base = self.root_resolution_base();
|
||||
if self.workspace.sources.is_empty() {
|
||||
let root = paths::expand_path_with_base(&self.primary_root_raw(), "", &base);
|
||||
return vec![ResolvedSource {
|
||||
id: DEFAULT_SOURCE_ID.to_string(),
|
||||
root,
|
||||
exclude: self.workspace.exclude.clone(),
|
||||
trust_level: None,
|
||||
source_type: None,
|
||||
}];
|
||||
}
|
||||
self.workspace
|
||||
.sources
|
||||
.iter()
|
||||
.map(|s| {
|
||||
let root = paths::expand_path_with_base(&s.root, "", &base);
|
||||
let mut exclude = self.workspace.exclude.clone();
|
||||
exclude.extend(s.exclude.iter().cloned());
|
||||
ResolvedSource {
|
||||
id: s.id.clone(),
|
||||
root,
|
||||
exclude,
|
||||
trust_level: s.trust_level,
|
||||
source_type: s.source_type,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Read config from disk and merge env overrides on top of it. If the
|
||||
@@ -1019,10 +1156,41 @@ impl Config {
|
||||
cause: format!("parse_failed: {e}"),
|
||||
})
|
||||
})?;
|
||||
cfg.validate_sources().map_err(|cause| {
|
||||
anyhow::Error::new(ConfigInvalid {
|
||||
path: path.to_path_buf(),
|
||||
cause,
|
||||
})
|
||||
})?;
|
||||
cfg.source_dir = path.parent().map(Path::to_path_buf);
|
||||
Ok(cfg)
|
||||
}
|
||||
|
||||
/// Validate `[[workspace.sources]]`: every `id` must be non-empty and
|
||||
/// unique across the workspace. Empty `sources` (legacy single-root) is
|
||||
/// always valid. Returns the failure cause string for `ConfigInvalid`.
|
||||
fn validate_sources(&self) -> Result<(), String> {
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
for s in &self.workspace.sources {
|
||||
if s.id.trim().is_empty() {
|
||||
return Err("workspace.sources: an entry has an empty `id`".to_string());
|
||||
}
|
||||
if s.root.trim().is_empty() {
|
||||
return Err(format!(
|
||||
"workspace.sources: source `{}` has an empty `root`",
|
||||
s.id
|
||||
));
|
||||
}
|
||||
if !seen.insert(s.id.as_str()) {
|
||||
return Err(format!(
|
||||
"workspace.sources: duplicate source id `{}` (ids must be unique)",
|
||||
s.id
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Apply `KEBAB_<SECTION>_<KEY>` env overrides. Unknown keys are ignored.
|
||||
///
|
||||
/// The mapping is an explicit grep-friendly whitelist — one match arm
|
||||
@@ -1037,7 +1205,7 @@ impl Config {
|
||||
}
|
||||
match k.as_str() {
|
||||
// workspace
|
||||
"KEBAB_WORKSPACE_ROOT" => self.workspace.root = v.clone(),
|
||||
"KEBAB_WORKSPACE_ROOT" => self.workspace.root = Some(v.clone()),
|
||||
|
||||
// storage
|
||||
"KEBAB_STORAGE_DATA_DIR" => self.storage.data_dir = v.clone(),
|
||||
@@ -2034,7 +2202,7 @@ max_context_tokens = 8000
|
||||
#[test]
|
||||
fn legacy_include_field_is_ignored_silently() {
|
||||
let mut cfg = Config::defaults();
|
||||
cfg.workspace.root = "/tmp/kebab-legacy".to_string();
|
||||
cfg.workspace.root = Some("/tmp/kebab-legacy".to_string());
|
||||
let mut toml_text = toml::to_string(&cfg).expect("default round-trips");
|
||||
// Inject a legacy `include = [...]` line into the [workspace] block.
|
||||
toml_text = toml_text.replace(
|
||||
@@ -2048,20 +2216,105 @@ max_context_tokens = 8000
|
||||
parsed.err()
|
||||
);
|
||||
let cfg = parsed.unwrap();
|
||||
assert_eq!(cfg.workspace.root, "/tmp/kebab-legacy");
|
||||
assert_eq!(cfg.workspace.root.as_deref(), Some("/tmp/kebab-legacy"));
|
||||
}
|
||||
|
||||
/// p9-fb-25: `WorkspaceCfg` must NOT have an `include` field.
|
||||
/// Compile-time proof: exhaustive destructure.
|
||||
#[test]
|
||||
fn workspace_cfg_has_only_root_and_exclude_fields() {
|
||||
fn workspace_cfg_has_only_root_exclude_sources_fields() {
|
||||
let ws = Config::defaults().workspace;
|
||||
let WorkspaceCfg {
|
||||
root: _,
|
||||
exclude: _,
|
||||
sources: _,
|
||||
} = &ws;
|
||||
}
|
||||
|
||||
#[test]
fn legacy_single_root_normalizes_to_default_source() {
    // With no [[workspace.sources]] declared, resolution must yield exactly
    // one source: `id = "default"`, rooted at workspace.root, without a
    // source-level trust default.
    let mut legacy = Config::defaults();
    legacy.workspace.root = Some("/tmp/kb-notes".to_string());

    let sources = legacy.resolved_sources();
    assert_eq!(sources.len(), 1);

    let only = &sources[0];
    assert_eq!(only.id, DEFAULT_SOURCE_ID);
    assert_eq!(only.root, std::path::PathBuf::from("/tmp/kb-notes"));
    assert_eq!(only.trust_level, None);
}
|
||||
|
||||
#[test]
fn multi_source_config_resolves_each_with_merged_exclude() {
    let notes = SourceCfg {
        id: "notes".to_string(),
        root: "/tmp/notes".to_string(),
        exclude: Vec::new(),
        trust_level: Some(kebab_core::TrustLevel::Primary),
        source_type: None,
    };
    let refs = SourceCfg {
        id: "refs".to_string(),
        root: "/tmp/refs".to_string(),
        exclude: vec!["draft/**".to_string()],
        trust_level: Some(kebab_core::TrustLevel::Secondary),
        source_type: Some(kebab_core::SourceType::Reference),
    };

    let mut original = Config::defaults();
    original.workspace.root = None;
    original.workspace.exclude = vec![".git/**".to_string()];
    original.workspace.sources = vec![notes, refs];

    // A config that declares only sources (no bare root) must survive a
    // TOML round-trip and still validate.
    let serialized = toml::to_string(&original).expect("multi-source serializes");
    let reloaded: Config = toml::from_str(&serialized).expect("multi-source parses");
    reloaded.validate_sources().expect("valid sources");

    let resolved = reloaded.resolved_sources();
    assert_eq!(resolved.len(), 2);
    let (first, second) = (&resolved[0], &resolved[1]);

    assert_eq!(first.id, "notes");
    assert_eq!(first.root, std::path::PathBuf::from("/tmp/notes"));
    assert_eq!(first.exclude, vec![".git/**".to_string()]);
    assert_eq!(first.trust_level, Some(kebab_core::TrustLevel::Primary));

    assert_eq!(second.id, "refs");
    // Workspace-wide globs come first, then the per-source ones.
    assert_eq!(
        second.exclude,
        vec![".git/**".to_string(), "draft/**".to_string()]
    );
    assert_eq!(
        second.source_type,
        Some(kebab_core::SourceType::Reference)
    );
    assert_eq!(
        second.trust_level,
        Some(kebab_core::TrustLevel::Secondary)
    );
}
|
||||
|
||||
fn source_cfg(id: &str, root: &str) -> SourceCfg {
|
||||
SourceCfg {
|
||||
id: id.to_string(),
|
||||
root: root.to_string(),
|
||||
exclude: vec![],
|
||||
trust_level: None,
|
||||
source_type: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn duplicate_source_ids_rejected() {
    let mut cfg = Config::defaults();
    cfg.workspace.sources = vec![source_cfg("dup", "/a"), source_cfg("dup", "/b")];

    // Pin the cause, not just the failure: a bare `is_err()` would also pass
    // if some other validation rule rejected this config.
    let cause = cfg
        .validate_sources()
        .expect_err("duplicate ids must fail");
    assert!(
        cause.contains("duplicate source id `dup`"),
        "unexpected cause: {cause}"
    );
}
|
||||
|
||||
#[test]
fn empty_source_id_rejected() {
    let mut cfg = Config::defaults();
    cfg.workspace.sources = vec![source_cfg("", "/a")];

    // Pin the cause, not just the failure: a bare `is_err()` would also pass
    // if some other validation rule rejected this config.
    let cause = cfg.validate_sources().expect_err("empty id must fail");
    assert!(cause.contains("empty `id`"), "unexpected cause: {cause}");
}
|
||||
|
||||
#[test]
|
||||
fn default_stale_threshold_is_30() {
|
||||
let c = Config::defaults();
|
||||
|
||||
@@ -9,7 +9,7 @@ use toml_edit::{DocumentMut, Item};
|
||||
|
||||
/// 현재 바이너리가 이해하는 config 스키마 버전. 마이그레이션 완료 시
|
||||
/// 사용자 파일의 `schema_version` 을 이 값으로 stamp 한다.
|
||||
pub const CURRENT_SCHEMA_VERSION: u32 = 3;
|
||||
pub const CURRENT_SCHEMA_VERSION: u32 = 4;
|
||||
|
||||
/// 한 번의 마이그레이션에서 발생한 개별 변경.
|
||||
#[derive(Clone, Debug, PartialEq, serde::Serialize)]
|
||||
@@ -68,6 +68,7 @@ const HEADER: &str = "\
|
||||
fn section_comment(path: &str) -> Option<&'static str> {
|
||||
Some(match path {
|
||||
"workspace" => "# 색인 대상 워크스페이스.",
|
||||
"workspace.sources" => "# named multi-source (각 source 의 id 가 documents.source_id 로 stamp).",
|
||||
"storage" => "# XDG 저장 경로(데이터/sqlite/벡터/에셋/모델).",
|
||||
"indexing" => "# 병렬도 + 파일시스템 watch.",
|
||||
"chunking" => "# 청크 크기·오버랩·heading 존중.",
|
||||
@@ -376,6 +377,39 @@ pub fn step_2_to_3(doc: &mut DocumentMut, changes: &mut Vec<MigrationChange>) {
|
||||
copy_image_paddle_to_pdf(doc);
|
||||
}
|
||||
|
||||
/// v3 → v4: 단일 `workspace.root` 를 `[[workspace.sources]]` 의 implicit
|
||||
/// `default` source 로 미러링한다(`id = "default"`, `root = <기존 root>`).
|
||||
/// 기존 `workspace.root` 키는 그대로 둔다 — `resolved_sources()` 가 sources
|
||||
/// 가 있으면 그쪽을 우선하므로 무해하고, defaults reconcile 이 root 를 다시
|
||||
/// 추가하려 하지 않게 한다. 멱등: `[[workspace.sources]]` 가 이미 있으면 no-op.
|
||||
pub fn step_3_to_4(doc: &mut DocumentMut, changes: &mut Vec<MigrationChange>) {
|
||||
let Some(ws) = doc.get_mut("workspace").and_then(Item::as_table_mut) else {
|
||||
return;
|
||||
};
|
||||
// 이미 sources 가 선언돼 있으면(array-of-tables 든 inline 이든) 손대지 않음.
|
||||
if ws.contains_key("sources") {
|
||||
return;
|
||||
}
|
||||
// root 가 없으면 만들 게 없음(defaults 에는 항상 있지만 방어).
|
||||
let Some(root_val) = ws.get("root").and_then(Item::as_str).map(str::to_string) else {
|
||||
return;
|
||||
};
|
||||
|
||||
let mut entry = toml_edit::Table::new();
|
||||
entry.insert("id", toml_edit::value("default"));
|
||||
entry.insert("root", toml_edit::value(root_val));
|
||||
|
||||
let mut aot = toml_edit::ArrayOfTables::new();
|
||||
aot.push(entry);
|
||||
ws.insert("sources", Item::ArrayOfTables(aot));
|
||||
|
||||
changes.push(MigrationChange {
|
||||
kind: ChangeKind::AddedSection,
|
||||
path: "workspace.sources".to_string(),
|
||||
detail: "workspace.root → [[workspace.sources]] id=default".to_string(),
|
||||
});
|
||||
}
|
||||
|
||||
/// 파일의 schema_version(없으면 1) 부터 CURRENT 까지 step 적용.
|
||||
fn run_steps(doc: &mut DocumentMut, from: u32, changes: &mut Vec<MigrationChange>) {
|
||||
if from < 2 {
|
||||
@@ -384,6 +418,9 @@ fn run_steps(doc: &mut DocumentMut, from: u32, changes: &mut Vec<MigrationChange
|
||||
if from < 3 {
|
||||
step_2_to_3(doc, changes);
|
||||
}
|
||||
if from < 4 {
|
||||
step_3_to_4(doc, changes);
|
||||
}
|
||||
}
|
||||
|
||||
/// 사용자 config.toml 텍스트를 받아 step 체인 + reconciliation + version
|
||||
@@ -648,6 +685,76 @@ engine = \"paddle-onnx\"
|
||||
assert!(again.is_empty(), "not idempotent: {again:?}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn step_3_to_4_mirrors_root_into_default_source() {
|
||||
let v3 = "\
|
||||
schema_version = 3
|
||||
|
||||
[workspace]
|
||||
root = \"/my/notes\"
|
||||
exclude = [\".git/**\"]
|
||||
";
|
||||
let mut doc: DocumentMut = v3.parse().unwrap();
|
||||
let mut changes = Vec::new();
|
||||
step_3_to_4(&mut doc, &mut changes);
|
||||
let out = doc.to_string();
|
||||
// 새 array-of-tables 가 id=default 로 추가.
|
||||
assert!(out.contains("[[workspace.sources]]"), "{out}");
|
||||
assert!(out.contains("id = \"default\""), "{out}");
|
||||
// 기존 root 는 보존(reconcile 이 다시 추가하지 않게).
|
||||
assert!(out.contains("root = \"/my/notes\""), "{out}");
|
||||
// 재파싱 후 sources.default 가 root 를 미러.
|
||||
let reparsed: DocumentMut = out.parse().unwrap();
|
||||
let src0 = reparsed["workspace"]["sources"][0].as_table().unwrap();
|
||||
assert_eq!(src0["id"].as_str(), Some("default"));
|
||||
assert_eq!(src0["root"].as_str(), Some("/my/notes"));
|
||||
// 멱등.
|
||||
let mut changes2 = Vec::new();
|
||||
step_3_to_4(&mut doc, &mut changes2);
|
||||
assert!(changes2.is_empty(), "step_3_to_4 not idempotent");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn step_3_to_4_noop_when_sources_already_present() {
|
||||
let v4 = "\
|
||||
schema_version = 4
|
||||
|
||||
[workspace]
|
||||
root = \"/my/notes\"
|
||||
exclude = []
|
||||
|
||||
[[workspace.sources]]
|
||||
id = \"notes\"
|
||||
root = \"/my/notes\"
|
||||
";
|
||||
let mut doc: DocumentMut = v4.parse().unwrap();
|
||||
let mut changes = Vec::new();
|
||||
step_3_to_4(&mut doc, &mut changes);
|
||||
assert!(changes.is_empty(), "must not touch existing sources");
|
||||
// 기존 source 만 존재(default 가 추가되지 않음).
|
||||
assert!(!doc.to_string().contains("id = \"default\""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn migrate_document_v3_to_v4_adds_sources_and_is_idempotent() {
|
||||
let v3 = "\
|
||||
schema_version = 3
|
||||
|
||||
[workspace]
|
||||
root = \"/n\"
|
||||
exclude = []
|
||||
";
|
||||
let outcome = migrate_document(v3);
|
||||
assert_eq!(outcome.from_schema_version, 3);
|
||||
assert_eq!(outcome.to_schema_version, 4);
|
||||
assert!(outcome.changed());
|
||||
assert!(outcome.new_text.contains("[[workspace.sources]]"));
|
||||
assert_eq!(read_schema_version(&outcome.new_text), 4);
|
||||
let again = migrate_document(&outcome.new_text);
|
||||
assert!(!again.changed(), "not idempotent: {:?}", again.changes);
|
||||
assert_eq!(again.new_text, outcome.new_text);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn migrate_document_missing_schema_version_treated_as_v1() {
|
||||
let old = "[workspace]\nroot = \"/n\"\n";
|
||||
|
||||
@@ -11,11 +11,16 @@ const USER_V2: &str = include_str!("fixtures/user_v2_config.toml");
|
||||
fn user_v2_migrates_losslessly() {
|
||||
let out = migrate_document(USER_V2);
|
||||
assert_eq!(out.from_schema_version, 2);
|
||||
assert_eq!(out.to_schema_version, 3);
|
||||
// v2 → CURRENT(=4): v3 의 [ingest.*] relocation 에 더해 v4 의
|
||||
// [[workspace.sources]] default source 미러링까지 적용된다.
|
||||
assert_eq!(out.to_schema_version, 4);
|
||||
let t = &out.new_text;
|
||||
|
||||
// 사용자 값 보존.
|
||||
assert!(t.contains("root = \"/Users/user/Obsidian/Default\""), "{t}");
|
||||
// v4: workspace.root → [[workspace.sources]] id=default 미러링.
|
||||
assert!(t.contains("[[workspace.sources]]"), "v4 sources 누락:\n{t}");
|
||||
assert!(t.contains("id = \"default\""), "default source 누락:\n{t}");
|
||||
assert!(t.contains("model = \"snowflake-arctic-embed2\""));
|
||||
assert!(t.contains("endpoint = \"http://192.168.0.2:11943\""));
|
||||
// 사용자 주석/대안 줄 보존.
|
||||
|
||||
Reference in New Issue
Block a user