프로젝트 이름 `kb` → `kebab` rename 의 첫 단계.

- workspace `Cargo.toml`: members `crates/kb-*` → `crates/kebab-*`, repository URL `altair823/kb` → `altair823/kebab`.
- 18 crate 폴더 rename via `git mv` (history 보존).
- 각 crate `Cargo.toml`: `name = "kb-*"` → `"kebab-*"`, path deps `../kb-*` → `../kebab-*`.
- 모든 `.rs`: `kb_<id>` snake-case 모듈 path 18 개 (`kb_core`, `kb_config`, `kb_app`, `kb_cli`, `kb_eval`, `kb_search`, `kb_chunk`, `kb_normalize`, `kb_source_fs`, `kb_parse_md`, `kb_parse_types`, `kb_store_sqlite`, `kb_store_vector`, `kb_embed`, `kb_embed_local`, `kb_llm`, `kb_llm_local`, `kb_rag`) → `kebab_<id>` 일괄 sed (단어 경계 `\b` 를 사용해 영어 문장 안의 "kb" 약어 미오염).

CLI binary 이름 (`[[bin]] name = "kb"`), 환경변수 `KB_*`, XDG paths, tracing target, 그리고 docs sweep 은 다음 commit 에서.

## 검증

- `cargo check --workspace` clean — 모든 crate 빌드 통과 후 commit.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
105 lines
3.2 KiB
Rust
105 lines
3.2 KiB
Rust
//! Path / string normalization helpers (§4.1, §6.6).
|
|
|
|
use std::path::{Component, Path};
|
|
|
|
use unicode_normalization::UnicodeNormalization;
|
|
|
|
use crate::asset::WorkspacePath;
|
|
use crate::errors::CoreError;
|
|
|
|
/// NFC-normalize a UTF-8 string (§4.1).
|
|
pub fn nfc(input: &str) -> String {
|
|
input.nfc().collect()
|
|
}
|
|
|
|
/// Collapse a path to a POSIX-relative `WorkspacePath` per §6.6:
|
|
/// - convert all separators to `/`
|
|
/// - strip a leading `./`
|
|
/// - collapse repeated slashes
|
|
/// - NFC-normalize
|
|
///
|
|
/// Returns `Err(CoreError::Malformed(..))` if the resulting POSIX form
|
|
/// contains `#`, since `WorkspacePath` is forbidden from colliding with
|
|
/// the W3C-Media-Fragments separator that `Citation` URIs depend on.
|
|
pub fn to_posix(path: &Path) -> Result<WorkspacePath, CoreError> {
|
|
let mut out = String::new();
|
|
let mut first = true;
|
|
for comp in path.components() {
|
|
match comp {
|
|
Component::CurDir => continue,
|
|
Component::Normal(s) => {
|
|
if !first {
|
|
out.push('/');
|
|
}
|
|
out.push_str(&s.to_string_lossy());
|
|
first = false;
|
|
}
|
|
Component::ParentDir => {
|
|
if !first {
|
|
out.push('/');
|
|
}
|
|
out.push_str("..");
|
|
first = false;
|
|
}
|
|
Component::RootDir => {
|
|
if first {
|
|
out.push('/');
|
|
}
|
|
first = false;
|
|
}
|
|
Component::Prefix(_) => {
|
|
// Windows drive prefixes — `to_string_lossy` keeps form.
|
|
out.push_str(&comp.as_os_str().to_string_lossy());
|
|
first = false;
|
|
}
|
|
}
|
|
}
|
|
if out.is_empty() {
|
|
out.push('.');
|
|
}
|
|
WorkspacePath::new(nfc(&out))
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// A leading `./` and a doubled separator both disappear.
    #[test]
    fn collapses_curdir_and_redundant_slashes() {
        // `Path::components` already folds `//` on POSIX, so the expected
        // collapsed form is simply `a/b.md`.
        let converted = to_posix(Path::new("./a//b.md")).unwrap();
        assert_eq!(converted.0, "a/b.md");
    }

    /// Decomposed and precomposed Hangul input yield the same NFC path.
    #[test]
    fn nfc_normalizes_korean() {
        // U+1100 (choseong kiyeok) + U+1161 (jungseong a) is the decomposed
        // spelling of U+AC00 (가). `to_posix` applies NFC after collapsing
        // the path, so both spellings must produce the composed form.
        let decomposed = to_posix(Path::new("\u{1100}\u{1161}.md")).unwrap();
        let composed = to_posix(Path::new("\u{AC00}.md")).unwrap();
        assert_eq!(decomposed.0, composed.0);
        assert_eq!(decomposed.0, "\u{AC00}.md");
    }

    /// Text that is already NFC passes through `nfc` unchanged.
    #[test]
    fn nfc_function_idempotent() {
        let already_composed = "\u{AC00}";
        assert_eq!(nfc(already_composed), already_composed);
    }

    /// A `#` anywhere in the path is refused.
    #[test]
    fn to_posix_rejects_hash_in_path() {
        // `#` is the W3C-Media-Fragments separator that `Citation` relies
        // on; the `WorkspacePath` invariant rejects it at construction.
        let outcome = to_posix(Path::new("notes/has#hash.md"));
        let err = outcome.expect_err("# in path must be rejected");
        let msg = err.to_string();
        assert!(msg.contains('#'), "error message should mention '#': {msg}");
    }
}
|