From 41061a38ac58fabf29292b4463f0e2770fd59fa9 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 17:58:31 +0900
Subject: [PATCH] =?UTF-8?q?=F0=9F=8F=97=EF=B8=8F=20feat(kebab-app):=20exte?=
 =?UTF-8?q?rnal=20module=20=E2=80=94=20=5Fexternal=20dir=20+=20frontmatter?=
 =?UTF-8?q?=20inject=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pure-fn helpers for the `_external/` workspace subdirectory:
- `ensure_external_dir(workspace_root)` — mkdir if absent
- `ensure_kebabignore_entry(workspace_root)` — append `_external/`
  line to .kebabignore if missing (idempotent)
- `copy_to_external(ext_dir, bytes, ext)` — write to
  `<ext_dir>/<hash12>.<ext>`, idempotent on same content
- `inject_frontmatter(body, title, source_uri?)` — prepend YAML block
  with strict double-quote escaping; errors if body already starts
  with `---`
- `yaml_quote(s)` — defensive escaping for agent-supplied strings

14 unit tests cover happy + idempotency + edge (CRLF frontmatter
detection, YAML escape). ingest_file / ingest_stdin facades (Tasks 2-5)
compose these.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/kebab-app/src/external.rs | 248 +++++++++++++++++++++++++++++++
 crates/kebab-app/src/lib.rs      |   1 +
 2 files changed, 249 insertions(+)
 create mode 100644 crates/kebab-app/src/external.rs

diff --git a/crates/kebab-app/src/external.rs b/crates/kebab-app/src/external.rs
new file mode 100644
index 0000000..61fe8a6
--- /dev/null
+++ b/crates/kebab-app/src/external.rs
@@ -0,0 +1,248 @@
+//! Helpers for the `_external/` workspace subdirectory used by
+//! `ingest_file_with_config` and `ingest_stdin_with_config` (p9-fb-31).
+//!
+//! - `ensure_external_dir`: create `<workspace_root>/_external/` if absent.
+//! - `ensure_kebabignore_entry`: append `_external/` to `<workspace_root>/.kebabignore`
+//!   if missing — prevents subsequent `kebab ingest` workspace walks from
+//!   re-walking files that were imported via single-file ingest.
+//! 
- `copy_to_external`: write bytes to `_external/<hash>.<ext>`, idempotent.
+//! - `inject_frontmatter`: prepend a YAML frontmatter block to a markdown body
+//!   string (used by `ingest_stdin_with_config`).
+
+use std::fs;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+
+/// Name of the workspace subdirectory that receives single-file imports.
+pub const EXTERNAL_DIR: &str = "_external";
+/// Ignore pattern written to `.kebabignore` for that subdirectory.
+const KEBABIGNORE_LINE: &str = "_external/";
+
+/// Ensure `<workspace_root>/_external/` exists. Returns the directory path.
+///
+/// Safe to call repeatedly: an already existing directory is not an error.
+///
+/// # Errors
+/// Returns an error (annotated with the directory path) when the directory
+/// cannot be created.
+pub fn ensure_external_dir(workspace_root: &Path) -> Result<PathBuf> {
+    let dir = workspace_root.join(EXTERNAL_DIR);
+    fs::create_dir_all(&dir)
+        .with_context(|| format!("create _external dir at {}", dir.display()))?;
+    Ok(dir)
+}
+
+/// Append `_external/` line to `<workspace_root>/.kebabignore` if not already
+/// present. Idempotent — checks for the exact line (ignoring surrounding
+/// whitespace) before appending. The file is created when it does not exist.
+///
+/// # Errors
+/// Returns an error (annotated with the file path) when `.kebabignore`
+/// cannot be read, opened for append, or written.
+pub fn ensure_kebabignore_entry(workspace_root: &Path) -> Result<()> {
+    let path = workspace_root.join(".kebabignore");
+    // Read in one step and treat "not found" as an empty file. A separate
+    // `exists()` probe would race with concurrent deletion of the file.
+    let existing = match fs::read_to_string(&path) {
+        Ok(text) => text,
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => String::new(),
+        Err(err) => {
+            return Err(anyhow::Error::new(err).context(format!(
+                "read existing .kebabignore at {}",
+                path.display()
+            )));
+        }
+    };
+    if existing.lines().any(|line| line.trim() == KEBABIGNORE_LINE) {
+        return Ok(());
+    }
+    // Build the whole addition up front so it reaches the file in a single
+    // write: an optional newline terminating the previous last line, then
+    // the entry itself.
+    let mut addition = String::with_capacity(KEBABIGNORE_LINE.len() + 2);
+    if !existing.is_empty() && !existing.ends_with('\n') {
+        addition.push('\n');
+    }
+    addition.push_str(KEBABIGNORE_LINE);
+    addition.push('\n');
+    let mut file = fs::OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&path)
+        .with_context(|| format!("open .kebabignore for append at {}", path.display()))?;
+    file.write_all(addition.as_bytes())
+        .with_context(|| format!("append to .kebabignore at {}", path.display()))?;
+    Ok(())
+}
+
+/// Copy bytes to `<external_dir>/<hash>.<ext>`. Idempotent — if the
+/// destination file already exists with the expected hash, the existing
+/// file is reused (no second write). Returns the destination path.
+pub fn copy_to_external(
+    external_dir: &Path,
+    bytes: &[u8],
+    ext: &str,
+) -> Result<PathBuf> {
+    let hash = blake3::hash(bytes);
+    let hex = hash.to_hex();
+    // The file name is the first 12 hex characters of the BLAKE3 digest
+    // followed by `.` and the caller-supplied extension.
+    let prefix = &hex.as_str()[..12];
+    let dest = external_dir.join(format!("{prefix}.{ext}"));
+    // Reuse the file only when it really holds `bytes`. A bare existence
+    // check would also accept a truncated file left by an interrupted write
+    // (or a prefix clash); in those cases the content is written again.
+    let reusable = match fs::read(&dest) {
+        Ok(on_disk) => on_disk.as_slice() == bytes,
+        Err(_) => false,
+    };
+    if !reusable {
+        fs::write(&dest, bytes)
+            .with_context(|| format!("write external file at {}", dest.display()))?;
+    }
+    Ok(dest)
+}
+
+/// Prepend a YAML frontmatter block to a markdown body. Returns the wrapped
+/// markdown string.
+///
+/// The block contains a quoted `title` and, when `source_uri` is `Some`, a
+/// quoted `source_uri`; it is followed by a blank line and the unmodified
+/// `body`.
+///
+/// # Errors
+/// Errors if `body`, after leading whitespace is skipped, already starts
+/// with a `---` line (LF or CRLF terminated) — the user should use
+/// `ingest_file_with_config` for files that already carry frontmatter.
+pub fn inject_frontmatter(
+    body: &str,
+    title: &str,
+    source_uri: Option<&str>,
+) -> Result<String> {
+    let leading = body.trim_start();
+    if leading.starts_with("---\n") || leading.starts_with("---\r\n") {
+        anyhow::bail!(
+            "stdin already has frontmatter; use `kebab ingest-file` for files with metadata"
+        );
+    }
+    // Rough capacity hint: body + title + fixed YAML scaffolding.
+    let mut wrapped = String::with_capacity(body.len() + title.len() + 64);
+    wrapped.push_str("---\n");
+    wrapped.push_str("title: ");
+    wrapped.push_str(&yaml_quote(title));
+    wrapped.push('\n');
+    if let Some(uri) = source_uri {
+        wrapped.push_str("source_uri: ");
+        wrapped.push_str(&yaml_quote(uri));
+        wrapped.push('\n');
+    }
+    wrapped.push_str("---\n\n");
+    wrapped.push_str(body);
+    Ok(wrapped)
+}
+
+/// YAML-quote a string. Always uses double-quoted form with backslash-escape
+/// for `"` and `\`. Defensive against agent-supplied titles that contain
+/// quotes / control chars.
+fn yaml_quote(s: &str) -> String {
+    let mut out = String::with_capacity(s.len() + 2);
+    out.push('"');
+    for c in s.chars() {
+        match c {
+            '"' => out.push_str("\\\""),
+            '\\' => out.push_str("\\\\"),
+            '\n' => out.push_str("\\n"),
+            '\r' => out.push_str("\\r"),
+            // Everything YAML does not allow verbatim inside a scalar, or
+            // that some parsers treat as a line break (which would fold the
+            // quoted value): C0 controls, DEL, C1 controls (incl. NEL),
+            // LINE/PARAGRAPH SEPARATOR, the byte-order mark and the two
+            // BMP non-characters. All are <= U+FFFF, so `\uXXXX` suffices.
+            '\u{00}'..='\u{1f}'
+            | '\u{7f}'..='\u{9f}'
+            | '\u{2028}'
+            | '\u{2029}'
+            | '\u{feff}'
+            | '\u{fffe}'
+            | '\u{ffff}' => out.push_str(&format!("\\u{:04x}", c as u32)),
+            c => out.push(c),
+        }
+    }
+    out.push('"');
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    #[test]
+    fn ensure_external_dir_creates_dir() {
+        let dir = tempdir().unwrap();
+        let result = ensure_external_dir(dir.path()).unwrap();
+        assert_eq!(result, dir.path().join("_external"));
+        assert!(result.is_dir());
+    }
+
+    #[test]
+    fn ensure_external_dir_is_idempotent() {
+        let dir = tempdir().unwrap();
+        let _ = ensure_external_dir(dir.path()).unwrap();
+        let result = ensure_external_dir(dir.path()).unwrap();
+        assert!(result.is_dir());
+    }
+
+    #[test]
+    fn ensure_kebabignore_entry_creates_file_with_line() {
+        let dir = tempdir().unwrap();
+        ensure_kebabignore_entry(dir.path()).unwrap();
+        let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+        assert!(content.lines().any(|l| l.trim() == "_external/"));
+    }
+
+    #[test]
+    fn ensure_kebabignore_entry_appends_to_existing() {
+        let dir = tempdir().unwrap();
+        fs::write(dir.path().join(".kebabignore"), "*.tmp\n").unwrap();
+        ensure_kebabignore_entry(dir.path()).unwrap();
+        let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+        let lines: Vec<&str> = content.lines().collect();
+        assert!(lines.contains(&"*.tmp"));
+        assert!(lines.contains(&"_external/"));
+    }
+
+    #[test]
+    fn ensure_kebabignore_entry_idempotent() {
+        let dir = tempdir().unwrap();
+        ensure_kebabignore_entry(dir.path()).unwrap();
+        ensure_kebabignore_entry(dir.path()).unwrap();
+        let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+        let count = content.lines().filter(|l| l.trim() == "_external/").count();
+        assert_eq!(count, 1, "should not duplicate");
+    }
+
+    #[test]
+    fn ensure_kebabignore_entry_handles_missing_trailing_newline() {
+        let dir = tempdir().unwrap();
+        fs::write(dir.path().join(".kebabignore"), "*.tmp").unwrap(); // no \n
+        ensure_kebabignore_entry(dir.path()).unwrap();
+        let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+        let lines: Vec<&str> = content.lines().collect();
+        assert!(lines.contains(&"*.tmp"));
+        assert!(lines.contains(&"_external/"));
+    }
+
+    #[test]
+    fn copy_to_external_writes_with_hash_prefix_filename() {
+        let dir = tempdir().unwrap();
+        let ext_dir = ensure_external_dir(dir.path()).unwrap();
+        let path = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        assert!(path.exists());
+        assert!(path.file_name().unwrap().to_string_lossy().ends_with(".md"));
+        let stem = path.file_stem().unwrap().to_string_lossy();
+        assert_eq!(stem.len(), 12);
+    }
+
+    #[test]
+    fn copy_to_external_is_idempotent_for_same_bytes() {
+        let dir = tempdir().unwrap();
+        let ext_dir = ensure_external_dir(dir.path()).unwrap();
+        let p1 = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        let p2 = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        assert_eq!(p1, p2);
+    }
+
+    #[test]
+    fn copy_to_external_different_bytes_produce_different_filenames() {
+        let dir = tempdir().unwrap();
+        let ext_dir = ensure_external_dir(dir.path()).unwrap();
+        let p1 = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        let p2 = copy_to_external(&ext_dir, b"world", "md").unwrap();
+        assert_ne!(p1, p2);
+    }
+
+    #[test]
+    fn inject_frontmatter_basic() {
+        let out = inject_frontmatter("## Body", "Article X", None).unwrap();
+        assert!(out.starts_with("---\ntitle: \"Article X\"\n---\n\n## Body"));
+    }
+
+    #[test]
+    fn inject_frontmatter_with_source_uri() {
+        let out = inject_frontmatter("## Body", "X", Some("https://example.com/x")).unwrap();
+        assert!(out.contains("title: \"X\""));
+        assert!(out.contains("source_uri: \"https://example.com/x\""));
+        assert!(out.contains("\n## Body"));
+    }
+
+    #[test]
+    fn inject_frontmatter_errors_on_existing_frontmatter() {
+        let body = "---\ntitle: Existing\n---\n\n## Body";
+        let err = inject_frontmatter(body, "New", None).unwrap_err();
+        assert!(err.to_string().contains("already has frontmatter"));
+    }
+
+    #[test]
+    fn inject_frontmatter_errors_on_existing_frontmatter_crlf() {
+        let body = "---\r\ntitle: Existing\r\n---\r\n\r\n## Body";
+        let err = inject_frontmatter(body, "New", None).unwrap_err();
+        assert!(err.to_string().contains("already has frontmatter"));
+    }
+
+    #[test]
+    fn yaml_quote_escapes_quotes_and_backslashes() {
+        assert_eq!(yaml_quote("hello \"world\""), "\"hello \\\"world\\\"\"");
+        assert_eq!(yaml_quote("path\\to"), "\"path\\\\to\"");
+        assert_eq!(yaml_quote("line\nbreak"), "\"line\\nbreak\"");
+        // Non-printable / line-breaking code points beyond the C0 range.
+        assert_eq!(yaml_quote("tab\there"), "\"tab\\u0009here\"");
+        assert_eq!(yaml_quote("a\u{7f}b"), "\"a\\u007fb\"");
+        assert_eq!(yaml_quote("a\u{85}b\u{2028}c"), "\"a\\u0085b\\u2028c\"");
+        assert_eq!(yaml_quote("caf\u{e9}"), "\"caf\u{e9}\"");
+    }
+}
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index 58c4062..c9dc518 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -58,6 +58,7 @@ mod app;
 pub mod doctor_signal;
 pub mod error_signal;
 pub mod error_wire;
+pub mod external;
 pub mod ingest_progress;
 pub mod logging;
 pub mod reset;