From 41061a38ac58fabf29292b4463f0e2770fd59fa9 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 17:58:31 +0900
Subject: [PATCH] =?UTF-8?q?=F0=9F=8F=97=EF=B8=8F=20feat(kebab-app):=20exte?=
=?UTF-8?q?rnal=20module=20=E2=80=94=20=5Fexternal=20dir=20+=20frontmatter?=
=?UTF-8?q?=20inject=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Pure-fn helpers for the `_external/` workspace subdirectory:
- `ensure_external_dir(workspace_root)` — mkdir if absent
- `ensure_kebabignore_entry(workspace_root)` — append `_external/` line
to .kebabignore if missing (idempotent)
- `copy_to_external(ext_dir, bytes, ext)` — write to
`<ext_dir>/<blake3-hex-prefix>.<ext>`, idempotent on same content
- `inject_frontmatter(body, title, source_uri?)` — prepend YAML block
with strict double-quote escaping; errors if body already starts
with `---`
- `yaml_quote(s)` — defensive escaping for agent-supplied strings
14 unit tests cover happy + idempotency + edge (CRLF frontmatter
detection, YAML escape).
ingest_file / ingest_stdin facades (Tasks 2-5) compose these.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/external.rs | 248 +++++++++++++++++++++++++++++++
crates/kebab-app/src/lib.rs | 1 +
2 files changed, 249 insertions(+)
create mode 100644 crates/kebab-app/src/external.rs
diff --git a/crates/kebab-app/src/external.rs b/crates/kebab-app/src/external.rs
new file mode 100644
index 0000000..61fe8a6
--- /dev/null
+++ b/crates/kebab-app/src/external.rs
@@ -0,0 +1,248 @@
+//! Helpers for the `_external/` workspace subdirectory used by
+//! `ingest_file_with_config` and `ingest_stdin_with_config` (p9-fb-31).
+//!
+//! - `ensure_external_dir`: create `<workspace_root>/_external/` if absent.
+//! - `ensure_kebabignore_entry`: append `_external/` to `<workspace_root>/.kebabignore`
+//!   if missing — prevents subsequent `kebab ingest` workspace walks from
+//!   re-walking files that were imported via single-file ingest.
+//! - `copy_to_external`: write bytes to `_external/<hash-prefix>.<ext>`, idempotent.
+//! - `inject_frontmatter`: prepend a YAML frontmatter block to a markdown body
+//! string (used by `ingest_stdin_with_config`).
+
+use std::fs;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+
+pub const EXTERNAL_DIR: &str = "_external"; // directory name joined onto the workspace root
+const KEBABIGNORE_LINE: &str = "_external/"; // ignore entry matched/appended in `.kebabignore`
+
+/// Ensure `<workspace_root>/_external/` exists, creating it if absent. Returns the directory path.
+pub fn ensure_external_dir(workspace_root: &Path) -> Result<PathBuf> {
+    let dir = workspace_root.join(EXTERNAL_DIR);
+    fs::create_dir_all(&dir)
+        .with_context(|| format!("create _external dir at {}", dir.display()))?;
+    Ok(dir)
+}
+
+/// Append an `_external/` line to `<workspace_root>/.kebabignore` if not already
+/// present. Idempotent — skips the append when any whitespace-trimmed line matches.
+pub fn ensure_kebabignore_entry(workspace_root: &Path) -> Result<()> {
+    let path = workspace_root.join(".kebabignore");
+    let existing = if path.exists() {
+        fs::read_to_string(&path)
+            .with_context(|| format!("read existing .kebabignore at {}", path.display()))?
+    } else {
+        String::new()
+    };
+    // Compare trimmed lines so stray whitespace around an existing entry still counts.
+    if existing.lines().any(|line| line.trim() == KEBABIGNORE_LINE) {
+        return Ok(());
+    }
+    // `create(true)` covers the no-file case; `append(true)` leaves existing entries untouched.
+    let mut file = fs::OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&path)
+        .with_context(|| format!("open .kebabignore for append at {}", path.display()))?;
+    // Without a trailing newline the new entry would be glued onto the last existing line.
+    if !existing.is_empty() && !existing.ends_with('\n') {
+        file.write_all(b"\n").context("terminate last line of .kebabignore")?;
+    }
+    writeln!(file, "{}", KEBABIGNORE_LINE).context("append _external/ line to .kebabignore")?;
+    Ok(())
+}
+
+/// Copy bytes to `<external_dir>/<hash12>.<ext>`, where `<hash12>` is the first 12 hex
+/// chars of the BLAKE3 digest of `bytes`. Idempotent — an existing destination file is
+/// reused as-is (its content is not re-hashed). Returns the destination path.
+pub fn copy_to_external(
+    external_dir: &Path,
+    bytes: &[u8],
+    ext: &str,
+) -> Result<PathBuf> {
+    let hash = blake3::hash(bytes);
+    let hex = hash.to_hex();
+    let prefix = &hex.as_str()[..12];
+    let filename = format!("{prefix}.{ext}");
+    let dest = external_dir.join(&filename);
+    if !dest.exists() {
+        fs::write(&dest, bytes)
+            .with_context(|| format!("write external file at {}", dest.display()))?;
+    }
+    Ok(dest)
+}
+
+/// Prepend a YAML frontmatter block (`title`, optional `source_uri`) to a markdown
+/// body and return the wrapped string. Errors if `body`, ignoring leading
+/// whitespace, already opens with a `---` line (the user should use
+/// `ingest_file_with_config` for files that already carry frontmatter).
+pub fn inject_frontmatter(
+    body: &str,
+    title: &str,
+    source_uri: Option<&str>,
+) -> Result<String> {
+    // Leading whitespace is skipped so a `---` fence after blank lines is still caught.
+    let lead = body.trim_start();
+    if lead.starts_with("---\n") || lead.starts_with("---\r\n") {
+        anyhow::bail!(
+            "stdin already has frontmatter; use `kebab ingest-file` for files with metadata"
+        );
+    }
+    // Values go through `yaml_quote` so quotes/newlines cannot break out of the scalar.
+    let mut header = format!("---\ntitle: {}\n", yaml_quote(title));
+    if let Some(uri) = source_uri {
+        header.push_str(&format!("source_uri: {}\n", yaml_quote(uri)));
+    }
+    // Close the block and leave one blank line before the body.
+    header.push_str("---\n\n");
+    header.push_str(body);
+    Ok(header)
+}
+
+/// YAML-quote a string as a double-quoted scalar. `"` and `\` are backslash-escaped; CR/LF,
+/// other control chars (C0, DEL, C1) and U+2028/U+2029 become `\n`, `\r` or `\uXXXX`.
+fn yaml_quote(s: &str) -> String {
+    let mut out = String::with_capacity(s.len() + 2);
+    out.push('"');
+    for c in s.chars() {
+        match c {
+            '"' => out.push_str("\\\""),
+            '\\' => out.push_str("\\\\"),
+            '\n' => out.push_str("\\n"),
+            '\r' => out.push_str("\\r"),
+            '\u{2028}' | '\u{2029}' => out.push_str(&format!("\\u{:04x}", c as u32)),
+            c if c.is_control() => out.push_str(&format!("\\u{:04x}", c as u32)),
+            c => out.push(c),
+        }
+    }
+    out.push('"');
+    out
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use tempfile::tempdir;
+
+ #[test]
+ fn ensure_external_dir_creates_dir() {
+ let dir = tempdir().unwrap();
+ let result = ensure_external_dir(dir.path()).unwrap();
+ assert_eq!(result, dir.path().join("_external"));
+ assert!(result.is_dir());
+ }
+
+ #[test]
+ fn ensure_external_dir_is_idempotent() {
+ let dir = tempdir().unwrap();
+ let _ = ensure_external_dir(dir.path()).unwrap(); // first call creates the directory
+ let result = ensure_external_dir(dir.path()).unwrap();
+ assert!(result.is_dir());
+ }
+
+ #[test]
+ fn ensure_kebabignore_entry_creates_file_with_line() {
+ let dir = tempdir().unwrap();
+ ensure_kebabignore_entry(dir.path()).unwrap();
+ let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+ assert!(content.lines().any(|l| l.trim() == "_external/"));
+ }
+
+ #[test]
+ fn ensure_kebabignore_entry_appends_to_existing() {
+ let dir = tempdir().unwrap();
+ fs::write(dir.path().join(".kebabignore"), "*.tmp\n").unwrap(); // pre-existing entry that must survive
+ ensure_kebabignore_entry(dir.path()).unwrap();
+ let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+ let lines: Vec<&str> = content.lines().collect();
+ assert!(lines.contains(&"*.tmp"));
+ assert!(lines.contains(&"_external/"));
+ }
+
+ #[test]
+ fn ensure_kebabignore_entry_idempotent() {
+ let dir = tempdir().unwrap();
+ ensure_kebabignore_entry(dir.path()).unwrap();
+ ensure_kebabignore_entry(dir.path()).unwrap(); // second call must not append again
+ let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+ let count = content.lines().filter(|l| l.trim() == "_external/").count();
+ assert_eq!(count, 1, "should not duplicate");
+ }
+
+ #[test]
+ fn ensure_kebabignore_entry_handles_missing_trailing_newline() {
+ let dir = tempdir().unwrap();
+ fs::write(dir.path().join(".kebabignore"), "*.tmp").unwrap(); // no trailing \n, so the helper must add one first
+ ensure_kebabignore_entry(dir.path()).unwrap();
+ let content = fs::read_to_string(dir.path().join(".kebabignore")).unwrap();
+ let lines: Vec<&str> = content.lines().collect();
+ assert!(lines.contains(&"*.tmp"));
+ assert!(lines.contains(&"_external/"));
+ }
+
+ #[test]
+ fn copy_to_external_writes_with_hash_prefix_filename() {
+ let dir = tempdir().unwrap();
+ let ext_dir = ensure_external_dir(dir.path()).unwrap();
+ let path = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+ assert!(path.exists());
+ assert!(path.file_name().unwrap().to_string_lossy().ends_with(".md"));
+ let stem = path.file_stem().unwrap().to_string_lossy();
+ assert_eq!(stem.len(), 12); // file stem is the 12-char hex hash prefix
+ }
+
+ #[test]
+ fn copy_to_external_is_idempotent_for_same_bytes() {
+ let dir = tempdir().unwrap();
+ let ext_dir = ensure_external_dir(dir.path()).unwrap();
+ let p1 = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+ let p2 = copy_to_external(&ext_dir, b"hello", "md").unwrap(); // same bytes: destination already exists
+ assert_eq!(p1, p2);
+ }
+
+ #[test]
+ fn copy_to_external_different_bytes_produce_different_filenames() {
+ let dir = tempdir().unwrap();
+ let ext_dir = ensure_external_dir(dir.path()).unwrap();
+ let p1 = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+ let p2 = copy_to_external(&ext_dir, b"world", "md").unwrap();
+ assert_ne!(p1, p2);
+ }
+
+ #[test]
+ fn inject_frontmatter_basic() {
+ let out = inject_frontmatter("## Body", "Article X", None).unwrap();
+ assert!(out.starts_with("---\ntitle: \"Article X\"\n---\n\n## Body"));
+ }
+
+ #[test]
+ fn inject_frontmatter_with_source_uri() {
+ let out = inject_frontmatter("## Body", "X", Some("https://example.com/x")).unwrap();
+ assert!(out.contains("title: \"X\""));
+ assert!(out.contains("source_uri: \"https://example.com/x\""));
+ assert!(out.contains("\n## Body"));
+ }
+
+ #[test]
+ fn inject_frontmatter_errors_on_existing_frontmatter() {
+ let body = "---\ntitle: Existing\n---\n\n## Body";
+ let err = inject_frontmatter(body, "New", None).unwrap_err();
+ assert!(err.to_string().contains("already has frontmatter"));
+ }
+
+ #[test]
+ fn inject_frontmatter_errors_on_existing_frontmatter_crlf() {
+ let body = "---\r\ntitle: Existing\r\n---\r\n\r\n## Body"; // CRLF-terminated fence must be detected too
+ let err = inject_frontmatter(body, "New", None).unwrap_err();
+ assert!(err.to_string().contains("already has frontmatter"));
+ }
+
+ #[test]
+ fn yaml_quote_escapes_quotes_and_backslashes() {
+ assert_eq!(yaml_quote("hello \"world\""), "\"hello \\\"world\\\"\"");
+ assert_eq!(yaml_quote("path\\to"), "\"path\\\\to\"");
+ assert_eq!(yaml_quote("line\nbreak"), "\"line\\nbreak\"");
+ }
+}
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index 58c4062..c9dc518 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -58,6 +58,7 @@ mod app;
pub mod doctor_signal;
pub mod error_signal;
pub mod error_wire;
+pub mod external;
pub mod ingest_progress;
pub mod logging;
pub mod reset;