diff --git a/crates/kebab-app/src/external.rs b/crates/kebab-app/src/external.rs index 61fe8a6..7a51990 100644 --- a/crates/kebab-app/src/external.rs +++ b/crates/kebab-app/src/external.rs @@ -78,12 +78,17 @@ pub fn copy_to_external( /// markdown string. Errors if `body` already starts with `---` (the user /// should use `ingest_file_with_config` for files that already carry /// frontmatter). +/// +/// Internal `yaml_quote` always uses double-quoted YAML form with backslash +/// escapes for `"` / `\` / control chars — agent-supplied titles with +/// special characters are safe. pub fn inject_frontmatter( body: &str, title: &str, source_uri: Option<&str>, ) -> Result { - if body.trim_start().starts_with("---\n") || body.trim_start().starts_with("---\r\n") { + let head = body.trim_start(); + if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") { anyhow::bail!( "stdin already has frontmatter; use `kebab ingest-file` for files with metadata" ); diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 20ee48b..fa5b242 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -1899,10 +1899,19 @@ pub fn ingest_file_with_config( anyhow::bail!("ingest-file: not a regular file: {}", path.display()); } - let ext = path + let ext_raw = path .extension() .and_then(|e| e.to_str()) .ok_or_else(|| anyhow::anyhow!("ingest-file: source has no extension: {}", path.display()))?; + let ext = ext_raw.to_lowercase(); + + const SUPPORTED_EXTS: &[&str] = &["md", "pdf", "png", "jpg", "jpeg"]; + if !SUPPORTED_EXTS.contains(&ext.as_str()) { + anyhow::bail!( + "ingest-file: unsupported extension `.{}` (supported: {:?})", + ext, SUPPORTED_EXTS + ); + } let bytes = std::fs::read(path) .with_context(|| format!("ingest-file: read source {}", path.display()))?; @@ -1925,7 +1934,7 @@ pub fn ingest_file_with_config( .context("ingest-file: append _external/ to .kebabignore")?; // Copy bytes to _external/.. - let dest = crate::external::copy_to_external(&external_dir, &bytes, ext) + let dest = crate::external::copy_to_external(&external_dir, &bytes, &ext) .context("ingest-file: copy to _external")?; // Build a SourceScope that targets _external/ with include filter @@ -1963,6 +1972,11 @@ pub fn ingest_stdin_with_config( let wrapped = crate::external::inject_frontmatter(body, title, source_uri)?; let workspace_root = config.resolve_workspace_root(); + // Note: ensure_external_dir + ensure_kebabignore_entry + copy_to_external + // are called here AND inside ingest_file_with_config. All three are + // idempotent; the redundancy is intentional — keeping stdin's wrapped + // bytes accessible by `ingest_file_with_config` requires the dest path + // to exist. The ~ms double-stat overhead is negligible at v1 scale. let external_dir = crate::external::ensure_external_dir(&workspace_root)?; crate::external::ensure_kebabignore_entry(&workspace_root)?; diff --git a/crates/kebab-app/tests/ingest_file.rs b/crates/kebab-app/tests/ingest_file.rs index b70fa4d..85255f6 100644 --- a/crates/kebab-app/tests/ingest_file.rs +++ b/crates/kebab-app/tests/ingest_file.rs @@ -87,3 +87,25 @@ fn ingest_file_errors_on_missing_path() { let err = kebab_app::ingest_file_with_config(cfg, &nonexistent).unwrap_err(); assert!(err.to_string().contains("does not exist"), "{err}"); } + +#[test] +fn ingest_file_errors_on_unsupported_extension() { + let dir = tempfile::tempdir().unwrap(); + let workspace = dir.path().join("notes"); + let data = dir.path().join("data"); + fs::create_dir_all(&workspace).unwrap(); + fs::create_dir_all(&data).unwrap(); + + let mut cfg = Config::defaults(); + cfg.workspace.root = workspace.to_string_lossy().into_owned(); + cfg.storage.data_dir = data.to_string_lossy().into_owned(); + cfg.models.embedding.provider = "none".to_string(); + cfg.models.embedding.dimensions = 0; + + let docx = dir.path().join("doc.docx"); + fs::write(&docx, b"fake docx bytes").unwrap(); + + let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err(); + assert!(err.to_string().contains("unsupported extension"), "{err}"); + assert!(err.to_string().contains(".docx") || err.to_string().contains("docx"), "{err}"); +}