From 7f5739d8fbd8bcc607d5ea491db43dcfcc0cb004 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:46:55 +0900
Subject: [PATCH] =?UTF-8?q?=F0=9F=8F=97=EF=B8=8F=20refactor(fb-31):=20appl?=
=?UTF-8?q?y=20round=201=20review=20nits?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- ingest_file_with_config: lowercase normalize ext (caller-side) +
early error on unsupported extension (`.docx` etc. now Err with
helpful message instead of silent skipped_by_extension counter).
New test ingest_file_errors_on_unsupported_extension.
- ingest_stdin_with_config: doc comment explaining intentional
double-call of ensure helpers (idempotent + ~ms negligible).
- external::inject_frontmatter: simplify precheck via single
trim_start binding + add CR-only line ending edge case.
- external::inject_frontmatter: doc note on yaml_quote escape
contract (agent-supplied titles with special chars are safe).
Round 1 review summary: http://gitea.altair823.xyz/altair823-org/kebab/pulls/111#issuecomment-1875
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/external.rs | 7 ++++++-
crates/kebab-app/src/lib.rs | 18 ++++++++++++++++--
crates/kebab-app/tests/ingest_file.rs | 22 ++++++++++++++++++++++
3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/crates/kebab-app/src/external.rs b/crates/kebab-app/src/external.rs
index 61fe8a6..7a51990 100644
--- a/crates/kebab-app/src/external.rs
+++ b/crates/kebab-app/src/external.rs
@@ -78,12 +78,17 @@ pub fn copy_to_external(
/// markdown string. Errors if `body` already starts with `---` (the user
/// should use `ingest_file_with_config` for files that already carry
/// frontmatter).
+///
+/// Internal `yaml_quote` always uses double-quoted YAML form with backslash
+/// escapes for `"` / `\` / control chars — agent-supplied titles with
+/// special characters are safe.
pub fn inject_frontmatter(
body: &str,
title: &str,
source_uri: Option<&str>,
) -> Result {
- if body.trim_start().starts_with("---\n") || body.trim_start().starts_with("---\r\n") {
+ let head = body.trim_start();
+ if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") {
anyhow::bail!(
"stdin already has frontmatter; use `kebab ingest-file` for files with metadata"
);
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index 20ee48b..fa5b242 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -1899,10 +1899,19 @@ pub fn ingest_file_with_config(
anyhow::bail!("ingest-file: not a regular file: {}", path.display());
}
- let ext = path
+ let ext_raw = path
.extension()
.and_then(|e| e.to_str())
.ok_or_else(|| anyhow::anyhow!("ingest-file: source has no extension: {}", path.display()))?;
+ let ext = ext_raw.to_lowercase();
+
+ const SUPPORTED_EXTS: &[&str] = &["md", "pdf", "png", "jpg", "jpeg"];
+ if !SUPPORTED_EXTS.contains(&ext.as_str()) {
+ anyhow::bail!(
+ "ingest-file: unsupported extension `.{}` (supported: {:?})",
+ ext, SUPPORTED_EXTS
+ );
+ }
let bytes = std::fs::read(path)
.with_context(|| format!("ingest-file: read source {}", path.display()))?;
@@ -1925,7 +1934,7 @@ pub fn ingest_file_with_config(
.context("ingest-file: append _external/ to .kebabignore")?;
// Copy bytes to _external/..
- let dest = crate::external::copy_to_external(&external_dir, &bytes, ext)
+ let dest = crate::external::copy_to_external(&external_dir, &bytes, &ext)
.context("ingest-file: copy to _external")?;
// Build a SourceScope that targets _external/ with include filter
@@ -1963,6 +1972,11 @@ pub fn ingest_stdin_with_config(
let wrapped = crate::external::inject_frontmatter(body, title, source_uri)?;
let workspace_root = config.resolve_workspace_root();
+ // Note: ensure_external_dir + ensure_kebabignore_entry + copy_to_external
+ // are called here AND inside ingest_file_with_config. All three are
+ // idempotent; the redundancy is intentional — keeping stdin's wrapped
+ // bytes accessible by `ingest_file_with_config` requires the dest path
+ // to exist. The ~ms double-stat overhead is negligible at v1 scale.
let external_dir = crate::external::ensure_external_dir(&workspace_root)?;
crate::external::ensure_kebabignore_entry(&workspace_root)?;
diff --git a/crates/kebab-app/tests/ingest_file.rs b/crates/kebab-app/tests/ingest_file.rs
index b70fa4d..85255f6 100644
--- a/crates/kebab-app/tests/ingest_file.rs
+++ b/crates/kebab-app/tests/ingest_file.rs
@@ -87,3 +87,25 @@ fn ingest_file_errors_on_missing_path() {
let err = kebab_app::ingest_file_with_config(cfg, &nonexistent).unwrap_err();
assert!(err.to_string().contains("does not exist"), "{err}");
}
+
+#[test]
+fn ingest_file_errors_on_unsupported_extension() {
+ let dir = tempfile::tempdir().unwrap();
+ let workspace = dir.path().join("notes");
+ let data = dir.path().join("data");
+ fs::create_dir_all(&workspace).unwrap();
+ fs::create_dir_all(&data).unwrap();
+
+ let mut cfg = Config::defaults();
+ cfg.workspace.root = workspace.to_string_lossy().into_owned();
+ cfg.storage.data_dir = data.to_string_lossy().into_owned();
+ cfg.models.embedding.provider = "none".to_string();
+ cfg.models.embedding.dimensions = 0;
+
+ let docx = dir.path().join("doc.docx");
+ fs::write(&docx, b"fake docx bytes").unwrap();
+
+ let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err();
+ assert!(err.to_string().contains("unsupported extension"), "{err}");
+ assert!(err.to_string().contains(".docx") || err.to_string().contains("docx"), "{err}");
+}