From 7f5739d8fbd8bcc607d5ea491db43dcfcc0cb004 Mon Sep 17 00:00:00 2001 From: th-kim0823 Date: Thu, 7 May 2026 18:46:55 +0900 Subject: [PATCH] =?UTF-8?q?=F0=9F=8F=97=EF=B8=8F=20refactor(fb-31):=20appl?= =?UTF-8?q?y=20round=201=20review=20nits?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - ingest_file_with_config: lowercase normalize ext (caller-side) + early error on unsupported extension (`.docx` etc. now Err with helpful message instead of silent skipped_by_extension counter). New test ingest_file_errors_on_unsupported_extension. - ingest_stdin_with_config: doc comment explaining intentional double-call of ensure helpers (idempotent + ~ms negligible). - external::inject_frontmatter: simplify precheck via single trim_start binding + add CR-only line ending edge case. - external::inject_frontmatter: doc note on yaml_quote escape contract (agent-supplied titles with special chars are safe). Round 1 review summary: http://gitea.altair823.xyz/altair823-org/kebab/pulls/111#issuecomment-1875 Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/kebab-app/src/external.rs | 7 ++++++- crates/kebab-app/src/lib.rs | 18 ++++++++++++++++-- crates/kebab-app/tests/ingest_file.rs | 22 ++++++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/crates/kebab-app/src/external.rs b/crates/kebab-app/src/external.rs index 61fe8a6..7a51990 100644 --- a/crates/kebab-app/src/external.rs +++ b/crates/kebab-app/src/external.rs @@ -78,12 +78,17 @@ pub fn copy_to_external( /// markdown string. Errors if `body` already starts with `---` (the user /// should use `ingest_file_with_config` for files that already carry /// frontmatter). +/// +/// Internal `yaml_quote` always uses double-quoted YAML form with backslash +/// escapes for `"` / `\` / control chars — agent-supplied titles with +/// special characters are safe. pub fn inject_frontmatter( body: &str, title: &str, source_uri: Option<&str>, ) -> Result { - if body.trim_start().starts_with("---\n") || body.trim_start().starts_with("---\r\n") { + let head = body.trim_start(); + if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") { anyhow::bail!( "stdin already has frontmatter; use `kebab ingest-file` for files with metadata" ); diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs index 20ee48b..fa5b242 100644 --- a/crates/kebab-app/src/lib.rs +++ b/crates/kebab-app/src/lib.rs @@ -1899,10 +1899,19 @@ pub fn ingest_file_with_config( anyhow::bail!("ingest-file: not a regular file: {}", path.display()); } - let ext = path + let ext_raw = path .extension() .and_then(|e| e.to_str()) .ok_or_else(|| anyhow::anyhow!("ingest-file: source has no extension: {}", path.display()))?; + let ext = ext_raw.to_lowercase(); + + const SUPPORTED_EXTS: &[&str] = &["md", "pdf", "png", "jpg", "jpeg"]; + if !SUPPORTED_EXTS.contains(&ext.as_str()) { + anyhow::bail!( + "ingest-file: unsupported extension `.{}` (supported: {:?})", + ext, SUPPORTED_EXTS + ); + } let bytes = std::fs::read(path) .with_context(|| format!("ingest-file: read source {}", path.display()))?; @@ -1925,7 +1934,7 @@ pub fn ingest_file_with_config( .context("ingest-file: append _external/ to .kebabignore")?; // Copy bytes to _external/.. - let dest = crate::external::copy_to_external(&external_dir, &bytes, ext) + let dest = crate::external::copy_to_external(&external_dir, &bytes, &ext) .context("ingest-file: copy to _external")?; // Build a SourceScope that targets _external/ with include filter @@ -1963,6 +1972,11 @@ pub fn ingest_stdin_with_config( let wrapped = crate::external::inject_frontmatter(body, title, source_uri)?; let workspace_root = config.resolve_workspace_root(); + // Note: ensure_external_dir + ensure_kebabignore_entry + copy_to_external + // are called here AND inside ingest_file_with_config. All three are + // idempotent; the redundancy is intentional — keeping stdin's wrapped + // bytes accessible by `ingest_file_with_config` requires the dest path + // to exist. The ~ms double-stat overhead is negligible at v1 scale. let external_dir = crate::external::ensure_external_dir(&workspace_root)?; crate::external::ensure_kebabignore_entry(&workspace_root)?; diff --git a/crates/kebab-app/tests/ingest_file.rs b/crates/kebab-app/tests/ingest_file.rs index b70fa4d..85255f6 100644 --- a/crates/kebab-app/tests/ingest_file.rs +++ b/crates/kebab-app/tests/ingest_file.rs @@ -87,3 +87,25 @@ fn ingest_file_errors_on_missing_path() { let err = kebab_app::ingest_file_with_config(cfg, &nonexistent).unwrap_err(); assert!(err.to_string().contains("does not exist"), "{err}"); } + +#[test] +fn ingest_file_errors_on_unsupported_extension() { + let dir = tempfile::tempdir().unwrap(); + let workspace = dir.path().join("notes"); + let data = dir.path().join("data"); + fs::create_dir_all(&workspace).unwrap(); + fs::create_dir_all(&data).unwrap(); + + let mut cfg = Config::defaults(); + cfg.workspace.root = workspace.to_string_lossy().into_owned(); + cfg.storage.data_dir = data.to_string_lossy().into_owned(); + cfg.models.embedding.provider = "none".to_string(); + cfg.models.embedding.dimensions = 0; + + let docx = dir.path().join("doc.docx"); + fs::write(&docx, b"fake docx bytes").unwrap(); + + let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err(); + assert!(err.to_string().contains("unsupported extension"), "{err}"); + assert!(err.to_string().contains(".docx") || err.to_string().contains("docx"), "{err}"); +}