🏗️ refactor(fb-31): apply round 1 review nits
- ingest_file_with_config: lowercase normalize ext (caller-side) + early error on unsupported extension (`.docx` etc. now Err with helpful message instead of silent skipped_by_extension counter). New test ingest_file_errors_on_unsupported_extension. - ingest_stdin_with_config: doc comment explaining intentional double-call of ensure helpers (idempotent + ~ms negligible). - external::inject_frontmatter: simplify precheck via single trim_start binding + add CR-only line ending edge case. - external::inject_frontmatter: doc note on yaml_quote escape contract (agent-supplied titles with special chars are safe). Round 1 review summary: #111 (comment) Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -78,12 +78,17 @@ pub fn copy_to_external(
|
||||
/// markdown string. Errors if `body` already starts with `---` (the user
|
||||
/// should use `ingest_file_with_config` for files that already carry
|
||||
/// frontmatter).
|
||||
///
|
||||
/// Internal `yaml_quote` always uses double-quoted YAML form with backslash
|
||||
/// escapes for `"` / `\` / control chars — agent-supplied titles with
|
||||
/// special characters are safe.
|
||||
pub fn inject_frontmatter(
|
||||
body: &str,
|
||||
title: &str,
|
||||
source_uri: Option<&str>,
|
||||
) -> Result<String> {
|
||||
if body.trim_start().starts_with("---\n") || body.trim_start().starts_with("---\r\n") {
|
||||
let head = body.trim_start();
|
||||
if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") {
|
||||
anyhow::bail!(
|
||||
"stdin already has frontmatter; use `kebab ingest-file` for files with metadata"
|
||||
);
|
||||
|
||||
@@ -1899,10 +1899,19 @@ pub fn ingest_file_with_config(
|
||||
anyhow::bail!("ingest-file: not a regular file: {}", path.display());
|
||||
}
|
||||
|
||||
let ext = path
|
||||
let ext_raw = path
|
||||
.extension()
|
||||
.and_then(|e| e.to_str())
|
||||
.ok_or_else(|| anyhow::anyhow!("ingest-file: source has no extension: {}", path.display()))?;
|
||||
let ext = ext_raw.to_lowercase();
|
||||
|
||||
const SUPPORTED_EXTS: &[&str] = &["md", "pdf", "png", "jpg", "jpeg"];
|
||||
if !SUPPORTED_EXTS.contains(&ext.as_str()) {
|
||||
anyhow::bail!(
|
||||
"ingest-file: unsupported extension `.{}` (supported: {:?})",
|
||||
ext, SUPPORTED_EXTS
|
||||
);
|
||||
}
|
||||
|
||||
let bytes = std::fs::read(path)
|
||||
.with_context(|| format!("ingest-file: read source {}", path.display()))?;
|
||||
@@ -1925,7 +1934,7 @@ pub fn ingest_file_with_config(
|
||||
.context("ingest-file: append _external/ to .kebabignore")?;
|
||||
|
||||
// Copy bytes to _external/<hash>.<ext>.
|
||||
let dest = crate::external::copy_to_external(&external_dir, &bytes, ext)
|
||||
let dest = crate::external::copy_to_external(&external_dir, &bytes, &ext)
|
||||
.context("ingest-file: copy to _external")?;
|
||||
|
||||
// Build a SourceScope that targets _external/ with include filter
|
||||
@@ -1963,6 +1972,11 @@ pub fn ingest_stdin_with_config(
|
||||
let wrapped = crate::external::inject_frontmatter(body, title, source_uri)?;
|
||||
|
||||
let workspace_root = config.resolve_workspace_root();
|
||||
// Note: ensure_external_dir + ensure_kebabignore_entry + copy_to_external
|
||||
// are called here AND inside ingest_file_with_config. All three are
|
||||
// idempotent; the redundancy is intentional — keeping stdin's wrapped
|
||||
// bytes accessible by `ingest_file_with_config` requires the dest path
|
||||
// to exist. The ~ms double-stat overhead is negligible at v1 scale.
|
||||
let external_dir = crate::external::ensure_external_dir(&workspace_root)?;
|
||||
crate::external::ensure_kebabignore_entry(&workspace_root)?;
|
||||
|
||||
|
||||
@@ -87,3 +87,25 @@ fn ingest_file_errors_on_missing_path() {
|
||||
let err = kebab_app::ingest_file_with_config(cfg, &nonexistent).unwrap_err();
|
||||
assert!(err.to_string().contains("does not exist"), "{err}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ingest_file_errors_on_unsupported_extension() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let workspace = dir.path().join("notes");
|
||||
let data = dir.path().join("data");
|
||||
fs::create_dir_all(&workspace).unwrap();
|
||||
fs::create_dir_all(&data).unwrap();
|
||||
|
||||
let mut cfg = Config::defaults();
|
||||
cfg.workspace.root = workspace.to_string_lossy().into_owned();
|
||||
cfg.storage.data_dir = data.to_string_lossy().into_owned();
|
||||
cfg.models.embedding.provider = "none".to_string();
|
||||
cfg.models.embedding.dimensions = 0;
|
||||
|
||||
let docx = dir.path().join("doc.docx");
|
||||
fs::write(&docx, b"fake docx bytes").unwrap();
|
||||
|
||||
let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err();
|
||||
assert!(err.to_string().contains("unsupported extension"), "{err}");
|
||||
assert!(err.to_string().contains(".docx") || err.to_string().contains("docx"), "{err}");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user