feat(fb-31): single-file / stdin ingest — agent on-demand 저장 #111

Merged
altair823 merged 16 commits from feat/p9-fb-31-single-file-stdin-ingest into main 2026-05-07 09:56:11 +00:00
3 changed files with 44 additions and 3 deletions
Showing only changes of commit 7f5739d8fb - Show all commits

View File

@@ -78,12 +78,17 @@ pub fn copy_to_external(
/// markdown string. Errors if `body` already starts with `---` (the user
/// should use `ingest_file_with_config` for files that already carry
/// frontmatter).
///
/// Internal `yaml_quote` always uses double-quoted YAML form with backslash
/// escapes for `"` / `\` / control chars — agent-supplied titles with
/// special characters are safe.
pub fn inject_frontmatter(
body: &str,

[edge] inject_frontmatter 의 trim_start().starts_with("---\n") + "---\r\n" 두 검사 — trim_start() 가 이미 모든 leading whitespace 제거하므로 첫 케이스 ("---\n") 와 두 번째 ("---\r\n") 모두 결과적으로 "---" 로 시작하면 매치.

실제로 두 검사 OR 가 정확히 같은 set 잡지는 않음 — "---a" 같은 "3 hyphen + non-newline" 케이스는 둘 다 false. 의도된 동작? frontmatter 의 canonical form 은 ---\n (또는 ---\r\n) 라 OK. 단순화 가능:

let head = body.trim_start();
if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") {
    bail!(...)
}

or body.trim_start().lines().next() == Some("---") 로 더 명확. 현 코드 functional 정확 — 가독성 nit only.

**[edge]** `inject_frontmatter` 의 `trim_start().starts_with("---\n")` + `"---\r\n"` 두 검사 — `trim_start()` 가 이미 모든 leading whitespace 제거하므로 첫 케이스 (`"---\n"`) 와 두 번째 (`"---\r\n"`) 모두 결과적으로 `"---"` 로 시작하면 매치. 실제로 두 검사 OR 가 정확히 같은 set 잡지는 않음 — `"---a"` 같은 "3 hyphen + non-newline" 케이스는 둘 다 false. 의도된 동작? frontmatter 의 canonical form 은 `---\n` (또는 `---\r\n`) 라 OK. 단순화 가능: ```rust let head = body.trim_start(); if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") { bail!(...) } ``` or `body.trim_start().lines().next() == Some("---")` 로 더 명확. 현 코드 functional 정확 — 가독성 nit only.
title: &str,
source_uri: Option<&str>,
) -> Result<String> {
if body.trim_start().starts_with("---\n") || body.trim_start().starts_with("---\r\n") {
let head = body.trim_start();

[doc] inject_frontmatter doc 에 title YAML 처리 명시 권장 — agent 가 title: "He said \"hi\"" 같은 이상한 입력 넣을 때 internal escape 가 무엇 하는지 명시. 한 줄:

Internal yaml_quote always uses double-quoted YAML form with backslash escapes for " / \ / control chars — agent-supplied titles with special characters are safe.

별 task 아님 — doc clarity nit.

**[doc]** `inject_frontmatter` doc 에 title YAML 처리 명시 권장 — agent 가 `title: "He said \"hi\""` 같은 이상한 입력 넣을 때 internal escape 가 무엇 하는지 명시. 한 줄: > Internal `yaml_quote` always uses double-quoted YAML form with backslash escapes for `"` / `\` / control chars — agent-supplied titles with special characters are safe. 별 task 아님 — doc clarity nit.
if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") {
anyhow::bail!(
"stdin already has frontmatter; use `kebab ingest-file` for files with metadata"
);

View File

@@ -1899,10 +1899,19 @@ pub fn ingest_file_with_config(
anyhow::bail!("ingest-file: not a regular file: {}", path.display());
}
let ext = path
let ext_raw = path
.extension()
.and_then(|e| e.to_str())
.ok_or_else(|| anyhow::anyhow!("ingest-file: source has no extension: {}", path.display()))?;
let ext = ext_raw.to_lowercase();
const SUPPORTED_EXTS: &[&str] = &["md", "pdf", "png", "jpg", "jpeg"];
if !SUPPORTED_EXTS.contains(&ext.as_str()) {
anyhow::bail!(
"ingest-file: unsupported extension `.{}` (supported: {:?})",
ext, SUPPORTED_EXTS
);
}
let bytes = std::fs::read(path)
.with_context(|| format!("ingest-file: read source {}", path.display()))?;
@@ -1925,7 +1934,7 @@ pub fn ingest_file_with_config(
.context("ingest-file: append _external/ to .kebabignore")?;
// Copy bytes to _external/<hash>.<ext>.
let dest = crate::external::copy_to_external(&external_dir, &bytes, ext)
let dest = crate::external::copy_to_external(&external_dir, &bytes, &ext)
.context("ingest-file: copy to _external")?;
// Build a SourceScope that targets _external/ with include filter
@@ -1963,6 +1972,11 @@ pub fn ingest_stdin_with_config(
let wrapped = crate::external::inject_frontmatter(body, title, source_uri)?;
let workspace_root = config.resolve_workspace_root();
// Note: ensure_external_dir + ensure_kebabignore_entry + copy_to_external
// are called here AND inside ingest_file_with_config. All three are
// idempotent; the redundancy is intentional — keeping stdin's wrapped
// bytes accessible by `ingest_file_with_config` requires the dest path
// to exist. The ~ms double-stat overhead is negligible at v1 scale.
let external_dir = crate::external::ensure_external_dir(&workspace_root)?;
crate::external::ensure_kebabignore_entry(&workspace_root)?;

View File

@@ -87,3 +87,25 @@ fn ingest_file_errors_on_missing_path() {
let err = kebab_app::ingest_file_with_config(cfg, &nonexistent).unwrap_err();
assert!(err.to_string().contains("does not exist"), "{err}");
}
/// `ingest_file_with_config` must reject a source file whose extension is
/// not supported (here `.docx`), and the error must name the offending
/// extension in its dot-prefixed form.
#[test]
fn ingest_file_errors_on_unsupported_extension() {
    // Isolated workspace + data directories under a throwaway temp dir.
    let dir = tempfile::tempdir().unwrap();
    let workspace = dir.path().join("notes");
    let data = dir.path().join("data");
    fs::create_dir_all(&workspace).unwrap();
    fs::create_dir_all(&data).unwrap();

    let mut cfg = Config::defaults();
    cfg.workspace.root = workspace.to_string_lossy().into_owned();
    cfg.storage.data_dir = data.to_string_lossy().into_owned();
    // NOTE(review): presumably turns embedding off so the test needs no
    // model backend — confirm against the config documentation.
    cfg.models.embedding.provider = "none".to_string();
    cfg.models.embedding.dimensions = 0;

    // The file exists and is a regular file, so the only reason left for
    // rejection is its extension.
    let docx = dir.path().join("doc.docx");
    fs::write(&docx, b"fake docx bytes").unwrap();

    let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err();
    // Render the message once and reuse it for both checks and diagnostics.
    let msg = err.to_string();
    assert!(msg.contains("unsupported extension"), "{msg}");
    // The previous check was `contains(".docx") || contains("docx")`, which
    // collapses to `contains("docx")` because the left side implies the
    // right. Assert the dot-prefixed form directly.
    assert!(msg.contains(".docx"), "{msg}");
}