From 41061a38ac58fabf29292b4463f0e2770fd59fa9 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 17:58:31 +0900
Subject: [PATCH 01/16] =?UTF-8?q?=F0=9F=8F=97=EF=B8=8F=20feat(kebab-app):?=
=?UTF-8?q?=20external=20module=20=E2=80=94=20=5Fexternal=20dir=20+=20fron?=
=?UTF-8?q?tmatter=20inject=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Pure-fn helpers for the `_external/` workspace subdirectory:
- `ensure_external_dir(workspace_root)` — mkdir if absent
- `ensure_kebabignore_entry(workspace_root)` — append `_external/` line
to .kebabignore if missing (idempotent)
- `copy_to_external(ext_dir, bytes, ext)` — write to
`<ext_dir>/<hash12>.<ext>`, idempotent on same content
- `inject_frontmatter(body, title, source_uri?)` — prepend YAML block
with strict double-quote escaping; errors if body already starts
with `---`
- `yaml_quote(s)` — defensive escaping for agent-supplied strings
14 unit tests cover happy + idempotency + edge (CRLF frontmatter
detection, YAML escape).
ingest_file / ingest_stdin facades (Tasks 2-5) compose these.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/external.rs | 248 +++++++++++++++++++++++++++++++
crates/kebab-app/src/lib.rs | 1 +
2 files changed, 249 insertions(+)
create mode 100644 crates/kebab-app/src/external.rs
diff --git a/crates/kebab-app/src/external.rs b/crates/kebab-app/src/external.rs
new file mode 100644
index 0000000..61fe8a6
--- /dev/null
+++ b/crates/kebab-app/src/external.rs
@@ -0,0 +1,248 @@
+//! Helpers for the `_external/` workspace subdirectory used by
+//! `ingest_file_with_config` and `ingest_stdin_with_config` (p9-fb-31).
+//!
+//! - `ensure_external_dir`: create `<workspace_root>/_external/` if absent.
+//! - `ensure_kebabignore_entry`: append `_external/` to `<workspace_root>/.kebabignore`
+//!   if missing — prevents subsequent `kebab ingest` workspace walks from
+//!   re-walking files that were imported via single-file ingest.
+//! - `copy_to_external`: write bytes to `_external/<hash12>.<ext>`, idempotent.
+//! - `inject_frontmatter`: prepend a YAML frontmatter block to a markdown body
+//! string (used by `ingest_stdin_with_config`).
+
+use std::fs;
+use std::io::Write;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+
+pub const EXTERNAL_DIR: &str = "_external";
+const KEBABIGNORE_LINE: &str = "_external/";
+
+/// Ensure the `<workspace_root>/_external/` directory exists.
+///
+/// Creates the directory (and any missing parents) when it is absent. An
+/// existing directory is left untouched, so repeated calls are safe.
+///
+/// Returns the path of the `_external/` directory.
+///
+/// # Errors
+/// Returns the underlying I/O error, annotated with the target path, when
+/// the directory cannot be created.
+pub fn ensure_external_dir(workspace_root: &Path) -> Result<PathBuf> {
+    let dir = workspace_root.join(EXTERNAL_DIR);
+    fs::create_dir_all(&dir)
+        .with_context(|| format!("create _external dir at {}", dir.display()))?;
+    Ok(dir)
+}
+
+/// Append the `_external/` line to `<workspace_root>/.kebabignore` unless a
+/// line equal to it (after trimming surrounding whitespace) already exists.
+///
+/// The file is created when missing. When the existing content does not end
+/// with a newline, one is inserted first so the entry starts on its own line.
+///
+/// # Errors
+/// Returns an error, annotated with the file path, when `.kebabignore`
+/// cannot be read, opened for append, or written.
+pub fn ensure_kebabignore_entry(workspace_root: &Path) -> Result<()> {
+    let path = workspace_root.join(".kebabignore");
+    // Read directly instead of probing with `exists()` first: a missing file
+    // simply means "no entries yet", any other failure is reported.
+    let existing = match fs::read_to_string(&path) {
+        Ok(text) => text,
+        Err(err) if err.kind() == std::io::ErrorKind::NotFound => String::new(),
+        Err(err) => {
+            return Err(err)
+                .with_context(|| format!("read existing .kebabignore at {}", path.display()));
+        }
+    };
+    if existing.lines().any(|line| line.trim() == KEBABIGNORE_LINE) {
+        return Ok(());
+    }
+
+    // Assemble separator + entry up front so they are appended in one write.
+    let mut entry = String::with_capacity(KEBABIGNORE_LINE.len() + 2);
+    if !existing.is_empty() && !existing.ends_with('\n') {
+        entry.push('\n');
+    }
+    entry.push_str(KEBABIGNORE_LINE);
+    entry.push('\n');
+
+    let mut file = fs::OpenOptions::new()
+        .create(true)
+        .append(true)
+        .open(&path)
+        .with_context(|| format!("open .kebabignore for append at {}", path.display()))?;
+    file.write_all(entry.as_bytes())
+        .with_context(|| format!("append to .kebabignore at {}", path.display()))?;
+    Ok(())
+}
+
+/// Write `bytes` to `<external_dir>/<hash12>.<ext>` and return that path.
+///
+/// `<hash12>` is the first 12 hex characters of the blake3 digest of `bytes`,
+/// so identical content always maps to the same file name.
+///
+/// Idempotent: when the destination already holds exactly `bytes` it is
+/// reused without a second write. A destination whose content differs (for
+/// example a partially written file left by an interrupted run) is rewritten.
+///
+/// # Errors
+/// Returns an error, annotated with the destination path, when the file
+/// cannot be written.
+pub fn copy_to_external(
+    external_dir: &Path,
+    bytes: &[u8],
+    ext: &str,
+) -> Result<PathBuf> {
+    let hex = blake3::hash(bytes).to_hex();
+    let prefix = &hex.as_str()[..12];
+    let dest = external_dir.join(format!("{prefix}.{ext}"));
+
+    // A bare existence check would trust whatever is on disk. Compare the
+    // length first (cheap), then the content; a missing or unreadable file
+    // counts as "not reusable" and falls through to the write.
+    let same_len = fs::metadata(&dest).map_or(false, |meta| meta.len() == bytes.len() as u64);
+    let reusable = same_len && fs::read(&dest).map_or(false, |on_disk| on_disk == bytes);
+    if !reusable {
+        fs::write(&dest, bytes)
+            .with_context(|| format!("write external file at {}", dest.display()))?;
+    }
+    Ok(dest)
+}
+
+/// Prepend a YAML frontmatter block to a markdown body and return the
+/// wrapped markdown.
+///
+/// The block always contains `title`; `source_uri` is added only when given.
+/// Both values are written as YAML double-quoted scalars via `yaml_quote`.
+/// A leading UTF-8 byte-order mark on `body` is dropped so it cannot hide
+/// existing frontmatter or end up in the middle of the document.
+///
+/// # Errors
+/// Fails when `body` (ignoring leading whitespace) already starts with a
+/// `---` line — the user should use `ingest_file_with_config` for files that
+/// already carry frontmatter.
+pub fn inject_frontmatter(
+    body: &str,
+    title: &str,
+    source_uri: Option<&str>,
+) -> Result<String> {
+    // U+FEFF is not whitespace, so `trim_start` alone would not skip it.
+    let body = body.strip_prefix('\u{feff}').unwrap_or(body);
+    let leading = body.trim_start();
+    if leading.starts_with("---\n") || leading.starts_with("---\r\n") {
+        anyhow::bail!(
+            "stdin already has frontmatter; use `kebab ingest-file` for files with metadata"
+        );
+    }
+
+    let title_yaml = yaml_quote(title);
+    let uri_yaml = source_uri.map(yaml_quote);
+
+    let mut out = String::with_capacity(body.len() + title_yaml.len() + 64);
+    out.push_str("---\ntitle: ");
+    out.push_str(&title_yaml);
+    out.push('\n');
+    if let Some(uri_yaml) = &uri_yaml {
+        out.push_str("source_uri: ");
+        out.push_str(uri_yaml);
+        out.push('\n');
+    }
+    // Blank line between the closing fence and the original body.
+    out.push_str("---\n\n");
+    out.push_str(body);
+    Ok(out)
+}
+
+/// Render `s` as a YAML double-quoted scalar.
+///
+/// `"` and `\` are backslash-escaped, `\n` / `\r` use their short escapes,
+/// and every other control character (C0, DEL, C1 including NEL) as well as
+/// the Unicode line/paragraph separators U+2028 / U+2029 are written as
+/// `\uXXXX`. Everything else is copied through unchanged. Defensive against
+/// agent-supplied titles that contain quotes or control characters.
+fn yaml_quote(s: &str) -> String {
+    let mut out = String::with_capacity(s.len() + 2);
+    out.push('"');
+    for c in s.chars() {
+        match c {
+            '"' => out.push_str("\\\""),
+            '\\' => out.push_str("\\\\"),
+            '\n' => out.push_str("\\n"),
+            '\r' => out.push_str("\\r"),
+            // Non-printable in YAML, or treated as a line break by some
+            // parsers: never emit these raw inside the quotes.
+            c if c.is_control() || matches!(c, '\u{2028}' | '\u{2029}') => {
+                out.push_str(&format!("\\u{:04x}", c as u32));
+            }
+            c => out.push(c),
+        }
+    }
+    out.push('"');
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::tempdir;
+
+    /// Read the `.kebabignore` file that sits directly under `root`.
+    fn read_kebabignore(root: &Path) -> String {
+        fs::read_to_string(root.join(".kebabignore")).unwrap()
+    }
+
+    // ---- ensure_external_dir ----
+
+    #[test]
+    fn ensure_external_dir_creates_dir() {
+        let ws = tempdir().unwrap();
+        let created = ensure_external_dir(ws.path()).unwrap();
+        assert_eq!(created, ws.path().join("_external"));
+        assert!(created.is_dir());
+    }
+
+    #[test]
+    fn ensure_external_dir_is_idempotent() {
+        let ws = tempdir().unwrap();
+        let _ = ensure_external_dir(ws.path()).unwrap();
+        let second = ensure_external_dir(ws.path()).unwrap();
+        assert!(second.is_dir());
+    }
+
+    // ---- ensure_kebabignore_entry ----
+
+    #[test]
+    fn ensure_kebabignore_entry_creates_file_with_line() {
+        let ws = tempdir().unwrap();
+        ensure_kebabignore_entry(ws.path()).unwrap();
+        let text = read_kebabignore(ws.path());
+        assert!(text.lines().any(|l| l.trim() == "_external/"));
+    }
+
+    #[test]
+    fn ensure_kebabignore_entry_appends_to_existing() {
+        let ws = tempdir().unwrap();
+        fs::write(ws.path().join(".kebabignore"), "*.tmp\n").unwrap();
+        ensure_kebabignore_entry(ws.path()).unwrap();
+        let text = read_kebabignore(ws.path());
+        assert!(text.lines().any(|l| l == "*.tmp"));
+        assert!(text.lines().any(|l| l == "_external/"));
+    }
+
+    #[test]
+    fn ensure_kebabignore_entry_idempotent() {
+        let ws = tempdir().unwrap();
+        for _ in 0..2 {
+            ensure_kebabignore_entry(ws.path()).unwrap();
+        }
+        let text = read_kebabignore(ws.path());
+        let hits = text.lines().filter(|l| l.trim() == "_external/").count();
+        assert_eq!(hits, 1, "should not duplicate");
+    }
+
+    #[test]
+    fn ensure_kebabignore_entry_handles_missing_trailing_newline() {
+        let ws = tempdir().unwrap();
+        // Pre-existing content deliberately lacks a final "\n".
+        fs::write(ws.path().join(".kebabignore"), "*.tmp").unwrap();
+        ensure_kebabignore_entry(ws.path()).unwrap();
+        let text = read_kebabignore(ws.path());
+        assert!(text.lines().any(|l| l == "*.tmp"));
+        assert!(text.lines().any(|l| l == "_external/"));
+    }
+
+    // ---- copy_to_external ----
+
+    #[test]
+    fn copy_to_external_writes_with_hash_prefix_filename() {
+        let ws = tempdir().unwrap();
+        let ext_dir = ensure_external_dir(ws.path()).unwrap();
+        let written = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        assert!(written.exists());
+        let name = written.file_name().unwrap().to_string_lossy();
+        assert!(name.ends_with(".md"));
+        let stem = written.file_stem().unwrap().to_string_lossy();
+        assert_eq!(stem.len(), 12);
+    }
+
+    #[test]
+    fn copy_to_external_is_idempotent_for_same_bytes() {
+        let ws = tempdir().unwrap();
+        let ext_dir = ensure_external_dir(ws.path()).unwrap();
+        let first = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        let second = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        assert_eq!(first, second);
+    }
+
+    #[test]
+    fn copy_to_external_different_bytes_produce_different_filenames() {
+        let ws = tempdir().unwrap();
+        let ext_dir = ensure_external_dir(ws.path()).unwrap();
+        let hello = copy_to_external(&ext_dir, b"hello", "md").unwrap();
+        let world = copy_to_external(&ext_dir, b"world", "md").unwrap();
+        assert_ne!(hello, world);
+    }
+
+    // ---- inject_frontmatter ----
+
+    #[test]
+    fn inject_frontmatter_basic() {
+        let wrapped = inject_frontmatter("## Body", "Article X", None).unwrap();
+        assert!(wrapped.starts_with("---\ntitle: \"Article X\"\n---\n\n## Body"));
+    }
+
+    #[test]
+    fn inject_frontmatter_with_source_uri() {
+        let wrapped =
+            inject_frontmatter("## Body", "X", Some("https://example.com/x")).unwrap();
+        for needle in [
+            "title: \"X\"",
+            "source_uri: \"https://example.com/x\"",
+            "\n## Body",
+        ] {
+            assert!(wrapped.contains(needle));
+        }
+    }
+
+    #[test]
+    fn inject_frontmatter_errors_on_existing_frontmatter() {
+        let input = "---\ntitle: Existing\n---\n\n## Body";
+        let failure = inject_frontmatter(input, "New", None).unwrap_err();
+        assert!(failure.to_string().contains("already has frontmatter"));
+    }
+
+    #[test]
+    fn inject_frontmatter_errors_on_existing_frontmatter_crlf() {
+        let input = "---\r\ntitle: Existing\r\n---\r\n\r\n## Body";
+        let failure = inject_frontmatter(input, "New", None).unwrap_err();
+        assert!(failure.to_string().contains("already has frontmatter"));
+    }
+
+    // ---- yaml_quote ----
+
+    #[test]
+    fn yaml_quote_escapes_quotes_and_backslashes() {
+        assert_eq!(yaml_quote("hello \"world\""), "\"hello \\\"world\\\"\"");
+        assert_eq!(yaml_quote("path\\to"), "\"path\\\\to\"");
+        assert_eq!(yaml_quote("line\nbreak"), "\"line\\nbreak\"");
+    }
+}
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index 58c4062..c9dc518 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -58,6 +58,7 @@ mod app;
pub mod doctor_signal;
pub mod error_signal;
pub mod error_wire;
+pub mod external;
pub mod ingest_progress;
pub mod logging;
pub mod reset;
--
2.49.1
From 9b53dcb94fb3c38b9dc93ecbfa50baf257e43f53 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:01:18 +0900
Subject: [PATCH 02/16] =?UTF-8?q?=E2=9C=A8=20feat(kebab-app):=20ingest=5Ff?=
=?UTF-8?q?ile=5Fwith=5Fconfig=20facade=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Single-file ingest entry. Copies bytes to _external/<hash12>.<ext>
via crate::external::copy_to_external, runs the per-medium pipeline on
that single asset (reuses ingest_with_config_opts via a SourceScope
{ root: _external/, include: [], exclude:
config.workspace.exclude }).
`.kebabignore` matches log a stderr warn line and proceed (explicit
ingest is bypass intent). Internal helper `check_kebabignore_match`
uses the `ignore` crate's GitignoreBuilder.
Returns the standard IngestReport (incremental ingest from fb-23
handles re-ingest as `unchanged`).
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/Cargo.toml | 3 ++
crates/kebab-app/src/lib.rs | 96 +++++++++++++++++++++++++++++++++++++
2 files changed, 99 insertions(+)
diff --git a/crates/kebab-app/Cargo.toml b/crates/kebab-app/Cargo.toml
index d163e9c..cc35d07 100644
--- a/crates/kebab-app/Cargo.toml
+++ b/crates/kebab-app/Cargo.toml
@@ -49,6 +49,9 @@ lru = { workspace = true }
# `" foo "` collapse to one entry. Same crate kebab-normalize +
# kebab-core already use, no version drift.
unicode-normalization = "0.1"
+# p9-fb-31: GitignoreBuilder for .kebabignore matching in ingest_file_with_config.
+# Same version as kebab-source-fs (0.4) to avoid duplicate dep versions.
+ignore = "0.4"
[dev-dependencies]
rusqlite = { workspace = true }
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index c9dc518..e754d23 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -1875,3 +1875,99 @@ pub fn doctor_with_config_path(config_path: Option<&std::path::Path>) -> anyhow:
pub fn doctor() -> anyhow::Result {
doctor_with_config_path(None)
}
+
+/// Ingest one file (p9-fb-31).
+///
+/// The file's bytes are copied to `<workspace_root>/_external/<hash12>.<ext>`
+/// (via `crate::external::copy_to_external`) and the regular ingest pipeline
+/// is then run over `_external/` with an include filter naming only that
+/// copy. Per the original contract the resulting report has `scanned: 1` and
+/// either `new: 1` or `unchanged: 1` (incremental ingest from p9-fb-23).
+///
+/// `path` may point inside or outside the workspace. When it matches a
+/// `.kebabignore` pattern, a `warn:` line is printed to stderr and ingest
+/// proceeds anyway — an explicit request overrides the ignore file.
+///
+/// Fails when `path` does not exist, is not a regular file, has no UTF-8
+/// extension, cannot be read, or when any `_external/` setup step fails.
+///
+/// NOTE(review): the generic argument of the return type is not visible in
+/// this view — presumably the ingest report type; confirm.
+#[doc(hidden)]
+pub fn ingest_file_with_config(
+    config: kebab_config::Config,
+    path: &std::path::Path,
+) -> anyhow::Result {
+    // Validate the source before touching the workspace.
+    if !path.exists() {
+        anyhow::bail!("ingest-file: source path does not exist: {}", path.display());
+    }
+    if !path.is_file() {
+        anyhow::bail!("ingest-file: not a regular file: {}", path.display());
+    }
+    let ext = match path.extension().and_then(|e| e.to_str()) {
+        Some(ext) => ext,
+        None => anyhow::bail!("ingest-file: source has no extension: {}", path.display()),
+    };
+
+    let bytes = std::fs::read(path)
+        .with_context(|| format!("ingest-file: read source {}", path.display()))?;
+
+    let workspace_root = config.resolve_workspace_root();
+
+    // An ignore match is only reported, never enforced.
+    if check_kebabignore_match(&workspace_root, path) {
+        eprintln!(
+            "warn: {} matches .kebabignore patterns; proceeding (explicit ingest bypasses ignore)",
+            path.display()
+        );
+    }
+
+    // Prepare `_external/` and keep workspace walks away from it.
+    let external_dir = crate::external::ensure_external_dir(&workspace_root)
+        .context("ingest-file: ensure _external/ dir")?;
+    crate::external::ensure_kebabignore_entry(&workspace_root)
+        .context("ingest-file: append _external/ to .kebabignore")?;
+
+    let dest = crate::external::copy_to_external(&external_dir, &bytes, ext)
+        .context("ingest-file: copy to _external")?;
+
+    // Restrict the walk over `_external/` to the file that was just copied.
+    let filename = dest
+        .file_name()
+        .map(|name| name.to_string_lossy().into_owned())
+        .ok_or_else(|| anyhow::anyhow!("ingest-file: dest has no filename"))?;
+    let exclude = config.workspace.exclude.clone();
+    let scope = kebab_core::SourceScope {
+        root: external_dir,
+        include: vec![filename],
+        exclude,
+    };
+
+    ingest_with_config_opts(config, scope, /* summary_only = */ false, IngestOpts::default())
+}
+
+/// Returns true if `source_path` matches any `.kebabignore` pattern
+/// rooted at `workspace_root`. Used by `ingest_file_with_config` to
+/// emit a stderr warn before bypassing the ignore.
+///
+/// Best-effort: a missing or unreadable `.kebabignore`, a pattern line the
+/// builder rejects, or a matcher that fails to build all result in `false`
+/// (or the offending line being skipped) rather than an error.
+///
+/// NOTE(review): `matched` is given the file path only. Whether a pattern
+/// that ignores one of its parent directories is detected depends on the
+/// `ignore` crate's semantics — confirm; `matched_path_or_any_parents` may be
+/// what is wanted for paths under `workspace_root`.
+fn check_kebabignore_match(workspace_root: &std::path::Path, source_path: &std::path::Path) -> bool {
+ let kebabignore = workspace_root.join(".kebabignore");
+ // No ignore file means nothing can match.
+ if !kebabignore.exists() {
+ return false;
+ }
+ let text = match std::fs::read_to_string(&kebabignore) {
+ Ok(s) => s,
+ Err(_) => return false,
+ };
+ let mut builder = ignore::gitignore::GitignoreBuilder::new(workspace_root);
+ for line in text.lines() {
+ let line = line.trim();
+ // Skip blank lines and `#` comments before handing lines to the builder.
+ if line.is_empty() || line.starts_with('#') {
+ continue;
+ }
+ // Errors from individual lines are deliberately discarded.
+ let _ = builder.add_line(None, line);
+ }
+ let matcher = match builder.build() {
+ Ok(m) => m,
+ Err(_) => return false,
+ };
+ matcher.matched(source_path, source_path.is_dir()).is_ignore()
+}
--
2.49.1
From 73ee64c73f47b84b43fdd1911fb173b00bef918b Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:02:52 +0900
Subject: [PATCH 03/16] =?UTF-8?q?=F0=9F=A7=AA=20test(kebab-app):=20ingest?=
=?UTF-8?q?=5Ffile=5Fwith=5Fconfig=20integration=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Three scenarios — copies external md + reports new=1, idempotent on
second call (unchanged=1), errors on missing path.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/tests/ingest_file.rs | 89 +++++++++++++++++++++++++++
1 file changed, 89 insertions(+)
create mode 100644 crates/kebab-app/tests/ingest_file.rs
diff --git a/crates/kebab-app/tests/ingest_file.rs b/crates/kebab-app/tests/ingest_file.rs
new file mode 100644
index 0000000..b70fa4d
--- /dev/null
+++ b/crates/kebab-app/tests/ingest_file.rs
@@ -0,0 +1,89 @@
+//! Integration: kebab_app::ingest_file_with_config copies external file
+//! to _external/, ingests as single asset, idempotent on second call.
+
+use std::fs;
+
+use kebab_config::Config;
+
+/// Build a config for an isolated workspace under `dir`: `notes/` becomes
+/// the workspace root and `data/` the storage dir (both are created), and
+/// the embedding provider is set to `"none"` with zero dimensions.
+fn fresh_cfg(dir: &std::path::Path) -> Config {
+    let workspace = dir.join("notes");
+    let data = dir.join("data");
+    fs::create_dir_all(&workspace).unwrap();
+    fs::create_dir_all(&data).unwrap();
+
+    let mut cfg = Config::defaults();
+    cfg.workspace.root = workspace.to_string_lossy().into_owned();
+    cfg.storage.data_dir = data.to_string_lossy().into_owned();
+    cfg.models.embedding.provider = "none".to_string();
+    cfg.models.embedding.dimensions = 0;
+    cfg
+}
+
+/// A markdown file outside the workspace is copied into `_external/`,
+/// reported as new, and `_external/` is added to `.kebabignore`.
+#[test]
+fn ingest_file_copies_external_md_and_reports_new() {
+    let dir = tempfile::tempdir().unwrap();
+    let cfg = fresh_cfg(dir.path());
+    let workspace = dir.path().join("notes");
+
+    // Source file outside the workspace.
+    let external_src = dir.path().join("source.md");
+    fs::write(&external_src, "# Hello\n\nbody.").unwrap();
+
+    let report = kebab_app::ingest_file_with_config(cfg, &external_src).unwrap();
+    assert_eq!(report.scanned, 1, "{report:?}");
+    assert_eq!(report.new, 1, "{report:?}");
+    assert_eq!(report.unchanged, 0, "{report:?}");
+
+    // _external/ dir created, file copied with hash prefix.
+    let ext_dir = workspace.join("_external");
+    assert!(ext_dir.is_dir());
+    let entries: Vec<_> = fs::read_dir(&ext_dir)
+        .unwrap()
+        .filter_map(|e| e.ok())
+        .collect();
+    assert_eq!(entries.len(), 1, "exactly one file in _external/");
+    let name = entries[0].file_name().to_string_lossy().into_owned();
+    assert!(name.ends_with(".md"));
+
+    // .kebabignore has _external/ line.
+    let ki = fs::read_to_string(workspace.join(".kebabignore")).unwrap();
+    assert!(ki.lines().any(|l| l.trim() == "_external/"));
+}
+
+/// Ingesting the same file twice reports it as unchanged the second time.
+#[test]
+fn ingest_file_idempotent_on_second_call() {
+    let dir = tempfile::tempdir().unwrap();
+    let cfg = fresh_cfg(dir.path());
+
+    let src = dir.path().join("doc.md");
+    fs::write(&src, "# A\n\nbody.").unwrap();
+
+    let r1 = kebab_app::ingest_file_with_config(cfg.clone(), &src).unwrap();
+    assert_eq!(r1.new, 1);
+
+    let r2 = kebab_app::ingest_file_with_config(cfg, &src).unwrap();
+    assert_eq!(r2.new, 0, "{r2:?}");
+    assert_eq!(r2.unchanged, 1, "{r2:?}");
+}
+
+/// A path that does not exist is rejected with a "does not exist" error.
+#[test]
+fn ingest_file_errors_on_missing_path() {
+    let dir = tempfile::tempdir().unwrap();
+    let cfg = fresh_cfg(dir.path());
+
+    let nonexistent = dir.path().join("nope.md");
+    let err = kebab_app::ingest_file_with_config(cfg, &nonexistent).unwrap_err();
+    assert!(err.to_string().contains("does not exist"), "{err}");
+}
--
2.49.1
From 67050016cc7fd099d35df7fa9355787041d77fec Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:04:01 +0900
Subject: [PATCH 04/16] =?UTF-8?q?=E2=9C=A8=20feat(kebab-app):=20ingest=5Fs?=
=?UTF-8?q?tdin=5Fwith=5Fconfig=20facade=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Wraps body with YAML frontmatter (title + source_uri) via
crate::external::inject_frontmatter, writes to
_external/<hash12>.md, delegates to ingest_file_with_config. Markdown
only in v1.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/lib.rs | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index e754d23..6b3ed4c 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -1945,6 +1945,35 @@ pub fn ingest_file_with_config(
ingest_with_config_opts(config, scope, /* summary_only = */ false, opts)
}
+/// Stdin ingest (p9-fb-31, v1 markdown only).
+///
+/// Prepends a YAML frontmatter block (`title` plus optional `source_uri`)
+/// to `body`, writes the wrapped markdown to `_external/<hash12>.md`, and
+/// runs `ingest_file_with_config` on the resulting file. That call copies
+/// the same bytes again; identical content maps to the same hashed file
+/// name, so it resolves to the path written here.
+///
+/// Errors if `body` already starts with `---` (the user should call
+/// `ingest_file_with_config` directly for files that already carry
+/// frontmatter), or when any `_external/` setup step or the ingest fails.
+///
+/// NOTE(review): the generic argument of the return type is not visible in
+/// this view — presumably the same report type `ingest_file_with_config`
+/// returns; confirm.
+pub fn ingest_stdin_with_config(
+    config: kebab_config::Config,
+    body: &str,
+    title: &str,
+    source_uri: Option<&str>,
+) -> anyhow::Result {
+    // Propagated without extra context so the "already has frontmatter"
+    // message stays the top-level error text callers match on.
+    let wrapped = crate::external::inject_frontmatter(body, title, source_uri)?;
+
+    let workspace_root = config.resolve_workspace_root();
+    let external_dir = crate::external::ensure_external_dir(&workspace_root)
+        .context("ingest-stdin: ensure _external/ dir")?;
+    crate::external::ensure_kebabignore_entry(&workspace_root)
+        .context("ingest-stdin: append _external/ to .kebabignore")?;
+
+    let dest = crate::external::copy_to_external(&external_dir, wrapped.as_bytes(), "md")
+        .context("ingest-stdin: copy to _external")?;
+
+    ingest_file_with_config(config, &dest)
+}
+
/// Returns true if `source_path` matches any `.kebabignore` pattern
/// rooted at `workspace_root`. Used by `ingest_file_with_config` to
/// emit a stderr warn before bypassing the ignore.
--
2.49.1
From a42f907640ec0e5716e883a9559f00b8db666961 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:04:52 +0900
Subject: [PATCH 05/16] =?UTF-8?q?=F0=9F=A7=AA=20test(kebab-app):=20ingest?=
=?UTF-8?q?=5Fstdin=5Fwith=5Fconfig=20integration=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/tests/ingest_stdin.rs | 78 ++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
create mode 100644 crates/kebab-app/tests/ingest_stdin.rs
diff --git a/crates/kebab-app/tests/ingest_stdin.rs b/crates/kebab-app/tests/ingest_stdin.rs
new file mode 100644
index 0000000..0eeafe7
--- /dev/null
+++ b/crates/kebab-app/tests/ingest_stdin.rs
@@ -0,0 +1,78 @@
+//! Integration: kebab_app::ingest_stdin_with_config injects frontmatter,
+//! writes to _external/, ingests as single asset.
+
+use std::fs;
+
+use kebab_config::Config;
+
+/// Build a config for an isolated workspace under `dir`: creates `notes/`
+/// (workspace root) and `data/` (storage dir) and sets the embedding
+/// provider to `"none"` with zero dimensions.
+fn fresh_cfg(dir: &std::path::Path) -> Config {
+    let notes_dir = dir.join("notes");
+    let data_dir = dir.join("data");
+    for created in [&notes_dir, &data_dir] {
+        fs::create_dir_all(created).unwrap();
+    }
+
+    let mut config = Config::defaults();
+    config.workspace.root = notes_dir.to_string_lossy().into_owned();
+    config.storage.data_dir = data_dir.to_string_lossy().into_owned();
+    config.models.embedding.provider = "none".to_string();
+    config.models.embedding.dimensions = 0;
+    config
+}
+
+/// Stdin ingest with a source URI writes one `.md` file into `_external/`
+/// that carries the injected frontmatter followed by the body.
+#[test]
+fn ingest_stdin_writes_frontmatter_and_reports_new() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cfg = fresh_cfg(tmp.path());
+    let external = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
+
+    let report = kebab_app::ingest_stdin_with_config(
+        cfg,
+        "## Body content\n\nMore.",
+        "Article X",
+        Some("https://example.com/x"),
+    )
+    .unwrap();
+    assert_eq!(report.new, 1, "{report:?}");
+
+    // _external/ contains exactly one .md file with frontmatter.
+    let files: Vec<_> = fs::read_dir(&external)
+        .unwrap()
+        .filter_map(Result::ok)
+        .collect();
+    assert_eq!(files.len(), 1);
+    let written = fs::read_to_string(files[0].path()).unwrap();
+    assert!(written.starts_with("---\n"));
+    for expected in [
+        "title: \"Article X\"",
+        "source_uri: \"https://example.com/x\"",
+        "## Body content",
+    ] {
+        assert!(written.contains(expected));
+    }
+}
+
+/// Without a source URI the frontmatter carries the title only.
+#[test]
+fn ingest_stdin_without_source_uri() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cfg = fresh_cfg(tmp.path());
+    let external = std::path::PathBuf::from(&cfg.workspace.root).join("_external");
+
+    let report = kebab_app::ingest_stdin_with_config(cfg, "## Body", "Title", None).unwrap();
+    assert_eq!(report.new, 1);
+
+    let files: Vec<_> = fs::read_dir(&external)
+        .unwrap()
+        .filter_map(Result::ok)
+        .collect();
+    let written = fs::read_to_string(files[0].path()).unwrap();
+    assert!(written.contains("title: \"Title\""));
+    assert!(!written.contains("source_uri"));
+}
+
+/// A body that already opens with a `---` frontmatter fence is rejected.
+#[test]
+fn ingest_stdin_errors_on_existing_frontmatter() {
+    let tmp = tempfile::tempdir().unwrap();
+    let cfg = fresh_cfg(tmp.path());
+
+    let with_frontmatter = "---\ntitle: Already\n---\n\n## Body";
+    let failure = kebab_app::ingest_stdin_with_config(cfg, with_frontmatter, "New", None)
+        .unwrap_err();
+    assert!(failure.to_string().contains("already has frontmatter"), "{failure}");
+}
--
2.49.1
From 9cc7deca118d608d787cb2679033d49d61f4b9ce Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:06:25 +0900
Subject: [PATCH 06/16] =?UTF-8?q?=E2=9C=A8=20feat(kebab-cli):=20kebab=20in?=
=?UTF-8?q?gest-file=20subcommand=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-cli/src/main.rs | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index db0f4b1..74046de 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -193,6 +193,13 @@ enum Cmd {
/// agent hosts (Claude Code / Cursor / OpenAI Agents) to call kebab
/// tools (search / ask / schema / doctor).
Mcp,
+
+ /// Ingest a single file (workspace external paths allowed).
+ /// Bytes are copied into `<workspace_root>/_external/<hash12>.<ext>`.
+ IngestFile {
+ /// File path to ingest.
+ path: std::path::PathBuf,
+ },
}
#[derive(Subcommand, Debug)]
@@ -745,6 +752,22 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
}
},
+ Cmd::IngestFile { path } => {
+ let cfg = kebab_config::Config::load(cli.config.as_deref())?;
+ let report = kebab_app::ingest_file_with_config(cfg, path)?;
+ if cli.json {
+ let v = wire::wire_ingest(&report);
+ println!("{}", serde_json::to_string(&v)?);
+ } else {
+ println!(
+ "ingest-file: scanned={} new={} updated={} unchanged={} skipped={} errors={}",
+ report.scanned, report.new, report.updated,
+ report.unchanged, report.skipped, report.errors
+ );
+ }
+ Ok(())
+ }
+
Cmd::Mcp => {
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
kebab_mcp::serve_stdio(cfg, cli.config.clone())
--
2.49.1
From 0386adcb5e75b0afb53fa12bb39187e208d193a9 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:07:35 +0900
Subject: [PATCH 07/16] =?UTF-8?q?=E2=9C=A8=20feat(kebab-cli):=20kebab=20in?=
=?UTF-8?q?gest-stdin=20subcommand=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-cli/src/main.rs | 38 ++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/crates/kebab-cli/src/main.rs b/crates/kebab-cli/src/main.rs
index 74046de..6c7a159 100644
--- a/crates/kebab-cli/src/main.rs
+++ b/crates/kebab-cli/src/main.rs
@@ -4,6 +4,7 @@
use std::path::PathBuf;
use std::process::ExitCode;
+use anyhow::Context;
use clap::{Parser, Subcommand};
use kebab_app::doctor_signal::{DoctorUnhealthy, NoHitSignal, RefusalSignal};
@@ -200,6 +201,17 @@ enum Cmd {
/// File path to ingest.
path: std::path::PathBuf,
},
+
+ /// Ingest markdown content from stdin. v1 markdown only.
+ /// Frontmatter (title + source_uri) is auto-injected.
+ IngestStdin {
+ /// Title — required, written to frontmatter.
+ #[arg(long)]
+ title: String,
+ /// Source URI — optional, written to frontmatter when present.
+ #[arg(long)]
+ source_uri: Option,
+ },
}
#[derive(Subcommand, Debug)]
@@ -768,6 +780,32 @@ fn run(cli: &Cli) -> anyhow::Result<()> {
Ok(())
}
+ Cmd::IngestStdin { title, source_uri } => {
+ use std::io::Read;
+ let mut body = String::new();
+ std::io::stdin()
+ .read_to_string(&mut body)
+ .context("kebab ingest-stdin: read stdin")?;
+ let cfg = kebab_config::Config::load(cli.config.as_deref())?;
+ let report = kebab_app::ingest_stdin_with_config(
+ cfg,
+ &body,
+ title,
+ source_uri.as_deref(),
+ )?;
+ if cli.json {
+ let v = wire::wire_ingest(&report);
+ println!("{}", serde_json::to_string(&v)?);
+ } else {
+ println!(
+ "ingest-stdin: scanned={} new={} updated={} unchanged={} skipped={} errors={}",
+ report.scanned, report.new, report.updated,
+ report.unchanged, report.skipped, report.errors
+ );
+ }
+ Ok(())
+ }
+
Cmd::Mcp => {
let cfg = kebab_config::Config::load(cli.config.as_deref())?;
kebab_mcp::serve_stdio(cfg, cli.config.clone())
--
2.49.1
From fbc01eda50a42f2754afc0c9f7f1c729a772845f Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:10:09 +0900
Subject: [PATCH 08/16] =?UTF-8?q?=F0=9F=A7=AA=20test(kebab-cli):=20cli=5Fi?=
=?UTF-8?q?ngest=5Ffile=20+=20cli=5Fingest=5Fstdin=20integration=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-cli/tests/cli_ingest_file.rs | 92 +++++++++++++++++++
crates/kebab-cli/tests/cli_ingest_stdin.rs | 100 +++++++++++++++++++++
2 files changed, 192 insertions(+)
create mode 100644 crates/kebab-cli/tests/cli_ingest_file.rs
create mode 100644 crates/kebab-cli/tests/cli_ingest_stdin.rs
diff --git a/crates/kebab-cli/tests/cli_ingest_file.rs b/crates/kebab-cli/tests/cli_ingest_file.rs
new file mode 100644
index 0000000..5f81dbd
--- /dev/null
+++ b/crates/kebab-cli/tests/cli_ingest_file.rs
@@ -0,0 +1,92 @@
+//! Integration: spawn `kebab ingest-file <path>` and verify ingest_report.v1.
+
+use std::fs;
+use std::process::Command;
+
+/// Spawns `kebab --json --config <cfg> ingest-file <src>` against a fresh
+/// workspace and checks that stdout is an `ingest_report.v1` JSON document
+/// reporting one new file.
+#[test]
+fn cli_ingest_file_emits_ingest_report_v1() {
+    let dir = tempfile::tempdir().unwrap();
+    let workspace = dir.path().join("notes");
+    let data = dir.path().join("data");
+    fs::create_dir_all(&workspace).unwrap();
+    fs::create_dir_all(&data).unwrap();
+
+    // The two filesystem paths are embedded as TOML literal strings (single
+    // quotes) so that backslashes in Windows paths are not parsed as escape
+    // sequences, which a basic (double-quoted) string would do.
+    let cfg_path = dir.path().join("config.toml");
+    fs::write(
+        &cfg_path,
+        format!(
+            r#"schema_version = 1
+
+[workspace]
+root = '{workspace}'
+exclude = [".git/**"]
+
+[storage]
+data_dir = '{data}'
+sqlite = "{{data_dir}}/kebab.sqlite"
+vector_dir = "{{data_dir}}/lancedb"
+asset_dir = "{{data_dir}}/assets"
+artifact_dir = "{{data_dir}}/artifacts"
+model_dir = "{{data_dir}}/models"
+runs_dir = "{{data_dir}}/runs"
+copy_threshold_mb = 100
+
+[indexing]
+max_parallel_extractors = 2
+max_parallel_embeddings = 1
+watch_filesystem = false
+
+[chunking]
+target_tokens = 500
+overlap_tokens = 80
+respect_markdown_headings = true
+chunker_version = "md-heading-v1"
+
+[models.embedding]
+provider = "none"
+model = "none"
+version = "v0"
+dimensions = 0
+batch_size = 1
+
+[models.llm]
+provider = "ollama"
+model = "none"
+context_tokens = 4096
+endpoint = "http://127.0.0.1:11434"
+temperature = 0.0
+seed = 0
+
+[search]
+default_k = 10
+hybrid_fusion = "rrf"
+rrf_k = 60
+snippet_chars = 220
+
+[rag]
+prompt_template_version = "rag-v1"
+score_gate = 0.30
+explain_default = false
+max_context_tokens = 8000
+"#,
+            workspace = workspace.display(),
+            data = data.display(),
+        ),
+    ).unwrap();
+
+    // Source document lives outside the workspace root.
+    let src = dir.path().join("doc.md");
+    fs::write(&src, "# A\n\nbody.").unwrap();
+
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let out = Command::new(bin)
+        .args(["--json", "--config", cfg_path.to_str().unwrap(), "ingest-file"])
+        .arg(&src)
+        .output()
+        .unwrap();
+    assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr));
+
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    let v: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap();
+    assert_eq!(v.get("schema_version").and_then(|s| s.as_str()), Some("ingest_report.v1"));
+    assert_eq!(v.get("new").and_then(|n| n.as_u64()), Some(1));
+}
diff --git a/crates/kebab-cli/tests/cli_ingest_stdin.rs b/crates/kebab-cli/tests/cli_ingest_stdin.rs
new file mode 100644
index 0000000..d040350
--- /dev/null
+++ b/crates/kebab-cli/tests/cli_ingest_stdin.rs
@@ -0,0 +1,100 @@
+//! Integration: spawn `kebab ingest-stdin --title X` with stdin pipe.
+
+use std::fs;
+use std::io::Write;
+use std::process::{Command, Stdio};
+
+/// Spawns `kebab --json --config <cfg> ingest-stdin --title X`, pipes a markdown body to its
+/// stdin, and checks that stdout parses as `ingest_report.v1` JSON with `new == 1`.
+#[test]
+fn cli_ingest_stdin_emits_ingest_report_v1() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (notes_dir, data_dir) = (tmp.path().join("notes"), tmp.path().join("data"));
+    for created in [&notes_dir, &data_dir] {
+        fs::create_dir_all(created).unwrap();
+    }
+
+    // Full config file with workspace root and data dir pointed at the temp dirs.
+    let cfg_path = tmp.path().join("config.toml");
+    let config_toml = format!(
+        r#"schema_version = 1
+
+[workspace]
+root = "{workspace}"
+exclude = [".git/**"]
+
+[storage]
+data_dir = "{data}"
+sqlite = "{{data_dir}}/kebab.sqlite"
+vector_dir = "{{data_dir}}/lancedb"
+asset_dir = "{{data_dir}}/assets"
+artifact_dir = "{{data_dir}}/artifacts"
+model_dir = "{{data_dir}}/models"
+runs_dir = "{{data_dir}}/runs"
+copy_threshold_mb = 100
+
+[indexing]
+max_parallel_extractors = 2
+max_parallel_embeddings = 1
+watch_filesystem = false
+
+[chunking]
+target_tokens = 500
+overlap_tokens = 80
+respect_markdown_headings = true
+chunker_version = "md-heading-v1"
+
+[models.embedding]
+provider = "none"
+model = "none"
+version = "v0"
+dimensions = 0
+batch_size = 1
+
+[models.llm]
+provider = "ollama"
+model = "none"
+context_tokens = 4096
+endpoint = "http://127.0.0.1:11434"
+temperature = 0.0
+seed = 0
+
+[search]
+default_k = 10
+hybrid_fusion = "rrf"
+rrf_k = 60
+snippet_chars = 220
+
+[rag]
+prompt_template_version = "rag-v1"
+score_gate = 0.30
+explain_default = false
+max_context_tokens = 8000
+"#,
+        workspace = notes_dir.display(),
+        data = data_dir.display(),
+    );
+    fs::write(&cfg_path, config_toml).unwrap();
+
+    let bin = env!("CARGO_BIN_EXE_kebab");
+    let mut child = Command::new(bin)
+        .args(["--json", "--config", cfg_path.to_str().unwrap()])
+        .args(["ingest-stdin", "--title", "X"])
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped())
+        .spawn()
+        .unwrap();
+    // `wait_with_output` closes the child's stdin before waiting, so the child sees EOF.
+    child.stdin.as_mut().unwrap().write_all(b"## Body\n\nbody text.\n").unwrap();
+    let out = child.wait_with_output().unwrap();
+    assert!(out.status.success(), "stderr: {}", String::from_utf8_lossy(&out.stderr));
+
+    let stdout = String::from_utf8_lossy(&out.stdout);
+    let v: serde_json::Value = serde_json::from_str(stdout.trim()).unwrap();
+    assert_eq!(
+        v["schema_version"].as_str(),
+        Some("ingest_report.v1")
+    );
+    assert_eq!(v["new"].as_u64(), Some(1));
+}
--
2.49.1
From ecd77290cd3c920849490857102f227c4590172e Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:12:18 +0900
Subject: [PATCH 09/16] =?UTF-8?q?=E2=9C=A8=20feat(kebab-mcp):=20ingest=5Ff?=
=?UTF-8?q?ile=20+=20ingest=5Fstdin=20tools=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
5th + 6th MCP tools — first mutation surface (fb-30 v1 was read-only).
Both wrap the new kebab-app facade fns + use spawn_blocking via the
existing spawn_tool helper. tools/list now returns 6 tools.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-mcp/src/lib.rs | 30 +++++++++++++--
crates/kebab-mcp/src/tools/ingest_file.rs | 39 +++++++++++++++++++
crates/kebab-mcp/src/tools/ingest_stdin.rs | 44 ++++++++++++++++++++++
crates/kebab-mcp/src/tools/mod.rs | 2 +
crates/kebab-mcp/tests/tools_list.rs | 8 ++--
5 files changed, 117 insertions(+), 6 deletions(-)
create mode 100644 crates/kebab-mcp/src/tools/ingest_file.rs
create mode 100644 crates/kebab-mcp/src/tools/ingest_stdin.rs
diff --git a/crates/kebab-mcp/src/lib.rs b/crates/kebab-mcp/src/lib.rs
index a190673..fc6a2a4 100644
--- a/crates/kebab-mcp/src/lib.rs
+++ b/crates/kebab-mcp/src/lib.rs
@@ -1,6 +1,6 @@
-//! MCP (Model Context Protocol) server over stdio. Exposes 4 read-only
-//! tools (`search` / `ask` / `schema` / `doctor`) backed by `kebab-app`
-//! facade methods. Used by `kebab-cli`'s `Cmd::Mcp` arm.
+//! MCP (Model Context Protocol) server over stdio. Exposes 6 tools
+//! (`search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`)
+//! backed by `kebab-app` facade methods. Used by `kebab-cli`'s `Cmd::Mcp` arm.
//!
//! See spec `docs/superpowers/specs/2026-05-07-p9-fb-30-mcp-server-design.md`.
@@ -51,6 +51,16 @@ pub fn build_tools_vec() -> Vec {
"RAG question answering over the knowledge base. Returns answer.v1 JSON. Pass session_id for multi-turn context.",
schema_for_type::(),
),
+        Tool::new(
+            "ingest_file",
+            "Ingest a single file (path) into the knowledge base. Workspace external paths allowed — bytes are copied into _external/.",
+            schema_for_type::<tools::ingest_file::IngestFileInput>(),
+        ),
+        Tool::new(
+            "ingest_stdin",
+            "Ingest markdown content into the knowledge base. v1 markdown only. Frontmatter (title + source_uri) auto-injected.",
+            schema_for_type::<tools::ingest_stdin::IngestStdinInput>(),
+        ),
]
}
@@ -133,6 +143,20 @@ impl ServerHandler for KebabHandler {
})
.await
}
+ "ingest_file" => {
+ let args = request.arguments.unwrap_or_default();
+ self.spawn_tool(args, |state, input| {
+ tools::ingest_file::handle(&state, input)
+ })
+ .await
+ }
+ "ingest_stdin" => {
+ let args = request.arguments.unwrap_or_default();
+ self.spawn_tool(args, |state, input| {
+ tools::ingest_stdin::handle(&state, input)
+ })
+ .await
+ }
_other => Err(ErrorData::method_not_found::<
rmcp::model::CallToolRequestMethod,
>()),
diff --git a/crates/kebab-mcp/src/tools/ingest_file.rs b/crates/kebab-mcp/src/tools/ingest_file.rs
new file mode 100644
index 0000000..0bad2a6
--- /dev/null
+++ b/crates/kebab-mcp/src/tools/ingest_file.rs
@@ -0,0 +1,39 @@
+//! `ingest_file` tool — wraps `kebab_app::ingest_file_with_config`.
+//! Input: { path }. Output: ingest_report.v1 JSON.
+
+use std::path::PathBuf;
+
+use rmcp::model::CallToolResult;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use crate::error::{to_tool_error, to_tool_success};
+use crate::state::KebabAppState;
+
+#[derive(Debug, Deserialize, Serialize, JsonSchema)]
+pub struct IngestFileInput {
+ /// Absolute or relative path to the file to ingest. Workspace external
+ /// paths are allowed — bytes are copied into `_external/`.
+ pub path: String,
+}
+
+/// Ingests `input.path` and returns report JSON, defaulting `schema_version` to `ingest_report.v1`.
+pub fn handle(state: &KebabAppState, input: IngestFileInput) -> CallToolResult {
+    let config = (*state.config).clone();
+    let source = PathBuf::from(input.path);
+    let report = match kebab_app::ingest_file_with_config(config, &source) {
+        Ok(report) => report,
+        Err(e) => return to_tool_error(&e),
+    };
+    let rendered = serde_json::to_value(&report).and_then(|mut payload| {
+        if let Some(fields) = payload.as_object_mut() {
+            fields.entry("schema_version".to_string())
+                .or_insert_with(|| serde_json::Value::String("ingest_report.v1".to_string()));
+        }
+        serde_json::to_string(&payload)
+    });
+    match rendered {
+        Ok(json) => to_tool_success(json),
+        Err(e) => to_tool_error(&anyhow::anyhow!(e)),
+    }
+}
diff --git a/crates/kebab-mcp/src/tools/ingest_stdin.rs b/crates/kebab-mcp/src/tools/ingest_stdin.rs
new file mode 100644
index 0000000..5957711
--- /dev/null
+++ b/crates/kebab-mcp/src/tools/ingest_stdin.rs
@@ -0,0 +1,44 @@
+//! `ingest_stdin` tool — wraps `kebab_app::ingest_stdin_with_config`.
+//! Input: { content, title, source_uri? }. Output: ingest_report.v1 JSON.
+
+use rmcp::model::CallToolResult;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+use crate::error::{to_tool_error, to_tool_success};
+use crate::state::KebabAppState;
+
+#[derive(Debug, Deserialize, Serialize, JsonSchema)]
+pub struct IngestStdinInput {
+    /// Markdown body content. v1 supports markdown only.
+    pub content: String,
+    /// Title for frontmatter injection.
+    pub title: String,
+    /// Optional source URI (e.g. https URL agent fetched from).
+    pub source_uri: Option<String>,
+}
+
+/// Passes `input.content`, `input.title` and the optional `input.source_uri` to
+/// `kebab_app::ingest_stdin_with_config`; returns report JSON (`schema_version` defaulted) or a tool error.
+pub fn handle(state: &KebabAppState, input: IngestStdinInput) -> CallToolResult {
+    let report = match kebab_app::ingest_stdin_with_config(
+        (*state.config).clone(),
+        &input.content,
+        &input.title,
+        input.source_uri.as_deref(),
+    ) {
+        Ok(report) => report,
+        Err(e) => return to_tool_error(&e),
+    };
+    let rendered = serde_json::to_value(&report).and_then(|mut payload| {
+        if let Some(fields) = payload.as_object_mut() {
+            fields.entry("schema_version".to_string())
+                .or_insert_with(|| serde_json::Value::String("ingest_report.v1".to_string()));
+        }
+        serde_json::to_string(&payload)
+    });
+    match rendered {
+        Ok(json) => to_tool_success(json),
+        Err(e) => to_tool_error(&anyhow::anyhow!(e)),
+    }
+}
diff --git a/crates/kebab-mcp/src/tools/mod.rs b/crates/kebab-mcp/src/tools/mod.rs
index 3e3d898..087d630 100644
--- a/crates/kebab-mcp/src/tools/mod.rs
+++ b/crates/kebab-mcp/src/tools/mod.rs
@@ -4,3 +4,5 @@ pub mod schema;
pub mod doctor;
pub mod search;
pub mod ask;
+pub mod ingest_file;
+pub mod ingest_stdin;
diff --git a/crates/kebab-mcp/tests/tools_list.rs b/crates/kebab-mcp/tests/tools_list.rs
index f7c0cd4..01bfe6e 100644
--- a/crates/kebab-mcp/tests/tools_list.rs
+++ b/crates/kebab-mcp/tests/tools_list.rs
@@ -1,19 +1,21 @@
-//! Integration: `build_tools_vec` returns 4 tools with correct names and
+//! Integration: `build_tools_vec` returns 6 tools with correct names and
//! inputSchema. Uses the extracted `pub fn build_tools_vec()` helper — no
//! transport or RequestContext needed.
use kebab_mcp::build_tools_vec;
#[test]
-fn tools_list_returns_four_tools() {
+fn tools_list_returns_six_tools() {
let tools = build_tools_vec();
- assert_eq!(tools.len(), 4, "expected exactly 4 tools, got {}", tools.len());
+ assert_eq!(tools.len(), 6, "expected exactly 6 tools, got {}", tools.len());
let names: Vec<&str> = tools.iter().map(|t| t.name.as_ref()).collect();
assert!(names.contains(&"schema"), "missing 'schema' tool");
assert!(names.contains(&"doctor"), "missing 'doctor' tool");
assert!(names.contains(&"search"), "missing 'search' tool");
assert!(names.contains(&"ask"), "missing 'ask' tool");
+ assert!(names.contains(&"ingest_file"), "missing 'ingest_file' tool");
+ assert!(names.contains(&"ingest_stdin"), "missing 'ingest_stdin' tool");
}
#[test]
--
2.49.1
From 71c2bbdc97d2368f46de5f2a2c54a182bad4caf3 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:14:07 +0900
Subject: [PATCH 10/16] =?UTF-8?q?=F0=9F=A7=AA=20test(kebab-mcp):=20ingest?=
=?UTF-8?q?=5Ffile=20+=20ingest=5Fstdin=20integration=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Opus 4.7 (1M context)
---
.../kebab-mcp/tests/tools_call_ingest_file.rs | 53 +++++++++++
.../tests/tools_call_ingest_stdin.rs | 89 +++++++++++++++++++
2 files changed, 142 insertions(+)
create mode 100644 crates/kebab-mcp/tests/tools_call_ingest_file.rs
create mode 100644 crates/kebab-mcp/tests/tools_call_ingest_stdin.rs
diff --git a/crates/kebab-mcp/tests/tools_call_ingest_file.rs b/crates/kebab-mcp/tests/tools_call_ingest_file.rs
new file mode 100644
index 0000000..e9eab6e
--- /dev/null
+++ b/crates/kebab-mcp/tests/tools_call_ingest_file.rs
@@ -0,0 +1,53 @@
+//! Integration: tools/call name=ingest_file → ingest_report.v1.
+
+use std::fs;
+
+use kebab_config::Config;
+use kebab_mcp::{KebabAppState, KebabHandler};
+use rmcp::model::RawContent;
+
+/// Happy path: `ingest_file` on a file that sits outside the workspace root returns a non-error
+/// result whose text content is `ingest_report.v1` JSON with `new == 1`.
+#[tokio::test]
+async fn ingest_file_tool_returns_ingest_report_v1() {
+    let tmp = tempfile::tempdir().unwrap();
+    let (notes_dir, data_dir) = (tmp.path().join("notes"), tmp.path().join("data"));
+    for created in [&notes_dir, &data_dir] {
+        fs::create_dir_all(created).unwrap();
+    }
+
+    // Default config re-pointed at the temp dirs, with embedding provider "none" / 0 dimensions.
+    let mut config = Config::defaults();
+    config.workspace.root = notes_dir.to_string_lossy().into_owned();
+    config.storage.data_dir = data_dir.to_string_lossy().into_owned();
+    config.models.embedding.provider = String::from("none");
+    config.models.embedding.dimensions = 0;
+
+    // The source file is a sibling of `notes/`, i.e. not inside the workspace root.
+    let source = tmp.path().join("doc.md");
+    fs::write(&source, "# Title\n\nbody.").unwrap();
+
+    let handler = KebabHandler::new(KebabAppState::new(config, None));
+    let state = handler.state().clone();
+    let path = source.to_string_lossy().into_owned();
+    let result = tokio::task::spawn_blocking(move || {
+        kebab_mcp::tools::ingest_file::handle(
+            &state,
+            kebab_mcp::tools::ingest_file::IngestFileInput { path },
+        )
+    })
+    .await
+    .unwrap();
+
+    assert!(!result.is_error.unwrap_or(false), "{result:?}");
+    let text = match &result.content.first().unwrap().raw {
+        RawContent::Text(t) => &t.text,
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let v: serde_json::Value = serde_json::from_str(text).unwrap();
+    assert_eq!(
+        v.get("schema_version").and_then(|s| s.as_str()),
+        Some("ingest_report.v1")
+    );
+    assert_eq!(v.get("new").and_then(|n| n.as_u64()), Some(1));
+}
diff --git a/crates/kebab-mcp/tests/tools_call_ingest_stdin.rs b/crates/kebab-mcp/tests/tools_call_ingest_stdin.rs
new file mode 100644
index 0000000..45943d6
--- /dev/null
+++ b/crates/kebab-mcp/tests/tools_call_ingest_stdin.rs
@@ -0,0 +1,89 @@
+//! Integration: tools/call name=ingest_stdin → ingest_report.v1.
+//! Frontmatter precheck path also covered.
+
+use std::fs;
+
+use kebab_config::Config;
+use kebab_mcp::KebabAppState;
+use rmcp::model::RawContent;
+
+/// Builds app state under `dir`: creates `notes/` + `data/` and sets embedding provider "none".
+fn fresh_state(dir: &std::path::Path) -> KebabAppState {
+    let (notes_dir, data_dir) = (dir.join("notes"), dir.join("data"));
+    for created in [&notes_dir, &data_dir] {
+        fs::create_dir_all(created).unwrap();
+    }
+    let mut config = Config::defaults();
+    config.workspace.root = notes_dir.to_string_lossy().into_owned();
+    config.storage.data_dir = data_dir.to_string_lossy().into_owned();
+    config.models.embedding.provider = String::from("none");
+    config.models.embedding.dimensions = 0;
+    KebabAppState::new(config, None)
+}
+
+/// Happy path: a markdown body plus title and source URI yields a non-error result whose
+/// text content is `ingest_report.v1` JSON with `new == 1`.
+#[tokio::test]
+async fn ingest_stdin_tool_returns_ingest_report_v1() {
+    let tmp = tempfile::tempdir().unwrap();
+    let state = fresh_state(tmp.path());
+    let worker_state = state.clone();
+    // Plain markdown body with both metadata fields set.
+    let input = kebab_mcp::tools::ingest_stdin::IngestStdinInput {
+        content: "## Body".to_string(),
+        title: "X".to_string(),
+        source_uri: Some("https://example.com/x".to_string()),
+    };
+
+    let result = tokio::task::spawn_blocking(move || {
+        kebab_mcp::tools::ingest_stdin::handle(&worker_state, input)
+    })
+    .await
+    .unwrap();
+
+    // The first content item must be text carrying the JSON report.
+    assert!(!result.is_error.unwrap_or(false), "{result:?}");
+    let text = match &result.content.first().unwrap().raw {
+        RawContent::Text(t) => &t.text,
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let report: serde_json::Value = serde_json::from_str(text).unwrap();
+    assert_eq!(
+        report["schema_version"].as_str(),
+        Some("ingest_report.v1")
+    );
+    assert_eq!(report["new"].as_u64(), Some(1));
+}
+
+/// Content that already starts with a `---` frontmatter block must come back flagged as an
+/// error, with `error.v1` JSON as the text content.
+#[tokio::test]
+async fn ingest_stdin_tool_emits_error_v1_on_existing_frontmatter() {
+    let tmp = tempfile::tempdir().unwrap();
+    let state = fresh_state(tmp.path());
+    let worker_state = state.clone();
+    // The body carries its own frontmatter block; no source URI is supplied.
+    let input = kebab_mcp::tools::ingest_stdin::IngestStdinInput {
+        content: "---\ntitle: Existing\n---\n\n## Body".to_string(),
+        title: "New".to_string(),
+        source_uri: None,
+    };
+
+    let result = tokio::task::spawn_blocking(move || {
+        kebab_mcp::tools::ingest_stdin::handle(&worker_state, input)
+    })
+    .await
+    .unwrap();
+
+    // The error flag must be set explicitly, not merely absent.
+    assert_eq!(result.is_error, Some(true), "{result:?}");
+    let text = match &result.content.first().unwrap().raw {
+        RawContent::Text(t) => &t.text,
+        other => panic!("expected text content, got {other:?}"),
+    };
+    let payload: serde_json::Value = serde_json::from_str(text).unwrap();
+    assert_eq!(
+        payload["schema_version"].as_str(),
+        Some("error.v1")
+    );
+}
--
2.49.1
From 345a4f363ac976b3c16e421c1b7b93d008a10efb Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:16:45 +0900
Subject: [PATCH 11/16] =?UTF-8?q?=F0=9F=93=9D=20docs:=20sync=20README=20/?=
=?UTF-8?q?=20HANDOFF=20/=20CLAUDE=20/=20skill=20/=20design=20for=20fb-31?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- README 명령 표 에 `kebab ingest-file` + `kebab ingest-stdin` 두 row + MCP tool list 4 → 6.
- HANDOFF post-도그푸딩 항목 한 줄.
- CLAUDE.md `_external/` 디렉토리 + naming convention 한 줄.
- integrations skill — Recipe D (agent fetched web doc) + MCP tool list 갱신.
- design §6.7 `_external/` subdirectory 절 신설.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
CLAUDE.md | 1 +
HANDOFF.md | 1 +
README.md | 6 ++++--
.../specs/2026-04-27-kebab-final-form-design.md | 6 ++++++
integrations/claude-code/kebab/SKILL.md | 17 ++++++++++++++++-
5 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/CLAUDE.md b/CLAUDE.md
index 6ed5f32..4dbd472 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -94,6 +94,7 @@ Release 절차:
- XDG paths: `~/.config/kebab/`, `~/.local/share/kebab/`, `~/.cache/kebab/`, `~/.local/state/kebab/`.
- SQLite filename: `kebab.sqlite` (under `data_dir`).
- Workspace ignore: `.kebabignore` (per directory).
+- `_external/` (under `workspace.root`): single-file / stdin ingest 가 외부 file 을 deterministic 명명 (`<hash12>.<ext>`) 으로 copy. 첫 생성 시 `.kebabignore` 자동 append.
The migration from the old `kb` name lives in commits `911fb49 / f1a448d / f9714aa`. If you spot a leftover `kb` reference, treat it as a leftover and fix it (the rename PR sweep covered crates/, docs/, tasks/, README, design doc, fixtures — but workspace root `Cargo.toml` comments needed a follow-up; assume similar misses are possible).
diff --git a/HANDOFF.md b/HANDOFF.md
index bd6c164..b32ed74 100644
--- a/HANDOFF.md
+++ b/HANDOFF.md
@@ -31,6 +31,7 @@ P0~P5 직렬. P6~P9 P5 이후 병렬 가능.
머지 후 발견된 모든 deviation / hotfix 의 dated 로그는 [tasks/HOTFIXES.md](tasks/HOTFIXES.md). 본 요약은 \"누군가가 인수받을 때 알아두면 시간을 많이 절약하는\" 항목만:
+- **2026-05-07 P9 post-도그푸딩 (p9-fb-31)** — `kebab ingest-file <path>` + `kebab ingest-stdin --title <title>` 두 신규 subcommand + MCP tool `ingest_file` / `ingest_stdin` (4 → 6 tool). agent 가 fetch 한 web markdown / 외부 file 을 KB 에 즉시 저장. workspace 외부 file 은 `<workspace.root>/_external/<hash12>.<ext>` 로 copy (deterministic 명명 → idempotent). `_external/` 디렉토리 첫 생성 시 `.kebabignore` 자동 append (walk 무한 루프 방지). stdin 은 markdown 전용 + flag (`--title`, `--source-uri`) → frontmatter 자동 prepend. .kebabignore 매치 시 stderr warn 후 진행 (explicit ingest = bypass intent). fb-30 의 v1 read-only MCP 정책 변경 — 첫 mutation tool 도입. spec: `tasks/p9/p9-fb-31-single-file-stdin-ingest.md`. design: `docs/superpowers/specs/2026-05-07-p9-fb-31-single-file-stdin-ingest-design.md`.
- **2026-05-07 P9 post-도그푸딩 (p9-fb-30)** — `kebab mcp` 신규 subcommand + new crate `kebab-mcp` (lib only) — stdio JSON-RPC server. 4 read-only tool (`search` / `ask` / `schema` / `doctor`) 가 `kebab-app` facade 위에 build. rmcp 1.6 SDK 채택, manual `tools/list` + `tools/call` dispatch (rmcp 의 `#[tool_router]` 매크로 대신). `error_classify` 모듈을 `kebab-cli` → `kebab-app::error_wire` 로 promotion (UI crate 끼리 import 회피, facade 룰 준수). `ErrorV1` 에 `schema_version: String` 필드 추가 — kebab-mcp 의 직접 serialize 경로에서도 wire 정합. `KebabAppState` 가 `(Config, Option)` carry — doctor tool 의 path-aware behavior 위해. ask + search arm 의 `tokio::task::spawn_blocking` wrap — `OllamaLanguageModel` 의 reqwest blocking client 가 async 안에서 panic 회피. capability flag `mcp_server` `false` → `true`. agent integration MVP 완성 — Claude Code / Cursor / OpenAI Agents 등 host-agnostic 사용 가능. spec: `tasks/p9/p9-fb-30-mcp-server.md`. design: `docs/superpowers/specs/2026-05-07-p9-fb-30-mcp-server-design.md`.
- **P3-5 / P4-3 `--config` 누락** — `kebab-cli` 가 `--config ` 를 honor 하려면 `kebab_app::*_with_config` companion 을 호출해야 함. 두 번 같은 모양으로 회귀했음.
- **P6-2 OCR 기본 엔진** — spec literal 의 Tesseract 가 시스템 dep 부담으로 거부됨, Ollama vision LM 으로 대체. `OcrEngine` trait 그대로라 future swap 가능.
diff --git a/README.md b/README.md
index 170439b..717ae49 100644
--- a/README.md
+++ b/README.md
@@ -80,7 +80,9 @@ kebab doctor
| `kebab reset [--all / --data-only / --vector-only / --config-only] [--yes]` | XDG 데이터 wipe. **Irreversible.** TTY 면 confirm prompt, 아니면 `--yes` 필수. `--vector-only` 는 SQLite `embedding_records` 도 함께 truncate (orphan 방지) |
| `kebab eval run / compare` | golden query 회귀 측정 |
| `kebab schema [--json]` | introspection — wire schemas / capabilities / models / stats 한 번에. `--json` 은 `schema.v1` wire; 사람 모드는 서식 출력. |
-| `kebab mcp` | MCP (Model Context Protocol) stdio server. agent host (Claude Code / Cursor / OpenAI Agents) 가 spawn 하여 tool 호출 (`search` / `ask` / `schema` / `doctor`). `--config` honor. |
+| `kebab ingest-file <path>` | 단일 파일 ingest (workspace 외부 가능). 바이트는 `<workspace.root>/_external/<hash12>.<ext>` 로 copy. `.kebabignore` 매치 시 stderr warn 후 진행 (explicit ingest 가 bypass intent). |
+| `kebab ingest-stdin --title <title> [--source-uri <uri>]` | stdin 의 markdown 본문 ingest. frontmatter (title + source_uri) 자동 prepend. v1 markdown only. |
+| `kebab mcp` | MCP (Model Context Protocol) stdio server. agent host (Claude Code / Cursor / OpenAI Agents) 가 spawn 하여 tool 호출 (`search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`). `--config` honor. |
모든 명령에 `--json` 플래그. 출력은 frozen wire schema v1 (`schema_version` 항상 포함, 예: `ingest_report.v1`, `ingest_progress.v1`, `search_hit.v1`, `answer.v1`, `doctor.v1`, `reset_report.v1`, `schema.v1`). `--json` 모드에서 fatal error 는 stderr 에 `error.v1` ndjson 으로 emit (exit code 0/1/2/3 unchanged).
@@ -179,7 +181,7 @@ config 예시는 [docs/SMOKE.md](docs/SMOKE.md) 의 `/tmp/kebab-smoke/config.tom
}
```
-Claude Code 가 session 시작 시 `kebab mcp` 를 spawn — process 가 session 동안 살아 있어 SQLite / Lance / fastembed 가 hot. 4 tool: `search` (lexical/vector/hybrid 검색), `ask` (RAG 답변, optional `session_id` for multi-turn + optional `mode` override), `schema` (capability 조회), `doctor` (health check). 모든 tool 의 결과는 wire schema v1 JSON 으로 text content 안에 직렬화 — agent 가 parse 후 사용. tool dispatch 실패 (잘못된 config / 미초기화 KB 등) 는 `isError: true` + error.v1 content; refusal / no-hit / unhealthy 는 정상 응답 (semantic flag 으로 분기).
+Claude Code 가 session 시작 시 `kebab mcp` 를 spawn — process 가 session 동안 살아 있어 SQLite / Lance / fastembed 가 hot. 6 tool: `search` (lexical/vector/hybrid 검색), `ask` (RAG 답변, optional `session_id` for multi-turn + optional `mode` override), `schema` (capability 조회), `doctor` (health check), `ingest_file` (단일 파일 KB 저장), `ingest_stdin` (markdown 본문 + title/source_uri 로 KB 저장). 모든 tool 의 결과는 wire schema v1 JSON 으로 text content 안에 직렬화 — agent 가 parse 후 사용. tool dispatch 실패 (잘못된 config / 미초기화 KB 등) 는 `isError: true` + error.v1 content; refusal / no-hit / unhealthy 는 정상 응답 (semantic flag 으로 분기).
## 비-목표
diff --git a/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md b/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
index ad2f9fd..5f5835f 100644
--- a/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
+++ b/docs/superpowers/specs/2026-04-27-kebab-final-form-design.md
@@ -1206,6 +1206,12 @@ hint edit ~/.config/kebab/config.toml then `kebab ingest ~/KnowledgeBase`
- 항상 POSIX path 정규화 후 DB 저장. `to_posix` 단일 함수.
- 심볼릭 링크: 1차 follow + 무한루프 detect (`canonicalize` 후 set 추적).
+### 6.7 `_external/` subdirectory (fb-31)
+
+`<workspace.root>/_external/` 가 single-file / stdin ingest 의 destination. 명명: `<hash12>.<ext>` (12-char hex prefix of content hash + 원래 extension). deterministic — 동일 content 재 ingest 면 idempotent.
+
+첫 생성 시 `<workspace.root>/.kebabignore` 에 `_external/` line 자동 append — 향후 `kebab ingest` 전체 walk 가 이 디렉토리 재 walk 안 함 (re-ingestion 무한 루프 방지).
+
---
## 7. Trait contracts (kebab-core)
diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md
index 2f74cda..2c9cebb 100644
--- a/integrations/claude-code/kebab/SKILL.md
+++ b/integrations/claude-code/kebab/SKILL.md
@@ -86,10 +86,25 @@ Since v0.4.0, `kebab` exposes an MCP (Model Context Protocol) stdio server. Conf
}
```
-Claude Code spawns `kebab mcp` at session start; the process stays alive across all tool calls so SQLite / Lance / fastembed are hot after the first call. 4 tools available: `search` / `ask` / `schema` / `doctor`. Same wire shapes as the CLI `--json` mode — see `Two surfaces, pick the right one` above for the same guidance.
+Claude Code spawns `kebab mcp` at session start; the process stays alive across all tool calls so SQLite / Lance / fastembed are hot after the first call. 6 tools available: `search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`. Same wire shapes as the CLI `--json` mode — see `Two surfaces, pick the right one` above for the same guidance.
If your host doesn't support MCP, the CLI subprocess pattern (`kebab search --json` / `kebab ask --json`) above continues to work.
+## Recipe D — agent fetched a web doc, save to KB
+
+When you've fetched a markdown article (e.g. via WebFetch) that the user might query later:
+
+1. Call MCP tool `ingest_stdin` with:
+ - `content`: the markdown body
+ - `title`: a stable title (article H1 or page title)
+ - `source_uri`: the URL you fetched from
+
+The doc lands in `<workspace.root>/_external/<hash12>.md` and is indexed for `search` / `ask` immediately. Subsequent calls with identical content are no-ops (incremental ingest detects unchanged hash).
+
+Don't re-ingest the same article in a loop — content-hash dedup makes it safe but wastes embedding cost.
+
+For files already on disk that the user references, prefer `ingest_file` with the path — kebab handles the copy + dedup.
+
## Workflow recipes
**Recipe A — user asks an internal-context question, you want grounded answer:**
--
2.49.1
From e041173e8e39afaa95debf29acaceb3db81835fe Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:18:50 +0900
Subject: [PATCH 12/16] =?UTF-8?q?=F0=9F=93=9D=20docs(tasks):=20HOTFIXES=20?=
=?UTF-8?q?entry=20+=20p9-fb-31=20status=20=E2=86=92=20completed?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-Authored-By: Claude Opus 4.7 (1M context)
---
tasks/HOTFIXES.md | 33 +++++++++++++++++++
tasks/p9/p9-fb-31-single-file-stdin-ingest.md | 4 +--
2 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/tasks/HOTFIXES.md b/tasks/HOTFIXES.md
index 6dc93e2..ecd605a 100644
--- a/tasks/HOTFIXES.md
+++ b/tasks/HOTFIXES.md
@@ -14,6 +14,39 @@ historical contract that was implemented; this file accumulates the
deltas so phase 5+ readers can find the live behavior without diffing
git history.
+## 2026-05-07 — p9-fb-31 (post-dogfooding): single-file / stdin ingest
+
+**Source feedback**: 사용자 도그푸딩 2026-05-06 — agent (Claude Code via MCP, fb-30) 가 web fetch 한 markdown / 단일 외부 file 을 KB 에 저장하려면 `kebab ingest` 전체 walk 재실행 비효율. agent 메모리상 string contents 도 stdin ingest 가능해야.
+
+**Live binding 변경**:
+
+- 신규 subcommand `kebab ingest-file <path>` — 단일 file ingest, workspace 외부 path 가능.
+- 신규 subcommand `kebab ingest-stdin --title <title> [--source-uri <uri>]` — stdin 의 markdown 본문 ingest, v1 markdown only.
+- 신규 MCP tool `ingest_file` + `ingest_stdin` — fb-30 v1 read-only 정책 변경, 첫 mutation surface 도입 (의도된 진화). tools/list 4 → 6.
+- 외부 file 저장 정책: `<workspace.root>/_external/<hash12>.<ext>` 로 copy. deterministic 명명 → idempotent. `_external/` 첫 생성 시 `.kebabignore` 자동 append (walk 무한 루프 방지).
+- `.kebabignore` 매치 시 stderr warn (`warn: matches .kebabignore patterns; proceeding (explicit ingest bypasses ignore)`) 후 진행. `--force-ignore` flag 불필요 — explicit ingest 가 default bypass intent.
+- stdin frontmatter 처리: 본문이 `---` 으로 시작하면 error (`use kebab ingest-file`); 그 외 frontmatter block prepend (title + 옵션 source_uri, YAML 더블쿼트 escape).
+- `kebab-app::external` 신규 모듈 — `ensure_external_dir`, `ensure_kebabignore_entry`, `copy_to_external`, `inject_frontmatter` helper. kebab-cli + kebab-mcp 둘 다 facade 통해 호출.
+- `kebab-app::ingest_file_with_config` + `ingest_stdin_with_config` 신규 facade fn.
+
+**Spec contract impact**: design §6 에 `_external/` subdirectory 절 추가 (실제 §6.7 — 기존 §6 sub-section 이 6.6 까지 채워져 있어 §6.7 로 부착됨; spec stub 의 §6.3 명시는 deviation).
+
+**Tests added**: kebab-app external::tests (14: dir / kebabignore append / copy / inject_frontmatter / yaml_quote), kebab-app integration (3 + 3: ingest_file + ingest_stdin), kebab-cli integration (2: cli_ingest_file + cli_ingest_stdin spawn-based), kebab-mcp integration (1 + 2: tools_call_ingest_file + tools_call_ingest_stdin), tools_list assertion update (4 → 6).
+
+**Known limitation (deferred)**:
+
+- PDF / image stdin — binary stream + base64 처리 v2.
+- `--title` + `--source-uri` 외 metadata field (tags, language, custom kv) — v2.
+- 자동 dedup by source_uri — content hash 기반 dedup 만 (incremental ingest). URI lookup 별 task.
+- Storage quota / TTL — agent 무한 ingest 시 KB 비대 우려. monitor + 별 task.
+- frontmatter merge (stdin 이 이미 frontmatter 보유 시 머지) — v1 은 error.
+- MCP `ingest_file` 의 multi-file batch 입력 — v1 single path. 여러 file 호출은 agent 가 N 회.
+
+**Amends**:
+- design §6 (`_external/` subdirectory subsection 추가, §6.7 위치).
+- spec `tasks/p9/p9-fb-31-single-file-stdin-ingest.md` (status `open` → `completed`).
+- spec stub 의 §6.3 명시 → 실제 §6.7 (기존 §6 구조 우선).
+
## 2026-05-07 — p9-fb-30 (post-dogfooding): MCP server (stdio) — agent integration MVP
**Source feedback**: 사용자 도그푸딩 2026-05-06 — Claude Code 같은 AI agent 가 kebab CLI 를 사용하는 것이 궁극 목표. 현재 surface 는 Claude Code 전용 skill (subprocess wrapper) 만 — host 무관 표준 통신 없음. fb-29 HTTP daemon 은 single-user local-first 환경 대비 비대로 deferred (2026-05-07), fb-30 stdio MCP 가 동일 사용자 가치 (agent integration + session 동안 hot cache) 를 daemon 복잡도 없이 제공.
diff --git a/tasks/p9/p9-fb-31-single-file-stdin-ingest.md b/tasks/p9/p9-fb-31-single-file-stdin-ingest.md
index 74026eb..5f1ed8d 100644
--- a/tasks/p9/p9-fb-31-single-file-stdin-ingest.md
+++ b/tasks/p9/p9-fb-31-single-file-stdin-ingest.md
@@ -3,7 +3,7 @@ phase: P9
component: kebab-cli + kebab-app
task_id: p9-fb-31
title: "Single-file / stdin ingest — agent on-demand 저장"
-status: open
+status: completed
target_version: 0.3.0
depends_on: []
unblocks: []
@@ -14,7 +14,7 @@ source_feedback: 사용자 도그푸딩 2026-05-06 — agent 가 읽은 article
# p9-fb-31 — Single-file / stdin ingest
-> ⏳ **백로그 only — 미구현.** 본 spec 은 도그푸딩 피드백 skeleton. 구현 착수 전 [superpowers:brainstorming](../../docs/superpowers/) 으로 설계 단계 선행 필요. workspace 외부 file 의 저장 위치 / metadata 입력 방식 / .kebabignore 우회 정책 brainstorm 후 확정.
+> ✅ **구현 완료.** 본 spec 은 구현 시점의 frozen 상태. post-merge deviation 은 [HOTFIXES.md](../HOTFIXES.md) 의 `2026-05-07 — p9-fb-31` 항목 참조 — live source of truth.
## 증상 / 동기
--
2.49.1
From ccee30037d925d01a0120de1b8163ec35f6e13d9 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:26:51 +0900
Subject: [PATCH 13/16] =?UTF-8?q?=F0=9F=A7=AA=20test(kebab-cli):=20update?=
=?UTF-8?q?=20cli=5Fmcp=5Fsmoke=20tools/list=20assertion=204=20=E2=86=92?=
=?UTF-8?q?=206=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
fb-31 added ingest_file + ingest_stdin MCP tools (Task 9) but the
spawn-based smoke test in cli_mcp_smoke.rs still asserted the fb-30
count of 4. Bump to 6 to match the live tools/list response.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-cli/tests/cli_mcp_smoke.rs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/crates/kebab-cli/tests/cli_mcp_smoke.rs b/crates/kebab-cli/tests/cli_mcp_smoke.rs
index bdfe335..1c0b07b 100644
--- a/crates/kebab-cli/tests/cli_mcp_smoke.rs
+++ b/crates/kebab-cli/tests/cli_mcp_smoke.rs
@@ -66,8 +66,8 @@ fn cli_mcp_initialize_then_tools_list() {
.expect("tools/list result.tools must be an array");
assert_eq!(
tools.len(),
- 4,
- "expected 4 tools (schema, doctor, search, ask), got {}: {list}",
+ 6,
+ "expected 6 tools (schema, doctor, search, ask, ingest_file, ingest_stdin), got {}: {list}",
tools.len()
);
--
2.49.1
From dc24cb34b1f29fa1c1f5b48a4bf0773a330fed61 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:38:19 +0900
Subject: [PATCH 14/16] =?UTF-8?q?=F0=9F=9A=91=20fix(fb-31):=20apply=20fina?=
=?UTF-8?q?l=20review=20nits?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- kebab-app: add #[doc(hidden)] to ingest_stdin_with_config (CLAUDE.md
convention — all *_with_config functions should have this attribute;
fb-31's first impl missed it on the second facade fn).
- SKILL.md: "Since v0.4.0" → "Since v0.3.1" (MCP shipped in fb-30
release v0.3.1; the wrong version claim was introduced in fb-30 doc
sync and carried forward into fb-31).
- tools_call_ingest_file: add idempotency test (second call with same
content → unchanged=1, new=0). Spec called for two tests; first impl
shipped only the happy path.
Version bump 0.3.1 → 0.3.2 deferred to separate `chore/bump-v0.3.2` PR
mirroring fb-27 + fb-30 precedent (commits 73f5d73 / 5495d96).
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/lib.rs | 1 +
.../kebab-mcp/tests/tools_call_ingest_file.rs | 64 +++++++++++++++++++
integrations/claude-code/kebab/SKILL.md | 2 +-
3 files changed, 66 insertions(+), 1 deletion(-)
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index 6b3ed4c..20ee48b 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -1953,6 +1953,7 @@ pub fn ingest_file_with_config(
/// Errors if `body` already starts with `---` (the user should call
/// `ingest_file_with_config` directly for files that already carry
/// frontmatter).
+#[doc(hidden)]
pub fn ingest_stdin_with_config(
config: kebab_config::Config,
body: &str,
diff --git a/crates/kebab-mcp/tests/tools_call_ingest_file.rs b/crates/kebab-mcp/tests/tools_call_ingest_file.rs
index e9eab6e..ff112b8 100644
--- a/crates/kebab-mcp/tests/tools_call_ingest_file.rs
+++ b/crates/kebab-mcp/tests/tools_call_ingest_file.rs
@@ -51,3 +51,67 @@ async fn ingest_file_tool_returns_ingest_report_v1() {
);
assert_eq!(v.get("new").and_then(|n| n.as_u64()), Some(1));
}
+
+#[tokio::test]
+async fn ingest_file_tool_idempotent_on_second_call() {
+    let dir = tempfile::tempdir().unwrap();
+    let workspace = dir.path().join("notes");
+    let data = dir.path().join("data");
+    std::fs::create_dir_all(&workspace).unwrap();
+    std::fs::create_dir_all(&data).unwrap();
+
+    let mut cfg = kebab_config::Config::defaults();
+    cfg.workspace.root = workspace.to_string_lossy().into_owned();
+    cfg.storage.data_dir = data.to_string_lossy().into_owned();
+    cfg.models.embedding.provider = "none".to_string();
+    cfg.models.embedding.dimensions = 0;
+
+    let src = dir.path().join("doc.md");
+    std::fs::write(&src, "# A\n\nbody.").unwrap();
+
+    let state = kebab_mcp::KebabAppState::new(cfg, None);
+    let handler = kebab_mcp::KebabHandler::new(state);
+
+    // First call: the source file has not been ingested yet, so the report must show new=1.
+    let r1 = tokio::task::spawn_blocking({
+        let state = handler.state().clone();
+        let path = src.to_string_lossy().into_owned();
+        move || {
+            kebab_mcp::tools::ingest_file::handle(
+                &state,
+                kebab_mcp::tools::ingest_file::IngestFileInput { path },
+            )
+        }
+    })
+    .await
+    .unwrap();
+    assert!(!r1.is_error.unwrap_or(false));
+    let text1 = match &r1.content.first().unwrap().raw {
+        rmcp::model::RawContent::Text(t) => &t.text,
+        other => panic!("expected text, got {other:?}"),
+    };
+    let v1: serde_json::Value = serde_json::from_str(text1).unwrap();
+    assert_eq!(v1.get("new").and_then(|n| n.as_u64()), Some(1));
+
+    // Second call on the same, unmodified source path: expect new=0 and unchanged=1.
+    let r2 = tokio::task::spawn_blocking({
+        let state = handler.state().clone();
+        let path = src.to_string_lossy().into_owned();
+        move || {
+            kebab_mcp::tools::ingest_file::handle(
+                &state,
+                kebab_mcp::tools::ingest_file::IngestFileInput { path },
+            )
+        }
+    })
+    .await
+    .unwrap();
+    assert!(!r2.is_error.unwrap_or(false));
+    let text2 = match &r2.content.first().unwrap().raw {
+        rmcp::model::RawContent::Text(t) => &t.text,
+        other => panic!("expected text, got {other:?}"),
+    };
+    let v2: serde_json::Value = serde_json::from_str(text2).unwrap();
+    assert_eq!(v2.get("new").and_then(|n| n.as_u64()), Some(0), "{v2:?}");
+    assert_eq!(v2.get("unchanged").and_then(|n| n.as_u64()), Some(1), "{v2:?}");
+}
diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md
index 2c9cebb..362af61 100644
--- a/integrations/claude-code/kebab/SKILL.md
+++ b/integrations/claude-code/kebab/SKILL.md
@@ -73,7 +73,7 @@ If a call fails or returns suspicious output, run `kebab doctor` first — it su
## MCP server (recommended over CLI subprocess wrapping)
-Since v0.4.0, `kebab` exposes an MCP (Model Context Protocol) stdio server. Configure once in `~/.claude/mcp.json`:
+Since v0.3.1, `kebab` exposes an MCP (Model Context Protocol) stdio server. Configure once in `~/.claude/mcp.json`:
```json
{
--
2.49.1
From 7f5739d8fbd8bcc607d5ea491db43dcfcc0cb004 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:46:55 +0900
Subject: [PATCH 15/16] =?UTF-8?q?=F0=9F=8F=97=EF=B8=8F=20refactor(fb-31):?=
=?UTF-8?q?=20apply=20round=201=20review=20nits?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- ingest_file_with_config: lowercase normalize ext (caller-side) +
early error on unsupported extension (`.docx` etc. now Err with
helpful message instead of silent skipped_by_extension counter).
New test ingest_file_errors_on_unsupported_extension.
- ingest_stdin_with_config: doc comment explaining intentional
double-call of ensure helpers (idempotent + ~ms negligible).
- external::inject_frontmatter: simplify precheck via single
trim_start binding + add CR-only line ending edge case.
- external::inject_frontmatter: doc note on yaml_quote escape
contract (agent-supplied titles with special chars are safe).
Round 1 review summary: http://gitea.altair823.xyz/altair823-org/kebab/pulls/111#issuecomment-1875
Co-Authored-By: Claude Opus 4.7 (1M context)
---
crates/kebab-app/src/external.rs | 7 ++++++-
crates/kebab-app/src/lib.rs | 18 ++++++++++++++++--
crates/kebab-app/tests/ingest_file.rs | 22 ++++++++++++++++++++++
3 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/crates/kebab-app/src/external.rs b/crates/kebab-app/src/external.rs
index 61fe8a6..7a51990 100644
--- a/crates/kebab-app/src/external.rs
+++ b/crates/kebab-app/src/external.rs
@@ -78,12 +78,17 @@ pub fn copy_to_external(
/// markdown string. Errors if `body` already starts with `---` (the user
/// should use `ingest_file_with_config` for files that already carry
/// frontmatter).
+///
+/// Internal `yaml_quote` always uses double-quoted YAML form with backslash
+/// escapes for `"` / `\` / control chars — agent-supplied titles with
+/// special characters are safe.
pub fn inject_frontmatter(
body: &str,
title: &str,
source_uri: Option<&str>,
) -> Result {
- if body.trim_start().starts_with("---\n") || body.trim_start().starts_with("---\r\n") {
+ let head = body.trim_start();
+ if head.starts_with("---\n") || head.starts_with("---\r\n") || head.starts_with("---\r") {
anyhow::bail!(
"stdin already has frontmatter; use `kebab ingest-file` for files with metadata"
);
diff --git a/crates/kebab-app/src/lib.rs b/crates/kebab-app/src/lib.rs
index 20ee48b..fa5b242 100644
--- a/crates/kebab-app/src/lib.rs
+++ b/crates/kebab-app/src/lib.rs
@@ -1899,10 +1899,19 @@ pub fn ingest_file_with_config(
anyhow::bail!("ingest-file: not a regular file: {}", path.display());
}
- let ext = path
+ let ext_raw = path
.extension()
.and_then(|e| e.to_str())
.ok_or_else(|| anyhow::anyhow!("ingest-file: source has no extension: {}", path.display()))?;
+ let ext = ext_raw.to_lowercase();
+
+ const SUPPORTED_EXTS: &[&str] = &["md", "pdf", "png", "jpg", "jpeg"];
+ if !SUPPORTED_EXTS.contains(&ext.as_str()) {
+ anyhow::bail!(
+ "ingest-file: unsupported extension `.{}` (supported: {:?})",
+ ext, SUPPORTED_EXTS
+ );
+ }
let bytes = std::fs::read(path)
.with_context(|| format!("ingest-file: read source {}", path.display()))?;
@@ -1925,7 +1934,7 @@ pub fn ingest_file_with_config(
.context("ingest-file: append _external/ to .kebabignore")?;
// Copy bytes to _external/..
- let dest = crate::external::copy_to_external(&external_dir, &bytes, ext)
+ let dest = crate::external::copy_to_external(&external_dir, &bytes, &ext)
.context("ingest-file: copy to _external")?;
// Build a SourceScope that targets _external/ with include filter
@@ -1963,6 +1972,11 @@ pub fn ingest_stdin_with_config(
let wrapped = crate::external::inject_frontmatter(body, title, source_uri)?;
let workspace_root = config.resolve_workspace_root();
+ // Note: ensure_external_dir + ensure_kebabignore_entry + copy_to_external
+ // are called here AND inside ingest_file_with_config. All three are
+ // idempotent; the redundancy is intentional — keeping stdin's wrapped
+ // bytes accessible by `ingest_file_with_config` requires the dest path
+ // to exist. The ~ms double-stat overhead is negligible at v1 scale.
let external_dir = crate::external::ensure_external_dir(&workspace_root)?;
crate::external::ensure_kebabignore_entry(&workspace_root)?;
diff --git a/crates/kebab-app/tests/ingest_file.rs b/crates/kebab-app/tests/ingest_file.rs
index b70fa4d..85255f6 100644
--- a/crates/kebab-app/tests/ingest_file.rs
+++ b/crates/kebab-app/tests/ingest_file.rs
@@ -87,3 +87,25 @@ fn ingest_file_errors_on_missing_path() {
let err = kebab_app::ingest_file_with_config(cfg, &nonexistent).unwrap_err();
assert!(err.to_string().contains("does not exist"), "{err}");
}
+
+#[test]
+fn ingest_file_errors_on_unsupported_extension() {
+    let dir = tempfile::tempdir().unwrap();
+    let workspace = dir.path().join("notes");
+    let data = dir.path().join("data");
+    fs::create_dir_all(&workspace).unwrap();
+    fs::create_dir_all(&data).unwrap();
+    // Point the config at the temp workspace/data dirs; embedding provider is set to "none".
+    let mut cfg = Config::defaults();
+    cfg.workspace.root = workspace.to_string_lossy().into_owned();
+    cfg.storage.data_dir = data.to_string_lossy().into_owned();
+    cfg.models.embedding.provider = "none".to_string();
+    cfg.models.embedding.dimensions = 0;
+    // A regular file whose extension is outside the supported set (md / pdf / png / jpg / jpeg).
+    let docx = dir.path().join("doc.docx");
+    fs::write(&docx, b"fake docx bytes").unwrap();
+    // The facade must fail and name the offending extension (formatted as `.{ext}`) in the message.
+    let err = kebab_app::ingest_file_with_config(cfg, &docx).unwrap_err();
+    assert!(err.to_string().contains("unsupported extension"), "{err}");
+    assert!(err.to_string().contains(".docx"), "{err}");
+}
--
2.49.1
From 47bfd518c8796e442492998657ebfb8384327cc0 Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Thu, 7 May 2026 18:53:59 +0900
Subject: [PATCH 16/16] =?UTF-8?q?=F0=9F=93=9D=20docs:=20comprehensive=20MC?=
=?UTF-8?q?P=20usage=20guide=20(fb-31)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
신규 docs/mcp-usage.md (~280 line) — agent integration 의 종합 가이드:
- Quick start + `--config` thread 예시
- Host config 예시 (Claude Code / Cursor / OpenAI Agents / Copilot CLI)
- 6 tool catalog (search / ask / schema / doctor / ingest_file / ingest_stdin)
각 tool 의 input shape, defaults, output 예시, "언제 사용", mutation
주의사항.
- Troubleshooting — error.v1 의 7 code 별 조치 표 + grounded:false +
doctor !ok + empty search + tool-not-found 시나리오.
- Multi-turn ask + session 관리 — session_id 명명, 새 session 시작
시점, lifetime, single-shot vs session 비교.
- Performance / Security 절.
README.md 의 기존 MCP 절은 quick start 만 유지하고 docs/mcp-usage.md
링크. integrations/claude-code/kebab/SKILL.md 도 동일 cross-link.
agent 사용자 도그푸딩 후속 의견 — host-agnostic 가이드 + 명시적
troubleshooting 표 + multi-turn session 명명 컨벤션 부재 해소.
Co-Authored-By: Claude Opus 4.7 (1M context)
---
README.md | 8 +-
docs/mcp-usage.md | 486 ++++++++++++++++++++++++
integrations/claude-code/kebab/SKILL.md | 2 +
3 files changed, 493 insertions(+), 3 deletions(-)
create mode 100644 docs/mcp-usage.md
diff --git a/README.md b/README.md
index 717ae49..de5dabf 100644
--- a/README.md
+++ b/README.md
@@ -166,9 +166,11 @@ config 예시는 [docs/SMOKE.md](docs/SMOKE.md) 의 `/tmp/kebab-smoke/config.tom
- **MCP server** — stdio JSON-RPC 로 `kebab-app` facade 1:1 노출. `kebab mcp` 참조.
- **HTTP wrapper** — `kebab serve --bind 127.0.0.1:7711` (P+, local-only 가치 신중).
-## MCP 사용 (Claude Code 예시)
+## MCP 사용
-`~/.claude/mcp.json` (또는 host 의 동등 위치):
+`kebab mcp` 가 stdio MCP server. 6 tool: `search` / `ask` / `schema` / `doctor` / `ingest_file` / `ingest_stdin`.
+
+Claude Code 빠른 등록 (`~/.claude/mcp.json` 또는 host 동등 위치):
```json
{
@@ -181,7 +183,7 @@ config 예시는 [docs/SMOKE.md](docs/SMOKE.md) 의 `/tmp/kebab-smoke/config.tom
}
```
-Claude Code 가 session 시작 시 `kebab mcp` 를 spawn — process 가 session 동안 살아 있어 SQLite / Lance / fastembed 가 hot. 6 tool: `search` (lexical/vector/hybrid 검색), `ask` (RAG 답변, optional `session_id` for multi-turn + optional `mode` override), `schema` (capability 조회), `doctor` (health check), `ingest_file` (단일 파일 KB 저장), `ingest_stdin` (markdown 본문 + title/source_uri 로 KB 저장). 모든 tool 의 결과는 wire schema v1 JSON 으로 text content 안에 직렬화 — agent 가 parse 후 사용. tool dispatch 실패 (잘못된 config / 미초기화 KB 등) 는 `isError: true` + error.v1 content; refusal / no-hit / unhealthy 는 정상 응답 (semantic flag 으로 분기).
+자세한 사용법 (Cursor / OpenAI Agents / Copilot CLI config, per-tool 입출력 예시, troubleshooting, multi-turn ask + session 관리, performance / security) — **[docs/mcp-usage.md](docs/mcp-usage.md)** 참조.
## 비-목표
diff --git a/docs/mcp-usage.md b/docs/mcp-usage.md
new file mode 100644
index 0000000..7f3aa85
--- /dev/null
+++ b/docs/mcp-usage.md
@@ -0,0 +1,486 @@
+# MCP usage — agent integration guide
+
+`kebab mcp` runs an MCP (Model Context Protocol) stdio JSON-RPC server. agent host (Claude Code / Cursor / OpenAI Agents / Copilot CLI 등) 가 본 binary 를 spawn 하여 KB 검색 / 답변 / ingest 를 호출.
+
+shipped since **v0.3.1** (fb-30). 6 tool 으로 확장 (v0.3.2, fb-31).
+
+---
+
+## Quick start
+
+binary 를 PATH 에 두고 (`cargo install --path crates/kebab-cli` 또는 release tarball), agent host 의 mcp config 에 등록:
+
+```json
+{
+ "mcpServers": {
+ "kebab": {
+ "command": "kebab",
+ "args": ["mcp"]
+ }
+ }
+}
+```
+
+session 시작 시 host 가 `kebab mcp` 를 spawn — process 가 session 동안 살아 있어 SQLite / Lance / fastembed 가 hot. 첫 tool call 만 cold-start 비용, 이후 sub-100ms.
+
+`--config` 옵션 thread:
+
+```json
+{
+ "mcpServers": {
+ "kebab": {
+ "command": "kebab",
+ "args": ["--config", "/Users/me/.config/kebab/agent.toml", "mcp"]
+ }
+ }
+}
+```
+
+---
+
+## Host config 예시
+
+### Claude Code
+
+`~/.claude/mcp.json` (또는 OS 별 동등 위치):
+
+```json
+{
+ "mcpServers": {
+ "kebab": {
+ "command": "kebab",
+ "args": ["mcp"]
+ }
+ }
+}
+```
+
+session 재시작 후 `kebab` server 가 tool list 에 등장. agent 가 `mcp__kebab__search` / `mcp__kebab__ask` 등 호출 가능.
+
+### Cursor
+
+`~/.cursor/mcp.json`:
+
+```json
+{
+ "mcpServers": {
+ "kebab": {
+ "command": "kebab",
+ "args": ["mcp"]
+ }
+ }
+}
+```
+
+Cursor 의 Composer / Agent 모드에서 활성화.
+
+### OpenAI Agents (`agents-sdk`)
+
+Python:
+
+```python
+from openai_agents import Agent, MCPServerStdio
+
+kebab = MCPServerStdio(
+ name="kebab",
+ params={"command": "kebab", "args": ["mcp"]},
+)
+
+agent = Agent(
+ name="researcher",
+ mcp_servers=[kebab],
+)
+```
+
+Node:
+
+```ts
+import { Agent, MCPServerStdio } from "openai-agents";
+
+const kebab = new MCPServerStdio({
+ name: "kebab",
+ params: { command: "kebab", args: ["mcp"] },
+});
+
+const agent = new Agent({ name: "researcher", mcpServers: [kebab] });
+```
+
+### Copilot CLI
+
+`~/.config/copilot-cli/mcp.json` (or wherever the CLI looks):
+
+```json
+{
+ "mcpServers": {
+ "kebab": {
+ "command": "kebab",
+ "args": ["mcp"]
+ }
+ }
+}
+```
+
+### 기타 host
+
+stdio JSON-RPC MCP 표준을 따르는 모든 host 가 지원. 위 형식 (`command` + `args`) 만 맞추면 동작.
+
+---
+
+## Tool catalog (6 tools)
+
+모든 tool 의 출력은 wire schema v1 JSON 을 MCP `text` content block 으로 직렬화. CLI `--json` 모드와 byte-동일 (single source of truth).
+
+### `search` — corpus 검색
+
+| | |
+|---|---|
+| Input | `{ "query": string, "mode"?: "lexical"\|"vector"\|"hybrid", "k"?: 1-100 }` |
+| Defaults | `mode = "hybrid"`, `k = 10` |
+| Output | `search_hit.v1` array, ranked |
+
+예시:
+
+```json
+{
+ "name": "search",
+ "arguments": {
+ "query": "Kubernetes ingress controller setup",
+ "mode": "hybrid",
+ "k": 5
+ }
+}
+```
+
+응답 (한 hit 발췌):
+
+```json
+[
+ {
+ "schema_version": "search_hit.v1",
+ "rank": 1,
+ "score": 0.847,
+ "doc_id": "...",
+ "chunk_id": "...",
+ "doc_path": "k8s/ingress.md",
+ "heading_path": ["Setup", "Ingress controller"],
+ "snippet": "...",
+ "citation": { ... }
+ },
+ ...
+]
+```
+
+**언제 사용**: 사용자가 "문서 어디 있는지" 묻거나, agent 가 답변 전 raw chunk 가 필요할 때.
+
+### `ask` — RAG 답변
+
+| | |
+|---|---|
+| Input | `{ "query": string, "session_id"?: string, "mode"?: "lexical"\|"vector"\|"hybrid" }` |
+| Defaults | `mode = "hybrid"` |
+| Output | `answer.v1` (single object) |
+
+예시:
+
+```json
+{
+ "name": "ask",
+ "arguments": {
+ "query": "What's our internal Kubernetes ingress setup?",
+ "session_id": "ops-onboarding-2026-05"
+ }
+}
+```
+
+응답:
+
+```json
+{
+ "schema_version": "answer.v1",
+ "answer": "...",
+ "citations": [ ... ],
+ "grounded": true,
+ "refusal_reason": null,
+ "model": { ... },
+ "conversation_id": "...",
+ "turn_index": 0
+}
+```
+
+**`grounded: false` 처리**: KB 에 충분한 context 없음. `refusal_reason` 확인 후 사용자에게 "KB 에 정보 없음" 으로 안내, 본인 지식 fallback 또는 source 요청. **paraphrase 하면 안 됨** (hallucination 위험).
+
+multi-turn 은 [Session 관리](#session-관리-multi-turn-ask) 참조.
+
+### `schema` — capability discovery
+
+| | |
+|---|---|
+| Input | `{}` (no args) |
+| Output | `schema.v1` |
+
+예시:
+
+```json
+{ "name": "schema", "arguments": {} }
+```
+
+응답:
+
+```json
+{
+ "schema_version": "schema.v1",
+ "kebab_version": "0.3.2",
+ "wire": { "schemas": ["answer.v1", "search_hit.v1", ...] },
+ "capabilities": {
+ "json_mode": true,
+ "rag_multi_turn": true,
+ "mcp_server": true,
+ "streaming_ask": false,
+ ...
+ },
+ "models": { "parser_version": "...", "embedding_version": "...", ... },
+ "stats": { "doc_count": 128, "chunk_count": 2147, "asset_count": 130, ... }
+}
+```
+
+**언제 사용**: session 시작 시 한 번 — feature gate 결정 (`capabilities.streaming_ask` true 면 streaming 사용 등). cheap call (no LLM, no embedder), session 동안 1 회 충분.
+
+### `doctor` — health check
+
+| | |
+|---|---|
+| Input | `{}` (no args) |
+| Output | `doctor.v1` |
+
+예시:
+
+```json
+{ "name": "doctor", "arguments": {} }
+```
+
+응답:
+
+```json
+{
+ "schema_version": "doctor.v1",
+ "ok": true,
+ "checks": [
+ { "name": "config_loaded", "ok": true, "detail": "..." },
+ { "name": "ollama_reachable", "ok": true, "detail": "..." },
+ ...
+ ]
+}
+```
+
+**언제 사용**: 다른 tool 이 실패하거나 비정상 응답 줄 때 first triage. `ok: false` 면 `checks[]` 의 failed entry 가 원인 — 사용자에게 보고 후 stop (자동 retry 금지).
+
+### `ingest_file` — 단일 파일 저장 (mutation)
+
+| | |
+|---|---|
+| Input | `{ "path": string }` |
+| Supported ext | `.md` / `.pdf` / `.png` / `.jpg` / `.jpeg` (대소문자 무관; 그 외 확장자는 `unsupported extension` error) |
+| Output | `ingest_report.v1` (single asset) |
+
+예시:
+
+```json
+{
+ "name": "ingest_file",
+ "arguments": { "path": "/Users/me/Downloads/article.md" }
+}
+```
+
+응답:
+
+```json
+{
+ "schema_version": "ingest_report.v1",
+ "scanned": 1,
+ "new": 1,
+ "updated": 0,
+ "unchanged": 0,
+ "skipped": 0,
+ "errors": 0,
+ ...
+}
+```
+
+**언제 사용**: 사용자가 disk 의 file 을 KB 에 저장 의향 명시 시. workspace 외부 path OK — 파일은 `<workspace_root>/_external/<content-hash>.<ext>` 으로 copy. 동일 content 재 ingest 면 idempotent (`unchanged: 1`).
+
+**주의**: mutation tool — 사용자 명시 의도 없을 때 자동 호출 금지.
+
+### `ingest_stdin` — stdin markdown 저장 (mutation)
+
+| | |
+|---|---|
+| Input | `{ "content": string, "title": string, "source_uri"?: string }` |
+| v1 scope | markdown only |
+| Output | `ingest_report.v1` (single asset) |
+
+예시:
+
+```json
+{
+ "name": "ingest_stdin",
+ "arguments": {
+ "content": "## Article body\n\nMain text here.",
+ "title": "Article X",
+ "source_uri": "https://example.com/x"
+ }
+}
+```
+
+응답:
+
+```json
+{
+ "schema_version": "ingest_report.v1",
+ "scanned": 1,
+ "new": 1,
+ ...
+}
+```
+
+**언제 사용**: agent 가 web fetch 한 markdown article 을 KB 에 저장. 사용자가 "이거 나중에 또 보고 싶어" 명시 시 또는 multi-turn 대화에서 자료 누적. content 가 이미 frontmatter (`---` 시작) 이면 error — `ingest_file` 사용.
+
+`title` + `source_uri` 가 frontmatter 로 자동 prepend → `Document.metadata` 에 저장 → 후속 `search` 결과의 `doc_meta` 에 포함. agent 가 source URL 추적 가능.
+
+**주의**: mutation tool. 같은 content 를 반복해서 ingest 하지 말 것 — idempotent 는 보장되지만 embedding cost 가 낭비됨.
+
+---
+
+## Troubleshooting
+
+### `isError: true` + `error.v1` content
+
+tool dispatch 가 `Err` 반환 시. content 의 `error.v1` JSON 의 `code` 로 분기:
+
+| code | 의미 | 조치 |
+|------|------|------|
+| `config_invalid` | `--config` path missing / TOML parse 실패 | path 확인 + `kebab schema` 로 검증. `details.path` + `details.cause` 확인. |
+| `not_indexed` | `kebab.sqlite` 미존재 / migration 미실행 | 사용자에게 `kebab init` + `kebab ingest` 실행 안내. retry 자동 금지. |
+| `model_unreachable` | Ollama endpoint 연결 실패 | Ollama 실행 확인 (`ollama serve`). `details.endpoint` 의 host 가 reachable 한지. retry 1-2 회 후 사용자 보고. |
+| `model_not_pulled` | Ollama model not found | 사용자에게 `ollama pull <model>` 안내 — `<model>` 은 `details.model` 값. |
+| `timeout` | LLM stream / embed deadline 초과 | 일시적이면 retry 1 회. 재발 시 사용자 보고 (model 응답 느림 / Ollama load). |
+| `io_error` | filesystem / 권한 / disk full | `details.kind` 보고 사용자에게 disk space / permission 확인 안내. |
+| `generic` | catch-all | `details.chain` (verbose 시) 보고 사용자에게 그대로 전달. retry 금지. |
+
+`hint` field 가 있으면 사용자에게 그대로 보여주기 (각 code 의 가장 빠른 조치).
+
+### `grounded: false` (ask refusal)
+
+`isError: false` (정상 응답). KB 에 충분한 context 없음. `refusal_reason` 확인 후:
+
+- `NoChunks` — 검색 자체가 0 hit. 다른 표현 / 더 일반적인 query 시도.
+- `LowScores` — hit 있지만 score gate 미달. `kebab search` (별도) 로 raw hit 확인.
+- 그 외 — refusal 메시지 그대로 사용자에게 보고.
+
+자동 paraphrase 금지. 사용자에게 "KB 에 정보 없음" 명시 후 본인 지식 또는 source 요청.
+
+### `doctor` `ok: false`
+
+다른 tool 호출 전 `doctor` 부터. `checks[]` 의 failed entry 원인 명시 — 사용자에게 보고 후 stop.
+
+### empty `search` result
+
+`isError: false`, content = `[]` (빈 array). KB 에 매칭 없음. `mode` 변경 (lexical → vector or vice versa) 또는 query 표현 다양화. 그래도 빈 결과면 KB coverage 부족 — 사용자에게 보고.
+
+### tool not found
+
+`tools/list` 에서 본 binary 의 6 tool 확인. 0.3.1 (fb-30) 은 4 tool, 0.3.2 (fb-31) 부터 6. binary version 확인:
+
+```json
+{ "name": "schema", "arguments": {} }
+```
+
+응답의 `kebab_version` 이 0.3.2+ 인지 확인.
+
+---
+
+## Session 관리 (multi-turn ask)
+
+`ask` tool 의 `session_id` 가 multi-turn RAG context 활성화. 같은 `session_id` 로 연속 호출 시 이전 Q/A history 가 새 query 의 retrieval expansion + prompt context 에 포함.
+
+### session_id 명명
+
+`<topic>-<date>` 형식 권장 — 사용자 친화 + uniqueness:
+
+- `ops-onboarding-2026-05`
+- `kubernetes-ingress-debug-2026-05-07`
+- `agent-research-session-1` (auto-numbered)
+
+session_id 는 임의 string — kebab 이 처음 보는 id 면 새 session 생성, 기존 id 면 history append.
+
+### 언제 새 session 시작?
+
+- 주제 완전 전환 (KB 의 다른 도메인) — 이전 history 가 noise.
+- 사용자 명시 reset 요청.
+- Long session (50+ turn) 의 context bloat — 새 session 으로 fresh start.
+
+### Session lifetime
+
+session 데이터는 SQLite `chat_sessions` + `chat_turns` 에 영속. `kebab reset --data-only` 가 모두 wipe. session 별 삭제 명령은 없음 (P+).
+
+### 예시 multi-turn flow
+
+```json
+// turn 1
+{ "name": "ask", "arguments": {
+ "query": "What's our internal Kubernetes ingress setup?",
+ "session_id": "ops-2026-05"
+}}
+// → answer.v1 with conversation_id, turn_index: 0
+
+// turn 2 — 이전 답변을 context 로 retrieval expansion
+{ "name": "ask", "arguments": {
+ "query": "What about TLS?",
+ "session_id": "ops-2026-05"
+}}
+// → kebab 가 "TLS" 만으로 retrieval 안 함, 이전 "Kubernetes ingress" history 포함 query 로 검색
+
+// turn 3 — 명시적 reference
+{ "name": "ask", "arguments": {
+ "query": "How does that compare to AWS ALB?",
+ "session_id": "ops-2026-05"
+}}
+```
+
+### Session vs single-shot
+
+`session_id` 없이 `ask` 호출 = single-shot. agent host 자체가 conversation 추적하면 single-shot + agent-side context 도 OK. session 이 필요한 경우:
+
+- KB 가 "이전 질문" 을 retrieval expansion 에 사용해야 정확 (e.g. follow-up 의 대명사).
+- 한 session 안에서 같은 chunk 반복 fetch 회피 (kebab 가 turn 간 chunk overlap 인지).
+
+agent host 가 conversation 추적 + 충분한 context 보유면 session 불필요.
+
+---
+
+## Performance
+
+- **첫 tool call**: cold start ~1-2s (SQLite open + Lance dataset open + fastembed model load).
+- **이후 tool call (same session)**: hot — search ~50-200ms, ask ~수 초 (Ollama LLM dominant).
+- **session 종료** (host 가 process kill): 모든 cache lost. 다음 session 첫 call 다시 cold.
+- **`schema` / `doctor`**: cheap (no LLM / no embedder), 매 call ~ms.
+- **`ingest_file` / `ingest_stdin`**: 첫 call 시 fastembed cold start. 이후 file 당 ~수 백 ms (parse + chunk + embed).
+
+cold-start 회피하려면 host 가 long-running session 유지 (Claude Code default).
+
+---
+
+## Security
+
+- stdio MCP — 외부 네트워크 노출 없음. agent host 만 access.
+- `kebab mcp` 가 호출하는 facade 는 `--config` 의 권한으로 동작. config 내 secret (Ollama API key 등) 은 process 환경에 한정.
+- mutation tool (`ingest_file` / `ingest_stdin`) 는 사용자 명시 의도 없이 자동 호출 금지 — agent 측 가드.
+
+---
+
+## Related
+
+- CLI usage: `kebab --help` + [README.md](../README.md)
+- Wire schemas: `docs/wire-schema/v1/*.schema.json`
+- design contract: `docs/superpowers/specs/2026-04-27-kebab-final-form-design.md` §10.2
+- Claude Code 전용 skill: `integrations/claude-code/kebab/SKILL.md`
+- HOTFIXES (post-merge deviations): `tasks/HOTFIXES.md`
diff --git a/integrations/claude-code/kebab/SKILL.md b/integrations/claude-code/kebab/SKILL.md
index 362af61..6f757e1 100644
--- a/integrations/claude-code/kebab/SKILL.md
+++ b/integrations/claude-code/kebab/SKILL.md
@@ -90,6 +90,8 @@ Claude Code spawns `kebab mcp` at session start; the process stays alive across
If your host doesn't support MCP, the CLI subprocess pattern (`kebab search --json` / `kebab ask --json`) above continues to work.
+For per-tool input/output examples, error code reference, multi-turn ask + session management, and host config beyond Claude Code (Cursor / OpenAI Agents / Copilot CLI), see [docs/mcp-usage.md](../../../docs/mcp-usage.md) in the kebab repo.
+
## Recipe D — agent fetched a web doc, save to KB
When you've fetched a markdown article (e.g. via WebFetch) that the user might query later:
--
2.49.1