From bf4ebf8d2ac532b77dce0aa611ceb66dbe17854c Mon Sep 17 00:00:00 2001
From: th-kim0823
Date: Fri, 15 May 2026 15:44:18 +0900
Subject: [PATCH] feat(p10-1a-1): add Metadata.repo / git_branch / git_commit /
code_lang
Four optional, serde-skipped-when-None fields added to `Metadata` for
code ingest context. All 11 downstream construction sites patched with
`repo: None, git_branch: None, git_commit: None, code_lang: None`.
Full workspace check (`--tests`) and per-crate test suite pass clean.
Co-Authored-By: Claude Sonnet 4.6
---
crates/kebab-chunk/src/md_heading_v1.rs | 4 ++
crates/kebab-chunk/src/pdf_page_v1.rs | 8 +++
crates/kebab-core/src/metadata.rs | 70 +++++++++++++++++++
crates/kebab-normalize/src/lib.rs | 4 ++
crates/kebab-parse-image/src/lib.rs | 4 ++
crates/kebab-parse-md/src/frontmatter.rs | 4 ++
crates/kebab-parse-pdf/src/lib.rs | 4 ++
.../kebab-store-sqlite/tests/idempotency.rs | 4 ++
.../tests/incremental_ingest.rs | 4 ++
crates/kebab-store-sqlite/tests/list_docs.rs | 4 ++
crates/kebab-tui/tests/inspect.rs | 4 ++
11 files changed, 114 insertions(+)
diff --git a/crates/kebab-chunk/src/md_heading_v1.rs b/crates/kebab-chunk/src/md_heading_v1.rs
index fa0578f..b6094bd 100644
--- a/crates/kebab-chunk/src/md_heading_v1.rs
+++ b/crates/kebab-chunk/src/md_heading_v1.rs
@@ -472,6 +472,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
},
provenance: Provenance { events: vec![] },
parser_version: kebab_core::ParserVersion("test-parser-0".into()),
diff --git a/crates/kebab-chunk/src/pdf_page_v1.rs b/crates/kebab-chunk/src/pdf_page_v1.rs
index cab61aa..41dfe83 100644
--- a/crates/kebab-chunk/src/pdf_page_v1.rs
+++ b/crates/kebab-chunk/src/pdf_page_v1.rs
@@ -347,6 +347,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
},
provenance: Provenance { events: vec![] },
parser_version,
@@ -512,6 +516,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
},
provenance: Provenance { events: vec![] },
parser_version,
diff --git a/crates/kebab-core/src/metadata.rs b/crates/kebab-core/src/metadata.rs
index 229ee0d..bed5cc2 100644
--- a/crates/kebab-core/src/metadata.rs
+++ b/crates/kebab-core/src/metadata.rs
@@ -17,6 +17,25 @@ pub struct Metadata {
pub user_id_alias: Option,
/// Frontmatter keys we don't recognise are preserved here per §0 Q9.
pub user: Map,
+
+ /// p10-1A-1: name of the source repo if the file lives inside a git
+ /// working tree (`.git/` walk-up). `None` otherwise (key omitted when serialized).
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub repo: Option<String>,
+
+ /// p10-1A-1: HEAD branch at ingest time. `None` when no repo or detached HEAD.
+ /// Informational only — current-state observability, not a partition key.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub git_branch: Option<String>,
+
+ /// p10-1A-1: HEAD commit (40-hex) at ingest time. `None` when no repo.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub git_commit: Option<String>,
+
+ /// p10-1A-1: programming language identifier (lowercase canonical). `None`
+ /// for markdown / pdf / image. Set by `kebab_parse_code::lang::code_lang_for_path`.
+ #[serde(default, skip_serializing_if = "Option::is_none")]
+ pub code_lang: Option<String>,
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
@@ -66,3 +85,54 @@ pub enum ProvenanceKind {
Warning,
Error,
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn metadata_repo_fields_default_to_none_and_omit_when_serialized() {
+ let m = Metadata {
+ aliases: vec![],
+ tags: vec![],
+ created_at: time::OffsetDateTime::UNIX_EPOCH,
+ updated_at: time::OffsetDateTime::UNIX_EPOCH,
+ source_type: SourceType::Markdown,
+ trust_level: TrustLevel::Primary,
+ user_id_alias: None,
+ user: Default::default(),
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
+ };
+ let v = serde_json::to_value(&m).unwrap();
+ assert!(v.get("repo").is_none());
+ assert!(v.get("git_branch").is_none());
+ assert!(v.get("git_commit").is_none());
+ assert!(v.get("code_lang").is_none());
+ }
+
+ #[test]
+ fn metadata_repo_fields_present_when_some() {
+ let m = Metadata {
+ aliases: vec![],
+ tags: vec![],
+ created_at: time::OffsetDateTime::UNIX_EPOCH,
+ updated_at: time::OffsetDateTime::UNIX_EPOCH,
+ source_type: SourceType::Markdown,
+ trust_level: TrustLevel::Primary,
+ user_id_alias: None,
+ user: Default::default(),
+ repo: Some("kebab".into()),
+ git_branch: Some("main".into()),
+ git_commit: Some("a".repeat(40)),
+ code_lang: Some("rust".into()),
+ };
+ let v = serde_json::to_value(&m).unwrap();
+ assert_eq!(v["repo"], "kebab");
+ assert_eq!(v["git_branch"], "main");
+ assert_eq!(v["git_commit"].as_str().unwrap().len(), 40);
+ assert_eq!(v["code_lang"], "rust");
+ }
+}
diff --git a/crates/kebab-normalize/src/lib.rs b/crates/kebab-normalize/src/lib.rs
index d3edf54..bc1e988 100644
--- a/crates/kebab-normalize/src/lib.rs
+++ b/crates/kebab-normalize/src/lib.rs
@@ -467,6 +467,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
}
}
diff --git a/crates/kebab-parse-image/src/lib.rs b/crates/kebab-parse-image/src/lib.rs
index 5f1fc6f..a8d1be5 100644
--- a/crates/kebab-parse-image/src/lib.rs
+++ b/crates/kebab-parse-image/src/lib.rs
@@ -190,6 +190,10 @@ impl Extractor for ImageExtractor {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
};
tracing::debug!(
diff --git a/crates/kebab-parse-md/src/frontmatter.rs b/crates/kebab-parse-md/src/frontmatter.rs
index 86d3f80..92c8a3c 100644
--- a/crates/kebab-parse-md/src/frontmatter.rs
+++ b/crates/kebab-parse-md/src/frontmatter.rs
@@ -471,6 +471,10 @@ fn derive_metadata(
trust_level,
user_id_alias,
user,
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
}
}
diff --git a/crates/kebab-parse-pdf/src/lib.rs b/crates/kebab-parse-pdf/src/lib.rs
index 963a0fe..5f1b90e 100644
--- a/crates/kebab-parse-pdf/src/lib.rs
+++ b/crates/kebab-parse-pdf/src/lib.rs
@@ -194,6 +194,10 @@ impl Extractor for PdfTextExtractor {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
};
tracing::debug!(
diff --git a/crates/kebab-store-sqlite/tests/idempotency.rs b/crates/kebab-store-sqlite/tests/idempotency.rs
index 8ff482b..85471e7 100644
--- a/crates/kebab-store-sqlite/tests/idempotency.rs
+++ b/crates/kebab-store-sqlite/tests/idempotency.rs
@@ -42,6 +42,10 @@ fn make_metadata() -> Metadata {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
}
}
diff --git a/crates/kebab-store-sqlite/tests/incremental_ingest.rs b/crates/kebab-store-sqlite/tests/incremental_ingest.rs
index 3c544a2..ef67706 100644
--- a/crates/kebab-store-sqlite/tests/incremental_ingest.rs
+++ b/crates/kebab-store-sqlite/tests/incremental_ingest.rs
@@ -51,6 +51,10 @@ fn make_doc() -> CanonicalDocument {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
};
CanonicalDocument {
doc_id,
diff --git a/crates/kebab-store-sqlite/tests/list_docs.rs b/crates/kebab-store-sqlite/tests/list_docs.rs
index 01bb626..acfad1c 100644
--- a/crates/kebab-store-sqlite/tests/list_docs.rs
+++ b/crates/kebab-store-sqlite/tests/list_docs.rs
@@ -54,6 +54,10 @@ fn make_doc(
trust_level: trust,
user_id_alias: None,
user: Default::default(),
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
};
let doc = CanonicalDocument {
doc_id,
diff --git a/crates/kebab-tui/tests/inspect.rs b/crates/kebab-tui/tests/inspect.rs
index 7fb3413..039fa53 100644
--- a/crates/kebab-tui/tests/inspect.rs
+++ b/crates/kebab-tui/tests/inspect.rs
@@ -79,6 +79,10 @@ fn make_doc() -> CanonicalDocument {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
+ repo: None,
+ git_branch: None,
+ git_commit: None,
+ code_lang: None,
},
provenance: Provenance {
events: vec![ProvenanceEvent {