feat(p10-1a-1): add Metadata.repo / git_branch / git_commit / code_lang

Four optional, serde-skipped-when-None fields added to `Metadata` for
code ingest context. All 11 downstream construction sites patched with
`repo: None, git_branch: None, git_commit: None, code_lang: None`.
Full workspace check (`--tests`) and per-crate test suite pass clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
th-kim0823
2026-05-15 15:44:18 +09:00
parent 351c7a0826
commit bf4ebf8d2a
11 changed files with 114 additions and 0 deletions

View File

@@ -472,6 +472,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
},
provenance: Provenance { events: vec![] },
parser_version: kebab_core::ParserVersion("test-parser-0".into()),

View File

@@ -347,6 +347,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
},
provenance: Provenance { events: vec![] },
parser_version,
@@ -512,6 +516,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
},
provenance: Provenance { events: vec![] },
parser_version,

View File

@@ -17,6 +17,25 @@ pub struct Metadata {
pub user_id_alias: Option<String>,
/// Frontmatter keys we don't recognise are preserved here per §0 Q9.
pub user: Map<String, Value>,
/// p10-1A-1: name of the source repo if the file lives inside a git
/// working tree (`.git/` walk-up). null otherwise.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub repo: Option<String>,
/// p10-1A-1: HEAD branch at ingest time. null when no repo or detached HEAD.
/// Informational only — current-state observability, not a partition key.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_branch: Option<String>,
/// p10-1A-1: HEAD commit (40-hex) at ingest time. null when no repo.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit: Option<String>,
/// p10-1A-1: programming language identifier (lowercase canonical). null
/// for markdown / pdf / image. Set by `kebab_parse_code::lang::code_lang_for_path`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub code_lang: Option<String>,
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
@@ -66,3 +85,54 @@ pub enum ProvenanceKind {
Warning,
Error,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn metadata_repo_fields_default_to_none_and_omit_when_serialized() {
let m = Metadata {
aliases: vec![],
tags: vec![],
created_at: time::OffsetDateTime::UNIX_EPOCH,
updated_at: time::OffsetDateTime::UNIX_EPOCH,
source_type: SourceType::Markdown,
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
};
let v = serde_json::to_value(&m).unwrap();
assert!(v.get("repo").is_none());
assert!(v.get("git_branch").is_none());
assert!(v.get("git_commit").is_none());
assert!(v.get("code_lang").is_none());
}
#[test]
fn metadata_repo_fields_present_when_some() {
let m = Metadata {
aliases: vec![],
tags: vec![],
created_at: time::OffsetDateTime::UNIX_EPOCH,
updated_at: time::OffsetDateTime::UNIX_EPOCH,
source_type: SourceType::Markdown,
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: Some("kebab".into()),
git_branch: Some("main".into()),
git_commit: Some("a".repeat(40)),
code_lang: Some("rust".into()),
};
let v = serde_json::to_value(&m).unwrap();
assert_eq!(v["repo"], "kebab");
assert_eq!(v["git_branch"], "main");
assert_eq!(v["git_commit"].as_str().unwrap().len(), 40);
assert_eq!(v["code_lang"], "rust");
}
}

View File

@@ -467,6 +467,10 @@ mod tests {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
}
}

View File

@@ -190,6 +190,10 @@ impl Extractor for ImageExtractor {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
};
tracing::debug!(

View File

@@ -471,6 +471,10 @@ fn derive_metadata(
trust_level,
user_id_alias,
user,
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
}
}

View File

@@ -194,6 +194,10 @@ impl Extractor for PdfTextExtractor {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
};
tracing::debug!(

View File

@@ -42,6 +42,10 @@ fn make_metadata() -> Metadata {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
}
}

View File

@@ -51,6 +51,10 @@ fn make_doc() -> CanonicalDocument {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
};
CanonicalDocument {
doc_id,

View File

@@ -54,6 +54,10 @@ fn make_doc(
trust_level: trust,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
};
let doc = CanonicalDocument {
doc_id,

View File

@@ -79,6 +79,10 @@ fn make_doc() -> CanonicalDocument {
trust_level: TrustLevel::Primary,
user_id_alias: None,
user,
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
},
provenance: Provenance {
events: vec![ProvenanceEvent {