diff --git a/crates/kebab-chunk/src/md_heading_v1.rs b/crates/kebab-chunk/src/md_heading_v1.rs index fa0578f..b6094bd 100644 --- a/crates/kebab-chunk/src/md_heading_v1.rs +++ b/crates/kebab-chunk/src/md_heading_v1.rs @@ -472,6 +472,10 @@ mod tests { trust_level: TrustLevel::Primary, user_id_alias: None, user: Default::default(), + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }, provenance: Provenance { events: vec![] }, parser_version: kebab_core::ParserVersion("test-parser-0".into()), diff --git a/crates/kebab-chunk/src/pdf_page_v1.rs b/crates/kebab-chunk/src/pdf_page_v1.rs index cab61aa..41dfe83 100644 --- a/crates/kebab-chunk/src/pdf_page_v1.rs +++ b/crates/kebab-chunk/src/pdf_page_v1.rs @@ -347,6 +347,10 @@ mod tests { trust_level: TrustLevel::Primary, user_id_alias: None, user: Default::default(), + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }, provenance: Provenance { events: vec![] }, parser_version, @@ -512,6 +516,10 @@ mod tests { trust_level: TrustLevel::Primary, user_id_alias: None, user: Default::default(), + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }, provenance: Provenance { events: vec![] }, parser_version, diff --git a/crates/kebab-core/src/metadata.rs b/crates/kebab-core/src/metadata.rs index 229ee0d..bed5cc2 100644 --- a/crates/kebab-core/src/metadata.rs +++ b/crates/kebab-core/src/metadata.rs @@ -17,6 +17,25 @@ pub struct Metadata { pub user_id_alias: Option, /// Frontmatter keys we don't recognise are preserved here per §0 Q9. pub user: Map, + + /// p10-1A-1: name of the source repo if the file lives inside a git + /// working tree (`.git/` walk-up). null otherwise. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub repo: Option<String>, + + /// p10-1A-1: HEAD branch at ingest time. null when no repo or detached HEAD. + /// Informational only — current-state observability, not a partition key. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub git_branch: Option<String>, + + /// p10-1A-1: HEAD commit (40-hex) at ingest time. null when no repo. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub git_commit: Option<String>, + + /// p10-1A-1: programming language identifier (lowercase canonical). null + /// for markdown / pdf / image. Set by `kebab_parse_code::lang::code_lang_for_path`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub code_lang: Option<String>, } #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)] @@ -66,3 +85,54 @@ pub enum ProvenanceKind { Warning, Error, } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn metadata_repo_fields_default_to_none_and_omit_when_serialized() { + let m = Metadata { + aliases: vec![], + tags: vec![], + created_at: time::OffsetDateTime::UNIX_EPOCH, + updated_at: time::OffsetDateTime::UNIX_EPOCH, + source_type: SourceType::Markdown, + trust_level: TrustLevel::Primary, + user_id_alias: None, + user: Default::default(), + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, + }; + let v = serde_json::to_value(&m).unwrap(); + assert!(v.get("repo").is_none()); + assert!(v.get("git_branch").is_none()); + assert!(v.get("git_commit").is_none()); + assert!(v.get("code_lang").is_none()); + } + + #[test] + fn metadata_repo_fields_present_when_some() { + let m = Metadata { + aliases: vec![], + tags: vec![], + created_at: time::OffsetDateTime::UNIX_EPOCH, + updated_at: time::OffsetDateTime::UNIX_EPOCH, + source_type: SourceType::Markdown, + trust_level: TrustLevel::Primary, + user_id_alias: None, + user: Default::default(), + repo: Some("kebab".into()), + git_branch: Some("main".into()), + git_commit: Some("a".repeat(40)), + code_lang: Some("rust".into()), + }; + let v = serde_json::to_value(&m).unwrap(); + assert_eq!(v["repo"], "kebab"); + assert_eq!(v["git_branch"], "main"); + assert_eq!(v["git_commit"].as_str().unwrap().len(), 40); 
assert_eq!(v["code_lang"], "rust"); + } +} diff --git a/crates/kebab-normalize/src/lib.rs b/crates/kebab-normalize/src/lib.rs index d3edf54..bc1e988 100644 --- a/crates/kebab-normalize/src/lib.rs +++ b/crates/kebab-normalize/src/lib.rs @@ -467,6 +467,10 @@ mod tests { trust_level: TrustLevel::Primary, user_id_alias: None, user, + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, } } diff --git a/crates/kebab-parse-image/src/lib.rs b/crates/kebab-parse-image/src/lib.rs index 5f1fc6f..a8d1be5 100644 --- a/crates/kebab-parse-image/src/lib.rs +++ b/crates/kebab-parse-image/src/lib.rs @@ -190,6 +190,10 @@ impl Extractor for ImageExtractor { trust_level: TrustLevel::Primary, user_id_alias: None, user, + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }; tracing::debug!( diff --git a/crates/kebab-parse-md/src/frontmatter.rs b/crates/kebab-parse-md/src/frontmatter.rs index 86d3f80..92c8a3c 100644 --- a/crates/kebab-parse-md/src/frontmatter.rs +++ b/crates/kebab-parse-md/src/frontmatter.rs @@ -471,6 +471,10 @@ fn derive_metadata( trust_level, user_id_alias, user, + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, } } diff --git a/crates/kebab-parse-pdf/src/lib.rs b/crates/kebab-parse-pdf/src/lib.rs index 963a0fe..5f1b90e 100644 --- a/crates/kebab-parse-pdf/src/lib.rs +++ b/crates/kebab-parse-pdf/src/lib.rs @@ -194,6 +194,10 @@ impl Extractor for PdfTextExtractor { trust_level: TrustLevel::Primary, user_id_alias: None, user, + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }; tracing::debug!( diff --git a/crates/kebab-store-sqlite/tests/idempotency.rs b/crates/kebab-store-sqlite/tests/idempotency.rs index 8ff482b..85471e7 100644 --- a/crates/kebab-store-sqlite/tests/idempotency.rs +++ b/crates/kebab-store-sqlite/tests/idempotency.rs @@ -42,6 +42,10 @@ fn make_metadata() -> Metadata { trust_level: TrustLevel::Primary, user_id_alias: None, user: Default::default(), + repo: None, + 
git_branch: None, + git_commit: None, + code_lang: None, } } diff --git a/crates/kebab-store-sqlite/tests/incremental_ingest.rs b/crates/kebab-store-sqlite/tests/incremental_ingest.rs index 3c544a2..ef67706 100644 --- a/crates/kebab-store-sqlite/tests/incremental_ingest.rs +++ b/crates/kebab-store-sqlite/tests/incremental_ingest.rs @@ -51,6 +51,10 @@ fn make_doc() -> CanonicalDocument { trust_level: TrustLevel::Primary, user_id_alias: None, user: Default::default(), + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }; CanonicalDocument { doc_id, diff --git a/crates/kebab-store-sqlite/tests/list_docs.rs b/crates/kebab-store-sqlite/tests/list_docs.rs index 01bb626..acfad1c 100644 --- a/crates/kebab-store-sqlite/tests/list_docs.rs +++ b/crates/kebab-store-sqlite/tests/list_docs.rs @@ -54,6 +54,10 @@ fn make_doc( trust_level: trust, user_id_alias: None, user: Default::default(), + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }; let doc = CanonicalDocument { doc_id, diff --git a/crates/kebab-tui/tests/inspect.rs b/crates/kebab-tui/tests/inspect.rs index 7fb3413..039fa53 100644 --- a/crates/kebab-tui/tests/inspect.rs +++ b/crates/kebab-tui/tests/inspect.rs @@ -79,6 +79,10 @@ fn make_doc() -> CanonicalDocument { trust_level: TrustLevel::Primary, user_id_alias: None, user, + repo: None, + git_branch: None, + git_commit: None, + code_lang: None, }, provenance: Provenance { events: vec![ProvenanceEvent {