Files
kebab/crates/kebab-core/src/metadata.rs
th-kim0823 bf4ebf8d2a feat(p10-1a-1): add Metadata.repo / git_branch / git_commit / code_lang
Four optional, serde-skipped-when-None fields added to `Metadata` for
code ingest context. All 11 downstream construction sites patched with
`repo: None, git_branch: None, git_commit: None, code_lang: None`.
Full workspace check (`--tests`) and per-crate test suite pass clean.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-15 15:44:18 +09:00

139 lines
4.2 KiB
Rust

//! Metadata + Provenance (§3.6).
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use time::OffsetDateTime;
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Metadata {
pub aliases: Vec<String>,
pub tags: Vec<String>,
#[serde(with = "time::serde::rfc3339")]
pub created_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339")]
pub updated_at: OffsetDateTime,
pub source_type: SourceType,
pub trust_level: TrustLevel,
pub user_id_alias: Option<String>,
/// Frontmatter keys we don't recognise are preserved here per §0 Q9.
pub user: Map<String, Value>,
/// p10-1A-1: name of the source repo if the file lives inside a git
/// working tree (`.git/` walk-up). null otherwise.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub repo: Option<String>,
/// p10-1A-1: HEAD branch at ingest time. null when no repo or detached HEAD.
/// Informational only — current-state observability, not a partition key.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_branch: Option<String>,
/// p10-1A-1: HEAD commit (40-hex) at ingest time. null when no repo.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub git_commit: Option<String>,
/// p10-1A-1: programming language identifier (lowercase canonical). null
/// for markdown / pdf / image. Set by `kebab_parse_code::lang::code_lang_for_path`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub code_lang: Option<String>,
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum SourceType {
Markdown,
Note,
Paper,
Reference,
Inbox,
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum TrustLevel {
Primary,
Secondary,
Generated,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct Provenance {
pub events: Vec<ProvenanceEvent>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct ProvenanceEvent {
#[serde(with = "time::serde::rfc3339")]
pub at: OffsetDateTime,
pub agent: String,
pub kind: ProvenanceKind,
pub note: Option<String>,
}
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ProvenanceKind {
Discovered,
Parsed,
Normalized,
Chunked,
OcrApplied,
CaptionApplied,
Transcribed,
Embedded,
Indexed,
Warning,
Error,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn metadata_repo_fields_default_to_none_and_omit_when_serialized() {
let m = Metadata {
aliases: vec![],
tags: vec![],
created_at: time::OffsetDateTime::UNIX_EPOCH,
updated_at: time::OffsetDateTime::UNIX_EPOCH,
source_type: SourceType::Markdown,
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: None,
git_branch: None,
git_commit: None,
code_lang: None,
};
let v = serde_json::to_value(&m).unwrap();
assert!(v.get("repo").is_none());
assert!(v.get("git_branch").is_none());
assert!(v.get("git_commit").is_none());
assert!(v.get("code_lang").is_none());
}
#[test]
fn metadata_repo_fields_present_when_some() {
let m = Metadata {
aliases: vec![],
tags: vec![],
created_at: time::OffsetDateTime::UNIX_EPOCH,
updated_at: time::OffsetDateTime::UNIX_EPOCH,
source_type: SourceType::Markdown,
trust_level: TrustLevel::Primary,
user_id_alias: None,
user: Default::default(),
repo: Some("kebab".into()),
git_branch: Some("main".into()),
git_commit: Some("a".repeat(40)),
code_lang: Some("rust".into()),
};
let v = serde_json::to_value(&m).unwrap();
assert_eq!(v["repo"], "kebab");
assert_eq!(v["git_branch"], "main");
assert_eq!(v["git_commit"].as_str().unwrap().len(), 40);
assert_eq!(v["code_lang"], "rust");
}
}