From 86b4e1ebd069d26ae6a7601d3b86bde4998ca658 Mon Sep 17 00:00:00 2001 From: altair823 Date: Sat, 30 May 2026 10:31:03 +0000 Subject: [PATCH] feat(core): ALIAS_SUFFIX + strip_alias_suffix (dense alias vectors) --- crates/kebab-core/src/ids.rs | 19 +++++++++++++++++++ crates/kebab-core/src/lib.rs | 5 +++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/crates/kebab-core/src/ids.rs b/crates/kebab-core/src/ids.rs index c43b661..c0ac1fe 100644 --- a/crates/kebab-core/src/ids.rs +++ b/crates/kebab-core/src/ids.rs @@ -58,6 +58,15 @@ fn validate_hex32(s: &str) -> Result<(), CoreError> { Ok(()) } +/// Suffix appended to a chunk's vector ID to mark an alias embedding row. +pub const ALIAS_SUFFIX: &str = "#alias"; + +/// Strip `#alias` suffix from `id`, returning the bare chunk ID. +/// If `id` does not end with `ALIAS_SUFFIX`, returns `id` unchanged. +pub fn strip_alias_suffix(id: &str) -> &str { + id.strip_suffix(ALIAS_SUFFIX).unwrap_or(id) +} + /// Canonical-JSON + blake3 + hex prefix 32. Per design §4.2. pub fn id_from(tuple: T) -> String { let bytes = serde_json_canonicalizer::to_vec(&tuple) @@ -430,6 +439,16 @@ mod tests { assert_eq!(id.0, "71992c457a5da39880a6d17d646ed0fd"); } + #[test] + fn strip_alias_suffix_roundtrip() { + let bare = "0123456789abcdef0123456789abcdef"; + let with_suffix = format!("{}{}", bare, ALIAS_SUFFIX); + assert_eq!(strip_alias_suffix(&with_suffix), bare); + assert_eq!(strip_alias_suffix(bare), bare); + assert_eq!(strip_alias_suffix(""), ""); + assert_eq!(strip_alias_suffix("#alias"), ""); + } + /// Independent pin for id_for_index. 
/// inputs: /// collection="default", diff --git a/crates/kebab-core/src/lib.rs b/crates/kebab-core/src/lib.rs index 8cb57d6..b4ddb35 100644 --- a/crates/kebab-core/src/lib.rs +++ b/crates/kebab-core/src/lib.rs @@ -43,8 +43,9 @@ pub use document::{ pub use errors::CoreError; pub use fetch::{FetchKind, FetchOpts, FetchQuery, FetchResult}; pub use ids::{ - AssetId, BlockId, ChunkId, DocumentId, EmbeddingId, IndexId, id_for_asset, id_for_block, - id_for_chunk, id_for_doc, id_for_embedding, id_for_index, id_from, + ALIAS_SUFFIX, AssetId, BlockId, ChunkId, DocumentId, EmbeddingId, IndexId, id_for_asset, + id_for_block, id_for_chunk, id_for_doc, id_for_embedding, id_for_index, id_from, + strip_alias_suffix, }; pub use ingest::{IngestItem, IngestItemKind, IngestReport, SkipExamples}; pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus};