feat(core): ALIAS_SUFFIX + strip_alias_suffix (dense alias vectors)

This commit is contained in:
2026-05-30 10:31:03 +00:00
parent 825543549d
commit 86b4e1ebd0
2 changed files with 22 additions and 2 deletions

View File

@@ -58,6 +58,15 @@ fn validate_hex32(s: &str) -> Result<(), CoreError> {
Ok(())
}
/// Marker appended to a chunk's vector ID to flag the row as an alias embedding.
pub const ALIAS_SUFFIX: &str = "#alias";

/// Return `id` with one trailing [`ALIAS_SUFFIX`] removed.
///
/// When `id` does not end with the suffix it is handed back untouched. Only a
/// single trailing occurrence is removed, and the returned slice borrows from
/// `id`, so no allocation takes place.
pub fn strip_alias_suffix(id: &str) -> &str {
    match id.strip_suffix(ALIAS_SUFFIX) {
        Some(bare) => bare,
        None => id,
    }
}
/// Canonical-JSON + blake3 + hex prefix 32. Per design §4.2.
pub fn id_from<T: Serialize>(tuple: T) -> String {
let bytes = serde_json_canonicalizer::to_vec(&tuple)
@@ -430,6 +439,16 @@ mod tests {
assert_eq!(id.0, "71992c457a5da39880a6d17d646ed0fd");
}
/// Appending `ALIAS_SUFFIX` and stripping it again yields the original ID;
/// inputs without the suffix, the empty string, and a suffix-only string are
/// covered as well.
#[test]
fn strip_alias_suffix_roundtrip() {
    // 32 hex characters used as the un-suffixed ID.
    let chunk_id = "0123456789abcdef0123456789abcdef";
    let aliased = [chunk_id, ALIAS_SUFFIX].concat();

    // Suffix present: stripped back to the bare ID.
    assert_eq!(strip_alias_suffix(aliased.as_str()), chunk_id);
    // Suffix absent: input comes back unchanged.
    assert_eq!(strip_alias_suffix(chunk_id), chunk_id);
    // Empty input stays empty.
    assert_eq!(strip_alias_suffix(""), "");
    // Input consisting solely of the suffix strips down to the empty string.
    assert_eq!(strip_alias_suffix("#alias"), "");
}
/// Independent pin for id_for_index.
/// inputs:
/// collection="default",

View File

@@ -43,8 +43,9 @@ pub use document::{
pub use errors::CoreError;
pub use fetch::{FetchKind, FetchOpts, FetchQuery, FetchResult};
pub use ids::{
AssetId, BlockId, ChunkId, DocumentId, EmbeddingId, IndexId, id_for_asset, id_for_block,
id_for_chunk, id_for_doc, id_for_embedding, id_for_index, id_from,
ALIAS_SUFFIX, AssetId, BlockId, ChunkId, DocumentId, EmbeddingId, IndexId, id_for_asset,
id_for_block, id_for_chunk, id_for_doc, id_for_embedding, id_for_index, id_from,
strip_alias_suffix,
};
pub use ingest::{IngestItem, IngestItemKind, IngestReport, SkipExamples};
pub use jobs::{JobFilter, JobId, JobKind, JobRow, JobStatus};