/**
 * Pure parse functions for F6-L3 (Import).
 *
 * Reverses the output of `composeMarkdown` from `exportFormat.ts`.
 * Minimal YAML parser handling exactly the variants F5 emits — plain scalars,
 * single-quoted strings (with `''` escapes), block scalar `|-`, and the two
 * structured lists (`tags:` inline-flow, `images:` block).
 *
 * No filesystem, no I/O.
 */

/** One tag recovered from the frontmatter `tags:` list. */
export interface ParsedNoteTag {
  /** Tag text; a single-quoted value has its quotes and `''` escapes removed. */
  name: string;
  /** Origin marker exactly as written in the export: `ai` or `user`. */
  source: 'ai' | 'user';
}

/** One entry of the frontmatter `images:` list. */
export interface ParsedNoteImage {
  /** Value of the item's `rel:` key (presumably a path relative to the export — confirm against the exporter). */
  rel: string;
  /** Value of the item's `mime:` key. */
  mime: string;
  /** Value of the item's `bytes:` key, parsed as a base-10 integer. */
  bytes: number;
}

/**
 * A note reconstructed from one exported markdown document, as returned by
 * `parseExportNote`. Timestamps and dates are kept as the raw frontmatter
 * strings; nullable fields are `null` when the frontmatter key is absent.
 */
export interface ParsedNote {
  /** Frontmatter `id` (required). */
  id: string;
  /** Frontmatter `created_at` (required). */
  createdAt: string;
  /** Frontmatter `updated_at` (required). */
  updatedAt: string;
  /** Note text recovered from the markdown body below the frontmatter. */
  rawText: string;
  /** Frontmatter `title`. */
  aiTitle: string | null;
  /** Frontmatter `summary`. */
  aiSummary: string | null;
  /** True when frontmatter `title_source` is exactly `user`. */
  titleEditedByUser: boolean;
  /** True when frontmatter `summary_source` is exactly `user`. */
  summaryEditedByUser: boolean;
  /** Frontmatter `ai_provider`. */
  aiProvider: string | null;
  /** Frontmatter `ai_generated_at`. */
  aiGeneratedAt: string | null;
  /** Frontmatter `user_intent`. */
  userIntent: string | null;
  /** Frontmatter `intent_prompted_at`. */
  intentPromptedAt: string | null;
  /** Frontmatter `deleted_at`. */
  deletedAt: string | null; // new in v0.2.3 #4
  // v0.3.0 Cut E — round-trip status / due_date / move_reason from frontmatter.
  // Default to 'active' / null / false when absent (older exports pre-Cut E).
  /** Frontmatter `status`; any missing or unrecognised value becomes `active`. */
  status: 'active' | 'completed' | 'archived' | 'trashed';
  /** Frontmatter `status_changed_at`. */
  statusChangedAt: string | null;
  /** Frontmatter `move_reason`. */
  moveReason: string | null;
  /** Frontmatter `due_date`. */
  dueDate: string | null;
  /** True when frontmatter `due_date_source` is exactly `user`. */
  dueDateEditedByUser: boolean;
  /** Items of the frontmatter `tags:` list, in document order. */
  tags: ParsedNoteTag[];
  /** Items of the frontmatter `images:` list, in document order. */
  images: ParsedNoteImage[];
  /** Frontmatter `inkling_export_version` as an integer; 0 when absent or unparseable. */
  exportVersion: number;
}

// ---------------------------------------------------------------------------
// YAML helpers
// ---------------------------------------------------------------------------

/**
 * Decode a YAML single-quoted scalar: drop the surrounding quotes and turn
 * every `''` escape into a literal `'`.
 *
 * The caller must already have confirmed that `raw` is wrapped in single
 * quotes; the first and last characters are removed unconditionally.
 *
 * @param raw The quoted scalar including its opening and closing quote.
 * @returns The decoded text.
 */
function unquoteSingle(raw: string): string {
  const inner = raw.slice(1, -1);
  return inner.replace(/''/g, "'");
}

/** Result of reading one top-level scalar with `parseScalarAt`. */
interface ParsedScalar {
  /** Decoded text: quotes removed, or block-scalar lines dedented and joined with `\n`. */
  value: string;
  /** number of source lines consumed (1 for plain/quoted, 1+N for block scalar) */
  consumed: number;
}

/**
 * Parse a scalar value starting at `lines[startIdx]`.
 *   - `key: value`               → consumed=1
 *   - `key: 'quoted'`            → consumed=1
 *   - `key: |-` + indented body  → consumed=1+N
 *
 * @param lines All lines of the document.
 * @param startIdx Index of the line expected to hold `expectedKey`.
 * @param expectedKey Key that must appear at column 0, immediately followed by `:`.
 * @returns The decoded scalar, or `null` if the line is not a `key: …` scalar
 *          at column 0 (including when `startIdx` is out of range).
 */
function parseScalarAt(
  lines: string[],
  startIdx: number,
  expectedKey: string
): ParsedScalar | null {
  const line = lines[startIdx];
  if (line === undefined) return null;

  const prefix = `${expectedKey}:`;
  if (!line.startsWith(prefix)) return null;
  // The colon must either end the line or be followed by a space.
  const after = line.slice(prefix.length);
  if (after.length > 0 && after[0] !== ' ') return null;
  const rhs = after.trimStart();

  // Block scalar
  if (rhs === '|-') {
    // F5 emits a 2-space indent at this level (composeFrontmatter passes the
    // default `indent=2`). Any indent ≥ 1 is accepted: the leading spaces of
    // the first non-blank body line become the dedent prefix for the block.
    const leadingSpaces = /^ +/;
    const body: string[] = [];
    let dedent: string | null = null;
    let next = startIdx + 1;
    for (; next < lines.length; next += 1) {
      const current = lines[next];
      if (current === undefined) break;
      if (current.length === 0) {
        // Blank line inside the block — keep it tentatively; trailing ones
        // are dropped below.
        body.push('');
        continue;
      }
      const indent = leadingSpaces.exec(current)?.[0];
      // The first line that does not start with a space ends the block.
      if (indent === undefined) break;
      if (dedent === null) dedent = indent;
      // F5 always emits a uniform indent for a given block; a line indented
      // less than the first one simply loses all of its leading whitespace.
      body.push(current.startsWith(dedent) ? current.slice(dedent.length) : current.trimStart());
    }
    // Drop the tentatively kept trailing blank lines (`|-` strips the final
    // newline anyway). They still count towards `consumed`.
    while (body.length > 0 && body[body.length - 1] === '') {
      body.pop();
    }
    return { value: body.join('\n'), consumed: next - startIdx };
  }

  // Single-quoted
  if (rhs.length >= 2 && rhs.startsWith("'") && rhs.endsWith("'")) {
    return { value: unquoteSingle(rhs), consumed: 1 };
  }

  // Plain scalar
  return { value: rhs, consumed: 1 };
}

// ---------------------------------------------------------------------------
// Frontmatter section parser
// ---------------------------------------------------------------------------

/** Result of parsing the leading `---` … `---` frontmatter section. */
interface Frontmatter {
  /** Top-level scalar values keyed by their frontmatter key, as decoded strings. */
  fields: Map<string, string>;
  /** Items collected from the `tags:` list. */
  tags: ParsedNoteTag[];
  /** Items collected from the `images:` list. */
  images: ParsedNoteImage[];
  /** total lines consumed including the closing `---` delimiter */
  consumedLines: number;
}

/**
 * Parse a tag flow item: `- { name: foo, source: ai }` or
 * `- { name: 'a, b', source: user }`.
 *
 * Surrounding whitespace is ignored. The two keys may appear in either
 * order; keys other than `name` and `source` are ignored.
 *
 * @param line One line of the `tags:` list.
 * @returns The tag, or `null` when the line is not a `- { … }` item, an entry
 *          has no colon, `name` or `source` is missing, or `source` is
 *          neither `ai` nor `user`.
 */
function parseTagFlow(line: string): ParsedNoteTag | null {
  const trimmed = line.trim();
  if (!trimmed.startsWith('-')) return null;
  const flow = trimmed.slice(1).trimStart();
  if (!flow.startsWith('{') || !flow.endsWith('}')) return null;
  const inner = flow.slice(1, -1).trim();

  // Split on the commas that are OUTSIDE single quotes, so a quoted name may
  // itself contain commas.
  const parts: string[] = [];
  let inQuote = false;
  let partStart = 0;
  for (let i = 0; i < inner.length; i++) {
    const ch = inner[i];
    if (ch === "'") {
      // `''` inside a quoted value is an escaped quote, not a terminator.
      if (inQuote && inner[i + 1] === "'") {
        i += 1;
        continue;
      }
      inQuote = !inQuote;
    } else if (ch === ',' && !inQuote) {
      parts.push(inner.slice(partStart, i));
      partStart = i + 1;
    }
  }
  parts.push(inner.slice(partStart));

  let nameRaw: string | null = null;
  let sourceRaw: string | null = null;
  for (const part of parts) {
    const colon = part.indexOf(':');
    if (colon === -1) return null;
    const key = part.slice(0, colon).trim();
    const value = part.slice(colon + 1).trim();
    if (key === 'name') {
      nameRaw = value;
    } else if (key === 'source') {
      sourceRaw = value;
    }
  }
  if (nameRaw === null || sourceRaw === null) return null;
  if (sourceRaw !== 'ai' && sourceRaw !== 'user') return null;

  // A quoted scalar needs both an opening and a closing quote, hence the
  // length check: a lone `'` is kept verbatim instead of collapsing to ''.
  const isQuoted = nameRaw.length >= 2 && nameRaw.startsWith("'") && nameRaw.endsWith("'");
  return { name: isQuoted ? unquoteSingle(nameRaw) : nameRaw, source: sourceRaw };
}

/**
 * Parse the frontmatter section that opens the document.
 *
 * Recognises three kinds of top-level entries between the opening and the
 * closing `---`:
 *   - `tags:` followed by indented `- { … }` flow items,
 *   - `images:` followed by indented three-line items (`- rel:`, `mime:`,
 *     `bytes:`),
 *   - any other `key: …` line, read as a scalar via `parseScalarAt`.
 * Blank lines and lines without a colon are skipped.
 *
 * List items are recognised by "one or more leading spaces" rather than one
 * exact indent width, matching how block scalars are read.
 *
 * @param lines All lines of the document; `lines[0]` must be `---`.
 * @returns The collected fields, tags and images, plus the number of lines up
 *          to and including the closing `---`.
 * @throws Error if the first line is not `---`, if an `images:` item is
 *         malformed or its `bytes` value is not a number, or if no closing
 *         `---` is found.
 */
function parseFrontmatter(lines: string[]): Frontmatter {
  if (lines[0] !== '---') {
    throw new Error('importFormat: expected frontmatter to start with "---"');
  }

  // Item matchers, created once rather than per line.
  const tagItem = /^ +-/;
  const imageRel = /^ +- rel:/;
  const imageMime = /^ +mime:/;
  const imageBytes = /^ +bytes:/;

  // Read one image sub-key, handing the prefix actually found on the line to
  // `parseImageSubScalar`; `null` when the line does not carry that key.
  const imageField = (line: string, pattern: RegExp): string | null => {
    const found = pattern.exec(line);
    return found === null ? null : parseImageSubScalar(line, found[0]);
  };

  const fields = new Map<string, string>();
  const tags: ParsedNoteTag[] = [];
  const images: ParsedNoteImage[] = [];

  let i = 1;
  while (i < lines.length) {
    const line = lines[i] ?? '';
    if (line === '---') {
      // Closing delimiter — return.
      return { fields, tags, images, consumedLines: i + 1 };
    }
    if (line.length === 0) {
      i += 1;
      continue;
    }

    // Top-level keys (column 0). Detect `key:` (list intro) or `key: value`.
    if (line === 'tags:') {
      i += 1;
      while (i < lines.length) {
        const item = lines[i] ?? '';
        // Any line that is not an indented `-` item (including the closing
        // `---`) ends the list and is handled by the outer loop.
        if (!tagItem.test(item)) break;
        const tag = parseTagFlow(item);
        // Best effort: an item that does not parse is skipped, not fatal.
        if (tag) tags.push(tag);
        i += 1;
      }
      continue;
    }
    if (line === 'images:') {
      i += 1;
      while (i < lines.length) {
        const relLine = lines[i] ?? '';
        if (!imageRel.test(relLine)) break;
        // Image item: 3 lines (rel, mime, bytes), each as a sub-scalar.
        const rel = imageField(relLine, imageRel);
        const mime = imageField(lines[i + 1] ?? '', imageMime);
        const bytesRaw = imageField(lines[i + 2] ?? '', imageBytes);
        if (rel === null || mime === null || bytesRaw === null) {
          throw new Error('importFormat: malformed images item');
        }
        const bytes = Number.parseInt(bytesRaw, 10);
        if (!Number.isFinite(bytes)) {
          throw new Error('importFormat: bytes must be a number');
        }
        images.push({ rel, mime, bytes });
        i += 3;
      }
      continue;
    }

    // Top-level scalar — find key, parse value.
    const colon = line.indexOf(':');
    if (colon === -1) {
      // Stray line, skip.
      i += 1;
      continue;
    }
    const key = line.slice(0, colon);
    const scalar = parseScalarAt(lines, i, key);
    if (scalar === null) {
      i += 1;
      continue;
    }
    fields.set(key, scalar.value);
    i += scalar.consumed;
  }
  throw new Error('importFormat: frontmatter not terminated');
}

/**
 * Read the value of one line of an `images:` item.
 *
 * @param line The source line.
 * @param prefix Exact text the line must start with, indentation and key
 *               included (e.g. the `mime:` key with its leading spaces).
 * @returns The value after the prefix with leading whitespace removed and,
 *          if it is wrapped in single quotes, unquoted; `null` when the line
 *          does not start with `prefix`.
 */
function parseImageSubScalar(line: string, prefix: string): string | null {
  if (!line.startsWith(prefix)) return null;
  const rhs = line.slice(prefix.length).trimStart();
  const isQuoted = rhs.length >= 2 && rhs.startsWith("'") && rhs.endsWith("'");
  return isQuoted ? unquoteSingle(rhs) : rhs;
}

// ---------------------------------------------------------------------------
// Body parser (raw_text recovery)
// ---------------------------------------------------------------------------

/**
 * Strips the rendered prefix (h1, blockquote summary) and trailing image refs
 * to recover `rawText`.
 *
 * Skipped at the top, in order: blank lines, at most one `# …` line, blank
 * lines, a run of `> …` lines, blank lines. Everything after that is kept up
 * to (not including) the first line that starts with `![](`, with trailing
 * blank lines removed.
 *
 * NOTE(review): the prefix is stripped purely by shape, so a note whose own
 * text begins with `# ` or `> ` and was exported without a title or summary
 * would presumably lose those lines — confirm against the exporter.
 *
 * @param bodyLines Lines of the document after the frontmatter.
 * @returns The recovered text, lines joined with `\n`.
 */
function extractRawText(bodyLines: string[]): string {
  const total = bodyLines.length;
  // Advance from `from` while `matches` holds; returns the first index that
  // does not match (or `total`).
  const skipWhile = (from: number, matches: (line: string) => boolean): number => {
    let at = from;
    while (at < total && matches(bodyLines[at] ?? '')) at += 1;
    return at;
  };
  const isBlank = (line: string): boolean => line === '';

  let start = skipWhile(0, isBlank);
  // Skip a single h1 line (`# …`)
  if (start < total && (bodyLines[start] ?? '').startsWith('# ')) {
    start += 1;
  }
  start = skipWhile(start, isBlank);
  // Skip blockquote run (`> …`)
  start = skipWhile(start, (line) => line.startsWith('> '));
  start = skipWhile(start, isBlank);

  // Capture until the first standalone `![](…)` line OR end. Image refs are
  // emitted only at line start, separated from the body by `\n\n`.
  const end = skipWhile(start, (line) => !line.startsWith('![]('));
  const captured = bodyLines.slice(start, end);

  // Trim trailing blank lines.
  while (captured.length > 0 && captured[captured.length - 1] === '') {
    captured.pop();
  }
  return captured.join('\n');
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Parse one exported markdown document back into a `ParsedNote`.
 *
 * Line endings are normalised (CRLF → LF) before anything else, so a document
 * saved with Windows line endings is accepted.
 *
 * @param markdown Full text of the exported note, starting with the `---`
 *                 frontmatter delimiter.
 * @returns The reconstructed note. Absent nullable fields are `null`, the
 *          `*EditedByUser` flags are true only when the matching `*_source`
 *          field is `user`, `status` falls back to `active`, and
 *          `exportVersion` falls back to 0.
 * @throws Error if the document does not start with `---` on its own line, if
 *         the frontmatter is malformed or unterminated, or if `id`,
 *         `created_at` or `updated_at` is missing.
 */
export function parseExportNote(markdown: string): ParsedNote {
  // Normalize line endings first (F5 emits LF only, but be defensive) so the
  // delimiter check below also accepts `---\r\n`.
  const normalized = markdown.replace(/\r\n/g, '\n');
  if (!normalized.startsWith('---\n')) {
    throw new Error('importFormat: markdown must start with "---\\n"');
  }

  const allLines = normalized.split('\n');
  const fm = parseFrontmatter(allLines);
  const rawText = extractRawText(allLines.slice(fm.consumedLines));

  const get = (k: string): string | null => fm.fields.get(k) ?? null;

  const id = get('id');
  const createdAt = get('created_at');
  const updatedAt = get('updated_at');
  if (id === null || createdAt === null || updatedAt === null) {
    throw new Error('importFormat: id/created_at/updated_at are required');
  }

  // `|| 0` maps an unparseable version (NaN) to 0.
  const versionRaw = get('inkling_export_version');
  const exportVersion = versionRaw === null ? 0 : Number.parseInt(versionRaw, 10) || 0;

  // Missing or unrecognised status falls back to 'active'.
  const validStatuses = ['active', 'completed', 'archived', 'trashed'] as const;
  const statusRaw = get('status');
  const status: ParsedNote['status'] =
    validStatuses.find((candidate) => candidate === statusRaw) ?? 'active';

  return {
    id,
    createdAt,
    updatedAt,
    rawText,
    aiTitle: get('title'),
    aiSummary: get('summary'),
    titleEditedByUser: get('title_source') === 'user',
    summaryEditedByUser: get('summary_source') === 'user',
    aiProvider: get('ai_provider'),
    aiGeneratedAt: get('ai_generated_at'),
    userIntent: get('user_intent'),
    intentPromptedAt: get('intent_prompted_at'),
    deletedAt: get('deleted_at'),
    status,
    statusChangedAt: get('status_changed_at'),
    moveReason: get('move_reason'),
    dueDate: get('due_date'),
    dueDateEditedByUser: get('due_date_source') === 'user',
    tags: fm.tags,
    images: fm.images,
    exportVersion
  };
}