Files
inkling/src/main/services/importFormat.ts

357 lines
11 KiB
TypeScript

/**
* Pure parse functions for F6-L3 (Import).
*
* Reverses the output of `composeMarkdown` from `exportFormat.ts`.
* Minimal YAML parser handling exactly the variants F5 emits — plain scalars,
* single-quoted strings (with `''` escapes), block scalar `|-`, and the two
* structured lists (`tags:` inline-flow, `images:` block).
*
* No filesystem, no I/O.
*/
/** One tag entry recovered from the frontmatter `tags:` list. */
export interface ParsedNoteTag {
/** Tag text taken from the item's `name` key, with single-quote wrapping removed. */
name: string;
/** Value of the item's `source` key; only `ai` and `user` are accepted by the parser. */
source: 'ai' | 'user';
}
/** One image entry recovered from the frontmatter `images:` block list. */
export interface ParsedNoteImage {
/** Value of the item's `rel` key (presumably a path relative to the export folder — confirm against the exporter). */
rel: string;
/** Value of the item's `mime` key. */
mime: string;
/** Value of the item's `bytes` key, parsed as a base-10 integer. */
bytes: number;
}
/**
* A note reconstructed from one exported markdown document.
*
* Nullable string fields are `null` when the corresponding frontmatter key is
* absent from the document.
*/
export interface ParsedNote {
/** Frontmatter `id` (required). */
id: string;
/** Frontmatter `created_at` (required), kept as the raw string. */
createdAt: string;
/** Frontmatter `updated_at` (required), kept as the raw string. */
updatedAt: string;
/** Note text recovered from the markdown body below the frontmatter. */
rawText: string;
/** Frontmatter `title`. */
aiTitle: string | null;
/** Frontmatter `summary`. */
aiSummary: string | null;
/** True when frontmatter `title_source` equals `user`. */
titleEditedByUser: boolean;
/** True when frontmatter `summary_source` equals `user`. */
summaryEditedByUser: boolean;
/** Frontmatter `ai_provider`. */
aiProvider: string | null;
/** Frontmatter `ai_generated_at`, kept as the raw string. */
aiGeneratedAt: string | null;
/** Frontmatter `user_intent`. */
userIntent: string | null;
/** Frontmatter `intent_prompted_at`, kept as the raw string. */
intentPromptedAt: string | null;
/** Frontmatter `deleted_at`, kept as the raw string. */
deletedAt: string | null; // new in v0.2.3 #4
/** Entries of the frontmatter `tags:` list. */
tags: ParsedNoteTag[];
/** Entries of the frontmatter `images:` list. */
images: ParsedNoteImage[];
/** Frontmatter `inkling_export_version` as an integer; 0 when missing or not numeric. */
exportVersion: number;
}
// ---------------------------------------------------------------------------
// YAML helpers
// ---------------------------------------------------------------------------
/**
 * Decodes a YAML single-quoted scalar: drops the first and last character and
 * turns every doubled quote (`''`) into one literal `'`.
 *
 * The caller must already have checked that `raw` is wrapped in single quotes;
 * the outer characters are removed without being inspected.
 */
function unquoteSingle(raw: string): string {
  const body = raw.slice(1, raw.length - 1);
  // Splitting on the escape pair and re-joining collapses pairs left to right.
  return body.split("''").join("'");
}
/** Outcome of reading one `key: …` scalar from the frontmatter lines. */
interface ParsedScalar {
/**
* Decoded scalar text: outer quotes removed for single-quoted values, body
* lines joined with `\n` for block scalars, the text as written otherwise.
*/
value: string;
/** number of source lines consumed (1 for plain/quoted, 1+N for block scalar) */
consumed: number;
}
/**
 * Reads the scalar stored under `expectedKey` on `lines[startIdx]`.
 *
 * Supported forms:
 * - `key: value`            → plain text, one line consumed
 * - `key: 'quoted'`         → single-quoted text, one line consumed
 * - `key: |-` + indented body → block scalar, header plus body lines consumed
 *
 * @param lines       All frontmatter lines.
 * @param startIdx    Index of the line expected to hold `expectedKey:`.
 * @param expectedKey Key name that must start the line at column 0.
 * @returns The decoded value and the number of lines used, or `null` when the
 *          line is missing, does not start with `expectedKey:`, or has a
 *          non-space character directly after the colon.
 */
function parseScalarAt(
  lines: string[],
  startIdx: number,
  expectedKey: string
): ParsedScalar | null {
  const head = lines[startIdx];
  const marker = `${expectedKey}:`;
  if (head === undefined || !head.startsWith(marker)) return null;

  const tail = head.slice(marker.length);
  // `key:value` without a separating space is not treated as a scalar entry.
  if (tail !== '' && tail[0] !== ' ') return null;
  const rhs = tail.trimStart();

  if (rhs !== '|-') {
    // One-line forms: single-quoted (needs both quotes) or plain.
    const quoted = rhs.length >= 2 && rhs.startsWith("'") && rhs.endsWith("'");
    return { value: quoted ? unquoteSingle(rhs) : rhs, consumed: 1 };
  }

  // Block scalar: gather following lines until the first non-empty line that
  // does not begin with a space.
  const body: string[] = [];
  let dedent: string | null = null;
  let next = startIdx + 1;
  for (; next < lines.length; next += 1) {
    const raw = lines[next]!;
    if (raw === '') {
      // Empty lines are kept tentatively; trailing ones are dropped below.
      body.push('');
      continue;
    }
    let width = 0;
    while (raw[width] === ' ') width += 1;
    if (width === 0) break;
    // The first body line's indentation becomes the prefix removed from every line.
    if (dedent === null) dedent = raw.slice(0, width);
    // A line indented less than the prefix simply loses all leading whitespace.
    body.push(raw.startsWith(dedent) ? raw.slice(dedent.length) : raw.trimStart());
  }

  // `|-` strips the final newline, so trailing empty lines carry no content.
  // They still count towards `consumed`, because `next` has moved past them.
  let end = body.length;
  while (end > 0 && body[end - 1] === '') end -= 1;
  return { value: body.slice(0, end).join('\n'), consumed: next - startIdx };
}
// ---------------------------------------------------------------------------
// Frontmatter section parser
// ---------------------------------------------------------------------------
/** Result of parsing the `---`-delimited frontmatter section of an exported note. */
interface Frontmatter {
/** Top-level scalar entries, keyed by the text before the first `:` on their line. */
fields: Map<string, string>;
/** Items collected from the `tags:` list; rows that fail to parse are omitted. */
tags: ParsedNoteTag[];
/** Items collected from the `images:` list. */
images: ParsedNoteImage[];
/** total lines consumed including the closing `---` delimiter */
consumedLines: number;
}
/**
 * Parses one tag row written as a YAML flow mapping, e.g.
 * `- { name: foo, source: ai }` or `- { name: 'a, b', source: user }`.
 *
 * The mapping is split into `key: value` entries on commas that lie outside a
 * single-quoted value. A quote only opens a quoted value when it is the first
 * character of that value, so an apostrophe inside a plain value (`don't`)
 * is ordinary text and does not hide the following comma.
 *
 * @param line A single list row; surrounding whitespace is ignored.
 * @returns The tag, or `null` when the row is not `- { … }`, an entry has no
 *          colon, `name` or `source` is missing, or `source` is neither `ai`
 *          nor `user`. Unknown keys are ignored.
 */
function parseTagFlow(line: string): ParsedNoteTag | null {
  const trimmed = line.trim();
  if (!trimmed.startsWith('-')) return null;
  const afterDash = trimmed.slice(1).trimStart();
  if (!afterDash.startsWith('{') || !afterDash.endsWith('}')) return null;
  const inner = afterDash.slice(1, -1).trim();

  // Scanner position within the current entry:
  //   key        – before the entry's first colon
  //   valueStart – after the colon, before the value's first character
  //   plain      – inside an unquoted value (or after a closing quote)
  //   quoted     – inside a single-quoted value
  type ScanState = 'key' | 'valueStart' | 'plain' | 'quoted';
  let state: ScanState = 'key';
  const parts: string[] = [];
  let cursor = 0;
  for (let i = 0; i < inner.length; i++) {
    const ch = inner[i];
    if (state === 'quoted') {
      if (ch === "'") {
        if (inner[i + 1] === "'") {
          // `''` is an escaped quote; stay inside the quoted value.
          i += 1;
        } else {
          state = 'plain';
        }
      }
      continue;
    }
    if (ch === ',') {
      parts.push(inner.slice(cursor, i));
      cursor = i + 1;
      state = 'key';
    } else if (state === 'key') {
      if (ch === ':') state = 'valueStart';
    } else if (state === 'valueStart') {
      if (ch === "'") {
        state = 'quoted';
      } else if (ch !== ' ' && ch !== '\t') {
        state = 'plain';
      }
    }
  }
  parts.push(inner.slice(cursor));

  let nameRaw: string | null = null;
  let sourceRaw: string | null = null;
  for (const part of parts) {
    const colon = part.indexOf(':');
    if (colon === -1) return null;
    const key = part.slice(0, colon).trim();
    const value = part.slice(colon + 1).trim();
    if (key === 'name') {
      nameRaw = value;
    } else if (key === 'source') {
      sourceRaw = value;
    }
  }
  if (nameRaw === null || sourceRaw === null) return null;
  if (sourceRaw !== 'ai' && sourceRaw !== 'user') return null;

  // Require both quotes (length >= 2) so a lone `'` is kept as-is instead of
  // being "unquoted" into an empty string.
  const quoted = nameRaw.length >= 2 && nameRaw.startsWith("'") && nameRaw.endsWith("'");
  const name = quoted ? unquoteSingle(nameRaw) : nameRaw;
  return { name, source: sourceRaw };
}
/**
 * Parses the frontmatter section at the top of an exported note.
 *
 * `lines[0]` must be the opening `---`. Lines are then read until the closing
 * `---`:
 * - `tags:` starts a list of flow-mapping rows, each handed to `parseTagFlow`;
 *   rows it rejects are skipped.
 * - `images:` starts a list of three-line items (`- rel:`, `mime:`, `bytes:`).
 * - any other line containing `:` is read as a top-level scalar via
 *   `parseScalarAt`; lines without a colon and empty lines are skipped.
 *
 * List rows and image sub-fields may be indented by any number of spaces
 * (at least one), matching the indentation tolerance of block scalars.
 *
 * @param lines The whole document split into lines.
 * @returns Collected fields, tags and images, plus the number of lines used
 *          including the closing delimiter.
 * @throws Error when the opening or closing `---` is missing, when an image
 *         item lacks one of its three lines, or when `bytes` is not a number.
 */
function parseFrontmatter(lines: string[]): Frontmatter {
  if (lines[0] !== '---') {
    throw new Error('importFormat: expected frontmatter to start with "---"');
  }
  const fields = new Map<string, string>();
  const tags: ParsedNoteTag[] = [];
  const images: ParsedNoteImage[] = [];

  // Row matchers. The closing `---` sits at column 0 and therefore never
  // matches, so it ends a list without a separate check.
  const leadingSpaces = /^ +/;
  const tagRow = /^ +-/;
  const imageRow = /^ +- rel:/;
  // Reads `key value` from an indented line of an image item. A line at
  // column 0 belongs to the top level and is rejected.
  const readIndented = (row: string, key: string): string | null =>
    leadingSpaces.test(row) ? parseImageSubScalar(row.replace(leadingSpaces, ''), key) : null;

  let i = 1;
  while (i < lines.length) {
    const line = lines[i]!;
    if (line === '---') {
      // Closing delimiter.
      return { fields, tags, images, consumedLines: i + 1 };
    }
    if (line.length === 0) {
      i += 1;
      continue;
    }

    if (line === 'tags:') {
      i += 1;
      while (i < lines.length && tagRow.test(lines[i]!)) {
        const tag = parseTagFlow(lines[i]!);
        if (tag) tags.push(tag);
        i += 1;
      }
      continue;
    }

    if (line === 'images:') {
      i += 1;
      while (i < lines.length && imageRow.test(lines[i]!)) {
        // Each item spans exactly three lines: rel, mime, bytes.
        const rel = readIndented(lines[i]!, '- rel:');
        const mime = readIndented(lines[i + 1] ?? '', 'mime:');
        const bytesText = readIndented(lines[i + 2] ?? '', 'bytes:');
        if (rel === null || mime === null || bytesText === null) {
          throw new Error('importFormat: malformed images item');
        }
        const bytes = Number.parseInt(bytesText, 10);
        if (!Number.isFinite(bytes)) {
          throw new Error('importFormat: bytes must be a number');
        }
        images.push({ rel, mime, bytes });
        i += 3;
      }
      continue;
    }

    // Top-level scalar: the key is everything before the first colon.
    const colon = line.indexOf(':');
    if (colon === -1) {
      // Stray line, skip.
      i += 1;
      continue;
    }
    const key = line.slice(0, colon);
    const scalar = parseScalarAt(lines, i, key);
    if (scalar === null) {
      i += 1;
      continue;
    }
    fields.set(key, scalar.value);
    i += scalar.consumed;
  }
  throw new Error('importFormat: frontmatter not terminated');
}

/**
 * Reads the value that follows `prefix` on `line`.
 *
 * @param line   Candidate line; it must begin with `prefix`.
 * @param prefix Literal text (key and colon) expected at the start of `line`.
 * @returns The value with leading whitespace removed and single quotes
 *          decoded, or `null` when `line` does not start with `prefix`.
 */
function parseImageSubScalar(line: string, prefix: string): string | null {
  if (!line.startsWith(prefix)) return null;
  const rhs = line.slice(prefix.length).trimStart();
  const quoted = rhs.length >= 2 && rhs.startsWith("'") && rhs.endsWith("'");
  return quoted ? unquoteSingle(rhs) : rhs;
}
// ---------------------------------------------------------------------------
// Body parser (raw_text recovery)
// ---------------------------------------------------------------------------
/**
 * Recovers the note's raw text from the markdown body that follows the
 * frontmatter.
 *
 * Leading decoration is removed in this order, with empty lines skipped
 * before and after each step: at most one heading line starting with `# `,
 * then a run of lines starting with `> `. The remainder is kept up to, but
 * not including, the first line that starts with `![](media/`; everything
 * from that line on is discarded, even if more text follows. Trailing empty
 * lines are dropped.
 *
 * @param bodyLines Lines of the document after the closing `---`.
 * @returns The recovered text joined with `\n` (empty string when nothing is left).
 */
function extractRawText(bodyLines: string[]): string {
  const total = bodyLines.length;
  let pos = 0;
  const skipBlank = (): void => {
    while (pos < total && bodyLines[pos] === '') pos += 1;
  };

  skipBlank();
  // Rendered title: a single h1 line.
  if (pos < total && bodyLines[pos]!.startsWith('# ')) pos += 1;
  skipBlank();
  // Rendered summary: consecutive blockquote lines.
  while (pos < total && bodyLines[pos]!.startsWith('> ')) pos += 1;
  skipBlank();

  // The text ends where the first image reference line begins.
  const start = pos;
  const firstImage = bodyLines.findIndex(
    (candidate, idx) => idx >= start && candidate.startsWith('![](media/')
  );
  let end = firstImage === -1 ? total : firstImage;
  while (end > start && bodyLines[end - 1] === '') end -= 1;
  return bodyLines.slice(start, end).join('\n');
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Parses one exported markdown document back into a {@link ParsedNote}.
 *
 * The text is normalised first: a leading byte order mark is removed and CRLF
 * line endings become LF. This way a file rewritten by an editor or transfer
 * tool is validated on the same footing as the LF-only output of the exporter.
 *
 * @param markdown Full text of the exported file.
 * @returns The note fields taken from the frontmatter and the raw text
 *          recovered from the body. Optional frontmatter keys that are absent
 *          yield `null`; a missing or non-numeric `inkling_export_version`
 *          yields `exportVersion` 0.
 * @throws Error when the text does not begin with a `---` line, when the
 *         frontmatter is malformed, or when `id`, `created_at` or
 *         `updated_at` is missing.
 */
export function parseExportNote(markdown: string): ParsedNote {
  // Normalise before validating; checking the raw text for `---\n` would
  // reject every CRLF document before the normalisation could help.
  const normalized = markdown.replace(/^\uFEFF/, '').replace(/\r\n/g, '\n');
  if (!normalized.startsWith('---\n')) {
    throw new Error('importFormat: markdown must start with "---\\n"');
  }

  const allLines = normalized.split('\n');
  const fm = parseFrontmatter(allLines);
  const rawText = extractRawText(allLines.slice(fm.consumedLines));

  const get = (k: string): string | null => fm.fields.get(k) ?? null;

  const id = get('id');
  const createdAt = get('created_at');
  const updatedAt = get('updated_at');
  if (id === null || createdAt === null || updatedAt === null) {
    throw new Error('importFormat: id/created_at/updated_at are required');
  }

  const versionRaw = get('inkling_export_version');
  // `|| 0` maps an unparsable version (NaN) to 0.
  const exportVersion = versionRaw === null ? 0 : Number.parseInt(versionRaw, 10) || 0;

  return {
    id,
    createdAt,
    updatedAt,
    rawText,
    aiTitle: get('title'),
    aiSummary: get('summary'),
    titleEditedByUser: get('title_source') === 'user',
    summaryEditedByUser: get('summary_source') === 'user',
    aiProvider: get('ai_provider'),
    aiGeneratedAt: get('ai_generated_at'),
    userIntent: get('user_intent'),
    intentPromptedAt: get('intent_prompted_at'),
    deletedAt: get('deleted_at'),
    tags: fm.tags,
    images: fm.images,
    exportVersion
  };
}