feat(import): pure parser for F5 export format

parseExportNote reverses composeMarkdown — minimal YAML parser
covering only the variants F5 emits (plain, single-quoted, block
scalar, tag/image lists). Body extraction strips h1 + blockquote +
image refs to recover rawText. Round-trip tested against
exportFormat.composeMarkdown.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
altair823
2026-04-26 10:53:29 +09:00
parent fe6bbd3104
commit e8587c1986
2 changed files with 563 additions and 0 deletions

View File

@@ -0,0 +1,354 @@
/**
* Pure parse functions for F6-L3 (Import).
*
* Reverses the output of `composeMarkdown` from `exportFormat.ts`.
* Minimal YAML parser handling exactly the variants F5 emits — plain scalars,
* single-quoted strings (with `''` escapes), block scalar `|-`, and the two
* structured lists (`tags:` inline-flow, `images:` block).
*
* No filesystem, no I/O.
*/
/** One entry recovered from the frontmatter `tags:` list. */
export interface ParsedNoteTag {
/** Tag name; a single-quoted name is stored with its quoting removed. */
name: string;
/** Value of the item's `source` key; the parser accepts only `ai` and `user`. */
source: 'ai' | 'user';
}
/** One entry recovered from the frontmatter `images:` list. */
export interface ParsedNoteImage {
/** Value of the item's `rel:` key (the tests use paths such as `media/<name>.png`). */
rel: string;
/** Value of the item's `mime:` key. */
mime: string;
/** Value of the item's `bytes:` key, parsed as a base-10 integer. */
bytes: number;
}
/**
 * Result of `parseExportNote`: the note fields recovered from one exported
 * Markdown document. Optional frontmatter keys that are absent become `null`.
 */
export interface ParsedNote {
/** Frontmatter `id` (required). */
id: string;
/** Frontmatter `created_at` (required), kept as the raw string. */
createdAt: string;
/** Frontmatter `updated_at` (required), kept as the raw string. */
updatedAt: string;
/** Body text left after the rendered heading, blockquote and image refs are stripped. */
rawText: string;
/** Frontmatter `title`, or `null` when the key is absent. */
aiTitle: string | null;
/** Frontmatter `summary`, or `null` when the key is absent. */
aiSummary: string | null;
/** `true` exactly when frontmatter `title_source` is `user`. */
titleEditedByUser: boolean;
/** `true` exactly when frontmatter `summary_source` is `user`. */
summaryEditedByUser: boolean;
/** Frontmatter `ai_provider`, or `null` when the key is absent. */
aiProvider: string | null;
/** Frontmatter `ai_generated_at`, or `null` when the key is absent. */
aiGeneratedAt: string | null;
/** Frontmatter `user_intent`, or `null` when the key is absent. */
userIntent: string | null;
/** Frontmatter `intent_prompted_at`, or `null` when the key is absent. */
intentPromptedAt: string | null;
/** Items of the frontmatter `tags:` list, in file order. */
tags: ParsedNoteTag[];
/** Items of the frontmatter `images:` list, in file order. */
images: ParsedNoteImage[];
/** Frontmatter `inkling_export_version` as an integer; `0` when absent or not numeric. */
exportVersion: number;
}
// ---------------------------------------------------------------------------
// YAML helpers
// ---------------------------------------------------------------------------
/**
 * Decode a YAML single-quoted scalar.
 *
 * The first and last characters are dropped without inspection (the caller is
 * responsible for having checked that they are the surrounding quotes), and
 * every doubled quote `''` inside becomes one literal `'`.
 *
 * @param raw - Scalar text including its surrounding single quotes.
 * @returns The decoded content.
 */
function unquoteSingle(raw: string): string {
  const body = raw.slice(1, -1);
  // Splitting on the two-character escape and re-joining with one quote is the
  // same left-to-right, non-overlapping substitution as a global replace.
  return body.split("''").join("'");
}
/** Result of parsing one `key: …` scalar with `parseScalarAt`. */
interface ParsedScalar {
/** Decoded scalar text (unquoted / dedented as applicable). */
value: string;
/** number of source lines consumed (1 for plain/quoted, 1+N for block scalar) */
consumed: number;
}
/**
 * Read the scalar stored under `expectedKey` on line `startIdx`.
 *
 * Recognized shapes:
 * - `key: value`      → plain text, one line consumed
 * - `key: 'quoted'`   → single-quoted text, one line consumed
 * - `key: |-` followed by indented lines → block scalar, 1+N lines consumed
 *
 * @param lines - All lines of the document.
 * @param startIdx - Index of the line expected to hold `key: …`.
 * @param expectedKey - Key text that must appear at the very start of the line.
 * @returns The decoded value and the number of lines used, or `null` when the
 *   line is missing, does not start with `expectedKey:`, or has a non-space
 *   character directly after the colon.
 */
function parseScalarAt(
  lines: string[],
  startIdx: number,
  expectedKey: string
): ParsedScalar | null {
  const head = lines[startIdx];
  const marker = `${expectedKey}:`;
  if (head === undefined || !head.startsWith(marker)) return null;

  const tail = head.slice(marker.length);
  // Something other than a space right after the colon means this is not `key: value`.
  if (tail !== '' && !tail.startsWith(' ')) return null;
  const rhs = tail.trimStart();

  if (rhs !== '|-') {
    // One-line forms: single-quoted (needs both quotes, hence length >= 2) or plain.
    const quoted = rhs.length >= 2 && rhs.startsWith("'") && rhs.endsWith("'");
    return { value: quoted ? unquoteSingle(rhs) : rhs, consumed: 1 };
  }

  // Block scalar: gather every following line that is empty or starts with a
  // space. The leading spaces of the first indented line become the prefix
  // removed from each line; a line that lacks that prefix is left-trimmed.
  const collected: string[] = [];
  let dedent: string | null = null;
  let cursor = startIdx + 1;
  for (; cursor < lines.length; cursor += 1) {
    const current = lines[cursor]!;
    if (current === '') {
      // Empty line: keep it provisionally as part of the block.
      collected.push('');
      continue;
    }
    const lead = /^( +)/.exec(current);
    if (lead === null) {
      // First line that does not begin with a space ends the block.
      break;
    }
    if (dedent === null) dedent = lead[1]!;
    collected.push(
      current.startsWith(dedent) ? current.slice(dedent.length) : current.trimStart()
    );
  }

  // Drop empty lines at the end of the block; they are still counted in
  // `consumed` because the cursor has already moved past them.
  let keep = collected.length;
  while (keep > 0 && collected[keep - 1] === '') keep -= 1;

  return { value: collected.slice(0, keep).join('\n'), consumed: cursor - startIdx };
}
// ---------------------------------------------------------------------------
// Frontmatter section parser
// ---------------------------------------------------------------------------
/** Everything `parseFrontmatter` extracts from the leading `---` … `---` section. */
interface Frontmatter {
/** Top-level scalar keys mapped to their decoded values. */
fields: Map<string, string>;
/** Items collected under the `tags:` key. */
tags: ParsedNoteTag[];
/** Items collected under the `images:` key. */
images: ParsedNoteImage[];
/** total lines consumed including the closing `---` delimiter */
consumedLines: number;
}
/**
 * Parse a tag flow item: `- { name: foo, source: ai }` or
 * `- { name: 'a, b', source: user }`.
 *
 * A `'` opens a quoted value only when it is the first character of a value
 * (directly after `key:` and optional spaces). An apostrophe inside a plain
 * value, as in `name: it's`, is ordinary text and does not hide the comma
 * that follows it.
 *
 * @param line - One line of the `tags:` list; surrounding whitespace is ignored.
 * @returns The tag, or `null` when the line is not a `- { … }` item, a part
 *   has no `:`, `name` or `source` is missing, or `source` is neither `ai`
 *   nor `user`.
 */
function parseTagFlow(line: string): ParsedNoteTag | null {
  const trimmed = line.trim();
  if (!trimmed.startsWith('-')) return null;
  const afterDash = trimmed.slice(1).trimStart();
  if (!afterDash.startsWith('{') || !afterDash.endsWith('}')) return null;
  const inner = afterDash.slice(1, -1).trim();

  // Split `inner` on the commas that sit OUTSIDE single-quoted values.
  const parts: string[] = [];
  let inQuote = false;
  // True from a `:` until the first non-space character of the value.
  let atValueStart = false;
  let cursor = 0;
  for (let i = 0; i < inner.length; i++) {
    const ch = inner[i];
    if (inQuote) {
      if (ch !== "'") continue;
      if (inner[i + 1] === "'") {
        // `''` is an escaped quote; stay inside the quoted value.
        i += 1;
      } else {
        inQuote = false;
      }
      continue;
    }
    if (ch === "'") {
      // Only a quote at the start of a value is a delimiter.
      if (atValueStart) inQuote = true;
      atValueStart = false;
    } else if (ch === ',') {
      parts.push(inner.slice(cursor, i));
      cursor = i + 1;
      atValueStart = false;
    } else if (ch === ':') {
      atValueStart = true;
    } else if (ch !== ' ') {
      atValueStart = false;
    }
  }
  parts.push(inner.slice(cursor));

  let nameRaw: string | null = null;
  let sourceRaw: string | null = null;
  for (const p of parts) {
    // The key ends at the first colon; the value may contain further colons.
    const colon = p.indexOf(':');
    if (colon === -1) return null;
    const k = p.slice(0, colon).trim();
    const v = p.slice(colon + 1).trim();
    if (k === 'name') {
      nameRaw = v;
    } else if (k === 'source') {
      sourceRaw = v;
    }
  }
  if (nameRaw === null || sourceRaw === null) return null;

  // Require both quotes (length >= 2), matching the other scalar parsers in
  // this file, so a lone `'` is kept as-is instead of collapsing to ''.
  const isQuoted = nameRaw.length >= 2 && nameRaw.startsWith("'") && nameRaw.endsWith("'");
  const name = isQuoted ? unquoteSingle(nameRaw) : nameRaw;
  if (sourceRaw !== 'ai' && sourceRaw !== 'user') return null;
  return { name, source: sourceRaw };
}
/**
 * Parse the frontmatter section that opens the document.
 *
 * Walks the lines after the opening `---` until the closing `---`, collecting
 * top-level scalars into `fields` and the `tags:` / `images:` lists into their
 * own arrays. Empty lines and lines that cannot be read as `key: …` are skipped.
 *
 * @param lines - All lines of the document; `lines[0]` must be `---`.
 * @returns The parsed fields, lists, and the number of lines used (closing
 *   delimiter included).
 * @throws Error when the first line is not `---`, when an `images:` item is
 *   malformed or has a non-numeric `bytes`, or when no closing `---` is found.
 */
function parseFrontmatter(lines: string[]): Frontmatter {
  if (lines[0] !== '---') {
    throw new Error('importFormat: expected frontmatter to start with "---"');
  }

  const fields = new Map<string, string>();
  const tags: ParsedNoteTag[] = [];
  const images: ParsedNoteImage[] = [];

  let pos = 1;
  while (pos < lines.length) {
    const current = lines[pos]!;

    if (current === '---') {
      // Closing delimiter reached; count it as consumed.
      return { fields, tags, images, consumedLines: pos + 1 };
    }

    if (current === '') {
      pos += 1;
      continue;
    }

    if (current === 'tags:') {
      // Take list items until the delimiter or the first line that is not an item.
      for (pos += 1; pos < lines.length; pos += 1) {
        const item = lines[pos]!;
        if (item === '---' || !item.startsWith(' -')) break;
        const tag = parseTagFlow(item);
        // Items that do not parse are dropped rather than treated as errors.
        if (tag !== null) tags.push(tag);
      }
      continue;
    }

    if (current === 'images:') {
      pos += 1;
      while (pos < lines.length) {
        const relLine = lines[pos]!;
        if (relLine === '---' || !relLine.startsWith(' - rel:')) break;
        // Each image item spans three lines: rel, mime, bytes.
        const rel = parseImageSubScalar(relLine, ' - rel:');
        const mime = parseImageSubScalar(lines[pos + 1] ?? '', ' mime:');
        const bytesText = parseImageSubScalar(lines[pos + 2] ?? '', ' bytes:');
        if (rel === null || mime === null || bytesText === null) {
          throw new Error('importFormat: malformed images item');
        }
        const bytes = Number.parseInt(bytesText, 10);
        if (!Number.isFinite(bytes)) {
          throw new Error('importFormat: bytes must be a number');
        }
        images.push({ rel, mime, bytes });
        pos += 3;
      }
      continue;
    }

    // Anything else is treated as a top-level scalar: the key is the text
    // before the first colon.
    const sep = current.indexOf(':');
    if (sep === -1) {
      // No colon at all: stray line, skip.
      pos += 1;
      continue;
    }
    const key = current.slice(0, sep);
    const scalar = parseScalarAt(lines, pos, key);
    if (scalar === null) {
      pos += 1;
      continue;
    }
    fields.set(key, scalar.value);
    pos += scalar.consumed;
  }

  throw new Error('importFormat: frontmatter not terminated');
}
/**
 * Read the value of one line inside an `images:` item.
 *
 * @param line - The source line.
 * @param prefix - Exact text the line must begin with (indent plus `key:`).
 * @returns The text after the prefix with leading whitespace removed and
 *   single quotes decoded when the value is wrapped in them, or `null` when
 *   the line does not begin with `prefix`.
 */
function parseImageSubScalar(line: string, prefix: string): string | null {
  if (!line.startsWith(prefix)) return null;
  const value = line.slice(prefix.length).trimStart();
  // A quoted value needs both an opening and a closing quote.
  const isQuoted = value.length >= 2 && value.startsWith("'") && value.endsWith("'");
  return isQuoted ? unquoteSingle(value) : value;
}
// ---------------------------------------------------------------------------
// Body parser (raw_text recovery)
// ---------------------------------------------------------------------------
/**
 * Recover `rawText` from the lines that follow the frontmatter.
 *
 * Leading layout is skipped in this order: empty lines, at most one line
 * starting with `# `, empty lines, every consecutive line starting with `> `,
 * empty lines. The text then runs up to (not including) the first line that
 * starts with `![](media/`, or to the end, with trailing empty lines removed.
 *
 * @param bodyLines - Document lines after the closing `---`.
 * @returns The remaining lines joined with `\n`; `''` when nothing is left.
 */
function extractRawText(bodyLines: string[]): string {
  const total = bodyLines.length;
  const skipBlanks = (from: number): number => {
    let k = from;
    while (k < total && bodyLines[k] === '') k += 1;
    return k;
  };

  let start = skipBlanks(0);
  // At most one heading line is treated as the rendered title.
  if (start < total && bodyLines[start]!.startsWith('# ')) start += 1;
  start = skipBlanks(start);
  // The whole run of blockquote lines is treated as the rendered summary.
  while (start < total && bodyLines[start]!.startsWith('> ')) start += 1;
  start = skipBlanks(start);

  // The text stops at the first image-reference line at or after `start`.
  const imageAt = bodyLines.findIndex(
    (text, idx) => idx >= start && text.startsWith('![](media/')
  );
  let end = imageAt === -1 ? total : imageAt;
  // Trim trailing empty lines.
  while (end > start && bodyLines[end - 1] === '') end -= 1;

  return bodyLines.slice(start, end).join('\n');
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Parse one exported Markdown note back into a `ParsedNote`.
 *
 * The text is split into lines, the leading `---` … `---` frontmatter is
 * parsed, and the lines after it are reduced to the note's raw text.
 *
 * @param markdown - Full text of an exported note. LF and CRLF line endings
 *   are both accepted.
 * @returns The recovered note. Optional frontmatter keys that are absent map
 *   to `null`; `exportVersion` is `0` when `inkling_export_version` is absent
 *   or not numeric.
 * @throws Error when the text does not begin with a `---` line, when
 *   frontmatter parsing fails, or when `id`, `created_at` or `updated_at` is
 *   missing.
 */
export function parseExportNote(markdown: string): ParsedNote {
  // Normalize line endings BEFORE checking the opening delimiter. Checking
  // first rejected every CRLF document (`---\r\n` does not start with `---\n`).
  const normalized = markdown.replace(/\r\n/g, '\n');
  if (!normalized.startsWith('---\n')) {
    throw new Error('importFormat: markdown must start with "---\\n"');
  }

  const allLines = normalized.split('\n');
  const fm = parseFrontmatter(allLines);
  const rawText = extractRawText(allLines.slice(fm.consumedLines));

  // Map values are always strings, so `undefined` can only mean "key absent".
  const get = (k: string): string | null => fm.fields.get(k) ?? null;

  const id = get('id');
  const createdAt = get('created_at');
  const updatedAt = get('updated_at');
  if (id === null || createdAt === null || updatedAt === null) {
    throw new Error('importFormat: id/created_at/updated_at are required');
  }

  const titleSource = get('title_source');
  const summarySource = get('summary_source');
  const versionRaw = get('inkling_export_version');
  // `|| 0` turns a NaN parse result into 0.
  const exportVersion = versionRaw === null ? 0 : Number.parseInt(versionRaw, 10) || 0;

  return {
    id,
    createdAt,
    updatedAt,
    rawText,
    aiTitle: get('title'),
    aiSummary: get('summary'),
    titleEditedByUser: titleSource === 'user',
    summaryEditedByUser: summarySource === 'user',
    aiProvider: get('ai_provider'),
    aiGeneratedAt: get('ai_generated_at'),
    userIntent: get('user_intent'),
    intentPromptedAt: get('intent_prompted_at'),
    tags: fm.tags,
    images: fm.images,
    exportVersion
  };
}

View File

@@ -0,0 +1,209 @@
import { describe, it, expect } from 'vitest';
import {
composeMarkdown,
type ExportNote
} from '@main/services/exportFormat.js';
import { parseExportNote } from '@main/services/importFormat.js';
// Shared fixture: a note with title, summary, provider and two tags, but no
// user intent and no media. Individual tests spread it and override fields.
const baseNote: ExportNote = {
id: '014a3b9c-1234-7890-abcd-000000000001',
createdAt: '2026-04-25T14:23:11.000Z',
updatedAt: '2026-04-25T14:24:02.000Z',
rawText: '회고 메모 본문',
aiTitle: '주간 회고 PR 리뷰',
aiSummary: '회고 양식 통일을 위한 메모.',
titleEditedByUser: false,
summaryEditedByUser: false,
aiProvider: 'local-ollama/gemma4:e4b',
aiGeneratedAt: '2026-04-25T14:23:34.000Z',
userIntent: null,
intentPromptedAt: null,
tags: [{ name: 'pr', source: 'ai' }, { name: 'review', source: 'user' }],
media: []
};
// Round-trip suite: the output of composeMarkdown is fed back through
// parseExportNote and the recovered fields are compared with the input note.
describe('parseExportNote — round-trip with composeMarkdown', () => {
it('round-trips the base note', () => {
const md = composeMarkdown(baseNote);
const parsed = parseExportNote(md);
expect(parsed.id).toBe(baseNote.id);
expect(parsed.createdAt).toBe(baseNote.createdAt);
expect(parsed.updatedAt).toBe(baseNote.updatedAt);
expect(parsed.rawText).toBe(baseNote.rawText);
expect(parsed.aiTitle).toBe(baseNote.aiTitle);
expect(parsed.aiSummary).toBe(baseNote.aiSummary);
expect(parsed.aiProvider).toBe(baseNote.aiProvider);
expect(parsed.aiGeneratedAt).toBe(baseNote.aiGeneratedAt);
expect(parsed.titleEditedByUser).toBe(false);
expect(parsed.summaryEditedByUser).toBe(false);
expect(parsed.tags).toEqual([
{ name: 'pr', source: 'ai' },
{ name: 'review', source: 'user' }
]);
expect(parsed.images).toEqual([]);
expect(parsed.exportVersion).toBe(1);
});
// The export side calls the list `media`; the parsed note exposes it as `images`.
it('round-trips a note with media', () => {
const note: ExportNote = {
...baseNote,
media: [
{ rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 1234 },
{ rel: 'media/014a3b9c__2.jpg', mime: 'image/jpeg', bytes: 5678 }
]
};
const md = composeMarkdown(note);
const parsed = parseExportNote(md);
expect(parsed.images).toEqual([
{ rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 1234 },
{ rel: 'media/014a3b9c__2.jpg', mime: 'image/jpeg', bytes: 5678 }
]);
expect(parsed.rawText).toBe(note.rawText);
});
});
// Scalar-variant suite: one test per scalar style the parser handles
// (plain, single-quoted, `|-` block scalar).
describe('parseExportNote — frontmatter scalar variants', () => {
it('parses plain scalar', () => {
const md = composeMarkdown({ ...baseNote, aiTitle: '주간 회고' });
const parsed = parseExportNote(md);
expect(parsed.aiTitle).toBe('주간 회고');
});
it('parses single-quoted with embedded apostrophe (`` `` escape)', () => {
const note: ExportNote = { ...baseNote, aiTitle: "it's a: title" };
const md = composeMarkdown(note);
// Should be emitted as: title: 'it''s a: title'
expect(md).toContain("title: 'it''s a: title'");
const parsed = parseExportNote(md);
expect(parsed.aiTitle).toBe("it's a: title");
});
it('parses block scalar `|-` for multiline summary', () => {
const note: ExportNote = {
...baseNote,
aiSummary: 'line1\nline2\nline3'
};
const md = composeMarkdown(note);
// Guard that the exporter really chose the block-scalar form for this input.
expect(md).toContain('summary: |-');
const parsed = parseExportNote(md);
expect(parsed.aiSummary).toBe('line1\nline2\nline3');
});
});
// List suite: the `tags` and `images` lists survive a compose → parse round trip.
describe('parseExportNote — list parsing', () => {
it('parses tags inline flow', () => {
const md = composeMarkdown({
...baseNote,
tags: [
{ name: 'foo', source: 'ai' },
{ name: 'bar baz', source: 'user' }
]
});
const parsed = parseExportNote(md);
expect(parsed.tags).toEqual([
{ name: 'foo', source: 'ai' },
{ name: 'bar baz', source: 'user' }
]);
});
it('parses images list with mime + bytes', () => {
const md = composeMarkdown({
...baseNote,
media: [{ rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 9876 }]
});
const parsed = parseExportNote(md);
expect(parsed.images).toEqual([
{ rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 9876 }
]);
});
});
// Body suite: rawText is recovered unchanged whether or not a summary or
// images are present, and `>` / `# ` inside a line are left alone.
describe('parseExportNote — body extraction', () => {
it('extracts rawText with summary present', () => {
const md = composeMarkdown({ ...baseNote, rawText: '본문\n두 번째 줄' });
const parsed = parseExportNote(md);
expect(parsed.rawText).toBe('본문\n두 번째 줄');
});
it('extracts rawText with summary absent', () => {
const md = composeMarkdown({
...baseNote,
aiSummary: null,
rawText: '요약 없는 본문'
});
const parsed = parseExportNote(md);
expect(parsed.rawText).toBe('요약 없는 본문');
});
it('extracts rawText with no images', () => {
const md = composeMarkdown({ ...baseNote, rawText: '이미지 없음', media: [] });
const parsed = parseExportNote(md);
expect(parsed.rawText).toBe('이미지 없음');
});
it('preserves `>` mid-line in rawText (not parsed as blockquote)', () => {
const md = composeMarkdown({
...baseNote,
rawText: '값 a > b 라는 부등호'
});
const parsed = parseExportNote(md);
expect(parsed.rawText).toBe('값 a > b 라는 부등호');
});
it('preserves `# ` mid-line in rawText (not parsed as heading)', () => {
const md = composeMarkdown({
...baseNote,
rawText: '예시: see issue #1 어쩌고 # 가운데 해시'
});
const parsed = parseExportNote(md);
expect(parsed.rawText).toBe('예시: see issue #1 어쩌고 # 가운데 해시');
});
});
// Provenance suite: the edited-by-user flags and the export version are
// recovered from the exported document.
describe('parseExportNote — provenance', () => {
it('recovers titleEditedByUser from title_source: user', () => {
const md = composeMarkdown({ ...baseNote, titleEditedByUser: true });
const parsed = parseExportNote(md);
expect(parsed.titleEditedByUser).toBe(true);
});
it('recovers summaryEditedByUser from summary_source: user', () => {
const md = composeMarkdown({ ...baseNote, summaryEditedByUser: true });
const parsed = parseExportNote(md);
expect(parsed.summaryEditedByUser).toBe(true);
});
it('exposes exportVersion = 1', () => {
const md = composeMarkdown(baseNote);
const parsed = parseExportNote(md);
expect(parsed.exportVersion).toBe(1);
});
});
// Edge-case suite: optional fields present, optional fields omitted, and
// input that has no frontmatter at all.
describe('parseExportNote — edge cases', () => {
it('preserves user_intent when present', () => {
const md = composeMarkdown({
...baseNote,
userIntent: '팀에서 회고 양식 통일',
intentPromptedAt: '2026-04-25T14:24:02.000Z'
});
const parsed = parseExportNote(md);
expect(parsed.userIntent).toBe('팀에서 회고 양식 통일');
expect(parsed.intentPromptedAt).toBe('2026-04-25T14:24:02.000Z');
});
it('returns null aiTitle / aiSummary when omitted', () => {
const md = composeMarkdown({
...baseNote,
aiTitle: null,
aiSummary: null
});
const parsed = parseExportNote(md);
expect(parsed.aiTitle).toBeNull();
expect(parsed.aiSummary).toBeNull();
});
// Input that does not begin with `---\n` is rejected by parseExportNote.
it('throws when input lacks frontmatter delimiter', () => {
expect(() => parseExportNote('hello world')).toThrow();
});
});