feat(import): pure parser for F5 export format

parseExportNote reverses composeMarkdown using a minimal YAML parser that covers only the variants F5 emits (plain, single-quoted, block scalar, tag/image lists). Body extraction strips the h1, the blockquote, and the image refs to recover rawText. Round-trip tested against exportFormat.composeMarkdown.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 10:53:29 +09:00
parent fe6bbd3104
commit e8587c1986
2 changed files with 563 additions and 0 deletions
--- a/src/main/services/importFormat.ts
+++ b/src/main/services/importFormat.ts
@@ -0,0 +1,354 @@
+/**
+ * Pure parse functions for F6-L3 (Import).
+ *
+ * Reverses the output of `composeMarkdown` from `exportFormat.ts`.
+ * Minimal YAML parser handling exactly the variants F5 emits — plain scalars,
+ * single-quoted strings (with `''` escapes), block scalar `|-`, and the two
+ * structured lists (`tags:` inline-flow, `images:` block).
+ *
+ * No filesystem, no I/O.
+ */
+
+/** One entry of the frontmatter `tags:` list. */
+export interface ParsedNoteTag {
+  /** Tag name, with any single-quote wrapping and `''` escapes already removed. */
+  name: string;
+  /** Value of the item's `source` key; only `ai` and `user` are accepted. */
+  source: 'ai' | 'user';
+}
+
+/** One entry of the frontmatter `images:` list. */
+export interface ParsedNoteImage {
+  /** Value of the item's `rel:` line. */
+  rel: string;
+  /** Value of the item's `mime:` line. */
+  mime: string;
+  /** Value of the item's `bytes:` line, parsed as a base-10 integer. */
+  bytes: number;
+}
+
+/**
+ * Result of `parseExportNote`: the frontmatter fields plus the recovered body text.
+ * Nullable string fields are `null` when the corresponding frontmatter key is absent.
+ */
+export interface ParsedNote {
+  /** Frontmatter `id` (required). */
+  id: string;
+  /** Frontmatter `created_at` (required), kept as the string found in the file. */
+  createdAt: string;
+  /** Frontmatter `updated_at` (required), kept as the string found in the file. */
+  updatedAt: string;
+  /** Body text left after the leading h1, the blockquote run and the trailing image refs are removed. */
+  rawText: string;
+  /** Frontmatter `title`. */
+  aiTitle: string | null;
+  /** Frontmatter `summary`. */
+  aiSummary: string | null;
+  /** `true` only when frontmatter `title_source` is exactly `user`. */
+  titleEditedByUser: boolean;
+  /** `true` only when frontmatter `summary_source` is exactly `user`. */
+  summaryEditedByUser: boolean;
+  /** Frontmatter `ai_provider`. */
+  aiProvider: string | null;
+  /** Frontmatter `ai_generated_at`. */
+  aiGeneratedAt: string | null;
+  /** Frontmatter `user_intent`. */
+  userIntent: string | null;
+  /** Frontmatter `intent_prompted_at`. */
+  intentPromptedAt: string | null;
+  /** Items of the frontmatter `tags:` list, in file order. */
+  tags: ParsedNoteTag[];
+  /** Items of the frontmatter `images:` list, in file order. */
+  images: ParsedNoteImage[];
+  /** Frontmatter `inkling_export_version` as an integer; `0` when the key is absent or not a number. */
+  exportVersion: number;
+}
+
+// ---------------------------------------------------------------------------
+// YAML helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Removes the surrounding single quotes from a YAML single-quoted scalar and
+ * turns every doubled quote (`''`) inside it back into a single `'`.
+ *
+ * The caller must already have verified that `raw` starts and ends with `'`;
+ * the first and last characters are dropped without being inspected.
+ */
+function unquoteSingle(raw: string): string {
+  return raw.slice(1, -1).split("''").join("'");
+}
+
+/** Result of parsing one `key: …` scalar with `parseScalarAt`. */
+interface ParsedScalar {
+  /** Decoded text: unquoted for single-quoted values, dedented and joined with `\n` for `|-` blocks. */
+  value: string;
+  /** number of source lines consumed (1 for plain/quoted, 1+N for block scalar) */
+  consumed: number;
+}
+
+/**
+ * Reads the scalar stored under `expectedKey` on `lines[startIdx]`.
+ *
+ * Three value shapes are understood:
+ *  - plain text after `key:`          → returned with leading whitespace trimmed, consumed=1
+ *  - `'single-quoted'`                → quotes removed and `''` unescaped, consumed=1
+ *  - `|-` followed by indented lines  → lines dedented and joined with `\n`, consumed=1+N
+ *
+ * Returns `null` when the line does not exist, does not begin with
+ * `expectedKey:` at column 0, or has a non-space character right after the colon.
+ */
+function parseScalarAt(
+  lines: string[],
+  startIdx: number,
+  expectedKey: string
+): ParsedScalar | null {
+  const head = lines[startIdx];
+  const marker = `${expectedKey}:`;
+  if (head === undefined || !head.startsWith(marker)) return null;
+  const tail = head.slice(marker.length);
+  if (tail !== '' && !tail.startsWith(' ')) return null;
+  const rhs = tail.trimStart();
+
+  // Single-line values: single-quoted or plain.
+  if (rhs !== '|-') {
+    const quoted = rhs.length >= 2 && rhs.startsWith("'") && rhs.endsWith("'");
+    return { value: quoted ? unquoteSingle(rhs) : rhs, consumed: 1 };
+  }
+
+  // Block scalar: gather every following line that is empty or starts with a
+  // space. The run of spaces on the first indented line becomes the dedent
+  // prefix for the whole block; a line that lacks that prefix is left-trimmed.
+  const collected: string[] = [];
+  let dedentPrefix: string | null = null;
+  let cursor = startIdx + 1;
+  for (; cursor < lines.length; cursor += 1) {
+    const current = lines[cursor]!;
+    if (current === '') {
+      // Blank lines stay in the block for now; trailing ones are dropped below.
+      collected.push('');
+      continue;
+    }
+    // The first non-empty line that is not indented ends the block.
+    if (!current.startsWith(' ')) break;
+    if (dedentPrefix === null) dedentPrefix = /^ +/.exec(current)![0];
+    collected.push(
+      current.startsWith(dedentPrefix) ? current.slice(dedentPrefix.length) : current.trimStart()
+    );
+  }
+
+  // Blank lines at the end of the block are not part of the value, but they
+  // still count as consumed source lines.
+  let end = collected.length;
+  while (end > 0 && collected[end - 1] === '') end -= 1;
+  return { value: collected.slice(0, end).join('\n'), consumed: cursor - startIdx };
+}
+
+// ---------------------------------------------------------------------------
+// Frontmatter section parser
+// ---------------------------------------------------------------------------
+
+/** Everything `parseFrontmatter` extracts from the leading `---` … `---` section. */
+interface Frontmatter {
+  /** Top-level scalar values, keyed by the text before the first `:` on their line. */
+  fields: Map<string, string>;
+  /** Items collected from the `tags:` list. */
+  tags: ParsedNoteTag[];
+  /** Items collected from the `images:` list. */
+  images: ParsedNoteImage[];
+  /** total lines consumed including the closing `---` delimiter */
+  consumedLines: number;
+}
+
+/**
+ * Decodes one item of the `tags:` list, written as an inline flow mapping:
+ * `- { name: foo, source: ai }` or `- { name: 'a, b', source: user }`.
+ *
+ * Returns `null` when the line is not a `- { … }` item, when any
+ * comma-separated part lacks a `:`, when `name` or `source` is missing, or
+ * when `source` is neither `ai` nor `user`. Keys other than `name` and
+ * `source` are ignored.
+ */
+function parseTagFlow(line: string): ParsedNoteTag | null {
+  const item = line.trim();
+  if (item[0] !== '-') return null;
+  const flow = item.slice(1).trimStart();
+  if (!(flow.startsWith('{') && flow.endsWith('}'))) return null;
+  const body = flow.slice(1, -1).trim();
+
+  // Cut `body` at every comma that sits outside a single-quoted string.
+  const segments: string[] = [];
+  let quoted = false;
+  let segmentStart = 0;
+  let pos = 0;
+  while (pos < body.length) {
+    const ch = body[pos];
+    if (ch === "'") {
+      if (quoted && body[pos + 1] === "'") {
+        // `''` inside a quoted string is an escaped quote, not a terminator.
+        pos += 2;
+        continue;
+      }
+      quoted = !quoted;
+    } else if (ch === ',' && !quoted) {
+      segments.push(body.slice(segmentStart, pos));
+      segmentStart = pos + 1;
+    }
+    pos += 1;
+  }
+  segments.push(body.slice(segmentStart));
+
+  // Each segment must look like `key: value`; a repeated key keeps its last value.
+  const pairs = new Map<string, string>();
+  for (const segment of segments) {
+    const sep = segment.indexOf(':');
+    if (sep < 0) return null;
+    pairs.set(segment.slice(0, sep).trim(), segment.slice(sep + 1).trim());
+  }
+
+  const rawName = pairs.get('name');
+  const rawSource = pairs.get('source');
+  if (rawName === undefined || rawSource === undefined) return null;
+  if (rawSource !== 'ai' && rawSource !== 'user') return null;
+  const nameIsQuoted = rawName.startsWith("'") && rawName.endsWith("'");
+  return { name: nameIsQuoted ? unquoteSingle(rawName) : rawName, source: rawSource };
+}
+
+/**
+ * Walks the frontmatter section line by line and collects its contents.
+ *
+ * `lines[0]` must be the opening `---`; parsing stops at the next line that is
+ * exactly `---`. Lines that are blank, contain no `:`, or are rejected by
+ * `parseScalarAt` are skipped. Tag items that `parseTagFlow` rejects are
+ * dropped silently, whereas an incomplete image item is an error.
+ *
+ * @throws Error when the opening delimiter is missing, the closing delimiter
+ *   is never reached, or an `images:` item is malformed.
+ */
+function parseFrontmatter(lines: string[]): Frontmatter {
+  if (lines[0] !== '---') {
+    throw new Error('importFormat: expected frontmatter to start with "---"');
+  }
+  const result: Frontmatter = {
+    fields: new Map<string, string>(),
+    tags: [],
+    images: [],
+    consumedLines: 0
+  };
+
+  let row = 1;
+  while (row < lines.length) {
+    const current = lines[row]!;
+
+    if (current === '---') {
+      // Closing delimiter: count it as consumed and finish.
+      result.consumedLines = row + 1;
+      return result;
+    }
+
+    if (current === 'tags:') {
+      // One flow item per line, for as long as lines start with `  -`.
+      row += 1;
+      for (; row < lines.length && lines[row]!.startsWith('  -'); row += 1) {
+        const tag = parseTagFlow(lines[row]!);
+        if (tag !== null) result.tags.push(tag);
+      }
+      continue;
+    }
+
+    if (current === 'images:') {
+      // Each item occupies exactly three lines: rel, mime, bytes.
+      row += 1;
+      while (row < lines.length && lines[row]!.startsWith('  - rel:')) {
+        const rel = parseImageSubScalar(lines[row]!, '  - rel:');
+        const mime = parseImageSubScalar(lines[row + 1] ?? '', '    mime:');
+        const bytesText = parseImageSubScalar(lines[row + 2] ?? '', '    bytes:');
+        if (rel === null || mime === null || bytesText === null) {
+          throw new Error('importFormat: malformed images item');
+        }
+        const bytes = Number.parseInt(bytesText, 10);
+        if (!Number.isFinite(bytes)) {
+          throw new Error('importFormat: bytes must be a number');
+        }
+        result.images.push({ rel, mime, bytes });
+        row += 3;
+      }
+      continue;
+    }
+
+    // Anything else is treated as a top-level `key: value` scalar. Blank
+    // lines and stray lines have no `:` and fall through to the skip below.
+    const colonAt = current.indexOf(':');
+    const key = colonAt === -1 ? null : current.slice(0, colonAt);
+    const scalar = key === null ? null : parseScalarAt(lines, row, key);
+    if (key === null || scalar === null) {
+      row += 1;
+      continue;
+    }
+    result.fields.set(key, scalar.value);
+    row += scalar.consumed;
+  }
+  throw new Error('importFormat: frontmatter not terminated');
+}
+
+/**
+ * Reads the value of one line belonging to an `images:` item.
+ *
+ * `prefix` is the exact leading text the line must carry (indentation and key
+ * included, e.g. `    mime:`). The remainder is left-trimmed and, when wrapped
+ * in single quotes, unquoted. Returns `null` if the line does not start with
+ * `prefix`.
+ */
+function parseImageSubScalar(line: string, prefix: string): string | null {
+  if (!line.startsWith(prefix)) return null;
+  const value = line.slice(prefix.length).trimStart();
+  const isQuoted = value.length >= 2 && value.startsWith("'") && value.endsWith("'");
+  return isQuoted ? unquoteSingle(value) : value;
+}
+
+// ---------------------------------------------------------------------------
+// Body parser (raw_text recovery)
+// ---------------------------------------------------------------------------
+
+/**
+ * Recovers `rawText` from the lines that follow the frontmatter.
+ *
+ * Skipped from the front, in this order: blank lines, at most one `# ` heading
+ * line, blank lines, a run of `> ` blockquote lines, blank lines. The text
+ * then runs up to (not including) the first line that starts with
+ * `![](media/`, or to the end of input; blank lines at its end are removed.
+ */
+function extractRawText(bodyLines: string[]): string {
+  const total = bodyLines.length;
+  let start = 0;
+  // Advances `start` past every consecutive line accepted by `accept`.
+  const skipWhile = (accept: (line: string) => boolean): void => {
+    while (start < total && accept(bodyLines[start]!)) start += 1;
+  };
+  const isBlank = (line: string): boolean => line === '';
+
+  skipWhile(isBlank);
+  if (start < total && bodyLines[start]!.startsWith('# ')) start += 1;
+  skipWhile(isBlank);
+  skipWhile((line) => line.startsWith('> '));
+  skipWhile(isBlank);
+
+  // Image refs are emitted only at line start, after the body text.
+  const firstImage = bodyLines.findIndex(
+    (line, idx) => idx >= start && line.startsWith('![](media/')
+  );
+  let end = firstImage === -1 ? total : firstImage;
+  while (end > start && bodyLines[end - 1] === '') end -= 1;
+  return bodyLines.slice(start, end).join('\n');
+}
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/**
+ * Parses one note exported in the F5 markdown format back into a `ParsedNote`.
+ *
+ * CRLF line endings are converted to LF before anything else is inspected, so
+ * a file that was re-saved with Windows line endings is accepted too.
+ *
+ * @param markdown Full text of the exported note, beginning with the `---` frontmatter delimiter.
+ * @returns The frontmatter fields plus the body text recovered by `extractRawText`.
+ * @throws Error if the text does not start with `---` on its own line, if the
+ *   frontmatter is unterminated or holds a malformed `images:` item, or if
+ *   `id`, `created_at` or `updated_at` is missing.
+ */
+export function parseExportNote(markdown: string): ParsedNote {
+  // Normalize line endings first (F5 emits LF only, but be defensive). This
+  // has to happen before the delimiter check, otherwise `---\r\n` is rejected.
+  const normalized = markdown.replace(/\r\n/g, '\n');
+  if (!normalized.startsWith('---\n')) {
+    throw new Error('importFormat: markdown must start with "---\\n"');
+  }
+  const allLines = normalized.split('\n');
+  const fm = parseFrontmatter(allLines);
+  const bodyLines = allLines.slice(fm.consumedLines);
+  const rawText = extractRawText(bodyLines);
+
+  // Absent keys are reported as `null`; an empty value stays an empty string.
+  const get = (k: string): string | null => fm.fields.get(k) ?? null;
+  const id = get('id');
+  const createdAt = get('created_at');
+  const updatedAt = get('updated_at');
+  if (id === null || createdAt === null || updatedAt === null) {
+    throw new Error('importFormat: id/created_at/updated_at are required');
+  }
+  const titleSource = get('title_source');
+  const summarySource = get('summary_source');
+  const versionRaw = get('inkling_export_version');
+  // `|| 0` maps an unparsable version (NaN) to 0, the same value used when the key is absent.
+  const exportVersion = versionRaw === null ? 0 : Number.parseInt(versionRaw, 10) || 0;
+
+  return {
+    id,
+    createdAt,
+    updatedAt,
+    rawText,
+    aiTitle: get('title'),
+    aiSummary: get('summary'),
+    titleEditedByUser: titleSource === 'user',
+    summaryEditedByUser: summarySource === 'user',
+    aiProvider: get('ai_provider'),
+    aiGeneratedAt: get('ai_generated_at'),
+    userIntent: get('user_intent'),
+    intentPromptedAt: get('intent_prompted_at'),
+    tags: fm.tags,
+    images: fm.images,
+    exportVersion
+  };
+}
--- a/tests/unit/importFormat.test.ts
+++ b/tests/unit/importFormat.test.ts
@@ -0,0 +1,209 @@
+import { describe, it, expect } from 'vitest';
+import {
+  composeMarkdown,
+  type ExportNote
+} from '@main/services/exportFormat.js';
+import { parseExportNote } from '@main/services/importFormat.js';
+
+// Baseline fixture: title and summary not edited by the user, no user intent,
+// two tags (one per source) and no media. Individual tests spread this object
+// and override only the fields they exercise.
+const baseNote: ExportNote = {
+  id: '014a3b9c-1234-7890-abcd-000000000001',
+  createdAt: '2026-04-25T14:23:11.000Z',
+  updatedAt: '2026-04-25T14:24:02.000Z',
+  rawText: '회고 메모 본문',
+  aiTitle: '주간 회고 PR 리뷰',
+  aiSummary: '회고 양식 통일을 위한 메모.',
+  titleEditedByUser: false,
+  summaryEditedByUser: false,
+  aiProvider: 'local-ollama/gemma4:e4b',
+  aiGeneratedAt: '2026-04-25T14:23:34.000Z',
+  userIntent: null,
+  intentPromptedAt: null,
+  tags: [{ name: 'pr', source: 'ai' }, { name: 'review', source: 'user' }],
+  media: []
+};
+
+describe('parseExportNote — round-trip with composeMarkdown', () => {
+  // Exports a note and parses the resulting markdown straight back.
+  const roundTrip = (note: ExportNote) => parseExportNote(composeMarkdown(note));
+
+  it('round-trips the base note', () => {
+    const parsed = roundTrip(baseNote);
+
+    // Scalar fields must come back exactly as they went in.
+    const scalarKeys = [
+      'id',
+      'createdAt',
+      'updatedAt',
+      'rawText',
+      'aiTitle',
+      'aiSummary',
+      'aiProvider',
+      'aiGeneratedAt'
+    ] as const;
+    for (const key of scalarKeys) {
+      expect(parsed[key]).toBe(baseNote[key]);
+    }
+
+    expect(parsed.titleEditedByUser).toBe(false);
+    expect(parsed.summaryEditedByUser).toBe(false);
+    expect(parsed.tags).toEqual([
+      { name: 'pr', source: 'ai' },
+      { name: 'review', source: 'user' }
+    ]);
+    expect(parsed.images).toEqual([]);
+    expect(parsed.exportVersion).toBe(1);
+  });
+
+  it('round-trips a note with media', () => {
+    const withMedia: ExportNote = {
+      ...baseNote,
+      media: [
+        { rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 1234 },
+        { rel: 'media/014a3b9c__2.jpg', mime: 'image/jpeg', bytes: 5678 }
+      ]
+    };
+    const parsed = roundTrip(withMedia);
+    expect(parsed.images).toEqual([
+      { rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 1234 },
+      { rel: 'media/014a3b9c__2.jpg', mime: 'image/jpeg', bytes: 5678 }
+    ]);
+    expect(parsed.rawText).toBe(withMedia.rawText);
+  });
+});
+
+// One test per scalar encoding visible in the exported frontmatter: plain,
+// single-quoted (with `''` as the escaped apostrophe) and `|-` block scalar.
+describe('parseExportNote — frontmatter scalar variants', () => {
+  it('parses plain scalar', () => {
+    const md = composeMarkdown({ ...baseNote, aiTitle: '주간 회고' });
+    const parsed = parseExportNote(md);
+    expect(parsed.aiTitle).toBe('주간 회고');
+  });
+
+  // The title previously read "(`` `` escape)"; the escape under test is `''`.
+  it("parses single-quoted with embedded apostrophe (`''` escape)", () => {
+    const note: ExportNote = { ...baseNote, aiTitle: "it's a: title" };
+    const md = composeMarkdown(note);
+    // Should be emitted as: title: 'it''s a: title'
+    expect(md).toContain("title: 'it''s a: title'");
+    const parsed = parseExportNote(md);
+    expect(parsed.aiTitle).toBe("it's a: title");
+  });
+
+  it('parses block scalar `|-` for multiline summary', () => {
+    const note: ExportNote = {
+      ...baseNote,
+      aiSummary: 'line1\nline2\nline3'
+    };
+    const md = composeMarkdown(note);
+    // Confirm the exporter really chose the block form before parsing it back.
+    expect(md).toContain('summary: |-');
+    const parsed = parseExportNote(md);
+    expect(parsed.aiSummary).toBe('line1\nline2\nline3');
+  });
+});
+
+describe('parseExportNote — list parsing', () => {
+  // Exports `baseNote` with the given overrides and parses the result.
+  const parseWith = (overrides: Partial<ExportNote>) =>
+    parseExportNote(composeMarkdown({ ...baseNote, ...overrides }));
+
+  it('parses tags inline flow', () => {
+    const { tags } = parseWith({
+      tags: [
+        { name: 'foo', source: 'ai' },
+        { name: 'bar baz', source: 'user' }
+      ]
+    });
+    expect(tags).toEqual([
+      { name: 'foo', source: 'ai' },
+      { name: 'bar baz', source: 'user' }
+    ]);
+  });
+
+  it('parses images list with mime + bytes', () => {
+    const { images } = parseWith({
+      media: [{ rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 9876 }]
+    });
+    expect(images).toEqual([
+      { rel: 'media/014a3b9c__1.png', mime: 'image/png', bytes: 9876 }
+    ]);
+  });
+});
+
+describe('parseExportNote — body extraction', () => {
+  // Exports `baseNote` with the given overrides and returns the rawText the parser recovers.
+  const recoveredRawText = (overrides: Partial<ExportNote>): string =>
+    parseExportNote(composeMarkdown({ ...baseNote, ...overrides })).rawText;
+
+  it('extracts rawText with summary present', () => {
+    const rawText = '본문\n두 번째 줄';
+    expect(recoveredRawText({ rawText })).toBe('본문\n두 번째 줄');
+  });
+
+  it('extracts rawText with summary absent', () => {
+    const rawText = '요약 없는 본문';
+    expect(recoveredRawText({ aiSummary: null, rawText })).toBe('요약 없는 본문');
+  });
+
+  it('extracts rawText with no images', () => {
+    const rawText = '이미지 없음';
+    expect(recoveredRawText({ rawText, media: [] })).toBe('이미지 없음');
+  });
+
+  it('preserves `>` mid-line in rawText (not parsed as blockquote)', () => {
+    const rawText = '값 a > b 라는 부등호';
+    expect(recoveredRawText({ rawText })).toBe('값 a > b 라는 부등호');
+  });
+
+  it('preserves `# ` mid-line in rawText (not parsed as heading)', () => {
+    const rawText = '예시: see issue #1 어쩌고 # 가운데 해시';
+    expect(recoveredRawText({ rawText })).toBe('예시: see issue #1 어쩌고 # 가운데 해시');
+  });
+});
+
+describe('parseExportNote — provenance', () => {
+  // Exports a note and parses the resulting markdown straight back.
+  const roundTrip = (note: ExportNote) => parseExportNote(composeMarkdown(note));
+
+  it('recovers titleEditedByUser from title_source: user', () => {
+    const { titleEditedByUser } = roundTrip({ ...baseNote, titleEditedByUser: true });
+    expect(titleEditedByUser).toBe(true);
+  });
+
+  it('recovers summaryEditedByUser from summary_source: user', () => {
+    const { summaryEditedByUser } = roundTrip({ ...baseNote, summaryEditedByUser: true });
+    expect(summaryEditedByUser).toBe(true);
+  });
+
+  it('exposes exportVersion = 1', () => {
+    const { exportVersion } = roundTrip(baseNote);
+    expect(exportVersion).toBe(1);
+  });
+});
+
+describe('parseExportNote — edge cases', () => {
+  // Exports `baseNote` with the given overrides and parses the result.
+  const parseWith = (overrides: Partial<ExportNote>) =>
+    parseExportNote(composeMarkdown({ ...baseNote, ...overrides }));
+
+  it('preserves user_intent when present', () => {
+    const { userIntent, intentPromptedAt } = parseWith({
+      userIntent: '팀에서 회고 양식 통일',
+      intentPromptedAt: '2026-04-25T14:24:02.000Z'
+    });
+    expect(userIntent).toBe('팀에서 회고 양식 통일');
+    expect(intentPromptedAt).toBe('2026-04-25T14:24:02.000Z');
+  });
+
+  it('returns null aiTitle / aiSummary when omitted', () => {
+    const { aiTitle, aiSummary } = parseWith({ aiTitle: null, aiSummary: null });
+    expect(aiTitle).toBeNull();
+    expect(aiSummary).toBeNull();
+  });
+
+  it('throws when input lacks frontmatter delimiter', () => {
+    // No leading `---` line at all, so the parser must reject the input.
+    const attempt = () => parseExportNote('hello world');
+    expect(attempt).toThrow();
+  });
+});