inkling/src/main/services/importFormat.ts

/**
 * Pure parse functions for F6-L3 (Import).
 *
 * Reverses the output of `composeMarkdown` from `exportFormat.ts`.
 * Minimal YAML parser handling exactly the variants F5 emits — plain scalars,
 * single-quoted strings (with `''` escapes), block scalar `|-`, and the two
 * structured lists (`tags:` inline-flow, `images:` block).
 *
 * No filesystem, no I/O.
 */

/** One entry recovered from the frontmatter `tags:` list. */
export interface ParsedNoteTag {
  /** Tag text, with single-quote wrapping and `''` escapes already removed. */
  name: string;
  /** Value of the item's `source` key; the parser only accepts `ai` or `user`. */
  source: 'ai' | 'user';
}

/** One three-line entry (`rel` / `mime` / `bytes`) recovered from the frontmatter `images:` list. */
export interface ParsedNoteImage {
  /** Value of the `rel:` line — presumably a path relative to the export root; confirm against the exporter. */
  rel: string;
  /** Value of the `mime:` line. */
  mime: string;
  /** Value of the `bytes:` line, parsed as a base-10 integer. */
  bytes: number;
}

/**
 * A note reconstructed by `parseExportNote` from one exported Markdown file.
 *
 * Nullable string fields are `null` when the corresponding frontmatter key is
 * absent; `*EditedByUser` flags are `true` only when the matching `*_source`
 * key equals `user`.
 */
export interface ParsedNote {
  // Required frontmatter keys: `id`, `created_at`, `updated_at`.
  id: string;
  createdAt: string;
  updatedAt: string;
  // Recovered from the Markdown body (after the frontmatter), not from a key.
  rawText: string;
  // Frontmatter `title` / `summary`.
  aiTitle: string | null;
  aiSummary: string | null;
  // Derived from `title_source` / `summary_source`.
  titleEditedByUser: boolean;
  summaryEditedByUser: boolean;
  // Frontmatter `ai_provider` / `ai_generated_at`.
  aiProvider: string | null;
  aiGeneratedAt: string | null;
  // Frontmatter `user_intent` / `intent_prompted_at`.
  userIntent: string | null;
  intentPromptedAt: string | null;
  deletedAt: string | null;  // new in v0.2.3 #4 (frontmatter `deleted_at`)
  // v0.3.0 Cut E — round-trip status / due_date / move_reason from frontmatter.
  // Default to 'active' / null / false when absent (older exports pre-Cut E).
  // An unrecognized `status` value also falls back to 'active'.
  status: 'active' | 'completed' | 'archived' | 'trashed';
  statusChangedAt: string | null;
  moveReason: string | null;
  dueDate: string | null;
  // Derived from `due_date_source`.
  dueDateEditedByUser: boolean;
  // Items of the frontmatter `tags:` and `images:` lists.
  tags: ParsedNoteTag[];
  images: ParsedNoteImage[];
  // Frontmatter `inkling_export_version` as an integer; 0 when absent or not numeric.
  exportVersion: number;
}

// ---------------------------------------------------------------------------
// YAML helpers
// ---------------------------------------------------------------------------

/**
 * Decode a YAML single-quoted scalar.
 *
 * The caller guarantees `raw` begins and ends with `'`; this drops those two
 * delimiters and collapses each doubled quote (`''`) into a literal `'`.
 */
function unquoteSingle(raw: string): string {
  const body = raw.slice(1, -1);
  // Splitting on the two-character escape and re-joining with one quote is a
  // left-to-right, non-overlapping replacement of every `''` pair.
  return body.split("''").join("'");
}

/** Result of parsing one `key: …` scalar out of the frontmatter lines. */
interface ParsedScalar {
  /**
   * Decoded text: the right-hand side as written for plain scalars, the
   * unquoted content for single-quoted scalars, or the dedented body lines
   * joined with `\n` for a `|-` block scalar.
   */
  value: string;
  /** number of source lines consumed (1 for plain/quoted, 1+N for block scalar) */
  consumed: number;
}

/**
 * Read the scalar stored under `expectedKey` on line `startIdx`.
 *
 * Three spellings are understood:
 *  - `key: value`              → plain text, one line consumed
 *  - `key: 'quoted'`           → `''` escapes decoded, one line consumed
 *  - `key: |-` + indented body → body dedented and joined with `\n`;
 *    the header line plus every body line (including blank ones) is consumed
 *
 * @param lines - All lines being parsed.
 * @param startIdx - Index of the line expected to hold `expectedKey:`.
 * @param expectedKey - Key that must appear at column 0, directly followed by `:`.
 * @returns The decoded value and the number of lines it occupied, or `null`
 *   when the line is missing, does not start with `expectedKey:`, or has a
 *   non-space character immediately after the colon.
 */
function parseScalarAt(
  lines: string[],
  startIdx: number,
  expectedKey: string
): ParsedScalar | null {
  const head = lines[startIdx];
  const marker = `${expectedKey}:`;
  if (head === undefined || !head.startsWith(marker)) return null;

  const remainder = head.slice(marker.length);
  // `key:value` (no separating space) is not treated as this key's scalar.
  if (remainder !== '' && remainder.charAt(0) !== ' ') return null;
  const text = remainder.trimStart();

  // Single-line forms: single-quoted or plain.
  if (text !== '|-') {
    const quoted = text.length >= 2 && text.startsWith("'") && text.endsWith("'");
    return { value: quoted ? unquoteSingle(text) : text, consumed: 1 };
  }

  // Block scalar: gather lines until the first non-empty line that has no
  // leading space. The first indented line fixes the dedent prefix; a line
  // with a shorter indent simply loses all of its leading whitespace.
  const collected: string[] = [];
  let dedentPrefix: string | null = null;
  let cursor = startIdx + 1;
  for (; cursor < lines.length; cursor += 1) {
    const current = lines[cursor]!;
    if (current === '') {
      // Empty lines belong to the block for now; trailing ones are cut below.
      collected.push('');
      continue;
    }
    const leading = /^ +/.exec(current);
    if (leading === null) break;
    if (dedentPrefix === null) dedentPrefix = leading[0];
    collected.push(
      current.startsWith(dedentPrefix) ? current.slice(dedentPrefix.length) : current.trimStart()
    );
  }

  // `|-` strips the final newline, so trailing empty lines carry no content.
  let end = collected.length;
  while (end > 0 && collected[end - 1] === '') end -= 1;

  return { value: collected.slice(0, end).join('\n'), consumed: cursor - startIdx };
}

// ---------------------------------------------------------------------------
// Frontmatter section parser
// ---------------------------------------------------------------------------

/** Everything `parseFrontmatter` extracts from the leading `---` … `---` block. */
interface Frontmatter {
  /**
   * Top-level scalar entries, keyed by the text before the first `:` on the
   * line. A repeated key overwrites the earlier value.
   */
  fields: Map<string, string>;
  /** Items collected from the `tags:` list; items that fail to parse are skipped. */
  tags: ParsedNoteTag[];
  /** Items collected from the `images:` list. */
  images: ParsedNoteImage[];
  /** total lines consumed including the closing `---` delimiter */
  consumedLines: number;
}

/**
 * Parse one item of the frontmatter `tags:` list, written as an inline flow
 * mapping: `- { name: foo, source: ai }` or `- { name: 'a, b', source: user }`.
 *
 * Keys may appear in either order; keys other than `name` and `source` are
 * ignored. A `name` is unquoted only when it is a complete single-quoted
 * scalar (at least two characters, starting and ending with `'`), matching
 * the rule the other scalar parsers in this file apply.
 *
 * @param line - The raw list-item line; surrounding whitespace is ignored.
 * @returns The parsed tag, or `null` when the line is not a `- { … }` item,
 *   any comma-separated part lacks a `:`, `name` or `source` is missing, or
 *   `source` is neither `ai` nor `user`.
 */
function parseTagFlow(line: string): ParsedNoteTag | null {
  const trimmed = line.trim();
  if (!trimmed.startsWith('-')) return null;
  const afterDash = trimmed.slice(1).trimStart();
  if (!afterDash.startsWith('{') || !afterDash.endsWith('}')) return null;
  const inner = afterDash.slice(1, -1).trim();

  // Split on commas that sit OUTSIDE single quotes, so a quoted name may
  // itself contain commas.
  const parts: string[] = [];
  let inQuote = false;
  let cursor = 0;
  for (let i = 0; i < inner.length; i++) {
    const ch = inner[i];
    if (ch === "'") {
      // `''` inside a quoted value is an escaped quote, not a terminator.
      if (inQuote && inner[i + 1] === "'") {
        i += 1;
        continue;
      }
      inQuote = !inQuote;
    } else if (ch === ',' && !inQuote) {
      parts.push(inner.slice(cursor, i));
      cursor = i + 1;
    }
  }
  parts.push(inner.slice(cursor));

  let nameRaw: string | null = null;
  let sourceRaw: string | null = null;
  for (const p of parts) {
    // Only the first colon separates key from value; later ones belong to the value.
    const colon = p.indexOf(':');
    if (colon === -1) return null;
    const k = p.slice(0, colon).trim();
    const v = p.slice(colon + 1).trim();
    if (k === 'name') {
      nameRaw = v;
    } else if (k === 'source') {
      sourceRaw = v;
    }
  }
  if (nameRaw === null || sourceRaw === null) return null;
  if (sourceRaw !== 'ai' && sourceRaw !== 'user') return null;

  // Require both delimiters to be distinct characters: a lone `'` is a plain
  // one-character name, not an empty quoted string.
  const isQuoted = nameRaw.length >= 2 && nameRaw.startsWith("'") && nameRaw.endsWith("'");
  const name = isQuoted ? unquoteSingle(nameRaw) : nameRaw;
  return { name, source: sourceRaw };
}

/**
 * Walk the leading `---` … `---` block and collect its contents.
 *
 * Recognized line kinds, all at column 0:
 *  - `tags:` followed by `  - { … }` items (items that fail to parse are dropped)
 *  - `images:` followed by three-line `  - rel:` / `    mime:` / `    bytes:` items
 *  - `key: value` scalars (plain, single-quoted, or `|-` block)
 * Empty lines and lines that fit none of these are skipped.
 *
 * @param lines - The whole document split into lines; `lines[0]` must be `---`.
 * @returns The collected fields, tags and images, plus the number of lines
 *   used up to and including the closing `---`.
 * @throws Error if the opening delimiter is missing, an `images:` item is
 *   incomplete, its `bytes` value is not numeric, or no closing `---` exists.
 */
function parseFrontmatter(lines: string[]): Frontmatter {
  if (lines[0] !== '---') {
    throw new Error('importFormat: expected frontmatter to start with "---"');
  }

  const fields = new Map<string, string>();
  const tags: ParsedNoteTag[] = [];
  const images: ParsedNoteImage[] = [];
  const total = lines.length;

  let pos = 1;
  while (pos < total) {
    const current = lines[pos]!;

    if (current === '---') {
      // Closing delimiter: count it as consumed and stop.
      return { fields, tags, images, consumedLines: pos + 1 };
    }

    if (current === '') {
      pos += 1;
      continue;
    }

    if (current === 'tags:') {
      // Consume the run of `  -` items that follows the header.
      for (pos += 1; pos < total && lines[pos]!.startsWith('  -'); pos += 1) {
        const parsedTag = parseTagFlow(lines[pos]!);
        if (parsedTag !== null) tags.push(parsedTag);
      }
      continue;
    }

    if (current === 'images:') {
      pos += 1;
      // Each item spans exactly three lines, opened by `  - rel:`.
      while (pos < total && lines[pos]!.startsWith('  - rel:')) {
        const rel = parseImageSubScalar(lines[pos]!, '  - rel:');
        const mime = parseImageSubScalar(lines[pos + 1] ?? '', '    mime:');
        const bytesText = parseImageSubScalar(lines[pos + 2] ?? '', '    bytes:');
        if (rel === null || mime === null || bytesText === null) {
          throw new Error('importFormat: malformed images item');
        }
        const bytes = Number.parseInt(bytesText, 10);
        if (!Number.isFinite(bytes)) {
          throw new Error('importFormat: bytes must be a number');
        }
        images.push({ rel, mime, bytes });
        pos += 3;
      }
      continue;
    }

    // Anything else is treated as a `key: value` scalar; the key is whatever
    // precedes the first colon. Lines without a usable scalar are skipped.
    const sep = current.indexOf(':');
    const parsed = sep === -1 ? null : parseScalarAt(lines, pos, current.slice(0, sep));
    if (parsed === null) {
      pos += 1;
      continue;
    }
    fields.set(current.slice(0, sep), parsed.value);
    pos += parsed.consumed;
  }

  throw new Error('importFormat: frontmatter not terminated');
}

/**
 * Read the value of one line inside an `images:` item.
 *
 * @param line - The candidate line.
 * @param prefix - Exact leading text the line must carry, indentation and key
 *   included (for example `'    mime:'`).
 * @returns The text after `prefix` with leading whitespace removed and, when
 *   it is a complete single-quoted scalar, with the quotes decoded; `null` if
 *   the line does not start with `prefix`.
 */
function parseImageSubScalar(line: string, prefix: string): string | null {
  if (!line.startsWith(prefix)) return null;
  const value = line.substring(prefix.length).trimStart();
  const isQuoted =
    value.length >= 2 && value[0] === "'" && value[value.length - 1] === "'";
  return isQuoted ? unquoteSingle(value) : value;
}

// ---------------------------------------------------------------------------
// Body parser (raw_text recovery)
// ---------------------------------------------------------------------------

/**
 * Recover `rawText` from the Markdown body that follows the frontmatter.
 *
 * Leading decoration is removed in order: blank lines, at most one `# ` heading
 * line, blank lines, a run of `> ` blockquote lines, blank lines. The text then
 * runs up to (not including) the first line starting with `![](media/`, or to
 * the end of the body, with trailing blank lines removed.
 *
 * @param bodyLines - Lines of the document after the closing `---`.
 * @returns The recovered text joined with `\n`; empty when nothing remains.
 */
function extractRawText(bodyLines: string[]): string {
  const total = bodyLines.length;

  // Index of the first non-empty line at or after `from`.
  const skipBlanks = (from: number): number => {
    let at = from;
    while (at < total && bodyLines[at] === '') at += 1;
    return at;
  };

  let begin = skipBlanks(0);
  // Only a single heading line is treated as the rendered title.
  if (begin < total && bodyLines[begin]!.startsWith('# ')) begin += 1;
  begin = skipBlanks(begin);
  // The rendered summary is a contiguous blockquote run.
  while (begin < total && bodyLines[begin]!.startsWith('> ')) begin += 1;
  begin = skipBlanks(begin);

  // The text ends at the first image reference line, if there is one.
  const firstImage = bodyLines.findIndex(
    (entry, idx) => idx >= begin && entry.startsWith('![](media/')
  );
  let end = firstImage === -1 ? total : firstImage;

  // Drop the blank separator lines left at the tail.
  while (end > begin && bodyLines[end - 1] === '') end -= 1;

  return bodyLines.slice(begin, end).join('\n');
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Parse one exported note (YAML frontmatter followed by a Markdown body) back
 * into a `ParsedNote`.
 *
 * Line endings are normalized (CRLF → LF) before the opening delimiter is
 * checked, so a file saved with Windows line endings is accepted rather than
 * rejected at the first check.
 *
 * @param markdown - Full text of the exported note.
 * @returns The reconstructed note. Optional frontmatter keys that are absent
 *   yield `null` (or `false` for the `*EditedByUser` flags); a missing or
 *   unrecognized `status` yields `'active'`; a missing or non-numeric
 *   `inkling_export_version` yields `0`.
 * @throws Error if the text does not begin with `---` on its own line, if
 *   frontmatter parsing fails, or if `id`, `created_at` or `updated_at` is
 *   missing.
 */
export function parseExportNote(markdown: string): ParsedNote {
  // Normalize first: checking for '---\n' on the raw text would reject every
  // CRLF file before the normalization could help.
  const normalized = markdown.replace(/\r\n/g, '\n');
  if (!normalized.startsWith('---\n')) {
    throw new Error('importFormat: markdown must start with "---\\n"');
  }
  const allLines = normalized.split('\n');
  const fm = parseFrontmatter(allLines);
  const rawText = extractRawText(allLines.slice(fm.consumedLines));

  const get = (k: string): string | null => fm.fields.get(k) ?? null;

  const id = get('id');
  const createdAt = get('created_at');
  const updatedAt = get('updated_at');
  if (id === null || createdAt === null || updatedAt === null) {
    throw new Error('importFormat: id/created_at/updated_at are required');
  }

  // Absent or non-numeric version → 0.
  const versionRaw = get('inkling_export_version');
  const exportVersion = versionRaw === null ? 0 : Number.parseInt(versionRaw, 10) || 0;

  // Older exports carry no `status`; anything unrecognized is treated the same way.
  const validStatuses: readonly string[] = ['active', 'completed', 'archived', 'trashed'];
  const statusRaw = get('status');
  const status: ParsedNote['status'] =
    statusRaw !== null && validStatuses.includes(statusRaw)
      ? (statusRaw as ParsedNote['status'])
      : 'active';

  return {
    id,
    createdAt,
    updatedAt,
    rawText,
    aiTitle: get('title'),
    aiSummary: get('summary'),
    titleEditedByUser: get('title_source') === 'user',
    summaryEditedByUser: get('summary_source') === 'user',
    aiProvider: get('ai_provider'),
    aiGeneratedAt: get('ai_generated_at'),
    userIntent: get('user_intent'),
    intentPromptedAt: get('intent_prompted_at'),
    deletedAt: get('deleted_at'),
    status,
    statusChangedAt: get('status_changed_at'),
    moveReason: get('move_reason'),
    dueDate: get('due_date'),
    dueDateEditedByUser: get('due_date_source') === 'user',
    tags: fm.tags,
    images: fm.images,
    exportVersion
  };
}