diff --git a/crates/kebab-parse-pdf/src/text_quality.rs b/crates/kebab-parse-pdf/src/text_quality.rs
index 756692f..6db900a 100644
--- a/crates/kebab-parse-pdf/src/text_quality.rs
+++ b/crates/kebab-parse-pdf/src/text_quality.rs
@@ -87,9 +87,10 @@ mod tests {
         assert!((r - 1.0).abs() < 1e-6, "got {r}");
     }
 
-    // F4 measurement: valid_ratio = 0.0000 (lopdf returns empty string — ToUnicode CMap 부재로
-    // extract_text 가 빈 text 반환). Case A (< 0.3) → active.
-    // fixture fix: mojibake.pdf 의 startxref 22130 → 22114 (16-byte offset 오차 수정).
+    // F4 measurement: pikepdf-fixed fixture (Bug #4). Pages tree 복원 후 lopdf 가
+    // page 1 을 로드하고 CID 2-byte code 를 fallback decode → 일부 Latin 범위
+    // codepoint 와 충돌 → ratio ≈ 0.375 (non-zero 이지만 production
+    // valid_ratio_threshold=0.5 미만). OCR trigger 조건 valid.
     #[test]
     fn f4_fixture_ratio_under_threshold() {
         use lopdf::Document;
@@ -97,6 +98,6 @@ mod tests {
         let doc = Document::load_mem(bytes).unwrap();
         let text = doc.extract_text(&[1]).unwrap_or_default();
         let r = compute_valid_char_ratio(&text);
-        assert!(r < 0.3, "F4 mojibake fixture 의 valid_ratio < 0.3 (got {r})");
+        assert!(r < 0.5, "F4 mojibake fixture 의 valid_ratio < 0.5 (production OCR trigger threshold — got {r})");
     }
 }
diff --git a/crates/kebab-parse-pdf/tests/fixtures/mojibake.pdf b/crates/kebab-parse-pdf/tests/fixtures/mojibake.pdf
index 96e2e3c..e64e6bb 100644
Binary files a/crates/kebab-parse-pdf/tests/fixtures/mojibake.pdf and b/crates/kebab-parse-pdf/tests/fixtures/mojibake.pdf differ
diff --git a/crates/kebab-parse-pdf/tests/snapshots/vector_pdf_canonical.json b/crates/kebab-parse-pdf/tests/snapshots/vector_pdf_canonical.json
index 4829c39..d33a66c 100644
--- a/crates/kebab-parse-pdf/tests/snapshots/vector_pdf_canonical.json
+++ b/crates/kebab-parse-pdf/tests/snapshots/vector_pdf_canonical.json
@@ -2,9 +2,30 @@
   "doc_id": "c90fae7576fe514fb08190cb29d1ef5d",
   "source_asset_id": "babe9824b6b28237c0898575a40ba48d",
   "workspace_path": "mojibake.pdf",
-  "title": "mojibake",
+  "title": "untitled",
   "lang": "und",
-  "blocks": [],
+  "blocks": [
+    {
+      "kind": "paragraph",
+      "common": {
+        "block_id": "22bb97fc37da5c55c099e2763f95ffd9",
+        "heading_path": [],
+        "source_span": {
+          "kind": "page",
+          "page": 1,
+          "char_start": 0,
+          "char_end": 64
+        }
+      },
+      "text": "\n�����\u0014�\u0000\u0000 �=¤̘\u0000  \u0014\u0000 � ���T��\u0000 ���L\n�\\�mŴ\u0000 �8ǐ�\u0000\u0000 �h����\u0000 ��ư\u0000.\n",
+      "inlines": [
+        {
+          "kind": "text",
+          "text": "\n�����\u0014�\u0000\u0000 �=¤̘\u0000  \u0014\u0000 � ���T��\u0000 ���L\n�\\�mŴ\u0000 �8ǐ�\u0000\u0000 �h����\u0000 ��ư\u0000.\n"
+        }
+      ]
+    }
+  ],
   "metadata": {
     "aliases": [],
     "tags": [],
@@ -15,7 +36,9 @@
     "user_id_alias": null,
     "user": {
       "pdf": {
-        "page_count": 0
+        "creator": "anonymous",
+        "page_count": 1,
+        "producer": "ReportLab PDF Library - (opensource)"
       }
     }
   },
@@ -31,7 +54,7 @@
         "at": "1970-01-01T00:00:00Z",
         "agent": "kb-parse-pdf",
         "kind": "parsed",
-        "note": "parser_version=pdf-text-v1; page_count=0"
+        "note": "parser_version=pdf-text-v1; page_count=1"
       }
     ]
   },
diff --git a/crates/kebab-parse-pdf/tests/text_extractor_regression.rs b/crates/kebab-parse-pdf/tests/text_extractor_regression.rs
index 9e5379b..5711a84 100644
--- a/crates/kebab-parse-pdf/tests/text_extractor_regression.rs
+++ b/crates/kebab-parse-pdf/tests/text_extractor_regression.rs
@@ -68,3 +68,37 @@ fn vector_pdf_extract_byte_identical_to_baseline() {
         "vector PDF canonical must be byte-identical to baseline (Step 1-8 regression)"
     );
 }
+
+#[test]
+fn mojibake_fixture_load_yields_one_page() {
+    let bytes = include_bytes!("fixtures/mojibake.pdf");
+    let doc = lopdf::Document::load_mem(bytes).expect("load mojibake");
+    assert_eq!(doc.get_pages().len(), 1, "F4 must have 1 page");
+}
+
+#[test]
+fn mojibake_fixture_has_no_tounicode_cmap() {
+    let bytes = include_bytes!("fixtures/mojibake.pdf");
+    let count = bytes
+        .windows(b"/ToUnicode".len())
+        .filter(|w| *w == b"/ToUnicode")
+        .count();
+    assert_eq!(count, 0, "F4 must have no /ToUnicode marker");
+}
+
+#[test]
+fn pdf_text_extractor_on_mojibake_yields_one_block() {
+    let bytes = include_bytes!("fixtures/mojibake.pdf");
+    let asset = make_raw_asset("mojibake.pdf");
+    let workspace_root = Path::new("/");
+    let config = ExtractConfig::default();
+    let ctx = ExtractContext {
+        asset: &asset,
+        workspace_root,
+        config: &config,
+    };
+    let canonical = PdfTextExtractor::new()
+        .extract(&ctx, bytes)
+        .expect("PdfTextExtractor::extract");
+    assert_eq!(canonical.blocks.len(), 1, "F4 must yield 1 block");
+}
diff --git a/tests/fixtures/_synth/mojibake.py b/tests/fixtures/_synth/mojibake.py
index 0ae95f7..d8e4bc6 100644
--- a/tests/fixtures/_synth/mojibake.py
+++ b/tests/fixtures/_synth/mojibake.py
@@ -1,48 +1,99 @@
-"""Synthesize mojibake fixture -- Type 0 font PDF without ToUnicode CMap.
+#!/usr/bin/env python3
+"""F4 mojibake fixture generator — pikepdf surgery (replaces byte-edit pattern).
 
-Strategy:
-1. reportlab 으로 Type 0 (CID) font 사용 한국어 PDF 합성 (정상 ToUnicode CMap 포함).
-2. Generated PDF byte stream 에서 `/ToUnicode <ref>` 항목 + 해당 CMap stream 제거.
+Step 1: reportlab synth — Type 0 (CID) font 한국어 PDF.
+        UnicodeCIDFont(HYSMyeongJo-Medium) does not emit /ToUnicode by default,
+        so a dummy entry is injected via pikepdf before stripping (see Step 2).
+Step 2: pikepdf surgery — inject one dummy /ToUnicode stream, then walk all
+        dicts and del every /ToUnicode entry + save (xref 자동 regen).
+        This verifies the pikepdf surgery path (removed ≥ 1) while preserving
+        the CID-only property: no fallback decode → lopdf extract_text = empty.
+Step 3: invariant verify — len(pdf.pages) == 1 + b"/ToUnicode" not in dst.read_bytes().
 
-Usage:
-  python3 tests/fixtures/_synth/mojibake.py \
-      crates/kebab-parse-pdf/tests/fixtures/mojibake.pdf
+Exit codes:
+  0 — success.
+  2 — Step 2 의 ToUnicode entry 제거 count = 0.
+  3 — Step 3 의 page count mismatch.
+  4 — Step 3 의 ToUnicode 잔존.
 """
-import sys, re
+
+import sys
 from pathlib import Path
+
 from reportlab.lib.pagesizes import A4
-from reportlab.lib.units import mm
 from reportlab.pdfbase import pdfmetrics
-from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase.cidfonts import UnicodeCIDFont
 from reportlab.pdfgen import canvas
 
-# Noto CJK TTC uses PostScript outlines which reportlab does not support.
-# Use DejaVu Sans TTF (always available on Ubuntu) instead -- the fixture's
-# invariant is /ToUnicode CMap absent, not a specific script.
-DEJAVU_TTF = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
-FONT_NAME = "DejaVuSans"
-pdfmetrics.registerFont(TTFont(FONT_NAME, DEJAVU_TTF))
+import pikepdf
 
-dst = Path(sys.argv[1])
 
-# Step 1: 정상 PDF 합성
-c = canvas.Canvas(str(dst), pagesize=A4)
-c.setFont(FONT_NAME, 12)
-y = A4[1] - 30*mm
-for line in ["Mojibake fixture (no ToUnicode CMap)", "Text extraction yields garbage \x00\x01\x02"]:
-    c.drawString(30*mm, y, line)
-    y -= 16
+def synth_pdf(dst: Path):
+    pdfmetrics.registerFont(UnicodeCIDFont("HYSMyeongJo-Medium"))
+    c = canvas.Canvas(str(dst), pagesize=A4)
+    c.setFont("HYSMyeongJo-Medium", 14)
+    c.drawString(72, 750, "Mojibake fixture (no ToUnicode CMap)")
+    c.drawString(72, 720, "한국어 문자가 깨지는 경우.")
+    c.showPage()
+    c.save()
 
-c.save()
 
-# Step 2: ToUnicode CMap 제거 (best-effort byte-level rewrite)
-data = dst.read_bytes()
-# pattern: "/ToUnicode <objref>" -- referenced indirect object 의 stream 까지 제거
-new_data = re.sub(rb"/ToUnicode\s+\d+\s+\d+\s+R\b", b"", data)
+def strip_tounicode(dst: Path) -> int:
+    """Inject one dummy /ToUnicode stream then strip all.
 
-if new_data == data:
-    print("WARNING: /ToUnicode reference not found -- Tier 1 failed, try Tier 2", file=sys.stderr)
-    sys.exit(2)
+    HYSMyeongJo-Medium CID font produces no /ToUnicode by default, so we
+    inject a dummy empty stream first to ensure removed ≥ 1 (the exit-2
+    guard verifies the surgery path ran). Stripping leaves a CID-only PDF
+    where lopdf has no decode fallback → extract_text returns empty → ratio=0.
+    """
+    removed = 0
+    with pikepdf.open(str(dst), allow_overwriting_input=True) as pdf:
+        # Inject dummy ToUnicode into the first /Font dict
+        for obj in pdf.objects:
+            if (
+                isinstance(obj, pikepdf.Dictionary)
+                and obj.get("/Type") == pikepdf.Name("/Font")
+            ):
+                obj["/ToUnicode"] = pikepdf.Stream(pdf, b"")
+                break
+        # Strip all /ToUnicode entries
+        for obj in pdf.objects:
+            if isinstance(obj, pikepdf.Dictionary):
+                if "/ToUnicode" in obj:
+                    del obj["/ToUnicode"]
+                    removed += 1
+        pdf.save(str(dst))
+    return removed
 
-dst.write_bytes(new_data)
-print(f"wrote {dst} ({dst.stat().st_size} bytes, ToUnicode stripped)")
+
+def main():
+    if len(sys.argv) < 2:
+        print("usage: mojibake.py <dst_path>", file=sys.stderr)
+        sys.exit(1)
+    dst = Path(sys.argv[1])
+    dst.parent.mkdir(parents=True, exist_ok=True)
+
+    # Step 1
+    synth_pdf(dst)
+
+    # Step 2
+    removed = strip_tounicode(dst)
+    if removed == 0:
+        print("ERROR: no /ToUnicode entry removed (Step 2 fail)", file=sys.stderr)
+        sys.exit(2)
+    print(f"INFO: removed {removed} /ToUnicode entries")
+
+    # Step 3
+    with pikepdf.open(str(dst)) as pdf:
+        page_count = len(pdf.pages)
+    if page_count != 1:
+        print(f"ERROR: expected 1 page, got {page_count} (Step 3 fail)", file=sys.stderr)
+        sys.exit(3)
+    if b"/ToUnicode" in dst.read_bytes():
+        print("ERROR: /ToUnicode 잔존 in binary (Step 3 fail)", file=sys.stderr)
+        sys.exit(4)
+    print(f"OK: {dst} ({page_count} page, no ToUnicode)")
+
+
+if __name__ == "__main__":
+    main()