//! P2-1 FTS5 schema + trigger + rebuild tests.
//!
//! Strategy: `chunks_fts` triggers fire off raw SQL on `chunks`, so we
//! seed and mutate via direct INSERT/UPDATE/DELETE rather than the full
//! `kb-parse-md → kb-normalize → kb-chunk → put_chunks` pipeline. That
//! keeps the assertions about trigger behavior independent of any
//! upstream crate. The `chunks` rows we produce satisfy NOT NULL on the
//! columns required by V001 §5.5; we elide FK pressure on `documents`
//! by disabling foreign keys for the test connection (the trigger logic
//! we exercise has no `documents` dependency).
//!
//! Test connections open a fresh side-channel `rusqlite::Connection`
//! that bypasses the `SqliteStore` mutex; that's fine because each test
//! gets its own tempdir and no concurrent mutator is in flight.

use kebab_chunk::tokenize_korean_morphological;
use kebab_store_sqlite::{SqliteStore, rebuild_chunks_fts};
use rusqlite::Connection;

mod common;

/// Insert a chunks row directly. The triggers will mirror it into
/// `chunks_fts` as part of the same statement.
fn insert_chunk(
    conn: &Connection,
    chunk_id: &str,
    doc_id: &str,
    heading_path_json: &str,
    text: &str,
) {
    conn.execute(
        "INSERT INTO chunks (
            chunk_id, doc_id, text, heading_path_json, section_label,
            source_spans_json, token_estimate, chunker_version,
            policy_hash, block_ids_json, created_at
        ) VALUES (?, ?, ?, ?, NULL, '[]', 0, 'v1', 'h', '[]', '2024-01-01T00:00:00Z')",
        rusqlite::params![chunk_id, doc_id, text, heading_path_json],
    )
    .expect("insert chunk row");
}

fn count(conn: &Connection, table: &str) -> i64 {
    conn.query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |r| r.get(0))
        .expect("count")
}

/// Open a fresh side-channel connection with FK enforcement OFF. The
/// FTS triggers we test do not touch `documents`, but `chunks` has a
/// FK to `documents(doc_id)`; turning FK enforcement off lets us seed
/// chunks without first synthesizing a full documents/assets row graph.
fn raw_conn_no_fk(env: &common::TestEnv) -> Connection {
    let conn = Connection::open(env.db_path()).expect("open side conn");
    conn.pragma_update(None, "foreign_keys", "OFF").unwrap();
    conn
}

// ── 1. Migration apply: backfill ──────────────────────────────────────

/// Apply V001 only, seed N rows into `chunks` (which has no FTS shadow
/// at this point — V001 doesn't create `chunks_fts`), then apply V002's
/// SQL verbatim. The V002 backfill INSERT must produce one chunks_fts
/// row per pre-existing chunks row, and each row's columns must match.
///
/// This is the literal cold-upgrade path: V001-shipped database, V002
/// applied on top, existing chunks become searchable without re-ingest.
/// The trigger-based mirror (chunks_ai) is covered by the §2 tests.
#[test]
fn fts_v002_backfills_existing_chunks() {
    let env = common::TestEnv::new();
    let conn = Connection::open(env.db_path()).expect("open db");
    conn.pragma_update(None, "foreign_keys", "OFF").unwrap();

    // 1) Apply V001 only — chunks table exists, chunks_fts does not.
    let v001_sql = include_str!("../../../migrations/V001__init.sql");
    conn.execute_batch(v001_sql).expect("apply V001");
    assert!(
        conn.query_row(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='chunks_fts'",
            [],
            |r| r.get::<_, String>(0),
        )
        .is_err(),
        "chunks_fts must not exist under V001 only"
    );

    // 2) Seed pre-existing chunks rows (the V001-shipped state we expect
    //    on a customer DB upgrading from P1 to P2-1).
    const N: usize = 4;
    for i in 0..N {
        let cid = format!("{i:0>32}");
        insert_chunk(
            &conn,
            &cid,
            &"d".repeat(32),
            "[\"Section\"]",
            &format!("seedrow{i} payload"),
        );
    }
    assert_eq!(count(&conn, "chunks"), N as i64);

    // 3) Apply V002 verbatim — its CREATE VIRTUAL TABLE + triggers + the
    //    final backfill INSERT. The triggers don't fire on this path
    //    (they only fire on chunks INSERT/UPDATE/DELETE); the backfill
    //    INSERT does the work.
    let v002_sql = include_str!("../../../migrations/V002__fts.sql");
    conn.execute_batch(v002_sql).expect("apply V002");

    // 4) Assert: count parity, and the backfilled rows mirror the chunks
    //    rows column-for-column on the indexed/UNINDEXED columns.
    assert_eq!(
        count(&conn, "chunks_fts"),
        N as i64,
        "V002 backfill INSERT must seed one chunks_fts row per chunks row"
    );
    for i in 0..N {
        let cid = format!("{i:0>32}");
        let term = format!("seedrow{i}");
        let hit: String = conn
            .query_row(
                "SELECT chunk_id FROM chunks_fts WHERE chunks_fts MATCH ?",
                [&term],
                |r| r.get(0),
            )
            .unwrap_or_else(|_| panic!("MATCH {term} must hit backfilled row"));
        assert_eq!(hit, cid, "backfill must preserve chunk_id mapping");
    }
}

/// Direct test of the V002 backfill INSERT on a DB seeded under V001.
/// We achieve V001-only state by running all migrations, dropping the
/// FTS rows, then re-running the exact backfill INSERT V002 ships and
/// asserting count parity.
#[test]
fn fts_v002_backfill_select_matches_chunks_count() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    for i in 0..5 {
        let cid = format!("{i:0>32}");
        insert_chunk(&conn, &cid, &"d".repeat(32), "[]", &format!("row {i}"));
    }
    // Wipe + run the literal V002 backfill INSERT.
    conn.execute("DELETE FROM chunks_fts", []).unwrap();
    assert_eq!(count(&conn, "chunks_fts"), 0);
    conn.execute(
        "INSERT INTO chunks_fts(chunk_id, doc_id, heading_path, text)
         SELECT chunk_id, doc_id, heading_path_json, text FROM chunks",
        [],
    )
    .unwrap();
    assert_eq!(count(&conn, "chunks_fts"), count(&conn, "chunks"));
}

// ── 2. Trigger sync: INSERT / DELETE / UPDATE ────────────────────────

#[test]
fn fts_chunks_ai_trigger_propagates_insert() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    insert_chunk(
        &conn,
        &"a".repeat(32),
        &"d".repeat(32),
        "[\"Heading\"]",
        "needle in haystack",
    );

    // chunks_fts row count == 1 and MATCH finds it.
    assert_eq!(count(&conn, "chunks_fts"), 1);
    let hit: String = conn
        .query_row(
            "SELECT chunk_id FROM chunks_fts WHERE chunks_fts MATCH 'needle'",
            [],
            |r| r.get(0),
        )
        .expect("MATCH 'needle' must hit");
    assert_eq!(hit, "a".repeat(32));
}

#[test]
fn fts_chunks_ad_trigger_propagates_delete() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    let cid = "a".repeat(32);
    insert_chunk(&conn, &cid, &"d".repeat(32), "[]", "ephemeral");
    assert_eq!(count(&conn, "chunks_fts"), 1);

    conn.execute("DELETE FROM chunks WHERE chunk_id = ?", [&cid])
        .expect("delete chunk");
    assert_eq!(
        count(&conn, "chunks_fts"),
        0,
        "chunks_ad must remove the FTS row"
    );
}

#[test]
fn fts_chunks_au_trigger_propagates_update() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    let cid = "a".repeat(32);
    insert_chunk(&conn, &cid, &"d".repeat(32), "[]", "before");

    // Old text is searchable.
    assert_eq!(count_match(&conn, "before"), 1);
    assert_eq!(count_match(&conn, "after"), 0);

    conn.execute(
        "UPDATE chunks SET text = ? WHERE chunk_id = ?",
        rusqlite::params!["after rewrite", cid],
    )
    .expect("update chunk text");

    // New text is searchable; old token is gone. Row count unchanged.
    assert_eq!(count(&conn, "chunks_fts"), 1);
    assert_eq!(
        count_match(&conn, "before"),
        0,
        "old text must not survive UPDATE"
    );
    assert_eq!(count_match(&conn, "after"), 1, "new text must be indexed");
}

fn count_match(conn: &Connection, term: &str) -> i64 {
    conn.query_row(
        "SELECT COUNT(*) FROM chunks_fts WHERE chunks_fts MATCH ?",
        [term],
        |r| r.get(0),
    )
    .expect("count_match")
}

// ── 3. rebuild_chunks_fts ────────────────────────────────────────────

#[test]
fn fts_rebuild_chunks_fts_is_idempotent() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    for i in 0..3 {
        let cid = format!("{i:0>32}");
        insert_chunk(&conn, &cid, &"d".repeat(32), "[]", &format!("token{i}"));
    }
    let before = count(&conn, "chunks_fts");
    assert_eq!(before, 3);

    // First rebuild: trivial round-trip — same row count.
    rebuild_chunks_fts(&conn).expect("rebuild 1");
    assert_eq!(count(&conn, "chunks_fts"), before);

    // Second rebuild: idempotent (same row count again).
    rebuild_chunks_fts(&conn).expect("rebuild 2");
    assert_eq!(count(&conn, "chunks_fts"), before);

    // After rebuild, MATCH still finds expected tokens.
    for i in 0..3 {
        assert_eq!(count_match(&conn, &format!("token{i}")), 1);
    }
}

#[test]
fn fts_rebuild_chunks_fts_recovers_from_drift() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    let cid = "a".repeat(32);
    insert_chunk(&conn, &cid, &"d".repeat(32), "[]", "recovered");

    // Manually wipe chunks_fts to simulate drift; this is the failure
    // mode `kb index --rebuild-fts` exists to recover from.
    conn.execute("DELETE FROM chunks_fts", []).unwrap();
    assert_eq!(count(&conn, "chunks_fts"), 0);
    assert_eq!(count(&conn, "chunks"), 1);

    rebuild_chunks_fts(&conn).expect("rebuild");
    assert_eq!(count(&conn, "chunks_fts"), 1);
    assert_eq!(count_match(&conn, "recovered"), 1);
}

// ── 4. Migration double-apply no-op ──────────────────────────────────

#[test]
fn fts_double_run_migrations_is_noop() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().expect("run 1");
    // Second invocation must be a no-op (refinery's bookkeeping table
    // tracks applied versions). The chunks_fts virtual table is still
    // present and queryable.
    store.run_migrations().expect("run 2");

    let conn = raw_conn_no_fk(&env);
    // The virtual table is queryable.
    let n: i64 = conn
        .query_row("SELECT COUNT(*) FROM chunks_fts", [], |r| r.get(0))
        .expect("chunks_fts queryable after double-run");
    assert_eq!(n, 0);
}

// ── 5. CI diff guard: V002 SQL matches design §5.5 verbatim ──────────

/// Whitespace-normalize a SQL block: trim, then collapse every run of
/// whitespace (newlines included) into a single space. Lets the
/// design-doc ↔ migration-file comparison ignore cosmetic drift like
/// blank-line counts while still catching token-level changes.
fn normalize_ws(s: &str) -> String {
    s.split_whitespace().collect::<Vec<_>>().join(" ")
}

/// Extract the §5.5 FTS slice from the design doc: locate the
/// `### 5.5 Chunks + FTS5` heading, walk to the next ```sql fenced
/// block, then within that block slice from `CREATE VIRTUAL TABLE
/// chunks_fts` through the last `END;`. The §5.5 fenced block also
/// contains the `chunks` CREATE TABLE — we only want the FTS portion.
///
/// Failure modes (any of these means the design doc layout drifted —
/// the test should fail loud, which is the point):
/// - heading missing
/// - no ```sql block follows
/// - no `CREATE VIRTUAL TABLE chunks_fts` inside that block
/// - no `END;` after the virtual-table line
fn extract_design_5_5_fts_block() -> String {
    let doc = include_str!("../../../docs/superpowers/specs/2026-04-27-kebab-final-form-design.md");
    let heading_idx = doc
        .find("### 5.5 Chunks + FTS5")
        .expect("design doc must contain `### 5.5 Chunks + FTS5` heading");
    let after_heading = &doc[heading_idx..];

    // Find the opening fence ```sql after the heading.
    let fence_open_rel = after_heading
        .find("```sql")
        .expect("§5.5 must be followed by a ```sql fenced block");
    // Move past the fence line.
    let body_start_rel = fence_open_rel
        + after_heading[fence_open_rel..]
            .find('\n')
            .expect("```sql fence must end with a newline")
        + 1;
    let body = &after_heading[body_start_rel..];
    let fence_close_rel = body
        .find("\n```")
        .expect("§5.5 ```sql block must close with ``` on its own line");
    let fenced = &body[..fence_close_rel];

    // Within the fenced block, slice from CREATE VIRTUAL TABLE chunks_fts
    // through the last `END;`.
    let virt_idx = fenced
        .find("CREATE VIRTUAL TABLE chunks_fts")
        .expect("§5.5 fenced block must contain `CREATE VIRTUAL TABLE chunks_fts`");
    let fts_slice = &fenced[virt_idx..];
    let last_end = fts_slice
        .rfind("END;")
        .expect("§5.5 FTS slice must terminate with `END;`");
    fts_slice[..last_end + "END;".len()].to_string()
}

/// Extract the §5.5 verbatim block from the V009 migration (V009 replaces
/// V007 's trigram tokenizer with unicode61 + CASE expression triggers for
/// Korean morphological tokenization — V007 stays in place for historical
/// cold-upgrade replay but V009 is now the source of truth),
/// between the `── §5.5 verbatim block ──` anchor markers V009 carries.
fn extract_migration_5_5_verbatim_block() -> String {
    let migration = include_str!("../../../migrations/V009__fts_korean_morphological.sql");
    // The opening anchor line ends with `── §5.5 verbatim block ─...`.
    let open_marker = "§5.5 verbatim block";
    let close_marker = "End §5.5 verbatim block";

    let open_idx = migration
        .find(open_marker)
        .expect("V009 must carry the `§5.5 verbatim block` opening anchor");
    let after_open_line = open_idx
        + migration[open_idx..]
            .find('\n')
            .expect("opening anchor line must end with a newline")
        + 1;

    let close_idx = migration[after_open_line..]
        .find(close_marker)
        .expect("V009 must carry the `End §5.5 verbatim block` closing anchor")
        + after_open_line;
    // Walk back from the close marker to the start of its comment line.
    let close_line_start = migration[..close_idx].rfind('\n').map_or(0, |n| n + 1);

    migration[after_open_line..close_line_start].to_string()
}

/// CI diff guard: the §5.5 block in `migrations/V009__fts_korean_morphological.sql`
/// must match the design doc verbatim (whitespace-normalized). V009
/// replaced V007 's trigram tokenizer with unicode61 + CASE expression
/// triggers for Korean morphological tokenization (2026-05-28).
/// V007 stays in place for historical replay of cold-upgrade paths
/// but is no longer compared against the design doc — V009 is now
/// the source of truth.
#[test]
fn fts_v009_matches_design_section_5_5_verbatim() {
    let design = extract_design_5_5_fts_block();
    let migration_block = extract_migration_5_5_verbatim_block();

    // Sanity: the slices we extracted look like the §5.5 FTS block (not
    // some unrelated snippet that happened to match a marker).
    assert!(
        design.contains("CREATE VIRTUAL TABLE chunks_fts"),
        "design slice must include CREATE VIRTUAL TABLE chunks_fts"
    );
    assert!(
        migration_block.contains("CREATE VIRTUAL TABLE chunks_fts"),
        "migration slice must include CREATE VIRTUAL TABLE chunks_fts"
    );
    assert!(
        design.trim_end().ends_with("END;"),
        "design slice must terminate with END;"
    );

    let design_n = normalize_ws(&design);
    let migration_n = normalize_ws(&migration_block);
    assert_eq!(
        design_n, migration_n,
        "V009__fts_korean_morphological.sql §5.5 block must match design doc §5.5 verbatim \
         (whitespace-normalized). If you intentionally changed one, \
         update the other in the same commit."
    );
}

// ── 5b. V009 corpus_revision bump ────────────────────────────────────

/// V009 migration 이 corpus_revision kv 를 bump 하는지 검증.
/// SqliteStore::open + run_migrations 후 corpus_revision 이 ≥ 1 이어야 함.
/// (V004 seed = '0', V009 UPDATE = CAST(CAST('0' AS INTEGER) + 1 AS TEXT) = '1').
#[test]
fn v009_bumps_corpus_revision() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();
    let rev = store.corpus_revision();
    assert!(
        rev >= 1,
        "corpus_revision must be ≥ 1 after V009 migration \
         (V004 seeds 0, V009 bumps to ≥ 1); got {rev}"
    );
}

// ── 5c. backfill_tokenized_korean_text ───────────────────────────────

#[test]
fn backfill_tokenized_korean_text_populates_nullable_rows() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    // chunks 에 한국어 row 두 개 INSERT (tokenized_korean_text 는 chunks_ai trigger
    // 가 채우지만, 여기서는 raw_conn_no_fk 로 직접 INSERT 하므로 NULL 로 남음).
    let conn = raw_conn_no_fk(&env);
    insert_chunk(
        &conn,
        &"a".repeat(32),
        &"d".repeat(32),
        "[]",
        "한국 문화는 오래되었다",
    );
    insert_chunk(
        &conn,
        &"b".repeat(32),
        &"d".repeat(32),
        "[]",
        "서울특별시는 한국의 수도",
    );
    let null_count_before: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM chunks WHERE tokenized_korean_text IS NULL",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(null_count_before, 2);
    drop(conn);

    // backfill 호출 → lindera 가 두 row 모두 분해 성공 → 2 반환.
    let processed = store
        .backfill_tokenized_korean_text(|_, _| {}, tokenize_korean_morphological)
        .unwrap();
    assert_eq!(processed, 2, "both rows should be populated by lindera");

    let conn = raw_conn_no_fk(&env);
    let null_count_after: i64 = conn
        .query_row(
            "SELECT COUNT(*) FROM chunks WHERE tokenized_korean_text IS NULL",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(null_count_after, 0);

    // idempotency: 두 번째 호출 → 0 (모든 row 가 이미 채워져 있음).
    drop(conn);
    let processed_again = store
        .backfill_tokenized_korean_text(|_, _| {}, tokenize_korean_morphological)
        .unwrap();
    assert_eq!(processed_again, 0);
}

// ── 6. WAL cleanup: drop store before tempdir reaps WAL/SHM ──────────

/// Mirror the P1-6 pattern: opening + migrating + dropping the store
/// must not strand `kebab.sqlite-wal`/`-shm` files such that the tempdir
/// can't be cleaned up. After dropping the store + side-channel conn,
/// the WAL/SHM siblings must either not exist or be removable — if a
/// stray handle were holding them open, on Windows the remove would
/// fail (on Linux unlink succeeds even with open handles, so this is
/// mostly a portability canary, but we still assert).
#[test]
fn fts_store_drop_releases_wal_files() {
    let env = common::TestEnv::new();
    let db_path = env.db_path();
    {
        let store = SqliteStore::open(&env.config()).unwrap();
        store.run_migrations().unwrap();
        // Force at least one trigger fire so WAL has content to flush.
        let conn = raw_conn_no_fk(&env);
        insert_chunk(&conn, &"a".repeat(32), &"d".repeat(32), "[]", "x");
        drop(conn);
        drop(store);
    }

    // After the store drops, any remaining WAL/SHM siblings must be
    // removable. If a connection is still open this would fail on
    // platforms with mandatory file locking.
    for suffix in ["-wal", "-shm"] {
        let p = db_path.with_extension(format!("sqlite{suffix}"));
        if p.exists() {
            std::fs::remove_file(&p).unwrap_or_else(|e| {
                panic!(
                    "WAL/SHM sibling {} should be removable after store drop: {e}",
                    p.display()
                )
            });
        }
    }
    // The main DB file should likewise be removable.
    if db_path.exists() {
        std::fs::remove_file(&db_path).expect("main DB file should be removable after store drop");
    }
}

// ── 7. Tokenizer behavior (V009 unicode61 + Korean morpheme column) ───
//
// V007 의 trigram-specific substring 매칭 test 들은 V009 로 obsolete:
// - English substring (`token` → `tokenizer` hit) 는 unicode61 의 whole-token
//   매칭으로 회귀 — spec §3 Non-Goals 의 Path A 명시.
// - Korean substring (`발생한` → `발생한다` hit) 도 동일하게 whole-token only.
//
// V009 의 신규 검증은 S7 (plan §2 Step 7) 에서 추가되는
// `fts_v009_korean_morphological_2char_query_hits` + `fts_v009_english_whole_token_only`
// 가 담당한다. 2자 query 0-hit 의 pinned 동작 (`fts_trigram_korean_short_query_zero_hit_pinned`)
// 은 V009 의 형태소 분해가 hit 시키므로 의도된 회귀 — S7 의 신규 test 가 새 baseline 을 핀.

/// V009 의 unicode61 + morpheme column 환경에서 단일 토큰 매칭이 정상
/// 동작하는지 sanity check. 형태소 사전이 없어도 chunks_fts 의
/// `tokenize='unicode61'` 만으로도 space-separated 한국어 token (chunk text
/// 의 raw 공백 split) 은 매칭되어야 한다.
#[test]
fn fts_v009_unicode61_space_separated_korean_token_hits() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    insert_chunk(
        &conn,
        &"k".repeat(32),
        &"d".repeat(32),
        "[]",
        "해시 충돌은 키와 값을 매핑할 때 발생한다",
    );

    // unicode61 이 공백으로 분리한 token 은 그대로 매칭.
    assert_eq!(count_match(&conn, "충돌은"), 1, "whole-token '충돌은' hit");
    assert_eq!(count_match(&conn, "해시"), 1, "whole-token '해시' hit");
    // substring (token 의 부분 문자열) 은 V009 unicode61 에서 0-hit.
    assert_eq!(
        count_match(&conn, "발생한"),
        0,
        "substring '발생한' of '발생한다' 0-hit"
    );
}

// ── 8. V009 morphological tokenizer behavior ──────────────────────────

/// V009 의 핵심 가치: 한국어 2자 query 가 hit. 형태소 분해된
/// tokenized_korean_text column 이 chunks_fts 에 indexed.
#[test]
fn fts_v009_korean_morphological_2char_query_hits() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    let text = "한국 문화는 오래되었다";
    let tokenized = tokenize_korean_morphological(text);
    conn.execute(
        "INSERT INTO chunks (
            chunk_id, doc_id, text, heading_path_json, section_label,
            source_spans_json, token_estimate, chunker_version,
            policy_hash, block_ids_json, created_at,
            tokenized_korean_text
        ) VALUES (?, ?, ?, '[]', NULL, '[]', 0, 'v1', 'h', '[]', '2024-01-01T00:00:00Z', ?)",
        rusqlite::params![&"k".repeat(32), &"d".repeat(32), text, tokenized,],
    )
    .expect("insert chunk with tokenized_korean_text");

    assert!(
        count_match(&conn, "한국") >= 1,
        "2-char Korean morpheme '한국' must hit when tokenized column is populated"
    );
}

/// V009 의 Path A 회귀 확인: 영어 substring 매칭이 사라짐
/// (unicode61 의 whole-token only 동작).
#[test]
fn fts_v009_english_whole_token_only() {
    let env = common::TestEnv::new();
    let store = SqliteStore::open(&env.config()).unwrap();
    store.run_migrations().unwrap();

    let conn = raw_conn_no_fk(&env);
    insert_chunk(
        &conn,
        &"e".repeat(32),
        &"d".repeat(32),
        "[]",
        "the tokenizer normalizes whitespace before matching",
    );

    assert_eq!(
        count_match(&conn, "token"),
        0,
        "V009 unicode61: 'token' is substring of 'tokenizer', should NOT hit"
    );
    assert_eq!(
        count_match(&conn, "tokenizer"),
        1,
        "V009 unicode61: whole-token 'tokenizer' must hit"
    );
}