fix(v032): AiWorker vocabSet COLLATE NOCASE 정합 (#31)

DB tags.name 가 COLLATE NOCASE 인데 vocabSet 은 strict-eq 였음 →
대문자/소문자 vocab 과 AI tag 가 다를 때 silently skip.

vocab 의 각 항목(vocab.map((v) => v.toLowerCase()))과 tagName.toLowerCase() 양쪽을 normalize 해서 정합.
This commit is contained in:
altair823
2026-05-10 13:55:52 +09:00
parent 36eafa1ce9
commit 6985db3505
2 changed files with 90 additions and 2 deletions

View File

@@ -186,9 +186,9 @@ export class AiWorker {
}).catch(() => {});
// v0.2.3 #3 — per-tag vocab hit/miss 분류 (updateAiResult 후 → tagId 보장)
// dedup: AI 응답에 같은 태그 중복 가능 — INSERT OR IGNORE 와 정합한 1-emit/태그 보장
const vocabSet = new Set(vocab);
const vocabSet = new Set(vocab.map((v) => v.toLowerCase()));
for (const tagName of new Set(res.tags)) {
if (vocabSet.has(tagName)) {
if (vocabSet.has(tagName.toLowerCase())) {
const tagId = this.repo.getTagIdByName(tagName);
if (tagId !== null) {
await this.telemetry.emit({

View File

@@ -560,3 +560,91 @@ describe('AiWorker — vocab fetch + per-tag hit/miss (v0.2.3 #3 T7)', () => {
expect(miss).toHaveLength(1); // 'meeting' 1 miss
});
});
describe('vocab COLLATE NOCASE', () => {
  let db: Database.Database;
  let repo: NoteRepository;

  // Every test starts from a freshly migrated in-memory database.
  beforeEach(() => {
    db = new Database(':memory:');
    runMigrations(db);
    repo = new NoteRepository(db);
  });

  /** Stores a throwaway note carrying `tag`, so the tag row exists before the worker runs. */
  const seedNoteWithTag = (tag: string): void => {
    const seedId = repo.create({ rawText: 'seed' }).id;
    repo.updateAiResult(seedId, { title: 't', summary: 'a\nb\nc', tags: [tag], provider: 'p' });
  };

  /**
   * Creates a new note, runs it through an AiWorker whose provider answers with
   * `aiTags`, and returns every telemetry event the worker emitted.
   */
  const runWorkerWithAiTags = async (aiTags: string[]): Promise<EmittedEvent[]> => {
    const noteId = repo.create({ rawText: 'x' }).id;
    const captured: EmittedEvent[] = [];
    const worker = new AiWorker(
      repo,
      new ProviderHolder(
        makeProvider({
          generate: vi.fn(async () => ({
            title: 't',
            summary: 'a\nb\nc',
            tags: aiTags,
            dueDate: null
          }))
        })
      ),
      {
        backoffsMs: [0, 0, 0],
        telemetry: {
          emit: vi.fn(async (input) => {
            captured.push(input);
          })
        }
      }
    );
    await worker.enqueue(noteId);
    await worker.drain();
    return captured;
  };

  /** Asserts exactly one vocab hit and no vocab miss among the captured events. */
  const expectSingleHitAndNoMiss = (events: EmittedEvent[]): void => {
    expect(events.filter((e) => e.kind === 'tag_vocab_hit')).toHaveLength(1);
    expect(events.filter((e) => e.kind === 'tag_vocab_miss')).toHaveLength(0);
  };

  it('hits when vocab has lowercase and AI returns capital', async () => {
    // The seeded tag is lowercase 'design'; the AI answers with capitalized 'Design'.
    seedNoteWithTag('design');
    const events = await runWorkerWithAiTags(['Design']);
    expectSingleHitAndNoMiss(events);
  });

  it('hits when vocab has capital and AI returns lowercase', async () => {
    // Reverse direction: the vocab holds 'Design' while the AI answers 'design'.
    // getTopUsedTags filters through KEBAB_CASE_RE (/^[a-z0-9-]+$/), so a capitalized
    // entry would be stripped in production. The stub below injects it directly, and
    // the seeded row lets getTagIdByName (COLLATE NOCASE) resolve 'design' to a tag id.
    seedNoteWithTag('Design');
    vi.spyOn(repo, 'getTopUsedTags').mockReturnValueOnce(['Design']);
    const events = await runWorkerWithAiTags(['design']);
    expectSingleHitAndNoMiss(events);
  });

  it('still hits when both vocab and AI tag are same lowercase (regression)', async () => {
    // Identical lowercase spelling on both sides must keep producing a hit.
    seedNoteWithTag('design');
    const events = await runWorkerWithAiTags(['design']);
    expectSingleHitAndNoMiss(events);
  });
});