fix(v032): AiWorker vocabSet COLLATE NOCASE 정합 (#31)
DB tags.name 가 COLLATE NOCASE 인데 vocabSet 은 strict-eq 였음 → 대문자/소문자 vocab 과 AI tag 가 다를 때 silently skip. vocab.toLowerCase() + tagName.toLowerCase() 양쪽 normalize 로 정합.
This commit is contained in:
@@ -186,9 +186,9 @@ export class AiWorker {
|
||||
}).catch(() => {});
|
||||
// v0.2.3 #3 — per-tag vocab hit/miss 분류 (updateAiResult 후 → tagId 보장)
|
||||
// dedup: AI 응답에 같은 태그 중복 가능 — INSERT OR IGNORE 와 정합한 1-emit/태그 보장
|
||||
const vocabSet = new Set(vocab);
|
||||
const vocabSet = new Set(vocab.map((v) => v.toLowerCase()));
|
||||
for (const tagName of new Set(res.tags)) {
|
||||
if (vocabSet.has(tagName)) {
|
||||
if (vocabSet.has(tagName.toLowerCase())) {
|
||||
const tagId = this.repo.getTagIdByName(tagName);
|
||||
if (tagId !== null) {
|
||||
await this.telemetry.emit({
|
||||
|
||||
@@ -560,3 +560,91 @@ describe('AiWorker — vocab fetch + per-tag hit/miss (v0.2.3 #3 T7)', () => {
|
||||
expect(miss).toHaveLength(1); // 'meeting' 1 miss
|
||||
});
|
||||
});
|
||||
|
||||
describe('vocab COLLATE NOCASE', () => {
|
||||
let db: Database.Database;
|
||||
let repo: NoteRepository;
|
||||
|
||||
beforeEach(() => {
|
||||
db = new Database(':memory:');
|
||||
runMigrations(db);
|
||||
repo = new NoteRepository(db);
|
||||
});
|
||||
|
||||
it('hits when vocab has lowercase and AI returns capital', async () => {
|
||||
// Pre-seed: 'design' in vocab (lowercase)
|
||||
const seed = repo.create({ rawText: 'seed' }).id;
|
||||
repo.updateAiResult(seed, { title: 't', summary: 'a\nb\nc', tags: ['design'], provider: 'p' });
|
||||
|
||||
const { id } = repo.create({ rawText: 'x' });
|
||||
const provider = makeProvider({
|
||||
generate: vi.fn(async () => ({
|
||||
title: 't', summary: 'a\nb\nc',
|
||||
tags: ['Design'], // AI returns capitalized — DB COLLATE NOCASE matches 'design'
|
||||
dueDate: null
|
||||
}))
|
||||
});
|
||||
const emits: EmittedEvent[] = [];
|
||||
const w = new AiWorker(repo, new ProviderHolder(provider), {
|
||||
backoffsMs: [0, 0, 0],
|
||||
telemetry: { emit: vi.fn(async (input) => { emits.push(input); }) }
|
||||
});
|
||||
await w.enqueue(id);
|
||||
await w.drain();
|
||||
expect(emits.filter((e) => e.kind === 'tag_vocab_hit')).toHaveLength(1);
|
||||
expect(emits.filter((e) => e.kind === 'tag_vocab_miss')).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('hits when vocab has capital and AI returns lowercase', async () => {
|
||||
// Scenario: vocab contains 'Design' (capital), AI returns 'design' (lowercase).
|
||||
// getTopUsedTags filters via KEBAB_CASE_RE (/^[a-z0-9-]+$/) so 'Design' would be
|
||||
// stripped in production. We stub getTopUsedTags to inject the capital vocab directly,
|
||||
// and pre-seed the DB so getTagIdByName (COLLATE NOCASE) can resolve 'design' → tagId.
|
||||
const seed = repo.create({ rawText: 'seed' }).id;
|
||||
repo.updateAiResult(seed, { title: 't', summary: 'a\nb\nc', tags: ['Design'], provider: 'p' });
|
||||
// Inject capital vocab bypassing the kebab filter
|
||||
vi.spyOn(repo, 'getTopUsedTags').mockReturnValueOnce(['Design']);
|
||||
|
||||
const { id } = repo.create({ rawText: 'x' });
|
||||
const provider = makeProvider({
|
||||
generate: vi.fn(async () => ({
|
||||
title: 't', summary: 'a\nb\nc',
|
||||
tags: ['design'], // AI returns lowercase — DB COLLATE NOCASE matches 'Design'
|
||||
dueDate: null
|
||||
}))
|
||||
});
|
||||
const emits: EmittedEvent[] = [];
|
||||
const w = new AiWorker(repo, new ProviderHolder(provider), {
|
||||
backoffsMs: [0, 0, 0],
|
||||
telemetry: { emit: vi.fn(async (input) => { emits.push(input); }) }
|
||||
});
|
||||
await w.enqueue(id);
|
||||
await w.drain();
|
||||
expect(emits.filter((e) => e.kind === 'tag_vocab_hit')).toHaveLength(1);
|
||||
expect(emits.filter((e) => e.kind === 'tag_vocab_miss')).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('still hits when both vocab and AI tag are same lowercase (regression)', async () => {
|
||||
// Pre-seed: 'design' in vocab (lowercase)
|
||||
const seed = repo.create({ rawText: 'seed' }).id;
|
||||
repo.updateAiResult(seed, { title: 't', summary: 'a\nb\nc', tags: ['design'], provider: 'p' });
|
||||
|
||||
const { id } = repo.create({ rawText: 'x' });
|
||||
const provider = makeProvider({
|
||||
generate: vi.fn(async () => ({
|
||||
title: 't', summary: 'a\nb\nc',
|
||||
tags: ['design'], // same lowercase — should still hit
|
||||
dueDate: null
|
||||
}))
|
||||
});
|
||||
const emits: EmittedEvent[] = [];
|
||||
const w = new AiWorker(repo, new ProviderHolder(provider), {
|
||||
backoffsMs: [0, 0, 0],
|
||||
telemetry: { emit: vi.fn(async (input) => { emits.push(input); }) }
|
||||
});
|
||||
await w.enqueue(id);
|
||||
await w.drain();
|
||||
expect(emits.filter((e) => e.kind === 'tag_vocab_hit')).toHaveLength(1);
|
||||
expect(emits.filter((e) => e.kind === 'tag_vocab_miss')).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user