feat(v031): AiWorker vision integration — note.media + visionModel + 5MB cap

This commit is contained in:
altair823
2026-05-10 04:53:21 +09:00
parent 5012b40c14
commit 2179cfbf39
4 changed files with 162 additions and 10 deletions

View File

@@ -1,6 +1,9 @@
import { readFile } from 'node:fs/promises';
import type { NoteRepository } from '../repository/NoteRepository.js';
import type { Note } from '@shared/types';
import type { AiFailedReason } from '../services/telemetryEvents.js';
import type { SettingsService } from '../services/SettingsService.js';
import type { MediaStore } from '../services/MediaStore.js';
import { ProviderHolder } from './ProviderHolder.js';
import { parseAllCandidates } from '../services/dueDateParser.js';
import { ZodError } from 'zod';
@@ -41,6 +44,10 @@ export interface AiWorkerOptions {
};
now?: () => Date;
telemetry?: AiTelemetryEmitter;
/** v0.3.1 Cut F — vision 지원. 미전달 시 vision 비활성. */
settings?: Pick<SettingsService, 'getVisionModel'>;
/** v0.3.1 Cut F — 첨부 이미지 절대경로 변환. settings 와 함께 전달 시 vision 활성. */
mediaStore?: Pick<MediaStore, 'absolutePath'>;
}
/**
 * One queued unit of work for the AI worker.
 * NOTE(review): field meanings below are inferred from the names and from
 * `job.noteId` being used to address the note being updated — confirm against
 * the queue-handling code.
 */
interface Job {
  /** Id of the note this job refers to. */
  noteId: string;
  /** Presumably the number of processing attempts made so far — TODO confirm. */
  attempts: number;
}
@@ -56,6 +63,8 @@ export class AiWorker {
private logger: NonNullable<AiWorkerOptions['logger']>;
private now: () => Date;
private telemetry?: AiTelemetryEmitter;
private settings?: Pick<SettingsService, 'getVisionModel'>;
private mediaStore?: Pick<MediaStore, 'absolutePath'>;
constructor(
private repo: NoteRepository,
@@ -68,6 +77,8 @@ export class AiWorker {
this.logger = opts.logger ?? { info: () => {}, warn: () => {}, error: () => {} };
this.now = opts.now ?? (() => new Date());
this.telemetry = opts.telemetry;
this.settings = opts.settings;
this.mediaStore = opts.mediaStore;
}
async enqueue(noteId: string): Promise<void> {
@@ -128,12 +139,24 @@ export class AiWorker {
const todayIso = kstTodayIso(nowDate);
const candidates = parseAllCandidates(note.rawText, todayDate);
const vocab = this.repo.getTopUsedTags(VOCAB_TOP_N);
const res = await this.holder.get().generate({
text: note.rawText,
todayKst: todayIso,
dueDateCandidates: candidates,
vocab
});
// v0.3.1 Cut F — vision path: visionModel + note.media → base64 images
const visionModel = this.settings ? await this.settings.getVisionModel() : null;
let images: Array<{ base64: string; mime: string }> | undefined;
if (visionModel && note.media.length > 0 && this.mediaStore) {
images = await Promise.all(
note.media.map(async (m) => {
const buf = await readFile(this.mediaStore!.absolutePath(m.relPath));
if (buf.byteLength > 5 * 1024 * 1024) {
throw new Error(`image ${m.relPath} exceeds 5MB cap`);
}
return { base64: buf.toString('base64'), mime: m.mime };
})
);
}
const res = await this.holder.get().generate(
{ text: note.rawText, images, todayKst: todayIso, dueDateCandidates: candidates, vocab },
{ visionModel: visionModel ?? undefined }
);
// AI primary: AI's dueDate is final (no rule merge)
this.repo.updateAiResult(job.noteId, {
title: res.title,

View File

@@ -149,7 +149,10 @@ app.whenReady().then(async () => {
refreshTray({ todayCount: repo.countToday() });
},
logger,
telemetry
telemetry,
// v0.3.1 Cut F — vision 지원
settings: settingsSvc,
mediaStore: store
});
const notify = new NotificationService({

View File

@@ -449,9 +449,10 @@ describe('AiWorker — vocab fetch + per-tag hit/miss (v0.2.3 #3 T7)', () => {
});
await w.enqueue(id);
await w.drain();
expect(generateMock).toHaveBeenCalledWith(expect.objectContaining({
vocab: expect.arrayContaining(['design'])
}));
expect(generateMock).toHaveBeenCalledWith(
expect.objectContaining({ vocab: expect.arrayContaining(['design']) }),
expect.anything()
);
});
it('emits tag_vocab_hit for vocab tags + tag_vocab_miss for new tags', async () => {

View File

@@ -0,0 +1,125 @@
import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
import { writeFile, mkdtemp, mkdir, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import Database from 'better-sqlite3';
import { runMigrations } from '@main/db/migrations/index.js';
import { NoteRepository } from '@main/repository/NoteRepository.js';
import { AiWorker } from '@main/ai/AiWorker.js';
import { ProviderHolder } from '@main/ai/ProviderHolder.js';
import { MediaStore } from '@main/services/MediaStore.js';
import type { AiResponse } from '@main/ai/schema.js';
import type { InferenceProvider } from '@main/ai/InferenceProvider.js';
/**
 * Vision-path tests for AiWorker (v0.3.1 Cut F).
 *
 * Every test runs against a fresh in-memory SQLite database and a throwaway
 * temp directory backing the MediaStore, then inspects what the worker hands
 * to `provider.generate` depending on the configured vision model and the
 * media attached to the note.
 */
describe('AiWorker — vision path (v0.3.1 Cut F)', () => {
  type GenerateArgs = Parameters<InferenceProvider['generate']>;

  const VISION_MODEL = 'gemma3:12b-vision';

  let db: Database.Database;
  let repo: NoteRepository;
  let workDir: string;
  let mediaStore: MediaStore;

  beforeEach(async () => {
    db = new Database(':memory:');
    db.pragma('foreign_keys = ON');
    runMigrations(db);
    repo = new NoteRepository(db);
    workDir = await mkdtemp(join(tmpdir(), 'inkling-vision-'));
    mediaStore = new MediaStore(workDir);
  });

  afterEach(async () => {
    db.close();
    await rm(workDir, { recursive: true, force: true });
  });

  /**
   * Builds an AiWorker wired to a fake provider, the shared repo/mediaStore,
   * zero-delay backoffs and a fixed clock.
   *
   * @param generate stand-in for `provider.generate`
   * @param getVisionModel stand-in for `settings.getVisionModel`
   */
  function makeWorker(
    generate: (input: GenerateArgs[0], opts?: GenerateArgs[1]) => Promise<AiResponse>,
    getVisionModel: () => Promise<string | null>
  ): AiWorker {
    const provider: InferenceProvider = {
      name: 'fake',
      generate,
      abort: () => {},
      healthCheck: vi.fn(async () => ({ ok: true }))
    };
    const holder = new ProviderHolder(provider);
    const settings = { getVisionModel };
    const logger = { info: vi.fn(), warn: vi.fn(), error: vi.fn() };
    return new AiWorker(repo, holder, {
      backoffsMs: [0, 0, 0],
      logger,
      settings,
      mediaStore,
      now: () => new Date('2026-05-10T05:00:00Z')
    });
  }

  /**
   * Creates a `generate` stub that records every (input, opts) pair it is
   * called with and always resolves to the same canned response.
   */
  function makeRecordingGenerate() {
    const calls: GenerateArgs[] = [];
    const generate = vi.fn(async (
      input: GenerateArgs[0],
      opts?: GenerateArgs[1]
    ): Promise<AiResponse> => {
      calls.push([input, opts]);
      return { title: 't', summary: 'a\nb\nc', tags: [], dueDate: null };
    });
    return { calls, generate };
  }

  /**
   * Writes `content` to `<workDir>/media/<noteId>/1.png` and registers it as
   * an image attachment of the note.
   * NOTE(review): assumes MediaStore.absolutePath resolves relPath against
   * workDir — confirm against MediaStore.
   */
  async function attachImage(noteId: string, content: Buffer): Promise<void> {
    const dir = join(workDir, 'media', noteId);
    await mkdir(dir, { recursive: true });
    await writeFile(join(dir, '1.png'), content);
    repo.insertMedia([
      {
        noteId,
        kind: 'image',
        relPath: `media/${noteId}/1.png`,
        mime: 'image/png',
        bytes: content.byteLength
      }
    ]);
  }

  it('visionModel + media 있음 → provider.generate 가 images + opts 받음', async () => {
    const pngMagic = Buffer.from([0x89, 0x50, 0x4e, 0x47]);
    const { id } = repo.create({ rawText: '이미지 메모' });
    await attachImage(id, pngMagic);
    const { calls, generate } = makeRecordingGenerate();
    const getVisionModel = vi.fn(async (): Promise<string | null> => VISION_MODEL);

    const worker = makeWorker(generate, getVisionModel);
    await worker.enqueue(id);
    await worker.drain();

    expect(calls.length).toBeGreaterThan(0);
    const [callInput, callOpts] = calls[0]!;
    expect(callInput.images).toHaveLength(1);
    expect(callInput.images![0]!.mime).toBe('image/png');
    // The payload must be the file's bytes, base64-encoded — not just "some image".
    expect(callInput.images![0]!.base64).toBe(pngMagic.toString('base64'));
    expect(callOpts?.visionModel).toBe(VISION_MODEL);
  });

  it('visionModel null이면 text-only (images undefined)', async () => {
    const { id } = repo.create({ rawText: 'just text' });
    // Attach media on purpose: without it `images` would be undefined no matter
    // what the vision model is, and the null-model gate would go untested.
    await attachImage(id, Buffer.from([0x89, 0x50, 0x4e, 0x47]));
    const { calls, generate } = makeRecordingGenerate();
    const getVisionModel = vi.fn(async (): Promise<string | null> => null);

    const worker = makeWorker(generate, getVisionModel);
    await worker.enqueue(id);
    await worker.drain();

    expect(calls.length).toBeGreaterThan(0);
    expect(calls[0]![0].images).toBeUndefined();
    // A null model must not leak through as a vision model option.
    expect(calls[0]![1]?.visionModel).toBeUndefined();
  });

  it('5MB 초과 이미지 → throw → AiWorker 의 fail 분기 (generate 미호출)', async () => {
    const { id } = repo.create({ rawText: 'big image' });
    await attachImage(id, Buffer.alloc(6 * 1024 * 1024));
    const { calls, generate } = makeRecordingGenerate();
    const getVisionModel = vi.fn(async (): Promise<string | null> => VISION_MODEL);

    const worker = makeWorker(generate, getVisionModel);
    await worker.enqueue(id);
    await worker.drain();

    // Guard against a vacuous pass: the job must actually have reached the
    // vision branch, otherwise "generate was never called" proves nothing.
    expect(getVisionModel).toHaveBeenCalled();
    expect(calls.length).toBe(0);
    // drain() resolved, so the worker absorbed the failure; the note itself
    // must still be present in the DB.
    const note = repo.findById(id);
    expect(note).toBeTruthy();
  });
});