feat(retry): AiWorker unreachable/timeout 무한 retry — 15분 cap (#2 v0.2.3)

This commit is contained in:
altair823
2026-05-02 03:19:43 +09:00
parent 2e3f0edffd
commit 449eb76683
2 changed files with 147 additions and 8 deletions

View File

@@ -37,6 +37,7 @@ export interface AiTelemetryEmitter {
export interface AiWorkerOptions {
backoffsMs?: number[];
unreachableBackoffsMs?: number[];
onUpdate?: (note: Note) => void;
logger?: {
info: (msg: string, meta?: Record<string, unknown>) => void;
@@ -54,6 +55,8 @@ export class AiWorker {
private running = false;
private drainResolvers: Array<() => void> = [];
private backoffsMs: number[];
private unreachableBackoffsMs: number[];
private unreachableBackoffStep = 0;
private onUpdate?: (note: Note) => void;
private logger: NonNullable<AiWorkerOptions['logger']>;
private now: () => Date;
@@ -65,6 +68,7 @@ export class AiWorker {
opts: AiWorkerOptions = {}
) {
this.backoffsMs = opts.backoffsMs ?? [0, 30_000, 120_000];
this.unreachableBackoffsMs = opts.unreachableBackoffsMs ?? [30_000, 60_000, 120_000, 240_000, 480_000, 900_000];
this.onUpdate = opts.onUpdate;
this.logger = opts.logger ?? { info: () => {}, warn: () => {}, error: () => {} };
this.now = opts.now ?? (() => new Date());
@@ -139,6 +143,7 @@ export class AiWorker {
provider: this.provider.name,
dueDate: res.dueDate ?? null
});
this.unreachableBackoffStep = 0; // 성공 시 step reset
this.logger.info('ai.done', {
noteId: job.noteId,
attempt,
@@ -158,9 +163,22 @@ export class AiWorker {
this.emit(job.noteId);
return;
} catch (err) {
const isLast = attempt === max - 1;
const reason = classifyReason(err);
const msg = (err as Error).message;
this.logger.warn('ai.retry', { noteId: job.noteId, attempt, err: msg });
this.logger.warn('ai.retry', { noteId: job.noteId, attempt, err: msg, reason });
if (reason === 'unreachable' || reason === 'timeout') {
// 무한 retry: attempts 증가 안 함, in-place loop + sleep.
// markAiFailed / ai_failed emit 안 함 — ratio 통계는 schema/other 만 누적.
const sleepMs = this.nextBackoffMs(this.unreachableBackoffStep);
this.unreachableBackoffStep = Math.min(this.unreachableBackoffStep + 1, this.unreachableBackoffsMs.length - 1);
const nextRunAt = new Date(Date.now() + sleepMs).toISOString();
this.repo.setNextRunAt(job.noteId, nextRunAt, msg);
await this.sleep(sleepMs);
attempt -= 1; // for 루프 attempt++ 상쇄 — 같은 attempt 인덱스로 재시도
continue;
}
// schema / other: 기존 max 3 retry 정책
const isLast = attempt === max - 1;
const nextRunAt = new Date(Date.now() + (this.backoffsMs[attempt + 1] ?? 0)).toISOString();
this.repo.incrementJobAttempt(job.noteId, nextRunAt, msg);
if (isLast) {
@@ -171,7 +189,7 @@ export class AiWorker {
kind: 'ai_failed',
payload: {
noteId: job.noteId,
reason: classifyReason(err),
reason,
attempts: attempt + 1
}
}).catch(() => {});
@@ -184,6 +202,11 @@ export class AiWorker {
}
}
/**
 * Returns the sleep duration, in milliseconds, for the given step of the
 * unreachable/timeout retry schedule.
 *
 * The step is clamped into the bounds of `unreachableBackoffsMs`, so any step
 * past the end of the table keeps returning the last (largest) entry.
 *
 * @param step Zero-based position in the unreachable/timeout backoff schedule.
 * @returns A finite backoff in milliseconds; never `undefined`.
 */
private nextBackoffMs(step: number): number {
  // Cap used when the table cannot supply a value. 900_000 ms (15 minutes)
  // mirrors the last entry of the default schedule.
  const fallbackMs = 900_000;
  const table = this.unreachableBackoffsMs;
  // An empty table (e.g. `unreachableBackoffsMs: []` supplied via options) makes
  // `length - 1` equal to -1; indexing with it yields `undefined`, which would
  // turn the computed next-run timestamp into an Invalid Date.
  if (table.length === 0) return fallbackMs;
  // Clamp on both sides: a negative step must not index before the table start.
  const idx = Math.floor(Math.min(Math.max(step, 0), table.length - 1));
  // `??` covers a NaN step (idx is NaN -> lookup is undefined) without a `!` assertion.
  return table[idx] ?? fallbackMs;
}
private emit(noteId: string): void {
if (!this.onUpdate) return;
const note = this.repo.findById(noteId);