feat(retry): AiWorker unreachable/timeout 무한 retry — 15분 cap (#2 v0.2.3)
This commit is contained in:
@@ -37,6 +37,7 @@ export interface AiTelemetryEmitter {
|
||||
|
||||
export interface AiWorkerOptions {
|
||||
backoffsMs?: number[];
|
||||
unreachableBackoffsMs?: number[];
|
||||
onUpdate?: (note: Note) => void;
|
||||
logger?: {
|
||||
info: (msg: string, meta?: Record<string, unknown>) => void;
|
||||
@@ -54,6 +55,8 @@ export class AiWorker {
|
||||
private running = false;
|
||||
private drainResolvers: Array<() => void> = [];
|
||||
private backoffsMs: number[];
|
||||
private unreachableBackoffsMs: number[];
|
||||
private unreachableBackoffStep = 0;
|
||||
private onUpdate?: (note: Note) => void;
|
||||
private logger: NonNullable<AiWorkerOptions['logger']>;
|
||||
private now: () => Date;
|
||||
@@ -65,6 +68,7 @@ export class AiWorker {
|
||||
opts: AiWorkerOptions = {}
|
||||
) {
|
||||
this.backoffsMs = opts.backoffsMs ?? [0, 30_000, 120_000];
|
||||
this.unreachableBackoffsMs = opts.unreachableBackoffsMs ?? [30_000, 60_000, 120_000, 240_000, 480_000, 900_000];
|
||||
this.onUpdate = opts.onUpdate;
|
||||
this.logger = opts.logger ?? { info: () => {}, warn: () => {}, error: () => {} };
|
||||
this.now = opts.now ?? (() => new Date());
|
||||
@@ -139,6 +143,7 @@ export class AiWorker {
|
||||
provider: this.provider.name,
|
||||
dueDate: res.dueDate ?? null
|
||||
});
|
||||
this.unreachableBackoffStep = 0; // 성공 시 step reset
|
||||
this.logger.info('ai.done', {
|
||||
noteId: job.noteId,
|
||||
attempt,
|
||||
@@ -158,9 +163,22 @@ export class AiWorker {
|
||||
this.emit(job.noteId);
|
||||
return;
|
||||
} catch (err) {
|
||||
const isLast = attempt === max - 1;
|
||||
const reason = classifyReason(err);
|
||||
const msg = (err as Error).message;
|
||||
this.logger.warn('ai.retry', { noteId: job.noteId, attempt, err: msg });
|
||||
this.logger.warn('ai.retry', { noteId: job.noteId, attempt, err: msg, reason });
|
||||
if (reason === 'unreachable' || reason === 'timeout') {
|
||||
// 무한 retry: attempts 증가 안 함, in-place loop + sleep.
|
||||
// markAiFailed / ai_failed emit 안 함 — ratio 통계는 schema/other 만 누적.
|
||||
const sleepMs = this.nextBackoffMs(this.unreachableBackoffStep);
|
||||
this.unreachableBackoffStep = Math.min(this.unreachableBackoffStep + 1, this.unreachableBackoffsMs.length - 1);
|
||||
const nextRunAt = new Date(Date.now() + sleepMs).toISOString();
|
||||
this.repo.setNextRunAt(job.noteId, nextRunAt, msg);
|
||||
await this.sleep(sleepMs);
|
||||
attempt -= 1; // for 루프 attempt++ 상쇄 — 같은 attempt 인덱스로 재시도
|
||||
continue;
|
||||
}
|
||||
// schema / other: 기존 max 3 retry 정책
|
||||
const isLast = attempt === max - 1;
|
||||
const nextRunAt = new Date(Date.now() + (this.backoffsMs[attempt + 1] ?? 0)).toISOString();
|
||||
this.repo.incrementJobAttempt(job.noteId, nextRunAt, msg);
|
||||
if (isLast) {
|
||||
@@ -171,7 +189,7 @@ export class AiWorker {
|
||||
kind: 'ai_failed',
|
||||
payload: {
|
||||
noteId: job.noteId,
|
||||
reason: classifyReason(err),
|
||||
reason,
|
||||
attempts: attempt + 1
|
||||
}
|
||||
}).catch(() => {});
|
||||
@@ -184,6 +202,11 @@ export class AiWorker {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the sleep duration (ms) for the given unreachable/timeout retry step.
 *
 * The step is clamped into the bounds of `unreachableBackoffsMs`, so steps past
 * the end of the schedule keep returning its last entry (the cap).
 *
 * Always returns a number: the caller adds the result to `Date.now()` and calls
 * `toISOString()` on the resulting Date, which throws for an Invalid Date, so
 * `undefined`/`NaN` must never leak out of here.
 *
 * @param step Zero-based position in the backoff schedule; out-of-range,
 *   fractional, or NaN values are normalised rather than trusted.
 * @returns The backoff delay in milliseconds.
 */
private nextBackoffMs(step: number): number {
  // Mirrors the first step of the constructor's default schedule. Used when the
  // configured schedule is empty; a 0 ms fallback would turn the unbounded
  // unreachable/timeout retry loop into a busy loop.
  const fallbackMs = 30_000;

  const schedule = this.unreachableBackoffsMs;
  if (schedule.length === 0) return fallbackMs;

  // Normalise to an integer index: NaN -> 0, fractions truncated.
  const wanted = Number.isNaN(step) ? 0 : Math.trunc(step);
  // Clamp to [0, last] so negative steps and steps past the end stay in range.
  const idx = Math.min(Math.max(wanted, 0), schedule.length - 1);

  // `??` instead of a non-null assertion: still yields a number if the array
  // has a hole at this index.
  return schedule[idx] ?? fallbackMs;
}
|
||||
|
||||
private emit(noteId: string): void {
|
||||
if (!this.onUpdate) return;
|
||||
const note = this.repo.findById(noteId);
|
||||
|
||||
Reference in New Issue
Block a user