// 105 lines · 3.8 KiB · TypeScript
import type { HealthResult } from '../ai/InferenceProvider.js';
|
|
import { ProviderHolder } from '../ai/ProviderHolder.js';
|
|
|
|
/**
 * Telemetry events reported through `HealthCheckerOptions.onTelemetry`.
 *
 * Discriminated on `kind`:
 * - `ollama_unreachable`    — health flipped from ok to not-ok; `reason` carries
 *                             the provider's reason (or `'unknown'` when absent).
 * - `ollama_recovered`      — health flipped from not-ok back to ok; `downtimeMs`
 *                             is the time elapsed since the matching unreachable
 *                             transition was observed.
 * - `ollama_recheck_manual` — a manual re-check was requested.
 */
export type HealthTelemetryEvent =
  | { kind: 'ollama_unreachable'; reason: string }
  | { kind: 'ollama_recovered'; downtimeMs: number }
  | { kind: 'ollama_recheck_manual' };
|
|
|
|
/** Optional knobs and callbacks for `HealthChecker`. Every field may be omitted. */
export interface HealthCheckerOptions {
  /** Polling period in milliseconds. Falls back to the module default when omitted. */
  intervalMs?: number;
  /** Called with the new status whenever `ok` or `reason` differs from the previous check. */
  onUpdate?: (status: HealthResult) => void;
  /** Sink for health telemetry events (unreachable / recovered / manual re-check). */
  onTelemetry?: (event: HealthTelemetryEvent) => void;
  /** Clock used for downtime measurement; injectable for tests. Defaults to `Date.now`. */
  now?: () => number;
  /**
   * v0.2.9 Cut B Task 14 — when this resolves to `false` (settings.ai_enabled=false),
   * the periodic polling tick is skipped. When not provided, polling is always
   * enabled (backward compatible).
   */
  isAiEnabled?: () => Promise<boolean>;
}
|
|
|
|
/** Polling period used when `HealthCheckerOptions.intervalMs` is not supplied: one minute. */
const DEFAULT_INTERVAL_MS = 60 * 1_000;
|
|
|
|
/**
 * Polls the current inference provider's `healthCheck()` and reports state
 * transitions through the optional `onUpdate` / `onTelemetry` callbacks.
 *
 * - `start()` / `stop()` control a periodic poll (every `intervalMs`).
 * - `runOnce()` performs a single check on demand; concurrent calls share one
 *   in-flight check.
 * - `lastStatus()` returns the most recently recorded result.
 */
export class HealthChecker {
  // Sentinel: the checker starts out "healthy". A healthy first check is therefore
  // not seen as a transition (no telemetry at all), while an unhealthy first check
  // is reported as a normal `ollama_unreachable` transition.
  private last: HealthResult = { ok: true };
  // Handle of the polling interval; null while polling is stopped.
  private timer: ReturnType<typeof setInterval> | null = null;
  // Clock reading taken when the ok → not-ok transition was observed; null while healthy.
  private unreachableSince: number | null = null;
  // m2 fix: in-flight guard. If the interval fires a second runOnce while the first
  // one is still pending, the same promise is returned. Per the original note the
  // health check is an idempotent HTTP call, so this is not about safety but about
  // avoiding duplicate telemetry when overlapping checks are processed concurrently.
  private inFlight: Promise<HealthResult> | null = null;
  private readonly intervalMs: number;
  private readonly now: () => number;

  /**
   * @param holder Supplies the current provider; `holder.get().healthCheck()` is
   *               invoked on every check.
   * @param opts   Optional interval, clock and callbacks.
   */
  constructor(
    private readonly holder: ProviderHolder,
    private readonly opts: HealthCheckerOptions = {}
  ) {
    this.intervalMs = opts.intervalMs ?? DEFAULT_INTERVAL_MS;
    this.now = opts.now ?? Date.now;
  }

  /**
   * Runs a single health check, or joins the one already in flight.
   *
   * @param opts `manual: true` marks a user-triggered re-check and emits
   *             `ollama_recheck_manual` telemetry.
   * @returns The result of the health check.
   * @throws Rejects with whatever the provider's `healthCheck()` (or a callback
   *         invoked while processing the result) throws.
   */
  async runOnce(opts?: { manual?: boolean }): Promise<HealthResult> {
    // n4 intent: the manual event fires *before* the health check is issued, so a
    // throwing or slow provider never loses a manual count (user click → telemetry 1:1).
    if (opts?.manual === true) {
      this.opts.onTelemetry?.({ kind: 'ollama_recheck_manual' });
    }
    if (this.inFlight !== null) return this.inFlight;
    this.inFlight = this.doRunOnce();
    try {
      return await this.inFlight;
    } finally {
      // Clear the guard on success and on failure so the next call starts a fresh check.
      this.inFlight = null;
    }
  }

  /**
   * Performs the actual check, emits transition telemetry / updates, and records
   * the result as the new `last` status.
   */
  private async doRunOnce(): Promise<HealthResult> {
    const next = await this.holder.get().healthCheck();
    const prev = this.last;
    const okChanged = prev.ok !== next.ok;
    const reasonChanged = prev.reason !== next.reason;
    if (okChanged) {
      if (next.ok === false) {
        // ok → not-ok: remember when the outage was first observed.
        this.unreachableSince = this.now();
        this.opts.onTelemetry?.({ kind: 'ollama_unreachable', reason: next.reason ?? 'unknown' });
      } else {
        // not-ok → ok: report how long the outage lasted (0 if no start was recorded).
        const downtimeMs = this.unreachableSince !== null ? this.now() - this.unreachableSince : 0;
        this.unreachableSince = null;
        this.opts.onTelemetry?.({ kind: 'ollama_recovered', downtimeMs });
      }
      this.opts.onUpdate?.(next);
    } else if (reasonChanged) {
      // Same ok state but a different reason: listeners get the update, no telemetry.
      this.opts.onUpdate?.(next);
    }
    this.last = next;
    return next;
  }

  /** Starts periodic polling: one tick immediately, then one every `intervalMs`. No-op if already started. */
  start(): void {
    if (this.timer !== null) return;
    void this.tickIfEnabled();
    this.timer = setInterval(() => { void this.tickIfEnabled(); }, this.intervalMs);
  }

  // v0.2.9 Cut B Task 14 — polling tick; skipped while settings.ai_enabled=false.
  // A manual runOnce({ manual: true }) does not pass through this gate and always
  // runs (explicit user intent).
  private async tickIfEnabled(): Promise<void> {
    if (this.opts.isAiEnabled !== undefined) {
      try {
        const enabled = await this.opts.isAiEnabled();
        if (!enabled) return;
      } catch {
        // Settings could not be loaded: err on the side of polling (previous behavior).
      }
    }
    try {
      await this.runOnce();
    } catch {
      // Ticks are launched fire-and-forget (`void`) from start(), so a rejection
      // escaping here would become an unhandled promise rejection. Polling is
      // best-effort: the failure is dropped, `last` stays unchanged, and the next
      // tick tries again. Direct runOnce() callers still receive the rejection.
    }
  }

  /** Stops periodic polling. A check that is already in flight is not cancelled. */
  stop(): void {
    if (this.timer !== null) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /** Returns the most recently recorded health result (`{ ok: true }` before any check). */
  lastStatus(): HealthResult { return this.last; }
}
|