// File: inkling/src/main/services/HealthChecker.ts (TypeScript, 105 lines, 3.8 KiB)

import type { HealthResult } from '../ai/InferenceProvider.js';
import { ProviderHolder } from '../ai/ProviderHolder.js';
/**
 * A health check flipped from ok to not-ok. `reason` carries the check's
 * reason, or `'unknown'` when the result had none.
 */
type OllamaUnreachableEvent = { kind: 'ollama_unreachable'; reason: string };

/**
 * A health check flipped from not-ok back to ok. `downtimeMs` is the time
 * elapsed since the matching unreachable transition (0 if none was recorded).
 */
type OllamaRecoveredEvent = { kind: 'ollama_recovered'; downtimeMs: number };

/** The user explicitly requested a re-check (`runOnce({ manual: true })`). */
type OllamaRecheckManualEvent = { kind: 'ollama_recheck_manual' };

/** Telemetry events emitted by `HealthChecker`, discriminated by `kind`. */
export type HealthTelemetryEvent =
  | OllamaUnreachableEvent
  | OllamaRecoveredEvent
  | OllamaRecheckManualEvent;
/** Optional configuration accepted by the `HealthChecker` constructor. */
export interface HealthCheckerOptions {
  /** Polling period in milliseconds. Falls back to `DEFAULT_INTERVAL_MS` when omitted. */
  intervalMs?: number;

  /** Clock used to measure downtime. Falls back to `Date.now` when omitted. */
  now?: () => number;

  /**
   * v0.2.9 Cut B Task 14 — gate for the polling tick: when this resolves to
   * `false` (settings.ai_enabled=false) the tick is skipped.
   * When not provided, polling is always enabled (backward-compat).
   */
  isAiEnabled?: () => Promise<boolean>;

  /** Called with the new status whenever `ok` flips or `reason` changes. */
  onUpdate?: (status: HealthResult) => void;

  /** Called with each telemetry event the checker emits. */
  onTelemetry?: (event: HealthTelemetryEvent) => void;
}
/** Polling period used when `HealthCheckerOptions.intervalMs` is not supplied: one minute. */
const DEFAULT_INTERVAL_MS = 60 * 1_000;
/**
 * Polls the current provider's `healthCheck()` on a fixed interval and reports
 * state changes through the `onUpdate` / `onTelemetry` callbacks.
 *
 * - `onTelemetry` receives `ollama_unreachable` / `ollama_recovered` when `ok`
 *   flips, and `ollama_recheck_manual` once per manual re-check.
 * - `onUpdate` receives the new status when `ok` flips or when only `reason`
 *   changes.
 */
export class HealthChecker {
  // Sentinel: starting from ok=true means a healthy first check is not seen as
  // a transition (no-op, zero telemetry), while an unhealthy first check is
  // recognised as a normal "unreachable" transition.
  private last: HealthResult = { ok: true };
  private timer: NodeJS.Timeout | null = null;
  // Clock reading taken at the last ok → not-ok transition; null while healthy.
  private unreachableSince: number | null = null;
  // m2 fix: in-flight guard — if setInterval starts a second runOnce while the
  // first is still pending, the same promise is returned. healthCheck is an
  // idempotent HTTP call so this is not a safety problem, but it avoids
  // double-emitting telemetry (false→true→false processed concurrently).
  private inFlight: Promise<HealthResult> | null = null;
  private readonly intervalMs: number;
  private readonly now: () => number;

  /**
   * @param holder Source of the provider to probe; `holder.get()` is called for
   *   every check that is actually performed.
   * @param opts Optional interval, callbacks, clock and AI-enabled gate.
   */
  constructor(
    private readonly holder: ProviderHolder,
    private readonly opts: HealthCheckerOptions = {}
  ) {
    this.intervalMs = opts.intervalMs ?? DEFAULT_INTERVAL_MS;
    this.now = opts.now ?? Date.now;
  }

  /**
   * Runs a single health check and resolves with its result.
   *
   * Calls made while a check is already pending share that pending check
   * instead of starting another one.
   *
   * @param opts.manual When `true`, `ollama_recheck_manual` is emitted before
   *   the check starts.
   * @returns The health result, which is also stored for `lastStatus()`.
   * @throws Rejects with whatever `holder.get()`, the provider's
   *   `healthCheck()` or a callback throws.
   */
  async runOnce(opts?: { manual?: boolean }): Promise<HealthResult> {
    // n4 intent: ollama_recheck_manual fires *before* healthCheck is called —
    // even if the provider throws or responds late, the manual count is never
    // lost. Guarantees user click → telemetry 1:1.
    if (opts?.manual === true) {
      this.opts.onTelemetry?.({ kind: 'ollama_recheck_manual' });
    }
    if (this.inFlight !== null) return this.inFlight;
    this.inFlight = this.doRunOnce();
    try {
      return await this.inFlight;
    } finally {
      this.inFlight = null;
    }
  }

  /** Performs the actual check, emits transition callbacks and stores the result. */
  private async doRunOnce(): Promise<HealthResult> {
    const next = await this.holder.get().healthCheck();
    const prev = this.last;
    const okChanged = prev.ok !== next.ok;
    const reasonChanged = prev.reason !== next.reason;
    if (okChanged) {
      if (next.ok === false) {
        // ok → not-ok: remember when the outage started so downtime can be reported.
        this.unreachableSince = this.now();
        this.opts.onTelemetry?.({ kind: 'ollama_unreachable', reason: next.reason ?? 'unknown' });
      } else {
        // not-ok → ok: report how long the outage lasted (0 if no start was recorded).
        const downtimeMs = this.unreachableSince !== null ? this.now() - this.unreachableSince : 0;
        this.unreachableSince = null;
        this.opts.onTelemetry?.({ kind: 'ollama_recovered', downtimeMs });
      }
      this.opts.onUpdate?.(next);
    } else if (reasonChanged) {
      // Same ok state but a different reason: update listeners, no telemetry.
      this.opts.onUpdate?.(next);
    }
    this.last = next;
    return next;
  }

  /**
   * Starts polling: runs one tick immediately, then one every `intervalMs`.
   * Does nothing if polling is already running.
   */
  start(): void {
    if (this.timer !== null) return;
    void this.tickIfEnabled();
    this.timer = setInterval(() => {
      void this.tickIfEnabled();
    }, this.intervalMs);
  }

  // v0.2.9 Cut B Task 14 — polling tick. Skipped when settings.ai_enabled=false.
  // A manual runOnce({ manual: true }) always runs regardless of this gate
  // (explicit user intent).
  //
  // This method never rejects: it is invoked fire-and-forget (`void ...`), so a
  // rejection here would surface as an unhandled promise rejection.
  private async tickIfEnabled(): Promise<void> {
    if (this.opts.isAiEnabled !== undefined) {
      try {
        const enabled = await this.opts.isAiEnabled();
        if (!enabled) return;
      } catch {
        // Settings failed to load — err on the safe side and keep polling
        // (preserves the pre-gate behaviour).
      }
    }
    try {
      await this.runOnce();
    } catch (err: unknown) {
      // Background poll is best-effort: log and let the next tick retry.
      // Direct runOnce() callers still observe the rejection themselves.
      console.warn('[HealthChecker] polling health check failed:', err);
    }
  }

  /** Stops polling. A check that is already in flight is not cancelled. */
  stop(): void {
    if (this.timer !== null) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /** Returns the most recently stored result (`{ ok: true }` before any check completes). */
  lastStatus(): HealthResult {
    return this.last;
  }
}