feat(due-date): parseAllCandidates — extract all matches (text order)

기존 parseDueDate (first-match-wins) 는 backward compat 로 보존.
parseAllCandidates 가 모든 high/medium 매치를 text 순서로 반환 — F7
AI-primary flow 의 prompt 후보 주입 입력으로 사용.

Overlapping-span suppression: 다음 주 월요일 같은 케이스에서 rule 10
(전체) 이 rule 13 (다음 주 alone) 을 포함하면 후자 매치 제거.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
altair823
2026-04-26 13:04:17 +09:00
parent 2ee45bc53c
commit 1c72b64c2f
2 changed files with 286 additions and 1 deletions

View File

@@ -325,3 +325,239 @@ export function parseDueDate(text: string, todayKst: Date): ParseResult {
return { iso: null, confidence: null, matchedToken: null };
}
/**
 * Returns every high- and medium-confidence date candidate found in `text`,
 * ordered by the position at which each match starts (ascending). Used by the
 * F7 AI-primary flow to inject candidates into the prompt as hints — the AI
 * picks (or rejects) them.
 *
 * High-confidence candidates carry a resolved `iso` date; medium-confidence
 * candidates (ambiguous tokens such as 월말 or a bare clock time) carry
 * `iso: null`. A candidate whose matched span lies entirely inside a strictly
 * longer matched span is dropped, so "다음 주 월요일" yields only the weekday
 * match and not an additional "다음 주" match.
 *
 * The original `parseDueDate` (first-match-wins) is preserved for backward
 * compatibility and is not affected by this function.
 *
 * @param text - Free-form text to scan.
 * @param todayKst - Reference "today". Only its UTC year/month/date fields are
 *   read; the time of day is discarded.
 * @returns Candidates exposing only `iso`, `confidence` and `matchedToken`;
 *   the internal position/length bookkeeping is stripped before returning.
 */
export function parseAllCandidates(text: string, todayKst: Date): ParseResult[] {
  // Reference day truncated to 00:00 UTC, built from the UTC calendar fields.
  const today = new Date(
    Date.UTC(
      todayKst.getUTCFullYear(),
      todayKst.getUTCMonth(),
      todayKst.getUTCDate()
    )
  );

  interface Candidate extends ParseResult {
    matchPosition: number;
    matchLength: number;
  }
  const out: Candidate[] = [];

  /**
   * Runs `re` globally over `text` and records one candidate per match for
   * which `build` returns a non-null result.
   */
  function pushAllMatches(
    re: RegExp,
    build: (m: RegExpExecArray) => { iso: string | null; matchedToken: string } | null,
    confidence: 'high' | 'medium' = 'high'
  ): void {
    // Work on a private global copy so the caller's regex state is untouched.
    const flagged = new RegExp(re.source, re.flags.includes('g') ? re.flags : re.flags + 'g');
    let m: RegExpExecArray | null;
    while ((m = flagged.exec(text)) !== null) {
      const built = build(m);
      if (built !== null) {
        out.push({
          iso: built.iso,
          confidence,
          matchedToken: built.matchedToken,
          matchPosition: m.index,
          matchLength: m[0].length
        });
      }
      // Avoid infinite loop on zero-length matches
      if (m.index === flagged.lastIndex) flagged.lastIndex++;
    }
  }

  /** Records every non-overlapping occurrence of a fixed `token` as a high-confidence candidate. */
  function pushAllLiterals(token: string, iso: string | null): void {
    let from = 0;
    let idx: number;
    while ((idx = text.indexOf(token, from)) >= 0) {
      out.push({
        iso,
        confidence: 'high',
        matchedToken: token,
        matchPosition: idx,
        matchLength: token.length
      });
      from = idx + token.length;
    }
  }

  /**
   * Resolves a year-less month/day to an ISO date: the current year, or the
   * next year when that month/day is already before `today`. Returns null for
   * out-of-range or calendar-invalid input.
   */
  function upcomingMonthDay(mo: number, d: number): string | null {
    if (!(mo >= 1 && mo <= 12 && d >= 1 && d <= 31)) return null;
    const ty = today.getUTCFullYear();
    const tm = today.getUTCMonth() + 1;
    const td = today.getUTCDate();
    const year = mo < tm || (mo === tm && d < td) ? ty + 1 : ty;
    if (!isValidYmd(year, mo, d)) return null;
    return toIso(makeUtcDate(year, mo, d));
  }

  // 1. YYYY-MM-DD literal
  pushAllMatches(/\b(\d{4})-(\d{2})-(\d{2})\b/, (m) => {
    const y = Number(m[1]);
    const mo = Number(m[2]);
    const d = Number(m[3]);
    if (!isValidYmd(y, mo, d)) return null;
    return { iso: toIso(makeUtcDate(y, mo, d)), matchedToken: m[0] };
  });

  // 2. N월 N일
  pushAllMatches(/(\d{1,2})\s*월\s*(\d{1,2})\s*일/, (m) => {
    const iso = upcomingMonthDay(Number(m[1]), Number(m[2]));
    return iso === null ? null : { iso, matchedToken: m[0] };
  });

  // 3. MM/DD
  pushAllMatches(/(?<!\d)(\d{1,2})\/(\d{1,2})(?!\d)/, (m) => {
    const iso = upcomingMonthDay(Number(m[1]), Number(m[2]));
    return iso === null ? null : { iso, matchedToken: m[0] };
  });

  // Rules 4-6 carry a `(?<!\d)` guard: without it a number longer than the
  // digit cap is matched on its trailing digits only (e.g. "1000일 뒤" would
  // match "000일 뒤" and produce a bogus candidate for today).

  // 4. N일 (뒤|후)
  pushAllMatches(/(?<!\d)(\d{1,3})\s*일\s*(뒤|후)/, (m) => {
    const n = Number(m[1]);
    return { iso: toIso(addDays(today, n)), matchedToken: m[0] };
  });

  // 5. N주 (뒤|후)
  pushAllMatches(/(?<!\d)(\d{1,2})\s*주\s*(뒤|후)/, (m) => {
    const n = Number(m[1]);
    return { iso: toIso(addDays(today, n * 7)), matchedToken: m[0] };
  });

  // 6. N개월 (뒤|후)
  pushAllMatches(/(?<!\d)(\d{1,2})\s*개월\s*(뒤|후)/, (m) => {
    const n = Number(m[1]);
    return { iso: toIso(addMonths(today, n)), matchedToken: m[0] };
  });

  // 7-9. 글피 / 모레 / 내일 — collect ALL occurrences as separate entries
  for (const { token, offset } of [
    { token: '글피', offset: 3 },
    { token: '모레', offset: 2 },
    { token: '내일', offset: 1 }
  ]) {
    pushAllLiterals(token, toIso(addDays(today, offset)));
  }

  // 10. 다음 주 X요일
  pushAllMatches(/다음\s*주\s*([월화수목금토일])요일/, (m) => {
    const offset = WEEKDAY_OFFSET[m[1]!];
    // The character class should guarantee a known weekday; guard anyway so a
    // missing table entry cannot turn into an invalid date.
    if (offset === undefined) return null;
    const base = nextWeekMonday(today);
    return { iso: toIso(addDays(base, offset)), matchedToken: m[0] };
  });

  // 11. 이번 주 X요일
  pushAllMatches(/이번\s*주\s*([월화수목금토일])요일/, (m) => {
    const offset = WEEKDAY_OFFSET[m[1]!];
    if (offset === undefined) return null;
    const base = thisMonday(today);
    return { iso: toIso(addDays(base, offset)), matchedToken: m[0] };
  });

  // 12. 다음 달 N일
  pushAllMatches(/다음\s*달\s*(\d{1,2})\s*일/, (m) => {
    const d = Number(m[1]);
    const next = addMonths(today, 1);
    const target = new Date(
      Date.UTC(next.getUTCFullYear(), next.getUTCMonth(), d)
    );
    // Date.UTC rolls an out-of-range day into a neighbouring month; reject the
    // match when that happened (e.g. day 31 in a 30-day month, or day 0).
    if (
      target.getUTCFullYear() !== next.getUTCFullYear() ||
      target.getUTCMonth() !== next.getUTCMonth() ||
      d < 1 ||
      d > 31
    ) {
      return null;
    }
    return { iso: toIso(target), matchedToken: m[0] };
  });

  // 13. 다음 주 (alone) → next Monday
  pushAllMatches(/다음\s*주(?![가-힣])/, (m) => {
    const base = nextWeekMonday(today);
    return { iso: toIso(base), matchedToken: m[0] };
  });

  // 14. 다음 달 (alone) → first day of next month
  pushAllMatches(/다음\s*달/, (m) => {
    const next = addMonths(today, 1);
    const first = new Date(
      Date.UTC(next.getUTCFullYear(), next.getUTCMonth(), 1)
    );
    return { iso: toIso(first), matchedToken: m[0] };
  });

  // 15. 오늘 — collect all occurrences
  pushAllLiterals('오늘', toIso(today));

  // ── Medium-confidence ambiguous tokens ──
  // These are recognised but not resolved to a date, hence `iso: null`.
  const ambiguousPatterns: Array<RegExp> = [
    /월말/,
    /주말/,
    /퇴근\s*전/,
    /오후\s*\d{1,2}\s*시/,
    /오전\s*\d{1,2}\s*시/,
    /\d{1,2}\s*시/
  ];
  for (const re of ambiguousPatterns) {
    pushAllMatches(re, (m) => ({ iso: null, matchedToken: m[0] }), 'medium');
  }

  // Suppress any candidate whose span is fully contained in a strictly longer
  // candidate's span, regardless of the iso either one resolves to. This
  // handles "다음 주 월요일" matching both rule 10 (full weekday) and rule 13
  // (다음 주 alone) — rule 13's contained match is dropped. Candidates with
  // identical spans, or spans that only partially overlap, are all kept.
  const filtered = out.filter((c, i) => {
    const cEnd = c.matchPosition + c.matchLength;
    return !out.some(
      (other, j) =>
        j !== i &&
        other.matchLength > c.matchLength &&
        other.matchPosition <= c.matchPosition &&
        other.matchPosition + other.matchLength >= cEnd
    );
  });

  // Sort by text position ascending; Array.prototype.sort is stable, so
  // candidates starting at the same position keep their insertion order.
  filtered.sort((a, b) => a.matchPosition - b.matchPosition);

  // Strip internal matchPosition / matchLength before returning.
  return filtered.map(({ iso, confidence, matchedToken }) => ({
    iso,
    confidence,
    matchedToken
  }));
}

View File

@@ -1,5 +1,5 @@
import { describe, it, expect } from 'vitest';
import { parseDueDate } from '@main/services/dueDateParser.js';
import { parseDueDate, parseAllCandidates } from '@main/services/dueDateParser.js';
// Fixture "today": 2026-04-26, which is a Sunday (KST).
// Real KST midnight would be 15:00 UTC of the previous day, but the parser
// treats the Date it is given as the already-KST-aligned "today reference"
// and applies no further timezone math, so the fixture is written as
// 00:00 UTC on the intended calendar day.
const TODAY = new Date('2026-04-26T00:00:00.000Z');
@@ -124,3 +124,52 @@ describe('parseDueDate (Korean rule parser)', () => {
expect(parseDueDate('다음 주 발표', TODAY).iso).toBe('2026-05-04');
});
});
// Spec for parseAllCandidates: every match is reported (not just the first),
// in text order, with contained sub-matches suppressed.
describe('parseAllCandidates', () => {
  it('returns empty array when no token', () => {
    const candidates = parseAllCandidates('아무 일정 없음', TODAY);
    expect(candidates).toEqual([]);
  });

  it('returns 2 high-confidence candidates for "내일 모레"', () => {
    const candidates = parseAllCandidates('내일 모레', TODAY);
    expect(candidates).toHaveLength(2);

    // Order is not asserted here, only that both dates are present.
    const resolvedDates = candidates.map((candidate) => candidate.iso);
    expect(resolvedDates).toContain('2026-04-27');
    expect(resolvedDates).toContain('2026-04-28');

    const allHigh = candidates.every((candidate) => candidate.confidence === 'high');
    expect(allHigh).toBe(true);
  });

  it('returns candidates in text-position order', () => {
    const [first, second] = parseAllCandidates('내일 모레', TODAY);
    expect(first!.matchedToken).toBe('내일');
    expect(second!.matchedToken).toBe('모레');
  });

  it('returns 1 candidate for single token "내일"', () => {
    const candidates = parseAllCandidates('내일 회의', TODAY);
    expect(candidates).toHaveLength(1);
    const [only] = candidates;
    expect(only!.iso).toBe('2026-04-27');
  });

  it('returns 1 medium-confidence candidate for "월말 마감"', () => {
    const candidates = parseAllCandidates('월말 마감', TODAY);
    expect(candidates).toHaveLength(1);

    // Ambiguous tokens are surfaced without a resolved date.
    const [only] = candidates;
    expect(only!.iso).toBeNull();
    expect(only!.confidence).toBe('medium');
    expect(only!.matchedToken).toBe('월말');
  });

  it('returns mix of high + medium candidates', () => {
    const candidates = parseAllCandidates('내일 월말 회의', TODAY);
    expect(candidates).toHaveLength(2);
    const [first, second] = candidates;
    expect(first!.confidence).toBe('high');
    expect(second!.confidence).toBe('medium');
  });

  it('returns 2 candidates for "5월 1일 이후 다음 주 월요일까지"', () => {
    // "다음 주 월요일" must count once: the bare "다음 주" match inside it is
    // expected to be suppressed.
    const candidates = parseAllCandidates('5월 1일 이후 다음 주 월요일까지', TODAY);
    expect(candidates).toHaveLength(2);
    const [first, second] = candidates;
    expect(first!.iso).toBe('2026-05-01');
    expect(second!.iso).toBe('2026-05-04');
  });
});