feat(due-date): parseAllCandidates — extract all matches (text order)
기존 parseDueDate (first-match-wins) 는 backward compat 로 보존. parseAllCandidates 가 모든 high/medium 매치를 text 순서로 반환 — F7 AI-primary flow 의 prompt 후보 주입 입력으로 사용. Overlapping-span suppression: 다음 주 월요일 같은 케이스에서 rule 10 (전체) 이 rule 13 (다음 주 alone) 을 포함하면 후자 매치 제거. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -325,3 +325,239 @@ export function parseDueDate(text: string, todayKst: Date): ParseResult {
|
||||
|
||||
return { iso: null, confidence: null, matchedToken: null };
|
||||
}
|
||||
|
||||
/**
 * Collects every high-confidence and medium-confidence due-date candidate
 * found in `text` and returns them ordered by the position at which each
 * match starts (ascending). Per the original design note, the result feeds the
 * F7 AI-primary flow, which injects the candidates into the prompt as hints
 * for the model to pick from or reject.
 *
 * A candidate whose matched span lies entirely inside a strictly longer
 * candidate's span is dropped, so "다음 주 월요일" yields only the full weekday
 * match and not an additional bare "다음 주" match.
 *
 * The original `parseDueDate` (first-match-wins) is kept separately for
 * backward compatibility; this function does not call it.
 *
 * @param text - Free-form text to scan for date expressions.
 * @param todayKst - Reference "today"; only its UTC year/month/day are used.
 * @returns Candidates exposing only `iso`, `confidence` and `matchedToken`.
 *   High-confidence entries carry a resolved `iso`; medium-confidence entries
 *   carry `iso: null`. The array is empty when nothing matches.
 */
export function parseAllCandidates(text: string, todayKst: Date): ParseResult[] {
  // Truncate the reference date to UTC midnight so all arithmetic is per-day.
  const today = new Date(
    Date.UTC(
      todayKst.getUTCFullYear(),
      todayKst.getUTCMonth(),
      todayKst.getUTCDate()
    )
  );

  // Internal shape: the public result plus the matched span, which is needed
  // for overlap suppression and ordering and is stripped before returning.
  interface Candidate extends ParseResult {
    matchPosition: number;
    matchLength: number;
  }
  const out: Candidate[] = [];

  /**
   * Runs `re` globally over `text` and records one candidate per match for
   * which `build` returns a non-null value (null means "matched but invalid").
   */
  function pushAllMatches(
    re: RegExp,
    build: (m: RegExpExecArray) => { iso: string | null; matchedToken: string } | null,
    confidence: 'high' | 'medium' = 'high'
  ): void {
    // Clone with the `g` flag so exec() advances instead of re-matching at 0.
    const flagged = new RegExp(re.source, re.flags.includes('g') ? re.flags : re.flags + 'g');
    let m: RegExpExecArray | null;
    while ((m = flagged.exec(text)) !== null) {
      const built = build(m);
      if (built !== null) {
        out.push({
          iso: built.iso,
          confidence,
          matchedToken: built.matchedToken,
          matchPosition: m.index,
          matchLength: m[0].length
        });
      }
      // Avoid an infinite loop on zero-length matches.
      if (m.index === flagged.lastIndex) flagged.lastIndex++;
    }
  }

  /**
   * Records one high-confidence candidate for every occurrence of the literal
   * `token`. `makeIso` is only invoked when an occurrence is actually found.
   */
  function pushAllLiterals(token: string, makeIso: () => string | null): void {
    let from = 0;
    let idx: number;
    while ((idx = text.indexOf(token, from)) >= 0) {
      out.push({
        iso: makeIso(),
        confidence: 'high',
        matchedToken: token,
        matchPosition: idx,
        matchLength: token.length
      });
      from = idx + token.length;
    }
  }

  /**
   * Shared builder for the year-less month/day rules (2 and 3): capture group
   * 1 is the month, group 2 the day. A date earlier than today rolls over to
   * next year; out-of-range or calendar-invalid dates are rejected.
   */
  function buildMonthDay(m: RegExpExecArray): { iso: string | null; matchedToken: string } | null {
    const mo = Number(m[1]);
    const d = Number(m[2]);
    if (!(mo >= 1 && mo <= 12 && d >= 1 && d <= 31)) return null;
    const ty = today.getUTCFullYear();
    const tm = today.getUTCMonth() + 1;
    const td = today.getUTCDate();
    const year = mo < tm || (mo === tm && d < td) ? ty + 1 : ty;
    if (!isValidYmd(year, mo, d)) return null;
    return { iso: toIso(makeUtcDate(year, mo, d)), matchedToken: m[0] };
  }

  // 1. YYYY-MM-DD literal
  pushAllMatches(/\b(\d{4})-(\d{2})-(\d{2})\b/, (m) => {
    const y = Number(m[1]);
    const mo = Number(m[2]);
    const d = Number(m[3]);
    if (!isValidYmd(y, mo, d)) return null;
    return { iso: toIso(makeUtcDate(y, mo, d)), matchedToken: m[0] };
  });

  // 2. "N월 N일" (month N, day N)
  pushAllMatches(/(\d{1,2})\s*월\s*(\d{1,2})\s*일/, buildMonthDay);

  // 3. MM/DD (not adjacent to further digits)
  pushAllMatches(/(?<!\d)(\d{1,2})\/(\d{1,2})(?!\d)/, buildMonthDay);

  // 4. "N일 뒤|후" (N days later)
  pushAllMatches(/(\d{1,3})\s*일\s*(뒤|후)/, (m) => {
    const n = Number(m[1]);
    return { iso: toIso(addDays(today, n)), matchedToken: m[0] };
  });

  // 5. "N주 뒤|후" (N weeks later)
  pushAllMatches(/(\d{1,2})\s*주\s*(뒤|후)/, (m) => {
    const n = Number(m[1]);
    return { iso: toIso(addDays(today, n * 7)), matchedToken: m[0] };
  });

  // 6. "N개월 뒤|후" (N months later)
  pushAllMatches(/(\d{1,2})\s*개월\s*(뒤|후)/, (m) => {
    const n = Number(m[1]);
    return { iso: toIso(addMonths(today, n)), matchedToken: m[0] };
  });

  // 7-9. "글피" (+3 days) / "모레" (+2 days) / "내일" (+1 day): every
  // occurrence becomes its own entry.
  for (const { token, offset } of [
    { token: '글피', offset: 3 },
    { token: '모레', offset: 2 },
    { token: '내일', offset: 1 }
  ]) {
    pushAllLiterals(token, () => toIso(addDays(today, offset)));
  }

  // 10. "다음 주 X요일" (weekday X of next week)
  pushAllMatches(/다음\s*주\s*([월화수목금토일])요일/, (m) => {
    const wd = m[1]!;
    const offset = WEEKDAY_OFFSET[wd]!;
    const base = nextWeekMonday(today);
    return { iso: toIso(addDays(base, offset)), matchedToken: m[0] };
  });

  // 11. "이번 주 X요일" (weekday X of this week)
  pushAllMatches(/이번\s*주\s*([월화수목금토일])요일/, (m) => {
    const wd = m[1]!;
    const offset = WEEKDAY_OFFSET[wd]!;
    const base = thisMonday(today);
    return { iso: toIso(addDays(base, offset)), matchedToken: m[0] };
  });

  // 12. "다음 달 N일" (day N of next month)
  pushAllMatches(/다음\s*달\s*(\d{1,2})\s*일/, (m) => {
    const d = Number(m[1]);
    const next = addMonths(today, 1);
    const target = new Date(
      Date.UTC(next.getUTCFullYear(), next.getUTCMonth(), d)
    );
    // Date.UTC silently rolls an out-of-range day into an adjacent month;
    // reject the match when that happened or the day is outside 1..31.
    if (
      target.getUTCFullYear() !== next.getUTCFullYear() ||
      target.getUTCMonth() !== next.getUTCMonth() ||
      d < 1 ||
      d > 31
    ) {
      return null;
    }
    return { iso: toIso(target), matchedToken: m[0] };
  });

  // 13. "다음 주" alone (not directly followed by a Hangul syllable) → the
  // Monday returned by nextWeekMonday.
  pushAllMatches(/다음\s*주(?![가-힣])/, (m) => {
    const base = nextWeekMonday(today);
    return { iso: toIso(base), matchedToken: m[0] };
  });

  // 14. "다음 달" alone → first day of next month
  pushAllMatches(/다음\s*달/, (m) => {
    const next = addMonths(today, 1);
    const first = new Date(
      Date.UTC(next.getUTCFullYear(), next.getUTCMonth(), 1)
    );
    return { iso: toIso(first), matchedToken: m[0] };
  });

  // 15. "오늘" (today): every occurrence becomes its own entry.
  pushAllLiterals('오늘', () => toIso(today));

  // ── Medium-confidence ambiguous tokens ──
  // These are recognised as date/time-related but are not resolved to a
  // date, so they are reported with `iso: null`.
  const ambiguousPatterns: RegExp[] = [
    /월말/, // end of month
    /주말/, // weekend
    /퇴근\s*전/, // before leaving work
    /오후\s*\d{1,2}\s*시/, // N o'clock PM
    /오전\s*\d{1,2}\s*시/, // N o'clock AM
    /\d{1,2}\s*시/ // N o'clock
  ];
  for (const re of ambiguousPatterns) {
    pushAllMatches(re, (m) => ({ iso: null, matchedToken: m[0] }), 'medium');
  }

  // Suppress every candidate whose span is fully contained in a strictly
  // longer candidate's span. The check is purely positional: it applies even
  // when the two candidates resolve to different dates. This is what removes
  // rule 13's bare "다음 주" when rule 10 matched the whole "다음 주 월요일", and
  // the bare "N시" when "오후 N시" matched. Identical spans never suppress each
  // other because the containing span must be strictly longer.
  const filtered = out.filter((c) => {
    const cEnd = c.matchPosition + c.matchLength;
    return !out.some(
      (other) =>
        other.matchLength > c.matchLength &&
        other.matchPosition <= c.matchPosition &&
        other.matchPosition + other.matchLength >= cEnd
    );
  });

  // Order by text position ascending; Array.prototype.sort is stable, so
  // candidates starting at the same position keep their insertion order.
  filtered.sort((a, b) => a.matchPosition - b.matchPosition);

  // Strip internal matchPosition / matchLength before returning.
  return filtered.map(({ iso, confidence, matchedToken }) => ({
    iso,
    confidence,
    matchedToken
  }));
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { parseDueDate } from '@main/services/dueDateParser.js';
|
||||
import { parseDueDate, parseAllCandidates } from '@main/services/dueDateParser.js';
|
||||
|
||||
// 2026-04-26 is a Sunday (KST). Use that as "today" for fixtures.
|
||||
const TODAY = new Date('2026-04-26T00:00:00.000Z'); // KST midnight = UTC 15:00 prior day, but for parser logic we treat input Date as KST-aligned. The parser treats the passed Date as the "today reference" without further timezone math.
|
||||
@@ -124,3 +124,52 @@ describe('parseDueDate (Korean rule parser)', () => {
|
||||
expect(parseDueDate('다음 주 발표', TODAY).iso).toBe('2026-05-04');
|
||||
});
|
||||
});
|
||||
|
||||
// Behavioural tests for parseAllCandidates: it must return every candidate
// (not just the first), in text order, with contained spans suppressed.
describe('parseAllCandidates', () => {
  it('returns empty array when no token', () => {
    expect(parseAllCandidates('아무 일정 없음', TODAY)).toEqual([]);
  });

  it('returns 2 high-confidence candidates for "내일 모레"', () => {
    const r = parseAllCandidates('내일 모레', TODAY);
    expect(r.length).toBe(2);
    const isos = r.map((c) => c.iso);
    expect(isos).toContain('2026-04-27');
    expect(isos).toContain('2026-04-28');
    expect(r.every((c) => c.confidence === 'high')).toBe(true);
  });

  it('returns candidates in text-position order', () => {
    const r = parseAllCandidates('내일 모레', TODAY);
    expect(r[0]!.matchedToken).toBe('내일');
    expect(r[1]!.matchedToken).toBe('모레');
  });

  it('returns 1 candidate for single token "내일"', () => {
    const r = parseAllCandidates('내일 회의', TODAY);
    expect(r.length).toBe(1);
    expect(r[0]!.iso).toBe('2026-04-27');
  });

  it('returns 1 medium-confidence candidate for "월말 마감"', () => {
    const r = parseAllCandidates('월말 마감', TODAY);
    expect(r.length).toBe(1);
    expect(r[0]!.iso).toBeNull();
    expect(r[0]!.confidence).toBe('medium');
    expect(r[0]!.matchedToken).toBe('월말');
  });

  it('returns mix of high + medium candidates', () => {
    const r = parseAllCandidates('내일 월말 회의', TODAY);
    expect(r.length).toBe(2);
    expect(r[0]!.confidence).toBe('high');
    expect(r[1]!.confidence).toBe('medium');
  });

  it('returns 2 candidates for "5월 1일 이후 다음 주 월요일까지"', () => {
    const r = parseAllCandidates('5월 1일 이후 다음 주 월요일까지', TODAY);
    expect(r.length).toBe(2);
    expect(r[0]!.iso).toBe('2026-05-01');
    expect(r[1]!.iso).toBe('2026-05-04');
  });

  // Suppression is positional only: the bare "다음 주" match is dropped
  // whenever it sits inside a longer weekday match, regardless of which dates
  // the two matches would resolve to.
  it('suppresses contained "다음 주" inside "다음 주 화요일"', () => {
    const r = parseAllCandidates('다음 주 화요일 회의', TODAY);
    expect(r.length).toBe(1);
    expect(r[0]!.matchedToken).toBe('다음 주 화요일');
    expect(r[0]!.confidence).toBe('high');
  });
});
|
||||
|
||||
Reference in New Issue
Block a user