feat(app): wire IngestLogWriter into 5 ingest emit hooks (Arc<Mutex> sync)
v0.20.x ingest log feature 의 ingest pipeline wiring. 5 emit hook:

Hook 1: ingest_with_config_opts entry/exit (writer init + summary write + flush)
Hook 2: apply_ocr_to_pdf_pages closure (PdfOcrProgress::Finished → LogEvent::Ocr)
Hook 3: ingest_one_*_asset Err arm (LogEvent::Error)
Hook 4: scan 직후 fs_skips.events enumerate (LogEvent::Skip)
Hook 5: (Hook 3 통합) per-asset fatal error → LogEvent::Error

Hook 4 의 skip event carry 위해 kebab-source-fs 의 FsScanSkips 에 events: Vec<FsSkipEvent> field 추가 (kebab-source-fs 가 kebab-app 재호출 안 함 — cycle 회피).

Ownership: Option<Arc<Mutex<IngestLogWriter>>> binding 1 곳, 5 hook 이 clone+lock+write. ocr_ms_samples (Vec<u64> success-only) 는 Arc<Mutex> 로 share, summary stage 가 sort+p50/p90/max 계산. single-threaded per-asset loop 라 deadlock/contention 위험 없음.

Writer 실패는 ingest 자체 fail 시키지 않음 (tracing::warn + 진행).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -108,6 +108,8 @@ impl FsSourceConnector {
|
||||
// Accumulate per-category skip counts and sample paths.
|
||||
let mut fs_skips = FsScanSkips::default();
|
||||
for entry in &skipped_entries {
|
||||
let rel_path = entry.path.strip_prefix(&root).unwrap_or(&entry.path);
|
||||
let doc_path = rel_path.to_string_lossy().replace('\\', "/");
|
||||
match entry.category {
|
||||
SkipCategory::BuiltinBlacklist => {
|
||||
fs_skips.skipped_builtin_blacklist =
|
||||
@@ -117,6 +119,14 @@ impl FsSourceConnector {
|
||||
&entry.path,
|
||||
&root,
|
||||
);
|
||||
let ext = entry.path.extension()
|
||||
.map(|e| format!(".{}", e.to_string_lossy()))
|
||||
.unwrap_or_default();
|
||||
fs_skips.events.push(FsSkipEvent {
|
||||
doc_path,
|
||||
reason: "builtin_blacklist",
|
||||
detail: if ext.is_empty() { None } else { Some(ext) },
|
||||
});
|
||||
}
|
||||
SkipCategory::Gitignore => {
|
||||
fs_skips.skipped_gitignore =
|
||||
@@ -126,11 +136,21 @@ impl FsSourceConnector {
|
||||
&entry.path,
|
||||
&root,
|
||||
);
|
||||
fs_skips.events.push(FsSkipEvent {
|
||||
doc_path,
|
||||
reason: "gitignore",
|
||||
detail: None,
|
||||
});
|
||||
}
|
||||
SkipCategory::Kebabignore => {
|
||||
fs_skips.skipped_kebabignore =
|
||||
fs_skips.skipped_kebabignore.saturating_add(1);
|
||||
// kebabignore intentionally NOT in skip_examples per spec §5.5.
|
||||
fs_skips.events.push(FsSkipEvent {
|
||||
doc_path,
|
||||
reason: "kebabignore",
|
||||
detail: None,
|
||||
});
|
||||
}
|
||||
SkipCategory::Other => {
|
||||
// DEFAULT_EXCLUDES or config.workspace.exclude — no dedicated
|
||||
@@ -162,6 +182,11 @@ impl FsSourceConnector {
|
||||
path = %rel_path.display(),
|
||||
"skip: generated-file marker detected"
|
||||
);
|
||||
fs_skips.events.push(FsSkipEvent {
|
||||
doc_path: rel_path.to_string_lossy().replace('\\', "/"),
|
||||
reason: "generated",
|
||||
detail: None,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -189,6 +214,11 @@ impl FsSourceConnector {
|
||||
max_lines = self.max_file_lines,
|
||||
"skip: code file exceeds size cap"
|
||||
);
|
||||
fs_skips.events.push(FsSkipEvent {
|
||||
doc_path: rel_path.to_string_lossy().replace('\\', "/"),
|
||||
reason: "size_exceeded",
|
||||
detail: None,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -218,6 +248,16 @@ pub struct FsScanSkips {
|
||||
/// Sample paths per spec §5.5 (≤ 5 per category). Paths are
|
||||
/// workspace-relative POSIX strings when available, absolute otherwise.
|
||||
pub skip_examples: SkipExamples,
|
||||
/// v0.20.x ingest log: per-file skip events for structured log writing.
|
||||
pub events: Vec<FsSkipEvent>,
|
||||
}
|
||||
|
||||
/// A single per-file skip event for structured ingest log (v0.20.x).
///
/// One value is pushed onto `FsScanSkips::events` for each file the scan
/// decides to skip, alongside the existing per-category counters. The struct
/// carries plain data only (an owned path string, a static reason tag, and an
/// optional detail string).
|
||||
#[derive(Debug)]
|
||||
pub struct FsSkipEvent {
|
||||
/// Path of the skipped file as a string with `\` replaced by `/`.
/// At the visible push sites it is built from the path with the scan root
/// stripped when possible (the unstripped path is used otherwise), converted
/// lossily to UTF-8.
pub doc_path: String,
|
||||
/// Machine-readable skip reason. The push sites visible in this change use
/// `"builtin_blacklist"`, `"gitignore"`, `"kebabignore"`, `"generated"` and
/// `"size_exceeded"`.
// NOTE(review): other reasons may be emitted from code outside this diff — confirm.
pub reason: &'static str,
|
||||
/// Optional extra context for the skip. For `"builtin_blacklist"` events this
/// is the file extension with a leading dot, or `None` when the path has no
/// extension; every other visible push site sets `None`.
pub detail: Option<String>,
|
||||
}
|
||||
|
||||
/// Push a path into a sample vec (cap = 5) as a workspace-relative POSIX
|
||||
|
||||
Reference in New Issue
Block a user