fix(ingest-progress): 리뷰 반영 — store_ms 경계 정정 + 중복 expansion 프레임 가드
- store_ms 에서 stale-vector orphan purge(LanceDB I/O) 제거 → embed/vector phase (embed_ms)로 이동. store_ms 가 이제 SQLite put_* 만 의미(진단 정확도; 편집 재색인 시 920ms 오귀속 제거). purge 는 여전히 unconditional + upsert 이전.
- 최종 expansion_progress 프레임을 done != last_done 로 가드 (throttle 배수 시 중복 프레임 + chunks==0 시 0/0 프레임 제거).
- schema/HOTFIXES: store_ms/embed_ms 설명 정정 + dangling IMPL_REPORT 참조 제거.

clippy -D warnings 0, test 312 passed.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1349,6 +1349,7 @@ fn ingest_one_asset(
|
||||
// second — never per chunk (would flood the mpsc channel).
|
||||
let mut done: u32 = 0;
|
||||
let mut last_emit = std::time::Instant::now();
|
||||
let mut last_done: u32 = 0;
|
||||
for chunk in &mut chunks {
|
||||
let key = kebab_core::derivation_cache_key(
|
||||
"alias",
|
||||
@@ -1398,18 +1399,24 @@ fn ingest_one_asset(
|
||||
},
|
||||
);
|
||||
last_emit = std::time::Instant::now();
|
||||
last_done = done;
|
||||
}
|
||||
}
|
||||
// Final frame so the counter always lands on done == total.
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::ExpansionProgress {
|
||||
idx,
|
||||
total,
|
||||
done,
|
||||
chunks: total_chunks,
|
||||
},
|
||||
);
|
||||
// Final frame so the counter lands on done == total — but only
|
||||
// if the last in-loop emit didn't already report this `done`
|
||||
// (avoids a duplicate frame when chunks is a multiple of the
|
||||
// throttle, and skips a 0/0 frame when there are no chunks).
|
||||
if done != last_done {
|
||||
crate::ingest_progress::emit(
|
||||
progress,
|
||||
crate::ingest_progress::IngestEvent::ExpansionProgress {
|
||||
idx,
|
||||
total,
|
||||
done,
|
||||
chunks: total_chunks,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(
|
||||
@@ -1433,7 +1440,6 @@ fn ingest_one_asset(
|
||||
// the kb-app job. A failure mid-way leaves the DB in a state the
|
||||
// next ingest run can re-converge (UPSERT + DELETE-then-INSERT).
|
||||
let t_store = std::time::Instant::now();
|
||||
purge_vector_orphans_for_workspace_path(app, asset, vector_store)?;
|
||||
app.sqlite
|
||||
.put_asset_with_bytes(asset, &bytes)
|
||||
.context("DocumentStore::put_asset_with_bytes")?;
|
||||
@@ -1450,6 +1456,12 @@ fn ingest_one_asset(
|
||||
|
||||
// Embed + vector upsert (only when both sides are configured).
|
||||
let t_embed = std::time::Instant::now();
|
||||
// Stale-vector purge is LanceDB I/O, so it belongs to the embed/vector
|
||||
// phase — not the SQLite `store` phase. Keeping it here makes `store_ms`
|
||||
// mean "SQLite persist only" and `embed_ms` cover all vector-store work
|
||||
// (purge + upsert), so per-phase timings attribute the bottleneck
|
||||
// correctly (review fix). Runs before any new upsert, as before.
|
||||
purge_vector_orphans_for_workspace_path(app, asset, vector_store)?;
|
||||
let mut emb_cache_hit = 0_usize;
|
||||
let mut emb_cache_miss = 0_usize;
|
||||
if let (Some(emb), Some(vec_store)) = (embedder, vector_store) {
|
||||
|
||||
Reference in New Issue
Block a user