mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-07-04 02:31:37 +00:00
fix(#1809): green — gate background loader on LoadChunked completion
Background load was failing almost immediately at startup because
loadBackgroundChunks was spawned at FirstChunkReady (main.go) while
LoadChunked was still merging the rest of the hot window. At that
moment s.oldestLoaded is still "" (assigned at the end of
LoadChunked, chunked_load.go:330), so the bg loader read an empty value,
broke out at store.go:1462, walked zero chunks, and the coverage
gate at store.go:1543 flipped backgroundLoadFailed=true.
Fix: introduce PacketStore.RunStartupLoad(chunkSize). It runs
LoadChunked first, then — only on success and only if hotStartupHours
> 0 — calls loadBackgroundChunks(). main.go now invokes
RunStartupLoad in the same goroutine as before, so the
FirstChunkReady signal still unblocks the HTTP listener bind at
chunk #1; the bg loader is no longer raced against the rest of
LoadChunked.
No new config tunables. Test:
cmd/server/issue1809_bg_load_race_test.go:25
(Test1809_StartupLoad_BgLoaderSeesOldestLoaded) now passes. The earlier
"red" commit c9c782b5 added the same test against a pre-fix
RunStartupLoad stub that called the bg loader first.
This commit is contained in:
+25
-12
@@ -107,24 +107,37 @@ func (s *PacketStore) fireChunkCallbacks(rowsThisChunk, totalRows int) {
|
||||
|
||||
// RunStartupLoad orchestrates the startup load sequence:
|
||||
// 1. start LoadChunked (async)
|
||||
// 2. wait for FirstChunkReady (caller's HTTP listener may bind)
|
||||
// 3. spawn the background fill loader (only AFTER LoadChunked completes
|
||||
// and oldestLoaded is set; see issue #1809)
|
||||
// 2. caller waits for FirstChunkReady to bind the HTTP listener
|
||||
// 3. spawn the background fill loader AFTER LoadChunked completes,
|
||||
// so s.oldestLoaded is set before the bg loader reads it (#1809)
|
||||
//
|
||||
// chunkSize=0 uses the LoadChunked default. Returns the LoadChunked
|
||||
// error (if any).
|
||||
// chunkSize=0 uses the LoadChunked default. Blocks until LoadChunked
|
||||
// AND any background loader have finished. Callers that want to bind
|
||||
// the HTTP listener at FirstChunkReady should run this in a goroutine
|
||||
// and wait on FirstChunkReady() themselves.
|
||||
//
|
||||
// NOTE: this is the PRE-FIX stub. It reproduces issue #1809 by running
|
||||
// loadBackgroundChunks before LoadChunked has set s.oldestLoaded. The
|
||||
// bg loader then reads oldestLoaded="" and bails immediately → coverage
|
||||
// gate trips → backgroundLoadFailed=true. The fix moves bg loader
|
||||
// invocation to after LoadChunked completes.
|
||||
// Issue #1809 root cause: previously main.go spawned loadBackgroundChunks
|
||||
// at FirstChunkReady while LoadChunked was still merging the remainder
|
||||
// of the hot window. s.oldestLoaded is only assigned at the end of
|
||||
// LoadChunked (chunked_load.go:330), so the bg loader read "" and
|
||||
// bailed → coverage gate trips → backgroundLoadFailed=true. Gating the
|
||||
// bg loader on LoadChunked completion preserves the FirstChunkReady
|
||||
// HTTP-bind parallelism while ensuring oldestLoaded has a valid floor
|
||||
// when the bg loader starts.
|
||||
func (s *PacketStore) RunStartupLoad(chunkSize int) error {
|
||||
// BUG #1809: bg loader runs first, observing oldestLoaded="".
|
||||
loadErrCh := make(chan error, 1)
|
||||
go func() {
|
||||
loadErrCh <- s.LoadChunked(chunkSize)
|
||||
}()
|
||||
// Block until LoadChunked returns. Callers that want to bind their
|
||||
// HTTP listener earlier can wait on FirstChunkReady() in parallel.
|
||||
if err := <-loadErrCh; err != nil {
|
||||
return err
|
||||
}
|
||||
if s.hotStartupHours > 0 {
|
||||
s.loadBackgroundChunks()
|
||||
}
|
||||
return s.LoadChunked(chunkSize)
|
||||
return nil
|
||||
}
|
||||
|
||||
// LoadChunked streams transmissions + observations from SQLite into
|
||||
|
||||
+16
-10
@@ -215,35 +215,41 @@ func main() {
|
||||
log.Printf("[neighbor] loaded persisted neighbor graph")
|
||||
}
|
||||
|
||||
// #1009: chunked Load with early HTTP readiness. LoadChunked runs
|
||||
// #1009: chunked Load with early HTTP readiness. RunStartupLoad runs
|
||||
// asynchronously and signals FirstChunkReady after the first chunk
|
||||
// is merged so the HTTP listener can bind without waiting for the
|
||||
// full multi-minute scan to finish. loadStatusMiddleware (wired
|
||||
// below) advertises loading|ready via X-CoreScope-Load-Status.
|
||||
//
|
||||
// #1809: the background fill loader (loadBackgroundChunks) used to
|
||||
// be spawned here at FirstChunkReady, but at that point LoadChunked
|
||||
// has not yet set s.oldestLoaded → bg loader read "" and bailed →
|
||||
// coverage gate trips. RunStartupLoad now gates the bg loader on
|
||||
// LoadChunked completion, preserving FirstChunkReady's parallelism
|
||||
// for the HTTP listener bind.
|
||||
chunkSize := cfg.DBLoadChunkSize()
|
||||
loadErrCh := make(chan error, 1)
|
||||
go func() {
|
||||
loadErrCh <- store.LoadChunked(chunkSize)
|
||||
loadErrCh <- store.RunStartupLoad(chunkSize)
|
||||
}()
|
||||
select {
|
||||
case <-store.FirstChunkReady():
|
||||
log.Printf("[store] first chunk ready (chunkSize=%d) — HTTP listener may bind", chunkSize)
|
||||
case err := <-loadErrCh:
|
||||
if err != nil {
|
||||
log.Fatalf("[store] LoadChunked failed before first chunk: %v", err)
|
||||
log.Fatalf("[store] RunStartupLoad failed before first chunk: %v", err)
|
||||
}
|
||||
log.Printf("[store] LoadChunked completed before first-chunk signal (empty DB?)")
|
||||
log.Printf("[store] RunStartupLoad completed before first-chunk signal (empty DB?)")
|
||||
}
|
||||
if store.hotStartupHours > 0 {
|
||||
log.Printf("[store] background load will start after LoadChunked completes: filling retentionHours=%gh from hotStartupHours=%gh",
|
||||
store.retentionHours, store.hotStartupHours)
|
||||
}
|
||||
go func() {
|
||||
if err := <-loadErrCh; err != nil {
|
||||
log.Printf("[store] LoadChunked background error: %v", err)
|
||||
log.Printf("[store] RunStartupLoad background error: %v", err)
|
||||
}
|
||||
}()
|
||||
if store.hotStartupHours > 0 {
|
||||
log.Printf("[store] starting background load: filling retentionHours=%gh from hotStartupHours=%gh",
|
||||
store.retentionHours, store.hotStartupHours)
|
||||
go store.loadBackgroundChunks()
|
||||
}
|
||||
|
||||
// Neighbor graph: the persisted snapshot (if present) was already
|
||||
// loaded above, before the packet load. Per #1287 schema migrations
|
||||
|
||||
Reference in New Issue
Block a user