Files
meshcore-analyzer/cmd/server/hot_startup_consistency_test.go
T
efiten 11d2026bb1 feat(startup): hot startup — load hotStartupHours synchronously, fill retentionHours in background (#1187)
Closes #1183

## Summary

- Adds `packetStore.hotStartupHours` config key (float64, default 0 =
disabled). When set, `Load()` loads only that many hours of data
synchronously, reducing startup time on large DBs.
- A background goroutine (`loadBackgroundChunks`) fills the remaining
`retentionHours` window in daily chunks after startup completes.
Analytics indexes are rebuilt once at the end.
- `QueryPackets` and `QueryGroupedPackets` check `oldestLoaded` and fall
back to `db.QueryPackets()` for any query whose `Since`/`Until` predates
the in-memory window — covering days 8–30 permanently (beyond
`retentionHours`) and the background-fill gap during startup.
- `/api/perf` gains `hotStartupHours`, `backgroundLoadComplete`, and
`backgroundLoadProgress` fields inside `packetStore` so operators can
monitor the fill.

### Drive-by fixes

- E2E: added `gotoPackets` navigation helper used across packet-related
tests
- E2E: rewrote stripe assertion to check per-row stripe parity rather
than a fragile computed-style comparison
- E2E: theme test updated to use `#/home` as the initial route (was
`#/`)
- `db.go`: removed the RFC3339→unix-timestamp subquery path in
`buildTransmissionWhere`; `t.first_seen` is now always compared directly
as a string for both RFC3339 and non-RFC3339 inputs

## Configuration

```json
"packetStore": {
  "retentionHours": 168,
  "hotStartupHours": 24
}
```

`hotStartupHours: 0` (default) disables the feature and preserves the
legacy behavior exactly: the full `retentionHours` window is loaded at
startup. A non-zero value is recommended for large DBs to reduce startup
time.

## Test plan

- [x] `TestHotStartupConfig_Clamp` — clamping when `hotStartupHours >
retentionHours`
- [x] `TestHotStartupConfig_ZeroIsDisabled` — zero leaves feature
disabled
- [x] `TestHotStartup_LoadsOnlyHotWindow` — only hot-window packets in
memory after `Load()`
- [x] `TestHotStartup_DisabledWhenZero` — all retention packets loaded
when disabled
- [x] `TestHotStartup_loadChunk_AddsOlderData` — chunk merges correctly,
ASC order maintained
- [x] `TestHotStartup_BackgroundFillsToRetention` — background goroutine
fills to `retentionHours`
- [x] `TestHotStartup_ChunkErrorRecovery` — chunk SQL failure logged and
skipped, loop terminates
- [x] `TestHotStartup_SQLFallback_TriggeredForOldDate` — query before
`oldestLoaded` routes to SQL
- [x] `TestHotStartup_SQLFallback_NotTriggeredForRecentDate` — recent
query stays in-memory
- [x] `TestHotStartup_PerfStats` — new fields present in
`GetPerfStoreStats()` (backs the perf endpoint)
- [x] `TestHotStartup_PerfStoreHTTP` — HTTP-level: GET /api/perf returns
`hotStartupHours`, `backgroundLoadComplete`, `backgroundLoadProgress` in
`packetStore`

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: openclaw-bot <bot@openclaw.local>
Co-authored-by: CoreScope Bot <bot@corescope.local>
2026-05-15 22:46:25 -07:00

115 lines
3.4 KiB
Go

package main
import (
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// TestHotStartup_loadChunk_IndexSliceConsistency guards against regression
// of PR #1187 r3 MUST-FIX 1: the batched merge in loadChunk used to
// (1) prepend localPackets to s.packets under one critical section, then
// (2) populate s.byHash/s.byTxID/s.byObsID/s.byNode/s.byPayloadType in
// separate per-batch critical sections. Readers that acquired RLock
// between the slice update and the index updates observed packets that
// were in the slice but missing from byHash — causing GetPacketByHash to
// return nil and QueryPackets hash/node fast-paths to silently miss data
// during background load.
//
// The invariant under test: for any RLock-held snapshot, every tx in
// s.packets must also be present in s.byHash[tx.Hash] and s.byTxID[tx.ID].
// Violation = silent partial data loss.
//
// The test runs a reader goroutine that repeatedly scans s.packets under
// RLock while the main goroutine triggers loadChunk, then verifies that the
// reader actually ran, saw no violations, and that the final state is
// complete.
func TestHotStartup_loadChunk_IndexSliceConsistency(t *testing.T) {
	// 10 recent + 1200 old: 1200 > 2 * mergeBatchSize(500) so the merge
	// spans 3 batches, widening the inconsistency window for the reader.
	dbPath := createTestDBWithAgedPackets(t, 10, 1200)
	db, err := OpenDB(dbPath)
	if err != nil {
		t.Fatal(err)
	}
	defer db.conn.Close()

	store := NewPacketStore(db, &PacketStoreConfig{
		RetentionHours:  72,
		HotStartupHours: 1,
	})
	if err := store.Load(); err != nil {
		t.Fatal(err)
	}
	if len(store.packets) != 10 {
		t.Fatalf("setup: expected 10 packets after hot Load, got %d", len(store.packets))
	}

	var (
		stop       atomic.Bool
		violations atomic.Int64
		checks     atomic.Int64
		wg         sync.WaitGroup
		startOnce  sync.Once
	)
	// started is closed once the reader has completed its first full pass,
	// so the merge below is guaranteed to race against a live reader rather
	// than relying on a sleep being "long enough".
	started := make(chan struct{})

	// Reader: repeatedly snapshot under RLock. For each tx in s.packets,
	// assert s.byHash[tx.Hash] and s.byTxID[tx.ID] are non-nil. Any miss is
	// a consistency violation.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for !stop.Load() {
			store.mu.RLock()
			for _, tx := range store.packets {
				if tx == nil || tx.Hash == "" {
					continue
				}
				checks.Add(1)
				if store.byHash[tx.Hash] == nil {
					violations.Add(1)
				}
				// Also: byTxID for this tx must be populated.
				if store.byTxID[tx.ID] == nil {
					violations.Add(1)
				}
			}
			store.mu.RUnlock()
			startOnce.Do(func() { close(started) })
		}
	}()

	// Block until the reader loop is demonstrably running. stop is only set
	// after this point, so the reader always completes at least one pass.
	<-started

	// Trigger the batched merge.
	chunkEnd := time.Now().UTC().Add(-1 * time.Hour)
	chunkStart := time.Now().UTC().Add(-72 * time.Hour)
	if err := store.loadChunk(chunkStart, chunkEnd); err != nil {
		// Shut the reader down before failing so it does not outlive the test.
		stop.Store(true)
		wg.Wait()
		t.Fatalf("loadChunk failed: %v", err)
	}

	// Let reader observe a few iterations after merge completes.
	time.Sleep(5 * time.Millisecond)
	stop.Store(true)
	wg.Wait()

	// A run in which the reader never checked anything proves nothing;
	// fail loudly instead of passing vacuously.
	if checks.Load() == 0 {
		t.Fatal("reader performed no checks; index↔slice invariant was never exercised")
	}
	if v := violations.Load(); v > 0 {
		t.Fatalf("index↔slice consistency violated %d times across %d checks: "+
			"packets observed in s.packets that were missing from s.byHash/s.byTxID. "+
			"This is the silent-partial-data-loss regression from R2 #6 (commit 2ec762aa).",
			v, checks.Load())
	}

	// Post-condition sanity: final state must be fully consistent. The lock
	// is scoped to this closure so it is released before anything that
	// follows.
	func() {
		store.mu.RLock()
		defer store.mu.RUnlock()

		if len(store.packets) != 1210 {
			t.Errorf("expected 1210 packets after merge, got %d", len(store.packets))
		}
		for _, tx := range store.packets {
			// Report a nil entry as a failure rather than panicking on tx.Hash.
			if tx == nil {
				t.Errorf("post-merge: nil tx in s.packets")
				break
			}
			if store.byHash[tx.Hash] == nil {
				t.Errorf("post-merge: tx %s missing from byHash", tx.Hash)
				break
			}
		}
	}()

	// TODO: spot-check that an old packet hash is retrievable via
	// GetPacketByHash. The read lock is already released at this point; the
	// original note says GetPacketByHash takes RLock itself, so it must not
	// be called while the lock is held. Until that check exists, the blank
	// use below only keeps the "strings" import referenced.
	_ = strings.ToLower
}