Compare commits

...

1 Commit

Author SHA1 Message Date
Kpa-clawbot
fa5ac2751d fix: debounce distance index rebuild to prevent CPU hot loop
On busy meshes (325K+ transmissions, 50 observers), every ingest poll
triggers a full distance index rebuild (1M+ hop records) because
new observations frequently pick longer paths via pickBestObservation.
With 1-second poll intervals, the rebuild never finishes before the
next one starts, pegging CPU at 100% and starving the HTTP server.

Fix: mark the distance index dirty on path changes, but rebuild it
at most once every 30 seconds. The initial Load() rebuild still runs
synchronously, and distLast is set afterward to prevent an immediate
second rebuild on the first ingest cycle.

Discovered on Cascadia Mesh instance (cascadiamesh.org) where the
server was completely unresponsive due to continuous distance index
rebuilds consuming all CPU.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-04-03 23:20:49 -07:00

View File

@@ -117,6 +117,8 @@ type PacketStore struct {
// computed during Load() and incrementally updated on ingest.
distHops []distHopRecord
distPaths []distPathRecord
distDirty bool // set when paths change; cleared after rebuild
distLast time.Time // last time distance index was rebuilt
// Cached GetNodeHashSizeInfo result — recomputed at most once every 15s
hashSizeInfoMu sync.Mutex
@@ -329,6 +331,7 @@ func (s *PacketStore) Load() error {
// Precompute distance analytics (hop distances, path totals)
s.buildDistanceIndex()
s.distLast = time.Now()
s.loaded = true
elapsed := time.Since(t0)
@@ -821,7 +824,7 @@ func (s *PacketStore) GetPerfStoreStatsTyped() PerfPacketStoreStats {
SqliteOnly: false,
MaxPackets: 2386092,
EstimatedMB: estimatedMB,
-MaxMB: 1024,
+MaxMB: s.maxMemoryMB,
Indexes: PacketStoreIndexes{
ByHash: hashIdx,
ByObserver: observerIdx,
@@ -1470,13 +1473,19 @@ func (s *PacketStore) IngestNewObservations(sinceObsID, limit int) []map[string]
}
}
-// Rebuild distance index if any paths changed (distances depend on path hops)
+// Mark distance index dirty if any paths changed (rebuild is debounced)
for txID, tx := range updatedTxs {
if tx.PathJSON != oldPaths[txID] {
-s.buildDistanceIndex()
+s.distDirty = true
break
}
}
// Rebuild at most every 30s to avoid hot-looping on busy meshes
if s.distDirty && time.Since(s.distLast) > 30*time.Second {
s.buildDistanceIndex()
s.distDirty = false
s.distLast = time.Now()
}
if len(updatedTxs) > 0 {
// Targeted cache invalidation: new observations always affect RF