Files
meshcore-analyzer/cmd/server/repeater_enrich_recomputer.go
T
efiten 981664528e perf(server): serve stale repeater enrich cache instead of inline rebuild (#1272) (#1436)
## Summary

- Removes the TTL-based inline rebuild from `GetRepeaterRelayInfoMap`
and `GetRepeaterUsefulnessScoreMap`
- When the cache is non-nil it is returned immediately, regardless of
age — no more 700ms on-request recompute
- Inline compute is retained only as a nil-cache guard (edge case: tests
without a running recomputer)
- Fixes the stale `// 15s-TTL gate` comment in
`recomputeRepeaterEnrichmentSafe`

**Root cause:** `computeRepeaterRelayInfoMap` runs inline when the TTL
expires, taking ~700ms on a busy instance.
`StartRepeaterEnrichmentRecomputer` (introduced in #1262) already keeps
the cache warm via synchronous prewarm at startup + 5-min ticks, making
the inline path dead code that fires only when the TTL is shorter than
the recomputer interval (e.g. custom `analytics.defaultIntervalSeconds >
600`).

## Test plan

- [ ] `TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry` — regression
guard: stale sentinel is returned without recompute
- [ ] `TestGetRepeaterUsefulnessScoreMap_ServesStaleOnTTLExpiry` — same
for usefulness score map
- [ ] `TestGetRepeaterRelayInfoMap_BuildsWhenNil` — nil-cache fallback
still works
- [ ] Full `-short` suite passes (`go test -short ./...`)

Closes #1272

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-28 15:01:58 -07:00

105 lines
3.5 KiB
Go

package main
import (
	"log"
	"sync"
	"time"
)
// repeaterEnrichmentRecomputerDefaultInterval is the tick interval
// StartRepeaterEnrichmentRecomputer falls back to when it is called
// with a non-positive interval. It governs how often the repeater
// enrichment bulk caches are recomputed in the background once the
// initial prewarm has run.
//
// NOTE(review): an earlier revision of this comment described an
// on-request TTL fallback in repeater_enrich_bulk.go. Per the note in
// recomputeRepeaterEnrichmentSafe, the public Get* helpers return an
// existing non-nil cache immediately, so this interval is presumably
// the only thing bounding staleness now — confirm against
// repeater_enrich_bulk.go.
//
// 5min mirrors the analytics_recomputer default from #1240 and is
// plenty fresh for an at-a-glance status column.
const repeaterEnrichmentRecomputerDefaultInterval = 5 * time.Minute
// StartRepeaterEnrichmentRecomputer launches the background loop that
// keeps the repeater enrichment bulk caches fresh. Those caches back
// GetRepeaterRelayInfoMap and GetRepeaterUsefulnessScoreMap, which
// handleNodes consumes.
//
// Background (issue #1262): the bulk cache added in PR #1260 was
// rebuilt on the request-serving goroutine. On staging (75k tx, 600
// nodes) a cold rebuild took 15.7s and was hit by every cold SPA load
// through live.js's /api/nodes?limit=2000 call. Moving the rebuild
// here follows the same pattern as analytics_recomputer.go (#1240).
//
// Behaviour:
//   - A non-positive interval is replaced with
//     repeaterEnrichmentRecomputerDefaultInterval.
//   - The first call recomputes both caches synchronously before it
//     returns, then starts a goroutine that recomputes on every tick
//     using the same windowHours.
//   - Every later call is a no-op that returns a no-op stop closure;
//     the goroutine started by the first call keeps ownership. This
//     function never clears the started flag, so that also holds after
//     the loop has been stopped.
//
// The returned closure signals the goroutine to exit and waits up to
// 5s for it to do so. It may be invoked more than once.
func (s *PacketStore) StartRepeaterEnrichmentRecomputer(windowHours float64, interval time.Duration) func() {
	if interval <= 0 {
		interval = repeaterEnrichmentRecomputerDefaultInterval
	}

	quit := make(chan struct{})
	exited := make(chan struct{})

	// Claim ownership of the loop under the lock; only the first
	// caller publishes its channels on the store.
	s.repeaterEnrichRecompMu.Lock()
	alreadyStarted := s.repeaterEnrichRecompStarted
	if !alreadyStarted {
		s.repeaterEnrichRecompStarted = true
		s.repeaterEnrichRecompStop = quit
		s.repeaterEnrichRecompDone = exited
	}
	s.repeaterEnrichRecompMu.Unlock()
	if alreadyStarted {
		return func() {}
	}

	// Prewarm on the caller's goroutine so the first
	// /api/nodes?limit=2000 issued by live.js's SPA bootstrap (issue
	// #1262) finds a populated cache rather than paying for the
	// rebuild itself.
	recomputeRepeaterEnrichmentSafe(s, windowHours)

	go func() {
		// Closing exited tells the stop closure the loop is gone.
		defer close(exited)
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-quit:
				return
			case <-ticker.C:
				recomputeRepeaterEnrichmentSafe(s, windowHours)
			}
		}
	}()

	// The Once makes repeated stop calls safe: quit is closed exactly
	// one time.
	var closeQuit sync.Once
	return func() {
		closeQuit.Do(func() { close(quit) })
		// Bounded wait so a recompute that is still in flight cannot
		// hang the caller indefinitely.
		select {
		case <-exited:
		case <-time.After(5 * time.Second):
		}
	}
}
// recomputeRepeaterEnrichmentSafe rebuilds both repeater enrichment
// bulk caches (relay info and usefulness score) and publishes them on
// the store in one critical section.
//
// windowHours is passed to computeRepeaterRelayInfoMap and recorded
// alongside the relay cache.
//
// The whole recompute runs behind a panic recover: a panic in either
// compute step must not kill the background goroutine. Nothing is
// written to the store in that case, so the previous snapshot stays in
// place. The recovered value is logged rather than discarded — the
// public Get* helpers return the existing non-nil cache immediately, so
// a recompute that fails on every tick would otherwise leave stale data
// in place with no signal at all.
func recomputeRepeaterEnrichmentSafe(s *PacketStore, windowHours float64) {
	defer func() {
		if r := recover(); r != nil {
			log.Printf("repeater enrichment recompute panicked; keeping previous snapshot: %v", r)
		}
	}()

	// Write directly to the cache fields under mutex rather than going
	// through the public Get* helpers — those return the existing
	// non-nil cache immediately, so calling them here would be a no-op.
	// Both maps are computed before the lock is taken so the critical
	// section is only the field assignments.
	relay := s.computeRepeaterRelayInfoMap(windowHours)
	useful := s.computeRepeaterUsefulnessScoreMap()
	now := time.Now()

	s.repeaterEnrichMu.Lock()
	s.repeaterRelayCache = relay
	s.repeaterRelayCacheWin = windowHours
	s.repeaterRelayAt = now
	s.repeaterUsefulCache = useful
	s.repeaterUsefulAt = now
	s.repeaterEnrichMu.Unlock()
}