perf(server): serve stale repeater enrich cache instead of inline rebuild (#1272) (#1436)

## Summary

- Removes the TTL-based inline rebuild from `GetRepeaterRelayInfoMap`
and `GetRepeaterUsefulnessScoreMap`
- When the cache is non-nil it is returned immediately, regardless of
age — no more 700ms on-request recompute
- Inline compute is retained only as a nil-cache guard (edge case: tests
without a running recomputer)
- Fixes the stale `// 15s-TTL gate` comment in
`recomputeRepeaterEnrichmentSafe`

**Root cause:** `computeRepeaterRelayInfoMap` runs inline when the TTL
expires, taking ~700ms on a busy instance.
`StartRepeaterEnrichmentRecomputer` (introduced in #1262) already keeps
the cache warm via synchronous prewarm at startup + 5-min ticks, making
the inline path dead code that fires only when the TTL is shorter than
the recomputer interval (e.g. custom `analytics.defaultIntervalSeconds >
600`).

## Test plan

- [ ] `TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry` — regression
guard: stale sentinel is returned without recompute
- [ ] `TestGetRepeaterUsefulnessScoreMap_ServesStaleOnTTLExpiry` — same
for usefulness score map
- [ ] `TestGetRepeaterRelayInfoMap_BuildsWhenNil` — nil-cache fallback
still works
- [ ] Full `-short` suite passes (`go test -short ./...`)

Closes #1272

🤖 Generated with [Claude Code](https://claude.com/claude-code)

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
efiten
2026-05-29 00:01:58 +02:00
committed by GitHub
parent 52f131e2dc
commit 981664528e
3 changed files with 121 additions and 32 deletions
+29 -29
View File
@@ -5,14 +5,6 @@ import (
"time"
)
// repeaterEnrichTTL is the safety-net TTL for the bulk enrichment caches.
// Derived as 2× the recomputer's default tick so the cache is always valid
// between background refreshes. Without a recomputer (tests, edge cases)
// the cache rebuilds on-thread at most once per TTL window.
// Note: if analytics.defaultIntervalSeconds is configured above 600 the
// TTL will expire before the recomputer runs; keep that value < TTL/2.
const repeaterEnrichTTL = 2 * repeaterEnrichmentRecomputerDefaultInterval
// GetRepeaterRelayInfoMap returns a cached pubkey → RepeaterRelayInfo
// map covering EVERY pubkey that currently appears as a path hop in any
// non-advert StoreTx. This is the bulk equivalent of calling
@@ -30,28 +22,34 @@ const repeaterEnrichTTL = 2 * repeaterEnrichmentRecomputerDefaultInterval
// The cached map is keyed by lowercase pubkey/hop key (same shape as
// byPathHop). Lookups should use strings.ToLower(pk).
//
// The cache is invalidated by TTL only — never by ingest. Up-to-10min
// freshness is fine for an at-a-glance status column; if a fresher
// signal is ever needed for a non-status caller, expose a non-cached path.
// The cache is refreshed by the background recomputer (every 5 min by
// default). This function never rebuilds inline on a populated cache —
// serving a slightly stale snapshot is always preferable to a 700ms
// on-request rebuild. The only time an inline compute happens is when
// the cache is nil (i.e. before the recomputer's synchronous prewarm
// completes, which can occur in tests without a running recomputer).
func (s *PacketStore) GetRepeaterRelayInfoMap(windowHours float64) map[string]RepeaterRelayInfo {
s.repeaterEnrichMu.Lock()
if s.repeaterRelayCache != nil &&
time.Since(s.repeaterRelayAt) < repeaterEnrichTTL &&
s.repeaterRelayCacheWin == windowHours {
cached := s.repeaterRelayCache
s.repeaterEnrichMu.Unlock()
cached := s.repeaterRelayCache
s.repeaterEnrichMu.Unlock()
if cached != nil {
return cached
}
s.repeaterEnrichMu.Unlock()
// Cache is nil — recomputer hasn't prewarmed yet (edge case: tests
// without a running recomputer, or a request racing the initial
// synchronous prewarm). Build once inline; the recomputer takes over.
result := s.computeRepeaterRelayInfoMap(windowHours)
s.repeaterEnrichMu.Lock()
s.repeaterRelayCache = result
s.repeaterRelayCacheWin = windowHours
s.repeaterRelayAt = time.Now()
if s.repeaterRelayCache == nil {
s.repeaterRelayCache = result
s.repeaterRelayCacheWin = windowHours
s.repeaterRelayAt = time.Now()
}
cached = s.repeaterRelayCache
s.repeaterEnrichMu.Unlock()
return result
return cached
}
// computeRepeaterRelayInfoMap walks byPathHop once under a single RLock,
@@ -177,23 +175,25 @@ func (s *PacketStore) computeRepeaterRelayInfoMap(windowHours float64) map[strin
// GetRepeaterUsefulnessScoreMap returns a cached pubkey → 0..1 score
// for every pubkey appearing in byPathHop. Bulk equivalent of
// GetRepeaterUsefulnessScore. See GetRepeaterRelayInfoMap for the
// motivation (#1257).
// motivation (#1257) and the no-inline-rebuild rationale (#1272).
func (s *PacketStore) GetRepeaterUsefulnessScoreMap() map[string]float64 {
s.repeaterEnrichMu.Lock()
if s.repeaterUsefulCache != nil && time.Since(s.repeaterUsefulAt) < repeaterEnrichTTL {
cached := s.repeaterUsefulCache
s.repeaterEnrichMu.Unlock()
cached := s.repeaterUsefulCache
s.repeaterEnrichMu.Unlock()
if cached != nil {
return cached
}
s.repeaterEnrichMu.Unlock()
result := s.computeRepeaterUsefulnessScoreMap()
s.repeaterEnrichMu.Lock()
s.repeaterUsefulCache = result
s.repeaterUsefulAt = time.Now()
if s.repeaterUsefulCache == nil {
s.repeaterUsefulCache = result
s.repeaterUsefulAt = time.Now()
}
cached = s.repeaterUsefulCache
s.repeaterEnrichMu.Unlock()
return result
return cached
}
func (s *PacketStore) computeRepeaterUsefulnessScoreMap() map[string]float64 {
+89
View File
@@ -0,0 +1,89 @@
package main
import (
"sync"
"testing"
"time"
)
// TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry is a regression guard
// for issue #1272.
//
// Background: GetRepeaterRelayInfoMap used to rebuild the cache inline
// whenever the TTL expired, causing ~700ms latency spikes on /api/nodes.
// The recomputer (StartRepeaterEnrichmentRecomputer) runs every 5 min and
// already keeps the cache warm; there is no reason to rebuild on-request.
//
// This test verifies that a populated cache is ALWAYS returned as-is,
// even when its timestamp is ancient (simulating TTL expiry under the old
// code). The stale sentinel value proves no inline recompute occurred.
func TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry(t *testing.T) {
store := &PacketStore{
byPathHop: make(map[string][]*StoreTx),
}
// Pre-populate the cache with a sentinel entry that would NOT be
// produced by computeRepeaterRelayInfoMap on the empty byPathHop.
stale := map[string]RepeaterRelayInfo{
"sentinel": {RelayCount24h: 9999},
}
store.repeaterRelayAt = time.Now().Add(-24 * time.Hour) // well past any TTL
store.repeaterRelayCache = stale
store.repeaterRelayCacheWin = 24
got := store.GetRepeaterRelayInfoMap(24)
if got["sentinel"].RelayCount24h != 9999 {
t.Fatalf("stale cache not served: sentinel missing or overwritten (RelayCount24h=%d)", got["sentinel"].RelayCount24h)
}
}
// TestGetRepeaterUsefulnessScoreMap_ServesStaleOnTTLExpiry mirrors
// TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry for the usefulness
// score map (same fix, same root cause).
func TestGetRepeaterUsefulnessScoreMap_ServesStaleOnTTLExpiry(t *testing.T) {
store := &PacketStore{
byPathHop: make(map[string][]*StoreTx),
byPayloadType: make(map[int][]*StoreTx),
}
stale := map[string]float64{"sentinel": 0.42}
store.repeaterUsefulAt = time.Now().Add(-24 * time.Hour)
store.repeaterUsefulCache = stale
got := store.GetRepeaterUsefulnessScoreMap()
if got["sentinel"] != 0.42 {
t.Fatalf("stale cache not served: sentinel missing or overwritten (score=%v)", got["sentinel"])
}
}
// TestGetRepeaterRelayInfoMap_BuildsWhenNil verifies that when the cache
// is nil (before the recomputer's first prewarm), GetRepeaterRelayInfoMap
// computes inline and caches the result for subsequent callers.
func TestGetRepeaterRelayInfoMap_BuildsWhenNil(t *testing.T) {
pt2 := 2
now := time.Now().UTC()
tx := &StoreTx{
ID: 1,
Hash: "abc",
FirstSeen: now.Add(-10 * time.Minute).Format(time.RFC3339Nano),
PayloadType: &pt2,
}
store := &PacketStore{
byPathHop: map[string][]*StoreTx{"aabbcc": {tx}},
byPayloadType: map[int][]*StoreTx{pt2: {tx}},
mu: sync.RWMutex{},
}
got := store.GetRepeaterRelayInfoMap(24)
if _, ok := got["aabbcc"]; !ok {
t.Fatal("inline compute did not produce entry for seeded hop key")
}
// Second call must return the cached result, not a fresh recompute.
got2 := store.GetRepeaterRelayInfoMap(24)
if got2["aabbcc"].RelayCount24h != got["aabbcc"].RelayCount24h {
t.Fatal("second call returned different map — cache not installed")
}
}
+3 -3
View File
@@ -88,9 +88,9 @@ func (s *PacketStore) StartRepeaterEnrichmentRecomputer(windowHours float64, int
// background goroutine (the previous snapshot remains valid).
func recomputeRepeaterEnrichmentSafe(s *PacketStore, windowHours float64) {
defer func() { _ = recover() }()
// Bypass the 15s-TTL gate by forcing a fresh recompute and
// installing the result. The public Get* helpers would return the
// existing cache when within TTL; we want to refresh proactively.
// Write directly to the cache fields under mutex rather than going
// through the public Get* helpers — those return the existing
// non-nil cache immediately, so calling them here would be a no-op.
relay := s.computeRepeaterRelayInfoMap(windowHours)
useful := s.computeRepeaterUsefulnessScoreMap()
now := time.Now()