mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-06-03 19:51:22 +00:00
## Summary - Removes the TTL-based inline rebuild from `GetRepeaterRelayInfoMap` and `GetRepeaterUsefulnessScoreMap` - When the cache is non-nil it is returned immediately, regardless of age — no more 700ms on-request recompute - Inline compute is retained only as a nil-cache guard (edge case: tests without a running recomputer) - Fixes the stale `// 15s-TTL gate` comment in `recomputeRepeaterEnrichmentSafe` **Root cause:** `computeRepeaterRelayInfoMap` runs inline when the TTL expires, taking ~700ms on a busy instance. `StartRepeaterEnrichmentRecomputer` (introduced in #1262) already keeps the cache warm via synchronous prewarm at startup + 5-min ticks, making the inline path dead code that fires only when the TTL is shorter than the recomputer interval (e.g. custom `analytics.defaultIntervalSeconds > 600`). ## Test plan - [ ] `TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry` — regression guard: stale sentinel is returned without recompute - [ ] `TestGetRepeaterUsefulnessScoreMap_ServesStaleOnTTLExpiry` — same for usefulness score map - [ ] `TestGetRepeaterRelayInfoMap_BuildsWhenNil` — nil-cache fallback still works - [ ] Full `-short` suite passes (`go test -short ./...`) Closes #1272 🤖 Generated with [Claude Code](https://claude.com/claude-code) --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -5,14 +5,6 @@ import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// repeaterEnrichTTL is the safety-net TTL for the bulk enrichment caches.
|
||||
// Derived as 2× the recomputer's default tick so the cache is always valid
|
||||
// between background refreshes. Without a recomputer (tests, edge cases)
|
||||
// the cache rebuilds on-thread at most once per TTL window.
|
||||
// Note: if analytics.defaultIntervalSeconds is configured above 600 the
|
||||
// TTL will expire before the recomputer runs; keep that value < TTL/2.
|
||||
const repeaterEnrichTTL = 2 * repeaterEnrichmentRecomputerDefaultInterval
|
||||
|
||||
// GetRepeaterRelayInfoMap returns a cached pubkey → RepeaterRelayInfo
|
||||
// map covering EVERY pubkey that currently appears as a path hop in any
|
||||
// non-advert StoreTx. This is the bulk equivalent of calling
|
||||
@@ -30,28 +22,34 @@ const repeaterEnrichTTL = 2 * repeaterEnrichmentRecomputerDefaultInterval
|
||||
// The cached map is keyed by lowercase pubkey/hop key (same shape as
|
||||
// byPathHop). Lookups should use strings.ToLower(pk).
|
||||
//
|
||||
// The cache is invalidated by TTL only — never by ingest. Up-to-10min
|
||||
// freshness is fine for an at-a-glance status column; if a fresher
|
||||
// signal is ever needed for a non-status caller, expose a non-cached path.
|
||||
// The cache is refreshed by the background recomputer (every 5 min by
|
||||
// default). This function never rebuilds inline on a populated cache —
|
||||
// serving a slightly stale snapshot is always preferable to a 700ms
|
||||
// on-request rebuild. The only time an inline compute happens is when
|
||||
// the cache is nil (i.e. before the recomputer's synchronous prewarm
|
||||
// completes, which can occur in tests without a running recomputer).
|
||||
func (s *PacketStore) GetRepeaterRelayInfoMap(windowHours float64) map[string]RepeaterRelayInfo {
|
||||
s.repeaterEnrichMu.Lock()
|
||||
if s.repeaterRelayCache != nil &&
|
||||
time.Since(s.repeaterRelayAt) < repeaterEnrichTTL &&
|
||||
s.repeaterRelayCacheWin == windowHours {
|
||||
cached := s.repeaterRelayCache
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
cached := s.repeaterRelayCache
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
if cached != nil {
|
||||
return cached
|
||||
}
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
|
||||
// Cache is nil — recomputer hasn't prewarmed yet (edge case: tests
|
||||
// without a running recomputer, or a request racing the initial
|
||||
// synchronous prewarm). Build once inline; the recomputer takes over.
|
||||
result := s.computeRepeaterRelayInfoMap(windowHours)
|
||||
|
||||
s.repeaterEnrichMu.Lock()
|
||||
s.repeaterRelayCache = result
|
||||
s.repeaterRelayCacheWin = windowHours
|
||||
s.repeaterRelayAt = time.Now()
|
||||
if s.repeaterRelayCache == nil {
|
||||
s.repeaterRelayCache = result
|
||||
s.repeaterRelayCacheWin = windowHours
|
||||
s.repeaterRelayAt = time.Now()
|
||||
}
|
||||
cached = s.repeaterRelayCache
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
return result
|
||||
return cached
|
||||
}
|
||||
|
||||
// computeRepeaterRelayInfoMap walks byPathHop once under a single RLock,
|
||||
@@ -177,23 +175,25 @@ func (s *PacketStore) computeRepeaterRelayInfoMap(windowHours float64) map[strin
|
||||
// GetRepeaterUsefulnessScoreMap returns a cached pubkey → 0..1 score
|
||||
// for every pubkey appearing in byPathHop. Bulk equivalent of
|
||||
// GetRepeaterUsefulnessScore. See GetRepeaterRelayInfoMap for the
|
||||
// motivation (#1257).
|
||||
// motivation (#1257) and the no-inline-rebuild rationale (#1272).
|
||||
func (s *PacketStore) GetRepeaterUsefulnessScoreMap() map[string]float64 {
|
||||
s.repeaterEnrichMu.Lock()
|
||||
if s.repeaterUsefulCache != nil && time.Since(s.repeaterUsefulAt) < repeaterEnrichTTL {
|
||||
cached := s.repeaterUsefulCache
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
cached := s.repeaterUsefulCache
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
if cached != nil {
|
||||
return cached
|
||||
}
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
|
||||
result := s.computeRepeaterUsefulnessScoreMap()
|
||||
|
||||
s.repeaterEnrichMu.Lock()
|
||||
s.repeaterUsefulCache = result
|
||||
s.repeaterUsefulAt = time.Now()
|
||||
if s.repeaterUsefulCache == nil {
|
||||
s.repeaterUsefulCache = result
|
||||
s.repeaterUsefulAt = time.Now()
|
||||
}
|
||||
cached = s.repeaterUsefulCache
|
||||
s.repeaterEnrichMu.Unlock()
|
||||
return result
|
||||
return cached
|
||||
}
|
||||
|
||||
func (s *PacketStore) computeRepeaterUsefulnessScoreMap() map[string]float64 {
|
||||
|
||||
@@ -0,0 +1,89 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry is a regression guard
|
||||
// for issue #1272.
|
||||
//
|
||||
// Background: GetRepeaterRelayInfoMap used to rebuild the cache inline
|
||||
// whenever the TTL expired, causing ~700ms latency spikes on /api/nodes.
|
||||
// The recomputer (StartRepeaterEnrichmentRecomputer) runs every 5 min and
|
||||
// already keeps the cache warm; there is no reason to rebuild on-request.
|
||||
//
|
||||
// This test verifies that a populated cache is ALWAYS returned as-is,
|
||||
// even when its timestamp is ancient (simulating TTL expiry under the old
|
||||
// code). The stale sentinel value proves no inline recompute occurred.
|
||||
func TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry(t *testing.T) {
|
||||
store := &PacketStore{
|
||||
byPathHop: make(map[string][]*StoreTx),
|
||||
}
|
||||
|
||||
// Pre-populate the cache with a sentinel entry that would NOT be
|
||||
// produced by computeRepeaterRelayInfoMap on the empty byPathHop.
|
||||
stale := map[string]RepeaterRelayInfo{
|
||||
"sentinel": {RelayCount24h: 9999},
|
||||
}
|
||||
store.repeaterRelayAt = time.Now().Add(-24 * time.Hour) // well past any TTL
|
||||
store.repeaterRelayCache = stale
|
||||
store.repeaterRelayCacheWin = 24
|
||||
|
||||
got := store.GetRepeaterRelayInfoMap(24)
|
||||
|
||||
if got["sentinel"].RelayCount24h != 9999 {
|
||||
t.Fatalf("stale cache not served: sentinel missing or overwritten (RelayCount24h=%d)", got["sentinel"].RelayCount24h)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetRepeaterUsefulnessScoreMap_ServesStaleOnTTLExpiry mirrors
|
||||
// TestGetRepeaterRelayInfoMap_ServesStaleOnTTLExpiry for the usefulness
|
||||
// score map (same fix, same root cause).
|
||||
func TestGetRepeaterUsefulnessScoreMap_ServesStaleOnTTLExpiry(t *testing.T) {
|
||||
store := &PacketStore{
|
||||
byPathHop: make(map[string][]*StoreTx),
|
||||
byPayloadType: make(map[int][]*StoreTx),
|
||||
}
|
||||
|
||||
stale := map[string]float64{"sentinel": 0.42}
|
||||
store.repeaterUsefulAt = time.Now().Add(-24 * time.Hour)
|
||||
store.repeaterUsefulCache = stale
|
||||
|
||||
got := store.GetRepeaterUsefulnessScoreMap()
|
||||
|
||||
if got["sentinel"] != 0.42 {
|
||||
t.Fatalf("stale cache not served: sentinel missing or overwritten (score=%v)", got["sentinel"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetRepeaterRelayInfoMap_BuildsWhenNil verifies that when the cache
|
||||
// is nil (before the recomputer's first prewarm), GetRepeaterRelayInfoMap
|
||||
// computes inline and caches the result for subsequent callers.
|
||||
func TestGetRepeaterRelayInfoMap_BuildsWhenNil(t *testing.T) {
|
||||
pt2 := 2
|
||||
now := time.Now().UTC()
|
||||
tx := &StoreTx{
|
||||
ID: 1,
|
||||
Hash: "abc",
|
||||
FirstSeen: now.Add(-10 * time.Minute).Format(time.RFC3339Nano),
|
||||
PayloadType: &pt2,
|
||||
}
|
||||
store := &PacketStore{
|
||||
byPathHop: map[string][]*StoreTx{"aabbcc": {tx}},
|
||||
byPayloadType: map[int][]*StoreTx{pt2: {tx}},
|
||||
mu: sync.RWMutex{},
|
||||
}
|
||||
|
||||
got := store.GetRepeaterRelayInfoMap(24)
|
||||
if _, ok := got["aabbcc"]; !ok {
|
||||
t.Fatal("inline compute did not produce entry for seeded hop key")
|
||||
}
|
||||
|
||||
// Second call must return the cached result, not a fresh recompute.
|
||||
got2 := store.GetRepeaterRelayInfoMap(24)
|
||||
if got2["aabbcc"].RelayCount24h != got["aabbcc"].RelayCount24h {
|
||||
t.Fatal("second call returned different map — cache not installed")
|
||||
}
|
||||
}
|
||||
@@ -88,9 +88,9 @@ func (s *PacketStore) StartRepeaterEnrichmentRecomputer(windowHours float64, int
|
||||
// background goroutine (the previous snapshot remains valid).
|
||||
func recomputeRepeaterEnrichmentSafe(s *PacketStore, windowHours float64) {
|
||||
defer func() { _ = recover() }()
|
||||
// Bypass the 15s-TTL gate by forcing a fresh recompute and
|
||||
// installing the result. The public Get* helpers would return the
|
||||
// existing cache when within TTL; we want to refresh proactively.
|
||||
// Write directly to the cache fields under mutex rather than going
|
||||
// through the public Get* helpers — those return the existing
|
||||
// non-nil cache immediately, so calling them here would be a no-op.
|
||||
relay := s.computeRepeaterRelayInfoMap(windowHours)
|
||||
useful := s.computeRepeaterUsefulnessScoreMap()
|
||||
now := time.Now()
|
||||
|
||||
Reference in New Issue
Block a user