Files
meshcore-analyzer/cmd/server/clock_skew_recompute_test.go
T
Kpa-clawbot 4cd8445233 perf(#1265): wire /api/observers/clock-skew + /api/nodes/clock-skew into analytics recomputer (#1266)
RED: 97f49a0c · CI:
https://github.com/Kpa-clawbot/CoreScope/actions/runs/26046530920

Fixes #1265.

## Problem
On staging, two clock-skew endpoints compute their response on every request:

- `/api/observers/clock-skew` — 3.3s
- `/api/nodes/clock-skew` — 8.9s

Both drive a full `clockSkew.Recompute` over 100k+ adverts while holding
`s.mu.RLock`, blocking under concurrent reader load.

## Fix
Wire both endpoints into the established `analytics_recomputer.go`
pattern (PRs #1248 / #1259 / #1263). Two new slots:

- `recompObserversClockSkew` — wraps `computeObserverCalibrations()`
- `recompNodesClockSkew` — wraps `computeFleetClockSkew()`

Accessors `GetObserverCalibrations` / `GetFleetClockSkew` now prefer the
atomic-pointer snapshot; on-request compute is fallback-only for the
brief window before initial sync compute lands (and for tests that skip
the recomputer).

Default interval **300s**, overridable via:

```json
"analytics": {
  "recomputeIntervalSeconds": {
    "observersClockSkew": 300,
    "nodesClockSkew": 300
  }
}
```

`config.example.json` + the `_comment_analytics` doc updated.

## TDD
- RED `97f49a0c` — `TestClockSkewRecomputersRegistered` +
`TestClockSkewHandlersSteadyStateLatency` (8 concurrent readers × 25
reqs per endpoint, p99 < 100ms gate). Fails on master: recomputer slots
nil.
- GREEN `19599375` — wire + accessor switch. p99 well under 5ms on the
test fixture.

## Verification
```
cd cmd/server && go test ./... -count=1   # ok 42s
bash ~/.openclaw/skills/pr-preflight/scripts/run-all.sh origin/master   # all gates pass
```

---------

Co-authored-by: CoreScope Bot <bot@corescope.local>
2026-05-18 12:27:44 -07:00

102 lines
2.7 KiB
Go

package main
import (
"net/http"
"net/http/httptest"
"sort"
"sync"
"testing"
"time"
)
// Issue #1265: /api/observers/clock-skew (3.3s) and /api/nodes/clock-skew (8.9s)
// must be wired into the steady-state analytics recomputer, so that reads are
// served from an atomic-pointer snapshot with p99 < 100ms under concurrent load.

// TestClockSkewRecomputersRegistered starts the analytics recomputers on a
// store backed by the test database and asserts that both clock-skew
// recomputer slots are set and that each already returns a non-nil value
// from Load.
func TestClockSkewRecomputersRegistered(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	ps := NewPacketStore(db, nil)
	stopRecomputers := ps.StartAnalyticsRecomputers(50 * time.Millisecond)
	defer stopRecomputers()

	// Fixed pause before inspecting the store; presumably long enough for the
	// first compute to be published — confirm against StartAnalyticsRecomputers.
	time.Sleep(100 * time.Millisecond)

	// Copy both slot values out while holding the read lock, then inspect the
	// copies after releasing it.
	ps.analyticsRecomputerMu.RLock()
	observers, nodes := ps.recompObserversClockSkew, ps.recompNodesClockSkew
	ps.analyticsRecomputerMu.RUnlock()

	// Cases are evaluated top to bottom, so registration of both slots is
	// checked before either snapshot is loaded.
	switch {
	case observers == nil:
		t.Fatalf("recompObserversClockSkew not registered after StartAnalyticsRecomputers (issue #1265 not fixed)")
	case nodes == nil:
		t.Fatalf("recompNodesClockSkew not registered after StartAnalyticsRecomputers (issue #1265 not fixed)")
	case observers.Load() == nil:
		t.Fatalf("recompObserversClockSkew snapshot is nil after initial compute")
	case nodes.Load() == nil:
		t.Fatalf("recompNodesClockSkew snapshot is nil after initial compute")
	}
}
// TestClockSkewHandlersSteadyStateLatency calls each clock-skew HTTP handler
// directly (via httptest, no network) from 8 concurrent goroutines issuing 25
// requests each. A subtest fails if any response status is not 200 or if the
// 99th-percentile handler latency exceeds 100ms.
func TestClockSkewHandlersSteadyStateLatency(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	ps := NewPacketStore(db, nil)
	stopRecomputers := ps.StartAnalyticsRecomputers(50 * time.Millisecond)
	defer stopRecomputers()

	// Fixed pause before issuing requests; presumably lets the recomputers
	// publish a first snapshot — confirm against StartAnalyticsRecomputers.
	time.Sleep(100 * time.Millisecond)

	srv := &Server{store: ps}

	type endpoint struct {
		name    string
		path    string
		handler http.HandlerFunc
	}
	cases := []endpoint{
		{name: "observers", path: "/api/observers/clock-skew", handler: srv.handleObserverClockSkew},
		{name: "nodes", path: "/api/nodes/clock-skew", handler: srv.handleFleetClockSkew},
	}

	for _, tc := range cases {
		tc := tc // per-iteration copy for the closures below
		t.Run(tc.name, func(t *testing.T) {
			const (
				workers           = 8
				requestsPerWorker = 25
			)
			var (
				guard     sync.Mutex // protects latencies
				latencies []time.Duration
				pending   sync.WaitGroup
			)

			for w := 0; w < workers; w++ {
				pending.Add(1)
				go func() {
					defer pending.Done()
					for n := 0; n < requestsPerWorker; n++ {
						rec := httptest.NewRecorder()
						req := httptest.NewRequest(http.MethodGet, tc.path, nil)

						// Time only the handler call itself.
						start := time.Now()
						tc.handler(rec, req)
						elapsed := time.Since(start)

						// Errorf rather than Fatalf: this runs outside the
						// goroutine executing the subtest function.
						if rec.Code != http.StatusOK {
							t.Errorf("%s status = %d, want 200", tc.path, rec.Code)
						}

						guard.Lock()
						latencies = append(latencies, elapsed)
						guard.Unlock()
					}
				}()
			}
			pending.Wait()

			sort.Slice(latencies, func(a, b int) bool { return latencies[a] < latencies[b] })

			// Index of the 99th-percentile sample in the ascending slice.
			idx := int(float64(len(latencies)) * 0.99)
			if worst := latencies[idx]; worst > 100*time.Millisecond {
				t.Fatalf("%s p99 latency = %v over %d reqs, want <100ms (recomputer snapshot)", tc.path, worst, len(latencies))
			}
		})
	}
}