mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-06-05 15:22:20 +00:00
4cd8445233
RED: 97f49a0c · CI:
https://github.com/Kpa-clawbot/CoreScope/actions/runs/26046530920
Fixes #1265.
## Problem
On staging, two clock-skew endpoints compute their response on every request:
- `/api/observers/clock-skew` — 3.3s
- `/api/nodes/clock-skew` — 8.9s
Both drive a full `clockSkew.Recompute` over 100k+ adverts while holding
`s.mu.RLock`, blocking under concurrent reader load.
## Fix
Wire both endpoints into the established `analytics_recomputer.go`
pattern (PRs #1248 / #1259 / #1263). Two new slots:
- `recompObserversClockSkew` — wraps `computeObserverCalibrations()`
- `recompNodesClockSkew` — wraps `computeFleetClockSkew()`
Accessors `GetObserverCalibrations` / `GetFleetClockSkew` now prefer the
atomic-pointer snapshot; on-request compute is fallback-only for the
brief window before initial sync compute lands (and for tests that skip
the recomputer).
Default interval **300s**, overridable via:
```json
"analytics": {
"recomputeIntervalSeconds": {
"observersClockSkew": 300,
"nodesClockSkew": 300
}
}
```
`config.example.json` + the `_comment_analytics` doc updated.
## TDD
- RED `97f49a0c` — `TestClockSkewRecomputersRegistered` +
`TestClockSkewHandlersSteadyStateLatency` (8 concurrent readers × 25
reqs per endpoint, p99 < 100ms gate). Fails on master: recomputer slots
nil.
- GREEN `19599375` — wire + accessor switch. p99 well under 5ms on the
test fixture.
## Verification
```
cd cmd/server && go test ./... -count=1 # ok 42s
bash ~/.openclaw/skills/pr-preflight/scripts/run-all.sh origin/master # all gates pass
```
---------
Co-authored-by: CoreScope Bot <bot@corescope.local>
102 lines
2.7 KiB
Go
102 lines
2.7 KiB
Go
package main
|
|
|
|
import (
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"sort"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// Issue #1265: /api/observers/clock-skew (3.3s) and /api/nodes/clock-skew (8.9s)
|
|
// must be wired into the steady-state analytics recomputer so reads serve
|
|
// from an atomic-pointer snapshot in <100ms p99 under concurrent load.
|
|
|
|
func TestClockSkewRecomputersRegistered(t *testing.T) {
|
|
db := setupTestDB(t)
|
|
defer db.Close()
|
|
store := NewPacketStore(db, nil)
|
|
|
|
stop := store.StartAnalyticsRecomputers(50 * time.Millisecond)
|
|
defer stop()
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
store.analyticsRecomputerMu.RLock()
|
|
rcObs := store.recompObserversClockSkew
|
|
rcNodes := store.recompNodesClockSkew
|
|
store.analyticsRecomputerMu.RUnlock()
|
|
|
|
if rcObs == nil {
|
|
t.Fatalf("recompObserversClockSkew not registered after StartAnalyticsRecomputers (issue #1265 not fixed)")
|
|
}
|
|
if rcNodes == nil {
|
|
t.Fatalf("recompNodesClockSkew not registered after StartAnalyticsRecomputers (issue #1265 not fixed)")
|
|
}
|
|
if rcObs.Load() == nil {
|
|
t.Fatalf("recompObserversClockSkew snapshot is nil after initial compute")
|
|
}
|
|
if rcNodes.Load() == nil {
|
|
t.Fatalf("recompNodesClockSkew snapshot is nil after initial compute")
|
|
}
|
|
}
|
|
|
|
func TestClockSkewHandlersSteadyStateLatency(t *testing.T) {
|
|
db := setupTestDB(t)
|
|
defer db.Close()
|
|
store := NewPacketStore(db, nil)
|
|
stop := store.StartAnalyticsRecomputers(50 * time.Millisecond)
|
|
defer stop()
|
|
time.Sleep(100 * time.Millisecond)
|
|
|
|
s := &Server{store: store}
|
|
|
|
endpoints := []struct {
|
|
name string
|
|
path string
|
|
handler http.HandlerFunc
|
|
}{
|
|
{"observers", "/api/observers/clock-skew", s.handleObserverClockSkew},
|
|
{"nodes", "/api/nodes/clock-skew", s.handleFleetClockSkew},
|
|
}
|
|
|
|
for _, ep := range endpoints {
|
|
ep := ep
|
|
t.Run(ep.name, func(t *testing.T) {
|
|
const readers = 8
|
|
const perReader = 25
|
|
var (
|
|
mu sync.Mutex
|
|
samples []time.Duration
|
|
wg sync.WaitGroup
|
|
)
|
|
wg.Add(readers)
|
|
for r := 0; r < readers; r++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
for i := 0; i < perReader; i++ {
|
|
rr := httptest.NewRecorder()
|
|
req := httptest.NewRequest(http.MethodGet, ep.path, nil)
|
|
t0 := time.Now()
|
|
ep.handler(rr, req)
|
|
dt := time.Since(t0)
|
|
if rr.Code != http.StatusOK {
|
|
t.Errorf("%s status = %d, want 200", ep.path, rr.Code)
|
|
}
|
|
mu.Lock()
|
|
samples = append(samples, dt)
|
|
mu.Unlock()
|
|
}
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
sort.Slice(samples, func(i, j int) bool { return samples[i] < samples[j] })
|
|
p99 := samples[int(float64(len(samples))*0.99)]
|
|
if p99 > 100*time.Millisecond {
|
|
t.Fatalf("%s p99 latency = %v over %d reqs, want <100ms (recomputer snapshot)", ep.path, p99, len(samples))
|
|
}
|
|
})
|
|
}
|
|
}
|