mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-05-19 23:55:11 +00:00
eba9e89a72
Red commit: c84a8f575a (CI run: pending push)
Fixes #1203 — path-inspector 503 storm.
Three sub-fixes, each shipped as red→green per AGENTS TDD:
**A. Singleflight on rebuild** (`ensureNeighborGraph`)
Hand-rolled `sync.Mutex + chan` singleflight — no new deps (x/sync was
not in cmd/server's go.mod). Concurrent callers attach to one in-flight
rebuild instead of N parallel `BuildFromStore` goroutines.
- Red: `7340f23b` — test asserts ≤1 build under 10 concurrent callers
(saw 10 on master)
- Green: `abac6b3c`
**B. Stale-while-revalidate** (`handlePathInspect`)
Stale non-nil graph is served immediately with `"stale": true` while a
background rebuild runs (deduped by A). The 2s synchronous gate is gone.
Stale responses are not cached, so the next request after rebuild lands
fresh.
- Red: `c84a8f57` — test asserts 200+`stale:true`+rebuild-kickoff
(master returned 503)
- Green: `5eb86975`
**C. Cold-start 503 still kicks rebuild**
True cold start (`graph == nil`) is the only path that still returns 503
`{"retry": true}`, but it now spawns an async `ensureNeighborGraph` so
the very next request warms up.
- Green test: `f5ac7059` (passed on top of A+B)
Singleflight verified: `TestEnsureNeighborGraph_Singleflight`
Stale-while-revalidate verified:
`TestHandlePathInspect_StaleWhileRevalidate`
Cold-start verified: `TestHandlePathInspect_ColdStartKicksRebuild`
**Acceptance criteria (issue #1203):**
- [x] Concurrent requests share ONE rebuild
- [x] Stale non-nil graph served immediately with `stale:true` while the rebuild runs asynchronously
- [x] 503 only on true cold-start
- [x] Cold-start 503 kicks rebuild → follow-up warm
- [ ] p99 < 500ms under load (not unit-testable; design satisfies it)
- [x] No regression in existing tests
**Out of scope (per issue):** 5-min TTL constant, `BuildFromStore` perf,
`/api/analytics/topology`, persist-lock contention.
No new deps.
---------
Co-authored-by: corescope-bot <bot@corescope.local>
Co-authored-by: corescope-bot <bot@corescope.dev>
78 lines
2.6 KiB
Go
78 lines
2.6 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"net/http/httptest"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// TestHandlePathInspect_ColdStartKicksRebuild (issue #1203 Pair C) asserts that
|
|
// a true cold start (nil graph) returns 503 immediately AND kicks off a
|
|
// background rebuild, so the next request lands warm.
|
|
//
|
|
// Anti-tautology: if the cold-start branch stops calling ensureNeighborGraph
|
|
// (the regression that motivated this fix — synchronous 2s gate version
|
|
// blocked on response instead of kicking-and-returning), the follow-up
|
|
// request would still be 503 and this test would fail.
|
|
func TestHandlePathInspect_ColdStartKicksRebuild(t *testing.T) {
|
|
srv := newTestServerForInspect(t)
|
|
srv.store.graph.Store(nil)
|
|
|
|
var built int32
|
|
origBuild := buildGraphFn
|
|
defer func() { buildGraphFn = origBuild }()
|
|
buildGraphFn = func(s *PacketStore) *NeighborGraph {
|
|
atomic.AddInt32(&built, 1)
|
|
time.Sleep(100 * time.Millisecond) // small async window
|
|
g := NewNeighborGraph()
|
|
g.builtAt = time.Now()
|
|
return g
|
|
}
|
|
|
|
// Seed nodes so the post-rebuild request can return a candidate.
|
|
pk := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
|
srv.store.nodeCache = []nodeInfo{{PublicKey: pk, Name: "N", Role: "repeater"}}
|
|
srv.store.nodePM = buildPrefixMap(srv.store.nodeCache)
|
|
srv.store.nodeCacheTime = time.Now()
|
|
|
|
req := httptest.NewRequest("POST", "/api/paths/inspect", bytes.NewBufferString(`{"prefixes":["aa"]}`))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
rr := httptest.NewRecorder()
|
|
|
|
start := time.Now()
|
|
srv.handlePathInspect(rr, req)
|
|
elapsed := time.Since(start)
|
|
|
|
if rr.Code != 503 {
|
|
t.Fatalf("cold start: expected 503, got %d body=%s", rr.Code, rr.Body.String())
|
|
}
|
|
if elapsed > 500*time.Millisecond {
|
|
t.Fatalf("cold-start 503 should be near-instant, took %v", elapsed)
|
|
}
|
|
|
|
// Wait for rebuild to land.
|
|
deadline := time.Now().Add(2 * time.Second)
|
|
for time.Now().Before(deadline) {
|
|
if atomic.LoadInt32(&built) >= 1 && srv.store.graph.Load() != nil && !srv.store.graph.Load().IsStale() {
|
|
break
|
|
}
|
|
time.Sleep(20 * time.Millisecond)
|
|
}
|
|
if atomic.LoadInt32(&built) < 1 {
|
|
t.Fatal("cold-start did not kick off a rebuild")
|
|
}
|
|
|
|
// Follow-up request now lands warm (200, not 503).
|
|
// Use a different prefix so the inspect cache from pair A's earlier call
|
|
// (if any) doesn't satisfy it.
|
|
rr2 := httptest.NewRecorder()
|
|
req2 := httptest.NewRequest("POST", "/api/paths/inspect", bytes.NewBufferString(`{"prefixes":["aa"]}`))
|
|
req2.Header.Set("Content-Type", "application/json")
|
|
srv.handlePathInspect(rr2, req2)
|
|
if rr2.Code != 200 {
|
|
t.Fatalf("follow-up after cold-start rebuild expected 200, got %d body=%s", rr2.Code, rr2.Body.String())
|
|
}
|
|
}
|