mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-05-22 09:55:10 +00:00
eba9e89a72
Red commit: c84a8f575a (CI run: pending
push)
Fixes #1203 — path-inspector 503 storm.
Three sub-fixes, each shipped as red→green per AGENTS TDD:
**A. Singleflight on rebuild** (`ensureNeighborGraph`)
Hand-rolled `sync.Mutex + chan` singleflight — no new deps (x/sync was
not in cmd/server's go.mod). Concurrent callers attach to one in-flight
rebuild instead of N parallel `BuildFromStore` goroutines.
- Red: `7340f23b` — test asserts ≤1 build under 10 concurrent callers
(saw 10 on master)
- Green: `abac6b3c`
**B. Stale-while-revalidate** (`handlePathInspect`)
Stale non-nil graph is served immediately with `"stale": true` while a
background rebuild runs (deduped by A). The 2s synchronous gate is gone.
Stale responses are not cached, so the next request after rebuild lands
fresh.
- Red: `c84a8f57` — test asserts 200+`stale:true`+rebuild-kickoff
(master returned 503)
- Green: `5eb86975`
**C. Cold-start 503 still kicks rebuild**
True cold start (`graph == nil`) is the only path that still returns 503
`{"retry": true}`, but it now spawns an async `ensureNeighborGraph` so
the very next request warms up.
- Green test: `f5ac7059` (passed on top of A+B)
Singleflight verified: `TestEnsureNeighborGraph_Singleflight`
Stale-while-revalidate verified:
`TestHandlePathInspect_StaleWhileRevalidate`
Cold-start verified: `TestHandlePathInspect_ColdStartKicksRebuild`
**Acceptance criteria (issue #1203):**
- [x] Concurrent requests share ONE rebuild
- [x] Stale non-nil graph served with `stale:true` async
- [x] 503 only on true cold-start
- [x] Cold-start 503 kicks rebuild → follow-up warm
- [ ] p99 < 500ms under load (not unit-testable; design satisfies it)
- [x] No regression in existing tests
**Out of scope (per issue):** 5-min TTL constant, `BuildFromStore` perf,
`/api/analytics/topology`, persist-lock contention.
No new deps.
---------
Co-authored-by: corescope-bot <bot@corescope.local>
Co-authored-by: corescope-bot <bot@corescope.dev>
69 lines
2.2 KiB
Go
69 lines
2.2 KiB
Go
package main
|
|
|
|
import (
|
|
"bytes"
|
|
"net/http/httptest"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// TestHandlePathInspect_StaleWhileRevalidate (issue #1203 Pair B) asserts that
|
|
// when s.graph is non-nil but stale, handlePathInspect serves it immediately
|
|
// with stale:true and kicks off a background rebuild. On master the handler
|
|
// times out at 2s and returns 503 instead.
|
|
//
|
|
// Anti-tautology: revert the SWR branch and this test fails (sees 503).
|
|
func TestHandlePathInspect_StaleWhileRevalidate(t *testing.T) {
|
|
srv := newTestServerForInspect(t)
|
|
srv.store.graph.Load().builtAt = time.Now().Add(-1 * time.Hour) // stale
|
|
|
|
// Seed nodes so beamSearch returns a candidate (prefix "aa").
|
|
pk := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
|
|
srv.store.nodeCache = []nodeInfo{{PublicKey: pk, Name: "N", Role: "repeater"}}
|
|
srv.store.nodePM = buildPrefixMap(srv.store.nodeCache)
|
|
srv.store.nodeCacheTime = time.Now()
|
|
|
|
// Slow rebuild — far longer than the historical 2s gate.
|
|
var built int32
|
|
origBuild := buildGraphFn
|
|
defer func() { buildGraphFn = origBuild }()
|
|
buildGraphFn = func(s *PacketStore) *NeighborGraph {
|
|
atomic.AddInt32(&built, 1)
|
|
time.Sleep(3 * time.Second)
|
|
g := NewNeighborGraph()
|
|
g.builtAt = time.Now()
|
|
return g
|
|
}
|
|
|
|
req := httptest.NewRequest("POST", "/api/paths/inspect", bytes.NewBufferString(`{"prefixes":["aa"]}`))
|
|
req.Header.Set("Content-Type", "application/json")
|
|
rr := httptest.NewRecorder()
|
|
|
|
start := time.Now()
|
|
srv.handlePathInspect(rr, req)
|
|
elapsed := time.Since(start)
|
|
|
|
if rr.Code != 200 {
|
|
t.Fatalf("expected 200 from stale-while-revalidate, got %d body=%s", rr.Code, rr.Body.String())
|
|
}
|
|
if elapsed > 1*time.Second {
|
|
t.Fatalf("handler blocked for %v — should return near-instant from stale graph", elapsed)
|
|
}
|
|
if !bytes.Contains(rr.Body.Bytes(), []byte(`"stale":true`)) {
|
|
t.Fatalf("expected stale:true in response, got: %s", rr.Body.String())
|
|
}
|
|
|
|
// Background rebuild must have been kicked off.
|
|
deadline := time.Now().Add(500 * time.Millisecond)
|
|
for time.Now().Before(deadline) {
|
|
if atomic.LoadInt32(&built) >= 1 {
|
|
break
|
|
}
|
|
time.Sleep(10 * time.Millisecond)
|
|
}
|
|
if atomic.LoadInt32(&built) < 1 {
|
|
t.Fatal("expected background rebuild to be kicked off")
|
|
}
|
|
}
|