Files
meshcore-analyzer/cmd/server/path_inspect_coldstart_test.go
Kpa-clawbot eba9e89a72 fix(#1203): path-inspector — singleflight + stale-while-revalidate (#1208)
Red commit: c84a8f575a (CI run: pending push)

Fixes #1203 — path-inspector 503 storm.

Three sub-fixes, each shipped as red→green per AGENTS TDD:

**A. Singleflight on rebuild** (`ensureNeighborGraph`)
Hand-rolled `sync.Mutex + chan` singleflight — no new deps (x/sync was
not in cmd/server's go.mod). Concurrent callers attach to one in-flight
rebuild instead of N parallel `BuildFromStore` goroutines.
- Red: `7340f23b` — test asserts ≤1 build under 10 concurrent callers
(saw 10 on master)
- Green: `abac6b3c`

**B. Stale-while-revalidate** (`handlePathInspect`)
Stale non-nil graph is served immediately with `"stale": true` while a
background rebuild runs (deduped by A). The 2s synchronous gate is gone.
Stale responses are not cached, so the next request after rebuild lands
fresh.
- Red: `c84a8f57` — test asserts 200+`stale:true`+rebuild-kickoff
(master returned 503)
- Green: `5eb86975`

**C. Cold-start 503 still kicks rebuild**
True cold start (`graph == nil`) is the only path that still returns 503
`{"retry": true}`, but it now spawns an async `ensureNeighborGraph` so
the very next request warms up.
- Green test: `f5ac7059` (passed on top of A+B)

Singleflight verified: `TestEnsureNeighborGraph_Singleflight`
Stale-while-revalidate verified:
`TestHandlePathInspect_StaleWhileRevalidate`
Cold-start verified: `TestHandlePathInspect_ColdStartKicksRebuild`

**Acceptance criteria (issue #1203):**
- [x] Concurrent requests share ONE rebuild
- [x] Stale non-nil graph served immediately with `stale:true` while the rebuild runs asynchronously
- [x] 503 only on true cold-start
- [x] Cold-start 503 kicks rebuild → follow-up warm
- [ ] p99 < 500ms under load (not unit-testable; design satisfies it)
- [x] No regression in existing tests

**Out of scope (per issue):** 5-min TTL constant, `BuildFromStore` perf,
`/api/analytics/topology`, persist-lock contention.

No new deps.

---------

Co-authored-by: corescope-bot <bot@corescope.local>
Co-authored-by: corescope-bot <bot@corescope.dev>
2026-05-15 22:46:28 -07:00

78 lines
2.6 KiB
Go

package main
import (
"bytes"
"net/http/httptest"
"sync/atomic"
"testing"
"time"
)
// TestHandlePathInspect_ColdStartKicksRebuild (issue #1203 Pair C) asserts that
// a true cold start (nil graph) returns 503 immediately AND kicks off a
// background rebuild, so the next request lands warm.
//
// Anti-tautology: if the cold-start branch stops calling ensureNeighborGraph
// (the regression that motivated this fix: the synchronous 2s gate blocked
// the response instead of kicking the rebuild and returning), no rebuild runs,
// the build counter stays at zero, the follow-up request is still 503, and
// this test fails.
func TestHandlePathInspect_ColdStartKicksRebuild(t *testing.T) {
	srv := newTestServerForInspect(t)
	srv.store.graph.Store(nil) // force the true cold-start state

	// Swap the package-level build hook for a counting stub and restore it
	// when the test returns.
	var built atomic.Int32
	origBuild := buildGraphFn
	defer func() { buildGraphFn = origBuild }()
	buildGraphFn = func(s *PacketStore) *NeighborGraph {
		built.Add(1)
		time.Sleep(100 * time.Millisecond) // small async window
		g := NewNeighborGraph()
		g.builtAt = time.Now() // presumably what IsStale() consults; keeps the new graph fresh
		return g
	}

	// Seed nodes so the post-rebuild request can return a candidate.
	pk := "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	srv.store.nodeCache = []nodeInfo{{PublicKey: pk, Name: "N", Role: "repeater"}}
	srv.store.nodePM = buildPrefixMap(srv.store.nodeCache)
	srv.store.nodeCacheTime = time.Now()

	// inspect issues one POST for the seeded "aa" prefix and returns the
	// recorded response.
	inspect := func() *httptest.ResponseRecorder {
		req := httptest.NewRequest("POST", "/api/paths/inspect", bytes.NewBufferString(`{"prefixes":["aa"]}`))
		req.Header.Set("Content-Type", "application/json")
		rec := httptest.NewRecorder()
		srv.handlePathInspect(rec, req)
		return rec
	}

	// Cold-start request: must be rejected with 503 without waiting long.
	start := time.Now()
	rr := inspect()
	elapsed := time.Since(start)
	if rr.Code != 503 {
		t.Fatalf("cold start: expected 503, got %d body=%s", rr.Code, rr.Body.String())
	}
	if elapsed > 500*time.Millisecond {
		t.Fatalf("cold-start 503 should be near-instant, took %v", elapsed)
	}

	// Wait for rebuild to land. The graph pointer is loaded once per poll so
	// the nil check and the staleness check observe the same value.
	deadline := time.Now().Add(2 * time.Second)
	for time.Now().Before(deadline) {
		if g := srv.store.graph.Load(); built.Load() >= 1 && g != nil && !g.IsStale() {
			break
		}
		time.Sleep(20 * time.Millisecond)
	}
	if built.Load() < 1 {
		t.Fatal("cold-start did not kick off a rebuild")
	}
	// Distinguish "rebuild never published a graph" from a handler failure on
	// the follow-up request below.
	if srv.store.graph.Load() == nil {
		t.Fatal("cold-start rebuild was kicked off but no graph was published within 2s")
	}

	// Follow-up request now lands warm (200, not 503). It deliberately reuses
	// the "aa" prefix: that is the only prefix the seeded node matches.
	rr2 := inspect()
	if rr2.Code != 200 {
		t.Fatalf("follow-up after cold-start rebuild expected 200, got %d body=%s", rr2.Code, rr2.Body.String())
	}
}