mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-05-19 22:25:15 +00:00
eba9e89a72
Red commit: c84a8f575a (CI run: pending
push)
Fixes #1203 — path-inspector 503 storm.
Three sub-fixes, each shipped as red→green per AGENTS TDD:
**A. Singleflight on rebuild** (`ensureNeighborGraph`)
Hand-rolled `sync.Mutex + chan` singleflight — no new deps (x/sync was
not in cmd/server's go.mod). Concurrent callers attach to one in-flight
rebuild instead of N parallel `BuildFromStore` goroutines.
- Red: `7340f23b` — test asserts ≤1 build under 10 concurrent callers
(saw 10 on master)
- Green: `abac6b3c`
**B. Stale-while-revalidate** (`handlePathInspect`)
Stale non-nil graph is served immediately with `"stale": true` while a
background rebuild runs (deduped by A). The 2s synchronous gate is gone.
Stale responses are not cached, so the next request after rebuild lands
fresh.
- Red: `c84a8f57` — test asserts 200+`stale:true`+rebuild-kickoff
(master returned 503)
- Green: `5eb86975`
**C. Cold-start 503 still kicks rebuild**
True cold start (`graph == nil`) is the only path that still returns 503
`{"retry": true}`, but it now spawns an async `ensureNeighborGraph` so
the very next request warms up.
- Green test: `f5ac7059` (passed on top of A+B)
Singleflight verified: `TestEnsureNeighborGraph_Singleflight`
Stale-while-revalidate verified:
`TestHandlePathInspect_StaleWhileRevalidate`
Cold-start verified: `TestHandlePathInspect_ColdStartKicksRebuild`
**Acceptance criteria (issue #1203):**
- [x] Concurrent requests share ONE rebuild
- [x] Stale non-nil graph served with `stale:true` async
- [x] 503 only on true cold-start
- [x] Cold-start 503 kicks rebuild → follow-up warm
- [ ] p99 < 500ms under load (not unit-testable; design satisfies it)
- [x] No regression in existing tests
**Out of scope (per issue):** 5-min TTL constant, `BuildFromStore` perf,
`/api/analytics/topology`, persist-lock contention.
No new deps.
---------
Co-authored-by: corescope-bot <bot@corescope.local>
Co-authored-by: corescope-bot <bot@corescope.dev>
53 lines
1.5 KiB
Go
53 lines
1.5 KiB
Go
package main
|
|
|
|
import (
|
|
"sync"
|
|
"sync/atomic"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// TestEnsureNeighborGraph_Singleflight (issue #1203 Pair A) asserts that N
|
|
// concurrent callers over a stale graph trigger at most ONE buildGraphFn
|
|
// invocation. On master the handler spawns one BuildFromStore goroutine per
|
|
// request — wasted CPU and the rebuild-storm that produces the 503 loop.
|
|
//
|
|
// Anti-tautology: revert singleflight and this test fails (it observes 10).
|
|
func TestEnsureNeighborGraph_Singleflight(t *testing.T) {
|
|
store := &PacketStore{}
|
|
stale := NewNeighborGraph()
|
|
stale.builtAt = time.Now().Add(-1 * time.Hour) // stale by TTL
|
|
store.graph.Store(stale)
|
|
|
|
var count int32
|
|
origBuild := buildGraphFn
|
|
defer func() { buildGraphFn = origBuild }()
|
|
buildGraphFn = func(s *PacketStore) *NeighborGraph {
|
|
atomic.AddInt32(&count, 1)
|
|
time.Sleep(50 * time.Millisecond) // ensure callers actually overlap
|
|
g := NewNeighborGraph()
|
|
g.builtAt = time.Now()
|
|
return g
|
|
}
|
|
|
|
var wg sync.WaitGroup
|
|
const N = 10
|
|
wg.Add(N)
|
|
for i := 0; i < N; i++ {
|
|
go func() {
|
|
defer wg.Done()
|
|
store.ensureNeighborGraph()
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
|
|
got := atomic.LoadInt32(&count)
|
|
if got != 1 {
|
|
// Singleflight must produce EXACTLY 1 build call. got==0 means the
|
|
// builder was silently skipped (a wrong impl that still passes
|
|
// `got <= 1`). got>1 means singleflight is missing/broken. Both
|
|
// are mutation-detected with `got != 1`.
|
|
t.Fatalf("expected exactly 1 buildGraphFn invocation under singleflight, got %d", got)
|
|
}
|
|
}
|