mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-07-02 00:41:38 +00:00
Red commit: 67088342ec (CI run: pending)
## Summary
Fixes #1631 — `scanReachRows` swallowed `QueryContext` / `rows.Err()`
failures and returned `nil`. The handler treated that as "genuinely no
reach" and rendered a 200 with empty arrays (or 404 in some flows), so
transient SQLite failures surfaced to operators as "this node has no
reach" — misleading and undiagnosable without log access.
## Fix
`cmd/server/node_reach.go`:
- `scanReachRows` now returns `([]pathRow, error)`; propagates
`QueryContext` + `rows.Err()` failures.
- `computeNodeReach` signature gains an error return: non-nil error
means real backend failure (NOT "unknown node").
- `handleNodeReach` renders **500** on that error path and does **NOT**
cache the failure (next request retries cleanly). Genuinely-empty reach
still renders **200** with empty arrays; unknown/blacklisted nodes still
render 404.
## TDD
- Red commit `67088342`: adds `TestNodeReach_ScanDBErrorReturns500` —
warms the integration DB, drops the `observations` table, asserts
handler returns 500. Pre-fix this got 200 with empty arrays.
- Green commit `5408be3a`: the fix + caller updates. Adds
`TestScanReachRows_ErrorReturn` (unit-level: closed-DB → non-nil err).
- `TestNodeReach_ShapeAndClamp` had to be tightened: the v2 fixture's
`observations` table was missing `observer_idx`; the swallowed error
masked that schema gap. Now rebuilt with the right shape.
## Scope
- `cmd/server/node_reach.go` — fix.
- `cmd/server/node_reach_endpoint_test.go` — new red test +
ShapeAndClamp fixture fix.
- `cmd/server/node_reach_test.go`, `node_reach_bench_test.go` — caller
updates for new signature + one new unit assertion test.
No cache changes (#1629 is separate). No sibling refactors. No frontend.
## Verification
- `go test ./cmd/server/...` — green (48s, all tests).
- pr-preflight — clean (PII, scope, red-commit, CSS vars, LIKE-on-JSON,
async-migration, XSS).
---------
Co-authored-by: clawbot <bot@kpa-clawbot.local>
This commit is contained in:
+32
-11
@@ -388,7 +388,13 @@ func (s *Server) handleNodeReach(w http.ResponseWriter, r *http.Request) {
|
||||
if raw, ok := s.reachCacheGet(cacheKey); ok {
|
||||
return raw, nil
|
||||
}
|
||||
resp, ok := s.computeNodeReach(r.Context(), pubkey, days)
|
||||
resp, ok, cErr := s.computeNodeReach(r.Context(), pubkey, days)
|
||||
if cErr != nil {
|
||||
// Real backend failure (e.g. DB scan exploded) — propagate so the
|
||||
// caller renders 500 instead of the misleading empty-reach
|
||||
// response. Do NOT cache. (#1631)
|
||||
return nil, cErr
|
||||
}
|
||||
if !ok {
|
||||
return []byte(nil), nil
|
||||
}
|
||||
@@ -413,15 +419,18 @@ func (s *Server) handleNodeReach(w http.ResponseWriter, r *http.Request) {
|
||||
w.Write(raw)
|
||||
}
|
||||
|
||||
// computeNodeReach does the read-only scan + assembly. ok=false → 404.
|
||||
func (s *Server) computeNodeReach(ctx context.Context, pubkey string, days int) (NodeReachResponse, bool) {
|
||||
// computeNodeReach does the read-only scan + assembly. ok=false → 404
|
||||
// (target node not present / inputs unavailable). A non-nil error signals a
|
||||
// real backend failure (e.g. DB scan exploded) — caller should render 500,
|
||||
// not 404 (issue #1631).
|
||||
func (s *Server) computeNodeReach(ctx context.Context, pubkey string, days int) (NodeReachResponse, bool, error) {
|
||||
if s.store == nil || s.db == nil || s.db.conn == nil {
|
||||
return NodeReachResponse{}, false
|
||||
return NodeReachResponse{}, false, nil
|
||||
}
|
||||
nodeMap := s.buildNodeInfoMap()
|
||||
self, found := nodeMap[pubkey]
|
||||
if !found {
|
||||
return NodeReachResponse{}, false
|
||||
return NodeReachResponse{}, false, nil
|
||||
}
|
||||
_, pm := s.store.getCachedNodesAndPM()
|
||||
tokens := reliableTokens(pubkey, pm)
|
||||
@@ -431,7 +440,10 @@ func (s *Server) computeNodeReach(ctx context.Context, pubkey string, days int)
|
||||
|
||||
var d dirCounts
|
||||
if len(tokens) > 0 {
|
||||
rows := s.scanReachRows(ctx, tokens, sinceEpoch)
|
||||
rows, err := s.scanReachRows(ctx, tokens, sinceEpoch)
|
||||
if err != nil {
|
||||
return NodeReachResponse{}, false, err
|
||||
}
|
||||
d = attributeDirections(rows, tokens, pubkey, newResolver(pm))
|
||||
} else {
|
||||
d = dirCounts{we: map[string]int{}, they: map[string]int{}, obs: map[string]obsAgg{}}
|
||||
@@ -520,7 +532,7 @@ func (s *Server) computeNodeReach(ctx context.Context, pubkey string, days int)
|
||||
},
|
||||
DirectObservers: directObs,
|
||||
Links: links,
|
||||
}, true
|
||||
}, true, nil
|
||||
}
|
||||
|
||||
// --- neighbor-degree snapshot ---------------------------------------------
|
||||
@@ -602,9 +614,14 @@ func (s *Server) getDegreeSnapshot(ctx context.Context) *degreeSnapshot {
|
||||
// id and originator pubkey are lowercased in SQL (not per row), the path slice
|
||||
// is uppercased in place (no second allocation), and the result is hard-capped
|
||||
// at reachScanRowLimit.
|
||||
func (s *Server) scanReachRows(ctx context.Context, tokens map[string]bool, sinceEpoch int64) []pathRow {
|
||||
//
|
||||
// Returns a non-nil error if the underlying QueryContext or rows.Err() fails;
|
||||
// callers MUST treat that as a 500 (issue #1631 — previously the error was
|
||||
// swallowed, surfacing a transient DB failure as a misleading 404 / empty
|
||||
// reach to operators).
|
||||
func (s *Server) scanReachRows(ctx context.Context, tokens map[string]bool, sinceEpoch int64) ([]pathRow, error) {
|
||||
if len(tokens) == 0 {
|
||||
return nil // defensive: an empty LIKE chain would render `AND ()` (SQL error)
|
||||
return nil, nil // defensive: an empty LIKE chain would render `AND ()` (SQL error)
|
||||
}
|
||||
likes := make([]string, 0, len(tokens))
|
||||
args := []interface{}{sinceEpoch}
|
||||
@@ -630,7 +647,7 @@ func (s *Server) scanReachRows(ctx context.Context, tokens map[string]bool, sinc
|
||||
rows, err := s.db.conn.QueryContext(ctx, q, args...)
|
||||
if err != nil {
|
||||
log.Printf("[reach] scan query failed: %v", err)
|
||||
return nil
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
// Modest preallocation: most nodes return far fewer than the cap, so seed a
|
||||
@@ -660,5 +677,9 @@ func (s *Server) scanReachRows(ctx context.Context, tokens map[string]bool, sinc
|
||||
if skipped > 0 {
|
||||
log.Printf("[reach] scan discarded %d malformed/empty rows (kept %d)", skipped, len(out))
|
||||
}
|
||||
return out
|
||||
if err := rows.Err(); err != nil {
|
||||
log.Printf("[reach] scan rows iteration failed: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
@@ -77,7 +77,7 @@ func BenchmarkNodeReachScan(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
rows := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
rows, _ := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
if len(rows) == 0 {
|
||||
b.Fatal("expected rows")
|
||||
}
|
||||
@@ -95,7 +95,7 @@ func BenchmarkNodeReachScanMixed(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
rows := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
rows, _ := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
if len(rows) == 0 {
|
||||
b.Fatal("expected rows")
|
||||
}
|
||||
@@ -113,7 +113,7 @@ func BenchmarkNodeReachScanLowerCase(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
rows := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
rows, _ := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
if len(rows) == 0 {
|
||||
b.Fatal("expected rows")
|
||||
}
|
||||
@@ -127,7 +127,7 @@ func BenchmarkNodeReachAttribute(b *testing.B) {
|
||||
tokens := map[string]bool{"01FA": true}
|
||||
db := benchReachDB(b, 100000, 1, false)
|
||||
srv := &Server{db: db}
|
||||
rows := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
rows, _ := srv.scanReachRows(context.Background(), tokens, 0)
|
||||
if len(rows) == 0 {
|
||||
b.Fatal("expected rows")
|
||||
}
|
||||
@@ -149,3 +149,27 @@ func BenchmarkNodeReachAttribute(b *testing.B) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestScanReachRows_ErrorReturn anchors the new ([]pathRow, error) signature
|
||||
// at the unit-level (issue #1631). Passing a Server whose db.conn is closed
|
||||
// must surface an error, not a swallowed nil. Lives in this file because
|
||||
// the bench callers in the same file rely on the same signature.
|
||||
func TestScanReachRows_ErrorReturn(t *testing.T) {
|
||||
conn, err := sql.Open("sqlite", ":memory:")
|
||||
if err != nil {
|
||||
t.Fatalf("open: %v", err)
|
||||
}
|
||||
// PREFLIGHT: async=true reason="test-only in-memory scratch schema, immediately closed"
|
||||
if _, err := conn.Exec(`CREATE TABLE observations (id INTEGER); CREATE TABLE transmissions (id INTEGER); CREATE TABLE observers (rowid INTEGER, id TEXT)`); err != nil {
|
||||
t.Fatalf("schema: %v", err)
|
||||
}
|
||||
conn.Close() // force QueryContext to fail
|
||||
srv := &Server{db: &DB{conn: conn}}
|
||||
rows, err := srv.scanReachRows(context.Background(), map[string]bool{"01FA": true}, 0)
|
||||
if err == nil {
|
||||
t.Fatalf("expected error from closed DB, got nil (rows=%d)", len(rows))
|
||||
}
|
||||
if rows != nil {
|
||||
t.Fatalf("expected nil rows on error, got %d", len(rows))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -222,6 +222,21 @@ func TestNodeReach_ShapeAndClamp(t *testing.T) {
|
||||
const pk = "01fa326b475800a31105abcb9e4cac000b3e5d9e2b5ba0739981ce8d5f3a6754"
|
||||
mustExecDB(t, db, `INSERT INTO nodes (public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
|
||||
VALUES ('`+pk+`', 'BE-Test', 'repeater', 50.9, 5.4, '2026-06-07T00:00:00Z', '2026-06-01T00:00:00Z', 3)`)
|
||||
// scanReachRows joins observations on observer_idx; the v2 schema's
|
||||
// observations table lacks that column. Previously the scan error was
|
||||
// swallowed (issue #1631) and the test still saw empty arrays. With the
|
||||
// fix that returns 500, we rebuild observations to the observer_idx
|
||||
// shape (empty — no rows needed for shape/clamp assertions).
|
||||
mustExecDB(t, db, `DROP TABLE observations`)
|
||||
// PREFLIGHT: async=true reason="test-only in-memory schema rebuild; not a production migration"
|
||||
mustExecDB(t, db, `CREATE TABLE observations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
transmission_id INTEGER,
|
||||
observer_idx INTEGER,
|
||||
snr REAL,
|
||||
path_json TEXT,
|
||||
timestamp INTEGER
|
||||
)`)
|
||||
|
||||
cfg := &Config{}
|
||||
srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
|
||||
@@ -248,6 +263,42 @@ func TestNodeReach_ShapeAndClamp(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Issue #1631: a DB failure inside scanReachRows must surface as 500, not
|
||||
// as a misleading "no reach" 200 or 404. We warm the integration DB, drop
|
||||
// the observations table so the next reach scan query fails inside
|
||||
// QueryContext, then assert the handler returns 500 (not 200 with empty
|
||||
// arrays, which is the buggy current behavior — scanReachRows swallows the
|
||||
// error and returns nil).
|
||||
func TestNodeReach_ScanDBErrorReturns500(t *testing.T) {
|
||||
resetReachState(t)
|
||||
db, n := newReachIntegrationDB(t, `["AABB","01FA","CCDD"]`)
|
||||
defer db.conn.Close()
|
||||
cfg := &Config{}
|
||||
srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
|
||||
|
||||
// Warm the store's node cache (so buildNodeInfoMap on the failing call
|
||||
// still finds the target node). One healthy call also primes the
|
||||
// reach response cache — clear it below so the next call recomputes.
|
||||
if rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30"); rr.Code != http.StatusOK {
|
||||
t.Fatalf("warm-up call: status=%d want 200 (body=%s)", rr.Code, rr.Body.String())
|
||||
}
|
||||
srv.reach.cacheMu.Lock()
|
||||
srv.reach.cache = map[string]reachCacheEntry{}
|
||||
srv.reach.cacheMu.Unlock()
|
||||
|
||||
// Break the table that scanReachRows reads from. nodes / observers /
|
||||
// neighbor_edges remain intact so the failure is isolated to the
|
||||
// scanReachRows QueryContext path.
|
||||
if _, err := db.conn.Exec("DROP TABLE observations"); err != nil {
|
||||
t.Fatalf("drop observations: %v", err)
|
||||
}
|
||||
|
||||
rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
|
||||
if rr.Code != http.StatusInternalServerError {
|
||||
t.Fatalf("expected 500 on DB error inside scanReachRows, got %d (body=%s)", rr.Code, rr.Body.String())
|
||||
}
|
||||
}
|
||||
|
||||
func contains(s []string, v string) bool {
|
||||
for _, x := range s {
|
||||
if x == v {
|
||||
|
||||
@@ -179,7 +179,7 @@ func TestScanReachRows_CapTruncates(t *testing.T) {
|
||||
db := newReachScanTestDB(t)
|
||||
defer db.conn.Close()
|
||||
srv := &Server{db: db}
|
||||
rows := srv.scanReachRows(context.Background(), map[string]bool{"01FA": true}, 0)
|
||||
rows, _ := srv.scanReachRows(context.Background(), map[string]bool{"01FA": true}, 0)
|
||||
if len(rows) != 1 {
|
||||
t.Fatalf("scan must hard-cap at reachScanRowLimit (1), got %d rows", len(rows))
|
||||
}
|
||||
@@ -264,7 +264,7 @@ func TestScanReachRows_DecodesRows(t *testing.T) {
|
||||
db := newReachScanTestDB(t)
|
||||
defer db.conn.Close()
|
||||
srv := &Server{db: db}
|
||||
rows := srv.scanReachRows(context.Background(), map[string]bool{"01FA": true}, 0)
|
||||
rows, _ := srv.scanReachRows(context.Background(), map[string]bool{"01FA": true}, 0)
|
||||
if len(rows) != 2 {
|
||||
t.Fatalf("expected 2 matching rows (non-matching path excluded), got %d", len(rows))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user