perf(neighbor_api): fold first_seen into cached map — fix #1627 r3 regression (#1632)

## TL;DR
Post-merge regression introduced by #1627 r3 (commit `e2212f50`):
`buildNodeInfoMap` in `cmd/server/neighbor_api.go` ran an uncached
`SELECT … FROM nodes` scan on every call. Folded `first_seen` into the
already-cached `getCachedNodesAndPM` (30s TTL) so the 4 hot handlers
that call `buildNodeInfoMap` no longer pay for a full table scan per
request.

## Before / After

`buildNodeInfoMap` is called by **4 hot handlers**:
- `cmd/server/neighbor_api.go:130`
- `cmd/server/neighbor_api.go:297`
- `cmd/server/neighbor_debug.go:83`
- `cmd/server/node_reach.go:421`

| | Before | After |
|---|---|---|
| `SELECT … FROM nodes` per call | 1 (uncached) | 0 (cache hit) |
| `SELECT … FROM observers` per call | 1 (uncached) | 1 (unchanged) |
| At Cascadia scale (~2600 nodes) | full scan × 4 handlers × N req/s |
one scan / 30s |

## How

- Extended the `getAllNodes` schema probe to also `COALESCE(first_seen,
'')`. Falls back through the existing richest → leanest ladder if the
column is missing.
- `nodeInfo.FirstSeen` is therefore populated for every cached entry in
`getCachedNodesAndPM`.
- `buildNodeInfoMap` drops its second `SELECT` entirely and just copies
`nodeInfo` values out of the cached map.
- Public signature of `buildNodeInfoMap` is unchanged.
`node_reach.go:421` still sees `nodeInfo.FirstSeen` populated, served
from cache.

`cmd/server/store.go` is touched because `getAllNodes` is the only
sensible owner of the `first_seen` SELECT — adding a parallel cache
would duplicate the 30s TTL machinery this fix is designed to leverage.

## Test (red → green)

- Commit 1 (`test:`): `TestBuildNodeInfoMap_FirstSeenIsCached` — calls
`buildNodeInfoMap`, mutates `first_seen` out-of-band via a separate rw
connection, calls it again, and asserts both calls return the same
(cached) value. Fails on `origin/master` (call 2 sees the mutated value,
proving the uncached scan).
- Commit 2 (`perf:`): the fold. Test now passes.

## Refs

Post-merge audit identified this as the only MAJOR finding from #1627;
recommendation was a follow-up hot-fix PR. This is that PR.

---------

Co-authored-by: openclaw-bot <bot@openclaw>
Co-authored-by: openclaw-bot <bot@openclaw.local>
This commit is contained in:
Kpa-clawbot
2026-06-09 01:24:46 -07:00
committed by GitHub
parent 8540b01cb1
commit 078225a54e
3 changed files with 151 additions and 35 deletions
+3 -22
View File
@@ -429,6 +429,9 @@ func (s *Server) buildNodeInfoMap() map[string]nodeInfo {
if s.store == nil {
return nil
}
// FirstSeen is folded into getAllNodes (and therefore into the 30s
// node cache) so callers like /api/nodes/{pk}/reach get the field
// without a per-request SELECT — fixes #1627 r3 regression.
nodes, _ := s.store.getCachedNodesAndPM()
m := make(map[string]nodeInfo, len(nodes))
for _, n := range nodes {
@@ -452,28 +455,6 @@ func (s *Server) buildNodeInfoMap() map[string]nodeInfo {
}
}
}
// Fold in nodes.first_seen so callers (e.g. /api/nodes/{pk}/reach)
// don't need a per-request single-row SELECT. One bulk scan amortises
// across the whole map; missing/NULL rows are silently skipped (the
// node may be observer-only or pre-first_seen-schema).
fsRows, err := s.db.conn.Query("SELECT LOWER(public_key), COALESCE(first_seen,'') FROM nodes")
if err == nil {
defer fsRows.Close()
for fsRows.Next() {
var pk, fs string
if fsRows.Scan(&pk, &fs) != nil {
continue
}
if fs == "" {
continue
}
if entry, ok := m[pk]; ok {
entry.FirstSeen = fs
m[pk] = entry
}
}
}
}
return m
+120
View File
@@ -525,3 +525,123 @@ func TestBuildNodeInfoMap_ObserverEnrichment(t *testing.T) {
}
}
}
// TestBuildNodeInfoMap_FirstSeenIsCached asserts the regression introduced by
// #1627 r3 stays fixed: the per-pubkey first_seen field MUST come from the
// already-30s-cached getCachedNodesAndPM path, not from a fresh uncached
// `SELECT … FROM nodes` scan on every call.
//
// Method (no DB-driver wrapper needed): mutate the underlying SQLite file's
// first_seen via a separate rw connection between two consecutive calls to
// buildNodeInfoMap(). If first_seen is read fresh on every call (the
// regression), the second call sees the new value. If folded into the
// existing 30s node cache, both calls return the original value — same as
// every other nodeInfo field that comes from getAllNodes().
func TestBuildNodeInfoMap_FirstSeenIsCached(t *testing.T) {
tmpDir := t.TempDir()
dbPath := tmpDir + "/test.db"
// Seed via rw connection.
rw, err := sql.Open("sqlite", dbPath)
if err != nil {
t.Fatal(err)
}
defer rw.Close()
for _, stmt := range []string{
"CREATE TABLE nodes (public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, first_seen TEXT, advert_count INTEGER)",
"CREATE TABLE observers (id TEXT, name TEXT, iata TEXT)",
"INSERT INTO nodes VALUES ('AAAA1111', 'Repeater-1', 'repeater', 0, 0, '', '2024-01-01T00:00:00Z', 0)",
} {
if _, err := rw.Exec(stmt); err != nil {
t.Fatalf("seed exec %q: %v", stmt, err)
}
}
db, err := OpenDB(dbPath)
if err != nil {
t.Fatal(err)
}
defer db.conn.Close()
store := NewPacketStore(db, nil)
store.Load()
srv := &Server{
db: db,
store: store,
perfStats: NewPerfStats(),
}
// Call 1: warm cache and record observed first_seen.
m1 := srv.buildNodeInfoMap()
first1 := m1["aaaa1111"].FirstSeen
if first1 != "2024-01-01T00:00:00Z" {
t.Fatalf("setup: expected first_seen=2024-01-01T00:00:00Z, got %q", first1)
}
// Mutate first_seen out-of-band via the rw connection. Any code path
// that re-reads first_seen from disk (uncached) will see this new
// value; a path that folds first_seen into the 30s node cache will
// not, because the cache is well under 30s old.
if _, err := rw.Exec("UPDATE nodes SET first_seen='2099-12-31T23:59:59Z' WHERE public_key='AAAA1111'"); err != nil {
t.Fatalf("mutate: %v", err)
}
// Call 2: should match call 1 if first_seen is cached.
m2 := srv.buildNodeInfoMap()
first2 := m2["aaaa1111"].FirstSeen
if first2 != first1 {
t.Errorf("buildNodeInfoMap re-scanned nodes.first_seen uncached (#1627 r3 regression): "+
"call 1 saw %q, call 2 saw %q after out-of-band UPDATE; expected both calls to return "+
"the cached value because getCachedNodesAndPM has a 30s TTL",
first1, first2)
}
}
// TestGetAllNodes_FirstSeenSchemaFallback exercises the schema-probe rung that
// fires when nodes.first_seen is missing. The richest SELECT errors out, the
// loop falls through to the next-richest query, and the resulting nodeInfo
// values must have empty FirstSeen with no panic. Regression coverage for the
// existing fallback branch (#1632 review loop 1).
func TestGetAllNodes_FirstSeenSchemaFallback(t *testing.T) {
tmpDir := t.TempDir()
dbPath := tmpDir + "/test.db"
// Seed a nodes table WITHOUT first_seen (advert_count + last_seen present).
rw, err := sql.Open("sqlite", dbPath)
if err != nil {
t.Fatal(err)
}
defer rw.Close()
for _, stmt := range []string{
"CREATE TABLE nodes (public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, advert_count INTEGER)",
"CREATE TABLE observers (id TEXT, name TEXT, iata TEXT)",
"INSERT INTO nodes VALUES ('BBBB2222', 'Repeater-2', 'repeater', 0, 0, '2024-02-02T00:00:00Z', 3)",
} {
if _, err := rw.Exec(stmt); err != nil {
t.Fatalf("seed exec %q: %v", stmt, err)
}
}
db, err := OpenDB(dbPath)
if err != nil {
t.Fatal(err)
}
defer db.conn.Close()
store := NewPacketStore(db, nil)
nodes := store.getAllNodes()
if len(nodes) != 1 {
t.Fatalf("expected 1 row from fallback rung, got %d", len(nodes))
}
n := nodes[0]
if n.PublicKey != "BBBB2222" {
t.Errorf("PublicKey mismatch: got %q", n.PublicKey)
}
if n.FirstSeen != "" {
t.Errorf("FirstSeen should be empty when nodes.first_seen column is missing, got %q", n.FirstSeen)
}
if n.ObservationCount != 3 {
t.Errorf("ObservationCount should still populate from advert_count fallback, got %d", n.ObservationCount)
}
}
+28 -13
View File
@@ -6064,23 +6064,32 @@ func (s *PacketStore) logSchemaDegradationOnce(msg string) {
}
func (s *PacketStore) getAllNodes() []nodeInfo {
// Schema probe: try richest → leanest. Logs a one-shot warning when we
// fall back to a thinner schema so operators see that a column is
// missing and the new tiebreak features are degraded. See #1197
// (adversarial r1 #10).
rows, err := s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen, COALESCE(advert_count, 0) FROM nodes")
// Schema probe: try richest → leanest. Logs a one-shot warning at each
// rung when we fall back to a thinner schema so operators see that a
// column is missing and the new tiebreak features are degraded. See
// #1197 (adversarial r1 #10). first_seen was added (#1627 r3) so callers
// like /api/nodes/{pk}/reach can avoid a per-request single-row SELECT;
// we fold it into the cached load (see getCachedNodesAndPM, 30s TTL) so
// the 4 buildNodeInfoMap call sites don't each pay for a fresh scan.
rows, err := s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen, COALESCE(advert_count, 0), COALESCE(first_seen, '') FROM nodes")
hasLastSeen := true
hasAdvertCount := true
hasFirstSeen := true
if err != nil {
s.logSchemaDegradationOnce("nodes.advert_count missing — tier-3/4 ObservationCount tiebreak degraded; resolveWithContext will fall back to lex-pubkey order")
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen FROM nodes")
hasAdvertCount = false
s.logSchemaDegradationOnce("nodes.first_seen missing — buildNodeInfoMap will not populate FirstSeen for /api/nodes/{pk}/reach and friends")
hasFirstSeen = false
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen, COALESCE(advert_count, 0) FROM nodes")
if err != nil {
s.logSchemaDegradationOnce("nodes.last_seen missing — node freshness signal unavailable")
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon FROM nodes")
hasLastSeen = false
s.logSchemaDegradationOnce("nodes.advert_count missing — tier-3/4 ObservationCount tiebreak degraded; resolveWithContext will fall back to lex-pubkey order")
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen FROM nodes")
hasAdvertCount = false
if err != nil {
return nil
s.logSchemaDegradationOnce("nodes.last_seen missing — node freshness signal unavailable")
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon FROM nodes")
hasLastSeen = false
if err != nil {
return nil
}
}
}
}
@@ -6092,7 +6101,10 @@ func (s *PacketStore) getAllNodes() []nodeInfo {
var lat, lon sql.NullFloat64
var lastSeen sql.NullString
var advertCount sql.NullInt64
if hasAdvertCount {
var firstSeen sql.NullString
if hasFirstSeen {
rows.Scan(&pk, &name, &role, &lat, &lon, &lastSeen, &advertCount, &firstSeen)
} else if hasAdvertCount {
rows.Scan(&pk, &name, &role, &lat, &lon, &lastSeen, &advertCount)
} else if hasLastSeen {
rows.Scan(&pk, &name, &role, &lat, &lon, &lastSeen)
@@ -6100,6 +6112,9 @@ func (s *PacketStore) getAllNodes() []nodeInfo {
rows.Scan(&pk, &name, &role, &lat, &lon)
}
n := nodeInfo{PublicKey: pk, Name: nullStrVal(name), Role: nullStrVal(role)}
if hasFirstSeen && firstSeen.Valid {
n.FirstSeen = firstSeen.String
}
if lat.Valid && lon.Valid {
n.Lat = lat.Float64
n.Lon = lon.Float64