mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-07-02 00:41:38 +00:00
## TL;DR Post-merge regression introduced by #1627 r3 (commit `e2212f50`): `buildNodeInfoMap` in `cmd/server/neighbor_api.go` ran an uncached `SELECT … FROM nodes` scan on every call. Folded `first_seen` into the already-cached `getCachedNodesAndPM` (30s TTL) so the 4 hot handlers that call `buildNodeInfoMap` no longer pay for a full table scan per request. ## Before / After `buildNodeInfoMap` is called by **4 hot handlers**: - `cmd/server/neighbor_api.go:130` - `cmd/server/neighbor_api.go:297` - `cmd/server/neighbor_debug.go:83` - `cmd/server/node_reach.go:421` | | Before | After | |---|---|---| | `SELECT … FROM nodes` per call | 1 (uncached) | 0 (cache hit) | | `SELECT … FROM observers` per call | 1 (uncached) | 1 (unchanged) | | At Cascadia scale (~2600 nodes) | full scan × 4 handlers × N req/s | one scan / 30s | ## How - Extended the `getAllNodes` schema probe to also `COALESCE(first_seen, '')`. Falls back through the existing richest → leanest ladder if the column is missing. - `nodeInfo.FirstSeen` is therefore populated for every cached entry in `getCachedNodesAndPM`. - `buildNodeInfoMap` drops its second `SELECT` entirely and just copies `nodeInfo` values out of the cached map. - Public signature of `buildNodeInfoMap` is unchanged. `node_reach.go:421` still sees `nodeInfo.FirstSeen` populated, served from cache. `cmd/server/store.go` is touched because `getAllNodes` is the only sensible owner of the `first_seen` SELECT — adding a parallel cache would duplicate the 30s TTL machinery this fix is designed to leverage. ## Test (red → green) - Commit 1 (`test:`): `TestBuildNodeInfoMap_FirstSeenIsCached` — calls `buildNodeInfoMap`, mutates `first_seen` out-of-band via a separate rw connection, calls it again, and asserts both calls return the same (cached) value. Fails on `origin/master` (call 2 sees the mutated value, proving the uncached scan). - Commit 2 (`perf:`): the fold. Test now passes. ## Refs Post-merge audit identified this as the only MAJOR finding from #1627; recommendation was a follow-up hot-fix PR. This is that PR. --------- Co-authored-by: openclaw-bot <bot@openclaw> Co-authored-by: openclaw-bot <bot@openclaw.local>
This commit is contained in:
@@ -429,6 +429,9 @@ func (s *Server) buildNodeInfoMap() map[string]nodeInfo {
|
||||
if s.store == nil {
|
||||
return nil
|
||||
}
|
||||
// FirstSeen is folded into getAllNodes (and therefore into the 30s
|
||||
// node cache) so callers like /api/nodes/{pk}/reach get the field
|
||||
// without a per-request SELECT — fixes #1627 r3 regression.
|
||||
nodes, _ := s.store.getCachedNodesAndPM()
|
||||
m := make(map[string]nodeInfo, len(nodes))
|
||||
for _, n := range nodes {
|
||||
@@ -452,28 +455,6 @@ func (s *Server) buildNodeInfoMap() map[string]nodeInfo {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fold in nodes.first_seen so callers (e.g. /api/nodes/{pk}/reach)
|
||||
// don't need a per-request single-row SELECT. One bulk scan amortises
|
||||
// across the whole map; missing/NULL rows are silently skipped (the
|
||||
// node may be observer-only or pre-first_seen-schema).
|
||||
fsRows, err := s.db.conn.Query("SELECT LOWER(public_key), COALESCE(first_seen,'') FROM nodes")
|
||||
if err == nil {
|
||||
defer fsRows.Close()
|
||||
for fsRows.Next() {
|
||||
var pk, fs string
|
||||
if fsRows.Scan(&pk, &fs) != nil {
|
||||
continue
|
||||
}
|
||||
if fs == "" {
|
||||
continue
|
||||
}
|
||||
if entry, ok := m[pk]; ok {
|
||||
entry.FirstSeen = fs
|
||||
m[pk] = entry
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return m
|
||||
|
||||
@@ -525,3 +525,123 @@ func TestBuildNodeInfoMap_ObserverEnrichment(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuildNodeInfoMap_FirstSeenIsCached asserts the regression introduced by
|
||||
// #1627 r3 stays fixed: the per-pubkey first_seen field MUST come from the
|
||||
// already-30s-cached getCachedNodesAndPM path, not from a fresh uncached
|
||||
// `SELECT … FROM nodes` scan on every call.
|
||||
//
|
||||
// Method (no DB-driver wrapper needed): mutate the underlying SQLite file's
|
||||
// first_seen via a separate rw connection between two consecutive calls to
|
||||
// buildNodeInfoMap(). If first_seen is read fresh on every call (the
|
||||
// regression), the second call sees the new value. If folded into the
|
||||
// existing 30s node cache, both calls return the original value — same as
|
||||
// every other nodeInfo field that comes from getAllNodes().
|
||||
func TestBuildNodeInfoMap_FirstSeenIsCached(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
dbPath := tmpDir + "/test.db"
|
||||
|
||||
// Seed via rw connection.
|
||||
rw, err := sql.Open("sqlite", dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer rw.Close()
|
||||
for _, stmt := range []string{
|
||||
"CREATE TABLE nodes (public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, first_seen TEXT, advert_count INTEGER)",
|
||||
"CREATE TABLE observers (id TEXT, name TEXT, iata TEXT)",
|
||||
"INSERT INTO nodes VALUES ('AAAA1111', 'Repeater-1', 'repeater', 0, 0, '', '2024-01-01T00:00:00Z', 0)",
|
||||
} {
|
||||
if _, err := rw.Exec(stmt); err != nil {
|
||||
t.Fatalf("seed exec %q: %v", stmt, err)
|
||||
}
|
||||
}
|
||||
|
||||
db, err := OpenDB(dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer db.conn.Close()
|
||||
|
||||
store := NewPacketStore(db, nil)
|
||||
store.Load()
|
||||
|
||||
srv := &Server{
|
||||
db: db,
|
||||
store: store,
|
||||
perfStats: NewPerfStats(),
|
||||
}
|
||||
|
||||
// Call 1: warm cache and record observed first_seen.
|
||||
m1 := srv.buildNodeInfoMap()
|
||||
first1 := m1["aaaa1111"].FirstSeen
|
||||
if first1 != "2024-01-01T00:00:00Z" {
|
||||
t.Fatalf("setup: expected first_seen=2024-01-01T00:00:00Z, got %q", first1)
|
||||
}
|
||||
|
||||
// Mutate first_seen out-of-band via the rw connection. Any code path
|
||||
// that re-reads first_seen from disk (uncached) will see this new
|
||||
// value; a path that folds first_seen into the 30s node cache will
|
||||
// not, because the cache is well under 30s old.
|
||||
if _, err := rw.Exec("UPDATE nodes SET first_seen='2099-12-31T23:59:59Z' WHERE public_key='AAAA1111'"); err != nil {
|
||||
t.Fatalf("mutate: %v", err)
|
||||
}
|
||||
|
||||
// Call 2: should match call 1 if first_seen is cached.
|
||||
m2 := srv.buildNodeInfoMap()
|
||||
first2 := m2["aaaa1111"].FirstSeen
|
||||
if first2 != first1 {
|
||||
t.Errorf("buildNodeInfoMap re-scanned nodes.first_seen uncached (#1627 r3 regression): "+
|
||||
"call 1 saw %q, call 2 saw %q after out-of-band UPDATE; expected both calls to return "+
|
||||
"the cached value because getCachedNodesAndPM has a 30s TTL",
|
||||
first1, first2)
|
||||
}
|
||||
}
|
||||
|
||||
// TestGetAllNodes_FirstSeenSchemaFallback exercises the schema-probe rung that
|
||||
// fires when nodes.first_seen is missing. The richest SELECT errors out, the
|
||||
// loop falls through to the next-richest query, and the resulting nodeInfo
|
||||
// values must have empty FirstSeen with no panic. Regression coverage for the
|
||||
// existing fallback branch (#1632 review loop 1).
|
||||
func TestGetAllNodes_FirstSeenSchemaFallback(t *testing.T) {
|
||||
tmpDir := t.TempDir()
|
||||
dbPath := tmpDir + "/test.db"
|
||||
|
||||
// Seed a nodes table WITHOUT first_seen (advert_count + last_seen present).
|
||||
rw, err := sql.Open("sqlite", dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer rw.Close()
|
||||
for _, stmt := range []string{
|
||||
"CREATE TABLE nodes (public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, advert_count INTEGER)",
|
||||
"CREATE TABLE observers (id TEXT, name TEXT, iata TEXT)",
|
||||
"INSERT INTO nodes VALUES ('BBBB2222', 'Repeater-2', 'repeater', 0, 0, '2024-02-02T00:00:00Z', 3)",
|
||||
} {
|
||||
if _, err := rw.Exec(stmt); err != nil {
|
||||
t.Fatalf("seed exec %q: %v", stmt, err)
|
||||
}
|
||||
}
|
||||
|
||||
db, err := OpenDB(dbPath)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer db.conn.Close()
|
||||
|
||||
store := NewPacketStore(db, nil)
|
||||
nodes := store.getAllNodes()
|
||||
if len(nodes) != 1 {
|
||||
t.Fatalf("expected 1 row from fallback rung, got %d", len(nodes))
|
||||
}
|
||||
n := nodes[0]
|
||||
if n.PublicKey != "BBBB2222" {
|
||||
t.Errorf("PublicKey mismatch: got %q", n.PublicKey)
|
||||
}
|
||||
if n.FirstSeen != "" {
|
||||
t.Errorf("FirstSeen should be empty when nodes.first_seen column is missing, got %q", n.FirstSeen)
|
||||
}
|
||||
if n.ObservationCount != 3 {
|
||||
t.Errorf("ObservationCount should still populate from advert_count fallback, got %d", n.ObservationCount)
|
||||
}
|
||||
}
|
||||
|
||||
+28
-13
@@ -6064,23 +6064,32 @@ func (s *PacketStore) logSchemaDegradationOnce(msg string) {
|
||||
}
|
||||
|
||||
func (s *PacketStore) getAllNodes() []nodeInfo {
|
||||
// Schema probe: try richest → leanest. Logs a one-shot warning when we
|
||||
// fall back to a thinner schema so operators see that a column is
|
||||
// missing and the new tiebreak features are degraded. See #1197
|
||||
// (adversarial r1 #10).
|
||||
rows, err := s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen, COALESCE(advert_count, 0) FROM nodes")
|
||||
// Schema probe: try richest → leanest. Logs a one-shot warning at each
|
||||
// rung when we fall back to a thinner schema so operators see that a
|
||||
// column is missing and the new tiebreak features are degraded. See
|
||||
// #1197 (adversarial r1 #10). first_seen was added (#1627 r3) so callers
|
||||
// like /api/nodes/{pk}/reach can avoid a per-request single-row SELECT;
|
||||
// we fold it into the cached load (see getCachedNodesAndPM, 30s TTL) so
|
||||
// the 4 buildNodeInfoMap call sites don't each pay for a fresh scan.
|
||||
rows, err := s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen, COALESCE(advert_count, 0), COALESCE(first_seen, '') FROM nodes")
|
||||
hasLastSeen := true
|
||||
hasAdvertCount := true
|
||||
hasFirstSeen := true
|
||||
if err != nil {
|
||||
s.logSchemaDegradationOnce("nodes.advert_count missing — tier-3/4 ObservationCount tiebreak degraded; resolveWithContext will fall back to lex-pubkey order")
|
||||
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen FROM nodes")
|
||||
hasAdvertCount = false
|
||||
s.logSchemaDegradationOnce("nodes.first_seen missing — buildNodeInfoMap will not populate FirstSeen for /api/nodes/{pk}/reach and friends")
|
||||
hasFirstSeen = false
|
||||
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen, COALESCE(advert_count, 0) FROM nodes")
|
||||
if err != nil {
|
||||
s.logSchemaDegradationOnce("nodes.last_seen missing — node freshness signal unavailable")
|
||||
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon FROM nodes")
|
||||
hasLastSeen = false
|
||||
s.logSchemaDegradationOnce("nodes.advert_count missing — tier-3/4 ObservationCount tiebreak degraded; resolveWithContext will fall back to lex-pubkey order")
|
||||
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon, last_seen FROM nodes")
|
||||
hasAdvertCount = false
|
||||
if err != nil {
|
||||
return nil
|
||||
s.logSchemaDegradationOnce("nodes.last_seen missing — node freshness signal unavailable")
|
||||
rows, err = s.db.conn.Query("SELECT public_key, name, role, lat, lon FROM nodes")
|
||||
hasLastSeen = false
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -6092,7 +6101,10 @@ func (s *PacketStore) getAllNodes() []nodeInfo {
|
||||
var lat, lon sql.NullFloat64
|
||||
var lastSeen sql.NullString
|
||||
var advertCount sql.NullInt64
|
||||
if hasAdvertCount {
|
||||
var firstSeen sql.NullString
|
||||
if hasFirstSeen {
|
||||
rows.Scan(&pk, &name, &role, &lat, &lon, &lastSeen, &advertCount, &firstSeen)
|
||||
} else if hasAdvertCount {
|
||||
rows.Scan(&pk, &name, &role, &lat, &lon, &lastSeen, &advertCount)
|
||||
} else if hasLastSeen {
|
||||
rows.Scan(&pk, &name, &role, &lat, &lon, &lastSeen)
|
||||
@@ -6100,6 +6112,9 @@ func (s *PacketStore) getAllNodes() []nodeInfo {
|
||||
rows.Scan(&pk, &name, &role, &lat, &lon)
|
||||
}
|
||||
n := nodeInfo{PublicKey: pk, Name: nullStrVal(name), Role: nullStrVal(role)}
|
||||
if hasFirstSeen && firstSeen.Valid {
|
||||
n.FirstSeen = firstSeen.String
|
||||
}
|
||||
if lat.Valid && lon.Valid {
|
||||
n.Lat = lat.Float64
|
||||
n.Lon = lon.Float64
|
||||
|
||||
Reference in New Issue
Block a user