mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-06-27 13:51:44 +00:00
938153dd92
## Problem

A relay node's **activity timeline** — and its per-node `packetsToday` / observer counts — collapses to *"only the hour the server restarted"* after every restart. Before the restart the timeline shows only the node's own adverts (~1–2/hr); all of its relay activity piles into the single post-restart hour.

## Root cause

All DB cold-load paths (`Load`, `loadChunk`, `scanAndMergeChunk`) index relay-hop attribution into `byNode` **only** from `observations.resolved_path`. But since #1287 the ingestor persists relay data as aggregate `neighbor_edges` and **never writes `resolved_path`** — it is `NULL` on every deployment (verified on a live DB: 0 of ~440k rows populated). So relay attribution is never reconstructed on startup; it only re-accumulates from live traffic (`IngestNew*`, which re-resolves from `path_json` + the neighbor graph), piling a relay node's whole history into the post-restart window.

## Fix

Server read-side only — **no schema / ingestor / migration change**. When `resolved_path` is empty, re-resolve relay hops from the already-persisted `path_json` using the in-memory prefix map + neighbor graph (the same `resolvePathForObs` compute the live ingest path already runs). `main.go` now loads the persisted neighbor graph *before* the packet load so resolution has the graph available.

Two correctness details worth a close look:

1. **Fetch the prefix-map/graph snapshot BEFORE opening each load cursor.** `getCachedNodesAndPM` issues its own DB query; doing so while a load cursor is open deadlocks on a single-connection SQLite pool (the test harness uses one).
2. **Index into `byNode` ONLY** — not the `resolved_path` / path-hop indexes. Those are cross-checked by `handleNodePaths` against the persisted `resolved_path` column (NULL here); populating them from an in-memory re-resolution would make that SQL confirmation fail and wrongly drop the tx from paths-through (#1352).
## Tests

New coverage asserts a relay pubkey reachable *only* via `path_json` lands in `byNode` after a restart-style load, for both the hot-window (`LoadChunked`) and background-window (`loadChunk`) paths. Existing #1558 (`resolved_path`) and #1352 (paths-through) tests still pass. Full `cd cmd/server && go test ./...` is green under `-race`.

## Perf

The fallback runs `resolvePathForObs` per observation with a non-empty `path_json` during cold load — the same per-packet compute the live ingest path already performs, so no new asymptotic cost. The prefix map + graph are snapshotted **once per load** (not per row); `getCachedNodesAndPM` is 30s-cached. In `loadChunk` the resolution runs in the existing lock-free scan and is accumulated locally, matching that function's "build local, merge under lock" design.

## Note on a pre-existing flaky test

`TestDistanceConcurrentRequestsDuringBuildReturn202` is timing-fragile (fails ~1/15 on `master` without this change). It relies on the lazy distance build being slow because it's the first caller of `getCachedNodesAndPM` (cold cache). This PR pre-warms that cache during `Load`, narrowing the build window, so the test fails more often in **non-race** local runs. It passes reliably under `-race` (CI mode), where the build stays slow. Flagging in case you want to harden the test separately.

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
Co-authored-by: openclaw-bot <openclaw-bot@users.noreply.github.com>
Co-authored-by: openclaw-bot <bot@openclaw>
181 lines
7.4 KiB
Go
181 lines
7.4 KiB
Go
package main
|
|
|
|
import (
|
|
"database/sql"
|
|
"fmt"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
|
|
_ "modernc.org/sqlite"
|
|
)
|
|
|
|
// createTestDBPathJSONNoResolvedPath creates a SQLite fixture under
// t.TempDir() and returns the path to the database file. The fixture
// reproduces the LIVE deployment state after #1287: the observation row keeps
// its hop list in path_json while observations.resolved_path stays NULL (the
// ingestor no longer writes it; relay data is persisted as aggregate
// neighbor_edges instead).
//
// Exactly one node is inserted: a repeater whose public_key is relayPubkey.
// hopPrefix is expected to be a prefix of that key, so the in-memory prefix
// map resolves the hop unambiguously to relayPubkey.
//
// The single transmission has an empty decoded_json ({}), so relayPubkey is
// NOT an endpoint (pubKey/destPubKey/srcPubKey); the ONLY route by which it
// can reach s.byNode is path_json → resolvePathForObs relay-hop resolution.
// firstSeen is stored both as the transmission's first_seen and as the
// observation's timestamp.
func createTestDBPathJSONNoResolvedPath(t *testing.T, relayPubkey, hopPrefix, firstSeen string) string {
	t.Helper()
	dbPath := filepath.Join(t.TempDir(), "test.db")

	conn, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL")
	if err != nil {
		t.Fatal(err)
	}
	defer conn.Close()

	// mustExec runs one statement and aborts the test on the first failure,
	// echoing the offending SQL.
	mustExec := func(stmt string, params ...interface{}) {
		_, execErr := conn.Exec(stmt, params...)
		if execErr != nil {
			t.Fatalf("setup exec failed: %v\nSQL: %s", execErr, stmt)
		}
	}

	// path_json is a one-element JSON array holding the quoted hop prefix.
	pathJSON := fmt.Sprintf(`[%q]`, hopPrefix)

	// --- schema ---

	// PREFLIGHT: async=true reason="test fixture: in-memory t.TempDir SQLite, never touches a real DB. Tables are CREATE-from-empty in a one-shot OpenDB call, not a schema migration over existing data."
	mustExec(`CREATE TABLE transmissions (
		id INTEGER PRIMARY KEY,
		raw_hex TEXT, hash TEXT, first_seen TEXT,
		route_type INTEGER, payload_type INTEGER, payload_version INTEGER,
		decoded_json TEXT
	)`)
	// resolved_path column present (matches live schema) but left NULL.
	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
	mustExec(`CREATE TABLE observations (
		id INTEGER PRIMARY KEY,
		transmission_id INTEGER,
		observer_id TEXT, observer_name TEXT,
		direction TEXT, snr REAL, rssi REAL, score INTEGER,
		path_json TEXT, timestamp TEXT,
		raw_hex TEXT,
		resolved_path TEXT
	)`)
	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
	mustExec(`CREATE TABLE observers (rowid INTEGER PRIMARY KEY, id TEXT, name TEXT, iata TEXT)`)
	// Production nodes schema uses public_key (not pubkey) — getAllNodes /
	// buildPrefixMap reads public_key, role, advert_count, first_seen.
	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
	mustExec(`CREATE TABLE nodes (
		public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL,
		last_seen TEXT, first_seen TEXT, advert_count INTEGER DEFAULT 0
	)`)
	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
	mustExec(`CREATE TABLE schema_version (version INTEGER)`)
	mustExec(`INSERT INTO schema_version (version) VALUES (1)`)
	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
	mustExec(`CREATE INDEX idx_tx_first_seen ON transmissions(first_seen)`)

	// --- rows ---

	// Role "repeater" so canAppearInPath() admits the node to the prefix map.
	mustExec(`INSERT INTO nodes (public_key, name, role, advert_count) VALUES (?,?,?,?)`,
		relayPubkey, "Relay One", "repeater", 10)

	// One transmission whose decoded_json is the empty object.
	mustExec("INSERT INTO transmissions VALUES (?,?,?,?,0,4,1,?)",
		1, "aa", "hashpjf_1", firstSeen, `{}`)

	// One observation: resolved_path is explicitly NULL, and path_json carries
	// the relay hop prefix.
	mustExec("INSERT INTO observations (id, transmission_id, observer_id, observer_name, direction, snr, rssi, score, path_json, timestamp, raw_hex, resolved_path) VALUES (?,?,?,?,?,?,?,?,?,?,?,NULL)",
		1, 1, "obs1", "Obs1", "RX", -10.0, -80.0, 5, pathJSON, firstSeen, "")

	return dbPath
}
|
|
|
|
// TestLoadChunked_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty pins the
|
|
// fix for the "relay-node analytics empty after every restart" bug.
|
|
//
|
|
// On live, observations.resolved_path is 100% NULL (since #1287 the ingestor
|
|
// persists relay data as neighbor_edges, not per-observation resolved_path).
|
|
// The cold-load paths (Load / scanAndMergeChunk) indexed relay hops ONLY from
|
|
// resolved_path, so a relay node's path-hop attribution was never rebuilt on
|
|
// startup — it only re-accumulated from live traffic, collapsing the activity
|
|
// timeline to "just the hour the server restarted".
|
|
//
|
|
// The fix: when resolved_path is empty, fall back to resolving the hops from
|
|
// the persisted path_json using the in-memory prefix map + neighbor graph
|
|
// (exactly what the live ingest path already does), then index the relay hops.
|
|
func TestLoadChunked_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty(t *testing.T) {
|
|
relayPK := "aabbccddeeff00112233445566778899aabbccddeeff00112233445566778899"
|
|
hop := "aa" // 2-hex-char path hop; unique 2-char prefix of relayPK
|
|
|
|
ts := time.Now().UTC().Format(time.RFC3339)
|
|
dbPath := createTestDBPathJSONNoResolvedPath(t, relayPK, hop, ts)
|
|
|
|
db, err := OpenDB(dbPath)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer db.conn.Close()
|
|
|
|
if !db.hasResolvedPath {
|
|
t.Fatalf("setup: fixture should expose resolved_path column; hasResolvedPath=false")
|
|
}
|
|
|
|
store := NewPacketStore(db, &PacketStoreConfig{RetentionHours: 72})
|
|
// Empty graph is sufficient: a single prefix candidate resolves without
|
|
// neighbor-affinity disambiguation. Mirrors a freshly restarted server
|
|
// that has loaded its neighbor_edges snapshot before the packet load.
|
|
store.graph.Store(NewNeighborGraph())
|
|
|
|
if err := store.LoadChunked(0); err != nil {
|
|
t.Fatalf("LoadChunked: %v", err)
|
|
}
|
|
|
|
// The relay pubkey only reachable through path_json resolution must be
|
|
// indexed in byNode for the transmission.
|
|
if got := len(store.byNode[relayPK]); got != 1 {
|
|
t.Errorf("byNode[%s]: got %d transmissions, want 1 — cold load did not "+
|
|
"resolve relay hops from path_json when resolved_path was NULL "+
|
|
"(relay history lost on restart)", relayPK, got)
|
|
}
|
|
}
|
|
|
|
// TestLoadChunk_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty covers the
|
|
// background-window loader (loadBackgroundChunks → loadChunk), which on live
|
|
// loads everything older than hotStartupHours (24h) up to retentionHours
|
|
// (168h). Without the path_json fallback here, a relay node's analytics for
|
|
// the older 6 days would still vanish on every restart even with the hot
|
|
// window fixed.
|
|
func TestLoadChunk_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty(t *testing.T) {
|
|
relayPK := "ccddeeff00112233445566778899aabbccddeeff00112233445566778899aabb"
|
|
hop := "cc"
|
|
|
|
// Aged 48h so it falls in the background window, not the hot window.
|
|
aged := time.Now().UTC().Add(-48 * time.Hour).Format(time.RFC3339)
|
|
dbPath := createTestDBPathJSONNoResolvedPath(t, relayPK, hop, aged)
|
|
|
|
db, err := OpenDB(dbPath)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer db.conn.Close()
|
|
|
|
store := NewPacketStore(db, &PacketStoreConfig{
|
|
RetentionHours: 72,
|
|
HotStartupHours: 1, // hot load must NOT pick up the 48h-old row
|
|
})
|
|
store.graph.Store(NewNeighborGraph())
|
|
|
|
if err := store.LoadChunked(0); err != nil {
|
|
t.Fatalf("LoadChunked: %v", err)
|
|
}
|
|
if got := len(store.byNode[relayPK]); got != 0 {
|
|
t.Fatalf("setup: hot load unexpectedly picked up 48h-old row; "+
|
|
"byNode[relayPK]=%d (want 0) — test would not exercise loadChunk", got)
|
|
}
|
|
|
|
chunkStart := time.Now().UTC().Add(-72 * time.Hour)
|
|
chunkEnd := time.Now().UTC().Add(-1 * time.Hour)
|
|
if err := store.loadChunk(chunkStart, chunkEnd); err != nil {
|
|
t.Fatalf("loadChunk: %v", err)
|
|
}
|
|
|
|
if got := len(store.byNode[relayPK]); got != 1 {
|
|
t.Errorf("byNode[%s]: got %d transmissions, want 1 — background loadChunk "+
|
|
"did not resolve relay hops from path_json when resolved_path was NULL "+
|
|
"(relay history lost on restart for the older retention window)", relayPK, got)
|
|
}
|
|
}
|