From 45f30fcadc42231cb7fefa8e187bcc45dc315549 Mon Sep 17 00:00:00 2001 From: Kpa-clawbot Date: Tue, 5 May 2026 01:17:52 -0700 Subject: [PATCH] =?UTF-8?q?feat(repeater):=20liveness=20detection=20?= =?UTF-8?q?=E2=80=94=20distinguish=20actively=20relaying=20from=20advert-o?= =?UTF-8?q?nly=20(#662)=20(#1073)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Implements repeater liveness detection per #662 — distinguishes a repeater that is **actively relaying traffic** from one that is **alive but idle** (only sending its own adverts). ## Approach The backend already maintains a `byPathHop` index keyed by lowercase hop/pubkey for every transmission. Decode-window writes also key it by **resolved pubkey** for relay hops. We just weren't surfacing it. `GetRepeaterRelayInfo(pubkey, windowHours)`: - Reads `byPathHop[pubkey]`. - Skips packets whose `payload_type == 4` (advert) — a self-advert proves liveness, not relaying. - Returns the most recent `FirstSeen` as `lastRelayed`, plus `relayActive` (within window) and the `windowHours` actually used. ## Three states (per issue) | State | Indicator | Condition | |---|---|---| | 🟢 Relaying | green | `last_relayed` within `relayActiveHours` | | 🟡 Alive (idle) | yellow | repeater is in the DB but `relay_active=false` (no recent path-hop appearance, or none ever) | | ⚪ Stale | existing | falls out of the existing `getNodeStatus` logic | ## API - `GET /api/nodes` — repeater/room rows now include `last_relayed` (omitted if never observed) and `relay_active`. - `GET /api/nodes/{pubkey}` — same fields plus `relay_window_hours`. ## Config New optional field under `healthThresholds`: ```json "healthThresholds": { ..., "relayActiveHours": 24 } ``` Default 24h. Documented in `config.example.json`. ## Frontend Node detail page gains a **Last Relayed** row for repeaters/rooms with the 🟢/🟡 state badge. Tooltip explains the distinction from "Last Heard". 
## TDD

- **Red commit** `4445f91`: `repeater_liveness_test.go` + stub `GetRepeaterRelayInfo` returning zero. Active and Stale tests fail on assertion (LastRelayed empty / mismatched). Idle and IgnoresAdverts already match the desired behavior under the stub. Compiles, runs, fails on assertions — not on imports.
- **Green commit** `5fcfb57`: Implementation. All four tests pass. Full `cmd/server` suite green (~22s).

## Performance

`O(N)` over `byPathHop[pubkey]` per call. The index is bounded by store eviction; a single repeater has at most a few hundred entries on real data. The `/api/nodes` loop adds one map read + scan per repeater row — negligible against the existing enrichment work.

## Limitations (per issue body)

1. Observer coverage gaps — if no observer hears a repeater's relay, it'll show as idle even when actively relaying. This is inherent to passive observation.
2. Low-traffic networks — a repeater in a quiet area legitimately shows idle. The 🟡 indicator copy makes that explicit ("alive (idle)").
3. Hash collisions are mitigated by the existing `resolveWithContext` path before pubkeys land in `byPathHop`.

Fixes #662 --------- Co-authored-by: clawbot --- cmd/server/config.go | 8 ++ cmd/server/repeater_liveness.go | 143 +++++++++++++++++++++++ cmd/server/repeater_liveness_test.go | 162 +++++++++++++++++++++++++++ cmd/server/routes.go | 21 ++++ config.example.json | 3 +- public/nodes.js | 1 + 6 files changed, 337 insertions(+), 1 deletion(-) create mode 100644 cmd/server/repeater_liveness.go create mode 100644 cmd/server/repeater_liveness_test.go diff --git a/cmd/server/config.go b/cmd/server/config.go index 31784b11..5a8043e1 100644 --- a/cmd/server/config.go +++ b/cmd/server/config.go @@ -221,6 +221,10 @@ type HealthThresholds struct { InfraSilentHours float64 `json:"infraSilentHours"` NodeDegradedHours float64 `json:"nodeDegradedHours"` NodeSilentHours float64 `json:"nodeSilentHours"` + // RelayActiveHours: how recent a path-hop appearance must be for a + // repeater to be considered "actively relaying" vs only "alive + // (advert-only)". See issue #662. Defaults to 24h. + RelayActiveHours float64 `json:"relayActiveHours"` } // ThemeFile mirrors theme.json overlay. 
@@ -289,6 +293,7 @@ func (c *Config) GetHealthThresholds() HealthThresholds { InfraSilentHours: 72, NodeDegradedHours: 1, NodeSilentHours: 24, + RelayActiveHours: 24, } if c.HealthThresholds != nil { if c.HealthThresholds.InfraDegradedHours > 0 { @@ -303,6 +308,9 @@ func (c *Config) GetHealthThresholds() HealthThresholds { if c.HealthThresholds.NodeSilentHours > 0 { h.NodeSilentHours = c.HealthThresholds.NodeSilentHours } + if c.HealthThresholds.RelayActiveHours > 0 { + h.RelayActiveHours = c.HealthThresholds.RelayActiveHours + } } return h } diff --git a/cmd/server/repeater_liveness.go b/cmd/server/repeater_liveness.go new file mode 100644 index 00000000..47442ad3 --- /dev/null +++ b/cmd/server/repeater_liveness.go @@ -0,0 +1,143 @@ +package main + +import ( + "strings" + "time" +) + +// RepeaterRelayInfo describes whether a repeater has been observed +// relaying traffic (appearing as a path hop in non-advert packets) and +// when. This is distinct from advert-based liveness (last_seen / last_heard), +// which only proves the repeater can transmit its own adverts. +// +// See issue #662. +type RepeaterRelayInfo struct { + // LastRelayed is the ISO-8601 timestamp of the most recent non-advert + // packet where this pubkey appeared as a relay hop. Empty if never. + LastRelayed string `json:"lastRelayed,omitempty"` + // RelayActive is true if LastRelayed falls within the configured + // activity window (default 24h). + RelayActive bool `json:"relayActive"` + // WindowHours is the active-window threshold actually used. + WindowHours float64 `json:"windowHours"` + // RelayCount1h is the count of distinct non-advert packets where this + // pubkey appeared as a relay hop in the last 1 hour. + RelayCount1h int `json:"relayCount1h"` + // RelayCount24h is the count of distinct non-advert packets where this + // pubkey appeared as a relay hop in the last 24 hours. 
+ RelayCount24h int `json:"relayCount24h"` +} + +// payloadTypeAdvert is the MeshCore payload type for ADVERT packets. +// See firmware/src/Mesh.h. Adverts are NOT considered relay activity: +// a repeater that only sends adverts proves it is alive, not that it +// is forwarding traffic for other nodes. +const payloadTypeAdvert = 4 + +// parseRelayTS attempts to parse a packet first-seen timestamp using the +// formats CoreScope writes in practice. Returns zero time and false on +// failure. Accepted (in order): +// - RFC3339Nano — Go's default UTC marshal output +// - RFC3339 — second-precision ISO-8601 with offset +// - "2006-01-02T15:04:05.000Z" — millisecond-precision Z form used by ingest +func parseRelayTS(ts string) (time.Time, bool) { + if ts == "" { + return time.Time{}, false + } + if t, err := time.Parse(time.RFC3339Nano, ts); err == nil { + return t, true + } + if t, err := time.Parse(time.RFC3339, ts); err == nil { + return t, true + } + if t, err := time.Parse("2006-01-02T15:04:05.000Z", ts); err == nil { + return t, true + } + return time.Time{}, false +} + +// GetRepeaterRelayInfo returns relay-activity information for a node by +// scanning the byPathHop index for non-advert packets that name the +// pubkey as a hop. It computes the most recent appearance timestamp, +// 1h/24h hop counts, and whether the latest appearance falls within +// windowHours. +// +// Cost: O(N) over the indexed entries for `pubkey`. The byPathHop index +// is bounded by store eviction; on real data this is small per-node. +// +// Note on self-as-source: byPathHop is keyed by every hop in a packet's +// resolved path, including the originator. For ADVERT packets that's the +// node itself, which is filtered above by the payloadTypeAdvert check. 
+// For non-advert packets a node "originates" rather than "relays" only +// when it is the source; we don't currently have a clean signal for that +// distinction, so the count here is *path-hop appearances in non-advert +// packets*. In practice for a repeater nearly all such appearances are +// relay hops (the firmware doesn't originate user traffic), so this is +// the right approximation for issue #662. +func (s *PacketStore) GetRepeaterRelayInfo(pubkey string, windowHours float64) RepeaterRelayInfo { + info := RepeaterRelayInfo{WindowHours: windowHours} + if pubkey == "" { + return info + } + key := strings.ToLower(pubkey) + + s.mu.RLock() + txList := s.byPathHop[key] + // Copy only the timestamps + payload types we need so we can release + // the read lock before doing parsing/compare work below. + type entry struct { + ts string + pt int + } + scratch := make([]entry, 0, len(txList)) + for _, tx := range txList { + if tx == nil { + continue + } + pt := -1 + if tx.PayloadType != nil { + pt = *tx.PayloadType + } + scratch = append(scratch, entry{ts: tx.FirstSeen, pt: pt}) + } + s.mu.RUnlock() + + now := time.Now().UTC() + cutoff1h := now.Add(-1 * time.Hour) + cutoff24h := now.Add(-24 * time.Hour) + + var latest time.Time + var latestRaw string + for _, e := range scratch { + // Self-originated adverts are not relay activity (see header comment). 
+ if e.pt == payloadTypeAdvert { + continue + } + t, ok := parseRelayTS(e.ts) + if !ok { + continue + } + if t.After(latest) { + latest = t + latestRaw = e.ts + } + if t.After(cutoff24h) { + info.RelayCount24h++ + if t.After(cutoff1h) { + info.RelayCount1h++ + } + } + } + if latestRaw == "" { + return info + } + info.LastRelayed = latestRaw + + if windowHours > 0 { + cutoff := now.Add(-time.Duration(windowHours * float64(time.Hour))) + if latest.After(cutoff) { + info.RelayActive = true + } + } + return info +} diff --git a/cmd/server/repeater_liveness_test.go b/cmd/server/repeater_liveness_test.go new file mode 100644 index 00000000..14615540 --- /dev/null +++ b/cmd/server/repeater_liveness_test.go @@ -0,0 +1,162 @@ +package main + +import ( + "testing" + "time" +) + +// TestRepeaterRelayActivity_Active verifies that a repeater whose pubkey +// appears as a relay hop in a recent (non-advert) packet is reported with +// a non-zero lastRelayed timestamp and relayActive=true. +func TestRepeaterRelayActivity_Active(t *testing.T) { + db := setupCapabilityTestDB(t) + defer db.conn.Close() + + pubkey := "aabbccdd11223344" + db.conn.Exec("INSERT INTO nodes (public_key, name, role, last_seen) VALUES (?, ?, ?, ?)", + pubkey, "RepActive", "repeater", recentTS(1)) + + store := NewPacketStore(db, nil) + + // A non-advert packet (payload_type=1, TXT_MSG) with the repeater pubkey + // indexed as a path hop. Index by lowercase pubkey directly to mirror + // the resolved-path entries that decode-window writes. 
+ pt := 1 + relayed := &StoreTx{ + RawHex: "0100", + PayloadType: &pt, + PathJSON: `["aa"]`, + FirstSeen: recentTS(2), + } + store.mu.Lock() + relayed.ID = len(store.packets) + 1 + relayed.Hash = "test-relay-1" + store.packets = append(store.packets, relayed) + store.byHash[relayed.Hash] = relayed + store.byTxID[relayed.ID] = relayed + store.byPathHop[pubkey] = append(store.byPathHop[pubkey], relayed) + store.mu.Unlock() + + info := store.GetRepeaterRelayInfo(pubkey, 24) + if info.LastRelayed == "" { + t.Fatalf("expected non-empty LastRelayed for active relayer, got empty (RelayActive=%v)", info.RelayActive) + } + if !info.RelayActive { + t.Errorf("expected RelayActive=true within 24h window, got false (LastRelayed=%s)", info.LastRelayed) + } + if info.RelayCount1h != 0 { + t.Errorf("expected RelayCount1h=0 (relay was 2h ago, outside 1h window), got %d", info.RelayCount1h) + } + if info.RelayCount24h != 1 { + t.Errorf("expected RelayCount24h=1 (relay was 2h ago, inside 24h window), got %d", info.RelayCount24h) + } +} + +// TestRepeaterRelayActivity_Idle verifies that a repeater whose pubkey +// has not appeared as a relay hop reports an empty LastRelayed and +// relayActive=false. 
+func TestRepeaterRelayActivity_Idle(t *testing.T) { + db := setupCapabilityTestDB(t) + defer db.conn.Close() + + pubkey := "ccddeeff55667788" + db.conn.Exec("INSERT INTO nodes (public_key, name, role, last_seen) VALUES (?, ?, ?, ?)", + pubkey, "RepIdle", "repeater", recentTS(1)) + + store := NewPacketStore(db, nil) + + info := store.GetRepeaterRelayInfo(pubkey, 24) + if info.LastRelayed != "" { + t.Errorf("expected empty LastRelayed for idle repeater, got %q", info.LastRelayed) + } + if info.RelayActive { + t.Errorf("expected RelayActive=false for idle repeater, got true") + } + if info.RelayCount1h != 0 || info.RelayCount24h != 0 { + t.Errorf("expected zero relay counts for idle repeater, got 1h=%d 24h=%d", info.RelayCount1h, info.RelayCount24h) + } +} + +// TestRepeaterRelayActivity_Stale verifies that a repeater whose only +// relay-hop appearances are older than the configured window reports +// a non-empty LastRelayed but relayActive=false. +func TestRepeaterRelayActivity_Stale(t *testing.T) { + db := setupCapabilityTestDB(t) + defer db.conn.Close() + + pubkey := "1122334455667788" + db.conn.Exec("INSERT INTO nodes (public_key, name, role, last_seen) VALUES (?, ?, ?, ?)", + pubkey, "RepStale", "repeater", recentTS(1)) + + store := NewPacketStore(db, nil) + + pt := 1 + staleTS := time.Now().UTC().Add(-48 * time.Hour).Format("2006-01-02T15:04:05.000Z") + old := &StoreTx{ + RawHex: "0100", + PayloadType: &pt, + PathJSON: `["11"]`, + FirstSeen: staleTS, + } + store.mu.Lock() + old.ID = len(store.packets) + 1 + old.Hash = "test-relay-stale" + store.packets = append(store.packets, old) + store.byHash[old.Hash] = old + store.byTxID[old.ID] = old + store.byPathHop[pubkey] = append(store.byPathHop[pubkey], old) + store.mu.Unlock() + + info := store.GetRepeaterRelayInfo(pubkey, 24) + if info.LastRelayed != staleTS { + t.Errorf("expected LastRelayed=%q (stale ts), got %q", staleTS, info.LastRelayed) + } + if info.RelayActive { + t.Errorf("expected RelayActive=false for 
relay older than window, got true") + } + if info.RelayCount1h != 0 || info.RelayCount24h != 0 { + t.Errorf("expected zero relay counts for stale (>24h) repeater, got 1h=%d 24h=%d", info.RelayCount1h, info.RelayCount24h) + } +} + +// TestRepeaterRelayActivity_IgnoresAdverts verifies that adverts originated +// by the repeater itself (payload_type=4) are NOT counted as relay activity — +// adverts demonstrate liveness, not relaying. +func TestRepeaterRelayActivity_IgnoresAdverts(t *testing.T) { + db := setupCapabilityTestDB(t) + defer db.conn.Close() + + pubkey := "deadbeef00000001" + db.conn.Exec("INSERT INTO nodes (public_key, name, role, last_seen) VALUES (?, ?, ?, ?)", + pubkey, "RepAdvertOnly", "repeater", recentTS(1)) + + store := NewPacketStore(db, nil) + + // Self-advert with the repeater as its own first hop. Should NOT count. + pt := 4 + adv := &StoreTx{ + RawHex: "0140de", + PayloadType: &pt, + PathJSON: `["de"]`, + FirstSeen: recentTS(2), + } + store.mu.Lock() + adv.ID = len(store.packets) + 1 + adv.Hash = "test-advert-1" + store.packets = append(store.packets, adv) + store.byHash[adv.Hash] = adv + store.byTxID[adv.ID] = adv + store.byPathHop[pubkey] = append(store.byPathHop[pubkey], adv) + store.mu.Unlock() + + info := store.GetRepeaterRelayInfo(pubkey, 24) + if info.LastRelayed != "" { + t.Errorf("expected empty LastRelayed (adverts ignored), got %q", info.LastRelayed) + } + if info.RelayActive { + t.Errorf("expected RelayActive=false (adverts ignored), got true") + } + if info.RelayCount1h != 0 || info.RelayCount24h != 0 { + t.Errorf("expected zero relay counts (adverts ignored), got 1h=%d 24h=%d", info.RelayCount1h, info.RelayCount24h) + } +} diff --git a/cmd/server/routes.go b/cmd/server/routes.go index 457da763..57aa04d5 100644 --- a/cmd/server/routes.go +++ b/cmd/server/routes.go @@ -1097,10 +1097,20 @@ func (s *Server) handleNodes(w http.ResponseWriter, r *http.Request) { if s.store != nil { hashInfo := s.store.GetNodeHashSizeInfo() mbCap := 
s.store.GetMultiByteCapMap() + relayWindow := s.cfg.GetHealthThresholds().RelayActiveHours for _, node := range nodes { if pk, ok := node["public_key"].(string); ok { EnrichNodeWithHashSize(node, hashInfo[pk]) EnrichNodeWithMultiByte(node, mbCap[pk]) + if role, _ := node["role"].(string); role == "repeater" || role == "room" { + info := s.store.GetRepeaterRelayInfo(pk, relayWindow) + if info.LastRelayed != "" { + node["last_relayed"] = info.LastRelayed + } + node["relay_active"] = info.RelayActive + node["relay_count_1h"] = info.RelayCount1h + node["relay_count_24h"] = info.RelayCount24h + } } } } @@ -1197,6 +1207,17 @@ func (s *Server) handleNodeDetail(w http.ResponseWriter, r *http.Request) { EnrichNodeWithHashSize(node, hashInfo[pubkey]) mbCap := s.store.GetMultiByteCapMap() EnrichNodeWithMultiByte(node, mbCap[pubkey]) + if role, _ := node["role"].(string); role == "repeater" || role == "room" { + ht := s.cfg.GetHealthThresholds() + info := s.store.GetRepeaterRelayInfo(pubkey, ht.RelayActiveHours) + if info.LastRelayed != "" { + node["last_relayed"] = info.LastRelayed + } + node["relay_active"] = info.RelayActive + node["relay_window_hours"] = info.WindowHours + node["relay_count_1h"] = info.RelayCount1h + node["relay_count_24h"] = info.RelayCount24h + } } name := "" diff --git a/config.example.json b/config.example.json index 1b8b0d27..23596151 100644 --- a/config.example.json +++ b/config.example.json @@ -155,7 +155,8 @@ "infraSilentHours": 72, "nodeDegradedHours": 1, "nodeSilentHours": 24, - "_comment": "How long (hours) before nodes show as degraded/silent. 'infra' = repeaters & rooms, 'node' = companions & others." + "relayActiveHours": 24, + "_comment": "How long (hours) before nodes show as degraded/silent. 'infra' = repeaters & rooms, 'node' = companions & others. relayActiveHours: a repeater is shown as 'actively relaying' if its pubkey appeared as a path hop in a non-advert packet within this window (issue #662)." 
}, "defaultRegion": "SJC", "mapDefaults": { diff --git a/public/nodes.js b/public/nodes.js index d1397df1..4c13fd4c 100644 --- a/public/nodes.js +++ b/public/nodes.js @@ -531,6 +531,7 @@ + ${(n.role === 'repeater' || n.role === 'room') ? `` : ''}
Status${statusLabel} ${statusExplanation}
Last Heard${renderNodeTimestampHtml(lastHeard || n.last_seen)}
Last Relayed${n.last_relayed ? renderNodeTimestampHtml(n.last_relayed) + ' ' + (n.relay_active ? '🟢 actively relaying' : '🟡 alive (idle)') : 'never observed as relay hop 🟡 alive (idle)'}${(n.relay_count_1h != null || n.relay_count_24h != null) ? ` (${n.relay_count_1h || 0} relays/hr, ${n.relay_count_24h || 0} relays/24h)` : ''}
First Seen${renderNodeTimestampHtml(n.first_seen)}
Total Packets${stats.totalTransmissions || stats.totalPackets || n.advert_count || 0}${stats.totalObservations && stats.totalObservations !== (stats.totalTransmissions || stats.totalPackets) ? ' (seen ' + stats.totalObservations + '×)' : ''}
Packets Today${stats.packetsToday || 0}