mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-05-12 11:54:43 +00:00
45f30fcadc
## Summary

Implements repeater liveness detection per #662 — distinguishes a repeater that is **actively relaying traffic** from one that is **alive but idle** (only sending its own adverts).

## Approach

The backend already maintains a `byPathHop` index keyed by lowercase hop/pubkey for every transmission. Decode-window writes also key it by **resolved pubkey** for relay hops. We just weren't surfacing it.

`GetRepeaterRelayInfo(pubkey, windowHours)`:

- Reads `byPathHop[pubkey]`.
- Skips packets whose `payload_type == 4` (advert) — a self-advert proves liveness, not relaying.
- Returns the most recent `FirstSeen` as `lastRelayed`, plus `relayActive` (within window) and the `windowHours` actually used.

## Three states (per issue)

| State | Indicator | Condition |
|---|---|---|
| 🟢 Relaying | green | `last_relayed` within `relayActiveHours` |
| 🟡 Alive (idle) | yellow | repeater is in the DB but `relay_active=false` (no recent path-hop appearance, or none ever) |
| ⚪ Stale | existing | falls out of the existing `getNodeStatus` logic |

## API

- `GET /api/nodes` — repeater/room rows now include `last_relayed` (omitted if never observed) and `relay_active`.
- `GET /api/nodes/{pubkey}` — same fields plus `relay_window_hours`.

## Config

New optional field under `healthThresholds`:

```json
"healthThresholds": { ..., "relayActiveHours": 24 }
```

Default 24h. Documented in `config.example.json`.

## Frontend

Node detail page gains a **Last Relayed** row for repeaters/rooms with the 🟢/🟡 state badge. Tooltip explains the distinction from "Last Heard".

## TDD

- **Red commit** `4445f91`: `repeater_liveness_test.go` + stub `GetRepeaterRelayInfo` returning zero. Active and Stale tests fail on assertion (LastRelayed empty / mismatched). Idle and IgnoresAdverts already match the desired behavior under the stub. Compiles, runs, fails on assertions — not on imports.
- **Green commit** `5fcfb57`: Implementation. All four tests pass. Full `cmd/server` suite green (~22s).
## Performance

`O(N)` over `byPathHop[pubkey]` per call. The index is bounded by store eviction; a single repeater has at most a few hundred entries on real data. The `/api/nodes` loop adds one map read + scan per repeater row — negligible against the existing enrichment work.

## Limitations (per issue body)

1. Observer coverage gaps — if no observer hears a repeater's relay, it'll show as idle even when actively relaying. This is inherent to passive observation.
2. Low-traffic networks — a repeater in a quiet area legitimately shows idle. The 🟡 indicator copy makes that explicit ("alive (idle)").
3. Hash collisions are mitigated by the existing `resolveWithContext` path before pubkeys land in `byPathHop`.

Fixes #662

---------

Co-authored-by: clawbot <bot@corescope.local>
144 lines
4.6 KiB
Go
144 lines
4.6 KiB
Go
package main
|
|
|
|
import (
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// RepeaterRelayInfo reports relay activity for a repeater: whether, and
// when, it was last observed as a path hop in a non-advert packet. It is a
// separate signal from advert-based liveness (last_seen / last_heard), which
// only shows that the repeater can transmit its own adverts.
//
// See issue #662.
type RepeaterRelayInfo struct {
	// LastRelayed holds the ISO-8601 timestamp of the newest non-advert
	// packet in which this pubkey appeared as a relay hop. It is empty, and
	// omitted from the JSON encoding, when no such packet exists.
	LastRelayed string `json:"lastRelayed,omitempty"`

	// RelayActive reports whether LastRelayed lies inside the configured
	// activity window (default 24h).
	RelayActive bool `json:"relayActive"`

	// WindowHours is the activity-window threshold, in hours, that was
	// actually applied when computing RelayActive.
	WindowHours float64 `json:"windowHours"`

	// RelayCount1h is the count of distinct non-advert packets in which this
	// pubkey appeared as a relay hop during the last hour.
	RelayCount1h int `json:"relayCount1h"`

	// RelayCount24h is the count of distinct non-advert packets in which
	// this pubkey appeared as a relay hop during the last 24 hours.
	RelayCount24h int `json:"relayCount24h"`
}
|
|
|
|
// payloadTypeAdvert is the MeshCore payload type number for ADVERT packets
// (see firmware/src/Mesh.h). ADVERT packets do not count as relay activity:
// a repeater that only sends adverts shows that it is alive, not that it is
// forwarding traffic for other nodes.
const payloadTypeAdvert = 4
|
|
|
|
// parseRelayTS converts a packet first-seen timestamp string into a
// time.Time. The layouts CoreScope writes in practice are tried in order and
// the first one that parses wins:
//   - time.RFC3339Nano — Go's default UTC marshal output
//   - time.RFC3339 — second-precision ISO-8601 with offset
//   - "2006-01-02T15:04:05.000Z" — millisecond-precision Z form used by ingest
//
// The boolean result is false, and the returned time is the zero value, when
// ts is empty or matches none of the layouts.
func parseRelayTS(ts string) (time.Time, bool) {
	if len(ts) == 0 {
		return time.Time{}, false
	}
	// Order matters only for documentation parity; the first layout that
	// accepts the input determines the result.
	layouts := [...]string{
		time.RFC3339Nano,
		time.RFC3339,
		"2006-01-02T15:04:05.000Z",
	}
	for _, layout := range layouts {
		parsed, err := time.Parse(layout, ts)
		if err != nil {
			continue
		}
		return parsed, true
	}
	return time.Time{}, false
}
|
|
|
|
// GetRepeaterRelayInfo returns relay-activity information for a node by
|
|
// scanning the byPathHop index for non-advert packets that name the
|
|
// pubkey as a hop. It computes the most recent appearance timestamp,
|
|
// 1h/24h hop counts, and whether the latest appearance falls within
|
|
// windowHours.
|
|
//
|
|
// Cost: O(N) over the indexed entries for `pubkey`. The byPathHop index
|
|
// is bounded by store eviction; on real data this is small per-node.
|
|
//
|
|
// Note on self-as-source: byPathHop is keyed by every hop in a packet's
|
|
// resolved path, including the originator. For ADVERT packets that's the
|
|
// node itself, which is filtered above by the payloadTypeAdvert check.
|
|
// For non-advert packets a node "originates" rather than "relays" only
|
|
// when it is the source; we don't currently have a clean signal for that
|
|
// distinction, so the count here is *path-hop appearances in non-advert
|
|
// packets*. In practice for a repeater nearly all such appearances are
|
|
// relay hops (the firmware doesn't originate user traffic), so this is
|
|
// the right approximation for issue #662.
|
|
func (s *PacketStore) GetRepeaterRelayInfo(pubkey string, windowHours float64) RepeaterRelayInfo {
|
|
info := RepeaterRelayInfo{WindowHours: windowHours}
|
|
if pubkey == "" {
|
|
return info
|
|
}
|
|
key := strings.ToLower(pubkey)
|
|
|
|
s.mu.RLock()
|
|
txList := s.byPathHop[key]
|
|
// Copy only the timestamps + payload types we need so we can release
|
|
// the read lock before doing parsing/compare work below.
|
|
type entry struct {
|
|
ts string
|
|
pt int
|
|
}
|
|
scratch := make([]entry, 0, len(txList))
|
|
for _, tx := range txList {
|
|
if tx == nil {
|
|
continue
|
|
}
|
|
pt := -1
|
|
if tx.PayloadType != nil {
|
|
pt = *tx.PayloadType
|
|
}
|
|
scratch = append(scratch, entry{ts: tx.FirstSeen, pt: pt})
|
|
}
|
|
s.mu.RUnlock()
|
|
|
|
now := time.Now().UTC()
|
|
cutoff1h := now.Add(-1 * time.Hour)
|
|
cutoff24h := now.Add(-24 * time.Hour)
|
|
|
|
var latest time.Time
|
|
var latestRaw string
|
|
for _, e := range scratch {
|
|
// Self-originated adverts are not relay activity (see header comment).
|
|
if e.pt == payloadTypeAdvert {
|
|
continue
|
|
}
|
|
t, ok := parseRelayTS(e.ts)
|
|
if !ok {
|
|
continue
|
|
}
|
|
if t.After(latest) {
|
|
latest = t
|
|
latestRaw = e.ts
|
|
}
|
|
if t.After(cutoff24h) {
|
|
info.RelayCount24h++
|
|
if t.After(cutoff1h) {
|
|
info.RelayCount1h++
|
|
}
|
|
}
|
|
}
|
|
if latestRaw == "" {
|
|
return info
|
|
}
|
|
info.LastRelayed = latestRaw
|
|
|
|
if windowHours > 0 {
|
|
cutoff := now.Add(-time.Duration(windowHours * float64(time.Hour)))
|
|
if latest.After(cutoff) {
|
|
info.RelayActive = true
|
|
}
|
|
}
|
|
return info
|
|
}
|