mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-05-14 11:25:14 +00:00
6f35d4d417
## RF Health Dashboard — M1: Observer Metrics Storage, API & Small Multiples Grid

Implements M1 of #600.

### What this does

Adds a complete RF health monitoring pipeline: MQTT stats ingestion → SQLite storage → REST API → interactive dashboard with small multiples grid.

### Backend Changes

**Ingestor (`cmd/ingestor/`)**

- New `observer_metrics` table via migration system (`_migrations` pattern)
- Parse `tx_air_secs`, `rx_air_secs`, `recv_errors` from MQTT status messages (same pattern as existing `noise_floor` and `battery_mv`)
- `INSERT OR REPLACE` with timestamps rounded to nearest 5-min interval boundary (using ingestor wall clock, not observer timestamps)
- Missing fields stored as NULLs — partial data is always better than no data
- Configurable retention pruning: `retention.metricsDays` (default 30), runs on startup + every 24h

**Server (`cmd/server/`)**

- `GET /api/observers/{id}/metrics?since=...&until=...` — per-observer time-series data
- `GET /api/observers/metrics/summary?window=24h` — fleet summary with current NF, avg/max NF, sample count
- `parseWindowDuration()` supports `1h`, `24h`, `3d`, `7d`, `30d` etc.
- Server-side metrics retention pruning (same config, staggered 2min after packet prune)

### Frontend Changes

**RF Health tab (`public/analytics.js`, `public/style.css`)**

- Small multiples grid showing all observers simultaneously — anomalies pop out visually
- Per-observer cell: name, current NF value, battery voltage, sparkline, avg/max stats
- NF status coloring: warning (amber) at ≥-100 dBm, critical (red) at ≥-85 dBm — text color only, no background fills
- Click any cell → expanded detail view with full noise floor line chart
- Reference lines with direct text labels (`-100 warning`, `-85 critical`) — not color bands
- Min/max points labeled directly on the chart
- Time range selector: preset buttons (1h/3h/6h/12h/24h/3d/7d/30d) + custom from/to datetime picker
- Deep linking: `#/analytics?tab=rf-health&observer=...&range=...`
- All charts use SVG, matching existing analytics.js patterns
- Responsive: 3-4 columns on desktop, 1 on mobile

### Design Decisions (from spec)

- Labels directly on data, not in legends
- Reference lines with text labels, not color bands
- Small multiples grid, not card+accordion (Tufte: instant visual fleet comparison)
- Ingestor wall clock for all timestamps (observer clocks may drift)

### Tests Added

**Ingestor tests:**

- `TestRoundToInterval` — 5 cases for rounding to 5-min boundaries
- `TestInsertMetrics` — basic insertion with all fields
- `TestInsertMetricsIdempotent` — INSERT OR REPLACE deduplication
- `TestInsertMetricsNullFields` — partial data with NULLs
- `TestPruneOldMetrics` — retention pruning
- `TestExtractObserverMetaNewFields` — parsing tx_air_secs, rx_air_secs, recv_errors

**Server tests:**

- `TestGetObserverMetrics` — time-series query with since/until filters, NULL handling
- `TestGetMetricsSummary` — fleet summary aggregation
- `TestObserverMetricsAPIEndpoints` — DB query verification
- `TestMetricsAPIEndpoints` — HTTP endpoint response shape
- `TestParseWindowDuration` — duration parsing for h/d formats

### Test Results

```
cd cmd/ingestor && go test ./... → PASS (26s)
cd cmd/server && go test ./... → PASS (5s)
```

### What's NOT in this PR (deferred to M2+)

- Server-side delta computation for cumulative counters
- Airtime charts (TX/RX percentage lines)
- Channel quality chart (recv_error_rate)
- Battery voltage chart
- Reboot detection and chart annotations
- Resolution downsampling (1h, 1d aggregates)
- Pattern detection / automated diagnosis

---------

Co-authored-by: you <you@example.com>
285 lines
8.4 KiB
Go
285 lines
8.4 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"log"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/meshcore-analyzer/geofilter"
|
|
)
|
|
|
|
// Config mirrors the Node.js config.json structure (read-only fields).
|
|
type Config struct {
|
|
Port int `json:"port"`
|
|
APIKey string `json:"apiKey"`
|
|
DBPath string `json:"dbPath"`
|
|
|
|
Branding map[string]interface{} `json:"branding"`
|
|
Theme map[string]interface{} `json:"theme"`
|
|
ThemeDark map[string]interface{} `json:"themeDark"`
|
|
NodeColors map[string]interface{} `json:"nodeColors"`
|
|
TypeColors map[string]interface{} `json:"typeColors"`
|
|
Home map[string]interface{} `json:"home"`
|
|
|
|
MapDefaults struct {
|
|
Center []float64 `json:"center"`
|
|
Zoom int `json:"zoom"`
|
|
} `json:"mapDefaults"`
|
|
|
|
Regions map[string]string `json:"regions"`
|
|
|
|
Roles map[string]interface{} `json:"roles"`
|
|
HealthThresholds *HealthThresholds `json:"healthThresholds"`
|
|
Tiles map[string]interface{} `json:"tiles"`
|
|
SnrThresholds map[string]interface{} `json:"snrThresholds"`
|
|
DistThresholds map[string]interface{} `json:"distThresholds"`
|
|
MaxHopDist *float64 `json:"maxHopDist"`
|
|
Limits map[string]interface{} `json:"limits"`
|
|
PerfSlowMs *int `json:"perfSlowMs"`
|
|
WsReconnectMs *int `json:"wsReconnectMs"`
|
|
CacheInvalidMs *int `json:"cacheInvalidateMs"`
|
|
ExternalUrls map[string]interface{} `json:"externalUrls"`
|
|
|
|
LiveMap struct {
|
|
PropagationBufferMs int `json:"propagationBufferMs"`
|
|
} `json:"liveMap"`
|
|
|
|
CacheTTL map[string]interface{} `json:"cacheTTL"`
|
|
|
|
Retention *RetentionConfig `json:"retention,omitempty"`
|
|
|
|
PacketStore *PacketStoreConfig `json:"packetStore,omitempty"`
|
|
|
|
GeoFilter *GeoFilterConfig `json:"geo_filter,omitempty"`
|
|
|
|
Timestamps *TimestampConfig `json:"timestamps,omitempty"`
|
|
|
|
DebugAffinity bool `json:"debugAffinity,omitempty"`
|
|
}
|
|
|
|
// PacketStoreConfig holds the limits applied to the in-memory packet store.
// For both fields a value of 0 means "unlimited".
type PacketStoreConfig struct {
	// RetentionHours is the maximum age of packets, in hours.
	RetentionHours float64 `json:"retentionHours"`
	// MaxMemoryMB is the hard memory ceiling, in MB.
	MaxMemoryMB int `json:"maxMemoryMB"`
}
|
|
|
|
// GeoFilterConfig is an alias for the shared geofilter.Config type.
|
|
type GeoFilterConfig = geofilter.Config
|
|
|
|
// RetentionConfig is the "retention" section of config.json. Each field is a
// retention period in days; Config.NodeDaysOrDefault and
// Config.MetricsRetentionDays substitute a default when their field is not
// positive.
type RetentionConfig struct {
	// NodeDays is read from "nodeDays".
	NodeDays int `json:"nodeDays"`
	// PacketDays is read from "packetDays".
	PacketDays int `json:"packetDays"`
	// MetricsDays is read from "metricsDays".
	MetricsDays int `json:"metricsDays"`
}
|
|
|
|
// MetricsRetentionDays returns configured metrics retention or 30 days default.
|
|
func (c *Config) MetricsRetentionDays() int {
|
|
if c.Retention != nil && c.Retention.MetricsDays > 0 {
|
|
return c.Retention.MetricsDays
|
|
}
|
|
return 30
|
|
}
|
|
|
|
|
|
// TimestampConfig is the "timestamps" section of config.json.
type TimestampConfig struct {
	// DefaultMode is "ago" or "absolute".
	DefaultMode string `json:"defaultMode"`
	// Timezone is "local" or "utc".
	Timezone string `json:"timezone"`
	// FormatPreset is "iso", "iso-seconds" or "locale".
	FormatPreset string `json:"formatPreset"`
	// CustomFormat is freeform and only used when AllowCustomFormat is true.
	CustomFormat string `json:"customFormat"`
	// AllowCustomFormat is the admin gate for CustomFormat.
	AllowCustomFormat bool `json:"allowCustomFormat"`
}
|
|
|
|
func defaultTimestampConfig() TimestampConfig {
|
|
return TimestampConfig{
|
|
DefaultMode: "ago",
|
|
Timezone: "local",
|
|
FormatPreset: "iso",
|
|
CustomFormat: "",
|
|
AllowCustomFormat: false,
|
|
}
|
|
}
|
|
|
|
// NodeDaysOrDefault returns the configured retention.nodeDays or 7 if not set.
|
|
func (c *Config) NodeDaysOrDefault() int {
|
|
if c.Retention != nil && c.Retention.NodeDays > 0 {
|
|
return c.Retention.NodeDays
|
|
}
|
|
return 7
|
|
}
|
|
|
|
// HealthThresholds is the "healthThresholds" section of config.json. All
// values are expressed in hours. GetHealthMs applies the Infra* pair to the
// "repeater" and "room" roles and the Node* pair to every other role.
type HealthThresholds struct {
	// InfraDegradedHours is read from "infraDegradedHours".
	InfraDegradedHours float64 `json:"infraDegradedHours"`
	// InfraSilentHours is read from "infraSilentHours".
	InfraSilentHours float64 `json:"infraSilentHours"`
	// NodeDegradedHours is read from "nodeDegradedHours".
	NodeDegradedHours float64 `json:"nodeDegradedHours"`
	// NodeSilentHours is read from "nodeSilentHours".
	NodeSilentHours float64 `json:"nodeSilentHours"`
}
|
|
|
|
// ThemeFile is the decoded form of the theme.json overlay. It carries the same
// six free-form presentation sections that Config declares; a section missing
// from the file is left as a nil map.
type ThemeFile struct {
	// Branding is read from "branding".
	Branding map[string]interface{} `json:"branding"`
	// Theme is read from "theme".
	Theme map[string]interface{} `json:"theme"`
	// ThemeDark is read from "themeDark".
	ThemeDark map[string]interface{} `json:"themeDark"`
	// NodeColors is read from "nodeColors".
	NodeColors map[string]interface{} `json:"nodeColors"`
	// TypeColors is read from "typeColors".
	TypeColors map[string]interface{} `json:"typeColors"`
	// Home is read from "home".
	Home map[string]interface{} `json:"home"`
}
|
|
|
|
func LoadConfig(baseDirs ...string) (*Config, error) {
|
|
if len(baseDirs) == 0 {
|
|
baseDirs = []string{"."}
|
|
}
|
|
paths := make([]string, 0)
|
|
for _, d := range baseDirs {
|
|
paths = append(paths, filepath.Join(d, "config.json"))
|
|
paths = append(paths, filepath.Join(d, "data", "config.json"))
|
|
}
|
|
|
|
cfg := &Config{Port: 3000}
|
|
for _, p := range paths {
|
|
data, err := os.ReadFile(p)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
if err := json.Unmarshal(data, cfg); err != nil {
|
|
continue
|
|
}
|
|
cfg.NormalizeTimestampConfig()
|
|
return cfg, nil
|
|
}
|
|
cfg.NormalizeTimestampConfig()
|
|
return cfg, nil // defaults
|
|
}
|
|
|
|
func LoadTheme(baseDirs ...string) *ThemeFile {
|
|
if len(baseDirs) == 0 {
|
|
baseDirs = []string{"."}
|
|
}
|
|
for _, d := range baseDirs {
|
|
for _, name := range []string{"theme.json"} {
|
|
p := filepath.Join(d, name)
|
|
data, err := os.ReadFile(p)
|
|
if err != nil {
|
|
p = filepath.Join(d, "data", name)
|
|
data, err = os.ReadFile(p)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
}
|
|
var t ThemeFile
|
|
if json.Unmarshal(data, &t) == nil {
|
|
return &t
|
|
}
|
|
}
|
|
}
|
|
return &ThemeFile{}
|
|
}
|
|
|
|
func (c *Config) GetHealthThresholds() HealthThresholds {
|
|
h := HealthThresholds{
|
|
InfraDegradedHours: 24,
|
|
InfraSilentHours: 72,
|
|
NodeDegradedHours: 1,
|
|
NodeSilentHours: 24,
|
|
}
|
|
if c.HealthThresholds != nil {
|
|
if c.HealthThresholds.InfraDegradedHours > 0 {
|
|
h.InfraDegradedHours = c.HealthThresholds.InfraDegradedHours
|
|
}
|
|
if c.HealthThresholds.InfraSilentHours > 0 {
|
|
h.InfraSilentHours = c.HealthThresholds.InfraSilentHours
|
|
}
|
|
if c.HealthThresholds.NodeDegradedHours > 0 {
|
|
h.NodeDegradedHours = c.HealthThresholds.NodeDegradedHours
|
|
}
|
|
if c.HealthThresholds.NodeSilentHours > 0 {
|
|
h.NodeSilentHours = c.HealthThresholds.NodeSilentHours
|
|
}
|
|
}
|
|
return h
|
|
}
|
|
|
|
// GetHealthMs returns degraded/silent thresholds in ms for a given role.
|
|
func (h HealthThresholds) GetHealthMs(role string) (degradedMs, silentMs int) {
|
|
const hourMs = 3600000
|
|
if role == "repeater" || role == "room" {
|
|
return int(h.InfraDegradedHours * hourMs), int(h.InfraSilentHours * hourMs)
|
|
}
|
|
return int(h.NodeDegradedHours * hourMs), int(h.NodeSilentHours * hourMs)
|
|
}
|
|
|
|
// ToClientMs returns the thresholds as ms for the frontend.
|
|
func (h HealthThresholds) ToClientMs() map[string]int {
|
|
const hourMs = 3600000
|
|
return map[string]int{
|
|
"infraDegradedMs": int(h.InfraDegradedHours * hourMs),
|
|
"infraSilentMs": int(h.InfraSilentHours * hourMs),
|
|
"nodeDegradedMs": int(h.NodeDegradedHours * hourMs),
|
|
"nodeSilentMs": int(h.NodeSilentHours * hourMs),
|
|
}
|
|
}
|
|
|
|
func (c *Config) ResolveDBPath(baseDir string) string {
|
|
if c.DBPath != "" {
|
|
return c.DBPath
|
|
}
|
|
if v := os.Getenv("DB_PATH"); v != "" {
|
|
return v
|
|
}
|
|
return filepath.Join(baseDir, "data", "meshcore.db")
|
|
}
|
|
|
|
|
|
func (c *Config) NormalizeTimestampConfig() {
|
|
defaults := defaultTimestampConfig()
|
|
if c.Timestamps == nil {
|
|
log.Printf("[config] timestamps not configured - using defaults (ago/local/iso)")
|
|
c.Timestamps = &defaults
|
|
return
|
|
}
|
|
|
|
origMode := c.Timestamps.DefaultMode
|
|
mode := strings.ToLower(strings.TrimSpace(origMode))
|
|
switch mode {
|
|
case "ago", "absolute":
|
|
c.Timestamps.DefaultMode = mode
|
|
default:
|
|
log.Printf("[config] warning: timestamps.defaultMode=%q is invalid, using %q", origMode, defaults.DefaultMode)
|
|
c.Timestamps.DefaultMode = defaults.DefaultMode
|
|
}
|
|
|
|
origTimezone := c.Timestamps.Timezone
|
|
timezone := strings.ToLower(strings.TrimSpace(origTimezone))
|
|
switch timezone {
|
|
case "local", "utc":
|
|
c.Timestamps.Timezone = timezone
|
|
default:
|
|
log.Printf("[config] warning: timestamps.timezone=%q is invalid, using %q", origTimezone, defaults.Timezone)
|
|
c.Timestamps.Timezone = defaults.Timezone
|
|
}
|
|
|
|
origPreset := c.Timestamps.FormatPreset
|
|
formatPreset := strings.ToLower(strings.TrimSpace(origPreset))
|
|
switch formatPreset {
|
|
case "iso", "iso-seconds", "locale":
|
|
c.Timestamps.FormatPreset = formatPreset
|
|
default:
|
|
log.Printf("[config] warning: timestamps.formatPreset=%q is invalid, using %q", origPreset, defaults.FormatPreset)
|
|
c.Timestamps.FormatPreset = defaults.FormatPreset
|
|
}
|
|
}
|
|
|
|
func (c *Config) GetTimestampConfig() TimestampConfig {
|
|
if c == nil || c.Timestamps == nil {
|
|
return defaultTimestampConfig()
|
|
}
|
|
return *c.Timestamps
|
|
}
|
|
func (c *Config) PropagationBufferMs() int {
|
|
if c.LiveMap.PropagationBufferMs > 0 {
|
|
return c.LiveMap.PropagationBufferMs
|
|
}
|
|
return 5000
|
|
}
|