Files
meshcore-analyzer/cmd/ingestor/main.go
T
Joel Claw 95d7916530 fix(channels): normalize known channel display names (public → Public) (#777)
Normalizes well-known channel display names (currently only `public` → `Public`) so existing deployments with pre-#761 lowercase config keys show the canonical firmware-default name `Public` in the UI.

Behavior:
- `knownChannelCasing` lookup (`decoder.go`) — single-entry map, easy to extend.
- `normalizeChannelName()` applied at config load (`loadChannelKeys`) AND at decode time (defense in depth).
- One-shot SQLite migration `channel_hash_casing_v1` backfills `channel_hash='public'` → `'Public'` on `payload_type=5` rows so channel-grouping queries don't split across the upgrade boundary.
- Hardcoded list intentionally tiny (1 entry); custom/user channels left untouched.

Safety:
- Channel-hash derivation (`SHA256(channelName)[:16]` for `#`-prefixed `HashChannels`) is unchanged — normalization only renames map keys for explicit `ChannelKeys` entries (which don't feed `deriveHashtagChannelKey`).
- PSK lookup is by hash byte, not by name — mesh interop preserved.
- Migration is gated by `_migrations.name='channel_hash_casing_v1'`, idempotent.

Tests (`cmd/ingestor/normalize_channel_test.go`):
- `TestNormalizeChannelName` covers known + hashtag + custom + empty.
- `TestLoadChannelKeys_NormalizesKnownDisplayNames` — verifies `public` → `Public` at load.
- `TestLoadChannelKeys_LeavesCustomNamesUntouched` — custom names not auto-capitalized.
- `TestLoadChannelKeys_DuplicateCasingLogsWarning` — config containing both casings resolves deterministically (canonical wins).

Mutation test confirmed: reverting load-time normalize → `TestLoadChannelKeys_NormalizesKnownDisplayNames` and `_DuplicateCasingLogsWarning` both fail on assertions.

Related: #761
2026-05-25 23:05:07 -07:00

1280 lines
41 KiB
Go

package main
import (
"crypto/hmac"
"crypto/sha256"
"crypto/tls"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"log"
"math"
"net/http"
_ "net/http/pprof"
"net/url"
"os"
"os/signal"
"path/filepath"
"strconv"
"strings"
"sync/atomic"
"syscall"
"time"
mqtt "github.com/eclipse/paho.mqtt.golang"
)
func main() {
// pprof profiling — off by default, enable with ENABLE_PPROF=true
if os.Getenv("ENABLE_PPROF") == "true" {
pprofPort := os.Getenv("PPROF_PORT")
if pprofPort == "" {
pprofPort = "6061"
}
go func() {
log.Printf("[pprof] ingestor profiling at http://localhost:%s/debug/pprof/", pprofPort)
if err := http.ListenAndServe(":"+pprofPort, nil); err != nil {
log.Printf("[pprof] failed to start: %v (non-fatal)", err)
}
}()
}
configPath := flag.String("config", "config.json", "path to config file")
flag.Parse()
log.SetFlags(log.LstdFlags | log.Lmsgprefix)
log.SetPrefix("[ingestor] ")
cfg, err := LoadConfig(*configPath)
if err != nil {
log.Fatalf("config: %v", err)
}
sources := cfg.ResolvedSources()
store, err := OpenStoreWithInterval(cfg.DBPath, cfg.MetricsSampleInterval())
if err != nil {
log.Fatalf("db: %v", err)
}
defer store.Close()
log.Printf("SQLite opened: %s", cfg.DBPath)
// Async backfill: path_json from raw_hex (#888) — must not block MQTT startup
store.BackfillPathJSONAsync()
// Soft-delete blacklisted observers (#1287 — moved from cmd/server).
if len(cfg.ObserverBlacklist) > 0 {
store.SoftDeleteBlacklistedObservers(cfg.ObserverBlacklist)
}
// Async backfill: from_pubkey for legacy ADVERT rows (#1143).
// Moved from cmd/server in #1287. Best-effort; must not block MQTT.
go store.BackfillFromPubkey(5000, 100*time.Millisecond, nil)
// Check auto_vacuum mode and optionally migrate (#919)
store.CheckAutoVacuum(cfg)
// Node retention: move stale nodes to inactive_nodes on startup
nodeDays := cfg.NodeDaysOrDefault()
store.MoveStaleNodes(nodeDays)
// Observer retention: remove stale observers on startup
observerDays := cfg.ObserverDaysOrDefault()
store.RemoveStaleObservers(observerDays)
// Metrics retention: prune old metrics on startup
metricsDays := cfg.MetricsRetentionDays()
store.PruneOldMetrics(metricsDays)
store.PruneDroppedPackets(metricsDays)
// Packet (transmissions) retention: previously lived in cmd/server,
// moved to ingestor in #1283 to eliminate cross-process write
// contention (SQLITE_BUSY). 0 = disabled.
packetDays := cfg.PacketDaysOrZero()
if packetDays > 0 {
if n, err := store.PruneOldPackets(packetDays); err != nil {
log.Printf("[prune] error: %v", err)
} else if n > 0 {
log.Printf("[prune] startup pruned %d transmissions older than %d days", n, packetDays)
}
}
vacuumPages := cfg.IncrementalVacuumPages()
store.RunIncrementalVacuum(vacuumPages)
// Daily ticker for node retention
retentionTicker := time.NewTicker(1 * time.Hour)
go func() {
for range retentionTicker.C {
store.MoveStaleNodes(nodeDays)
store.RunIncrementalVacuum(vacuumPages)
}
}()
// Daily ticker for observer retention (every 24h, staggered 90s after startup)
observerRetentionTicker := time.NewTicker(24 * time.Hour)
go func() {
time.Sleep(90 * time.Second) // stagger after metrics prune
store.RemoveStaleObservers(observerDays)
store.RunIncrementalVacuum(vacuumPages)
for range observerRetentionTicker.C {
store.RemoveStaleObservers(observerDays)
store.RunIncrementalVacuum(vacuumPages)
}
}()
// Daily ticker for metrics retention (every 24h)
metricsRetentionTicker := time.NewTicker(24 * time.Hour)
go func() {
for range metricsRetentionTicker.C {
store.PruneOldMetrics(metricsDays)
store.PruneDroppedPackets(metricsDays)
store.RunIncrementalVacuum(vacuumPages)
}
}()
// Daily ticker for transmission retention (#1283).
var packetRetentionTicker *time.Ticker
if packetDays > 0 {
packetRetentionTicker = time.NewTicker(24 * time.Hour)
go func() {
for range packetRetentionTicker.C {
if n, err := store.PruneOldPackets(packetDays); err != nil {
log.Printf("[prune] error: %v", err)
} else if n > 0 {
store.RunIncrementalVacuum(vacuumPages)
}
}
}()
log.Printf("[prune] auto-prune enabled: packets older than %d days will be removed daily", packetDays)
}
// Daily neighbor_edges retention (#1287 — moved from cmd/server).
{
nDays := cfg.NeighborEdgesDaysOrDefault()
neighborPruneTicker := time.NewTicker(24 * time.Hour)
go func() {
time.Sleep(4 * time.Minute) // stagger
if n, err := store.PruneNeighborEdges(nDays); err != nil {
log.Printf("[neighbor-prune] error: %v", err)
} else if n > 0 {
log.Printf("[neighbor-prune] startup pruned %d edges older than %d days", n, nDays)
}
for range neighborPruneTicker.C {
if n, err := store.PruneNeighborEdges(nDays); err != nil {
log.Printf("[neighbor-prune] error: %v", err)
} else if n > 0 {
log.Printf("[neighbor-prune] pruned %d edges older than %d days", n, nDays)
}
}
}()
log.Printf("[neighbor-prune] auto-prune enabled: edges older than %d days", nDays)
}
// Periodic stats logging (every 5 minutes)
statsTicker := time.NewTicker(5 * time.Minute)
go func() {
for range statsTicker.C {
store.LogStats()
}
}()
// Prune-request queue (#669 M4 / #738): the read-only server enqueues
// geo-prune requests as marker files; the ingestor (which holds the
// write handle) executes the DELETEs. Process on startup, then every
// 15 seconds — short enough for a one-click UX, long enough to avoid
// useless wake-ups.
store.RunPendingPruneRequests()
pruneQueueTicker := time.NewTicker(15 * time.Second)
go func() {
for range pruneQueueTicker.C {
store.RunPendingPruneRequests()
}
}()
// Per-second stats file writer for the server's /api/perf/write-sources
// endpoint (#1120). Best-effort; never fatal.
StartStatsFileWriter(store, time.Second)
// Multi-byte capability persister (#1324 follow-up): the server's
// analytics cycle publishes a snapshot file via internal/mbcapqueue
// (it cannot UPDATE itself, mode=ro since #1289). The ingestor
// applies the snapshot here every 5 minutes — derived/cached
// columns, ingestor owns the write.
multibytePersistTicker := time.NewTicker(5 * time.Minute)
go func() {
time.Sleep(2 * time.Minute) // stagger after analytics warmup
if _, err := store.RunMultibyteCapPersist(); err != nil {
log.Printf("[multibyte-persist] error: %v", err)
}
for range multibytePersistTicker.C {
if _, err := store.RunMultibyteCapPersist(); err != nil {
log.Printf("[multibyte-persist] error: %v", err)
}
}
}()
log.Printf("[multibyte-persist] enabled (interval=5m)")
// Neighbor-edges builder (#1287 — Option 4): ingestor owns
// neighbor_edges writes. Runs every 60s. Server reads the snapshot
// via cmd/server/neighbor_recomputer.go on the same cadence.
stopNeighborBuilder := store.StartNeighborEdgesBuilder(NeighborEdgesBuilderInterval)
defer stopNeighborBuilder()
log.Printf("[neighbor-build] enabled (interval=%s)", NeighborEdgesBuilderInterval)
channelKeys := loadChannelKeys(cfg, *configPath)
if len(channelKeys) > 0 {
log.Printf("Loaded %d channel keys for GRP_TXT decryption", len(channelKeys))
} else {
log.Printf("No channel keys loaded — GRP_TXT packets will not be decrypted")
}
regionKeys := loadRegionKeys(cfg)
store.BackfillDefaultScopeAsync(regionKeys)
// Connect to each MQTT source
var clients []mqtt.Client
connectedCount := 0
for _, source := range sources {
tag := source.Name
if tag == "" {
tag = source.Broker
}
opts := buildMQTTOpts(source)
connectTimeout := source.ConnectTimeoutOrDefault()
log.Printf("MQTT [%s] connect timeout: %ds", tag, connectTimeout)
// Pre-allocate the liveness pointer so OnConnect can reset its
// stale-message clock on reconnect (PR #1216 r1 item 2). IsConnectedFn
// is wired below once the client exists.
liveness := &SourceLivenessState{
Tag: tag,
Broker: source.Broker,
}
opts.SetOnConnectHandler(func(c mqtt.Client) {
log.Printf("MQTT [%s] connected to %s", tag, source.Broker)
// PR #1216 r1 item 2: clear the stale LastMessageUnix from
// before the outage so the watchdog doesn't immediately scream
// "stalled for 2h". Also restarts the cold-start grace window
// and clears the alert cooldown so a fresh stall edge can fire.
liveness.MarkReconnected(time.Now())
topics := source.Topics
if len(topics) == 0 {
topics = []string{"meshcore/#"}
}
for _, t := range topics {
token := c.Subscribe(t, 0, nil)
token.Wait()
if token.Error() != nil {
log.Printf("MQTT [%s] subscribe error for %s: %v", tag, t, token.Error())
} else {
log.Printf("MQTT [%s] subscribed to %s", tag, t)
}
}
})
opts.SetConnectionLostHandler(func(c mqtt.Client, err error) {
log.Printf("MQTT [%s] disconnected from %s: %v", tag, source.Broker, err)
})
opts.SetReconnectingHandler(func(c mqtt.Client, options *mqtt.ClientOptions) {
log.Printf("MQTT [%s] reconnecting to %s", tag, source.Broker)
})
// Capture source for closure
src := source
opts.SetDefaultPublishHandler(func(c mqtt.Client, m mqtt.Message) {
handleMessage(store, tag, src, m, channelKeys, regionKeys, cfg)
})
client := mqtt.NewClient(opts)
// Wire IsConnectedFn now that the client exists, then register.
// Registration BEFORE Connect so the attempt counter is available
// to OnConnectAttempt on the very first dial.
liveness.IsConnectedFn = client.IsConnected
// #1335: wire force-reconnect so the watchdog can drop a
// half-open TCP socket and re-dial when paho.IsConnected==true
// but no messages have flowed past the stall threshold. Throttled
// per source by the watchdog itself (forceReconnectThrottle).
// Disconnect(250) gives in-flight publishes 250ms to drain;
// Connect() returns immediately and paho's reconnect machinery
// takes over from there. Captured-by-value `client` is the same
// pointer used everywhere else for this source.
liveness.ForceReconnectFn = func() {
client.Disconnect(250)
client.Connect()
}
// PR #1216 r2 item 3: tag collisions used to log.Fatalf, which
// killed the entire ingestor over one config typo and recreated
// the #1212 total-ingest-stop class this PR exists to prevent.
// registerLivenessOrSkip logs ERROR + skips liveness registration
// for the duplicate; the MQTT source still attempts to connect,
// it just isn't tracked by the watchdog. First registration
// remains authoritative.
registerLivenessOrSkip(liveness)
token := client.Connect()
// With ConnectRetry=true, token.Wait() blocks forever for unreachable brokers.
// WaitTimeout lets startup proceed; the client keeps retrying in the background
// and OnConnect fires (subscribing) when it eventually connects (#910).
if !token.WaitTimeout(time.Duration(connectTimeout) * time.Second) {
log.Printf("MQTT [%s] initial connection timed out — retrying in background", tag)
clients = append(clients, client)
continue
}
if token.Error() != nil {
log.Printf("MQTT [%s] connection failed (non-fatal): %v", tag, token.Error())
// BL1 fix: Disconnect to stop Paho's internal retry goroutines.
// With ConnectRetry=true, Connect() spawns background goroutines
// that leak if the client is simply discarded.
client.Disconnect(0)
continue
}
connectedCount++
clients = append(clients, client)
}
// BL2 fix: require at least one immediately-connected source. Timed-out
// clients are retrying in background (tracked in clients) but don't count
// as "connected" — a single unreachable broker must not silently run with
// zero active connections.
if connectedCount == 0 {
// Clean up any timed-out clients still retrying
for _, c := range clients {
c.Disconnect(0)
}
log.Fatal("no MQTT sources connected — all timed out or failed. Check broker is running (default: mqtt://localhost:1883). Set MQTT_BROKER env var or configure mqttSources in config.json")
}
if connectedCount < len(clients) {
log.Printf("Running — %d MQTT source(s) connected, %d retrying in background", connectedCount, len(clients)-connectedCount)
} else {
log.Printf("Running — %d MQTT source(s) connected", connectedCount)
}
// #1212: per-source stall watchdog. Detects "silently dead" sources
// where the client reports connected but no messages have flowed. Logs
// a WARN line every minute for any source silent for >5m. Scan every
// 60s so detection latency is bounded.
stopWatchdog := runLivenessWatchdog(60*time.Second, 5*time.Minute)
// Wait for shutdown signal
sig := make(chan os.Signal, 1)
signal.Notify(sig, syscall.SIGINT, syscall.SIGTERM)
<-sig
log.Println("Shutting down...")
retentionTicker.Stop()
metricsRetentionTicker.Stop()
if packetRetentionTicker != nil {
packetRetentionTicker.Stop()
}
statsTicker.Stop()
pruneQueueTicker.Stop()
stopWatchdog()
store.LogStats() // final stats on shutdown
for _, c := range clients {
c.Disconnect(5000) // 5s to allow in-flight messages to drain
}
log.Println("Done.")
}
// buildMQTTOpts creates MQTT client options for a source with bounded reconnect
// backoff, connect timeout, and TLS/auth configuration.
//
// Logs every TCP/TLS dial via OnConnectAttempt. Unlike SetReconnectingHandler
// (which only fires inside paho's reconnect goroutine and can be silent if
// that loop never iterates), OnConnectAttempt fires on every attempt — the
// initial Connect() and every reconnect. This is the observability fix for
// #1212 (prod outage on 2026-05-15 where the disconnect was logged but no
// reconnect activity was ever visible).
func buildMQTTOpts(source MQTTSource) *mqtt.ClientOptions {
tag := source.Name
if tag == "" {
tag = source.Broker
}
opts := mqtt.NewClientOptions().
AddBroker(source.Broker).
SetAutoReconnect(true).
SetConnectRetry(true).
SetOrderMatters(true).
SetMaxReconnectInterval(30 * time.Second).
SetConnectTimeout(10 * time.Second).
SetWriteTimeout(10 * time.Second).
// #1335: TCP-level keepalive surfaces a half-open socket within
// ~30-60s instead of waiting for the application-level watchdog
// (5m) to notice no messages. paho's MQTT PINGREQ uses this
// interval too — if the broker's PINGRESP doesn't arrive,
// ConnectionLost fires and auto-reconnect kicks in. Was unset
// (paho default 30s actually — making this explicit so it can't
// drift, and so operators reading the code know it's intentional
// per the #1335 RCA).
SetKeepAlive(30 * time.Second)
opts.SetConnectionAttemptHandler(func(broker *url.URL, tlsCfg *tls.Config) *tls.Config {
// Look up the per-source liveness state (registered in main) so we
// can attach an attempt counter. If not yet registered (first dial
// from Connect()), fall through with attempt=1.
var attempt int64 = 1
livenessRegistryMu.RLock()
s := livenessRegistry[tag]
livenessRegistryMu.RUnlock()
if s != nil {
attempt = atomic.AddInt64(&s.AttemptCount, 1)
}
log.Printf("MQTT [%s] connection attempt #%d to %s", tag, attempt, broker.String())
return tlsCfg
})
if source.Username != "" {
opts.SetUsername(source.Username)
}
if source.Password != "" {
opts.SetPassword(source.Password)
}
if source.RejectUnauthorized != nil && !*source.RejectUnauthorized {
opts.SetTLSConfig(&tls.Config{InsecureSkipVerify: true})
} else if strings.HasPrefix(source.Broker, "ssl://") {
opts.SetTLSConfig(&tls.Config{})
}
return opts
}
func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message, channelKeys map[string]string, regionKeys map[string][]byte, cfg *Config) {
// Liveness watchdog (#1212): record receipt before any processing so a
// slow handler still counts as "source is alive". Cheap atomic store.
markLivenessForTag(tag, time.Now())
defer func() {
if r := recover(); r != nil {
log.Printf("MQTT [%s] panic in handler: %v", tag, r)
}
}()
topic := m.Topic()
parts := strings.Split(topic, "/")
var msg map[string]interface{}
if err := json.Unmarshal(m.Payload(), &msg); err != nil {
return
}
// Skip status/connection topics
if topic == "meshcore/status" || topic == "meshcore/events/connection" {
return
}
// Observer blacklist: drop ALL messages from blacklisted observers before any
// DB writes (status, metrics, packets). Trumps IATA filter.
if len(parts) > 2 && cfg.IsObserverBlacklisted(parts[2]) {
log.Printf("MQTT [%s] observer %.8s blacklisted, dropping", tag, parts[2])
return
}
// Global observer IATA whitelist: if configured, drop messages from observers
// in non-whitelisted IATA regions. Applies to ALL message types (status + packets).
if len(parts) > 1 && !cfg.IsObserverIATAAllowed(parts[1]) {
return
}
// Status topic: meshcore/<region>/<observer_id>/status
// Per-source IATA filter does NOT apply here — observer metadata (noise_floor, battery, etc.)
// is region-independent and should be accepted from all observers regardless of
// which IATA regions are configured for packet ingestion.
if len(parts) >= 4 && parts[3] == "status" {
observerID := parts[2]
name, _ := msg["origin"].(string)
iata := parts[1]
meta := extractObserverMeta(msg)
if err := store.UpsertObserverAt(observerID, name, iata, meta, resolveRxTime(msg, tag)); err != nil {
log.Printf("MQTT [%s] observer status error: %v", tag, err)
}
// Insert metrics sample from status message
if meta != nil {
metricsData := &MetricsData{
ObserverID: observerID,
NoiseFloor: meta.NoiseFloor,
TxAirSecs: meta.TxAirSecs,
RxAirSecs: meta.RxAirSecs,
RecvErrors: meta.RecvErrors,
BatteryMv: meta.BatteryMv,
PacketsSent: meta.PacketsSent,
PacketsRecv: meta.PacketsRecv,
}
if err := store.InsertMetrics(metricsData); err != nil {
log.Printf("MQTT [%s] metrics insert error: %v", tag, err)
}
}
log.Printf("MQTT [%s] status: %s (%s)", tag, firstNonEmpty(name, observerID), iata)
return
}
// IATA filter applies to packet messages only — not status messages above.
if len(source.IATAFilter) > 0 && len(parts) > 1 {
region := parts[1]
matched := false
for _, f := range source.IATAFilter {
if f == region {
matched = true
break
}
}
if !matched {
return
}
}
// Format 1: Raw packet (meshcoretomqtt / Cisien format)
rawHex, _ := msg["raw"].(string)
if rawHex != "" {
validateSigs := cfg.ShouldValidateSignatures()
decoded, err := DecodePacket(rawHex, channelKeys, validateSigs)
if err != nil {
// Per #1211: include enough context to repro malformed-packet drops,
// but NEVER log the full observer ID (PII / fingerprinting risk).
// We log:
// - topic prefix (with observer segment elided)
// - 8-char observer prefix
// - payload length, claimed length (rawHex len)
obs := ""
if len(parts) > 2 {
obs = parts[2]
}
// Build a redacted topic that replaces parts[2] (the observer id)
// with the 8-char prefix, so the rest of the topic is preserved
// for debugging without leaking the full identifier.
redactedTopic := topic
if len(parts) > 2 {
redactedParts := make([]string, len(parts))
copy(redactedParts, parts)
if len(parts[2]) > 8 {
redactedParts[2] = parts[2][:8]
}
redactedTopic = strings.Join(redactedParts, "/")
}
log.Printf("MQTT [%s] decode error: %v (topic=%s observer=%.8s rawHexLen=%d)", tag, err, redactedTopic, obs, len(rawHex))
return
}
observerID := ""
region := ""
if len(parts) > 2 {
observerID = parts[2]
}
if len(parts) > 1 {
region = parts[1]
}
// Fallback to source-level region config when topic has no region (#788)
if region == "" && source.Region != "" {
region = source.Region
}
mqttMsg := &MQTTPacketMessage{Raw: rawHex}
mqttMsg.Timestamp = resolveRxTime(msg, tag)
// Parse optional region from JSON payload (#788)
if v, ok := msg["region"].(string); ok && v != "" {
mqttMsg.Region = v
}
if v, ok := msg["SNR"]; ok {
if f, ok := toFloat64(v); ok {
mqttMsg.SNR = &f
}
} else if v, ok := msg["snr"]; ok {
if f, ok := toFloat64(v); ok {
mqttMsg.SNR = &f
}
}
if v, ok := msg["RSSI"]; ok {
if f, ok := toFloat64(v); ok {
mqttMsg.RSSI = &f
}
} else if v, ok := msg["rssi"]; ok {
if f, ok := toFloat64(v); ok {
mqttMsg.RSSI = &f
}
}
if v, ok := msg["score"]; ok {
if f, ok := toFloat64(v); ok {
mqttMsg.Score = &f
}
} else if v, ok := msg["Score"]; ok {
if f, ok := toFloat64(v); ok {
mqttMsg.Score = &f
}
}
if v, ok := msg["direction"].(string); ok {
mqttMsg.Direction = &v
} else if v, ok := msg["Direction"].(string); ok {
mqttMsg.Direction = &v
}
if v, ok := msg["origin"].(string); ok {
mqttMsg.Origin = v
}
// For ADVERT packets with known coordinates, enforce geo_filter before
// storing anything — drop the entire message if outside the area.
if decoded.Header.PayloadTypeName == "ADVERT" && decoded.Payload.PubKey != "" {
ok, reason := ValidateAdvert(&decoded.Payload)
if !ok {
log.Printf("MQTT [%s] skipping corrupted ADVERT: %s", tag, reason)
return
}
// Signature validation: drop adverts with invalid ed25519 signatures
if validateSigs && decoded.Payload.SignatureValid != nil && !*decoded.Payload.SignatureValid {
hash := ComputeContentHash(rawHex)
truncPK := decoded.Payload.PubKey
if len(truncPK) > 16 {
truncPK = truncPK[:16]
}
log.Printf("MQTT [%s] DROPPED invalid signature: hash=%s name=%s observer=%s pubkey=%s",
tag, hash, decoded.Payload.Name, firstNonEmpty(mqttMsg.Origin, observerID), truncPK)
store.InsertDroppedPacket(&DroppedPacket{
Hash: hash,
RawHex: rawHex,
Reason: "invalid signature",
ObserverID: observerID,
ObserverName: mqttMsg.Origin,
NodePubKey: decoded.Payload.PubKey,
NodeName: decoded.Payload.Name,
})
return
}
foreign := false
if !NodePassesGeoFilter(decoded.Payload.Lat, decoded.Payload.Lon, cfg.GeoFilter) {
if cfg.ForeignAdverts.IsDropMode() {
return
}
foreign = true
lat, lon := 0.0, 0.0
if decoded.Payload.Lat != nil {
lat = *decoded.Payload.Lat
}
if decoded.Payload.Lon != nil {
lon = *decoded.Payload.Lon
}
truncPK := decoded.Payload.PubKey
if len(truncPK) > 16 {
truncPK = truncPK[:16]
}
log.Printf("MQTT [%s] foreign advert: node=%s name=%s lat=%.4f lon=%.4f observer=%s",
tag, truncPK, decoded.Payload.Name, lat, lon, firstNonEmpty(mqttMsg.Origin, observerID))
}
pktData := BuildPacketData(mqttMsg, decoded, observerID, region, regionKeys)
pktData.Foreign = foreign
isNew, err := store.InsertTransmission(pktData)
if err != nil {
log.Printf("MQTT [%s] db insert error: %v", tag, err)
}
role := advertRole(decoded.Payload.Flags)
if err := store.UpsertNode(decoded.Payload.PubKey, decoded.Payload.Name, role, decoded.Payload.Lat, decoded.Payload.Lon, pktData.Timestamp); err != nil {
log.Printf("MQTT [%s] node upsert error: %v", tag, err)
}
if foreign {
if err := store.MarkNodeForeign(decoded.Payload.PubKey); err != nil {
log.Printf("MQTT [%s] mark foreign error: %v", tag, err)
}
}
if isNew {
if err := store.IncrementAdvertCount(decoded.Payload.PubKey); err != nil {
log.Printf("MQTT [%s] advert count error: %v", tag, err)
}
}
// Update telemetry if present in advert
if decoded.Payload.BatteryMv != nil || decoded.Payload.TemperatureC != nil {
if err := store.UpdateNodeTelemetry(decoded.Payload.PubKey, decoded.Payload.BatteryMv, decoded.Payload.TemperatureC); err != nil {
log.Printf("MQTT [%s] node telemetry update error: %v", tag, err)
}
}
// Update default_scope when advert carries a matched transport scope (#899)
if pktData.IsTransportScoped {
if err := store.UpdateNodeDefaultScope(decoded.Payload.PubKey, pktData.ScopeName); err != nil {
log.Printf("MQTT [%s] node default_scope update error: %v", tag, err)
}
}
} else {
// Non-ADVERT packets: store normally (routing/channel messages from
// in-area observers are relevant regardless of relay hop origin).
pktData := BuildPacketData(mqttMsg, decoded, observerID, region, regionKeys)
if _, err := store.InsertTransmission(pktData); err != nil {
log.Printf("MQTT [%s] db insert error: %v", tag, err)
}
}
// Upsert observer
if observerID != "" {
origin, _ := msg["origin"].(string)
// Use effective region: payload > topic > source config (#788)
effectiveRegion := region
if mqttMsg.Region != "" {
effectiveRegion = mqttMsg.Region
}
if err := store.UpsertObserverAt(observerID, origin, effectiveRegion, nil, mqttMsg.Timestamp); err != nil {
log.Printf("MQTT [%s] observer upsert error: %v", tag, err)
}
}
return
}
// Format 2: Companion bridge channel message (meshcore/message/channel/<n>)
if strings.HasPrefix(topic, "meshcore/message/channel/") {
text, _ := msg["text"].(string)
if text == "" {
return
}
channelIdx := ""
if len(parts) >= 4 {
channelIdx = parts[3]
}
if ci, ok := msg["channel_idx"]; ok {
channelIdx = fmt.Sprintf("%v", ci)
}
// Extract sender from "Name: message" format
sender := ""
if idx := strings.Index(text, ": "); idx > 0 && idx < 50 {
sender = text[:idx]
}
channelName := fmt.Sprintf("ch%s", channelIdx)
// Build decoded JSON matching Node.js CHAN format
channelMsg := map[string]interface{}{
"type": "CHAN",
"channel": channelName,
"text": text,
"sender": sender,
}
if st, ok := msg["sender_timestamp"]; ok {
channelMsg["sender_timestamp"] = st
}
decodedJSON, _ := json.Marshal(channelMsg)
ingestNow := time.Now().UTC().Format(time.RFC3339)
hashInput := fmt.Sprintf("ch:%s:%s:%s", channelIdx, text, ingestNow)
h := sha256.Sum256([]byte(hashInput))
hash := hex.EncodeToString(h[:])[:16]
var snr, rssi, score *float64
var direction *string
if v, ok := msg["SNR"]; ok {
if f, ok := toFloat64(v); ok {
snr = &f
}
} else if v, ok := msg["snr"]; ok {
if f, ok := toFloat64(v); ok {
snr = &f
}
}
if v, ok := msg["RSSI"]; ok {
if f, ok := toFloat64(v); ok {
rssi = &f
}
} else if v, ok := msg["rssi"]; ok {
if f, ok := toFloat64(v); ok {
rssi = &f
}
}
if v, ok := msg["score"]; ok {
if f, ok := toFloat64(v); ok {
score = &f
}
} else if v, ok := msg["Score"]; ok {
if f, ok := toFloat64(v); ok {
score = &f
}
}
if v, ok := msg["direction"].(string); ok {
direction = &v
} else if v, ok := msg["Direction"].(string); ok {
direction = &v
}
pktData := &PacketData{
Timestamp: ingestNow, // #1370 (counters #1233): server ingest time, not envelope rxTime
ObserverID: "companion",
ObserverName: "L1 Pro (BLE)",
SNR: snr,
RSSI: rssi,
Score: score,
Direction: direction,
Hash: hash,
RouteType: 1, // FLOOD
PayloadType: 5, // GRP_TXT
PathJSON: "[]",
DecodedJSON: string(decodedJSON),
ChannelHash: channelName, // fast channel queries (#762)
}
if _, err := store.InsertTransmission(pktData); err != nil {
log.Printf("MQTT [%s] channel insert error: %v", tag, err)
}
// Note: we intentionally do NOT create a node entry for channel message senders.
// Channel messages don't carry the sender's real pubkey, so any entry we create
// would use a synthetic key ("sender-<name>") that doesn't match the real pubkey
// used for claiming/health lookups. The node will get a proper entry when it
// sends an advert. See issue #665.
log.Printf("MQTT [%s] channel message: ch%s from %s", tag, channelIdx, firstNonEmpty(sender, "unknown"))
return
}
// Format 2b: Companion bridge direct message (meshcore/message/direct/<id>)
if strings.HasPrefix(topic, "meshcore/message/direct/") {
text, _ := msg["text"].(string)
if text == "" {
return
}
sender := ""
if idx := strings.Index(text, ": "); idx > 0 && idx < 50 {
sender = text[:idx]
}
dm := map[string]interface{}{
"type": "DM",
"text": text,
"sender": sender,
}
if st, ok := msg["sender_timestamp"]; ok {
dm["sender_timestamp"] = st
}
decodedJSON, _ := json.Marshal(dm)
ingestNow := time.Now().UTC().Format(time.RFC3339)
hashInput := fmt.Sprintf("dm:%s:%s", text, ingestNow)
h := sha256.Sum256([]byte(hashInput))
hash := hex.EncodeToString(h[:])[:16]
var snr, rssi, score *float64
var direction *string
if v, ok := msg["SNR"]; ok {
if f, ok := toFloat64(v); ok {
snr = &f
}
} else if v, ok := msg["snr"]; ok {
if f, ok := toFloat64(v); ok {
snr = &f
}
}
if v, ok := msg["RSSI"]; ok {
if f, ok := toFloat64(v); ok {
rssi = &f
}
} else if v, ok := msg["rssi"]; ok {
if f, ok := toFloat64(v); ok {
rssi = &f
}
}
if v, ok := msg["score"]; ok {
if f, ok := toFloat64(v); ok {
score = &f
}
} else if v, ok := msg["Score"]; ok {
if f, ok := toFloat64(v); ok {
score = &f
}
}
if v, ok := msg["direction"].(string); ok {
direction = &v
} else if v, ok := msg["Direction"].(string); ok {
direction = &v
}
pktData := &PacketData{
Timestamp: ingestNow, // #1370 (counters #1233): server ingest time, not envelope rxTime
ObserverID: "companion",
ObserverName: "L1 Pro (BLE)",
SNR: snr,
RSSI: rssi,
Score: score,
Direction: direction,
Hash: hash,
RouteType: 1, // FLOOD
PayloadType: 2, // TXT_MSG
PathJSON: "[]",
DecodedJSON: string(decodedJSON),
}
if _, err := store.InsertTransmission(pktData); err != nil {
log.Printf("MQTT [%s] DM insert error: %v", tag, err)
}
log.Printf("MQTT [%s] direct message from %s", tag, firstNonEmpty(sender, "unknown"))
return
}
}
func toFloat64(v interface{}) (float64, bool) {
switch n := v.(type) {
case float64:
return n, true
case float32:
return float64(n), true
case int:
return float64(n), true
case int64:
return float64(n), true
case json.Number:
f, err := n.Float64()
return f, err == nil
case string:
s := strings.TrimSpace(n)
s = stripUnitSuffix(s)
f, err := strconv.ParseFloat(s, 64)
return f, err == nil
case uint:
return float64(n), true
case uint64:
return float64(n), true
default:
return 0, false
}
}
// unitSuffixes lists common RF/signal unit suffixes to strip before parsing.
var unitSuffixes = []string{"dBm", "dB", "mW", "km", "mi", "m"}
// stripUnitSuffix removes a trailing unit suffix (case-insensitive) from a
// numeric string so that values like "-110dBm" can be parsed as float64.
func stripUnitSuffix(s string) string {
lower := strings.ToLower(s)
for _, suffix := range unitSuffixes {
if strings.HasSuffix(lower, strings.ToLower(suffix)) {
return strings.TrimSpace(s[:len(s)-len(suffix)])
}
}
return s
}
// extractObserverMeta extracts hardware metadata from an MQTT status message.
// Casts battery_mv and uptime_secs to integers (they're always whole numbers).
func extractObserverMeta(msg map[string]interface{}) *ObserverMeta {
meta := &ObserverMeta{}
hasData := false
if v, ok := msg["model"].(string); ok && v != "" {
meta.Model = &v
hasData = true
}
if v, ok := msg["firmware"].(string); ok && v != "" {
meta.Firmware = &v
hasData = true
}
if v, ok := msg["firmware_version"].(string); ok && v != "" {
meta.Firmware = &v
hasData = true
}
if v, ok := msg["client_version"].(string); ok && v != "" {
meta.ClientVersion = &v
hasData = true
}
if v, ok := msg["clientVersion"].(string); ok && v != "" {
meta.ClientVersion = &v
hasData = true
}
if v, ok := msg["radio"].(string); ok && v != "" {
meta.Radio = &v
hasData = true
}
// Stats fields may be nested under a "stats" object or at top level.
// Try nested first, fall back to top-level for backward compatibility.
stats, _ := msg["stats"].(map[string]interface{})
if v := nestedOrTopLevel(stats, msg, "battery_mv"); v != nil {
if f, ok := toFloat64(v); ok {
iv := int(math.Round(f))
meta.BatteryMv = &iv
hasData = true
}
}
if v := nestedOrTopLevel(stats, msg, "uptime_secs"); v != nil {
if f, ok := toFloat64(v); ok {
iv := int64(math.Round(f))
meta.UptimeSecs = &iv
hasData = true
}
}
if v := nestedOrTopLevel(stats, msg, "noise_floor"); v != nil {
if f, ok := toFloat64(v); ok {
meta.NoiseFloor = &f
hasData = true
}
}
if v := nestedOrTopLevel(stats, msg, "tx_air_secs"); v != nil {
if f, ok := toFloat64(v); ok {
iv := int(math.Round(f))
meta.TxAirSecs = &iv
hasData = true
}
}
if v := nestedOrTopLevel(stats, msg, "rx_air_secs"); v != nil {
if f, ok := toFloat64(v); ok {
iv := int(math.Round(f))
meta.RxAirSecs = &iv
hasData = true
}
}
if v := nestedOrTopLevel(stats, msg, "recv_errors"); v != nil {
if f, ok := toFloat64(v); ok {
iv := int(math.Round(f))
meta.RecvErrors = &iv
hasData = true
}
}
if v := nestedOrTopLevel(stats, msg, "packets_sent"); v != nil {
if f, ok := toFloat64(v); ok {
iv := int(math.Round(f))
meta.PacketsSent = &iv
hasData = true
}
}
if v := nestedOrTopLevel(stats, msg, "packets_recv"); v != nil {
if f, ok := toFloat64(v); ok {
iv := int(math.Round(f))
meta.PacketsRecv = &iv
hasData = true
}
}
if !hasData {
return nil
}
return meta
}
// nestedOrTopLevel looks up a key in the nested map first, then the top-level map.
func nestedOrTopLevel(nested, toplevel map[string]interface{}, key string) interface{} {
if nested != nil {
if v, ok := nested[key]; ok {
return v
}
}
if v, ok := toplevel[key]; ok {
return v
}
return nil
}
func firstNonEmpty(vals ...string) string {
for _, v := range vals {
if v != "" {
return v
}
}
return ""
}
// resolveRxTime returns the observer receive-time for a packet, taken from
// the MQTT envelope's "timestamp" field. Falls back to ingest time only when
// the field is missing, unparseable, or implausibly in the future (a
// clock-skewed observer). Result is always RFC3339 UTC.
//
// The envelope timestamp is stamped by the uploader when the radio receives
// the frame, not when the MQTT message is published — so a buffered packet
// uploaded hours late still carries its true receive time. Using ingest time
// (time.Now()) here mis-dated such packets by the upload delay.
func resolveRxTime(msg map[string]interface{}, tag string) string {
now := time.Now().UTC()
raw, _ := msg["timestamp"].(string)
if raw == "" {
return now.Format(time.RFC3339)
}
t, err := parseEnvelopeTime(raw)
if err != nil {
log.Printf("MQTT [%s] unparseable timestamp %q, using ingest time", tag, raw)
return now.Format(time.RFC3339)
}
// Hard reject: > 14h ahead is a genuine clock error (UTC+14 is the maximum
// standard offset, so nothing valid should be further ahead than that).
if t.After(now.Add(14 * time.Hour)) {
log.Printf("MQTT [%s] future timestamp %q, using ingest time", tag, raw)
return now.Format(time.RFC3339)
}
// Hard reject: > 30 days in the past is an RTC-reset node reporting a
// factory date (e.g. 2020-01-01). Such a value would permanently drag
// transmissions.first_seen backwards via stmtUpdateTxFirstSeen in
// InsertTransmission. No legitimate buffered upload is that stale.
if t.Before(now.Add(-30 * 24 * time.Hour)) {
log.Printf("MQTT [%s] stale timestamp %q (>30d old), using ingest time", tag, raw)
return now.Format(time.RFC3339)
}
// Soft clamp: naive local-clock timestamps from UTC+N observers are parsed
// as-if UTC, making them appear N hours in the future. A UTC+2 observer's
// live packet looks 2h ahead, but it is NOT a buffered packet — the whole
// point of using rxTime is to preserve the past timestamp for packets that
// were buffered offline. If rxTime is ahead of now, the packet is live and
// ingest time is the correct value. This also prevents storing future
// timestamps that would show ⚠️ in the UI for every packet from UTC+N nodes.
if t.After(now) {
return now.Format(time.RFC3339)
}
return t.UTC().Format(time.RFC3339)
}
// parseEnvelopeTime parses the MQTT envelope timestamp. Two on-wire forms
// occur: zone-aware ISO8601 (RFC3339), and a naive local-clock ISO string
// with no zone (python datetime.isoformat()). Zone-aware layouts are tried
// first; naive layouts are assumed UTC, leaving a bounded residual offset
// equal to the observer's UTC offset for naive-timestamp uploaders.
func parseEnvelopeTime(s string) (time.Time, error) {
for _, layout := range []string{
time.RFC3339, // 2026-05-16T10:00:00Z / +02:00
"2006-01-02T15:04:05.999999", // python isoformat w/ microseconds
"2006-01-02T15:04:05", // naive ISO
} {
if t, err := time.Parse(layout, s); err == nil {
return t, nil
}
}
return time.Time{}, fmt.Errorf("unrecognized timestamp layout: %q", s)
}
// deriveHashtagChannelKey derives an AES-128 key from a channel name.
// Same algorithm as Node.js: SHA-256(channelName) → first 32 hex chars (16 bytes).
func deriveHashtagChannelKey(channelName string) string {
h := sha256.Sum256([]byte(channelName))
return hex.EncodeToString(h[:16])
}
// loadChannelKeys loads channel decryption keys from config and/or a JSON file.
// Merge priority: rainbow (lowest) → derived from hashChannels → explicit config (highest).
func loadChannelKeys(cfg *Config, configPath string) map[string]string {
keys := make(map[string]string)
// 1. Rainbow table keys (lowest priority)
keysPath := os.Getenv("CHANNEL_KEYS_PATH")
if keysPath == "" {
keysPath = cfg.ChannelKeysPath
}
if keysPath == "" {
keysPath = filepath.Join(filepath.Dir(configPath), "channel-rainbow.json")
}
rainbowCount := 0
if data, err := os.ReadFile(keysPath); err == nil {
var fileKeys map[string]string
if err := json.Unmarshal(data, &fileKeys); err == nil {
for k, v := range fileKeys {
keys[k] = v
}
rainbowCount = len(fileKeys)
log.Printf("Loaded %d channel keys from %s", rainbowCount, keysPath)
} else {
log.Printf("Warning: failed to parse channel keys file %s: %v", keysPath, err)
}
}
// 2. Derived keys from hashChannels (middle priority)
derivedCount := 0
for _, raw := range cfg.HashChannels {
trimmed := strings.TrimSpace(raw)
if trimmed == "" {
continue
}
channelName := trimmed
if !strings.HasPrefix(channelName, "#") {
channelName = "#" + channelName
}
// Skip if explicit config already has this key
if _, exists := cfg.ChannelKeys[channelName]; exists {
continue
}
keys[channelName] = deriveHashtagChannelKey(channelName)
derivedCount++
}
if derivedCount > 0 {
log.Printf("[channels] %d derived from hashChannels", derivedCount)
}
// 3. Explicit config keys (highest priority — overrides rainbow + derived)
for k, v := range cfg.ChannelKeys {
normalized := normalizeChannelName(k)
if normalized != k {
log.Printf("[channels] Normalizing known channel key %q → %q for display", k, normalized)
}
// Detect config collision: if both "public" and "Public" are present,
// the normalized key collides. Resolve deterministically: prefer the
// canonical (already-normalized) form over the lowercase variant.
if _, dupe := keys[normalized]; dupe {
// If the incoming key IS the canonical form, it wins (overwrite).
// If the incoming key is a non-canonical form (e.g., "public"), keep existing.
if k == normalized {
log.Printf("[channels] Resolving duplicate %q: canonical form wins over non-canonical", normalized)
keys[normalized] = v
} else {
log.Printf("[channels] WARNING: duplicate channel key %q — config has %q normalizing to %q, keeping canonical value", normalized, k, normalized)
}
} else {
keys[normalized] = v
}
}
return keys
}
func loadRegionKeys(cfg *Config) map[string][]byte {
keys := make(map[string][]byte)
for _, raw := range cfg.HashRegions {
name := strings.TrimSpace(raw)
if name == "" {
log.Printf("[regions] skipping empty hashRegions entry")
continue
}
if !strings.HasPrefix(name, "#") {
name = "#" + name
}
if _, exists := keys[name]; exists {
log.Printf("[regions] duplicate region %q ignored", name)
continue
}
h := sha256.Sum256([]byte(name))
keys[name] = h[:16]
}
if len(keys) > 0 {
log.Printf("[regions] %d region key(s) loaded", len(keys))
}
return keys
}
// matchScope performs one HMAC-SHA256 per configured region. Expected
// len(regionKeys) ≤ 50; beyond that, consider a pre-indexed lookup table.
func matchScope(regionKeys map[string][]byte, payloadType byte, payloadRaw []byte, code1 string) string {
if code1 == "0000" || len(regionKeys) == 0 || len(payloadRaw) == 0 {
return ""
}
for name, key := range regionKeys {
mac := hmac.New(sha256.New, key)
mac.Write([]byte{payloadType})
mac.Write(payloadRaw)
hmacBytes := mac.Sum(nil)
code := uint16(hmacBytes[0]) | uint16(hmacBytes[1])<<8
if code == 0 {
code = 1
} else if code == 0xFFFF {
code = 0xFFFE
}
codeBytes := [2]byte{byte(code & 0xFF), byte(code >> 8)}
if strings.ToUpper(hex.EncodeToString(codeBytes[:])) == code1 {
return name
}
}
return ""
}
// Version info (set via ldflags)
var version = "dev"
func init() {
if len(os.Args) > 1 && os.Args[1] == "--version" {
fmt.Println("corescope-ingestor", version)
os.Exit(0)
}
}