From ba7cd0fba785b41934b13bccfa5be6b7e2cfa1ba Mon Sep 17 00:00:00 2001 From: Kpa-clawbot Date: Thu, 16 Apr 2026 08:10:47 -0700 Subject: [PATCH] =?UTF-8?q?fix:=20clock=20skew=20sanity=20checks=20?= =?UTF-8?q?=E2=80=94=20filter=20epoch-0,=20cap=20drift,=20min=20samples=20?= =?UTF-8?q?(#769)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Nodes with dead RTCs show -690d skew and -3 billion s/day drift. Fix: 1. **No Clock severity**: |skew| > 365d → `no_clock`, skip drift 2. **Drift cap**: |drift| > 86400 s/day → drift reported as 0 (physically impossible) 3. **Min samples**: < 5 samples → no drift regression 4. **Frontend**: 'No Clock' badge, '–' for unreliable drift Fixes the crazy stats on the Clock Health fleet view. --------- Co-authored-by: you --- cmd/server/clock_skew.go | 32 +++++- cmd/server/clock_skew_test.go | 179 +++++++++++++++++++++++++++++++++- public/analytics.js | 10 +- public/nodes.js | 5 +- public/roles.js | 10 +- public/style.css | 2 + 6 files changed, 224 insertions(+), 14 deletions(-) diff --git a/cmd/server/clock_skew.go b/cmd/server/clock_skew.go index 41ec86a..f6fc024 100644 --- a/cmd/server/clock_skew.go +++ b/cmd/server/clock_skew.go @@ -16,13 +16,23 @@ const ( SkewWarning SkewSeverity = "warning" // 5 min – 1 hour SkewCritical SkewSeverity = "critical" // 1 hour – 30 days SkewAbsurd SkewSeverity = "absurd" // > 30 days + SkewNoClock SkewSeverity = "no_clock" // > 365 days — uninitialized RTC ) // Default thresholds in seconds. 
const ( - skewThresholdWarnSec = 5 * 60 // 5 minutes - skewThresholdCriticalSec = 60 * 60 // 1 hour - skewThresholdAbsurdSec = 30 * 24 * 3600 // 30 days + skewThresholdWarnSec = 5 * 60 // 5 minutes + skewThresholdCriticalSec = 60 * 60 // 1 hour + skewThresholdAbsurdSec = 30 * 24 * 3600 // 30 days + skewThresholdNoClockSec = 365 * 24 * 3600 // 365 days — uninitialized RTC + + // minDriftSamples is the minimum number of advert transmissions needed + // to compute a meaningful linear drift rate. + minDriftSamples = 5 + + // maxReasonableDriftPerDay caps drift display. Physically impossible + // drift rates (> 1 day/day) indicate insufficient or outlier samples. + maxReasonableDriftPerDay = 86400.0 ) // classifySkew maps absolute skew (seconds) to a severity level. @@ -30,6 +40,8 @@ const ( // and thresholds are integer multiples of 60 — no rounding artifacts. func classifySkew(absSkewSec float64) SkewSeverity { switch { + case absSkewSec >= skewThresholdNoClockSec: + return SkewNoClock case absSkewSec >= skewThresholdAbsurdSec: return SkewAbsurd case absSkewSec >= skewThresholdCriticalSec: @@ -408,7 +420,17 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew { medSkew := median(allSkews) meanSkew := mean(allSkews) absMedian := math.Abs(medSkew) - drift := computeDrift(tsSkews) + severity := classifySkew(absMedian) + + // For no_clock nodes (uninitialized RTC), skip drift — data is meaningless. + var drift float64 + if severity != SkewNoClock && len(tsSkews) >= minDriftSamples { + drift = computeDrift(tsSkews) + // Cap physically impossible drift rates. + if math.Abs(drift) > maxReasonableDriftPerDay { + drift = 0 + } + } // Build sparkline samples from tsSkews (sorted by time). 
sort.Slice(tsSkews, func(i, j int) bool { return tsSkews[i].ts < tsSkews[j].ts }) @@ -423,7 +445,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew { MedianSkewSec: round(medSkew, 1), LastSkewSec: round(lastSkew, 1), DriftPerDaySec: round(drift, 2), - Severity: classifySkew(absMedian), + Severity: severity, SampleCount: totalSamples, Calibrated: anyCal, LastAdvertTS: lastAdvTS, diff --git a/cmd/server/clock_skew_test.go b/cmd/server/clock_skew_test.go index d359b5f..bb8a3d6 100644 --- a/cmd/server/clock_skew_test.go +++ b/cmd/server/clock_skew_test.go @@ -1,8 +1,10 @@ package main import ( + "fmt" "math" "testing" + "time" ) // ── classifySkew ─────────────────────────────────────────────────────────────── @@ -22,7 +24,9 @@ func TestClassifySkew(t *testing.T) { {86400, SkewCritical}, // 1 day {2592000 - 1, SkewCritical}, // just under 30 days {2592000, SkewAbsurd}, // exactly 30 days - {86400 * 365, SkewAbsurd}, // 1 year + {86400 * 365 - 1, SkewAbsurd}, // just under 365 days + {86400 * 365, SkewNoClock}, // exactly 365 days + {86400 * 365 * 10, SkewNoClock}, // 10 years (epoch-0 style) } for _, tc := range tests { got := classifySkew(tc.absSkew) @@ -367,3 +371,176 @@ func TestGetNodeClockSkew_NoData(t *testing.T) { t.Error("expected nil for nonexistent node") } } + +// ── Sanity check tests (#XXX — clock skew crazy stats) ──────────────────────── + +func TestGetNodeClockSkew_NoClock_EpochZero(t *testing.T) { + // Node with epoch-0 timestamp produces huge skew → no_clock severity, drift=0. 
+ ps := NewPacketStore(nil, nil) + pt := 4 // ADVERT + + // Epoch-ish advert: advertTS near start of 2020, observed in 2023 → |skew| > 365 days + var txs []*StoreTx + baseObs := int64(1700000000) // ~Nov 2023 + for i := 0; i < 6; i++ { + obsTS := baseObs + int64(i)*7200 + tx := &StoreTx{ + Hash: "epoch-h" + string(rune('0'+i)), + PayloadType: &pt, + DecodedJSON: `{"payload":{"timestamp":1577836800}}`, // Jan 1 2020 — valid but way off + Observations: []*StoreObs{ + {ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)}, + }, + } + txs = append(txs, tx) + } + + ps.mu.Lock() + ps.byNode["EPOCH"] = txs + for _, tx := range txs { + ps.byPayloadType[4] = append(ps.byPayloadType[4], tx) + } + ps.clockSkew.computeInterval = 0 + ps.mu.Unlock() + + result := ps.GetNodeClockSkew("EPOCH") + if result == nil { + t.Fatal("expected clock skew result for epoch-0 node") + } + if result.Severity != SkewNoClock { + t.Errorf("severity = %v, want no_clock", result.Severity) + } + if result.DriftPerDaySec != 0 { + t.Errorf("drift = %v, want 0 for no_clock node", result.DriftPerDaySec) + } +} + +func TestGetNodeClockSkew_TooFewSamplesForDrift(t *testing.T) { + // Node with only 2 advert samples → drift should not be computed. 
+ ps := NewPacketStore(nil, nil) + pt := 4 + + baseObs := int64(1700000000) + var txs []*StoreTx + for i := 0; i < 2; i++ { + obsTS := baseObs + int64(i)*7200 + advTS := obsTS + 120 // 120s ahead + tx := &StoreTx{ + Hash: "few-h" + string(rune('0'+i)), + PayloadType: &pt, + DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`, + Observations: []*StoreObs{ + {ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)}, + }, + } + txs = append(txs, tx) + } + + ps.mu.Lock() + ps.byNode["FEWSAMP"] = txs + for _, tx := range txs { + ps.byPayloadType[4] = append(ps.byPayloadType[4], tx) + } + ps.clockSkew.computeInterval = 0 + ps.mu.Unlock() + + result := ps.GetNodeClockSkew("FEWSAMP") + if result == nil { + t.Fatal("expected clock skew result") + } + if result.DriftPerDaySec != 0 { + t.Errorf("drift = %v, want 0 for 2-sample node (minimum is %d)", result.DriftPerDaySec, minDriftSamples) + } +} + +func TestGetNodeClockSkew_AbsurdDriftCapped(t *testing.T) { + // Node with wildly varying skew producing |drift| > 86400 s/day → drift capped to 0. + ps := NewPacketStore(nil, nil) + pt := 4 + + // Create 6 samples with extreme skew variation to produce absurd drift. 
+ baseObs := int64(1700000000) + var txs []*StoreTx + for i := 0; i < 6; i++ { + obsTS := baseObs + int64(i)*3600 + // Alternate between huge positive and negative skew offsets + skewOffset := int64(50000 * (1 - 2*(i%2))) // +50000 or -50000 + advTS := obsTS + skewOffset + tx := &StoreTx{ + Hash: "wild-h" + string(rune('0'+i)), + PayloadType: &pt, + DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`, + Observations: []*StoreObs{ + {ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)}, + }, + } + txs = append(txs, tx) + } + + ps.mu.Lock() + ps.byNode["WILD"] = txs + for _, tx := range txs { + ps.byPayloadType[4] = append(ps.byPayloadType[4], tx) + } + ps.clockSkew.computeInterval = 0 + ps.mu.Unlock() + + result := ps.GetNodeClockSkew("WILD") + if result == nil { + t.Fatal("expected clock skew result") + } + if math.Abs(result.DriftPerDaySec) > maxReasonableDriftPerDay { + t.Errorf("drift = %v, should be capped (|drift| > %v)", result.DriftPerDaySec, maxReasonableDriftPerDay) + } +} + +func TestGetNodeClockSkew_NormalNodeWithDrift(t *testing.T) { + // Normal node with 6 samples and consistent linear drift → drift computed correctly. 
+ ps := NewPacketStore(nil, nil) + pt := 4 + + baseObs := int64(1700000000) + var txs []*StoreTx + for i := 0; i < 6; i++ { + obsTS := baseObs + int64(i)*7200 // every 2 hours + // Drift: 1 sec per 2 hours = 12 sec/day + advTS := obsTS + 120 + int64(i) // skew grows by 1s per sample (2h apart) + tx := &StoreTx{ + Hash: "norm-h" + string(rune('0'+i)), + PayloadType: &pt, + DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`, + Observations: []*StoreObs{ + {ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)}, + }, + } + txs = append(txs, tx) + } + + ps.mu.Lock() + ps.byNode["NORMAL"] = txs + for _, tx := range txs { + ps.byPayloadType[4] = append(ps.byPayloadType[4], tx) + } + ps.clockSkew.computeInterval = 0 + ps.mu.Unlock() + + result := ps.GetNodeClockSkew("NORMAL") + if result == nil { + t.Fatal("expected clock skew result") + } + if result.Severity != SkewOK { + t.Errorf("severity = %v, want ok", result.Severity) + } + // 1s per 7200s = 12 s/day + if result.DriftPerDaySec == 0 { + t.Error("expected non-zero drift for linearly drifting node") + } + if math.Abs(result.DriftPerDaySec) > maxReasonableDriftPerDay { + t.Errorf("drift = %v, should be reasonable", result.DriftPerDaySec) + } +} + +// formatInt64 is a test helper to format int64 as string for JSON embedding. 
+func formatInt64(n int64) string { + return fmt.Sprintf("%d", n) +} diff --git a/public/analytics.js b/public/analytics.js index 3fd19b3..127cda1 100644 --- a/public/analytics.js +++ b/public/analytics.js @@ -3448,8 +3448,8 @@ function destroy() { _analyticsData = {}; _channelData = null; if (_ngState && _ data.forEach(function(n) { if (counts[n.severity] !== undefined) counts[n.severity]++; }); // Filter buttons (also serve as summary — no separate stats pills needed) - var filterColors = { ok: 'var(--status-green)', warning: 'var(--status-yellow)', critical: 'var(--status-orange)', absurd: 'var(--status-purple)' }; - var filters = ['all', 'ok', 'warning', 'critical', 'absurd']; + var filterColors = { ok: 'var(--status-green)', warning: 'var(--status-yellow)', critical: 'var(--status-orange)', absurd: 'var(--status-purple)', no_clock: 'var(--text-muted)' }; + var filters = ['all', 'ok', 'warning', 'critical', 'absurd', 'no_clock']; var filterHtml = '
' + filters.map(function(f) { var dot = f !== 'all' ? '' : ''; return '