fix: clock skew sanity checks — filter epoch-0, cap drift, min samples (#769)

Nodes with dead RTCs show -690d skew and -3 billion s/day drift. Fix:

1. **No Clock severity**: |skew| > 365d → `no_clock`, skip drift
2. **Drift cap**: |drift| > 86400 s/day → reported as 0 (physically impossible)
3. **Min samples**: < 5 samples → no drift regression
4. **Frontend**: 'No Clock' badge, '–' for unreliable drift

Fixes the crazy stats on the Clock Health fleet view.

---------

Co-authored-by: you <you@example.com>
This commit is contained in:
Kpa-clawbot
2026-04-16 08:10:47 -07:00
committed by GitHub
parent 6a648dea11
commit ba7cd0fba7
6 changed files with 224 additions and 14 deletions

View File

@@ -16,13 +16,23 @@ const (
SkewWarning SkewSeverity = "warning" // 5 min 1 hour
SkewCritical SkewSeverity = "critical" // 1 hour 30 days
SkewAbsurd SkewSeverity = "absurd" // > 30 days
SkewNoClock SkewSeverity = "no_clock" // > 365 days — uninitialized RTC
)
// Default thresholds in seconds.
const (
skewThresholdWarnSec = 5 * 60 // 5 minutes
skewThresholdCriticalSec = 60 * 60 // 1 hour
skewThresholdAbsurdSec = 30 * 24 * 3600 // 30 days
skewThresholdWarnSec = 5 * 60 // 5 minutes
skewThresholdCriticalSec = 60 * 60 // 1 hour
skewThresholdAbsurdSec = 30 * 24 * 3600 // 30 days
skewThresholdNoClockSec = 365 * 24 * 3600 // 365 days — uninitialized RTC
// minDriftSamples is the minimum number of advert transmissions needed
// to compute a meaningful linear drift rate.
minDriftSamples = 5
// maxReasonableDriftPerDay caps drift display. Physically impossible
// drift rates (> 1 day/day) indicate insufficient or outlier samples.
maxReasonableDriftPerDay = 86400.0
)
// classifySkew maps absolute skew (seconds) to a severity level.
@@ -30,6 +40,8 @@ const (
// and thresholds are integer multiples of 60 — no rounding artifacts.
func classifySkew(absSkewSec float64) SkewSeverity {
switch {
case absSkewSec >= skewThresholdNoClockSec:
return SkewNoClock
case absSkewSec >= skewThresholdAbsurdSec:
return SkewAbsurd
case absSkewSec >= skewThresholdCriticalSec:
@@ -408,7 +420,17 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
medSkew := median(allSkews)
meanSkew := mean(allSkews)
absMedian := math.Abs(medSkew)
drift := computeDrift(tsSkews)
severity := classifySkew(absMedian)
// For no_clock nodes (uninitialized RTC), skip drift — data is meaningless.
var drift float64
if severity != SkewNoClock && len(tsSkews) >= minDriftSamples {
drift = computeDrift(tsSkews)
// Cap physically impossible drift rates.
if math.Abs(drift) > maxReasonableDriftPerDay {
drift = 0
}
}
// Build sparkline samples from tsSkews (sorted by time).
sort.Slice(tsSkews, func(i, j int) bool { return tsSkews[i].ts < tsSkews[j].ts })
@@ -423,7 +445,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
MedianSkewSec: round(medSkew, 1),
LastSkewSec: round(lastSkew, 1),
DriftPerDaySec: round(drift, 2),
Severity: classifySkew(absMedian),
Severity: severity,
SampleCount: totalSamples,
Calibrated: anyCal,
LastAdvertTS: lastAdvTS,

View File

@@ -1,8 +1,10 @@
package main
import (
"fmt"
"math"
"testing"
"time"
)
// ── classifySkew ───────────────────────────────────────────────────────────────
@@ -22,7 +24,9 @@ func TestClassifySkew(t *testing.T) {
{86400, SkewCritical}, // 1 day
{2592000 - 1, SkewCritical}, // just under 30 days
{2592000, SkewAbsurd}, // exactly 30 days
{86400 * 365, SkewAbsurd}, // 1 year
{86400 * 365 - 1, SkewAbsurd}, // just under 365 days
{86400 * 365, SkewNoClock}, // exactly 365 days
{86400 * 365 * 10, SkewNoClock}, // 10 years (epoch-0 style)
}
for _, tc := range tests {
got := classifySkew(tc.absSkew)
@@ -367,3 +371,176 @@ func TestGetNodeClockSkew_NoData(t *testing.T) {
t.Error("expected nil for nonexistent node")
}
}
// ── Sanity check tests (#769 — clock skew crazy stats) ────────────────────────
// TestGetNodeClockSkew_NoClock_EpochZero feeds the store six adverts whose
// embedded timestamp is pinned to 2020-01-01 while the observer saw them in
// late 2023. The |skew| is therefore far beyond 365 days, so the node must be
// reported with no_clock severity and a drift of 0.
func TestGetNodeClockSkew_NoClock_EpochZero(t *testing.T) {
	const (
		sampleCount   = 6
		firstObserved = int64(1700000000) // ~Nov 2023
		observedStep  = int64(7200)       // 2 hours between observations
	)
	advertType := 4 // ADVERT

	store := NewPacketStore(nil, nil)

	adverts := make([]*StoreTx, 0, sampleCount)
	for i := 0; i < sampleCount; i++ {
		observedAt := time.Unix(firstObserved+int64(i)*observedStep, 0).UTC()
		adverts = append(adverts, &StoreTx{
			Hash:        fmt.Sprintf("epoch-h%d", i),
			PayloadType: &advertType,
			DecodedJSON: `{"payload":{"timestamp":1577836800}}`, // Jan 1 2020 — valid but way off
			Observations: []*StoreObs{
				&StoreObs{ObserverID: "obs1", Timestamp: observedAt.Format(time.RFC3339)},
			},
		})
	}

	// Register the adverts in the store indexes by hand. The lock is released
	// explicitly before the query below.
	store.mu.Lock()
	store.byNode["EPOCH"] = adverts
	store.byPayloadType[4] = append(store.byPayloadType[4], adverts...)
	// NOTE(review): presumably a zero interval forces a fresh computation on
	// the next query — confirm against the clockSkew implementation.
	store.clockSkew.computeInterval = 0
	store.mu.Unlock()

	got := store.GetNodeClockSkew("EPOCH")
	if got == nil {
		t.Fatal("expected clock skew result for epoch-0 node")
	}
	if got.Severity != SkewNoClock {
		t.Errorf("severity = %v, want no_clock", got.Severity)
	}
	if got.DriftPerDaySec != 0 {
		t.Errorf("drift = %v, want 0 for no_clock node", got.DriftPerDaySec)
	}
}
// TestGetNodeClockSkew_TooFewSamplesForDrift gives a node only two advert
// samples, fewer than minDriftSamples, and expects the reported drift to stay
// at 0.
func TestGetNodeClockSkew_TooFewSamplesForDrift(t *testing.T) {
	const (
		sampleCount   = 2
		firstObserved = int64(1700000000)
		observedStep  = int64(7200)
		aheadBy       = int64(120) // advert timestamp is 120s ahead of the observation
	)
	advertType := 4

	store := NewPacketStore(nil, nil)

	adverts := make([]*StoreTx, 0, sampleCount)
	for i := 0; i < sampleCount; i++ {
		observedSec := firstObserved + int64(i)*observedStep
		advertSec := observedSec + aheadBy
		adverts = append(adverts, &StoreTx{
			Hash:        fmt.Sprintf("few-h%d", i),
			PayloadType: &advertType,
			DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advertSec) + `}}`,
			Observations: []*StoreObs{
				&StoreObs{ObserverID: "obs1", Timestamp: time.Unix(observedSec, 0).UTC().Format(time.RFC3339)},
			},
		})
	}

	// Register the adverts in the store indexes by hand. The lock is released
	// explicitly before the query below.
	store.mu.Lock()
	store.byNode["FEWSAMP"] = adverts
	store.byPayloadType[4] = append(store.byPayloadType[4], adverts...)
	store.clockSkew.computeInterval = 0
	store.mu.Unlock()

	got := store.GetNodeClockSkew("FEWSAMP")
	if got == nil {
		t.Fatal("expected clock skew result")
	}
	if got.DriftPerDaySec != 0 {
		t.Errorf("drift = %v, want 0 for 2-sample node (minimum is %d)", got.DriftPerDaySec, minDriftSamples)
	}
}
// TestGetNodeClockSkew_AbsurdDriftCapped builds six hourly samples whose skew
// flips between +50000s and -50000s, then checks that the reported drift never
// exceeds maxReasonableDriftPerDay in magnitude.
func TestGetNodeClockSkew_AbsurdDriftCapped(t *testing.T) {
	const (
		sampleCount   = 6
		firstObserved = int64(1700000000)
		observedStep  = int64(3600)
	)
	advertType := 4

	store := NewPacketStore(nil, nil)

	adverts := make([]*StoreTx, 0, sampleCount)
	for i := 0; i < sampleCount; i++ {
		observedSec := firstObserved + int64(i)*observedStep

		// Even samples are 50000s ahead, odd samples 50000s behind.
		offset := int64(50000)
		if i%2 == 1 {
			offset = -offset
		}
		advertSec := observedSec + offset

		adverts = append(adverts, &StoreTx{
			Hash:        fmt.Sprintf("wild-h%d", i),
			PayloadType: &advertType,
			DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advertSec) + `}}`,
			Observations: []*StoreObs{
				&StoreObs{ObserverID: "obs1", Timestamp: time.Unix(observedSec, 0).UTC().Format(time.RFC3339)},
			},
		})
	}

	// Register the adverts in the store indexes by hand. The lock is released
	// explicitly before the query below.
	store.mu.Lock()
	store.byNode["WILD"] = adverts
	store.byPayloadType[4] = append(store.byPayloadType[4], adverts...)
	store.clockSkew.computeInterval = 0
	store.mu.Unlock()

	got := store.GetNodeClockSkew("WILD")
	if got == nil {
		t.Fatal("expected clock skew result")
	}
	if math.Abs(got.DriftPerDaySec) > maxReasonableDriftPerDay {
		t.Errorf("drift = %v, should be capped (|drift| > %v)", got.DriftPerDaySec, maxReasonableDriftPerDay)
	}
}
// TestGetNodeClockSkew_NormalNodeWithDrift models a healthy node: six samples
// taken two hours apart, starting 120s ahead and gaining one more second per
// sample. That is 1s per 7200s, i.e. 12 s/day. The test expects ok severity
// and a non-zero drift that stays within maxReasonableDriftPerDay.
func TestGetNodeClockSkew_NormalNodeWithDrift(t *testing.T) {
	const (
		sampleCount   = 6
		firstObserved = int64(1700000000)
		observedStep  = int64(7200) // every 2 hours
		baseSkew      = int64(120)
	)
	advertType := 4

	store := NewPacketStore(nil, nil)

	adverts := make([]*StoreTx, 0, sampleCount)
	for i := 0; i < sampleCount; i++ {
		observedSec := firstObserved + int64(i)*observedStep
		// Skew grows by exactly one second with each successive sample.
		advertSec := observedSec + baseSkew + int64(i)
		adverts = append(adverts, &StoreTx{
			Hash:        fmt.Sprintf("norm-h%d", i),
			PayloadType: &advertType,
			DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advertSec) + `}}`,
			Observations: []*StoreObs{
				&StoreObs{ObserverID: "obs1", Timestamp: time.Unix(observedSec, 0).UTC().Format(time.RFC3339)},
			},
		})
	}

	// Register the adverts in the store indexes by hand. The lock is released
	// explicitly before the query below.
	store.mu.Lock()
	store.byNode["NORMAL"] = adverts
	store.byPayloadType[4] = append(store.byPayloadType[4], adverts...)
	store.clockSkew.computeInterval = 0
	store.mu.Unlock()

	got := store.GetNodeClockSkew("NORMAL")
	if got == nil {
		t.Fatal("expected clock skew result")
	}
	if got.Severity != SkewOK {
		t.Errorf("severity = %v, want ok", got.Severity)
	}
	// Only the presence and plausibility of the drift are checked, not its
	// exact value.
	if got.DriftPerDaySec == 0 {
		t.Error("expected non-zero drift for linearly drifting node")
	}
	if math.Abs(got.DriftPerDaySec) > maxReasonableDriftPerDay {
		t.Errorf("drift = %v, should be reasonable", got.DriftPerDaySec)
	}
}
// formatInt64 renders n in base 10 so tests can splice it into hand-written
// JSON payload strings.
func formatInt64(n int64) string {
	return fmt.Sprint(n)
}

View File

@@ -3448,8 +3448,8 @@ function destroy() { _analyticsData = {}; _channelData = null; if (_ngState && _
data.forEach(function(n) { if (counts[n.severity] !== undefined) counts[n.severity]++; });
// Filter buttons (also serve as summary — no separate stats pills needed)
var filterColors = { ok: 'var(--status-green)', warning: 'var(--status-yellow)', critical: 'var(--status-orange)', absurd: 'var(--status-purple)' };
var filters = ['all', 'ok', 'warning', 'critical', 'absurd'];
var filterColors = { ok: 'var(--status-green)', warning: 'var(--status-yellow)', critical: 'var(--status-orange)', absurd: 'var(--status-purple)', no_clock: 'var(--text-muted)' };
var filters = ['all', 'ok', 'warning', 'critical', 'absurd', 'no_clock'];
var filterHtml = '<div style="margin-bottom:10px">' + filters.map(function(f) {
var dot = f !== 'all' ? '<span style="display:inline-block;width:8px;height:8px;border-radius:50%;background:' + filterColors[f] + ';margin-right:4px;vertical-align:middle"></span>' : '';
return '<button class="clock-filter-btn' + (activeFilter === f ? ' active' : '') + '" data-filter="' + f + '">' +
@@ -3461,11 +3461,13 @@ function destroy() { _analyticsData = {}; _channelData = null; if (_ngState && _
var rowsHtml = filtered.map(function(n) {
var rowClass = 'clock-fleet-row--' + (n.severity || 'ok');
var lastAdv = n.lastObservedTS ? new Date(n.lastObservedTS * 1000).toISOString().replace('T', ' ').replace(/\.\d+Z/, ' UTC') : '—';
var skewText = n.severity === 'no_clock' ? 'No Clock' : formatSkew(n.medianSkewSec);
var driftText = n.severity === 'no_clock' || !n.driftPerDaySec ? '' : formatDrift(n.driftPerDaySec);
return '<tr class="' + rowClass + '" data-pubkey="' + esc(n.pubkey) + '" style="cursor:pointer">' +
'<td><strong>' + esc(n.nodeName || n.pubkey.slice(0, 12)) + '</strong></td>' +
'<td style="font-family:var(--mono,monospace)">' + formatSkew(n.medianSkewSec) + '</td>' +
'<td style="font-family:var(--mono,monospace)">' + skewText + '</td>' +
'<td>' + renderSkewBadge(n.severity, n.medianSkewSec) + '</td>' +
'<td style="font-family:var(--mono,monospace)">' + formatDrift(n.driftPerDaySec) + '</td>' +
'<td style="font-family:var(--mono,monospace)">' + driftText + '</td>' +
'<td style="font-size:11px">' + lastAdv + '</td>' +
'</tr>';
}).join('');

View File

@@ -640,10 +640,13 @@
var severityLabel = SKEW_SEVERITY_LABELS[cs.severity] || cs.severity;
var driftHtml = cs.driftPerDaySec ? '<div style="font-size:12px;color:var(--text-muted);margin-top:2px">Drift: ' + formatDrift(cs.driftPerDaySec) + '</div>' : '';
var sparkHtml = renderSkewSparkline(cs.samples, 200, 32);
var skewDisplay = cs.severity === 'no_clock'
? '<span style="font-size:18px;font-weight:700;color:var(--text-muted)">No Clock</span>'
: '<span style="font-size:18px;font-weight:700;font-family:var(--mono)">' + formatSkew(cs.medianSkewSec) + '</span>';
container.innerHTML =
'<h4 style="margin:0 0 6px">⏰ Clock Skew</h4>' +
'<div style="display:flex;align-items:center;gap:12px;flex-wrap:wrap">' +
'<span style="font-size:18px;font-weight:700;font-family:var(--mono)">' + formatSkew(cs.medianSkewSec) + '</span>' +
skewDisplay +
renderSkewBadge(cs.severity, cs.medianSkewSec) +
(cs.calibrated ? ' <span style="font-size:10px;color:var(--text-muted)" title="Observer-calibrated">✓ calibrated</span>' : '') +
'</div>' +

View File

@@ -400,12 +400,13 @@
ok: 'var(--status-green)',
warning: 'var(--status-yellow)',
critical: 'var(--status-orange)',
absurd: 'var(--status-purple)'
absurd: 'var(--status-purple)',
no_clock: 'var(--text-muted)'
};
var SKEW_SEVERITY_LABELS = {
ok: 'OK', warning: 'Warning', critical: 'Critical', absurd: 'Absurd'
ok: 'OK', warning: 'Warning', critical: 'Critical', absurd: 'Absurd', no_clock: 'No Clock'
};
var SKEW_SEVERITY_ORDER = { absurd: 0, critical: 1, warning: 2, ok: 3 };
var SKEW_SEVERITY_ORDER = { no_clock: 0, absurd: 1, critical: 2, warning: 3, ok: 4 };
window.SKEW_SEVERITY_COLORS = SKEW_SEVERITY_COLORS;
window.SKEW_SEVERITY_LABELS = SKEW_SEVERITY_LABELS;
@@ -432,6 +433,9 @@
// Build the HTML <span> badge for a clock-skew severity.
// Returns '' when no severity is given. For 'no_clock' the badge carries fixed
// text and skewSec is not formatted. For every other severity the tooltip shows
// the formatted skew plus the severity label, and the badge text shows the
// formatted skew unless the severity is 'ok'.
window.renderSkewBadge = function(severity, skewSec) {
  if (!severity) return '';

  var badgeClass = 'skew-badge skew-badge--' + severity;

  if (severity === 'no_clock') {
    return '<span class="' + badgeClass + '" title="Uninitialized RTC — no valid clock">🚫 No Clock</span>';
  }

  var skewText = window.formatSkew(skewSec);
  // Fall back to the raw severity string when no label is registered for it.
  var severityName = SKEW_SEVERITY_LABELS[severity] || severity;
  var badgeText = severity === 'ok' ? '⏰' : '⏰ ' + skewText;
  var tooltip = 'Clock skew: ' + skewText + ' (' + severityName + ')';

  return '<span class="' + badgeClass + '" title="' + tooltip + '">' + badgeText + '</span>';
};

View File

@@ -2278,6 +2278,7 @@ th.sort-active { color: var(--accent, #60a5fa); }
.skew-badge--warning { background: var(--status-yellow); color: #000; }
.skew-badge--critical { background: var(--status-orange); color: #fff; }
.skew-badge--absurd { background: var(--status-purple); color: #fff; }
.skew-badge--no_clock { background: var(--text-muted); color: #fff; }
.skew-detail-section { padding: 10px 16px; margin-bottom: 8px; }
.skew-sparkline-wrap { margin-top: 6px; }
@@ -2287,6 +2288,7 @@ th.sort-active { color: var(--accent, #60a5fa); }
.clock-fleet-row--warning { background: color-mix(in srgb, var(--status-yellow) 10%, transparent); }
.clock-fleet-row--critical { background: color-mix(in srgb, var(--status-orange) 10%, transparent); }
.clock-fleet-row--absurd { background: color-mix(in srgb, var(--status-purple) 10%, transparent); }
.clock-fleet-row--no_clock { background: color-mix(in srgb, var(--text-muted) 10%, transparent); }
.clock-filter-btn { font-size: 12px; padding: 3px 8px; border: 1px solid var(--border); border-radius: 4px; background: var(--card-bg, #fff); color: var(--text); cursor: pointer; margin-right: 4px; }
.clock-filter-btn.active { background: var(--accent); color: #fff; border-color: var(--accent); }