meshcore-analyzer/cmd/server/clock_skew_test.go
Kpa-clawbot 441409203e feat(#845): bimodal_clock severity — surface flaky-RTC nodes instead of hiding as 'No Clock' (#850)
## Problem

Nodes with flaky RTC (firmware emitting interleaved good and nonsense
timestamps) were classified as `no_clock` because the broken samples
poisoned the recent median. Operators lost visibility into these nodes —
they showed "No Clock" even though ~60% of their adverts had valid
timestamps.

Observed on staging: a node with 31K samples where recent adverts
interleave good skew (-6.8s, -13.6s) with firmware nonsense (-56M, -60M
seconds). Under the old logic, median of the mixed window → `no_clock`.

## Solution

A new `bimodal_clock` severity tier surfaces flaky-RTC nodes with their
real (good-sample) skew value.

### Classification order (first match wins)

| Severity | Good Fraction | Description |
|----------|--------------|-------------|
| `no_clock` | < 10% | Essentially no real clock |
| `bimodal_clock` | 10–80% (and bad > 0) | Mixed good/bad — flaky RTC |
| `ok`/`warn`/`critical`/`absurd` | ≥ 80% | Normal classification |

"Good" = `|skew| <= 1 hour`; "bad" = likely uninitialized RTC nonsense.

When `bimodal_clock`, `recentMedianSkewSec` is computed from **good
samples only**, so the dashboard shows the real working-clock value
(e.g. -7s) instead of the broken median.
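
A minimal sketch of that order, reusing the package's existing `classifySkew`,
`median`, and severity constants; the wrapper name `classifyRecentWindow`, its
signature, and the inline 10%/80% cutoffs are illustrative rather than the
exact production code:

```go
// Sketch only: first-match-wins classification over the recent window.
func classifyRecentWindow(recentSkews []float64) (SkewSeverity, float64) {
	if len(recentSkews) == 0 {
		return SkewNoClock, 0
	}
	var good, bad []float64
	for _, s := range recentSkews {
		if math.Abs(s) <= float64(BimodalSkewThresholdSec) { // 3600 s
			good = append(good, s)
		} else {
			bad = append(bad, s)
		}
	}
	goodFrac := float64(len(good)) / float64(len(recentSkews))
	switch {
	case goodFrac < 0.10:
		// Essentially no real clock.
		return SkewNoClock, median(recentSkews)
	case goodFrac < 0.80 && len(bad) > 0:
		// Flaky RTC: report the working-clock median, not the poisoned one.
		return SkewBimodalClock, median(good)
	default:
		// Normal path: classify the good-sample median as before.
		m := median(good)
		return classifySkew(math.Abs(m)), m
	}
}
```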

### Backend changes
- New constant `BimodalSkewThresholdSec = 3600`
- New severity `bimodal_clock` in classification logic
- New API fields: `goodFraction`, `recentBadSampleCount`,
`recentSampleCount` (sketched below)
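
Roughly how the new fields hang off the per-node result. Only the three new
JSON key names above are taken from this change; the struct name and the
other fields shown here are abbreviated and illustrative:

```go
// Illustrative shape only; the real result type has more fields.
type nodeClockSkewAPIResult struct {
	Severity            SkewSeverity `json:"severity"`            // may now be "bimodal_clock"
	RecentMedianSkewSec float64      `json:"recentMedianSkewSec"` // good-sample median when bimodal

	// New in #845:
	GoodFraction         float64 `json:"goodFraction"`         // share of recent samples with |skew| <= 1 h
	RecentBadSampleCount int     `json:"recentBadSampleCount"` // recent samples past BimodalSkewThresholdSec
	RecentSampleCount    int     `json:"recentSampleCount"`    // total samples in the recent window
}
```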

### Frontend changes
- Amber `Bimodal` badge with tooltip showing bad-sample percentage
- Bimodal nodes render their skew value like ok/warn/critical nodes (not
the "No Clock" path)
- Warning line below sparkline: "⚠️ X of last Y adverts had nonsense
timestamps (likely RTC reset)"

### Tests
- 6 new Go unit tests: bimodal (60% good → bimodal_clock), all-bad (→
no_clock), 90%-good (→ ok), single good sample (→ ok), 50/50 split (→
bimodal_clock), all-good (→ ok)
- 1 new frontend test: bimodal badge rendering with tooltip
- Existing `TestReporterScenario_789` passes unchanged

Builds on #789 (recent-window severity).

Closes #845

---------

Co-authored-by: you <you@example.com>
2026-04-21 09:11:14 -07:00


package main
import (
"fmt"
"math"
"testing"
"time"
)
// ── classifySkew ───────────────────────────────────────────────────────────────
func TestClassifySkew(t *testing.T) {
tests := []struct {
absSkew float64
expected SkewSeverity
}{
{0, SkewOK},
{60, SkewOK}, // 1 min
{299, SkewOK}, // just under 5 min
{300, SkewWarning}, // exactly 5 min
{1800, SkewWarning}, // 30 min
{3599, SkewWarning}, // just under 1 hour
{3600, SkewCritical}, // exactly 1 hour
{86400, SkewCritical}, // 1 day
{2592000 - 1, SkewCritical}, // just under 30 days
{2592000, SkewAbsurd}, // exactly 30 days
{86400 * 365 - 1, SkewAbsurd}, // just under 365 days
{86400 * 365, SkewNoClock}, // exactly 365 days
{86400 * 365 * 10, SkewNoClock}, // 10 years (epoch-0 style)
}
for _, tc := range tests {
got := classifySkew(tc.absSkew)
if got != tc.expected {
t.Errorf("classifySkew(%v) = %v, want %v", tc.absSkew, got, tc.expected)
}
}
}
// ── median ─────────────────────────────────────────────────────────────────────
func TestMedian(t *testing.T) {
tests := []struct {
vals []float64
expected float64
}{
{nil, 0},
{[]float64{}, 0},
{[]float64{5}, 5},
{[]float64{1, 3}, 2},
{[]float64{3, 1, 2}, 2},
{[]float64{4, 1, 3, 2}, 2.5},
{[]float64{-10, 0, 10}, 0},
}
for _, tc := range tests {
got := median(tc.vals)
if got != tc.expected {
t.Errorf("median(%v) = %v, want %v", tc.vals, got, tc.expected)
}
}
}
func TestMean(t *testing.T) {
tests := []struct {
vals []float64
expected float64
}{
{nil, 0},
{[]float64{10}, 10},
{[]float64{2, 4, 6}, 4},
}
for _, tc := range tests {
got := mean(tc.vals)
if got != tc.expected {
t.Errorf("mean(%v) = %v, want %v", tc.vals, got, tc.expected)
}
}
}
// ── parseISO ───────────────────────────────────────────────────────────────────
func TestParseISO(t *testing.T) {
tests := []struct {
input string
expected int64
}{
{"", 0},
{"garbage", 0},
{"2026-04-15T12:00:00Z", 1776254400},
{"2026-04-15T12:00:00+00:00", 1776254400},
}
for _, tc := range tests {
got := parseISO(tc.input)
if got != tc.expected {
t.Errorf("parseISO(%q) = %v, want %v", tc.input, got, tc.expected)
}
}
}
// ── extractTimestamp ────────────────────────────────────────────────────────────
func TestExtractTimestamp(t *testing.T) {
// Nested payload.timestamp
decoded := map[string]interface{}{
"payload": map[string]interface{}{
"timestamp": float64(1776340800),
},
}
got := extractTimestamp(decoded)
if got != 1776340800 {
t.Errorf("extractTimestamp (nested) = %v, want 1776340800", got)
}
// Top-level timestamp
decoded2 := map[string]interface{}{
"timestamp": float64(1776340900),
}
got2 := extractTimestamp(decoded2)
if got2 != 1776340900 {
t.Errorf("extractTimestamp (top-level) = %v, want 1776340900", got2)
}
// No timestamp
decoded3 := map[string]interface{}{"foo": "bar"}
got3 := extractTimestamp(decoded3)
if got3 != 0 {
t.Errorf("extractTimestamp (missing) = %v, want 0", got3)
}
}
// ── calibrateObservers ─────────────────────────────────────────────────────────
func TestCalibrateObservers_SingleObserver(t *testing.T) {
// Single-observer packets can't calibrate — should return empty.
samples := []skewSample{
{advertTS: 1000, observedTS: 1000, observerID: "obs1", hash: "h1"},
{advertTS: 2000, observedTS: 2000, observerID: "obs1", hash: "h2"},
}
offsets, _ := calibrateObservers(samples)
if len(offsets) != 0 {
t.Errorf("expected no offsets for single-observer, got %v", offsets)
}
}
func TestCalibrateObservers_MultiObserver(t *testing.T) {
// Packet h1 seen by 3 observers: obs1 at t=100, obs2 at t=110, obs3 at t=100.
// Median observation = 100. obs1=0, obs2=+10, obs3=0
// Packet h2 seen by 3 observers: obs1 at t=200, obs2 at t=210, obs3 at t=200.
// Median observation = 200. obs1=0, obs2=+10, obs3=0
samples := []skewSample{
{advertTS: 100, observedTS: 100, observerID: "obs1", hash: "h1"},
{advertTS: 100, observedTS: 110, observerID: "obs2", hash: "h1"},
{advertTS: 100, observedTS: 100, observerID: "obs3", hash: "h1"},
{advertTS: 200, observedTS: 200, observerID: "obs1", hash: "h2"},
{advertTS: 200, observedTS: 210, observerID: "obs2", hash: "h2"},
{advertTS: 200, observedTS: 200, observerID: "obs3", hash: "h2"},
}
offsets, _ := calibrateObservers(samples)
if offsets["obs1"] != 0 {
t.Errorf("obs1 offset = %v, want 0", offsets["obs1"])
}
if offsets["obs2"] != 10 {
t.Errorf("obs2 offset = %v, want 10", offsets["obs2"])
}
if offsets["obs3"] != 0 {
t.Errorf("obs3 offset = %v, want 0", offsets["obs3"])
}
}
// ── computeNodeSkew ────────────────────────────────────────────────────────────
func TestComputeNodeSkew_BasicCorrection(t *testing.T) {
// Validates observer offset correction direction.
//
// Setup: node is 60s ahead, obs1 accurate, obs2 is 10s ahead.
// With 2 observers, median obs_ts = 1005.
// obs1 offset = 1000 - 1005 = -5
// obs2 offset = 1010 - 1005 = +5
// Correction: corrected = raw_skew + obsOffset
// obs1: raw=60, corrected = 60 + (-5) = 55
// obs2: raw=50, corrected = 50 + 5 = 55
// Both converge to 55 (not exact 60 because with only 2 observers,
// the median can't fully distinguish which observer is drifted).
samples := []skewSample{
// Same packet seen by accurate obs1 and obs2 (+10s ahead)
{advertTS: 1060, observedTS: 1000, observerID: "obs1", hash: "h1"},
{advertTS: 1060, observedTS: 1010, observerID: "obs2", hash: "h1"},
}
offsets, _ := calibrateObservers(samples)
// median obs = 1005, obs1 offset = -5, obs2 offset = +5
// So the median approach finds obs2 is +5 ahead (relative to median)
// Now compute node skew with those offsets:
nodeSkew := computeNodeSkew(samples, offsets)
cs, ok := nodeSkew["h1"]
if !ok {
t.Fatal("expected skew data for hash h1")
}
// With only 2 observers, median obs_ts = 1005.
// obs1 offset = 1000-1005 = -5, obs2 offset = 1010-1005 = +5
// raw from obs1 = 60, corrected = 60 + (-5) = 55
// raw from obs2 = 50, corrected = 50 + 5 = 55
// median = 55
if cs.MedianSkewSec != 55 {
t.Errorf("median skew = %v, want 55", cs.MedianSkewSec)
}
}
func TestComputeNodeSkew_ThreeObservers(t *testing.T) {
// Node is exactly 60s ahead. obs1 accurate, obs2 accurate, obs3 +30s ahead.
// advertTS = 1060, real time = 1000
samples := []skewSample{
{advertTS: 1060, observedTS: 1000, observerID: "obs1", hash: "h1"},
{advertTS: 1060, observedTS: 1000, observerID: "obs2", hash: "h1"},
{advertTS: 1060, observedTS: 1030, observerID: "obs3", hash: "h1"},
}
offsets, _ := calibrateObservers(samples)
// median obs_ts = 1000. obs1=0, obs2=0, obs3=+30
if offsets["obs3"] != 30 {
t.Errorf("obs3 offset = %v, want 30", offsets["obs3"])
}
nodeSkew := computeNodeSkew(samples, offsets)
cs := nodeSkew["h1"]
if cs == nil {
t.Fatal("expected skew data for h1")
}
// raw from obs1 = 60, corrected = 60 + 0 = 60
// raw from obs2 = 60, corrected = 60 + 0 = 60
// raw from obs3 = 30, corrected = 30 + 30 = 60
// All three converge to 60.
if cs.MedianSkewSec != 60 {
t.Errorf("median skew = %v, want 60 (node is 60s ahead)", cs.MedianSkewSec)
}
}
// ── computeDrift ───────────────────────────────────────────────────────────────
func TestComputeDrift_Stable(t *testing.T) {
// Constant skew = no drift.
pairs := []tsSkewPair{
{ts: 0, skew: 60},
{ts: 7200, skew: 60},
{ts: 14400, skew: 60},
}
drift := computeDrift(pairs)
if drift != 0 {
t.Errorf("drift = %v, want 0 for stable skew", drift)
}
}
func TestComputeDrift_LinearDrift(t *testing.T) {
// 1 second drift per hour = 24 sec/day.
pairs := []tsSkewPair{
{ts: 0, skew: 0},
{ts: 3600, skew: 1},
{ts: 7200, skew: 2},
}
drift := computeDrift(pairs)
expected := 24.0
if math.Abs(drift-expected) > 0.1 {
t.Errorf("drift = %v, want ~%v", drift, expected)
}
}
func TestComputeDrift_TooFewSamples(t *testing.T) {
pairs := []tsSkewPair{{ts: 0, skew: 10}}
if computeDrift(pairs) != 0 {
t.Error("expected 0 drift for single sample")
}
}
func TestComputeDrift_TooShortSpan(t *testing.T) {
// Less than 1 hour apart.
pairs := []tsSkewPair{
{ts: 0, skew: 0},
{ts: 1800, skew: 10},
}
if computeDrift(pairs) != 0 {
t.Error("expected 0 drift for short time span")
}
}
// ── jsonNumber ─────────────────────────────────────────────────────────────────
func TestJsonNumber(t *testing.T) {
m := map[string]interface{}{
"a": float64(42),
"b": int64(99),
"c": "not a number",
"d": nil,
}
if jsonNumber(m, "a") != 42 {
t.Error("float64 case failed")
}
if jsonNumber(m, "b") != 99 {
t.Error("int64 case failed")
}
if jsonNumber(m, "c") != 0 {
t.Error("string case should return 0")
}
if jsonNumber(m, "d") != 0 {
t.Error("nil case should return 0")
}
if jsonNumber(m, "missing") != 0 {
t.Error("missing key should return 0")
}
}
// ── Integration: GetNodeClockSkew via PacketStore ──────────────────────────────
func TestGetNodeClockSkew_Integration(t *testing.T) {
ps := NewPacketStore(nil, nil)
// Simulate two ADVERT transmissions for the same node, seen by 2 observers each.
// Node "AABB" has clock 120s ahead.
pt := 4 // ADVERT
tx1 := &StoreTx{
Hash: "hash1",
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":1700002320}}`, // obs=1700002200, node ahead by 120s
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: "2023-11-14T22:50:00Z"}, // 1700002200
{ObserverID: "obs2", Timestamp: "2023-11-14T22:50:00Z"}, // 1700002200
},
}
tx2 := &StoreTx{
Hash: "hash2",
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":1700005920}}`, // obs=1700005800, node ahead by 120s
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: "2023-11-14T23:50:00Z"}, // 1700005800
{ObserverID: "obs2", Timestamp: "2023-11-14T23:50:00Z"}, // 1700005800
},
}
ps.mu.Lock()
ps.byNode["AABB"] = []*StoreTx{tx1, tx2}
ps.byPayloadType[4] = []*StoreTx{tx1, tx2}
// Force recompute by setting interval to 0.
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
result := ps.GetNodeClockSkew("AABB")
if result == nil {
t.Fatal("expected clock skew result for node AABB")
}
if result.Pubkey != "AABB" {
t.Errorf("pubkey = %q, want AABB", result.Pubkey)
}
// Both transmissions show 120s skew, so median should be 120.
if result.MedianSkewSec != 120 {
t.Errorf("median skew = %v, want 120", result.MedianSkewSec)
}
if result.SampleCount < 2 {
t.Errorf("sample count = %v, want >= 2", result.SampleCount)
}
if result.Severity != SkewOK {
t.Errorf("severity = %v, want ok (120s < 5min)", result.Severity)
}
// Drift should be ~0 since skew is constant.
if math.Abs(result.DriftPerDaySec) > 1 {
t.Errorf("drift = %v, want ~0 for constant skew", result.DriftPerDaySec)
}
}
func TestGetNodeClockSkew_NoData(t *testing.T) {
ps := NewPacketStore(nil, nil)
result := ps.GetNodeClockSkew("nonexistent")
if result != nil {
t.Error("expected nil for nonexistent node")
}
}
// ── Sanity check tests (#XXX — clock skew crazy stats) ────────────────────────
func TestGetNodeClockSkew_NoClock_EpochZero(t *testing.T) {
// Node with epoch-0 timestamp produces huge skew → no_clock severity, drift=0.
ps := NewPacketStore(nil, nil)
pt := 4 // ADVERT
// Epoch-ish advert: advertTS near start of 2020, observed in 2023 → |skew| > 365 days
var txs []*StoreTx
baseObs := int64(1700000000) // ~Nov 2023
for i := 0; i < 6; i++ {
obsTS := baseObs + int64(i)*7200
tx := &StoreTx{
Hash: "epoch-h" + string(rune('0'+i)),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":1577836800}}`, // Jan 1 2020 — valid but way off
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["EPOCH"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
result := ps.GetNodeClockSkew("EPOCH")
if result == nil {
t.Fatal("expected clock skew result for epoch-0 node")
}
if result.Severity != SkewNoClock {
t.Errorf("severity = %v, want no_clock", result.Severity)
}
if result.DriftPerDaySec != 0 {
t.Errorf("drift = %v, want 0 for no_clock node", result.DriftPerDaySec)
}
}
func TestGetNodeClockSkew_TooFewSamplesForDrift(t *testing.T) {
// Node with only 2 advert samples → drift should not be computed.
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
for i := 0; i < 2; i++ {
obsTS := baseObs + int64(i)*7200
advTS := obsTS + 120 // 120s ahead
tx := &StoreTx{
Hash: "few-h" + string(rune('0'+i)),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["FEWSAMP"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
result := ps.GetNodeClockSkew("FEWSAMP")
if result == nil {
t.Fatal("expected clock skew result")
}
if result.DriftPerDaySec != 0 {
t.Errorf("drift = %v, want 0 for 2-sample node (minimum is %d)", result.DriftPerDaySec, minDriftSamples)
}
}
func TestGetNodeClockSkew_AbsurdDriftCapped(t *testing.T) {
// Node with wildly varying skew producing |drift| > 86400 s/day → drift capped to 0.
ps := NewPacketStore(nil, nil)
pt := 4
// Create 6 samples with extreme skew variation to produce absurd drift.
baseObs := int64(1700000000)
var txs []*StoreTx
for i := 0; i < 6; i++ {
obsTS := baseObs + int64(i)*3600
// Alternate between huge positive and negative skew offsets
skewOffset := int64(50000 * (1 - 2*(i%2))) // +50000 or -50000
advTS := obsTS + skewOffset
tx := &StoreTx{
Hash: "wild-h" + string(rune('0'+i)),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["WILD"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
result := ps.GetNodeClockSkew("WILD")
if result == nil {
t.Fatal("expected clock skew result")
}
if math.Abs(result.DriftPerDaySec) > maxReasonableDriftPerDay {
t.Errorf("drift = %v, should be capped (|drift| > %v)", result.DriftPerDaySec, maxReasonableDriftPerDay)
}
}
func TestGetNodeClockSkew_NormalNodeWithDrift(t *testing.T) {
// Normal node with 6 samples and consistent linear drift → drift computed correctly.
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
for i := 0; i < 6; i++ {
obsTS := baseObs + int64(i)*7200 // every 2 hours
// Drift: 1 sec/hour = 24 sec/day
advTS := obsTS + 120 + int64(i) // skew grows by 1s per sample (2h apart)
tx := &StoreTx{
Hash: "norm-h" + string(rune('0'+i)),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["NORMAL"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
result := ps.GetNodeClockSkew("NORMAL")
if result == nil {
t.Fatal("expected clock skew result")
}
if result.Severity != SkewOK {
t.Errorf("severity = %v, want ok", result.Severity)
}
// 1s per 7200s = 12 s/day
if result.DriftPerDaySec == 0 {
t.Error("expected non-zero drift for linearly drifting node")
}
if math.Abs(result.DriftPerDaySec) > maxReasonableDriftPerDay {
t.Errorf("drift = %v, should be reasonable", result.DriftPerDaySec)
}
}
// formatInt64 is a test helper to format int64 as string for JSON embedding.
func formatInt64(n int64) string {
return fmt.Sprintf("%d", n)
}
// ── #789: Recent-window severity & robust drift ───────────────────────────────
// TestSeverityUsesRecentNotMedian: 100 historical samples at skew=-60s
// (each 5 min apart) followed by 5 fresh good samples at skew=-1s. The
// all-time median stays near -60s, but the recent-window severity and
// recentMedianSkewSec must reflect the current healthy state.
func TestSeverityUsesRecentNotMedian(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
for i := 0; i < 105; i++ {
obsTS := baseObs + int64(i)*300 // 5 min apart
var skew int64 = -60
if i >= 100 {
skew = -1 // good samples at the tail
}
advTS := obsTS + skew
tx := &StoreTx{
Hash: fmt.Sprintf("recent-h%03d", i),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["RECENT"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("RECENT")
if r == nil {
t.Fatal("nil result")
}
if r.Severity != SkewOK {
t.Errorf("severity = %v, want ok (recent samples are healthy)", r.Severity)
}
if math.Abs(r.RecentMedianSkewSec) > 5 {
t.Errorf("recentMedianSkewSec = %v, want ~-1", r.RecentMedianSkewSec)
}
// Historical median should still be retained for context.
if math.Abs(r.MedianSkewSec) < 30 {
t.Errorf("medianSkewSec = %v, expected historical median to remain large", r.MedianSkewSec)
}
}
// TestDriftRejectsCorrectionJump: ~55 minutes of clean linear drift, then a
// single 1000-second skew jump. The pre-jump slope should win; drift must
// not be catastrophically inflated by the correction event.
func TestDriftRejectsCorrectionJump(t *testing.T) {
pairs := []tsSkewPair{}
// 30 min of stable, ~12 sec/day drift: 1s per 7200s.
for i := 0; i < 12; i++ {
ts := int64(i) * 300
skew := float64(i) * (1.0 / 24.0) // ~0.04s per 5min step → 12 s/day
pairs = append(pairs, tsSkewPair{ts: ts, skew: skew})
}
// After roughly an hour's gap, a single 1000-sec correction jump (a clear outlier).
pairs = append(pairs, tsSkewPair{ts: 3600 + 12*300, skew: 1000})
drift := computeDrift(pairs)
// Without rejection this would be ~ (1000-0)/(end-0) * 86400 = enormous.
if math.Abs(drift) > 100 {
t.Errorf("drift = %v, expected small (~12 s/day), correction jump should be filtered", drift)
}
}
// TestTheilSenMatchesOLSWhenClean: on clean linear data Theil-Sen should
// produce essentially the OLS answer.
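// (For reference, and as an assumption about the estimator rather than a
// restatement of its code: Theil-Sen takes the median of the pairwise slopes
// (skew_j - skew_i) / (ts_j - ts_i) over all i < j and scales by 86400 to get
// sec/day; on noise-free linear data every pairwise slope is identical, so it
// matches the OLS fit exactly.)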
func TestTheilSenMatchesOLSWhenClean(t *testing.T) {
// 1 sec drift per hour = 24 sec/day, 20 evenly-spaced samples.
pairs := []tsSkewPair{}
for i := 0; i < 20; i++ {
pairs = append(pairs, tsSkewPair{
ts: int64(i) * 600,
skew: float64(i) * (600.0 / 3600.0),
})
}
drift := computeDrift(pairs)
if math.Abs(drift-24.0) > 0.25 { // ~1%
t.Errorf("drift = %v, want ~24", drift)
}
}
// TestReporterScenario_789: reproduce the exact scenario from issue #789.
// Reporter saw mean=-52565156, median=-59063561, last=-0.8, sample count
// 1662, drift +1793549.9 s/day, severity=absurd. After the fix, severity
// must be ok (recent samples are healthy) and drift must be sane.
func TestReporterScenario_789(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
// 1657 samples with the bad ~-683-day skew (the historical poison),
// then 5 freshly corrected samples at -1s (reporter saw -0.8); 1662 total.
for i := 0; i < 1662; i++ {
obsTS := baseObs + int64(i)*60 // 1 min apart
var skew int64
if i < 1657 {
skew = -59063561 // ~ -683 days
} else {
skew = -1 // corrected (rounded; reporter saw -0.8)
}
advTS := obsTS + skew
tx := &StoreTx{
Hash: fmt.Sprintf("rep-%04d", i),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["REPNODE"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("REPNODE")
if r == nil {
t.Fatal("nil result")
}
// Severity must reflect current health, not the all-time median.
if r.Severity != SkewOK && r.Severity != SkewWarning {
t.Errorf("severity = %v, want ok/warning (recent samples are healthy)", r.Severity)
}
if math.Abs(r.RecentMedianSkewSec) > 5 {
t.Errorf("recentMedianSkewSec = %v, want near 0", r.RecentMedianSkewSec)
}
// Drift must not be absurd. The historical jump is one event between
// the 1657th and 1658th sample; outlier rejection must contain it.
if math.Abs(r.DriftPerDaySec) > maxReasonableDriftPerDay {
t.Errorf("drift = %v, must be <= cap %v", r.DriftPerDaySec, maxReasonableDriftPerDay)
}
// And it should be close to zero (stable historical + stable corrected).
if math.Abs(r.DriftPerDaySec) > 1000 {
t.Errorf("drift = %v, expected near zero after outlier rejection", r.DriftPerDaySec)
}
// Historical median is preserved as context.
if math.Abs(r.MedianSkewSec) < 1e6 {
t.Errorf("medianSkewSec = %v, expected historical poison preserved as context", r.MedianSkewSec)
}
}
// TestBimodalClock_845: 60% good samples → bimodal_clock severity.
func TestBimodalClock_845(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
// 6 good samples (-5s each), 4 bad samples (-50000000s each) = 60% good
// Interleave so the recent window (last 5) captures both good and bad.
skews := []int64{-5, -5, -50000000, -5, -50000000, -5, -50000000, -5, -50000000, -5}
for i := 0; i < 10; i++ {
obsTS := baseObs + int64(i)*60
advTS := obsTS + skews[i]
tx := &StoreTx{
Hash: fmt.Sprintf("bimodal-%04d", i),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["BIMODAL"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("BIMODAL")
if r == nil {
t.Fatal("nil result")
}
if r.Severity != SkewBimodalClock {
t.Errorf("severity = %v, want bimodal_clock", r.Severity)
}
if math.Abs(r.RecentMedianSkewSec-(-5)) > 1 {
t.Errorf("recentMedianSkewSec = %v, want ≈ -5 (median of good samples)", r.RecentMedianSkewSec)
}
if r.GoodFraction < 0.5 || r.GoodFraction > 0.7 {
t.Errorf("goodFraction = %v, want ~0.6", r.GoodFraction)
}
if r.RecentBadSampleCount < 1 {
t.Errorf("recentBadSampleCount = %v, want > 0", r.RecentBadSampleCount)
}
}
// TestAllBad_NoClock_845: all samples bad → no_clock.
func TestAllBad_NoClock_845(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
for i := 0; i < 10; i++ {
obsTS := baseObs + int64(i)*60
advTS := obsTS - 50000000
tx := &StoreTx{
Hash: fmt.Sprintf("allbad-%04d", i),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["ALLBAD"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("ALLBAD")
if r == nil {
t.Fatal("nil result")
}
if r.Severity != SkewNoClock {
t.Errorf("severity = %v, want no_clock", r.Severity)
}
}
// TestMostlyGood_OK_845: 90% good / 10% bad → ok (outlier filtered).
func TestMostlyGood_OK_845(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
// 9 good at -5s, 1 bad at -50000000s
for i := 0; i < 10; i++ {
obsTS := baseObs + int64(i)*60
var skew int64
if i < 9 {
skew = -5
} else {
skew = -50000000
}
advTS := obsTS + skew
tx := &StoreTx{
Hash: fmt.Sprintf("mostly-%04d", i),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["MOSTLY"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("MOSTLY")
if r == nil {
t.Fatal("nil result")
}
// 90% good → normal classification path, median of good samples = -5s → ok
if r.Severity != SkewOK {
t.Errorf("severity = %v, want ok", r.Severity)
}
if math.Abs(r.RecentMedianSkewSec-(-5)) > 1 {
t.Errorf("recentMedianSkewSec = %v, want ≈ -5", r.RecentMedianSkewSec)
}
}
// TestSingleSample_845: one good sample → ok.
func TestSingleSample_845(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
obsTS := int64(1700000000)
advTS := obsTS - 30 // 30s skew
tx := &StoreTx{
Hash: "single-0001",
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
ps.mu.Lock()
ps.byNode["SINGLE"] = []*StoreTx{tx}
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("SINGLE")
if r == nil {
t.Fatal("nil result")
}
if r.Severity != SkewOK {
t.Errorf("severity = %v, want ok", r.Severity)
}
if r.RecentSampleCount != 1 {
t.Errorf("recentSampleCount = %d, want 1", r.RecentSampleCount)
}
if r.GoodFraction != 1.0 {
t.Errorf("goodFraction = %v, want 1.0", r.GoodFraction)
}
}
// TestFiftyFifty_Bimodal_845: 50% good / 50% bad → bimodal_clock.
func TestFiftyFifty_Bimodal_845(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
for i := 0; i < 10; i++ {
obsTS := baseObs + int64(i)*60
var skew int64
if i%2 == 0 {
skew = -10
} else {
skew = -50000000
}
tx := &StoreTx{
Hash: fmt.Sprintf("fifty-%04d", i),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(obsTS+skew) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["FIFTY"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("FIFTY")
if r == nil {
t.Fatal("nil result")
}
if r.Severity != SkewBimodalClock {
t.Errorf("severity = %v, want bimodal_clock", r.Severity)
}
if r.GoodFraction < 0.4 || r.GoodFraction > 0.6 {
t.Errorf("goodFraction = %v, want ~0.5", r.GoodFraction)
}
}
// TestAllGood_OK_845: all samples good → ok, no bimodal.
func TestAllGood_OK_845(t *testing.T) {
ps := NewPacketStore(nil, nil)
pt := 4
baseObs := int64(1700000000)
var txs []*StoreTx
for i := 0; i < 10; i++ {
obsTS := baseObs + int64(i)*60
tx := &StoreTx{
Hash: fmt.Sprintf("allgood-%04d", i),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(obsTS-3) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
}
txs = append(txs, tx)
}
ps.mu.Lock()
ps.byNode["ALLGOOD"] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
r := ps.GetNodeClockSkew("ALLGOOD")
if r == nil {
t.Fatal("nil result")
}
if r.Severity != SkewOK {
t.Errorf("severity = %v, want ok", r.Severity)
}
if r.GoodFraction != 1.0 {
t.Errorf("goodFraction = %v, want 1.0", r.GoodFraction)
}
if r.RecentBadSampleCount != 0 {
t.Errorf("recentBadSampleCount = %v, want 0", r.RecentBadSampleCount)
}
}