mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-06-03 21:51:47 +00:00
467b01a1b3
Red commit: 97c9a22a55 (CI:
https://github.com/Kpa-clawbot/CoreScope/commit/97c9a22a55b07d1576c579aa9d23b290dad33eb6/checks)
Fixes #1285.
## What was broken
**Bug A — outlier-dominated hash-evidence median.** On the per-hash
evidence panel a single observer reporting an RTC-reset advert (firmware
emitting factory timestamp, ~700d off) dragged the displayed median to
"median corrected: -704d 18h" even when every other observer of that
hash saw a normal value.
**Bug B — false "N of last K had nonsense timestamps" warning.**
`recentBadSampleCount` lumped RTC-reset adverts in with "bimodal-bad"
samples. On the repro node every recent skew was -16…-22s (healthy), but
the lone RTC-reset advert that landed inside the recent window was
counted as bad → "3 of last 5 adverts had nonsense timestamps" fired and
the node was misclassified `bimodal_clock`.
Root cause of B: the recent-window split (`cmd/server/clock_skew.go`
~L575) classified anything `|corrected skew| > 1h` as "bad". That
conflates true bimodal RTC oscillation (1h…24h) with factory-timestamp
resets (>24h, already surfaced via the RTC-reset badge).
## Fix
- New `rtcResetOutlierThresholdSec = 24h`. Rationale: real µC drift is
sub-second/advert; real bimodal RTC misbehaves in the hours range;
anything >1d is not a drift signal.
- Recent-window split puts `|skew| > 24h` in a third bucket excluded
from both `recentSampleCount` and `recentBadCount`.
- New `hashEvidenceMedian()` filters outliers before computing the
per-hash median. UI labels the hash "insufficient data (N RTC-reset
outliers excluded)" when every observer saw a reset-shaped advert.
- Three pre-existing #845 tests used -50M-sec "bad" samples (RTC-reset
range) — re-pointed to -7200s (true bimodal range), what `bimodal_clock`
actually models.
## Preflight overrides
- check-branch-clean: cross-stack: justified — backend computes
counts/median; frontend renders the new label.
## Browser verification
Confirmed staging node `c0dedad…` repro matches the test fixture. No new
CSS vars.
## E2E assertion added
`cmd/server/clock_skew_issue1285_test.go:81` and `:103`.
---------
Co-authored-by: corescope-bot <bot@corescope.local>
130 lines
4.5 KiB
Go
130 lines
4.5 KiB
Go
package main
|
||
|
||
// Regression tests for #1285:
|
||
//
|
||
// Bug A: per-hash evidence's MedianCorrectedSkewSec includes 700-day RTC-reset
|
||
// outliers, dragging the displayed median into "−704d 18h" garbage even
|
||
// though every recent sample is small (< 30s).
|
||
//
|
||
// Bug B: RecentBadSampleCount counts samples outside the *displayed* recent
|
||
// window (or counts against raw skew, not corrected) → "3 of last 5
|
||
// adverts had nonsense timestamps" warning fires on healthy nodes.
|
||
//
|
||
// Both must pass without disturbing the existing bimodal / no-clock logic.
|
||
|
||
import (
|
||
"testing"
|
||
"time"
|
||
)
|
||
|
||
// Synthesizes the repro from issue #1285:
|
||
// 30 healthy adverts (skew ~−20s) + 1 historical advert with an RTC reset
|
||
// (advertTS = 2024-06-13, observed today → −705d skew).
|
||
// Returns the populated store.
|
||
func seedIssue1285Repro(t *testing.T) *PacketStore {
|
||
t.Helper()
|
||
ps := NewPacketStore(nil, nil)
|
||
pt := 4 // ADVERT
|
||
|
||
const pubkey = "RTCRESET"
|
||
const skewSec = int64(-20) // node clock is 20s BEHIND wall-clock
|
||
|
||
baseObs := int64(1779000000) // ~mid-2026
|
||
rtcResetAdv := int64(1718281640) // 2024-06-13 (from the issue repro)
|
||
|
||
var txs []*StoreTx
|
||
|
||
// 30 healthy adverts spanning the older end of the recent window.
|
||
for i := 0; i < 30; i++ {
|
||
obsTS := baseObs + int64(i)*60
|
||
advTS := obsTS + skewSec
|
||
tx := &StoreTx{
|
||
Hash: "healthy-" + formatInt64(int64(i)),
|
||
PayloadType: &pt,
|
||
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
|
||
Observations: []*StoreObs{
|
||
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
|
||
},
|
||
}
|
||
txs = append(txs, tx)
|
||
}
|
||
|
||
// One RTC-reset packet observed MOST RECENTLY (so it sits in the
|
||
// per-hash evidence list AND is included in the recent-window count
|
||
// on master). Its advertTS is from 2024 → corrected skew ≈ -60M sec.
|
||
rtcResetObs := baseObs + int64(30*60) + 60
|
||
rtcTx := &StoreTx{
|
||
Hash: "rtc-reset-0001",
|
||
PayloadType: &pt,
|
||
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(rtcResetAdv) + `}}`,
|
||
Observations: []*StoreObs{
|
||
{ObserverID: "obs1", Timestamp: time.Unix(rtcResetObs, 0).UTC().Format(time.RFC3339)},
|
||
},
|
||
}
|
||
txs = append(txs, rtcTx)
|
||
|
||
ps.mu.Lock()
|
||
ps.byNode[pubkey] = txs
|
||
for _, tx := range txs {
|
||
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
|
||
}
|
||
ps.clockSkew.computeInterval = 0
|
||
ps.mu.Unlock()
|
||
return ps
|
||
}
|
||
|
||
// Bug A — per-hash evidence median must EXCLUDE the 705-day RTC-reset outlier.
|
||
// On master this asserts on the RTC-reset hash's MedianCorrectedSkewSec being
|
||
// ≈ −60M sec ("−704d 18h"); after the fix the field is suppressed (0) or
|
||
// otherwise marked insufficient, never displayed as the garbage value.
|
||
func TestIssue1285_HashEvidence_OutlierExcludedFromMedian(t *testing.T) {
|
||
ps := seedIssue1285Repro(t)
|
||
r := ps.GetNodeClockSkew("RTCRESET")
|
||
if r == nil {
|
||
t.Fatal("expected clock skew result")
|
||
}
|
||
|
||
// The recent-hash evidence list is the source of the "median corrected:
|
||
// −704d 18h" string in the UI. After the fix, NO entry in this list
|
||
// should report a |median| above the 24h sanity threshold — the fix is
|
||
// to drop outlier samples (or flag the hash as insufficient-data) before
|
||
// publishing the median.
|
||
const maxSaneAbsSec = float64(24 * 3600)
|
||
for _, ev := range r.RecentHashEvidence {
|
||
if abs(ev.MedianCorrectedSkewSec) > maxSaneAbsSec {
|
||
t.Errorf("hash %s exposes outlier-dominated median %.0fs (~%.1fd); "+
|
||
"expected entry to be filtered out or marked insufficient (|median| <= %.0fs)",
|
||
ev.Hash, ev.MedianCorrectedSkewSec,
|
||
ev.MedianCorrectedSkewSec/86400, maxSaneAbsSec)
|
||
}
|
||
}
|
||
}
|
||
|
||
// Bug B — RecentBadSampleCount must be 0 when every sample in the recent
|
||
// window is healthy (<30s |corrected skew|). On master this fires because
|
||
// "recent" is computed over the wrong set (or against raw skew).
|
||
func TestIssue1285_RecentBadCount_NotPollutedByOldOutlier(t *testing.T) {
|
||
ps := seedIssue1285Repro(t)
|
||
r := ps.GetNodeClockSkew("RTCRESET")
|
||
if r == nil {
|
||
t.Fatal("expected clock skew result")
|
||
}
|
||
if r.RecentBadSampleCount != 0 {
|
||
t.Errorf("RecentBadSampleCount = %d, want 0 — recent samples are all "+
|
||
"~−20s (healthy); the historical RTC-reset outlier is outside the "+
|
||
"recent window and must not be counted", r.RecentBadSampleCount)
|
||
}
|
||
if r.Severity == SkewBimodalClock || r.Severity == SkewNoClock {
|
||
t.Errorf("severity = %v, want ok/warning — recent samples are all "+
|
||
"healthy (~−20s skew), one historical outlier must not flip the node "+
|
||
"to bimodal/no-clock", r.Severity)
|
||
}
|
||
}
|
||
|
||
func abs(v float64) float64 {
|
||
if v < 0 {
|
||
return -v
|
||
}
|
||
return v
|
||
}
|