diff --git a/cmd/server/clock_skew.go b/cmd/server/clock_skew.go index 43376751..0ac00027 100644 --- a/cmd/server/clock_skew.go +++ b/cmd/server/clock_skew.go @@ -133,6 +133,7 @@ type NodeClockSkew struct { Samples []SkewSample `json:"samples,omitempty"` // time-series for sparklines GoodFraction float64 `json:"goodFraction"` // fraction of recent samples with |skew| <= 1h RecentBadSampleCount int `json:"recentBadSampleCount"` // count of recent samples with |skew| > 1h + RecentBadSamples []BadSample `json:"recentBadSamples,omitempty"` // #1094: per-bad-sample evidence (hash + bad advertTS) RecentSampleCount int `json:"recentSampleCount"` // total recent samples in window RecentHashEvidence []HashEvidence `json:"recentHashEvidence,omitempty"` CalibrationSummary *CalibrationSummary `json:"calibrationSummary,omitempty"` @@ -146,6 +147,15 @@ type SkewSample struct { SkewSec float64 `json:"skew"` // corrected skew in seconds } +// BadSample is a single recent advert flagged as having a nonsense timestamp +// (|corrected skew| in the bimodal-bad band — > 1h, <= 24h). #1094: surfaced +// so the UI can link each offender to its packet detail page. +type BadSample struct { + Hash string `json:"hash"` // transmission hash for packet-detail deep-link + AdvertTS int64 `json:"advertTS"` // the offending advert Unix timestamp + SkewSec float64 `json:"skewSec"` // corrected skew vs observer at observation time +} + // HashEvidenceObserver is one observer's contribution to a per-hash evidence entry. type HashEvidenceObserver struct { ObserverID string `json:"observerID"` @@ -512,7 +522,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew { lastSkew = cs.LastSkewSec lastAdvTS = cs.LastAdvertTS } - tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec}) + tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec, hash: tx.Hash, advertTS: cs.LastAdvertTS}) } if len(allSkews) == 0 { @@ -536,6 +546,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew { recentSkew := lastSkew var recentVals []float64 + var recentPairs []tsSkewPair if n := len(tsSkews); n > 0 { latestTS := tsSkews[n-1].ts // Index-based window: last K samples. @@ -559,6 +570,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew { start = startByTime } recentVals = make([]float64, 0, n-start) + recentPairs = tsSkews[start:n] for i := start; i < n; i++ { recentVals = append(recentVals, tsSkews[i].skew) } @@ -583,13 +595,25 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew { // adverts had nonsense timestamps") on otherwise-healthy nodes. var goodSamples []float64 var rtcResetCount int - for _, v := range recentVals { + var recentBadSamples []BadSample // #1094: per-bad-sample evidence (hash + advertTS) + for i, v := range recentVals { absV := math.Abs(v) switch { case absV > rtcResetOutlierThresholdSec: rtcResetCount++ // ignored for good/bad classification case absV <= bimodalSkewThresholdSec: goodSamples = append(goodSamples, v) + default: + // Bimodal-bad: 1h < |skew| <= 24h. Capture hash + advertTS so + // the UI can link each offender to its packet detail page + // instead of showing a count without evidence (#1094). + if i < len(recentPairs) && recentPairs[i].hash != "" { + recentBadSamples = append(recentBadSamples, BadSample{ + Hash: recentPairs[i].hash, + AdvertTS: recentPairs[i].advertTS, + SkewSec: round(v, 1), + }) + } } } recentSampleCount := len(recentVals) - rtcResetCount @@ -715,6 +739,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew { Samples: samples, GoodFraction: round(goodFraction, 2), RecentBadSampleCount: recentBadCount, + RecentBadSamples: recentBadSamples, RecentSampleCount: recentSampleCount, RecentHashEvidence: recentEvidence, CalibrationSummary: &calSummary, @@ -875,10 +900,16 @@ func mean(vals []float64) float64 { return sum / float64(len(vals)) } -// tsSkewPair is a (timestamp, skew) pair for drift estimation. +// tsSkewPair is a (timestamp, skew) pair for drift estimation. Also carries +// the source hash + advertTS so callers building per-sample evidence (e.g. +// recentBadSamples for #1094) can identify the offending packet without a +// second pass. Drift code reads only ts/skew; the extra fields are inert +// there. type tsSkewPair struct { - ts int64 - skew float64 + ts int64 + skew float64 + hash string + advertTS int64 } // computeDrift estimates linear drift in seconds per day from time-ordered diff --git a/cmd/server/clock_skew_issue1094_test.go b/cmd/server/clock_skew_issue1094_test.go new file mode 100644 index 00000000..11cff309 --- /dev/null +++ b/cmd/server/clock_skew_issue1094_test.go @@ -0,0 +1,109 @@ +package main + +// Regression test for #1094: the bimodal-clock warning currently exposes only +// RecentBadSampleCount, leaving the UI to render "⚠️ N of M adverts had +// nonsense timestamps" without telling the operator WHICH packets were bad. +// +// This test pins the additive API contract: alongside the count, the response +// must expose RecentBadSamples — a slice of (hash, advertTS, skewSec) — so the +// frontend can render each offending hash as a clickable link with its bad +// timestamp. + +import ( + "testing" + "time" +) + +// Seeds 5 recent adverts: 3 healthy (~-20s skew) and 2 with a "nonsense" +// bimodal-bad timestamp (|skew| in (1h, 24h]). The recent window is exactly +// 5 samples, so all five are inside it. +func seedIssue1094Repro(t *testing.T) (*PacketStore, []string, []int64) { + t.Helper() + ps := NewPacketStore(nil, nil) + pt := 4 // ADVERT + + const pubkey = "BADTS1094" + baseObs := int64(1779000000) + + var txs []*StoreTx + var badHashes []string + var badAdvertTSs []int64 + + // 3 healthy adverts (skew = -20s). + for i := 0; i < 3; i++ { + obsTS := baseObs + int64(i)*60 + advTS := obsTS - 20 + txs = append(txs, &StoreTx{ + Hash: "healthy-1094-" + formatInt64(int64(i)), + PayloadType: &pt, + DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`, + Observations: []*StoreObs{ + {ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)}, + }, + }) + } + + // 2 nonsense-timestamp adverts (skew = -7200s = -2h — bimodal-bad, + // below the 24h RTC-reset exclusion so they DO count in recentBadCount). + for i := 0; i < 2; i++ { + obsTS := baseObs + int64(3+i)*60 + advTS := obsTS - 7200 + hash := "bad-1094-" + formatInt64(int64(i)) + txs = append(txs, &StoreTx{ + Hash: hash, + PayloadType: &pt, + DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`, + Observations: []*StoreObs{ + {ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)}, + }, + }) + badHashes = append(badHashes, hash) + badAdvertTSs = append(badAdvertTSs, advTS) + } + + ps.mu.Lock() + ps.byNode[pubkey] = txs + for _, tx := range txs { + ps.byPayloadType[4] = append(ps.byPayloadType[4], tx) + } + ps.clockSkew.computeInterval = 0 + ps.mu.Unlock() + return ps, badHashes, badAdvertTSs +} + +func TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp(t *testing.T) { + ps, wantHashes, wantAdvertTSs := seedIssue1094Repro(t) + r := ps.GetNodeClockSkew("BADTS1094") + if r == nil { + t.Fatal("expected clock skew result") + } + + // Pre-condition: count must already be 2 (gates the test against the + // existing field — if this drops we'd be measuring the wrong thing). + if r.RecentBadSampleCount != 2 { + t.Fatalf("RecentBadSampleCount = %d, want 2 (seed bug, not the field-under-test)", + r.RecentBadSampleCount) + } + + if len(r.RecentBadSamples) != 2 { + t.Fatalf("RecentBadSamples len = %d, want 2 — operators need to see which "+ + "adverts had nonsense timestamps, not just the count", + len(r.RecentBadSamples)) + } + + gotByHash := map[string]int64{} + for _, bs := range r.RecentBadSamples { + gotByHash[bs.Hash] = bs.AdvertTS + } + for i, h := range wantHashes { + ts, ok := gotByHash[h] + if !ok { + t.Errorf("RecentBadSamples missing hash %q", h) + continue + } + if ts != wantAdvertTSs[i] { + t.Errorf("RecentBadSamples[%q].AdvertTS = %d, want %d (the bad advertTS)", + h, ts, wantAdvertTSs[i]) + } + } +} diff --git a/public/nodes.js b/public/nodes.js index 69784fa9..1acfa7a0 100644 --- a/public/nodes.js +++ b/public/nodes.js @@ -1042,7 +1042,21 @@ var bimodalWarning = ''; if (cs.severity === 'bimodal_clock') { var totalRecent = cs.recentSampleCount || 0; - bimodalWarning = '
⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)
'; + var summary = '⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)'; + var badList = ''; + if (Array.isArray(cs.recentBadSamples) && cs.recentBadSamples.length) { + var items = cs.recentBadSamples.map(function(bs) { + var hash = String(bs.hash || ''); + if (!hash) return ''; + var iso = bs.advertTS ? new Date(bs.advertTS * 1000).toISOString() : ''; + var label = iso ? formatTimestamp(iso) : '—'; + return '
  • ' + escapeHtml(hash.slice(0, 8)) + '' + escapeHtml(label) + '
  • '; + }).filter(Boolean).join(''); + if (items) { + badList = ''; + } + } + bimodalWarning = '
    ' + summary + badList + '
    '; } container.innerHTML = '

    ⏰ Clock Skew

    ' +