feat(node-detail): link RTC-reset warning to offending packet hashes (#1094) (#1590)

## Problem
Node detail's bimodal-clock warning showed only `⚠️ N of last M adverts
had nonsense timestamps (likely RTC reset)` — no way to tell which
packets, no way to verify the heuristic, no way to drill in.

## Fix
Additive, two-sides:

**Backend** (`cmd/server/clock_skew.go`)
- New type `BadSample { Hash, AdvertTS, SkewSec }`.
- New field `NodeClockSkew.RecentBadSamples []BadSample` (`omitempty`).
- Populated from the **same** bimodal-bad classification pass that
produces `RecentBadSampleCount` — no heuristic change. `tsSkewPair`
carries `hash` + `advertTS` so the classifier can record per-sample
evidence without a second walk; drift code is unaffected (reads only
`ts`/`skew`).

**Frontend** (`public/nodes.js`)
- `bimodalWarning` preserves the existing count summary line, then
renders a `<ul>` of bad samples: each `<li>` is `<a
href="#/packets/HASH">hash[:8]</a> → formatTimestamp(advertTS)` with ISO
tooltip. Defensive `Array.isArray` so older API responses still render
the summary alone.

## TDD
- **Red:**
`cmd/server/clock_skew_issue1094_test.go::TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp`
— seeds 3 healthy + 2 bimodal-bad adverts, asserts `RecentBadSamples`
has length 2 with the expected hashes and advert timestamps. Fails on
the assertion (`len = 0, want 2`) with the stub-only commit.
- **Green:** classifier populates the slice; existing #1285 and bimodal
tests stay green.
- Red commit: `ed501f4b`
- Green commit: `54305b06`

## Cross-stack
Backend + frontend ship together (`cross-stack: justified` commit). API
stays backward compatible (`omitempty` server, `Array.isArray` client)
but the feature only lights up with both halves present.

## Preflight
Clean — PII, branch scope, red-commit, CSS vars, XSS sinks, migrations,
fixture coverage all pass.

## Acceptance
- [x] Warning lists specific packet hashes
- [x] Each hash links to `#/packets/<hash>`
- [x] Bad advert timestamp shown next to the hash
- [x] Pattern is reusable — `BadSample` is a clean shape any future
heuristic that flags specific packets can adopt

Fixes #1094

---------

Co-authored-by: openclaw-bot <bot@openclaw.local>
This commit is contained in:
Kpa-clawbot
2026-06-04 18:48:27 -07:00
committed by GitHub
parent af669438ff
commit 1a2b8c48be
3 changed files with 160 additions and 6 deletions
+36 -5
View File
@@ -133,6 +133,7 @@ type NodeClockSkew struct {
Samples []SkewSample `json:"samples,omitempty"` // time-series for sparklines
GoodFraction float64 `json:"goodFraction"` // fraction of recent samples with |skew| <= 1h
RecentBadSampleCount int `json:"recentBadSampleCount"` // count of recent samples with |skew| > 1h
RecentBadSamples []BadSample `json:"recentBadSamples,omitempty"` // #1094: per-bad-sample evidence (hash + bad advertTS)
RecentSampleCount int `json:"recentSampleCount"` // total recent samples in window
RecentHashEvidence []HashEvidence `json:"recentHashEvidence,omitempty"`
CalibrationSummary *CalibrationSummary `json:"calibrationSummary,omitempty"`
@@ -146,6 +147,15 @@ type SkewSample struct {
SkewSec float64 `json:"skew"` // corrected skew in seconds
}
// BadSample is a single recent advert flagged as having a nonsense timestamp
// (|corrected skew| in the bimodal-bad band — > 1h, <= 24h). #1094: surfaced
// so the UI can link each offender to its packet detail page.
type BadSample struct {
Hash string `json:"hash"` // transmission hash for packet-detail deep-link
AdvertTS int64 `json:"advertTS"` // the offending advert Unix timestamp
SkewSec float64 `json:"skewSec"` // corrected skew vs observer at observation time
}
// HashEvidenceObserver is one observer's contribution to a per-hash evidence entry.
type HashEvidenceObserver struct {
ObserverID string `json:"observerID"`
@@ -512,7 +522,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
lastSkew = cs.LastSkewSec
lastAdvTS = cs.LastAdvertTS
}
tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec})
tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec, hash: tx.Hash, advertTS: cs.LastAdvertTS})
}
if len(allSkews) == 0 {
@@ -536,6 +546,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
recentSkew := lastSkew
var recentVals []float64
var recentPairs []tsSkewPair
if n := len(tsSkews); n > 0 {
latestTS := tsSkews[n-1].ts
// Index-based window: last K samples.
@@ -559,6 +570,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
start = startByTime
}
recentVals = make([]float64, 0, n-start)
recentPairs = tsSkews[start:n]
for i := start; i < n; i++ {
recentVals = append(recentVals, tsSkews[i].skew)
}
@@ -583,13 +595,25 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
// adverts had nonsense timestamps") on otherwise-healthy nodes.
var goodSamples []float64
var rtcResetCount int
for _, v := range recentVals {
var recentBadSamples []BadSample // #1094: per-bad-sample evidence (hash + advertTS)
for i, v := range recentVals {
absV := math.Abs(v)
switch {
case absV > rtcResetOutlierThresholdSec:
rtcResetCount++ // ignored for good/bad classification
case absV <= bimodalSkewThresholdSec:
goodSamples = append(goodSamples, v)
default:
// Bimodal-bad: 1h < |skew| <= 24h. Capture hash + advertTS so
// the UI can link each offender to its packet detail page
// instead of showing a count without evidence (#1094).
if i < len(recentPairs) && recentPairs[i].hash != "" {
recentBadSamples = append(recentBadSamples, BadSample{
Hash: recentPairs[i].hash,
AdvertTS: recentPairs[i].advertTS,
SkewSec: round(v, 1),
})
}
}
}
recentSampleCount := len(recentVals) - rtcResetCount
@@ -715,6 +739,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
Samples: samples,
GoodFraction: round(goodFraction, 2),
RecentBadSampleCount: recentBadCount,
RecentBadSamples: recentBadSamples,
RecentSampleCount: recentSampleCount,
RecentHashEvidence: recentEvidence,
CalibrationSummary: &calSummary,
@@ -875,10 +900,16 @@ func mean(vals []float64) float64 {
return sum / float64(len(vals))
}
// tsSkewPair is a (timestamp, skew) pair for drift estimation.
// tsSkewPair is a (timestamp, skew) pair for drift estimation. Also carries
// the source hash + advertTS so callers building per-sample evidence (e.g.
// recentBadSamples for #1094) can identify the offending packet without a
// second pass. Drift code reads only ts/skew; the extra fields are inert
// there.
type tsSkewPair struct {
ts int64
skew float64
ts int64
skew float64
hash string
advertTS int64
}
// computeDrift estimates linear drift in seconds per day from time-ordered
+109
View File
@@ -0,0 +1,109 @@
package main
// Regression test for #1094: the bimodal-clock warning currently exposes only
// RecentBadSampleCount, leaving the UI to render "⚠️ N of M adverts had
// nonsense timestamps" without telling the operator WHICH packets were bad.
//
// This test pins the additive API contract: alongside the count, the response
// must expose RecentBadSamples — a slice of (hash, advertTS, skewSec) — so the
// frontend can render each offending hash as a clickable link with its bad
// timestamp.
import (
"testing"
"time"
)
// Seeds 5 recent adverts: 3 healthy (~-20s skew) and 2 with a "nonsense"
// bimodal-bad timestamp (|skew| in (1h, 24h]). The recent window is exactly
// 5 samples, so all five are inside it.
func seedIssue1094Repro(t *testing.T) (*PacketStore, []string, []int64) {
t.Helper()
ps := NewPacketStore(nil, nil)
pt := 4 // ADVERT
const pubkey = "BADTS1094"
baseObs := int64(1779000000)
var txs []*StoreTx
var badHashes []string
var badAdvertTSs []int64
// 3 healthy adverts (skew = -20s).
for i := 0; i < 3; i++ {
obsTS := baseObs + int64(i)*60
advTS := obsTS - 20
txs = append(txs, &StoreTx{
Hash: "healthy-1094-" + formatInt64(int64(i)),
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
})
}
// 2 nonsense-timestamp adverts (skew = -7200s = -2h — bimodal-bad,
// below the 24h RTC-reset exclusion so they DO count in recentBadCount).
for i := 0; i < 2; i++ {
obsTS := baseObs + int64(3+i)*60
advTS := obsTS - 7200
hash := "bad-1094-" + formatInt64(int64(i))
txs = append(txs, &StoreTx{
Hash: hash,
PayloadType: &pt,
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
Observations: []*StoreObs{
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
},
})
badHashes = append(badHashes, hash)
badAdvertTSs = append(badAdvertTSs, advTS)
}
ps.mu.Lock()
ps.byNode[pubkey] = txs
for _, tx := range txs {
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
}
ps.clockSkew.computeInterval = 0
ps.mu.Unlock()
return ps, badHashes, badAdvertTSs
}
func TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp(t *testing.T) {
ps, wantHashes, wantAdvertTSs := seedIssue1094Repro(t)
r := ps.GetNodeClockSkew("BADTS1094")
if r == nil {
t.Fatal("expected clock skew result")
}
// Pre-condition: count must already be 2 (gates the test against the
// existing field — if this drops we'd be measuring the wrong thing).
if r.RecentBadSampleCount != 2 {
t.Fatalf("RecentBadSampleCount = %d, want 2 (seed bug, not the field-under-test)",
r.RecentBadSampleCount)
}
if len(r.RecentBadSamples) != 2 {
t.Fatalf("RecentBadSamples len = %d, want 2 — operators need to see which "+
"adverts had nonsense timestamps, not just the count",
len(r.RecentBadSamples))
}
gotByHash := map[string]int64{}
for _, bs := range r.RecentBadSamples {
gotByHash[bs.Hash] = bs.AdvertTS
}
for i, h := range wantHashes {
ts, ok := gotByHash[h]
if !ok {
t.Errorf("RecentBadSamples missing hash %q", h)
continue
}
if ts != wantAdvertTSs[i] {
t.Errorf("RecentBadSamples[%q].AdvertTS = %d, want %d (the bad advertTS)",
h, ts, wantAdvertTSs[i])
}
}
}
+15 -1
View File
@@ -1042,7 +1042,21 @@
var bimodalWarning = '';
if (cs.severity === 'bimodal_clock') {
var totalRecent = cs.recentSampleCount || 0;
bimodalWarning = '<div style="font-size:12px;color:var(--status-amber-text);margin-top:4px">⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)</div>';
var summary = '⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)';
var badList = '';
if (Array.isArray(cs.recentBadSamples) && cs.recentBadSamples.length) {
var items = cs.recentBadSamples.map(function(bs) {
var hash = String(bs.hash || '');
if (!hash) return '';
var iso = bs.advertTS ? new Date(bs.advertTS * 1000).toISOString() : '';
var label = iso ? formatTimestamp(iso) : '—';
return '<li><a href="#/packets/' + encodeURIComponent(hash) + '">' + escapeHtml(hash.slice(0, 8)) + '</a> → <span title="' + escapeHtml(iso) + '">' + escapeHtml(label) + '</span></li>';
}).filter(Boolean).join('');
if (items) {
badList = '<ul style="margin:4px 0 0 18px;padding:0;font-size:11px;color:var(--text-muted)">' + items + '</ul>';
}
}
bimodalWarning = '<div style="font-size:12px;color:var(--status-amber-text);margin-top:4px">' + summary + badList + '</div>';
}
container.innerHTML =
'<h4 style="margin:0 0 6px">⏰ Clock Skew</h4>' +