feat(node-detail): link RTC-reset warning to offending packet hashes (#1094) (#1590)

## Problem Node detail's bimodal-clock warning showed only `⚠️ N of last M adverts had nonsense timestamps (likely RTC reset)` — no way to tell which packets, no way to verify the heuristic, no way to drill in. ## Fix Additive, two-sides: **Backend** (`cmd/server/clock_skew.go`) - New type `BadSample { Hash, AdvertTS, SkewSec }`. - New field `NodeClockSkew.RecentBadSamples []BadSample` (`omitempty`). - Populated from the **same** bimodal-bad classification pass that produces `RecentBadSampleCount` — no heuristic change. `tsSkewPair` carries `hash` + `advertTS` so the classifier can record per-sample evidence without a second walk; drift code is unaffected (reads only `ts`/`skew`). **Frontend** (`public/nodes.js`) - `bimodalWarning` preserves the existing count summary line, then renders a `<ul>` of bad samples: each `<li>` is `<a href="#/packets/HASH">hash[:8]</a> → formatTimestamp(advertTS)` with ISO tooltip. Defensive `Array.isArray` so older API responses still render the summary alone. ## TDD - **Red:** `cmd/server/clock_skew_issue1094_test.go::TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp` — seeds 3 healthy + 2 bimodal-bad adverts, asserts `RecentBadSamples` has length 2 with the expected hashes and advert timestamps. Fails on the assertion (`len = 0, want 2`) with the stub-only commit. - **Green:** classifier populates the slice; existing #1285 and bimodal tests stay green. - Red commit: `ed501f4b` - Green commit: `54305b06` ## Cross-stack Backend + frontend ship together (`cross-stack: justified` commit). API stays backward compatible (`omitempty` server, `Array.isArray` client) but the feature only lights up with both halves present. ## Preflight Clean — PII, branch scope, red-commit, CSS vars, XSS sinks, migrations, fixture coverage all pass. ## Acceptance - [x] Warning lists specific packet hashes - [x] Each hash links to `#/packets/<hash>` - [x] Bad advert timestamp shown next to the hash - [x] Pattern is reusable — `BadSample` is a clean shape any future heuristic that flags specific packets can adopt Fixes #1094 --------- Co-authored-by: openclaw-bot <bot@openclaw.local>
2026-06-28 05:41:38 +00:00 · 2026-06-04 18:48:27 -07:00
parent af669438ff
commit 1a2b8c48be
3 changed files with 160 additions and 6 deletions
@@ -133,6 +133,7 @@ type NodeClockSkew struct {
 	Samples         []SkewSample `json:"samples,omitempty"` // time-series for sparklines
 	GoodFraction        float64  `json:"goodFraction"`        // fraction of recent samples with |skew| <= 1h
 	RecentBadSampleCount int     `json:"recentBadSampleCount"` // count of recent samples with |skew| > 1h
+	RecentBadSamples     []BadSample `json:"recentBadSamples,omitempty"` // #1094: per-bad-sample evidence (hash + bad advertTS)
 	RecentSampleCount    int     `json:"recentSampleCount"`    // total recent samples in window
 	RecentHashEvidence  []HashEvidence      `json:"recentHashEvidence,omitempty"`
 	CalibrationSummary  *CalibrationSummary `json:"calibrationSummary,omitempty"`
@@ -146,6 +147,15 @@ type SkewSample struct {
 	SkewSec   float64 `json:"skew"` // corrected skew in seconds
 }

+// BadSample is a single recent advert flagged as having a nonsense timestamp
+// (|corrected skew| in the bimodal-bad band — > 1h, <= 24h). #1094: surfaced
+// so the UI can link each offender to its packet detail page.
+type BadSample struct {
+	Hash     string  `json:"hash"`     // transmission hash for packet-detail deep-link
+	AdvertTS int64   `json:"advertTS"` // the offending advert Unix timestamp
+	SkewSec  float64 `json:"skewSec"`  // corrected skew vs observer at observation time
+}
+
 // HashEvidenceObserver is one observer's contribution to a per-hash evidence entry.
 type HashEvidenceObserver struct {
 	ObserverID      string  `json:"observerID"`
@@ -512,7 +522,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 			lastSkew = cs.LastSkewSec
 			lastAdvTS = cs.LastAdvertTS
 		}
-		tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec})
+		tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec, hash: tx.Hash, advertTS: cs.LastAdvertTS})
 	}

 	if len(allSkews) == 0 {
@@ -536,6 +546,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {

 	recentSkew := lastSkew
 	var recentVals []float64
+	var recentPairs []tsSkewPair
 	if n := len(tsSkews); n > 0 {
 		latestTS := tsSkews[n-1].ts
 		// Index-based window: last K samples.
@@ -559,6 +570,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 			start = startByTime
 		}
 		recentVals = make([]float64, 0, n-start)
+		recentPairs = tsSkews[start:n]
 		for i := start; i < n; i++ {
 			recentVals = append(recentVals, tsSkews[i].skew)
 		}
@@ -583,13 +595,25 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 	// adverts had nonsense timestamps") on otherwise-healthy nodes.
 	var goodSamples []float64
 	var rtcResetCount int
-	for _, v := range recentVals {
+	var recentBadSamples []BadSample // #1094: per-bad-sample evidence (hash + advertTS)
+	for i, v := range recentVals {
 		absV := math.Abs(v)
 		switch {
 		case absV > rtcResetOutlierThresholdSec:
 			rtcResetCount++ // ignored for good/bad classification
 		case absV <= bimodalSkewThresholdSec:
 			goodSamples = append(goodSamples, v)
+		default:
+			// Bimodal-bad: 1h < |skew| <= 24h. Capture hash + advertTS so
+			// the UI can link each offender to its packet detail page
+			// instead of showing a count without evidence (#1094).
+			if i < len(recentPairs) && recentPairs[i].hash != "" {
+				recentBadSamples = append(recentBadSamples, BadSample{
+					Hash:     recentPairs[i].hash,
+					AdvertTS: recentPairs[i].advertTS,
+					SkewSec:  round(v, 1),
+				})
+			}
 		}
 	}
 	recentSampleCount := len(recentVals) - rtcResetCount
@@ -715,6 +739,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 		Samples:              samples,
 		GoodFraction:         round(goodFraction, 2),
 		RecentBadSampleCount: recentBadCount,
+		RecentBadSamples:     recentBadSamples,
 		RecentSampleCount:    recentSampleCount,
 		RecentHashEvidence:   recentEvidence,
 		CalibrationSummary:   &calSummary,
@@ -875,10 +900,16 @@ func mean(vals []float64) float64 {
 	return sum / float64(len(vals))
 }

-// tsSkewPair is a (timestamp, skew) pair for drift estimation.
+// tsSkewPair is a (timestamp, skew) pair for drift estimation. Also carries
+// the source hash + advertTS so callers building per-sample evidence (e.g.
+// recentBadSamples for #1094) can identify the offending packet without a
+// second pass. Drift code reads only ts/skew; the extra fields are inert
+// there.
 type tsSkewPair struct {
-	ts   int64
-	skew float64
+	ts       int64
+	skew     float64
+	hash     string
+	advertTS int64
 }

 // computeDrift estimates linear drift in seconds per day from time-ordered
@@ -0,0 +1,109 @@
+package main
+
+// Regression test for #1094: the bimodal-clock warning currently exposes only
+// RecentBadSampleCount, leaving the UI to render "⚠️ N of M adverts had
+// nonsense timestamps" without telling the operator WHICH packets were bad.
+//
+// This test pins the additive API contract: alongside the count, the response
+// must expose RecentBadSamples — a slice of (hash, advertTS, skewSec) — so the
+// frontend can render each offending hash as a clickable link with its bad
+// timestamp.
+
+import (
+	"testing"
+	"time"
+)
+
+// Seeds 5 recent adverts: 3 healthy (~-20s skew) and 2 with a "nonsense"
+// bimodal-bad timestamp (|skew| in (1h, 24h]). The recent window is exactly
+// 5 samples, so all five are inside it.
+func seedIssue1094Repro(t *testing.T) (*PacketStore, []string, []int64) {
+	t.Helper()
+	ps := NewPacketStore(nil, nil)
+	pt := 4 // ADVERT
+
+	const pubkey = "BADTS1094"
+	baseObs := int64(1779000000)
+
+	var txs []*StoreTx
+	var badHashes []string
+	var badAdvertTSs []int64
+
+	// 3 healthy adverts (skew = -20s).
+	for i := 0; i < 3; i++ {
+		obsTS := baseObs + int64(i)*60
+		advTS := obsTS - 20
+		txs = append(txs, &StoreTx{
+			Hash:        "healthy-1094-" + formatInt64(int64(i)),
+			PayloadType: &pt,
+			DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
+			Observations: []*StoreObs{
+				{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
+			},
+		})
+	}
+
+	// 2 nonsense-timestamp adverts (skew = -7200s = -2h — bimodal-bad,
+	// below the 24h RTC-reset exclusion so they DO count in recentBadCount).
+	for i := 0; i < 2; i++ {
+		obsTS := baseObs + int64(3+i)*60
+		advTS := obsTS - 7200
+		hash := "bad-1094-" + formatInt64(int64(i))
+		txs = append(txs, &StoreTx{
+			Hash:        hash,
+			PayloadType: &pt,
+			DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
+			Observations: []*StoreObs{
+				{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
+			},
+		})
+		badHashes = append(badHashes, hash)
+		badAdvertTSs = append(badAdvertTSs, advTS)
+	}
+
+	ps.mu.Lock()
+	ps.byNode[pubkey] = txs
+	for _, tx := range txs {
+		ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
+	}
+	ps.clockSkew.computeInterval = 0
+	ps.mu.Unlock()
+	return ps, badHashes, badAdvertTSs
+}
+
+func TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp(t *testing.T) {
+	ps, wantHashes, wantAdvertTSs := seedIssue1094Repro(t)
+	r := ps.GetNodeClockSkew("BADTS1094")
+	if r == nil {
+		t.Fatal("expected clock skew result")
+	}
+
+	// Pre-condition: count must already be 2 (gates the test against the
+	// existing field — if this drops we'd be measuring the wrong thing).
+	if r.RecentBadSampleCount != 2 {
+		t.Fatalf("RecentBadSampleCount = %d, want 2 (seed bug, not the field-under-test)",
+			r.RecentBadSampleCount)
+	}
+
+	if len(r.RecentBadSamples) != 2 {
+		t.Fatalf("RecentBadSamples len = %d, want 2 — operators need to see which "+
+			"adverts had nonsense timestamps, not just the count",
+			len(r.RecentBadSamples))
+	}
+
+	gotByHash := map[string]int64{}
+	for _, bs := range r.RecentBadSamples {
+		gotByHash[bs.Hash] = bs.AdvertTS
+	}
+	for i, h := range wantHashes {
+		ts, ok := gotByHash[h]
+		if !ok {
+			t.Errorf("RecentBadSamples missing hash %q", h)
+			continue
+		}
+		if ts != wantAdvertTSs[i] {
+			t.Errorf("RecentBadSamples[%q].AdvertTS = %d, want %d (the bad advertTS)",
+				h, ts, wantAdvertTSs[i])
+		}
+	}
+}
@@ -1042,7 +1042,21 @@
      var bimodalWarning = '';
      if (cs.severity === 'bimodal_clock') {
        var totalRecent = cs.recentSampleCount || 0;
-        bimodalWarning = '<div style="font-size:12px;color:var(--status-amber-text);margin-top:4px">⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)</div>';
+        var summary = '⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)';
+        var badList = '';
+        if (Array.isArray(cs.recentBadSamples) && cs.recentBadSamples.length) {
+          var items = cs.recentBadSamples.map(function(bs) {
+            var hash = String(bs.hash || '');
+            if (!hash) return '';
+            var iso = bs.advertTS ? new Date(bs.advertTS * 1000).toISOString() : '';
+            var label = iso ? formatTimestamp(iso) : '—';
+            return '<li><a href="#/packets/' + encodeURIComponent(hash) + '">' + escapeHtml(hash.slice(0, 8)) + '</a> → <span title="' + escapeHtml(iso) + '">' + escapeHtml(label) + '</span></li>';
+          }).filter(Boolean).join('');
+          if (items) {
+            badList = '<ul style="margin:4px 0 0 18px;padding:0;font-size:11px;color:var(--text-muted)">' + items + '</ul>';
+          }
+        }
+        bimodalWarning = '<div style="font-size:12px;color:var(--status-amber-text);margin-top:4px">' + summary + badList + '</div>';
      }
      container.innerHTML =
        '<h4 style="margin:0 0 6px">⏰ Clock Skew</h4>' +