mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-06-28 05:41:38 +00:00
## Problem Node detail's bimodal-clock warning showed only `⚠️ N of last M adverts had nonsense timestamps (likely RTC reset)` — no way to tell which packets, no way to verify the heuristic, no way to drill in. ## Fix Additive, two-sides: **Backend** (`cmd/server/clock_skew.go`) - New type `BadSample { Hash, AdvertTS, SkewSec }`. - New field `NodeClockSkew.RecentBadSamples []BadSample` (`omitempty`). - Populated from the **same** bimodal-bad classification pass that produces `RecentBadSampleCount` — no heuristic change. `tsSkewPair` carries `hash` + `advertTS` so the classifier can record per-sample evidence without a second walk; drift code is unaffected (reads only `ts`/`skew`). **Frontend** (`public/nodes.js`) - `bimodalWarning` preserves the existing count summary line, then renders a `<ul>` of bad samples: each `<li>` is `<a href="#/packets/HASH">hash[:8]</a> → formatTimestamp(advertTS)` with ISO tooltip. Defensive `Array.isArray` so older API responses still render the summary alone. ## TDD - **Red:** `cmd/server/clock_skew_issue1094_test.go::TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp` — seeds 3 healthy + 2 bimodal-bad adverts, asserts `RecentBadSamples` has length 2 with the expected hashes and advert timestamps. Fails on the assertion (`len = 0, want 2`) with the stub-only commit. - **Green:** classifier populates the slice; existing #1285 and bimodal tests stay green. - Red commit: `ed501f4b` - Green commit: `54305b06` ## Cross-stack Backend + frontend ship together (`cross-stack: justified` commit). API stays backward compatible (`omitempty` server, `Array.isArray` client) but the feature only lights up with both halves present. ## Preflight Clean — PII, branch scope, red-commit, CSS vars, XSS sinks, migrations, fixture coverage all pass. ## Acceptance - [x] Warning lists specific packet hashes - [x] Each hash links to `#/packets/<hash>` - [x] Bad advert timestamp shown next to the hash - [x] Pattern is reusable — `BadSample` is a clean shape any future heuristic that flags specific packets can adopt Fixes #1094 --------- Co-authored-by: openclaw-bot <bot@openclaw.local>
This commit is contained in:
@@ -133,6 +133,7 @@ type NodeClockSkew struct {
|
||||
Samples []SkewSample `json:"samples,omitempty"` // time-series for sparklines
|
||||
GoodFraction float64 `json:"goodFraction"` // fraction of recent samples with |skew| <= 1h
|
||||
RecentBadSampleCount int `json:"recentBadSampleCount"` // count of recent samples with |skew| > 1h
|
||||
RecentBadSamples []BadSample `json:"recentBadSamples,omitempty"` // #1094: per-bad-sample evidence (hash + bad advertTS)
|
||||
RecentSampleCount int `json:"recentSampleCount"` // total recent samples in window
|
||||
RecentHashEvidence []HashEvidence `json:"recentHashEvidence,omitempty"`
|
||||
CalibrationSummary *CalibrationSummary `json:"calibrationSummary,omitempty"`
|
||||
@@ -146,6 +147,15 @@ type SkewSample struct {
|
||||
SkewSec float64 `json:"skew"` // corrected skew in seconds
|
||||
}
|
||||
|
||||
// BadSample is a single recent advert flagged as having a nonsense timestamp
|
||||
// (|corrected skew| in the bimodal-bad band — > 1h, <= 24h). #1094: surfaced
|
||||
// so the UI can link each offender to its packet detail page.
|
||||
type BadSample struct {
|
||||
Hash string `json:"hash"` // transmission hash for packet-detail deep-link
|
||||
AdvertTS int64 `json:"advertTS"` // the offending advert Unix timestamp
|
||||
SkewSec float64 `json:"skewSec"` // corrected skew vs observer at observation time
|
||||
}
|
||||
|
||||
// HashEvidenceObserver is one observer's contribution to a per-hash evidence entry.
|
||||
type HashEvidenceObserver struct {
|
||||
ObserverID string `json:"observerID"`
|
||||
@@ -512,7 +522,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
lastSkew = cs.LastSkewSec
|
||||
lastAdvTS = cs.LastAdvertTS
|
||||
}
|
||||
tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec})
|
||||
tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec, hash: tx.Hash, advertTS: cs.LastAdvertTS})
|
||||
}
|
||||
|
||||
if len(allSkews) == 0 {
|
||||
@@ -536,6 +546,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
|
||||
recentSkew := lastSkew
|
||||
var recentVals []float64
|
||||
var recentPairs []tsSkewPair
|
||||
if n := len(tsSkews); n > 0 {
|
||||
latestTS := tsSkews[n-1].ts
|
||||
// Index-based window: last K samples.
|
||||
@@ -559,6 +570,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
start = startByTime
|
||||
}
|
||||
recentVals = make([]float64, 0, n-start)
|
||||
recentPairs = tsSkews[start:n]
|
||||
for i := start; i < n; i++ {
|
||||
recentVals = append(recentVals, tsSkews[i].skew)
|
||||
}
|
||||
@@ -583,13 +595,25 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
// adverts had nonsense timestamps") on otherwise-healthy nodes.
|
||||
var goodSamples []float64
|
||||
var rtcResetCount int
|
||||
for _, v := range recentVals {
|
||||
var recentBadSamples []BadSample // #1094: per-bad-sample evidence (hash + advertTS)
|
||||
for i, v := range recentVals {
|
||||
absV := math.Abs(v)
|
||||
switch {
|
||||
case absV > rtcResetOutlierThresholdSec:
|
||||
rtcResetCount++ // ignored for good/bad classification
|
||||
case absV <= bimodalSkewThresholdSec:
|
||||
goodSamples = append(goodSamples, v)
|
||||
default:
|
||||
// Bimodal-bad: 1h < |skew| <= 24h. Capture hash + advertTS so
|
||||
// the UI can link each offender to its packet detail page
|
||||
// instead of showing a count without evidence (#1094).
|
||||
if i < len(recentPairs) && recentPairs[i].hash != "" {
|
||||
recentBadSamples = append(recentBadSamples, BadSample{
|
||||
Hash: recentPairs[i].hash,
|
||||
AdvertTS: recentPairs[i].advertTS,
|
||||
SkewSec: round(v, 1),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
recentSampleCount := len(recentVals) - rtcResetCount
|
||||
@@ -715,6 +739,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
Samples: samples,
|
||||
GoodFraction: round(goodFraction, 2),
|
||||
RecentBadSampleCount: recentBadCount,
|
||||
RecentBadSamples: recentBadSamples,
|
||||
RecentSampleCount: recentSampleCount,
|
||||
RecentHashEvidence: recentEvidence,
|
||||
CalibrationSummary: &calSummary,
|
||||
@@ -875,10 +900,16 @@ func mean(vals []float64) float64 {
|
||||
return sum / float64(len(vals))
|
||||
}
|
||||
|
||||
// tsSkewPair is a (timestamp, skew) pair for drift estimation.
|
||||
// tsSkewPair is a (timestamp, skew) pair for drift estimation. Also carries
|
||||
// the source hash + advertTS so callers building per-sample evidence (e.g.
|
||||
// recentBadSamples for #1094) can identify the offending packet without a
|
||||
// second pass. Drift code reads only ts/skew; the extra fields are inert
|
||||
// there.
|
||||
type tsSkewPair struct {
|
||||
ts int64
|
||||
skew float64
|
||||
ts int64
|
||||
skew float64
|
||||
hash string
|
||||
advertTS int64
|
||||
}
|
||||
|
||||
// computeDrift estimates linear drift in seconds per day from time-ordered
|
||||
|
||||
@@ -0,0 +1,109 @@
|
||||
package main
|
||||
|
||||
// Regression test for #1094: the bimodal-clock warning currently exposes only
|
||||
// RecentBadSampleCount, leaving the UI to render "⚠️ N of M adverts had
|
||||
// nonsense timestamps" without telling the operator WHICH packets were bad.
|
||||
//
|
||||
// This test pins the additive API contract: alongside the count, the response
|
||||
// must expose RecentBadSamples — a slice of (hash, advertTS, skewSec) — so the
|
||||
// frontend can render each offending hash as a clickable link with its bad
|
||||
// timestamp.
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Seeds 5 recent adverts: 3 healthy (~-20s skew) and 2 with a "nonsense"
|
||||
// bimodal-bad timestamp (|skew| in (1h, 24h]). The recent window is exactly
|
||||
// 5 samples, so all five are inside it.
|
||||
func seedIssue1094Repro(t *testing.T) (*PacketStore, []string, []int64) {
|
||||
t.Helper()
|
||||
ps := NewPacketStore(nil, nil)
|
||||
pt := 4 // ADVERT
|
||||
|
||||
const pubkey = "BADTS1094"
|
||||
baseObs := int64(1779000000)
|
||||
|
||||
var txs []*StoreTx
|
||||
var badHashes []string
|
||||
var badAdvertTSs []int64
|
||||
|
||||
// 3 healthy adverts (skew = -20s).
|
||||
for i := 0; i < 3; i++ {
|
||||
obsTS := baseObs + int64(i)*60
|
||||
advTS := obsTS - 20
|
||||
txs = append(txs, &StoreTx{
|
||||
Hash: "healthy-1094-" + formatInt64(int64(i)),
|
||||
PayloadType: &pt,
|
||||
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
|
||||
Observations: []*StoreObs{
|
||||
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// 2 nonsense-timestamp adverts (skew = -7200s = -2h — bimodal-bad,
|
||||
// below the 24h RTC-reset exclusion so they DO count in recentBadCount).
|
||||
for i := 0; i < 2; i++ {
|
||||
obsTS := baseObs + int64(3+i)*60
|
||||
advTS := obsTS - 7200
|
||||
hash := "bad-1094-" + formatInt64(int64(i))
|
||||
txs = append(txs, &StoreTx{
|
||||
Hash: hash,
|
||||
PayloadType: &pt,
|
||||
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
|
||||
Observations: []*StoreObs{
|
||||
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
|
||||
},
|
||||
})
|
||||
badHashes = append(badHashes, hash)
|
||||
badAdvertTSs = append(badAdvertTSs, advTS)
|
||||
}
|
||||
|
||||
ps.mu.Lock()
|
||||
ps.byNode[pubkey] = txs
|
||||
for _, tx := range txs {
|
||||
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
|
||||
}
|
||||
ps.clockSkew.computeInterval = 0
|
||||
ps.mu.Unlock()
|
||||
return ps, badHashes, badAdvertTSs
|
||||
}
|
||||
|
||||
func TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp(t *testing.T) {
|
||||
ps, wantHashes, wantAdvertTSs := seedIssue1094Repro(t)
|
||||
r := ps.GetNodeClockSkew("BADTS1094")
|
||||
if r == nil {
|
||||
t.Fatal("expected clock skew result")
|
||||
}
|
||||
|
||||
// Pre-condition: count must already be 2 (gates the test against the
|
||||
// existing field — if this drops we'd be measuring the wrong thing).
|
||||
if r.RecentBadSampleCount != 2 {
|
||||
t.Fatalf("RecentBadSampleCount = %d, want 2 (seed bug, not the field-under-test)",
|
||||
r.RecentBadSampleCount)
|
||||
}
|
||||
|
||||
if len(r.RecentBadSamples) != 2 {
|
||||
t.Fatalf("RecentBadSamples len = %d, want 2 — operators need to see which "+
|
||||
"adverts had nonsense timestamps, not just the count",
|
||||
len(r.RecentBadSamples))
|
||||
}
|
||||
|
||||
gotByHash := map[string]int64{}
|
||||
for _, bs := range r.RecentBadSamples {
|
||||
gotByHash[bs.Hash] = bs.AdvertTS
|
||||
}
|
||||
for i, h := range wantHashes {
|
||||
ts, ok := gotByHash[h]
|
||||
if !ok {
|
||||
t.Errorf("RecentBadSamples missing hash %q", h)
|
||||
continue
|
||||
}
|
||||
if ts != wantAdvertTSs[i] {
|
||||
t.Errorf("RecentBadSamples[%q].AdvertTS = %d, want %d (the bad advertTS)",
|
||||
h, ts, wantAdvertTSs[i])
|
||||
}
|
||||
}
|
||||
}
|
||||
+15
-1
@@ -1042,7 +1042,21 @@
|
||||
var bimodalWarning = '';
|
||||
if (cs.severity === 'bimodal_clock') {
|
||||
var totalRecent = cs.recentSampleCount || 0;
|
||||
bimodalWarning = '<div style="font-size:12px;color:var(--status-amber-text);margin-top:4px">⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)</div>';
|
||||
var summary = '⚠️ ' + (cs.recentBadSampleCount || '?') + ' of last ' + (totalRecent || '?') + ' adverts had nonsense timestamps (likely RTC reset)';
|
||||
var badList = '';
|
||||
if (Array.isArray(cs.recentBadSamples) && cs.recentBadSamples.length) {
|
||||
var items = cs.recentBadSamples.map(function(bs) {
|
||||
var hash = String(bs.hash || '');
|
||||
if (!hash) return '';
|
||||
var iso = bs.advertTS ? new Date(bs.advertTS * 1000).toISOString() : '';
|
||||
var label = iso ? formatTimestamp(iso) : '—';
|
||||
return '<li><a href="#/packets/' + encodeURIComponent(hash) + '">' + escapeHtml(hash.slice(0, 8)) + '</a> → <span title="' + escapeHtml(iso) + '">' + escapeHtml(label) + '</span></li>';
|
||||
}).filter(Boolean).join('');
|
||||
if (items) {
|
||||
badList = '<ul style="margin:4px 0 0 18px;padding:0;font-size:11px;color:var(--text-muted)">' + items + '</ul>';
|
||||
}
|
||||
}
|
||||
bimodalWarning = '<div style="font-size:12px;color:var(--status-amber-text);margin-top:4px">' + summary + badList + '</div>';
|
||||
}
|
||||
container.innerHTML =
|
||||
'<h4 style="margin:0 0 6px">⏰ Clock Skew</h4>' +
|
||||
|
||||
Reference in New Issue
Block a user