mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-04-25 13:52:08 +00:00
Compare commits
1 Commits
master
...
fix/845-no
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cc92a8d5c4 |
@@ -1 +1 @@
|
||||
{"schemaVersion":1,"label":"e2e tests","message":"82 passed","color":"brightgreen"}
|
||||
{"schemaVersion":1,"label":"e2e tests","message":"45 passed","color":"brightgreen"}
|
||||
@@ -1 +1 @@
|
||||
{"schemaVersion":1,"label":"frontend coverage","message":"37.26%","color":"red"}
|
||||
{"schemaVersion":1,"label":"frontend coverage","message":"39.68%","color":"red"}
|
||||
23
.github/workflows/deploy.yml
vendored
23
.github/workflows/deploy.yml
vendored
@@ -135,7 +135,7 @@ jobs:
|
||||
e2e-test:
|
||||
name: "🎭 Playwright E2E Tests"
|
||||
needs: [go-test]
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: [self-hosted, Linux]
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@@ -145,6 +145,13 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Free disk space
|
||||
run: |
|
||||
# Prune old runner diagnostic logs (can accumulate 50MB+)
|
||||
find ~/actions-runner/_diag/ -name '*.log' -mtime +3 -delete 2>/dev/null || true
|
||||
# Show available disk space
|
||||
df -h / | tail -1
|
||||
|
||||
- name: Set up Node.js 22
|
||||
uses: actions/setup-node@v5
|
||||
with:
|
||||
@@ -245,11 +252,17 @@ jobs:
|
||||
build-and-publish:
|
||||
name: "🏗️ Build & Publish Docker Image"
|
||||
needs: [e2e-test]
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: [self-hosted, meshcore-runner-2]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
- name: Free disk space
|
||||
run: |
|
||||
docker system prune -af 2>/dev/null || true
|
||||
docker builder prune -af 2>/dev/null || true
|
||||
df -h /
|
||||
|
||||
- name: Compute build metadata
|
||||
id: meta
|
||||
run: |
|
||||
@@ -359,7 +372,7 @@ jobs:
|
||||
# ───────────────────────────────────────────────────────────────
|
||||
deploy:
|
||||
name: "🚀 Deploy Staging"
|
||||
if: false # disabled: staging VM offline, manual deploy required
|
||||
if: github.event_name == 'push'
|
||||
needs: [build-and-publish]
|
||||
runs-on: [self-hosted, meshcore-runner-2]
|
||||
steps:
|
||||
@@ -448,8 +461,8 @@ jobs:
|
||||
publish:
|
||||
name: "📝 Publish Badges & Summary"
|
||||
if: github.event_name == 'push'
|
||||
needs: [build-and-publish]
|
||||
runs-on: ubuntu-latest
|
||||
needs: [deploy]
|
||||
runs-on: [self-hosted, Linux]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
|
||||
@@ -40,12 +40,17 @@ const (
|
||||
// issue #789). The all-time median is poisoned by historical bad
|
||||
// samples (e.g. a node that was off and then GPS-corrected); severity
|
||||
// must reflect current health, not lifetime statistics.
|
||||
recentSkewWindowCount = 5
|
||||
//
|
||||
// Widened from 5 → 20 to add hysteresis: a brief burst of bad samples
|
||||
// in a known-bimodal node should not flip its severity to "no_clock"
|
||||
// (see classification rule below that also gates on long-term goodFraction).
|
||||
recentSkewWindowCount = 20
|
||||
|
||||
// recentSkewWindowSec bounds the recent-window in time as well: only
|
||||
// samples from the last N seconds count as "recent" for severity.
|
||||
// The effective window is min(recentSkewWindowCount, samples in 1h).
|
||||
recentSkewWindowSec = 3600
|
||||
// The effective window is min(recentSkewWindowCount, samples in 6h).
|
||||
// Widened from 1h → 6h to match the larger sample budget.
|
||||
recentSkewWindowSec = 21600
|
||||
|
||||
// bimodalSkewThresholdSec is the absolute skew threshold (1 hour)
|
||||
// above which a sample is considered "bad" — likely firmware emitting
|
||||
@@ -118,6 +123,7 @@ type NodeClockSkew struct {
|
||||
LastObservedTS int64 `json:"lastObservedTS"` // most recent observation timestamp
|
||||
Samples []SkewSample `json:"samples,omitempty"` // time-series for sparklines
|
||||
GoodFraction float64 `json:"goodFraction"` // fraction of recent samples with |skew| <= 1h
|
||||
LongTermGoodFraction float64 `json:"longTermGoodFraction"` // fraction of ALL samples with |skew| <= 1h (hysteresis input)
|
||||
RecentBadSampleCount int `json:"recentBadSampleCount"` // count of recent samples with |skew| > 1h
|
||||
RecentSampleCount int `json:"recentSampleCount"` // total recent samples in window
|
||||
NodeName string `json:"nodeName,omitempty"` // populated in fleet responses
|
||||
@@ -502,13 +508,18 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Bimodal detection (#845) ─────────────────────────────────────────
|
||||
// ── Bimodal detection (#845, hysteresis) ─────────────────────────────
|
||||
// Split recent samples into "good" (|skew| <= 1h, real clock) and
|
||||
// "bad" (|skew| > 1h, firmware nonsense from uninitialized RTC).
|
||||
// Classification order (first match wins):
|
||||
// no_clock — goodFraction < 0.10 (essentially no real clock)
|
||||
// bimodal_clock — 0.10 <= goodFraction < 0.80 AND badCount > 0
|
||||
// ok/warn/etc. — goodFraction >= 0.80 (normal, outliers filtered)
|
||||
// no_clock — recent goodFraction < 0.10 AND long-term goodFraction < 0.10
|
||||
// (the long-term gate is hysteresis: a bimodal node that
|
||||
// hits a transient burst of bad samples must NOT flip
|
||||
// to no_clock — it's still bimodal historically)
|
||||
// bimodal_clock — recent goodFraction < 0.80 AND (badCount > 0 OR long-term goodFraction < 0.80)
|
||||
// (also catches nodes where recent < 0.10 but long-term
|
||||
// is healthier — i.e. flaky rather than dead)
|
||||
// ok/warn/etc. — recent goodFraction >= 0.80 (normal, outliers filtered)
|
||||
var goodSamples []float64
|
||||
for _, v := range recentVals {
|
||||
if math.Abs(v) <= bimodalSkewThresholdSec {
|
||||
@@ -522,16 +533,42 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
goodFraction = float64(len(goodSamples)) / float64(recentSampleCount)
|
||||
}
|
||||
|
||||
// Long-term goodFraction across ALL samples — used as hysteresis to
|
||||
// prevent a recent burst of bad samples from flipping a bimodal node
|
||||
// to no_clock. If a node has enough real-clock samples (at least 10% of all
|
||||
// samples are good), it stays bimodal even when the recent window is
|
||||
// 100% bad.
|
||||
longTermGoodCount := 0
|
||||
for _, p := range tsSkews {
|
||||
if math.Abs(p.skew) <= bimodalSkewThresholdSec {
|
||||
longTermGoodCount++
|
||||
}
|
||||
}
|
||||
var longTermGoodFraction float64
|
||||
if len(tsSkews) > 0 {
|
||||
longTermGoodFraction = float64(longTermGoodCount) / float64(len(tsSkews))
|
||||
}
|
||||
|
||||
var severity SkewSeverity
|
||||
if goodFraction < 0.10 {
|
||||
// Essentially no real clock — classify as no_clock regardless
|
||||
// of the raw skew magnitude.
|
||||
if goodFraction < 0.10 && longTermGoodFraction < 0.10 {
|
||||
// Essentially no real clock — recent AND long-term agree.
|
||||
severity = SkewNoClock
|
||||
} else if goodFraction < 0.80 && recentBadCount > 0 {
|
||||
// Bimodal: use median of GOOD samples as the "real" skew.
|
||||
} else if goodFraction < 0.80 && (recentBadCount > 0 || longTermGoodFraction < 0.80) {
|
||||
// Bimodal: recent window is mixed, OR recent is all-bad but the node
|
||||
// has historical good samples (transient bad-burst on a flaky node).
|
||||
// Use median of GOOD samples — prefer recent good if present, else
|
||||
// fall back to long-term good median so the displayed skew is meaningful.
|
||||
severity = SkewBimodalClock
|
||||
if len(goodSamples) > 0 {
|
||||
recentSkew = median(goodSamples)
|
||||
} else if longTermGoodCount > 0 {
|
||||
ltGood := make([]float64, 0, longTermGoodCount)
|
||||
for _, p := range tsSkews {
|
||||
if math.Abs(p.skew) <= bimodalSkewThresholdSec {
|
||||
ltGood = append(ltGood, p.skew)
|
||||
}
|
||||
}
|
||||
recentSkew = median(ltGood)
|
||||
}
|
||||
} else {
|
||||
// Normal path: if there are good samples, use their median
|
||||
@@ -572,6 +609,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
|
||||
LastObservedTS: lastObsTS,
|
||||
Samples: samples,
|
||||
GoodFraction: round(goodFraction, 2),
|
||||
LongTermGoodFraction: round(longTermGoodFraction, 2),
|
||||
RecentBadSampleCount: recentBadCount,
|
||||
RecentSampleCount: recentSampleCount,
|
||||
}
|
||||
|
||||
@@ -557,7 +557,8 @@ func TestSeverityUsesRecentNotMedian(t *testing.T) {
|
||||
|
||||
baseObs := int64(1700000000)
|
||||
var txs []*StoreTx
|
||||
for i := 0; i < 105; i++ {
|
||||
// 100 bad samples then 25 good — recent window (20) is dominated by good.
|
||||
for i := 0; i < 125; i++ {
|
||||
obsTS := baseObs + int64(i)*300 // 5 min apart
|
||||
var skew int64 = -60
|
||||
if i >= 100 {
|
||||
@@ -646,12 +647,13 @@ func TestReporterScenario_789(t *testing.T) {
|
||||
|
||||
baseObs := int64(1700000000)
|
||||
var txs []*StoreTx
|
||||
// 1657 samples with the bad ~-683-day skew (the historical poison),
|
||||
// then 5 freshly corrected samples at -0.8s — totals 1662.
|
||||
for i := 0; i < 1662; i++ {
|
||||
// 1660 samples with the bad ~-683-day skew (the historical poison),
|
||||
// then 20 freshly corrected samples at -0.8s — totals 1680.
|
||||
// Need ≥20 corrected to fill the recent-window (recentSkewWindowCount=20).
|
||||
for i := 0; i < 1680; i++ {
|
||||
obsTS := baseObs + int64(i)*60 // 1 min apart
|
||||
var skew int64
|
||||
if i < 1657 {
|
||||
if i < 1660 {
|
||||
skew = -59063561 // ~ -683 days
|
||||
} else {
|
||||
skew = -1 // corrected (rounded; reporter saw -0.8)
|
||||
@@ -680,8 +682,12 @@ func TestReporterScenario_789(t *testing.T) {
|
||||
t.Fatal("nil result")
|
||||
}
|
||||
// Severity must reflect current health, not the all-time median.
|
||||
if r.Severity != SkewOK && r.Severity != SkewWarning {
|
||||
t.Errorf("severity = %v, want ok/warning (recent samples are healthy)", r.Severity)
|
||||
// Post-#845 + hysteresis: bimodal_clock is also accepted, because a recent
|
||||
// window that still holds historical bad samples (recent goodFraction < 0.80)
|
||||
// classifies as bimodal. A clean recent window (goodFraction >= 0.80) takes
|
||||
// the normal ok/warning path instead.
|
||||
if r.Severity != SkewOK && r.Severity != SkewWarning && r.Severity != SkewBimodalClock {
|
||||
t.Errorf("severity = %v, want ok/warning/bimodal_clock (recent samples are healthy)", r.Severity)
|
||||
}
|
||||
if math.Abs(r.RecentMedianSkewSec) > 5 {
|
||||
t.Errorf("recentMedianSkewSec = %v, want near 0", r.RecentMedianSkewSec)
|
||||
@@ -954,3 +960,116 @@ func TestAllGood_OK_845(t *testing.T) {
|
||||
t.Errorf("recentBadSampleCount = %v, want 0", r.RecentBadSampleCount)
|
||||
}
|
||||
}
|
||||
|
||||
// TestBimodalHysteresis: a node with a mixed (half good, half bad) history but a
|
||||
// recent burst of all-bad samples must stay bimodal_clock, NOT flip to
|
||||
// no_clock. This is the "Kpa Roof Solar" scenario seen on staging
|
||||
// (2026-04-22): historically bimodal node hits a transient all-bad burst
|
||||
// and the operator briefly sees "🚫 No Clock" even though the most recent
|
||||
// real advert decoded with a valid 2026 timestamp.
|
||||
func TestBimodalHysteresis(t *testing.T) {
|
||||
ps := NewPacketStore(nil, nil)
|
||||
pt := 4
|
||||
baseObs := int64(1700000000)
|
||||
var txs []*StoreTx
|
||||
// 80 historical samples: 50% good (-2s), 50% bad (-58M sec ≈ -1.8yr)
|
||||
for i := 0; i < 80; i++ {
|
||||
obsTS := baseObs + int64(i)*60
|
||||
var skew int64 = -2
|
||||
if i%2 == 0 {
|
||||
skew = -58000000
|
||||
}
|
||||
tx := &StoreTx{
|
||||
Hash: fmt.Sprintf("hist-%04d", i),
|
||||
PayloadType: &pt,
|
||||
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(obsTS+skew) + `}}`,
|
||||
Observations: []*StoreObs{
|
||||
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
|
||||
},
|
||||
}
|
||||
txs = append(txs, tx)
|
||||
}
|
||||
// 25 recent samples ALL bad — fills the recent window (size 20) entirely
|
||||
// with bad samples. recent goodFraction = 0.
|
||||
for i := 80; i < 105; i++ {
|
||||
obsTS := baseObs + int64(i)*60
|
||||
tx := &StoreTx{
|
||||
Hash: fmt.Sprintf("badburst-%04d", i),
|
||||
PayloadType: &pt,
|
||||
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(obsTS-58000000) + `}}`,
|
||||
Observations: []*StoreObs{
|
||||
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
|
||||
},
|
||||
}
|
||||
txs = append(txs, tx)
|
||||
}
|
||||
ps.mu.Lock()
|
||||
ps.byNode["BIHYST"] = txs
|
||||
for _, tx := range txs {
|
||||
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
|
||||
}
|
||||
ps.clockSkew.computeInterval = 0
|
||||
ps.mu.Unlock()
|
||||
|
||||
r := ps.GetNodeClockSkew("BIHYST")
|
||||
if r == nil {
|
||||
t.Fatal("nil result")
|
||||
}
|
||||
// Without hysteresis: severity would be no_clock (recent goodFraction=0).
|
||||
// With hysteresis: long-term goodFraction ≈ 0.38 ≥ 0.10, so stays bimodal.
|
||||
if r.Severity != SkewBimodalClock {
|
||||
t.Errorf("severity = %v, want bimodal_clock (long-term has good samples)", r.Severity)
|
||||
}
|
||||
if r.GoodFraction != 0 {
|
||||
t.Errorf("recent goodFraction = %v, want 0 (bad burst)", r.GoodFraction)
|
||||
}
|
||||
if r.LongTermGoodFraction < 0.10 {
|
||||
t.Errorf("longTermGoodFraction = %v, want >= 0.10", r.LongTermGoodFraction)
|
||||
}
|
||||
// Displayed skew should be the long-term good median (-2s), not the
|
||||
// nonsense bad value, so the operator sees a meaningful number.
|
||||
if r.RecentMedianSkewSec < -10 || r.RecentMedianSkewSec > 10 {
|
||||
t.Errorf("recentMedianSkewSec = %v, want near -2 (long-term good median fallback)", r.RecentMedianSkewSec)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNoClock_BothWindowsBad: the inverse of TestBimodalHysteresis. When
|
||||
// BOTH the recent window and the long-term goodFraction are essentially 0,
|
||||
// the node is genuinely no_clock (uninitialized RTC throughout).
|
||||
func TestNoClock_BothWindowsBad(t *testing.T) {
|
||||
ps := NewPacketStore(nil, nil)
|
||||
pt := 4
|
||||
baseObs := int64(1700000000)
|
||||
var txs []*StoreTx
|
||||
// 50 samples — all bad.
|
||||
for i := 0; i < 50; i++ {
|
||||
obsTS := baseObs + int64(i)*60
|
||||
tx := &StoreTx{
|
||||
Hash: fmt.Sprintf("dead-%04d", i),
|
||||
PayloadType: &pt,
|
||||
DecodedJSON: `{"payload":{"timestamp":` + formatInt64(obsTS-58000000) + `}}`,
|
||||
Observations: []*StoreObs{
|
||||
{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
|
||||
},
|
||||
}
|
||||
txs = append(txs, tx)
|
||||
}
|
||||
ps.mu.Lock()
|
||||
ps.byNode["DEADCLOCK"] = txs
|
||||
for _, tx := range txs {
|
||||
ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
|
||||
}
|
||||
ps.clockSkew.computeInterval = 0
|
||||
ps.mu.Unlock()
|
||||
|
||||
r := ps.GetNodeClockSkew("DEADCLOCK")
|
||||
if r == nil {
|
||||
t.Fatal("nil result")
|
||||
}
|
||||
if r.Severity != SkewNoClock {
|
||||
t.Errorf("severity = %v, want no_clock", r.Severity)
|
||||
}
|
||||
if r.LongTermGoodFraction != 0 {
|
||||
t.Errorf("longTermGoodFraction = %v, want 0", r.LongTermGoodFraction)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -393,25 +393,17 @@
|
||||
}
|
||||
}
|
||||
|
||||
// Merge user-stored keys into the channel list.
|
||||
// If a stored key matches a server-known channel, mark that channel as
|
||||
// userAdded so the ✕ button appears — otherwise the user has no way to
|
||||
// remove a key they added but that the server already knows about.
|
||||
// Merge user-stored keys into the channel list
|
||||
function mergeUserChannels() {
|
||||
var keys = ChannelDecrypt.getStoredKeys();
|
||||
var names = Object.keys(keys);
|
||||
for (var i = 0; i < names.length; i++) {
|
||||
var name = names[i];
|
||||
var matched = false;
|
||||
for (var j = 0; j < channels.length; j++) {
|
||||
var ch = channels[j];
|
||||
if (ch.name === name || ch.hash === name || ch.hash === ('user:' + name)) {
|
||||
ch.userAdded = true;
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!matched) {
|
||||
// Check if channel already exists by name
|
||||
var exists = channels.some(function (ch) {
|
||||
return ch.name === name || ch.hash === name || ch.hash === ('user:' + name);
|
||||
});
|
||||
if (!exists) {
|
||||
channels.push({
|
||||
hash: 'user:' + name,
|
||||
name: name,
|
||||
@@ -757,38 +749,19 @@
|
||||
e.stopPropagation();
|
||||
var channelHash = removeBtn.getAttribute('data-remove-channel');
|
||||
if (!channelHash) return;
|
||||
// The localStorage key is the channel name. For user:-prefixed entries
|
||||
// strip the prefix; for server-known channels look up the channel
|
||||
// object so we use its display name (the hash itself isn't the key).
|
||||
var ch = channels.find(function (c) { return c.hash === channelHash; });
|
||||
var chName = channelHash.startsWith('user:')
|
||||
? channelHash.substring(5)
|
||||
: (ch && ch.name) || channelHash;
|
||||
var chName = channelHash.startsWith('user:') ? channelHash.substring(5) : channelHash;
|
||||
if (!confirm('Remove channel "' + chName + '"? This will clear saved keys and cached messages.')) return;
|
||||
ChannelDecrypt.removeKey(chName);
|
||||
if (channelHash.startsWith('user:')) {
|
||||
// Pure user-added channel — drop from the list entirely.
|
||||
channels = channels.filter(function (c) { return c.hash !== channelHash; });
|
||||
if (selectedHash === channelHash) {
|
||||
selectedHash = null;
|
||||
messages = [];
|
||||
history.replaceState(null, '', '#/channels');
|
||||
var msgEl2 = document.getElementById('chMessages');
|
||||
if (msgEl2) msgEl2.innerHTML = '<div class="ch-empty">Choose a channel from the sidebar to view messages</div>';
|
||||
var header2 = document.getElementById('chHeader');
|
||||
if (header2) header2.querySelector('.ch-header-text').textContent = 'Select a channel';
|
||||
}
|
||||
} else if (ch) {
|
||||
// Server-known channel: keep the row, just unmark as user-added so
|
||||
// the ✕ disappears until they re-add a key.
|
||||
ch.userAdded = false;
|
||||
// If this was the selected channel, clear decrypted messages since
|
||||
// the key is gone — they can't be re-decrypted without re-adding it.
|
||||
if (selectedHash === channelHash) {
|
||||
messages = [];
|
||||
var msgEl2 = document.getElementById('chMessages');
|
||||
if (msgEl2) msgEl2.innerHTML = '<div class="ch-empty">Key removed — add a key to decrypt messages</div>';
|
||||
}
|
||||
// Remove from channels array
|
||||
channels = channels.filter(function (c) { return c.hash !== channelHash; });
|
||||
if (selectedHash === channelHash) {
|
||||
selectedHash = null;
|
||||
messages = [];
|
||||
history.replaceState(null, '', '#/channels');
|
||||
var msgEl2 = document.getElementById('chMessages');
|
||||
if (msgEl2) msgEl2.innerHTML = '<div class="ch-empty">Choose a channel from the sidebar to view messages</div>';
|
||||
var header2 = document.getElementById('chHeader');
|
||||
if (header2) header2.querySelector('.ch-header-text').textContent = 'Select a channel';
|
||||
}
|
||||
renderChannelList();
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user