Compare commits

..

4 Commits

Author SHA1 Message Date
you b0c9ff9b2b fix: 3 critical bugs + 5 non-blocking review items
Critical fixes:
1. API endpoint: /api/observers/metrics/summary doesn't exist in prod.
   Use /api/observers which returns observer data with noise_floor,
   battery_mv, packet_count, last_seen. Unwrap {observers:[...]} wrapper.

2. WS dead connection detection: add ping/pong keepalive (30s ping,
   60s read deadline reset on pong). Replaces 2s polling deadline with
   proper keepalive that detects dead connections reliably.

3. WS packet parsing: server sends {type:'packet',data:{...}} envelope.
   parseWSMessage now unwraps the envelope and reads fields from the
   correct locations: decoded.header.payloadTypeName for type,
   top-level rssi/snr/observer_name, decoded.payload for text/hops.

Non-blocking items (from Carmack review):
A. Render coalescing: 16ms tick (60fps cap) decouples packet ingestion
   from rendering. Packets accumulate in Update, View only re-renders
   on renderTickMsg.
B+D. Rune-aware truncation: truncate() and safePrefix() use []rune(s)
   for safe UTF-8 handling instead of byte slicing.
E. Dashboard sort moved from View to Update: observers pre-sorted when
   data arrives, not on every render call.
2026-04-05 14:32:18 +00:00
you 12b8c176f1 fix: address 4 must-fix review items from Carmack
1. Goroutine stall: always return listenForWSMsg() cmd from Update,
   even for unhandled message types, preventing wsMsgChan blocking.

2. Double-close panic: wrap close(m.wsDone) in sync.Once to prevent
   panic on repeated quit key presses.

3. Ring buffer allocations: replace slice append+copy with fixed-size
   array using head/tail indices. Zero allocations in steady state.

4. Unbounded HTTP read: wrap resp.Body with io.LimitReader(1MB) on
   the summary endpoint to cap memory usage.
2026-04-05 07:29:52 +00:00
you 3e39776178 fix: TUI goroutine leaks, WS reconnect, ring buffer GC, panic recovery
- Fix goroutine leak: statusChan goroutine in Init() never terminated.
  Replaced separate statusChan+packetChan with unified wsMsgChan that
  carries both wsStatusMsg and packetMsg as tea.Msg values.
- Fix WS goroutine unable to exit on quit: ReadMessage blocked
  indefinitely. Added 2s read deadline so the done channel is checked
  periodically.
- Add panic recovery in connectWS goroutine.
- Fix ring buffer GC leak: old slicing kept backing array alive.
  Now copies to fresh slice when trimming.
- Fix potential panic: ObserverID[:8] on short IDs. Added safePrefix().
- Fix potential panic: ts[:8] on short timestamp strings.
- Send graceful WebSocket close frame on quit.
- Remove unused sync.Mutex field.
- Handle wsStatusMsg as proper tea.Msg type instead of sentinel packet.
2026-04-05 07:25:54 +00:00
you 8851d996f2 feat: CoreScope TUI MVP — terminal dashboard + live packet feed
Two-view bubbletea TUI that connects to any CoreScope instance:

View 1 - Fleet Dashboard:
- Polls /api/observers/metrics/summary every 5s
- Table: Observer, NF(dBm), Avg NF, Max NF, Battery, Samples
- Sorted by worst noise floor first
- Color coded: green (normal), yellow (>-100), red (>-85)

View 2 - Live Packet Feed:
- WebSocket connection to /ws
- 500-packet ring buffer
- Shows timestamp, type, observer, hops, RSSI/SNR, channel text
- Auto-reconnect with exponential backoff (1s→30s)

Navigation: Tab/1/2 to switch views, q to quit
CLI: corescope-tui --url http://localhost:3000

Refs #609
2026-04-05 07:15:43 +00:00
716 changed files with 6699 additions and 154782 deletions
+1 -1
View File
@@ -1 +1 @@
{"schemaVersion":1,"label":"e2e tests","message":"821 passed","color":"brightgreen"}
{"schemaVersion":1,"label":"e2e tests","message":"45 passed","color":"brightgreen"}
+1 -1
View File
@@ -1 +1 @@
{"schemaVersion":1,"label":"frontend coverage","message":"36.64%","color":"red"}
{"schemaVersion":1,"label":"frontend coverage","message":"39.68%","color":"red"}
-287
View File
@@ -1,287 +0,0 @@
{
"parserOptions": {
"ecmaVersion": 2022,
"sourceType": "script"
},
"env": {
"browser": true,
"es2022": true
},
"globals": {
"AreaFilter": "readonly",
"CACHE_INVALIDATE_MS": "readonly",
"CLIENT_CONFIG": "readonly",
"CLIENT_TTL": "readonly",
"ChannelColorPicker": "readonly",
"ChannelColors": "readonly",
"ChannelDecrypt": "readonly",
"ChannelQR": "readonly",
"Chart": "readonly",
"DIST_THRESHOLDS": "readonly",
"DragManager": "readonly",
"EXTERNAL_URLS": "readonly",
"FAV_KEY": "readonly",
"FilterUX": "readonly",
"GestureHints": "readonly",
"HEALTH_THRESHOLDS": "readonly",
"HashColor": "readonly",
"HopDisplay": "readonly",
"HopResolver": "readonly",
"IATA_CITIES": "readonly",
"IATA_COORDS_GEO": "readonly",
"L": "readonly",
"LIMITS": "readonly",
"Logo": "readonly",
"MAX_HOP_DIST": "readonly",
"MeshAudio": "readonly",
"MeshConfigReady": "readonly",
"PAYLOAD_COLORS": "readonly",
"PAYLOAD_TYPES": "readonly",
"PERF_SLOW_MS": "readonly",
"PROPAGATION_BUFFER_MS": "readonly",
"PULL_THRESHOLD_PX": "readonly",
"PacketFilter": "readonly",
"PathInspector": "readonly",
"PrefixReserved": "readonly",
"QRCode": "readonly",
"ROLE_COLORS": "readonly",
"ROLE_EMOJI": "readonly",
"ROLE_LABELS": "readonly",
"ROLE_SHAPES": "readonly",
"ROLE_SORT": "readonly",
"ROLE_STYLE": "readonly",
"ROUTE_TYPES": "readonly",
"RegionFilter": "readonly",
"RegionShowAll": "readonly",
"SITE_CONFIG": "readonly",
"SKEW_SEVERITY_COLORS": "readonly",
"SKEW_SEVERITY_LABELS": "readonly",
"SKEW_SEVERITY_ORDER": "readonly",
"SNR_THRESHOLDS": "readonly",
"SlideOver": "readonly",
"TILE_DARK": "readonly",
"TILE_LIGHT": "readonly",
"MC_TILE_PROVIDERS": "readonly",
"MC_setDarkTileProvider": "readonly",
"MC_getDarkTileProvider": "readonly",
"MC_setServerDefaultTileProvider": "readonly",
"MC_applyTileFilter": "readonly",
"MC_DARK_TILE_DEFAULT": "readonly",
"TYPE_COLORS": "readonly",
"TableResponsive": "readonly",
"TableSort": "readonly",
"TouchGestures": "readonly",
"TracesHelpers": "readonly",
"URLState": "readonly",
"WS_RECONNECT_MS": "readonly",
"_SITE_CONFIG_ORIGINAL_HOME": "readonly",
"__PERF_LOG_RENDER": "readonly",
"__bottomNavInitDone": "readonly",
"__corescopeLogo": "readonly",
"__dirname": "readonly",
"__filename": "readonly",
"__gestureHints1065Init": "readonly",
"__liveMQLBindCount": "readonly",
"__meshcoreMapInternals": "readonly",
"__navDrawer": "readonly",
"__navDrawerPointerBindCount": "readonly",
"__pathOverflowWired": "readonly",
"__scrollLock": "readonly",
"__touchGestures1062InitCount": "readonly",
"_analyticsChannelTbodyHtml": "readonly",
"_analyticsChannelTheadHtml": "readonly",
"_analyticsDecorateChannels": "readonly",
"_analyticsHashStatCardsHtml": "readonly",
"_analyticsLoadChannelSort": "readonly",
"_analyticsRenderCollisionsFromServer": "readonly",
"_analyticsRenderMultiByteAdopters": "readonly",
"_analyticsRenderMultiByteCapability": "readonly",
"_analyticsRfNFColumnChart": "readonly",
"_analyticsSaveChannelSort": "readonly",
"_analyticsSortChannels": "readonly",
"_apiCache": "readonly",
"_apiPerf": "readonly",
"_channelsBeginMessageRequestForTest": "readonly",
"_channelsGetStateForTest": "readonly",
"_channelsHandleWSBatchForTest": "readonly",
"_channelsIsStaleMessageRequestForTest": "readonly",
"_channelsLoadChannelsForTest": "readonly",
"_channelsProcessWSBatchForTest": "readonly",
"_channelsReconcileSelectionForTest": "readonly",
"_channelsRefreshMessagesForTest": "readonly",
"_channelsSelectChannelForTest": "readonly",
"_channelsSetObserverRegionsForTest": "readonly",
"_channelsSetStateForTest": "readonly",
"_channelsShouldProcessWSMessageForRegion": "readonly",
"_customizerV2": "readonly",
"_ensurePullIndicator": "readonly",
"_inflight": "readonly",
"_isTouchDevice": "readonly",
"_liveAddFeedItem": "readonly",
"_liveBufferPacket": "readonly",
"_liveBuildClickablePathPopupHtml": "readonly",
"_liveBuildObserverIataMap": "readonly",
"_liveClickablePaths": "readonly",
"_liveDbPacketToLive": "readonly",
"_liveExpandToBufferEntries": "readonly",
"_liveExpandToBufferEntriesAsync": "readonly",
"_liveFormatLiveTimestampHtml": "readonly",
"_liveGetFavoritePubkeys": "readonly",
"_liveGetNodeFilterKeys": "readonly",
"_liveGetObserverIataMap": "readonly",
"_liveIsNodeFavorited": "readonly",
"_liveNodeActivity": "readonly",
"_liveNodeData": "readonly",
"_liveNodeMarkers": "readonly",
"_livePacketInvolvesFavorite": "readonly",
"_livePacketInvolvesFilterNode": "readonly",
"_livePacketMatchesRegion": "readonly",
"_livePruneClickablePaths": "readonly",
"_livePruneStaleNodes": "readonly",
"_liveRebuildFeedList": "readonly",
"_liveResolveHopPositions": "readonly",
"_liveSEG_MAP": "readonly",
"_liveSetMarkerColor": "readonly",
"_liveSetMarkerSize": "readonly",
"_liveSetNodeFilter": "readonly",
"_liveSetObserverIataMap": "readonly",
"_liveSpeedLabel": "readonly",
"_liveVCR": "readonly",
"_liveVcrPause": "readonly",
"_liveVcrResumeLive": "readonly",
"_liveVcrSetMode": "readonly",
"_liveVcrSpeedCycle": "readonly",
"_live_packetTimestamp": "readonly",
"_mapGetNeighborPubkeys": "readonly",
"_mapSelectRefNode": "readonly",
"_meshAudioVoices": "readonly",
"_meshcoreHeatLayer": "readonly",
"_meshcoreLiveHeatLayer": "readonly",
"_nodesGetAllNodes": "readonly",
"_nodesGetSortState": "readonly",
"_nodesGetStatusInfo": "readonly",
"_nodesGetStatusTooltip": "readonly",
"_nodesIsAdvertMessage": "readonly",
"_nodesMatchesSearch": "readonly",
"_nodesRenderNodeTimestampHtml": "readonly",
"_nodesRenderNodeTimestampText": "readonly",
"_nodesSetAllNodes": "readonly",
"_nodesSetSortState": "readonly",
"_nodesSortArrow": "readonly",
"_nodesSortNodes": "readonly",
"_nodesSyncClaimedToFavorites": "readonly",
"_nodesToggleSort": "readonly",
"_packetsTestAPI": "readonly",
"_panelCorner": "readonly",
"_pendingPathInspectorRoute": "readonly",
"_perfWriteSourcesPrev": "readonly",
"_pullIndicator": "readonly",
"_pullToast": "readonly",
"_pullToastTimer": "readonly",
"_reducedMotionMQL": "readonly",
"_showPullToast": "readonly",
"_themeRefreshTimer": "readonly",
"_vcrFormatTime": "readonly",
"addEventListener": "readonly",
"api": "readonly",
"apiPerf": "readonly",
"bindFavStars": "readonly",
"buildHexLegend": "readonly",
"buildNodesQuery": "readonly",
"buildPacketsQuery": "readonly",
"clearParsedCache": "readonly",
"closeMoreMenu": "readonly",
"closeNav": "readonly",
"comparePacketSets": "readonly",
"computeBreakdownRanges": "readonly",
"computeOverlapStats": "readonly",
"connectWS": "readonly",
"copyToClipboard": "readonly",
"createColoredHexDump": "readonly",
"currentPage": "readonly",
"currentSkewValue": "readonly",
"debounce": "readonly",
"debouncedOnWS": "readonly",
"destroy": "readonly",
"devicePixelRatio": "readonly",
"dispatchEvent": "readonly",
"drawPacketRoute": "readonly",
"escapeHtml": "readonly",
"exports": "readonly",
"favStar": "readonly",
"fetchAllNodes": "readonly",
"filterPacketsByRoute": "readonly",
"formatAbsoluteTimestamp": "readonly",
"formatChartAxisLabel": "readonly",
"formatDistance": "readonly",
"formatDistanceRound": "readonly",
"formatDrift": "readonly",
"formatHex": "readonly",
"formatIsoLike": "readonly",
"formatSkew": "readonly",
"formatTimestamp": "readonly",
"formatTimestampCustom": "readonly",
"formatTimestampWithTooltip": "readonly",
"getDistanceUnit": "readonly",
"getFavorites": "readonly",
"getHashParams": "readonly",
"getHealthThresholds": "readonly",
"getNodeStatus": "readonly",
"getParsedDecoded": "readonly",
"getParsedPath": "readonly",
"getPathLenOffset": "readonly",
"getResolvedPath": "readonly",
"getTileUrl": "readonly",
"getTimestampCustomFormat": "readonly",
"getTimestampFormatPreset": "readonly",
"getTimestampMode": "readonly",
"getTimestampTimezone": "readonly",
"global": "readonly",
"initGeoFilterOverlay": "readonly",
"initTabBar": "readonly",
"invalidateApiCache": "readonly",
"isFavorite": "readonly",
"isTransportRoute": "readonly",
"makeColumnsResizable": "readonly",
"makeRoleMarkerSVG": "readonly",
"miniMarkdown": "readonly",
"module": "readonly",
"navigate": "readonly",
"observerSkewSeverity": "readonly",
"offWS": "readonly",
"onWS": "readonly",
"pad2": "readonly",
"pad3": "readonly",
"pages": "readonly",
"payloadTypeColor": "readonly",
"payloadTypeName": "readonly",
"process": "readonly",
"pullReconnect": "readonly",
"qrcode": "readonly",
"registerPage": "readonly",
"renderVersionCard": "readonly",
"renderSkewBadge": "readonly",
"renderSkewSparkline": "readonly",
"require": "readonly",
"routeLayer": "readonly",
"routeTypeName": "readonly",
"setupPullToReconnect": "readonly",
"syncBadgeColors": "readonly",
"timeAgo": "readonly",
"toggleFavorite": "readonly",
"transportBadge": "readonly",
"truncate": "readonly",
"ws": "readonly",
"wsListeners": "readonly"
},
"rules": {
"no-undef": "error",
"no-unused-vars": [
"warn",
{
"argsIgnorePattern": "^_",
"varsIgnorePattern": "^_"
}
]
}
}
+46 -443
View File
@@ -7,13 +7,9 @@ on:
branches: [master]
workflow_dispatch:
permissions:
contents: read
packages: write
concurrency:
group: ci-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: ${{ github.event_name == 'pull_request' }}
cancel-in-progress: true
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
@@ -22,8 +18,8 @@ env:
STAGING_CONTAINER: corescope-staging-go
# Pipeline (sequential, fail-fast):
# go-test → e2e-test → build-and-publish → deploy → publish-badges
# PRs stop after build-and-publish (no GHCR push). Master continues to deploy + badges.
# go-test → e2e-test → build → deploy → publish
# PRs stop after build. Master continues to deploy + publish.
jobs:
# ───────────────────────────────────────────────────────────────
@@ -54,9 +50,7 @@ jobs:
set -e -o pipefail
cd cmd/server
go build .
# -race gates PR #1208's atomic.Pointer migration: the race-detector
# is what makes path_inspect_atomic_race_test.go actually assert.
go test -timeout 15m -race -coverprofile=server-coverage.out ./... 2>&1 | tee server-test.log
go test -coverprofile=server-coverage.out ./... 2>&1 | tee server-test.log
echo "--- Go Server Coverage ---"
go tool cover -func=server-coverage.out | tail -1
@@ -65,120 +59,10 @@ jobs:
set -e -o pipefail
cd cmd/ingestor
go build .
go test -timeout 15m -coverprofile=ingestor-coverage.out ./... 2>&1 | tee ingestor-test.log
go test -coverprofile=ingestor-coverage.out ./... 2>&1 | tee ingestor-test.log
echo "--- Go Ingestor Coverage ---"
go tool cover -func=ingestor-coverage.out | tail -1
- name: Build and test channel library + decrypt CLI
run: |
set -e -o pipefail
cd internal/channel
go test ./...
echo "--- Channel library tests passed ---"
cd ../../cmd/decrypt
CGO_ENABLED=0 go build -ldflags="-s -w" -o corescope-decrypt .
go test ./...
echo "--- Decrypt CLI tests passed ---"
- name: Verify Dockerfile COPY invariants (issue #1316)
run: bash scripts/check-dockerfile-internal-pkgs.sh
- name: Staging disk-monitor unit tests (issue #1684)
run: bash scripts/staging/test-disk-monitor.sh
- name: Lint CSS variables (issue #1128)
run: |
set -e
node scripts/check-css-vars.js
node scripts/test-check-css-vars.js
- name: Run JS unit tests (packet-filter)
run: |
set -e
node test-packet-filter.js
node test-packet-filter-time.js
node test-confidence-indicator.js
node test-1659-analytics-warmup.js
node test-channels-merge-1498-unit.js
node test-issue-1518-home-url.js
node test-channel-decrypt-insecure-context.js
node test-live-region-filter.js
node test-issue-1136-observer-iata-map.js
node test-channel-qr.js
node test-channel-qr-wiring.js
node test-channel-modal-ux.js
node test-channel-issue-1087.js
node test-issue-1409-no-encrypted-flood.js
node test-channel-issue-1101.js
node test-observer-iata-1188.js
node test-pull-to-reconnect-1091.js
node test-channel-fluid-layout.js
node test-issue-1279-p2-code-filter.js
node test-area-filter.js
node test-issue-1293-marker-shapes.js
node test-issue-1356-map-a11y.js
node test-issue-1360-pill-letter-count.js
node test-issue-1364-pill-no-clamp.js
node test-issue-1375-scope-stats-fetch.js
node test-issue-1361-cb-presets.js
node test-issue-1380-cb-sim-overlay.js
node test-issue-1380-cb-reset-button.js
node test-issue-1407-cb-preset-propagation.js
node test-issue-1412-customizer-no-override.js
node test-issue-1418-raw-hex-extraction.js
node test-issue-1418-edge-weights.js
node test-issue-1418-cb-preset-ramp.js
node test-issue-1418-spider-fan.js
node test-issue-1418-deeplink-hops-channels.js
node test-issue-1418-polish-review.js
node test-issue-1420-tile-providers.js
node test-issue-1614-tile-url-function.js
node test-issue-1438-marker-css-vars.js
node test-issue-1562-observers-summary.js
node test-issue-1509-nav-active-bg.js
node test-issue-1509-detect-preset.js
node test-live.js
node test-issue-1107-live-layout.js
node test-issue-1532-live-fullscreen.js
node test-issue-1619-feed-detail-card-draggable.js
node test-xss-escape-sinks.js
node test-preflight-xss-gate.js
node test-traces.js
node test-issue-1648-m4-emoji-scan.js
node test-issue-1668-m3-typography.js
node test-mqtt-status-panel.js
node test-issue-1697-mqtt-mobile-e2e.js
node test-warmup-banner.js
node test-issue-1633-hide-1byte-hops.js
node test-issue-1668-m4-per-route.js
node test-a11y-axe-1668-selftest.js
- name: 🛡️ Preflight XSS gate — actual --diff check (PR only)
# The fixture self-test above (test-preflight-xss-gate.js) only
# asserts the script's behavior against fixtures. It does NOT scan
# the PR's own changes. This step closes that gap by running the
# gate against added lines in public/**/*.{js,html} on the PR.
# Gate is PR-scoped only (per djb finding: merge commits would
# slip an opt-out otherwise). Master pushes skip this step.
if: github.event_name == 'pull_request'
env:
PR_BODY: ${{ github.event.pull_request.body }}
PREFLIGHT_PR_LABELS: ${{ join(github.event.pull_request.labels.*.name, ' ') }}
run: |
set -e
git fetch origin master --depth=50 2>&1 | tail -3 || true
# Materialize PR body to a file for the opt-out parser.
printf '%s' "$PR_BODY" > /tmp/pr-body.md
PREFLIGHT_PR_BODY=/tmp/pr-body.md bash scripts/check-xss-sinks.sh --diff origin/master
- name: 🧹 Frontend lint (eslint no-undef) — issue #1342
run: |
set -e
# Use eslint@8 (legacy .eslintrc.json). Don't migrate to flat-config / eslint@9.
# --no-save: avoid touching package.json / no committed node_modules.
npm install --no-save --no-audit --no-fund eslint@8
npx eslint public/*.js
- name: Verify proto syntax
run: |
set -e
@@ -235,7 +119,7 @@ jobs:
e2e-test:
name: "🎭 Playwright E2E Tests"
needs: [go-test]
runs-on: ubuntu-latest
runs-on: [self-hosted, Linux]
defaults:
run:
shell: bash
@@ -245,6 +129,13 @@ jobs:
with:
fetch-depth: 0
- name: Free disk space
run: |
# Prune old runner diagnostic logs (can accumulate 50MB+)
find ~/actions-runner/_diag/ -name '*.log' -mtime +3 -delete 2>/dev/null || true
# Show available disk space
df -h / | tail -1
- name: Set up Node.js 22
uses: actions/setup-node@v5
with:
@@ -265,12 +156,6 @@ jobs:
go build -o ../../corescope-server .
echo "Go server built successfully"
- name: Build Go migrate tool
run: |
cd cmd/migrate
go build -o ../../corescope-migrate .
echo "Go migrate tool built successfully"
- name: Install npm dependencies
run: npm ci --production=false
@@ -282,66 +167,6 @@ jobs:
- name: Instrument frontend JS for coverage
run: sh scripts/instrument-frontend.sh
- name: Freshen fixture timestamps
run: bash tools/freshen-fixture.sh test-fixtures/e2e-fixture.db
- name: Seed grouped-packet row for #1486 collapse test
# The committed fixture has 499 packets, each with exactly ONE
# observation, so the packets-page renders only flat
# (select-hash) rows. The #1486 repro needs at least one grouped
# (toggle-select) row. Insert a NEW transmission with 3
# observations.
#
# The server's async hash-migrate (cmd/server/hash_migrate.go)
# recomputes `transmissions.hash` from `raw_hex` via
# ComputeContentHash(), so the inserted hash MUST equal that
# function's output for the chosen raw_hex — otherwise the row
# gets relabelled and the E2E can't find it.
#
# raw_hex 15000102030405060708090a0b0c0d0e0f
# → header=0x15 (route_type=1, payload_type=5)
# → ComputeContentHash(...) = fae0c9e6d357a814
#
# The first_seen / observation timestamps are pinned to a date
# within retentionHours but outside the default 15-min UI
# window so the row is hidden in the default view (keeping
# test-e2e-playwright's first-10-rows hex-pane test
# unaffected) and reachable via the explicit ?timeWindow=0
# deep-link the #1486 test uses.
run: |
sqlite3 test-fixtures/e2e-fixture.db <<'SQL'
-- Sort the seeded row LAST in BOTH default packets views:
-- • flat view sorts by transmissions.id DESC → id=0 puts it last
-- • grouped view (#default for the packets page) sorts by
-- MAX(observations.timestamp) DESC → we must keep our obs
-- timestamps OLDER than every other fixture observation.
-- Fixture (after freshen) has obs timestamps spanning
-- 2026-05-17 16:01:39Z .. 2026-05-28 00:00:00Z (max).
-- Note: freshen only shifts transmissions.first_seen forward
-- to ~now; observation.timestamp is left alone except for
-- the timestamp=0 case.
-- Use 2026-05-15 (~2 days older than the oldest fixture obs)
-- so our row sorts LAST in the grouped view too, keeping
-- test-e2e-playwright's first-10-rows hex-pane test
-- unaffected. The #1486 test still reaches the row via the
-- explicit hash + ?timeWindow=0 deep-link.
INSERT INTO transmissions(id,raw_hex,hash,first_seen,route_type,payload_type,payload_version,decoded_json,channel_hash,from_pubkey)
VALUES (0,'15000102030405060708090a0b0c0d0e0f','fae0c9e6d357a814','2026-05-15T00:00:00Z',1,5,0,'{"type":"CHAN","channel":"#test","text":"#1486 fixture"}',NULL,NULL);
INSERT INTO observations(transmission_id,observer_idx,direction,snr,rssi,score,path_json,timestamp,resolved_path) VALUES
(0,1,'rx',5.0,-95,0,'["AA"]',CAST(strftime('%s','2026-05-15T00:00:00Z') AS INTEGER),'["aa00000000000000000000000000000000000000000000000000000000000000"]'),
(0,2,'rx',5.5,-92,0,'["BB"]',CAST(strftime('%s','2026-05-15T00:00:00Z') AS INTEGER),'["bb00000000000000000000000000000000000000000000000000000000000000"]'),
(0,3,'rx',6.0,-90,0,'["CC"]',CAST(strftime('%s','2026-05-15T00:00:00Z') AS INTEGER),'["cc00000000000000000000000000000000000000000000000000000000000000"]');
SQL
- name: Migrate fixture DB to current schema (#1287)
# Server now ASSERTs schema is migrated and refuses to start
# otherwise (cmd/server/main.go: dbschema.AssertReady). In prod
# the ingestor owns dbschema.Apply, but CI starts only the
# server against the committed e2e fixture — so we run the
# standalone migrate tool here to bring the fixture up to the
# required shape before the server boots.
run: ./corescope-migrate -db test-fixtures/e2e-fixture.db
- name: Start Go server with fixture DB
run: |
fuser -k 13581/tcp 2>/dev/null || true
@@ -349,7 +174,7 @@ jobs:
./corescope-server -port 13581 -db test-fixtures/e2e-fixture.db -public public-instrumented &
echo $! > .server.pid
for i in $(seq 1 30); do
if curl -sf http://localhost:13581/api/healthz > /dev/null 2>&1; then
if curl -sf http://localhost:13581/api/stats > /dev/null 2>&1; then
echo "Server ready after ${i}s"
break
fi
@@ -363,118 +188,6 @@ jobs:
- name: Run Playwright E2E tests (fail-fast)
run: |
BASE_URL=http://localhost:13581 node test-e2e-playwright.js 2>&1 | tee e2e-output.txt
# M5 of #1668 — axe-core CI gate (color-contrast AA).
# Real browser run; fails on any net violation (raw allowlist).
# Allowlist: tests/a11y-allowlist.yaml (0 entries at M5 baseline).
BASE_URL=http://localhost:13581 AXE_SCREENSHOT_DIR=/tmp/axe-1668 \
node test-a11y-axe-1668.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-filter-ux-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-channel-issue-1087-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-channel-issue-1111-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-map-modal-fluid-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-map-nodes-pagination-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-observer-iata-1188-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1639-observers-sort-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-fluid-1055-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-priority-1102-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-priority-1311-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-priority-1391-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1413-nav-overlap-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1400-nav-vertical-clip.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-more-floor-1139-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-bottom-nav-1061-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-gestures-1062-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-gestures-1185-scroll-discriminator-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-gesture-hints-1065-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-touch-gestures-coverage-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-channel-fluid-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-table-fluid-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-charts-fluid-1058-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-slideover-1056-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1692-packets-init-parallel-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-slideover-1168-munger-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-logo-pulse-1173-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1122-packets-filter-ux-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1128-packets-layout-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1128-multi-viewport-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1136-live-region-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1150-404-state-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1146-path-link-contrast-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1147-section-order-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1151-orphan-separators-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1486-collapse-reopens-detail-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-logo-rebrand-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-logo-theme-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-logo-default-sage-teal-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1109-hamburger-dropdown-visible-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-live-layout-1178-1179-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1205-live-controls-anchor-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-live-mql-leak-1180-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1204-live-panel-structure-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1234-live-chrome-pass2-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1206-vcr-overlap-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1244-live-vcr-row-hints-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1510-live-nav-pin-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-live-fullscreen-1572-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1599-replay-freeze-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m1-icons-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m2-icons-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m3-icons-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m4-icons-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1657-analytics-channels-group-sprites-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1224-channels-mobile-ux-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1367-channels-chat-app-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1236-map-mobile-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1329-map-controls-accordion-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1273-qr-overlay-height-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1281-location-row-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-issue-1279-legend-p2-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-home-coverage-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-path-inspector-coverage-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1206-resize-observer-leak-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-drawer-1064-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-audio-live-1297-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-audio-lab-1297-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-channel-decrypt-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-channel-qr-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-channel-color-picker-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-customize-theme-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-customize-branding-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-customize-display-e2e.js 2>&1 | tee -a e2e-output.txt
BASE_URL=http://localhost:13581 node test-customize-export-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-drag-manager-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1567-corner-clears-drag-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1306-collisions-terminology-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1374-route-map-a11y-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-list-render-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-selection-flow-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-add-modal-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-share-color-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-ws-batch-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-ws-race-1498-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1487-byop-modal-layout-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1630-reach-mobile-e2e.js 2>&1 | tee -a e2e-output.txt
CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1640-compare-discovery-e2e.js 2>&1 | tee -a e2e-output.txt
# #1616: slide-over focus-restore flake-gate. Runs the slide-over
# E2E 3 consecutive times against the SAME backend instance so
# the Chromium-headless focus race documented in #1172/#1616 has
# a 3× shot at firing. Any single non-zero exit aborts. This is
# the architectural-fix gate — if it ever turns red post-merge,
# the focused-but-hidden state has crept back in.
#
# PERMANENT step. Adds ~3-4 min to the e2e-test job in exchange
# for closing out a flake family that was blocking ~8 unrelated
# PRs at a time. If profiling pressures the budget later, drop
# repeat count first; do not delete.
- name: Slide-over E2E flake-gate (#1616, --repeat-each=3)
run: |
set -e
for i in $(seq 1 3); do
echo "--- slide-over E2E run $i/20 ---"
BASE_URL=http://localhost:13581 node test-slideover-1056-e2e.js 2>&1 | tee -a slideover-repeat-output.txt
done
echo "3 passed"
- name: Collect frontend coverage (parallel)
if: success() && github.event_name == 'push'
@@ -484,13 +197,7 @@ jobs:
- name: Generate frontend coverage badges
if: success()
run: |
# Aggregate per-suite PASS/FAIL across every test-*-e2e.js summary.
# The previous regex (grep -oP '[0-9]+(?=/)' | tail -1) caught a
# stray digits-before-slash like the '2' in '2/3 tests passed' from
# some sub-output and stamped the badge as '2 passed'. See #1296.
eval "$(bash scripts/aggregate-e2e-pass.sh e2e-output.txt)"
E2E_PASS=${PASS:-0}
E2E_FAIL=${FAIL:-0}
E2E_PASS=$(grep -oP '[0-9]+(?=/)' e2e-output.txt | tail -1 || echo "0")
mkdir -p .badges
if [ -f .nyc_output/frontend-coverage.json ] || [ -f .nyc_output/e2e-coverage.json ]; then
@@ -503,14 +210,7 @@ jobs:
echo "{\"schemaVersion\":1,\"label\":\"frontend coverage\",\"message\":\"${FE_COVERAGE}%\",\"color\":\"${FE_COLOR}\"}" > .badges/frontend-coverage.json
echo "## Frontend: ${FE_COVERAGE}% coverage" >> $GITHUB_STEP_SUMMARY
fi
if [ "${E2E_FAIL:-0}" -gt 0 ]; then
E2E_MSG="${E2E_PASS:-0} passed, ${E2E_FAIL} failed"
E2E_COLOR="red"
else
E2E_MSG="${E2E_PASS:-0} passed"
E2E_COLOR="brightgreen"
fi
echo "{\"schemaVersion\":1,\"label\":\"e2e tests\",\"message\":\"${E2E_MSG}\",\"color\":\"${E2E_COLOR}\"}" > .badges/e2e-tests.json
echo "{\"schemaVersion\":1,\"label\":\"e2e tests\",\"message\":\"${E2E_PASS:-0} passed\",\"color\":\"brightgreen\"}" > .badges/e2e-tests.json
- name: Stop test server
if: always()
@@ -531,150 +231,54 @@ jobs:
include-hidden-files: true
# ───────────────────────────────────────────────────────────────
# 3. Build & Publish Docker Image
# 3. Build Docker Image
# ───────────────────────────────────────────────────────────────
build-and-publish:
name: "🏗️ Build & Publish Docker Image"
build:
name: "🏗️ Build Docker Image"
needs: [e2e-test]
runs-on: ubuntu-latest
runs-on: [self-hosted, meshcore-vm]
steps:
- name: Checkout code
uses: actions/checkout@v5
- name: Compute build metadata
id: meta
run: |
BUILD_TIME=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
GIT_COMMIT="${GITHUB_SHA::7}"
if [[ "$GITHUB_REF" == refs/tags/v* ]]; then
APP_VERSION="${GITHUB_REF#refs/tags/}"
else
APP_VERSION="edge"
fi
echo "build_time=$BUILD_TIME" >> "$GITHUB_OUTPUT"
echo "git_commit=$GIT_COMMIT" >> "$GITHUB_OUTPUT"
echo "app_version=$APP_VERSION" >> "$GITHUB_OUTPUT"
echo "Build: version=$APP_VERSION commit=$GIT_COMMIT time=$BUILD_TIME"
- name: Set up Node.js 22
uses: actions/setup-node@v5
with:
node-version: '22'
- name: Build Go Docker image (local staging)
- name: Free disk space
run: |
GIT_COMMIT="${{ steps.meta.outputs.git_commit }}" \
APP_VERSION="${{ steps.meta.outputs.app_version }}" \
BUILD_TIME="${{ steps.meta.outputs.build_time }}" \
docker system prune -af 2>/dev/null || true
docker builder prune -af 2>/dev/null || true
df -h /
- name: Build Go Docker image
run: |
echo "${GITHUB_SHA::7}" > .git-commit
APP_VERSION=$(node -p "require('./package.json').version") \
GIT_COMMIT="${GITHUB_SHA::7}" \
APP_VERSION=$(grep -oP 'APP_VERSION:-\K[^}]+' docker-compose.yml | head -1 || echo "3.0.0")
GIT_COMMIT=$(git rev-parse --short HEAD)
BUILD_TIME=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
export APP_VERSION GIT_COMMIT BUILD_TIME
docker compose -f "$STAGING_COMPOSE_FILE" -p corescope-staging build "$STAGING_SERVICE"
echo "Built Go staging image ✅"
- name: Set up Docker Buildx
if: github.event_name == 'push'
uses: docker/setup-buildx-action@v3
- name: Set up QEMU (arm64 runtime stage)
if: github.event_name == 'push'
uses: docker/setup-qemu-action@v3
- name: Log in to GHCR
if: github.event_name == 'push'
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract Docker metadata
if: github.event_name == 'push'
id: docker-meta
uses: docker/metadata-action@v5
with:
images: ghcr.io/kpa-clawbot/corescope
tags: |
type=semver,pattern=v{{version}}
type=semver,pattern=v{{major}}.{{minor}}
type=semver,pattern=v{{major}}
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/v') }}
type=edge,branch=master
- name: Build and push to GHCR
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: .
push: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.docker-meta.outputs.tags }}
labels: ${{ steps.docker-meta.outputs.labels }}
build-args: |
APP_VERSION=${{ steps.meta.outputs.app_version }}
GIT_COMMIT=${{ steps.meta.outputs.git_commit }}
BUILD_TIME=${{ steps.meta.outputs.build_time }}
cache-from: type=gha
cache-to: type=gha,mode=max
# ───────────────────────────────────────────────────────────────
# 4. Release Artifacts (tags only)
# ───────────────────────────────────────────────────────────────
release-artifacts:
name: "📦 Release Artifacts"
if: startsWith(github.ref, 'refs/tags/v')
needs: [go-test]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Checkout code
uses: actions/checkout@v5
- name: Set up Go 1.22
uses: actions/setup-go@v6
with:
go-version: '1.22'
- name: Build corescope-decrypt (static, linux/amd64)
run: |
cd cmd/decrypt
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags="-s -w -X main.version=${{ github.ref_name }}" -o ../../corescope-decrypt-linux-amd64 .
- name: Build corescope-decrypt (static, linux/arm64)
run: |
cd cmd/decrypt
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -ldflags="-s -w -X main.version=${{ github.ref_name }}" -o ../../corescope-decrypt-linux-arm64 .
- name: Upload release assets
uses: softprops/action-gh-release@v2
with:
files: |
corescope-decrypt-linux-amd64
corescope-decrypt-linux-arm64
# ───────────────────────────────────────────────────────────────
# 4b. Deploy Staging (master only)
# 4. Deploy Staging (master only)
# ───────────────────────────────────────────────────────────────
deploy:
name: "🚀 Deploy Staging"
if: |
(github.event_name == 'push' || github.event_name == 'workflow_dispatch')
&& github.ref == 'refs/heads/master'
needs: [build-and-publish]
runs-on: [self-hosted, meshcore-runner-2]
if: github.event_name == 'push'
needs: [build]
runs-on: [self-hosted, meshcore-vm]
steps:
- name: Checkout code
uses: actions/checkout@v5
- name: Pull latest image from GHCR
run: |
# Try to pull the edge image from GHCR and tag for docker-compose compatibility
if docker pull ghcr.io/kpa-clawbot/corescope:edge; then
docker tag ghcr.io/kpa-clawbot/corescope:edge corescope-go:latest
echo "Pulled and tagged GHCR edge image ✅"
else
echo "⚠️ GHCR pull failed — falling back to locally built image"
fi
- name: Deploy staging
run: |
# Force-remove the staging container regardless of how it was created
# (compose-managed OR manually created via docker run)
docker stop corescope-staging-go 2>/dev/null || true
docker rm -f corescope-staging-go 2>/dev/null || true
# Stop old container and release memory
docker compose -f "$STAGING_COMPOSE_FILE" -p corescope-staging down --timeout 30 2>/dev/null || true
# Wait for container to be fully gone and OS to reclaim memory (3GB limit)
@@ -716,11 +320,10 @@ jobs:
- name: Smoke test staging API
run: |
PORT="${STAGING_GO_HTTP_PORT:-80}"
if curl -sf "http://localhost:${PORT}/api/stats" | grep -q engine; then
if curl -sf http://localhost:82/api/stats | grep -q engine; then
echo "Staging verified — engine field present ✅"
else
echo "Staging /api/stats did not return engine field (port ${PORT})"
echo "Staging /api/stats did not return engine field"
exit 1
fi
@@ -742,7 +345,7 @@ jobs:
name: "📝 Publish Badges & Summary"
if: github.event_name == 'push'
needs: [deploy]
runs-on: ubuntu-latest
runs-on: [self-hosted, Linux]
steps:
- name: Checkout code
uses: actions/checkout@v5
-111
View File
@@ -1,111 +0,0 @@
name: Release Fast-Path
# Issue #1677: re-tag :edge as :vX.Y.Z when the tag SHA matches :edge's
# org.opencontainers.image.revision label. Skips ~30 min of Go test +
# Playwright + Docker rebuild because the bytes are identical — only the
# manifest name changes. Falls back to deploy.yml when SHAs differ so
# tags on older commits still go through full validation.
#
# This workflow is the SOLE consumer of push.tags. deploy.yml's tag
# trigger has been removed to prevent double-fire.
on:
push:
tags: ['v[0-9]+.[0-9]+.[0-9]+']
permissions:
contents: read
packages: write
concurrency:
group: release-fast-path-${{ github.ref }}
cancel-in-progress: false
jobs:
retag-or-fallback:
name: "🏷️ Re-tag :edge → :vX.Y.Z (fast) or dispatch deploy.yml (fallback)"
runs-on: ubuntu-latest
steps:
- name: Log in to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Install crane
uses: imjasonh/setup-crane@v0.4
- name: Parse semver from tag
id: semver
run: |
set -euo pipefail
TAG="${GITHUB_REF#refs/tags/}"
# Expect vMAJOR.MINOR.PATCH (workflow trigger already enforces this).
if [[ ! "$TAG" =~ ^v([0-9]+)\.([0-9]+)\.([0-9]+)$ ]]; then
echo "Tag $TAG does not match vMAJOR.MINOR.PATCH" >&2
exit 1
fi
MAJOR="${BASH_REMATCH[1]}"
MINOR="${BASH_REMATCH[2]}"
{
echo "tag=$TAG"
echo "vMajor=v$MAJOR"
echo "vMajorMinor=v$MAJOR.$MINOR"
} >> "$GITHUB_OUTPUT"
echo "Parsed: $TAG → v$MAJOR / v$MAJOR.$MINOR / $TAG"
- name: Inspect :edge revision label
id: edge
run: |
set -euo pipefail
IMAGE="ghcr.io/kpa-clawbot/corescope"
EDGE_REF="${IMAGE}:edge"
# crane config returns the OCI image config JSON; the revision label
# is set by docker/metadata-action on the master-edge build.
# If :edge doesn't exist yet (first run on a fresh registry), fall
# through to the slow path.
if ! CONFIG="$(crane config "$EDGE_REF" 2>/dev/null)"; then
echo "edge_revision=" >> "$GITHUB_OUTPUT"
echo "no_edge=true" >> "$GITHUB_OUTPUT"
echo ":edge not found in registry — will use fallback path"
exit 0
fi
REV="$(echo "$CONFIG" | jq -r '.config.Labels["org.opencontainers.image.revision"] // ""')"
echo "edge_revision=$REV" >> "$GITHUB_OUTPUT"
echo "no_edge=false" >> "$GITHUB_OUTPUT"
echo ":edge org.opencontainers.image.revision = $REV"
echo "tag SHA (github.sha) = ${{ github.sha }}"
# ─────────── FAST PATH: SHAs match, metadata-only retag ───────────
- name: Re-tag :edge → :vX.Y.Z + :vX.Y + :vX + :latest (fast path)
if: steps.edge.outputs.no_edge == 'false' && steps.edge.outputs.edge_revision == github.sha
run: |
set -euo pipefail
IMAGE="ghcr.io/kpa-clawbot/corescope"
SRC="${IMAGE}:edge"
echo "SHA match — fast-path re-tag from $SRC"
for NEW_TAG in \
"${{ steps.semver.outputs.tag }}" \
"${{ steps.semver.outputs.vMajorMinor }}" \
"${{ steps.semver.outputs.vMajor }}" \
"latest"; do
echo " crane tag $SRC $NEW_TAG"
crane tag "$SRC" "$NEW_TAG"
done
echo "Fast-path complete — all tags point at the :edge manifest digest."
# ─────────── FALLBACK: SHAs differ, run the full pipeline ───────────
- name: Dispatch full deploy.yml pipeline (fallback)
if: steps.edge.outputs.no_edge == 'true' || steps.edge.outputs.edge_revision != github.sha
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -euo pipefail
echo "SHA mismatch (or no :edge) — falling back to full pipeline"
echo " :edge revision = '${{ steps.edge.outputs.edge_revision }}'"
echo " tag SHA = '${{ github.sha }}'"
gh workflow run deploy.yml \
--repo "${{ github.repository }}" \
--ref "${{ github.ref }}"
echo "Dispatched deploy.yml against ${{ github.ref }}"
-2
View File
@@ -31,5 +31,3 @@ cmd/ingestor/ingestor.exe
!test-fixtures/e2e-fixture.db
corescope-server
cmd/server/server
# Local-only planning and design files
docs/superpowers/
-12
View File
@@ -43,17 +43,6 @@ scripts/ — Tooling (coverage collector, fixture capture, frontend in
2. Go server (`cmd/server/`) polls SQLite for new packets, broadcasts via WebSocket
3. Frontend fetches via REST API (`/api/*`), filters/sorts client-side
### Read/Write Separation Invariant (#1283)
- **All DB writes live in `cmd/ingestor/`.** INSERT / UPDATE / DELETE / VACUUM /
schema migrations / retention all run in the ingestor process.
- **`cmd/server/` is read-only.** It opens SQLite with `mode=ro` and must not
acquire a write lock. Adding a write-side helper (e.g. a `cachedRW`-style
RW connection) regresses this invariant and races the ingestor → SQLITE_BUSY.
- Enforcement: `cmd/server/readonly_invariant_test.go` reflect-asserts that
`PruneOldPackets`, `PruneOldMetrics`, and `RemoveStaleObservers` are NOT
methods on the server's `*DB`. If you need a new write, add it to
`cmd/ingestor/`.
### What's Deprecated (DO NOT TOUCH)
The following were part of the old Node.js backend and have been removed:
- `server.js`, `db.js`, `decoder.js`, `server-helpers.js`, `packet-store.js`, `iata-coords.js`
@@ -381,7 +370,6 @@ Existing patterns: `#/nodes/{pubkey}?section=node-neighbors`, `#/analytics?tab=c
## What NOT to Do
- **Don't check in private information** — no names, API keys, tokens, passwords, IP addresses, personal data, or any identifying information. This is a PUBLIC repo.
- **Don't introduce new `map[string]interface{}` in API response builders, handler returns, or internal data structures that cross domain boundaries.** Use a named Go struct with explicit JSON tags. CoreScope already carries 694 occurrences (see #1383); the count must monotonically decrease. If your change adds even one new occurrence in a touched file, the PR is wrong-shaped — fix the design, don't paper over with `interface{}`. Exempt: third-party library boundaries that genuinely return `interface{}`, and ad-hoc test fixture assertions.
- Don't add npm dependencies without asking
- Don't create a build step
- Don't add framework abstractions (React, Vue, etc.)
-37
View File
@@ -1,42 +1,5 @@
# Changelog
## [Unreleased]
## [3.9.1] — 2026-06-12
Patch release on top of v3.9.0 — v3.9.0's container image was never published (a Playwright flake gated the Docker build). See [docs/release-notes/v3.9.1.md](docs/release-notes/v3.9.1.md).
### 🎨 Accessibility
- **WCAG AA contrast pass** (#1676, f0addfda) — two-tier CSS palette; muted-text ≥4.5:1 in both themes; unknown-repeater chip fixed (2.75:1 → 4.95:1). Closes #1671. Partial fix for #1668.
### 🧪 Test stability
- **Slideover E2E flake fix** (#1663+followups, f06359d7) — tightened selectors, bumped data-row wait. Fixes #1662.
## [3.9.0] — 2026-06-12
See [docs/release-notes/v3.9.0.md](docs/release-notes/v3.9.0.md) for the full notes. 257 commits since v3.8.3 (72 substantive + 185 coverage bumps).
### ✨ Highlights
- **Relay timelines survive an ingestor restart** (#1643) — relay-hop attribution is rebuilt from `path_json` on cold load.
- **Observer Compare is first-class** (#1642, #1645, #1647) — three new entry points + Tufte-grade compare page with state-preserving multi-select.
- **Emoji → Phosphor icon migration** (#1648, #1649–#1654) — every UI emoji replaced with theme-tinted Phosphor sprites, lint-gated.
- **Per-node Reach page + API** (#1627) — `GET /api/nodes/{pubkey}/reach` with cache invalidation on blacklist changes (#1636).
- **Hashtag channels catalogue integration** (#1656) — public hashtag channels appear without manual config.
- **Operator-customizable name-prefix hiding** (#1655) — new `hiddenNamePrefixes` config (default `["🚫"]`).
### ⚙️ Config
- New: `hiddenNamePrefixes`, `liveMap.maxNodes`, `runtime.maxMemoryMB`, configurable observer-health thresholds, `branding.homeUrl`, customizer disabled-tabs.
### 📝 Documentation Corrections (carried from prior [Unreleased])
- **PR #1324 historical record correction** (#1387) — the merged PR #1324 body referenced four tests that do NOT exist in master: `TestMultibyteCapPersistRoundTrip`, `TestMultibyteCapPersistSkipsUnknown`, `TestMaybePersistCoalesces`, and a `TryLock` coalescing test. The actual tests that landed are `TestRunMultibyteCapPersist_AppliesSnapshot` and `TestRunMultibyteCapPersist_NoSnapshot_NoOp`. See issue #1386 for the corrective test additions (round-trip, unknown-key skip, coalescing).
## [3.7.2] — 2026-05-06
Hotfix release branched from `v3.7.1`. Cherry-picks PR #1121 only — no other changes.
### 🐛 Bug Fixes
- **Ingestor: backfill infinite loop on `path_json='[]'` rows** (#1119, #1121) — `BackfillPathJSONAsync` re-selected observations whose `path_json` was already `'[]'`, rewrote them to `'[]'`, and looped forever. The migration marker was never recorded and the ingestor sustained 23 MB/s WAL writes at idle (~76% CPU in `sqlite.Exec`). Fix: drop `'[]'` from the WHERE clause so the loop terminates after one full pass and the `backfill_path_json_from_raw_hex_v1` marker is written.
## [2.5.0] "Digital Rain" — 2026-03-22
### ✨ Matrix Mode — Full Cyberpunk Map Theme
-226
View File
@@ -1,226 +0,0 @@
# Deploy CoreScope
Pre-built images are published to GHCR for `linux/amd64` and `linux/arm64` (Raspberry Pi 4/5).
## Quick Start
### Docker run
```bash
docker run -d --name corescope \
-p 80:80 \
-v corescope-data:/app/data \
-e DISABLE_CADDY=true \
ghcr.io/kpa-clawbot/corescope:latest
```
Open `http://localhost` — done.
### Docker Compose
```bash
curl -sL https://raw.githubusercontent.com/Kpa-clawbot/CoreScope/master/docker-compose.example.yml \
-o docker-compose.yml
docker compose up -d
```
## Image Tags
| Tag | Description |
|-----|-------------|
| `v3.4.1` | Pinned release (recommended for production) |
| `v3.4` | Latest patch in v3.4.x |
| `v3` | Latest minor+patch in v3.x |
| `latest` | Latest release tag |
| `edge` | Built from master — unstable, for testing |
## Configuration
Settings can be overridden via environment variables:
| Variable | Default | Description |
|----------|---------|-------------|
| `DISABLE_CADDY` | `false` | Skip internal Caddy (set `true` behind a reverse proxy) |
| `DISABLE_MOSQUITTO` | `false` | Skip internal MQTT broker (use external) |
| `HTTP_PORT` | `80` | Host port mapping |
| `DATA_DIR` | `./data` | Host path for persistent data |
For advanced configuration, mount a `config.json` into `/app/data/config.json`. See `config.example.json` in the repo.
## Updating
```bash
docker compose pull
docker compose up -d
```
## Data
All persistent data lives in `/app/data`:
- `meshcore.db` — SQLite database (packets, nodes)
- `config.json` — custom config (optional)
- `theme.json` — custom theme (optional)
**Backup:** `cp data/meshcore.db ~/backup/`
## TLS
Option A — **External reverse proxy** (recommended): Run with `DISABLE_CADDY=true`, put nginx/traefik/Cloudflare in front.
Option B — **Built-in Caddy**: Mount a custom Caddyfile at `/etc/caddy/Caddyfile` and expose ports 80+443.
---
## Migrating from manage.sh (existing admins)
If you're currently deploying with `manage.sh` (git clone + local build), you have two options going forward:
### Option A: Keep using manage.sh (no changes needed)
`manage.sh update` continues to work exactly as before — it fetches the latest tag, builds locally, and restarts. Nothing breaks.
```bash
./manage.sh update # latest release
./manage.sh update v3.5.0 # specific version
```
### Option B: Switch to pre-built images (recommended)
Pre-built images skip the build step entirely — faster updates, no Go toolchain needed.
**One-time migration:**
1. Stop the current deployment:
```bash
./manage.sh stop
```
2. Your data is in `~/meshcore-data/` (or whatever `PROD_DATA_DIR` is set to). It's untouched — the database, config, and theme files persist.
3. Copy `docker-compose.example.yml` to where you want to run from:
```bash
cp docker-compose.example.yml ~/docker-compose.yml
```
4. Start with the pre-built image:
```bash
cd ~ && docker compose up -d
```
5. Verify it picked up your existing data:
```bash
curl http://localhost/api/stats
```
**Updates after migration:**
```bash
docker compose pull && docker compose up -d
```
### What about manage.sh features?
| manage.sh command | Pre-built equivalent |
|---|---|
| `./manage.sh update` | `docker compose pull && docker compose up -d` |
| `./manage.sh stop` | `docker compose down` |
| `./manage.sh start` | `docker compose up -d` |
| `./manage.sh logs` | `docker compose logs -f` |
| `./manage.sh status` | `docker compose ps` |
| `./manage.sh setup` | Copy `docker-compose.example.yml`, edit env vars |
`manage.sh` remains available for advanced use cases (building from source, custom patches, development). Pre-built images are recommended for most production deployments.
## Staging VM — disk-usage monitor & cleanup (#1684)
The staging VM ran out of disk during a hot-patch (#1684). To prevent
repeats, two scripts live in `scripts/staging/`:
- `disk-monitor.sh <mount>` — reads `df -P`, classifies usage against
`<80 ok / >=80 warn / >=90 error / >=95 alert`, emits to stderr +
journald (via `logger`). Returns non-zero on `error|alert` so
systemd surfaces the unit as failed.
- `disk-cleanup.sh` — removes `/tmp` snapshot files (`*.db`,
`staging-snap.*`, `cs-*`, `node-compile-cache`) older than 7 days
and runs `docker builder prune` + `docker image prune` with
`--filter "until=72h" --filter "label!=keep"`. Set
`CORESCOPE_CLEANUP_DRY_RUN=1` to log without deleting.
### Install on the staging host
SSH to `<STAGING_HOST>` as the staging operator user and:
```bash
sudo install -m 0755 scripts/staging/disk-monitor.sh /usr/local/bin/corescope-disk-monitor
sudo install -m 0755 scripts/staging/disk-cleanup.sh /usr/local/bin/corescope-disk-cleanup
# 15-minute monitor
sudo tee /etc/systemd/system/corescope-disk-monitor.service >/dev/null <<'UNIT'
[Unit]
Description=CoreScope staging disk-usage monitor (issue #1684)
[Service]
Type=oneshot
ExecStart=/usr/local/bin/corescope-disk-monitor /
UNIT
sudo tee /etc/systemd/system/corescope-disk-monitor.timer >/dev/null <<'UNIT'
[Unit]
Description=Run CoreScope disk-usage monitor every 15 minutes
[Timer]
OnBootSec=5min
OnUnitActiveSec=15min
Unit=corescope-disk-monitor.service
[Install]
WantedBy=timers.target
UNIT
# Daily cleanup at 03:30 local
sudo tee /etc/systemd/system/corescope-disk-cleanup.service >/dev/null <<'UNIT'
[Unit]
Description=CoreScope staging disk cleanup (issue #1684)
[Service]
Type=oneshot
ExecStart=/usr/local/bin/corescope-disk-cleanup
UNIT
sudo tee /etc/systemd/system/corescope-disk-cleanup.timer >/dev/null <<'UNIT'
[Unit]
Description=Run CoreScope disk cleanup daily at off-peak
[Timer]
OnCalendar=*-*-* 03:30:00
Persistent=true
Unit=corescope-disk-cleanup.service
[Install]
WantedBy=timers.target
UNIT
sudo systemctl daemon-reload
sudo systemctl enable --now corescope-disk-monitor.timer corescope-disk-cleanup.timer
```
`<STAGING_HOST>` is the staging VM hostname/IP — operator supplies it,
not committed to the repo.
### Inspecting alerts
```bash
journalctl -t corescope-disk-monitor --since '-1d'
journalctl -t corescope-disk-cleanup --since '-7d'
systemctl list-timers | grep corescope-disk
```
`logger` priorities map: `ok→info`, `warn→warning`, `error→err`,
`alert→alert` (syslog severity 1, second only to `emerg`). Wire
`journalctl -p alert ...` to whatever ops channel the operator
prefers; use `-p err` to also catch the `error` tier.
### Notes on `staging-snap.db` root cause (#1684 phase 3)
`grep -rn staging-snap.db cmd/ public/ scripts/` returns **zero**
hits in the repo. The 4.4 GB orphan was a manual debugging artifact,
not produced by any committed code. The `disk-cleanup.sh` retention
rule (anything matching `staging-snap.*` in `/tmp` older than 7 days)
prevents recurrence without needing source-side TTL changes.
If a future feature legitimately needs persistent snapshot DBs, put
them under `/var/lib/corescope/snapshots/` with explicit rotation —
not in `/tmp`, which is ephemeral by definition.
+7 -41
View File
@@ -1,57 +1,25 @@
# Build stage always runs natively on the builder's arch ($BUILDPLATFORM)
# and cross-compiles to $TARGETOS/$TARGETARCH via Go toolchain. No QEMU.
# BUILDPLATFORM is auto-set by buildx; default to linux/amd64 so plain
# `docker build` (without buildx) doesn't fail on an empty platform string.
ARG BUILDPLATFORM=linux/amd64
FROM --platform=$BUILDPLATFORM golang:1.22-alpine AS builder
FROM golang:1.22-alpine AS builder
RUN apk add --no-cache build-base
ARG APP_VERSION=unknown
ARG GIT_COMMIT=unknown
ARG BUILD_TIME=unknown
# Provided by buildx for multi-arch builds
ARG TARGETOS
ARG TARGETARCH
# Build server (pure-Go sqlite — no CGO needed, cross-compiles cleanly)
# Build server
WORKDIR /build/server
COPY cmd/server/go.mod cmd/server/go.sum ./
COPY internal/geofilter/ ../../internal/geofilter/
COPY internal/sigvalidate/ ../../internal/sigvalidate/
COPY internal/packetpath/ ../../internal/packetpath/
COPY internal/dbconfig/ ../../internal/dbconfig/
COPY internal/dbschema/ ../../internal/dbschema/
COPY internal/prunequeue/ ../../internal/prunequeue/
COPY internal/perfio/ ../../internal/perfio/
COPY internal/mbcapqueue/ ../../internal/mbcapqueue/
RUN go mod download
COPY cmd/server/ ./
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -ldflags "-X main.Version=${APP_VERSION} -X main.Commit=${GIT_COMMIT} -X main.BuildTime=${BUILD_TIME}" -o /corescope-server .
RUN go build -ldflags "-X main.Version=${APP_VERSION} -X main.Commit=${GIT_COMMIT} -X main.BuildTime=${BUILD_TIME}" -o /corescope-server .
# Build ingestor
WORKDIR /build/ingestor
COPY cmd/ingestor/go.mod cmd/ingestor/go.sum ./
COPY internal/geofilter/ ../../internal/geofilter/
COPY internal/sigvalidate/ ../../internal/sigvalidate/
COPY internal/packetpath/ ../../internal/packetpath/
COPY internal/dbconfig/ ../../internal/dbconfig/
COPY internal/dbschema/ ../../internal/dbschema/
COPY internal/prunequeue/ ../../internal/prunequeue/
COPY internal/perfio/ ../../internal/perfio/
COPY internal/mbcapqueue/ ../../internal/mbcapqueue/
RUN go mod download
COPY cmd/ingestor/ ./
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -o /corescope-ingestor .
# Build decrypt CLI
WORKDIR /build/decrypt
COPY cmd/decrypt/go.mod cmd/decrypt/go.sum ./
COPY internal/channel/ ../../internal/channel/
RUN go mod download
COPY cmd/decrypt/ ./
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} \
go build -ldflags="-s -w" -o /corescope-decrypt .
RUN go build -o /corescope-ingestor .
# Runtime image
FROM alpine:3.20
@@ -61,7 +29,7 @@ RUN apk add --no-cache mosquitto mosquitto-clients supervisor caddy wget
WORKDIR /app
# Go binaries
COPY --from=builder /corescope-server /corescope-ingestor /corescope-decrypt /app/
COPY --from=builder /corescope-server /corescope-ingestor /app/
# Frontend assets + config
COPY public/ ./public/
@@ -74,8 +42,6 @@ RUN echo "unknown" > .git-commit
# Supervisor + Mosquitto + Caddy config
COPY docker/supervisord-go.conf /etc/supervisor/conf.d/supervisord.conf
COPY docker/supervisord-go-no-mosquitto.conf /etc/supervisor/conf.d/supervisord-no-mosquitto.conf
COPY docker/supervisord-go-no-caddy.conf /etc/supervisor/conf.d/supervisord-no-caddy.conf
COPY docker/supervisord-go-no-mosquitto-no-caddy.conf /etc/supervisor/conf.d/supervisord-no-mosquitto-no-caddy.conf
COPY docker/mosquitto.conf /etc/mosquitto/mosquitto.conf
COPY docker/Caddyfile /etc/caddy/Caddyfile
-3
View File
@@ -40,9 +40,6 @@ RUN if [ ! -f .git-commit ]; then echo "unknown" > .git-commit; fi
# Supervisor + Mosquitto + Caddy config
COPY docker/supervisord-go.conf /etc/supervisor/conf.d/supervisord.conf
COPY docker/supervisord-go-no-mosquitto.conf /etc/supervisor/conf.d/supervisord-no-mosquitto.conf
COPY docker/supervisord-go-no-caddy.conf /etc/supervisor/conf.d/supervisord-no-caddy.conf
COPY docker/supervisord-go-no-mosquitto-no-caddy.conf /etc/supervisor/conf.d/supervisord-no-mosquitto-no-caddy.conf
COPY docker/mosquitto.conf /etc/mosquitto/mosquitto.conf
COPY docker/Caddyfile /etc/caddy/Caddyfile
-142
View File
@@ -1,142 +0,0 @@
# MIGRATIONS — async vs sync policy
CoreScope's ingestor applies schema/data migrations inline at boot in
`cmd/ingestor/db.go`. Every migration that runs synchronously blocks the
ingestor from accepting packets until it returns. On a dev DB that's
milliseconds; at prod scale (1.9M+ observations, 80K+ adverts, 2600+ nodes
on Cascadia) it can pin the boot for minutes and trigger restart loops —
the "upgrade broke prod" failure class (#791, #1483, and others).
## The rule
**Any new `CREATE INDEX`, `ALTER TABLE`, or data-rewriting `UPDATE`/`DELETE`
in a migration file MUST do ONE of the following:**
### Option 1 — Run via `Store.RunAsyncMigration` (preferred for backfills)
```go
// Scheduled in OpenStore() AFTER the *Store is constructed.
if err := s.RunAsyncMigration(ctx, "my_migration_v1",
func(ctx context.Context, db *sql.DB) error {
_, err := db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS ...`)
return err
}); err != nil {
log.Printf("[migration/async] scheduling failed: %v", err)
}
```
- The migration is recorded as `pending_async` in the `_async_migrations`
table **immediately** — the ingestor boots and starts ingesting.
- `fn` runs in a goroutine; the WaitGroup is shared with the rest of the
ingestor (`Store.WaitForAsyncMigrations()` waits for everything).
- On success the row flips to `done`; on error/panic to `failed` with the
error message captured.
- Idempotent: rows in `done` state short-circuit; `failed`/`pending_async`
rows are retried on the next boot.
Reference implementations: `Store.BackfillPathJSONAsync` (path_json
backfill) and the converted `obs_observer_ts_idx_v1` index build in
`OpenStore`.
### Option 2 — Annotate as preflight-cheap
Some migrations are genuinely cheap at any scale (e.g. `ALTER TABLE ADD
COLUMN`, `CREATE INDEX` on a table you know is bounded to a few thousand
rows). Annotate the migration block with a comment **on the line
immediately above the migration block** so the preflight gate recognises
the opt-out:
```go
// PREFLIGHT: async=true reason="ALTER ADD COLUMN — O(1) sqlite operation"
if r := db.QueryRow("SELECT 1 FROM _migrations WHERE name = 'foo_v1'"); ...
```
The reason MUST be a real one-line justification you can defend in
review. "It's fine" is not a reason.
### Option 3 — Opt out per PR
If the migration is genuinely safe and you don't want to add an inline
annotation, put a single line in the PR body:
```
PREFLIGHT-MIGRATION-SCALE: <30s N=80K verified on Cascadia staging snapshot
```
This must include both `<30s` and `N=<some scale>` so a reviewer can
challenge the measurement.
## The gate
`~/.openclaw/skills/pr-preflight/scripts/check-async-migrations.sh` runs
on every PR via the preflight orchestrator. It greps the diff for new or
modified migration blocks (files matching `cmd/ingestor/db.go`,
`cmd/ingestor/maintenance.go`, `internal/dbschema/**`, `**/migrations/**`,
`**/*.sql`, plus any Go file touching `CREATE INDEX` / `ALTER TABLE` /
`CREATE UNIQUE INDEX`). For each hit it requires one of the three
opt-outs above. Hard-fail (exit 1) — no warning-only mode.
## Concurrency model
CoreScope runs **one ingestor process** per deployment (`cmd/ingestor/`,
single binary, single `*Store`). There is no cluster mode, no leader
election, no second writer. SQLite is opened with `SetMaxOpenConns(1)`
and a 5s `busy_timeout`; all writes (live MQTT ingest + async migration
goroutines + maintenance backfills) serialize through the one connection
in a single process.
What this means for async migrations:
- **No cross-process race** to worry about. Two ingestor instances
running against the same DB is not a supported deployment shape.
- **Within a single process**, concurrent `RunAsyncMigration(name=X)`
callers race the initial `SELECT status` → `UPDATE/INSERT` step. The
current implementation re-schedules `fn` on a pending/failed row so a
duplicate caller may legitimately re-run it; once status is `done` all
further calls short-circuit. See
`TestRunAsyncMigration_ConcurrentSameNameSerialized` for the contract.
- **`fn` runs concurrently with live ingest writers.** Because
`MaxOpenConns=1`, a long `CREATE INDEX` will serialize behind / ahead
of insert batches via SQLite's busy-timeout. This is acceptable for
index builds (the boot path is unblocked, which was the whole point),
but it means long migrations DO add latency to live writes. Document
expected runtime in the `reason=` annotation and prefer batched/chunked
fn implementations for multi-minute work (see `BackfillPathJSONAsync`
for the canonical batched pattern with inter-batch `time.Sleep`).
## Scale budgets
Per-migration target: **<30s** at current prod scale (Cascadia: ~2,600
nodes, ~80K observations; previous prod snapshot: ~1.9M observations).
Worked example (#1483, `obs_observer_ts_idx_v1`): composite index build
on `observations(observer_idx, timestamp)`. At ~1.9M rows the sync build
pinned ingestor boot for several minutes → restart loop. Converted to
async via `RunAsyncMigration` in `OpenStore` so boot returns immediately
and the index materializes in the background; the existing `_migrations`
short-circuit at the top of the migration block ensures DBs that already
completed the sync v3.8.3 build do NOT re-run it through the goroutine
path on subsequent boots.
If you cannot meet the <30s budget, document the expected upper bound
and operator runbook expectation (e.g. "index build expected ~10 min on
a 5M-row table; ingestor remains responsive; monitor via
`SELECT status, error FROM _async_migrations WHERE name = ...`").
## Why this exists
Pattern that keeps repeating:
1. Author writes `CREATE INDEX foo ON observations(...)` in a migration.
2. Local dev DB has ~100 rows. Migration returns in 1ms. CI is green.
3. Reviewer focuses on plan correctness, not scale.
4. Ship.
5. Prod boots, sqlite scans 1.9M rows, the ingestor sits at `[migration]
Adding index...` for 8 minutes, healthcheck times out, container
restarts, loops.
6. Operator pages. Hotfix. Apology.
The gate doesn't try to detect table size (undecidable from a diff). It
enforces **annotation discipline**: every author who adds a migration
must consciously decide which bucket it falls into and write that down.
That is the cheapest possible intervention that breaks the cycle.
+4 -30
View File
@@ -21,7 +21,6 @@ The Go backend serves all 40+ API endpoints from an in-memory packet store with
| Memory (56K packets) | **~300 MB** (vs 1.3 GB on Node.js) |
| WebSocket broadcast | **Real-time** to all connected browsers |
| Channel decryption | **AES-128-ECB** with rainbow table |
| GOMEMLIMIT (memory-constrained hosts) | **set to ≥1.5× working set** (e.g. 1536 MiB on a 2 GB Pi for a ~1 GB store). Lower values trigger a GC death-spiral. Configure via the `GOMEMLIMIT` env var or `runtime.maxMemoryMB` in `config.json`; env wins. Applies to both server and ingestor. See [#1010](https://github.com/Kpa-clawbot/CoreScope/issues/1010). |
See [PERFORMANCE.md](PERFORMANCE.md) for full benchmarks.
@@ -75,34 +74,9 @@ Full experience on your phone — proper touch controls, iOS safe area support,
## Quick Start
### Pre-built Image (Recommended)
### Docker (Recommended)
No build step required — just run:
```bash
docker run -d --name corescope \
--restart=unless-stopped \
-p 80:80 -p 1883:1883 \
-v /your/data:/app/data \
ghcr.io/kpa-clawbot/corescope:latest
```
Open `http://localhost` — done. No config file needed; CoreScope starts with sensible defaults.
For HTTPS with a custom domain, add `-p 443:443` and mount your Caddyfile:
```bash
docker run -d --name corescope \
--restart=unless-stopped \
-p 80:80 -p 443:443 -p 1883:1883 \
-v /your/data:/app/data \
-v /your/Caddyfile:/etc/caddy/Caddyfile:ro \
-v /your/caddy-data:/data/caddy \
ghcr.io/kpa-clawbot/corescope:latest
```
Disable built-in services with `-e DISABLE_MOSQUITTO=true` or `-e DISABLE_CADDY=true`, or drop a `.env` file in your data volume. See [docs/deployment.md](docs/deployment.md) for the full reference.
### Build from Source
No Go installation needed — everything builds inside the container.
```bash
git clone https://github.com/Kpa-clawbot/CoreScope.git
@@ -121,6 +95,8 @@ The setup wizard walks you through config, domain, HTTPS, build, and run.
./manage.sh help # All commands
```
See [docs/DEPLOYMENT.md](docs/DEPLOYMENT.md) for the full deployment guide — HTTPS options (auto cert, bring your own, Cloudflare Tunnel), MQTT security, backups, and troubleshooting.
### Configure
Copy `config.example.json` to `config.json` and edit:
@@ -266,8 +242,6 @@ Contributions welcome. Please read [AGENTS.md](AGENTS.md) for coding conventions
**Live instance:** [analyzer.00id.net](https://analyzer.00id.net) — all API endpoints are public, no auth required.
**API Documentation:** CoreScope auto-generates an OpenAPI 3.0 spec. Browse the interactive Swagger UI at [`/api/docs`](https://analyzer.00id.net/api/docs) or fetch the machine-readable spec at [`/api/spec`](https://analyzer.00id.net/api/spec).
## License
MIT
-207
View File
@@ -1,207 +0,0 @@
# v3.6.0 - The Forensics
CoreScope just got eyes everywhere. This release drops **path inspection**, **color-by-hash markers**, **clock skew detection**, **full channel encryption**, an **observer graph**, and a pile of robustness fixes that make your mesh network feel like it's being watched by someone who actually cares.
134 commits, 105 PRs merged, 18K+ lines added. Here's what shipped.
---
## 🚀 New Features
### Path-Prefix Candidate Inspector (#944, #945)
The marquee feature. Click any path segment and CoreScope opens an interactive inspector showing every candidate node that could match that hop prefix - plotted on a map with scoring by neighbor-graph affinity and geographic centroid. Ambiguous hops? Now you can see *why* they're ambiguous and pick the right one.
**Why you'll love it:** No more guessing which `0xA3` is the real repeater. The inspector lays out every candidate, scores them, and lets you drill in visually.
### Color-by-Hash Packet Markers (#948, #951)
Every packet type gets a vivid, hash-derived color - on the live feed, map polylines, and flying-packet animations. Bright fill with dark outline for contrast. No more monochrome blobs - you can visually track packet flows by color at a glance.
### Node Filter on Live Page (#924, #771)
Filter the live packet stream to show only traffic flowing through a specific node. Pick a repeater, see exactly what it's carrying. That simple.
### Clock Skew Detection (#746, #752, #828, #850)
Full pipeline: backend computes drift using Theil-Sen regression with outlier rejection (#828), the UI shows per-node badges, detail sparklines, and fleet-wide analytics (#752). Bimodal clock severity (#850) surfaces flaky-RTC nodes that toggle between accurate and drifted - instead of hiding them as "No Clock."
**Why you'll love it:** Nodes with bad clocks silently corrupt your timeline. Now they glow red before they ruin your analysis.
### Observer Graph (M1+M2) (#774)
Observers are now first-class graph citizens. CoreScope builds a neighbor graph from observation overlaps, scores hop-resolver candidates by graph edges (#876), and uses geographic centroid for tiebreaking. The observer topology is visible and queryable.
### Channel Encryption - Full Stack (#726, #733, #750, #760)
Three milestones landed as one: DB-backed channel message history (#726), client-side PSK decryption in the browser (#733), and PSK channel management with add/remove UX and message caching (#750). Add a channel key in the UI, and CoreScope decrypts messages client-side - no server-side key storage. The add-channel button (#760) makes it dead simple.
**Why you'll love it:** Encrypted channels are no longer black boxes. Add your PSK, see the messages, search history - all without exposing keys to the server.
### Hash Collision Inspector (#758)
The Hash Usage Matrix now shows collision details for all hash sizes. When two nodes share a prefix, you see exactly who collides and at what size.
### Geofilter Builder - In-App (#735, #900)
The geofilter polygon builder is now served directly from CoreScope with a full docs page (#900). No more hunting for external tools. Link from the customizer, draw your polygon, done.
### Node Blacklist (#742)
`nodeBlacklist` in config hides abusive or troll nodes from all views. They're gone.
### Observer Retention (#764)
Stale observers are automatically pruned after a configurable number of days. Your observer list stays clean without manual intervention.
### Advert Signature Validation (#794)
Corrupt packets with invalid advert signatures are now rejected at ingest. Bad data never hits your store.
### Bounded Cold Load (#790)
`Load()` now respects a memory budget - no more OOM on cold start with a fat database. Combined with retention-hours cutoff (#917), cold start is safe on constrained hardware.
### Multi-Arch Docker Images (#869)
Official images now publish `amd64` + `arm64` in a single multi-arch manifest. Raspberry Pi operators: pull and run. No special tags needed.
### /nodes Detail Panel + Search (#868)
The nodes detail panel ships with search improvements (#862) - find nodes fast, see their full detail in a slide-out panel.
### Deduplicated Top Longest Hops (#848)
Longest hops are now deduplicated by pair with observation count and SNR cues. No more seeing the same link 47 times.
---
## 🔥 Performance Wins
### StoreTx ResolvedPath Elimination (#806)
The per-transaction `ResolvedPath` computation is gone - replaced by a membership index with on-demand decode. This was one of the hottest paths in the ingestor.
### Node Packet Queries (#803)
Raw JSON text search for node packets replaced with a proper `byNode` index (#673). Night and day.
### Channel Query Performance (#762, #763)
New `channel_hash` column enables SQL-level channel filtering. No more full-table scan to find messages in a channel.
### SQLite Auto-Vacuum (#919, #920)
Incremental auto-vacuum enabled - the database file actually shrinks after retention pruning. No more 2GB database holding 200MB of live data.
### Retention-Hours Cutoff on Load (#917)
`Load()` now applies `retentionHours` at read time, preventing OOM when the DB has more history than memory allows.
---
## 🛡️ Security & Robustness
### MQTT Reconnect with Bounded Backoff (#947, #949)
The ingestor now reconnects to MQTT brokers with exponential backoff, observability logging, and bounded retry. No more silent disconnects that kill your data stream.
---
## 🐛 Bugs Squashed
This release exterminates **40+ bugs** — from protocol-level hash mismatches to pixel-level CSS breakage. Operators told us what hurt; we listened.
- **Path inspector "Show on Map" missed origin and first hop** (#950) - map view now includes all hops
- **Content hash used full header byte** (#787) - content hashing now uses payload type bits only, fixing hash collisions between packets that differ only in header flags
- **Encrypted channel deep links showed broken UI** (#825, #826, #815) - deep links to encrypted channels now show a lock message instead of broken UI when you don't have the key
- **Geofilter longitude wrapping** (#925) - geofilter builder wraps longitude to [-180, 180]; southern hemisphere polygons no longer invert
- **Hash filter bypasses saved region filter** (#939) - hash lookups now skip the geo filter as intended
- **Companion-as-repeater excluded from path hops** (#935, #936) - non-repeater nodes no longer pollute hop resolution
- **Customize panel re-renders while typing** (#927) - text fields keep focus during config changes
- **Per-observation raw_hex** (#881, #882) - each observer's hex dump now shows what *that observer* actually received
- **Per-observation children in packet groups** (#866, #880) - expanded groups show per-obs data, not cross-observer aggregates
- **Full-page obs-switch** (#866, #870) - switching observers updates hex, path, and direction correctly
- **Packet detail shows wrong observation** (#849, #851) - clicking a specific observation opens *that* observation
- **Byte breakdown hop count** (#844, #846) - derived from `path_len`, not aggregated `_parsedPath`
- **Transport-route path_len offset** (#852, #853) - correct offset calculation + CSS variable fix
- **Packets/hour chart bars + x-axis** (#858, #865) - bars render correctly, x-axis labels properly decimated
- **Channel timeline capped to top 8** (#860, #864) - no more 47-channel chart spaghetti
- **Reachability row opacity removed** (#859, #863) - clean rows without misleading gradient
- **Sticky table headers on mobile** (#861, #867) - restored after regression
- **Map popup 'Show Neighbors' on iOS Safari** (#840, #841) - link actually works now
- **Node detail Recent Packets invisible text** (#829, #830) - CSS fix
- **/api/packets/{hash} falls back to DB** (#827, #831) - when in-memory store misses, DB catches it
- **IATA filter bypass for status messages** (#694, #802) - status packets no longer filtered out by airport codes
- **Desktop node click URL hash** (#676, #739) - clicking a node updates the URL for deep linking
- **Filter params in URL hash** (#682, #740) - all filter state serialized for shareable links
- **Hide undecryptable channel messages** (#727, #728) - clean default view
- **TRACE path_json uses path_sz** (#732) - correct field from flags byte, not header hash_size
- **Multi-byte adopters** (#754, #767) - all node types, role column, advert precedence
- **Channel key case sensitivity** (#761) - Public decode works correctly
- **Transport route field offsets** (#766) - correct offsets in field table
- **Clock skew sanity checks** (#769) - filter epoch-0, cap drift, require minimum samples
- **Neighbor graph slider persistence** (#776) - default 0.7, persisted to localStorage
- **Node detail panel navigation** (#779, #785) - Details/Analytics links actually navigate
- **Channel key removal** (#898) - user-added keys for server-known channels can be removed
- **Side-panel Details on desktop** (#892) - opens full-screen correctly
- **Hex-dump byte ranges client-side** (#891) - computed from per-obs raw_hex
- **path_json derived from raw_hex at ingest** (#886, #887) - single source of truth
- **Path pill and byte breakdown hop agreement** (#885) - they match now
- **Mobile close button + toolbar scroll** (#797, #805) - accessible and scrollable
- **/health.recentPackets resolved_path fallback** (#810, #821) - falls back to longest sibling observation
- **Channel filter on Packets page** (#812, #816) - UI and API both fixed
- **Clock-skew section in side panel** (#813, #814) - renders correctly
- **Real RSS in /api/stats** (#832, #835) - surface actual RSS alongside tracked store bytes
- **Hash size detection for transport routes + zero-hop adverts** (#747) - correct detection
- **Repeater+observer merged map marker** (#745) - single marker, not two overlapping
---
## 🎨 UI Polish
- QA findings applied across the board (#832, #833, #836, #837, #838) - dozens of small UX fixes from systematic QA pass
---
## 📦 Upgrading
```bash
git pull
docker compose down
docker compose build prod
docker compose up -d prod
```
Your existing `config.json` works as-is. New optional config keys:
- `nodeBlacklist` - array of node hashes to hide
- `observerRetentionDays` - days before stale observers are pruned
- `memoryBudgetMB` - cap on in-memory packet store
### Verify
```bash
curl -s http://localhost/api/health | jq .version
# "3.6.0"
```
---
## 🙏 External Contributors
- **#735** ([@efiten](https://github.com/efiten)) - Serve geofilter builder from app, link from customizer
- **#739** ([@efiten](https://github.com/efiten)) - Desktop node click updates URL hash for deep linking
- **#740** ([@efiten](https://github.com/efiten)) - Serialize filter params in URL hash for shareable links
- **#742** ([@Joel-Claw](https://github.com/Joel-Claw)) - Add nodeBlacklist config to hide abusive/troll nodes
- **#761** ([@copelaje](https://github.com/copelaje)) - Fix channel key case sensitivity for Public decode
- **#764** ([@Joel-Claw](https://github.com/Joel-Claw)) - Add observer retention - prune stale observers after configurable days
- **#802** ([@efiten](https://github.com/efiten)) - Bypass IATA filter for status messages, fill SNR on duplicate observations
- **#803** ([@efiten](https://github.com/efiten)) - Replace raw JSON text search with byNode index for node packet queries
- **#805** ([@efiten](https://github.com/efiten)) - Mobile close button accessible + toolbar scrollable
- **#900** ([@efiten](https://github.com/efiten)) - App-served geofilter docs page
- **#917** ([@efiten](https://github.com/efiten)) - Apply retentionHours cutoff in Load() to prevent OOM on cold start
- **#924** ([@efiten](https://github.com/efiten)) - Node filter on live page - show only traffic through a specific node
- **#925** ([@efiten](https://github.com/efiten)) - Fix geobuilder longitude wrapping for southern hemisphere polygons
- **#927** ([@efiten](https://github.com/efiten)) - Skip customize panel re-render while text field has focus
---
## ⚠️ Breaking Changes
**None.** All API endpoints remain backwards-compatible. New fields are additive only.
---
## 📊 By the Numbers
| Stat | Count |
|------|-------|
| Commits | 134 |
| PRs merged | 105 |
| Lines added | 18,480 |
| Lines removed | 1,632 |
| Files changed | 110 |
| Contributors | 4 |
---
*Previous release: [v3.5.2](https://github.com/Kpa-clawbot/CoreScope/releases/tag/v3.5.2)*
+2 -3
View File
@@ -294,6 +294,5 @@
"#colombia": "bea223a8c1d13ed9638ee000ea3a6aca",
"#bogota": "6d0864985b64350ce4cbfebf4979e970",
"#peru": "7e6fc347bf29a4c128ac3156865bd521",
"#lima": "5f167ce354eca08ab742463df10ef255",
"Public": "8b3387e9c5cdea6ac9e5edbaa115cd72"
}
"#lima": "5f167ce354eca08ab742463df10ef255"
}
-142
View File
@@ -1,142 +0,0 @@
# corescope-decrypt
Standalone CLI tool to decrypt and export MeshCore hashtag channel messages from a CoreScope SQLite database.
## Why
MeshCore hashtag channels use symmetric encryption where the key is derived deterministically from the channel name. The CoreScope ingestor stores **all** `GRP_TXT` packets in the database, including those it cannot decrypt at ingest time.
This tool enables:
- **Retroactive decryption** — decrypt historical messages for any channel whose name you learn after the fact
- **Forensics & analysis** — export channel traffic for offline review
- **Bulk export** — dump an entire channel's history as JSON, HTML, or plain text
## Installation
### From Docker image
The binary is included in the CoreScope Docker image at `/app/corescope-decrypt`:
```bash
docker exec corescope-prod /app/corescope-decrypt --channel "#wardriving" --db /app/data/meshcore.db
```
### From GitHub release
Download the static binary from the [Releases](https://github.com/Kpa-clawbot/CoreScope/releases) page:
```bash
# Linux amd64
curl -LO https://github.com/Kpa-clawbot/CoreScope/releases/latest/download/corescope-decrypt-linux-amd64
chmod +x corescope-decrypt-linux-amd64
./corescope-decrypt-linux-amd64 --help
```
### Build from source
```bash
cd cmd/decrypt
CGO_ENABLED=0 go build -ldflags="-s -w" -o corescope-decrypt .
```
The binary is statically linked — no dependencies, runs on any Linux.
## Usage
```
corescope-decrypt --channel NAME --db PATH [--format FORMAT] [--output FILE]
```
Run `corescope-decrypt --help` for full flag documentation.
### JSON output (default)
Machine-readable, includes all metadata (observers, path hops, raw hex):
```bash
corescope-decrypt --channel "#wardriving" --db meshcore.db
```
```json
[
{
"hash": "a1b2c3...",
"timestamp": "2026-04-12T17:19:09Z",
"sender": "XMD Tag 1",
"message": "@[MapperBot] 37.76985, -122.40525 [0.3w]",
"channel": "#wardriving",
"raw_hex": "150206...",
"path": ["A3", "B0"],
"observers": [
{"name": "Observer1", "snr": 9.5, "rssi": -56, "timestamp": "2026-04-12T17:19:10Z"}
]
}
]
```
### HTML output
Self-contained interactive viewer — search, sortable columns, expandable detail rows:
```bash
corescope-decrypt --channel "#wardriving" --db meshcore.db --format html --output wardriving.html
open wardriving.html
```
No external dependencies. The JSON data is embedded directly in the HTML file.
### IRC / log output
Plain-text, one line per message — ideal for `grep`, `awk`, and piping:
```bash
corescope-decrypt --channel "#wardriving" --db meshcore.db --format irc
```
```
[2026-04-12 17:19:09] <XMD Tag 1> @[MapperBot] 37.76985, -122.40525 [0.3w]
[2026-04-12 17:20:25] <XMD Tag 1> @[MapperBot] 37.78075, -122.39774 [0.3w]
[2026-04-12 17:25:30] <mk 🤠> @[MapperBot] 35.32444, -120.62077
```
```bash
# Find all messages from a specific sender
corescope-decrypt --channel "#wardriving" --db meshcore.db --format irc | grep "KE6QR"
```
## How channel encryption works
MeshCore hashtag channels derive their encryption key from the channel name:
1. **Key derivation**: `AES-128 key = SHA-256("#channelname")[:16]` (first 16 bytes)
2. **Channel hash**: `SHA-256(key)[0]` — 1-byte identifier in the packet header, used for fast filtering
3. **Encryption**: AES-128-ECB
4. **MAC**: HMAC-SHA256 with a 32-byte secret (key + 16 zero bytes), truncated to 2 bytes
5. **Plaintext format**: `timestamp(4 LE) + flags(1) + "sender: message\0"`
See the firmware source at `firmware/src/helpers/BaseChatMesh.cpp` for the canonical implementation.
## Testing against the fixture DB
```bash
cd cmd/decrypt
go test ./...
# Manual test with the real fixture:
go run . --channel "#wardriving" --db ../../test-fixtures/e2e-fixture.db --format irc
```
The shared crypto library also has independent tests:
```bash
cd internal/channel
go test -v ./...
```
## Limitations
- **Hashtag channels only.** Only channels where the key is derived from `SHA-256("#name")` are supported. Custom PSK channels require the raw key (not implemented).
- **No DM decryption.** Direct messages (`TXT_MSG`) use per-peer asymmetric encryption and cannot be decrypted by this tool.
- **Read-only.** The tool opens the database in read-only mode and never modifies it.
- **Timestamps are UTC.** The sender's embedded timestamp is used when available, displayed in UTC.
-22
View File
@@ -1,22 +0,0 @@
module github.com/corescope/decrypt
go 1.22
require (
github.com/meshcore-analyzer/channel v0.0.0
modernc.org/sqlite v1.34.5
)
require (
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/sys v0.22.0 // indirect
modernc.org/libc v1.55.3 // indirect
modernc.org/mathutil v1.6.0 // indirect
modernc.org/memory v1.8.0 // indirect
)
replace github.com/meshcore-analyzer/channel => ../../internal/channel
-43
View File
@@ -1,43 +0,0 @@
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo=
github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic=
golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw=
golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc=
modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ=
modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ=
modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y=
modernc.org/ccgo/v4 v4.19.2/go.mod h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s=
modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE=
modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ=
modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw=
modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU=
modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4=
modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0=
modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc=
modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss=
modernc.org/sqlite v1.34.5 h1:Bb6SR13/fjp15jt70CL4f18JIN7p7dnMExd+UFnF15g=
modernc.org/sqlite v1.34.5/go.mod h1:YLuNmX9NKs8wRNK2ko1LW1NGYcc9FkBO69JOt1AR9JE=
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
-467
View File
@@ -1,467 +0,0 @@
// corescope-decrypt decrypts and exports hashtag channel messages from a CoreScope SQLite database.
//
// Usage:
//
// corescope-decrypt --channel "#wardriving" --db meshcore.db [--format json|html|irc|log] [--output file]
package main
import (
"database/sql"
"encoding/hex"
"encoding/json"
"flag"
"fmt"
"html"
"log"
"os"
"sort"
"strings"
"time"
"github.com/meshcore-analyzer/channel"
_ "modernc.org/sqlite"
)
// version is the version string printed by the --version flag. It defaults to
// "dev" and is intended to be overridden at build time via ldflags.
var version = "dev"
// ChannelMessage is a single decrypted channel message together with the
// metadata of the transmission it was recovered from. A slice of these is what
// the json, html and irc renderers consume.
type ChannelMessage struct {
// Hash is the value of the transmission's hash column.
Hash string `json:"hash"`
// Timestamp is the timestamp embedded in the decrypted plaintext, formatted
// as RFC 3339 in UTC.
Timestamp string `json:"timestamp"`
// Sender is the sender name parsed from the decrypted plaintext.
Sender string `json:"sender"`
// Message is the message text parsed from the decrypted plaintext.
Message string `json:"message"`
// Channel is the normalized channel name, always starting with "#".
Channel string `json:"channel"`
// RawHex is the transmission's raw_hex column, unmodified.
RawHex string `json:"raw_hex"`
// Path holds the hops read from the transmission's decoded_json; it is nil
// when that column is missing or cannot be parsed.
Path []string `json:"path"`
// Observers lists the observations recorded for the transmission, ordered
// by observation timestamp.
Observers []Observer `json:"observers"`
}
// Observer is a single observation of the transmission, i.e. one row of the
// observations table joined with the observer that reported it.
type Observer struct {
// Name is the observer's name; empty when the join finds no name.
Name string `json:"name"`
// SNR is the observation's snr column; 0 when the column is NULL.
SNR float64 `json:"snr"`
// RSSI is the observation's rssi column; 0 when the column is NULL.
RSSI float64 `json:"rssi"`
// Timestamp is the observation time (stored as Unix seconds) formatted as
// RFC 3339 in UTC.
Timestamp string `json:"timestamp"`
}
// main is the CLI entry point. It parses the flags, derives the channel key
// and one-byte channel hash from the channel name, scans every GRP_TXT
// transmission in the database, decrypts the ones that belong to the channel
// and writes the result in the requested format to stdout or --output.
//
// Progress and errors go to the standard logger (stderr); any unrecoverable
// problem terminates the process with a non-zero exit status.
func main() {
	channelName := flag.String("channel", "", "Channel name (e.g. \"#wardriving\")")
	dbPath := flag.String("db", "", "Path to CoreScope SQLite database")
	format := flag.String("format", "json", "Output format: json, html, irc (or log)")
	output := flag.String("output", "", "Output file (default: stdout)")
	showVersion := flag.Bool("version", false, "Print version and exit")

	flag.Usage = func() {
		// Fprint, not Fprintf: the help text is a literal, not a format string.
		fmt.Fprint(os.Stderr, `corescope-decrypt — Decrypt and export MeshCore hashtag channel messages
USAGE
corescope-decrypt --channel NAME --db PATH [--format FORMAT] [--output FILE]
FLAGS
--channel NAME Channel name to decrypt (e.g. "#wardriving", "wardriving")
The "#" prefix is added automatically if missing.
--db PATH Path to a CoreScope SQLite database file (read-only access).
--format FORMAT Output format (default: json):
json — Machine-readable JSON array with full metadata
html — Self-contained HTML viewer with search and sorting
irc — Plain-text IRC-style log, one line per message
log — Alias for irc
--output FILE Write output to FILE instead of stdout.
--version Print version and exit.
EXAMPLES
# Export #wardriving messages as JSON
corescope-decrypt --channel "#wardriving" --db /app/data/meshcore.db
# Generate an interactive HTML viewer
corescope-decrypt --channel wardriving --db meshcore.db --format html --output wardriving.html
# Greppable IRC log
corescope-decrypt --channel "#MeshCore" --db meshcore.db --format irc --output meshcore.log
grep "KE6QR" meshcore.log
# From the Docker container
docker exec corescope-prod /app/corescope-decrypt --channel "#wardriving" --db /app/data/meshcore.db
RETROACTIVE DECRYPTION
MeshCore hashtag channels use symmetric encryption — the key is derived from the
channel name. The CoreScope ingestor stores ALL GRP_TXT packets in the database,
even those it cannot decrypt at ingest time. This tool lets you retroactively
decrypt messages for any channel whose name you know, even if the ingestor was
never configured with that channel's key.
This means you can recover historical messages by simply knowing the channel name.
LIMITATIONS
- Only hashtag channels (shared-secret, name-derived key) are supported.
- Direct messages (TXT_MSG) use per-peer encryption and cannot be decrypted.
- Custom PSK channels (non-hashtag) require the raw key, not a channel name.
`)
	}
	flag.Parse()

	if *showVersion {
		fmt.Println("corescope-decrypt", version)
		os.Exit(0)
	}
	if *channelName == "" || *dbPath == "" {
		flag.Usage()
		os.Exit(1)
	}

	// Reject an unknown format before scanning the database so that a typo
	// fails immediately instead of after a full table scan.
	switch *format {
	case "json", "html", "irc", "log":
	default:
		log.Fatalf("Unknown format: %s (use json, html, irc, or log)", *format)
	}

	// Normalize channel name: the key is derived from the "#"-prefixed form.
	ch := *channelName
	if !strings.HasPrefix(ch, "#") {
		ch = "#" + ch
	}

	key := channel.DeriveKey(ch)
	chHash := channel.ChannelHash(key)

	// The modernc.org/sqlite driver only passes URI query parameters such as
	// mode=ro through to SQLite when the DSN starts with "file:". For a bare
	// path it discards everything after '?', which would open the database
	// read-write (and create it if it does not exist). Build a proper URI,
	// escaping the characters that are significant in one.
	dsn := *dbPath + "?mode=ro"
	if !strings.HasPrefix(*dbPath, "file:") {
		escaped := strings.NewReplacer("%", "%25", "?", "%3f", "#", "%23").Replace(*dbPath)
		dsn = "file:" + escaped + "?mode=ro"
	}

	db, err := sql.Open("sqlite", dsn)
	if err != nil {
		log.Fatalf("Failed to open database: %v", err)
	}
	defer db.Close()

	// Query all GRP_TXT packets (payload type 5).
	rows, err := db.Query(`SELECT id, hash, raw_hex, first_seen FROM transmissions WHERE payload_type = 5`)
	if err != nil {
		log.Fatalf("Query failed: %v", err)
	}
	defer rows.Close()

	// Start from an empty, non-nil slice so that an export without matches is
	// the JSON array "[]" rather than "null".
	messages := []ChannelMessage{}
	decrypted, total := 0, 0

	for rows.Next() {
		var id int
		var txHash, rawHex, firstSeen string
		if err := rows.Scan(&id, &txHash, &rawHex, &firstSeen); err != nil {
			log.Printf("Scan error: %v", err)
			continue
		}
		total++

		payload, err := extractGRPPayload(rawHex)
		if err != nil {
			continue
		}
		// Payload layout: channel hash (1 byte), MAC (2 bytes), ciphertext.
		if len(payload) < 3 {
			continue
		}

		// Cheap pre-filter: skip packets addressed to a different channel.
		if payload[0] != chHash {
			continue
		}

		mac := payload[1:3]
		ciphertext := payload[3:]

		// At least one full 16-byte block is required; any trailing partial
		// block is dropped before the decryption attempt.
		if len(ciphertext) < 16 {
			continue
		}
		ciphertext = ciphertext[:len(ciphertext)/16*16]

		plaintext, ok := channel.Decrypt(key, mac, ciphertext)
		if !ok {
			continue
		}

		ts, sender, msg, err := channel.ParsePlaintext(plaintext)
		if err != nil {
			continue
		}
		decrypted++

		messages = append(messages, ChannelMessage{
			Hash: txHash,
			// The sender-embedded timestamp is Unix seconds; export as UTC.
			Timestamp: time.Unix(int64(ts), 0).UTC().Format(time.RFC3339),
			Sender:    sender,
			Message:   msg,
			Channel:   ch,
			RawHex:    rawHex,
			Path:      getPathFromDB(db, id),
			Observers: getObservers(db, id),
		})
	}
	// Without this check a failure in the middle of the scan would silently
	// produce a truncated export.
	if err := rows.Err(); err != nil {
		log.Fatalf("Row iteration failed: %v", err)
	}

	// Sort by timestamp. The strings are fixed-width RFC 3339 in UTC, so
	// lexical order is chronological order.
	sort.Slice(messages, func(i, j int) bool {
		return messages[i].Timestamp < messages[j].Timestamp
	})

	log.Printf("Scanned %d GRP_TXT packets, decrypted %d for channel %s", total, decrypted, ch)

	// Generate output. The format was validated above.
	var out []byte
	switch *format {
	case "json":
		out, err = json.MarshalIndent(messages, "", " ")
		if err != nil {
			log.Fatalf("JSON marshal: %v", err)
		}
		out = append(out, '\n')
	case "html":
		out = renderHTML(messages, ch)
	case "irc", "log":
		out = renderIRC(messages)
	}

	if *output == "" {
		if _, err := os.Stdout.Write(out); err != nil {
			log.Fatalf("Write stdout: %v", err)
		}
		return
	}
	if err := os.WriteFile(*output, out, 0644); err != nil {
		log.Fatalf("Write file: %v", err)
	}
	log.Printf("Written to %s", *output)
}
// extractGRPPayload decodes a hex-encoded packet and returns the bytes that
// follow its header, optional transport codes and path section.
//
// Surrounding whitespace in rawHex is ignored. An error is returned when the
// input is not valid hex or shorter than two bytes, when the header's payload
// type is not GRP_TXT (5), or when the packet ends before the path byte or
// before the first payload byte.
func extractGRPPayload(rawHex string) ([]byte, error) {
	pkt, decodeErr := hex.DecodeString(strings.TrimSpace(rawHex))
	if decodeErr != nil || len(pkt) < 2 {
		return nil, fmt.Errorf("invalid hex")
	}

	// Header byte: bits 2-5 carry the payload type, bits 0-1 the route type.
	hdr := pkt[0]
	if (hdr>>2)&0x0F != 5 { // GRP_TXT
		return nil, fmt.Errorf("not GRP_TXT")
	}

	pos := 1
	switch hdr & 0x03 {
	case 0, 3:
		// Transport routes carry two 2-byte transport codes ahead of the path.
		pos += 4
	}

	if pos >= len(pkt) {
		return nil, fmt.Errorf("too short for path")
	}

	// Path byte: the top two bits encode the per-hop hash size minus one, the
	// low six bits the number of hops.
	pathDesc := pkt[pos]
	hopBytes := (int(pathDesc>>6) + 1) * int(pathDesc&0x3F)
	pos += 1 + hopBytes

	if pos >= len(pkt) {
		return nil, fmt.Errorf("too short for payload")
	}
	return pkt[pos:], nil
}
// getPathFromDB reads the decoded_json column of the transmission with the
// given id and returns the string list stored under path.hops. It returns nil
// when the query or scan fails, when the column is NULL, or when the stored
// JSON cannot be unmarshalled.
func getPathFromDB(db *sql.DB, txID int) []string {
	var raw sql.NullString
	row := db.QueryRow(`SELECT decoded_json FROM transmissions WHERE id = ?`, txID)
	if scanErr := row.Scan(&raw); scanErr != nil {
		return nil
	}
	if !raw.Valid {
		return nil
	}

	// Only the hop list is of interest; every other key in the document is ignored.
	type pathSection struct {
		Hops []string `json:"hops"`
	}
	var doc struct {
		Path pathSection `json:"path"`
	}
	if err := json.Unmarshal([]byte(raw.String), &doc); err != nil {
		return nil
	}
	return doc.Path.Hops
}
// getObservers returns one Observer per observation row recorded for the given
// transmission, ordered by observation timestamp. The observer name, SNR and
// RSSI are copied only when the corresponding column is non-NULL; the
// timestamp column is interpreted as Unix seconds and formatted as RFC 3339 in
// UTC. A failed query yields nil, and rows that fail to scan are skipped.
func getObservers(db *sql.DB, txID int) []Observer {
	rows, err := db.Query(`
SELECT o.name, obs.snr, obs.rssi, obs.timestamp
FROM observations obs
LEFT JOIN observers o ON o.id = CAST(obs.observer_idx AS TEXT)
WHERE obs.transmission_id = ?
ORDER BY obs.timestamp
`, txID)
	if err != nil {
		return nil
	}
	defer rows.Close()

	var result []Observer
	for rows.Next() {
		var (
			name      sql.NullString
			snr, rssi sql.NullFloat64
			unixSec   int64
		)
		if scanErr := rows.Scan(&name, &snr, &rssi, &unixSec); scanErr != nil {
			// Best effort: an unreadable row does not abort the whole listing.
			continue
		}

		var entry Observer
		entry.Timestamp = time.Unix(unixSec, 0).UTC().Format(time.RFC3339)
		if name.Valid {
			entry.Name = name.String
		}
		if snr.Valid {
			entry.SNR = snr.Float64
		}
		if rssi.Valid {
			entry.RSSI = rssi.Float64
		}
		result = append(result, entry)
	}
	return result
}
// renderIRC formats messages as plain-text log lines of the form
// "[timestamp] <sender> message", one per message. An empty sender is shown as
// "???". Timestamps that parse as RFC 3339 are printed as
// "2006-01-02 15:04:05"; anything else is printed unchanged.
func renderIRC(messages []ChannelMessage) []byte {
	var out strings.Builder
	for _, msg := range messages {
		nick := msg.Sender
		if nick == "" {
			nick = "???"
		}

		// Fall back to the raw timestamp text when it is not RFC 3339.
		stamp := msg.Timestamp
		if parsed, err := time.Parse(time.RFC3339, msg.Timestamp); err == nil {
			stamp = parsed.Format("2006-01-02 15:04:05")
		}

		fmt.Fprintf(&out, "[%s] <%s> %s\n", stamp, nick, msg.Message)
	}
	return []byte(out.String())
}
// renderHTML builds a self-contained HTML page listing messages for the named
// channel. The messages are embedded as a JSON array in an inline script that
// renders a searchable, sortable table in the browser; channelName is
// HTML-escaped before being placed in the title and heading.
//
// The embedded value is always a JSON array: a nil or empty slice becomes
// "[]", and if marshalling fails the error is logged and "[]" is embedded, so
// the page's script never sees null or an empty expression.
func renderHTML(messages []ChannelMessage, channelName string) []byte {
	// A nil slice would marshal to null and make DATA.length throw in the page.
	jsonData := []byte("[]")
	if len(messages) > 0 {
		// json.Marshal escapes <, > and & by default, so message text cannot
		// close the surrounding script element.
		encoded, err := json.Marshal(messages)
		if err != nil {
			log.Printf("renderHTML: JSON marshal: %v", err)
		} else {
			jsonData = encoded
		}
	}
	var b strings.Builder
	b.WriteString(`<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CoreScope Channel Export — ` + html.EscapeString(channelName) + `</title>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,sans-serif;background:#0d1117;color:#c9d1d9;padding:20px}
h1{color:#58a6ff;margin-bottom:16px;font-size:1.5em}
.stats{color:#8b949e;margin-bottom:16px;font-size:0.9em}
input[type=text]{width:100%;max-width:500px;padding:8px 12px;background:#161b22;border:1px solid #30363d;border-radius:6px;color:#c9d1d9;font-size:14px;margin-bottom:16px}
input[type=text]:focus{outline:none;border-color:#58a6ff}
table{width:100%;border-collapse:collapse;font-size:14px}
th{background:#161b22;color:#8b949e;text-align:left;padding:8px 12px;border-bottom:2px solid #30363d;cursor:pointer;user-select:none;white-space:nowrap}
th:hover{color:#58a6ff}
th.sorted-asc::after{content:" ▲"}
th.sorted-desc::after{content:" ▼"}
td{padding:8px 12px;border-bottom:1px solid #21262d;vertical-align:top}
tr:hover{background:#161b22}
tr.expanded{background:#161b22}
.detail-row td{padding:12px 24px;background:#0d1117;border-bottom:1px solid #21262d}
.detail-row pre{background:#161b22;padding:12px;border-radius:6px;overflow-x:auto;font-size:12px;color:#8b949e}
.detail-row .label{color:#58a6ff;font-weight:600;margin-top:8px;display:block}
.observer-tag{display:inline-block;background:#1f6feb22;color:#58a6ff;padding:2px 8px;border-radius:4px;margin:2px;font-size:12px}
.no-results{color:#8b949e;text-align:center;padding:40px;font-size:16px}
.sender{color:#d2a8ff;font-weight:600}
.timestamp{color:#8b949e;font-family:monospace;font-size:12px}
</style>
</head>
<body>
<h1>` + html.EscapeString(channelName) + ` — Channel Messages</h1>
<div class="stats" id="stats"></div>
<input type="text" id="search" placeholder="Search messages..." autocomplete="off">
<table>
<thead>
<tr>
<th data-col="timestamp">Timestamp</th>
<th data-col="sender">Sender</th>
<th data-col="message">Message</th>
<th data-col="observers">Observers</th>
</tr>
</thead>
<tbody id="tbody"></tbody>
</table>
<div class="no-results" id="no-results" style="display:none">No matching messages</div>
<script>
var DATA=` + string(jsonData) + `;
var sortCol="timestamp",sortAsc=true,expandedHash=null;
function init(){
document.getElementById("stats").textContent=DATA.length+" messages";
document.getElementById("search").addEventListener("input",render);
document.querySelectorAll("th[data-col]").forEach(function(th){
th.addEventListener("click",function(){
var col=th.dataset.col;
if(sortCol===col)sortAsc=!sortAsc;
else{sortCol=col;sortAsc=true}
render();
});
});
render();
}
function render(){
var q=document.getElementById("search").value.toLowerCase();
var filtered=DATA.filter(function(m){
if(!q)return true;
return(m.message||"").toLowerCase().indexOf(q)>=0||(m.sender||"").toLowerCase().indexOf(q)>=0;
});
filtered.sort(function(a,b){
var va=a[sortCol]||"",vb=b[sortCol]||"";
if(sortCol==="observers"){va=a.observers?a.observers.length:0;vb=b.observers?b.observers.length:0}
if(va<vb)return sortAsc?-1:1;
if(va>vb)return sortAsc?1:-1;
return 0;
});
document.querySelectorAll("th[data-col]").forEach(function(th){
th.className=th.dataset.col===sortCol?(sortAsc?"sorted-asc":"sorted-desc"):"";
});
var tb=document.getElementById("tbody");
tb.innerHTML="";
document.getElementById("no-results").style.display=filtered.length?"none":"block";
filtered.forEach(function(m){
var tr=document.createElement("tr");
tr.innerHTML='<td class="timestamp">'+esc(m.timestamp)+'</td><td class="sender">'+esc(m.sender||"—")+'</td><td>'+esc(m.message)+'</td><td>'+
(m.observers?m.observers.map(function(o){return'<span class="observer-tag">'+esc(o.name||"?")+" SNR:"+o.snr.toFixed(1)+'</span>'}).join(""):"—")+'</td>';
tr.style.cursor="pointer";
tr.addEventListener("click",function(){
expandedHash=expandedHash===m.hash?null:m.hash;
render();
});
tb.appendChild(tr);
if(expandedHash===m.hash){
tr.className="expanded";
var dr=document.createElement("tr");
dr.className="detail-row";
dr.innerHTML='<td colspan="4"><span class="label">Hash</span><pre>'+esc(m.hash)+'</pre>'+
'<span class="label">Raw Hex</span><pre>'+esc(m.raw_hex)+'</pre>'+
(m.path&&m.path.length?'<span class="label">Path</span><pre>'+esc(m.path.join(" → "))+'</pre>':'')+
'<span class="label">Observers</span><pre>'+esc(JSON.stringify(m.observers,null,2))+'</pre></td>';
tb.appendChild(dr);
}
});
}
function esc(s){var d=document.createElement("div");d.textContent=s;return d.innerHTML}
init();
</script>
</body>
</html>`)
	return []byte(b.String())
}
-129
View File
@@ -1,129 +0,0 @@
package main
import (
"encoding/hex"
"encoding/json"
"os"
"strings"
"testing"
"github.com/meshcore-analyzer/channel"
)
// TestExtractGRPPayload feeds a minimal flood-routed GRP_TXT packet through
// extractGRPPayload and checks that the payload comes back intact.
func TestExtractGRPPayload(t *testing.T) {
	// Packet layout: header(1) + path(1) + payload.
	//   header 0x15 = (5<<2)|1 → payload=GRP_TXT(5), route=FLOOD(1), version=0
	//   path   0x00 → 0 hops, hash_size=1
	body := []byte{0x81, 0x12, 0x34} // channel_hash + mac + data
	pkt := make([]byte, 0, 2+len(body))
	pkt = append(pkt, 0x15, 0x00)
	pkt = append(pkt, body...)

	got, err := extractGRPPayload(hex.EncodeToString(pkt))
	if err != nil {
		t.Fatal(err)
	}
	if len(got) != 3 || got[0] != 0x81 {
		t.Fatalf("payload mismatch: %x", got)
	}
}
// TestExtractGRPPayloadTransport checks that the four transport-code bytes
// that precede the path byte on a transport-flood packet are skipped.
func TestExtractGRPPayloadTransport(t *testing.T) {
	// header 0x14 = (5<<2)|0 → GRP_TXT with route type 0 (transport flood),
	// followed by 4 transport bytes, a path byte with 0 hops, then the payload.
	prefix := []byte{0x14, 0xFF, 0xFF, 0xFF, 0xFF, 0x00}
	body := []byte{0xAA, 0xBB, 0xCC}
	pkt := append(prefix, body...)

	got, err := extractGRPPayload(hex.EncodeToString(pkt))
	if err != nil {
		t.Fatal(err)
	}
	if first := got[0]; first != 0xAA {
		t.Fatalf("expected AA, got %02X", first)
	}
}
// TestExtractGRPPayloadNotGRP checks that a packet whose header carries a
// payload type other than GRP_TXT is rejected.
func TestExtractGRPPayloadNotGRP(t *testing.T) {
	// header 0x11 = (4<<2)|1 → payload type ADVERT (4)
	pkt := []byte{0x11, 0x00, 0x01, 0x02}
	if _, err := extractGRPPayload(hex.EncodeToString(pkt)); err == nil {
		t.Fatal("expected error for non-GRP_TXT")
	}
}
// TestKeyDerivationConsistency pins the derived key length and channel hash
// for "#wardriving" to the values the ingestor is expected to produce.
func TestKeyDerivationConsistency(t *testing.T) {
	key := channel.DeriveKey("#wardriving")
	if n := len(key); n != 16 {
		t.Fatalf("key len %d", n)
	}

	// Fixture data records channelHashHex "81" for #wardriving.
	if got := channel.ChannelHash(key); got != 0x81 {
		t.Fatalf("channel hash %02X, expected 81", got)
	}
}
func TestRenderIRC(t *testing.T) {
msgs := []ChannelMessage{
{Timestamp: "2026-04-12T03:45:12Z", Sender: "NodeA", Message: "Hello"},
{Timestamp: "2026-04-12T03:46:01Z", Sender: "", Message: "No sender"},
}
out := string(renderIRC(msgs))
if !strings.Contains(out, "[2026-04-12 03:45:12] <NodeA> Hello") {
t.Fatalf("IRC output missing expected line: %s", out)
}
if !strings.Contains(out, "<???> No sender") {
t.Fatalf("IRC output should use ??? for empty sender: %s", out)
}
}
func TestRenderHTMLValid(t *testing.T) {
msgs := []ChannelMessage{
{Hash: "abc", Timestamp: "2026-04-12T00:00:00Z", Sender: "X", Message: "test", Channel: "#test"},
}
out := string(renderHTML(msgs, "#test"))
if !strings.Contains(out, "<!DOCTYPE html>") {
t.Fatal("not valid HTML")
}
if !strings.Contains(out, "#test") {
t.Fatal("channel name missing")
}
if !strings.Contains(out, "</html>") {
t.Fatal("HTML not closed")
}
}
// TestJSONOutputParseable round-trips a message list through indented JSON
// and checks that it decodes back to the same single entry.
func TestJSONOutputParseable(t *testing.T) {
	original := []ChannelMessage{
		{Hash: "abc", Timestamp: "2026-04-12T00:00:00Z", Sender: "X", Message: "hi", Channel: "#test"},
	}

	encoded, err := json.MarshalIndent(original, "", " ")
	if err != nil {
		t.Fatal(err)
	}

	var decoded []ChannelMessage
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		t.Fatalf("JSON not parseable: %v", err)
	}
	if ok := len(decoded) == 1 && decoded[0].Sender == "X"; !ok {
		t.Fatalf("parsed mismatch: %+v", decoded)
	}
}
// TestFixtureDecrypt is skipped unless the e2e fixture DB exists on disk.
// NOTE(review): the fixture DB is only stat'ed, never opened — the body
// currently re-checks the #wardriving channel hash and decrypts nothing.
func TestFixtureDecrypt(t *testing.T) {
	const fixturePath = "../../test-fixtures/e2e-fixture.db"
	_, statErr := os.Stat(fixturePath)
	if os.IsNotExist(statErr) {
		t.Skip("fixture DB not found")
	}

	// The fixture is known to hold #wardriving messages with channelHash 0x81.
	if got := channel.ChannelHash(channel.DeriveKey("#wardriving")); got != 0x81 {
		t.Fatalf("unexpected channel hash: %02X", got)
	}
}
-1
View File
@@ -1 +0,0 @@
ingestor
-18
View File
@@ -47,24 +47,6 @@ The config file uses the same format as the Node.js `config.json`. The ingestor
| `DB_PATH` | SQLite database path | `data/meshcore.db` |
| `MQTT_BROKER` | Single MQTT broker URL (overrides config) | — |
| `MQTT_TOPIC` | MQTT topic (used with `MQTT_BROKER`) | `meshcore/#` |
| `CORESCOPE_INGESTOR_STATS` | Path to the per-second stats JSON file consumed by the server's `/api/perf/io` and `/api/perf/write-sources` endpoints (#1120) | `/tmp/corescope-ingestor-stats.json` |
### Stats file (`CORESCOPE_INGESTOR_STATS`)
Every second the ingestor publishes a JSON snapshot of its counters
(`tx_inserted`, `obs_inserted`, `walCommits`, `backfillUpdates.*`, etc.) plus
a `procIO` block sampled from `/proc/self/io` (read/write/cancelled bytes per
second + syscall counts). The server reads this file and surfaces the data on
the Perf page so operators can self-diagnose write-volume anomalies.
The writer opens the file with `O_NOFOLLOW | O_CREAT | O_TRUNC` and mode `0o600`, so a
pre-planted symlink at the path cannot be used to clobber an arbitrary file.
**Security note:** the default lives in `/tmp`, which is world-writable on
most hosts (sticky bit only protects deletion, not creation). On
shared/multi-tenant hosts, override `CORESCOPE_INGESTOR_STATS` to point at a
private directory (e.g. `/var/lib/corescope/ingestor-stats.json`) that only
the corescope user can write to.
### Minimal Config
-148
View File
@@ -1,148 +0,0 @@
// Async migration helper — runs schema/backfill work that may take minutes on
// large prod tables WITHOUT blocking ingestor startup.
//
// MIGRATION ANNOTATION CONVENTION (read this before touching migrations):
//
// Sync schema/data migrations (CREATE INDEX, ALTER TABLE, UPDATE ... WHERE)
// that run inline during OpenStore() block the ingestor from accepting
// packets until they finish. On an empty dev DB they return in milliseconds;
// at prod scale (1.9M+ observations, 80K+ adverts) they can pin the boot
// for minutes and trigger restart loops. This regression class has bitten us
// repeatedly (#791 resolved_path backfill, #1483 obs_observer_ts_idx_v1).
//
// ANY new CREATE INDEX / ALTER TABLE / data-rewrite migration MUST EITHER:
// 1. Run via Store.RunAsyncMigration(...) below (preferred for backfills
// and any work that may touch >1K rows). The migration is recorded as
// `pending_async` immediately, returns to the caller (boot proceeds),
// and completes in a goroutine. Status flips to `done` (or `failed`
// with an error message) when fn returns.
// 2. Carry the preflight annotation comment immediately above the
// migration block, e.g.
// // PREFLIGHT: async=true reason="<one-line justification>"
// Use this for migrations that are genuinely cheap at any scale
// (e.g. ALTER TABLE ADD COLUMN, CREATE INDEX on a known-bounded
// table). The annotation is grepped by
// ~/.openclaw/skills/pr-preflight/scripts/check-async-migrations.sh
// — its absence on a touched migration block is a hard-fail gate.
//
// See MIGRATIONS.md in the repo root for the full policy and examples.
package main
import (
"context"
"database/sql"
"fmt"
"log"
)
// ensureAsyncMigrationsTable makes sure the _async_migrations bookkeeping
// table exists. The statement is CREATE TABLE IF NOT EXISTS, so calling it
// repeatedly is harmless. The table backs RunAsyncMigration and
// AsyncMigrationStatus.
func ensureAsyncMigrationsTable(db *sql.DB) error {
	const createTable = `
CREATE TABLE IF NOT EXISTS _async_migrations (
name TEXT PRIMARY KEY,
status TEXT NOT NULL, -- pending_async | done | failed
started_at TEXT NOT NULL DEFAULT (datetime('now')),
ended_at TEXT,
error TEXT
)
`
	if _, err := db.Exec(createTable); err != nil {
		return err
	}
	return nil
}
// RunAsyncMigration records `name` as a pending async migration and runs `fn`
// in a background goroutine, returning to the caller without waiting for it so
// the ingestor can keep booting.
//
// Contract (pinned by async_migration_test.go):
//   - The row for `name` is in state `pending_async` by the time this returns.
//   - When fn returns nil the row becomes `done`; when fn returns an error or
//     panics the row becomes `failed` and the error text is stored.
//   - A row already in `done` state short-circuits: fn is not run again.
//   - A row in `failed` or `pending_async` state is reset and fn is scheduled
//     again (the earlier run may have crashed mid-flight).
//   - The goroutine is tracked by the store's WaitGroup, so tests and shutdown
//     code can block on it through Store.WaitForAsyncMigrations().
//
// The returned error covers only the bookkeeping done before fn is scheduled.
func (s *Store) RunAsyncMigration(ctx context.Context, name string, fn func(context.Context, *sql.DB) error) error {
	if err := ensureAsyncMigrationsTable(s.db); err != nil {
		return fmt.Errorf("ensure _async_migrations: %w", err)
	}

	var prior string
	lookupErr := s.db.QueryRow(`SELECT status FROM _async_migrations WHERE name = ?`, name).Scan(&prior)
	switch {
	case lookupErr == sql.ErrNoRows:
		// First sighting of this migration: register it as pending.
		if _, err := s.db.Exec(`
INSERT INTO _async_migrations (name, status) VALUES (?, 'pending_async')`,
			name); err != nil {
			return fmt.Errorf("register async migration %q: %w", name, err)
		}
	case lookupErr != nil:
		return fmt.Errorf("lookup async migration %q: %w", name, lookupErr)
	case prior == "done":
		return nil // already complete, nothing to do
	default:
		// pending_async or failed → reset and retry.
		if _, err := s.db.Exec(`
UPDATE _async_migrations
SET status = 'pending_async', started_at = datetime('now'), ended_at = NULL, error = NULL
WHERE name = ?`, name); err != nil {
			return fmt.Errorf("reset async migration %q: %w", name, err)
		}
	}

	s.backfillWg.Add(1)
	go func() {
		defer s.backfillWg.Done()

		var runErr error
		// recover must be called directly in this deferred function; the
		// outcome is written only after a possible panic has been converted
		// into runErr.
		defer func() {
			if r := recover(); r != nil {
				runErr = fmt.Errorf("panic: %v", r)
				log.Printf("[async-migration] %q panic recovered: %v", name, r)
			}

			if runErr == nil {
				if _, err := s.db.Exec(`
UPDATE _async_migrations
SET status = 'done', ended_at = datetime('now'), error = NULL
WHERE name = ?`, name); err != nil {
					log.Printf("[async-migration] failed to mark %q done: %v", name, err)
					return
				}
				log.Printf("[async-migration] %q done", name)
				return
			}

			if _, err := s.db.Exec(`
UPDATE _async_migrations
SET status = 'failed', ended_at = datetime('now'), error = ?
WHERE name = ?`, runErr.Error(), name); err != nil {
				log.Printf("[async-migration] failed to record failure for %q: %v", name, err)
			}
			log.Printf("[async-migration] %q FAILED: %v", name, runErr)
		}()

		log.Printf("[async-migration] %q starting (boot continues)", name)
		runErr = fn(ctx, s.db)
	}()
	return nil
}
// AsyncMigrationStatus looks up the stored status of the async migration
// `name` ("pending_async", "done" or "failed"). When no row exists for that
// name the error is sql.ErrNoRows. The bookkeeping table is created first if
// it is missing.
func (s *Store) AsyncMigrationStatus(name string) (string, error) {
	if err := ensureAsyncMigrationsTable(s.db); err != nil {
		return "", err
	}

	var current string
	row := s.db.QueryRow(`SELECT status FROM _async_migrations WHERE name = ?`, name)
	scanErr := row.Scan(&current)
	return current, scanErr
}
// WaitForAsyncMigrations blocks until every async migration goroutine
// currently registered on the store's WaitGroup has finished. It is intended
// for tests and graceful shutdown; the production boot path deliberately does
// NOT call it, since not waiting is the whole point of async migrations.
func (s *Store) WaitForAsyncMigrations() {
s.backfillWg.Wait()
}
-299
View File
@@ -1,299 +0,0 @@
package main
import (
"context"
"database/sql"
"fmt"
"sync"
"sync/atomic"
"testing"
"time"
)
// waitForStatus polls AsyncMigrationStatus every 10ms until it reports `want`
// without error, and returns that status. If `timeout` elapses first the test
// is failed with the last observed status and error.
func waitForStatus(t *testing.T, s *Store, name, want string, timeout time.Duration) string {
	t.Helper()

	var (
		last    string
		lastErr error
	)
	for deadline := time.Now().Add(timeout); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
		last, lastErr = s.AsyncMigrationStatus(name)
		if lastErr == nil && last == want {
			return last
		}
	}
	t.Fatalf("status never reached %q within %s: got %q (err=%v)", want, timeout, last, lastErr)
	return last
}
// TestRunAsyncMigration_PendingThenDone pins the core RunAsyncMigration
// contract:
//
//  1. RunAsyncMigration returns without waiting for fn.
//  2. While fn is still running, the migration is queryable with status
//     `pending_async`.
//  3. Once fn returns, the status transitions to `done`.
//
// This is the regression test for the recurring "sync migration on large
// table blocks ingestor startup" class (#791, #1483, ...). If it fails the
// contract is broken — do not relax the test; fix the runner.
func TestRunAsyncMigration_PendingThenDone(t *testing.T) {
	const name = "test_async_migration_v1"

	s := newTestStore(t)
	fnStarted := make(chan struct{})
	fnRelease := make(chan struct{})

	// fn announces that it is running, then parks until the test releases it.
	blockingFn := func(ctx context.Context, db *sql.DB) error {
		close(fnStarted)
		<-fnRelease
		return nil
	}
	if err := s.RunAsyncMigration(context.Background(), name, blockingFn); err != nil {
		t.Fatalf("RunAsyncMigration returned error: %v", err)
	}

	// Seeing fn start while we are already past the call proves the call did
	// not block on fn and that fn runs concurrently.
	select {
	case <-fnStarted:
	case <-time.After(2 * time.Second):
		t.Fatal("async migration fn did not start within 2s — RunAsyncMigration may have blocked or never scheduled")
	}

	status, err := s.AsyncMigrationStatus(name)
	if err != nil {
		t.Fatalf("AsyncMigrationStatus while running: %v", err)
	}
	if status != "pending_async" {
		t.Fatalf("status while fn running: got %q, want %q", status, "pending_async")
	}

	close(fnRelease)

	// Poll for the transition to done.
	for deadline := time.Now().Add(2 * time.Second); time.Now().Before(deadline); time.Sleep(10 * time.Millisecond) {
		status, err = s.AsyncMigrationStatus(name)
		if err == nil && status == "done" {
			return
		}
	}
	t.Fatalf("status never transitioned to done within 2s: got %q (err=%v)", status, err)
}
// TestRunAsyncMigration_PanicCapture checks that a panic inside fn is
// recovered by the runner and that the migration row ends up `failed` with a
// non-empty error message — never silently `done`. Operator visibility into
// mid-migration crashes is the whole point.
func TestRunAsyncMigration_PanicCapture(t *testing.T) {
	const name = "test_panic_capture_v1"
	s := newTestStore(t)

	panicking := func(ctx context.Context, db *sql.DB) error {
		panic("synthetic boom")
	}
	if err := s.RunAsyncMigration(context.Background(), name, panicking); err != nil {
		t.Fatalf("RunAsyncMigration returned error: %v", err)
	}
	s.WaitForAsyncMigrations()

	status, err := s.AsyncMigrationStatus(name)
	if err != nil {
		t.Fatalf("status lookup: %v", err)
	}
	if status != "failed" {
		t.Fatalf("status after panic: got %q, want %q (silent-done would be catastrophic)", status, "failed")
	}

	var recorded sql.NullString
	row := s.db.QueryRow(`SELECT error FROM _async_migrations WHERE name = ?`, name)
	if err := row.Scan(&recorded); err != nil {
		t.Fatalf("error column lookup: %v", err)
	}
	if hasMessage := recorded.Valid && recorded.String != ""; !hasMessage {
		t.Fatalf("error column empty after panic — operator has no clue what failed")
	}
}
// TestRunAsyncMigration_IdempotentSecondCallNoOps checks that once a
// migration has reached `done`, calling RunAsyncMigration again with the same
// name does not invoke fn a second time. This protects the prod path:
// ingestor restarts must not rebuild already-built indexes.
func TestRunAsyncMigration_IdempotentSecondCallNoOps(t *testing.T) {
	const name = "test_idempotent_v1"
	s := newTestStore(t)

	var calls int32
	countingFn := func(ctx context.Context, db *sql.DB) error {
		atomic.AddInt32(&calls, 1)
		return nil
	}

	err := s.RunAsyncMigration(context.Background(), name, countingFn)
	if err != nil {
		t.Fatalf("first call: %v", err)
	}
	s.WaitForAsyncMigrations()
	waitForStatus(t, s, name, "done", 2*time.Second)

	// The row is now done, so this call must short-circuit without running fn.
	err = s.RunAsyncMigration(context.Background(), name, countingFn)
	if err != nil {
		t.Fatalf("second call: %v", err)
	}
	s.WaitForAsyncMigrations()

	if got := atomic.LoadInt32(&calls); got != 1 {
		t.Fatalf("fn invoked %d times, want 1 (done-state row must short-circuit)", got)
	}
}
// TestRunAsyncMigration_RestartSafetyFailedIsRetried seeds a row in `failed`
// state, as a crashed earlier boot would leave behind, and checks that the
// next RunAsyncMigration call runs fn once, ends in `done`, and clears the
// stored error instead of leaving the migration stuck in `failed`.
func TestRunAsyncMigration_RestartSafetyFailedIsRetried(t *testing.T) {
	const name = "test_restart_failed_v1"
	s := newTestStore(t)

	if err := ensureAsyncMigrationsTable(s.db); err != nil {
		t.Fatalf("ensure table: %v", err)
	}
	_, seedErr := s.db.Exec(`INSERT INTO _async_migrations (name, status, error) VALUES (?, 'failed', 'simulated prior crash')`, name)
	if seedErr != nil {
		t.Fatalf("seed failed row: %v", seedErr)
	}

	var calls int32
	countingFn := func(ctx context.Context, db *sql.DB) error {
		atomic.AddInt32(&calls, 1)
		return nil
	}
	if err := s.RunAsyncMigration(context.Background(), name, countingFn); err != nil {
		t.Fatalf("RunAsyncMigration on failed row: %v", err)
	}
	s.WaitForAsyncMigrations()
	waitForStatus(t, s, name, "done", 2*time.Second)

	if got := atomic.LoadInt32(&calls); got != 1 {
		t.Fatalf("fn invoked %d times, want 1 (failed-state row must be retried)", got)
	}

	// A successful retry must also wipe the error left by the failed run.
	var leftover sql.NullString
	row := s.db.QueryRow(`SELECT error FROM _async_migrations WHERE name = ?`, name)
	if err := row.Scan(&leftover); err != nil {
		t.Fatalf("error col: %v", err)
	}
	if leftover.Valid && leftover.String != "" {
		t.Fatalf("error column not cleared on retry success: %q", leftover.String)
	}
}
// TestRunAsyncMigration_RestartSafetyPendingIsRetried seeds a row in
// `pending_async` state, as left behind when the ingestor dies before the
// goroutine finishes, and checks that the next RunAsyncMigration call runs fn
// once and reaches `done`. A row stuck in pending forever would be a silent
// prod outage.
func TestRunAsyncMigration_RestartSafetyPendingIsRetried(t *testing.T) {
	const name = "test_restart_pending_v1"
	s := newTestStore(t)

	if err := ensureAsyncMigrationsTable(s.db); err != nil {
		t.Fatalf("ensure table: %v", err)
	}
	_, seedErr := s.db.Exec(`INSERT INTO _async_migrations (name, status) VALUES (?, 'pending_async')`, name)
	if seedErr != nil {
		t.Fatalf("seed pending row: %v", seedErr)
	}

	var calls int32
	countingFn := func(ctx context.Context, db *sql.DB) error {
		atomic.AddInt32(&calls, 1)
		return nil
	}
	if err := s.RunAsyncMigration(context.Background(), name, countingFn); err != nil {
		t.Fatalf("RunAsyncMigration on pending row: %v", err)
	}
	s.WaitForAsyncMigrations()
	waitForStatus(t, s, name, "done", 2*time.Second)

	if got := atomic.LoadInt32(&calls); got != 1 {
		t.Fatalf("fn invoked %d times, want 1 (pending row must be retried after crash)", got)
	}
}
// TestRunAsyncMigration_FnErrorRecorded covers the non-panic failure path:
// when fn returns an error the row must end up `failed` with a non-empty
// error column.
func TestRunAsyncMigration_FnErrorRecorded(t *testing.T) {
	const name = "test_fn_error_v1"
	s := newTestStore(t)

	failingFn := func(ctx context.Context, db *sql.DB) error {
		return fmt.Errorf("simulated migration error")
	}
	if err := s.RunAsyncMigration(context.Background(), name, failingFn); err != nil {
		t.Fatalf("RunAsyncMigration: %v", err)
	}
	s.WaitForAsyncMigrations()

	status, err := s.AsyncMigrationStatus(name)
	if err != nil {
		t.Fatalf("status: %v", err)
	}
	if status != "failed" {
		t.Fatalf("status: got %q, want failed", status)
	}

	var recorded sql.NullString
	row := s.db.QueryRow(`SELECT error FROM _async_migrations WHERE name = ?`, name)
	if err := row.Scan(&recorded); err != nil {
		t.Fatalf("error col: %v", err)
	}
	if hasMessage := recorded.Valid && recorded.String != ""; !hasMessage {
		t.Fatalf("error column empty after fn error")
	}
}
// TestRunAsyncMigration_ConcurrentSameNameSerialized fires five concurrent
// RunAsyncMigration calls with the SAME name on the SAME *Store and checks
// that the migration ends in `done` with fn having run at least once and at
// most once per caller. Only the single-process case is exercised here; the
// original notes say CoreScope does not support multi-ingestor / cluster mode
// (see MIGRATIONS.md "Concurrency"), so cross-process races are out of scope.
//
// Note that this does not assert fn runs exactly once: a caller that sees the
// row in pending_async may legitimately re-schedule fn, because the runner
// treats pending_async as "a previous run may have crashed mid-flight".
func TestRunAsyncMigration_ConcurrentSameNameSerialized(t *testing.T) {
	const (
		name    = "test_concurrent_serialize_v1"
		callers = 5
	)
	s := newTestStore(t)

	var calls int32
	slowFn := func(ctx context.Context, db *sql.DB) error {
		atomic.AddInt32(&calls, 1)
		time.Sleep(20 * time.Millisecond)
		return nil
	}

	var callersWg sync.WaitGroup
	callersWg.Add(callers)
	for i := 0; i < callers; i++ {
		go func() {
			defer callersWg.Done()
			// Each caller may no-op (row already done) or schedule a run;
			// the returned error is deliberately ignored because only the
			// final state and the call count are asserted below.
			_ = s.RunAsyncMigration(context.Background(), name, slowFn)
		}()
	}
	callersWg.Wait()
	s.WaitForAsyncMigrations()
	waitForStatus(t, s, name, "done", 2*time.Second)

	// Bound: at least one run, and no more runs than there were callers
	// (each may have scheduled before any run reached done).
	if got := atomic.LoadInt32(&calls); got < 1 || got > callers {
		t.Fatalf("fn invoked %d times, want 1..5 inclusive (bounded by caller count)", got)
	}
}
+10 -207
View File
@@ -2,14 +2,10 @@ package main
import (
"encoding/json"
"errors"
"fmt"
"log"
"os"
"strings"
"sync"
"github.com/meshcore-analyzer/dbconfig"
"github.com/meshcore-analyzer/geofilter"
)
@@ -22,17 +18,6 @@ type MQTTSource struct {
RejectUnauthorized *bool `json:"rejectUnauthorized,omitempty"`
Topics []string `json:"topics"`
IATAFilter []string `json:"iataFilter,omitempty"`
ConnectTimeoutSec int `json:"connectTimeoutSec,omitempty"`
Region string `json:"region,omitempty"`
}
// ConnectTimeoutOrDefault reports this source's connect timeout in seconds.
// An unset or non-positive ConnectTimeoutSec falls back to 30 (matching the
// WaitTimeout default from #926).
func (s MQTTSource) ConnectTimeoutOrDefault() int {
	if s.ConnectTimeoutSec <= 0 {
		return 30
	}
	return s.ConnectTimeoutSec
}
// MQTTLegacy is the old single-broker config format.
@@ -50,101 +35,18 @@ type Config struct {
ChannelKeysPath string `json:"channelKeysPath,omitempty"`
ChannelKeys map[string]string `json:"channelKeys,omitempty"`
HashChannels []string `json:"hashChannels,omitempty"`
HashRegions []string `json:"hashRegions,omitempty"`
Retention *RetentionConfig `json:"retention,omitempty"`
Metrics *MetricsConfig `json:"metrics,omitempty"`
Runtime *RuntimeConfig `json:"runtime,omitempty"`
GeoFilter *GeoFilterConfig `json:"geo_filter,omitempty"`
ForeignAdverts *ForeignAdvertConfig `json:"foreignAdverts,omitempty"`
ValidateSignatures *bool `json:"validateSignatures,omitempty"`
DB *DBConfig `json:"db,omitempty"`
// ObserverIATAWhitelist restricts which observer IATA regions are processed.
// When non-empty, only observers whose IATA code (from the MQTT topic) matches
// one of these entries are accepted. Case-insensitive. An empty list means all
// IATA codes are allowed. This applies globally, unlike the per-source iataFilter.
ObserverIATAWhitelist []string `json:"observerIATAWhitelist,omitempty"`
// obsIATAWhitelistCached is the lazily-built uppercase set for O(1) lookups.
obsIATAWhitelistCached map[string]bool
obsIATAWhitelistOnce sync.Once
// ObserverBlacklist is a list of observer public keys to drop at ingest.
// Messages from blacklisted observers are silently discarded — no DB writes,
// no UpsertObserver, no observations, no metrics.
ObserverBlacklist []string `json:"observerBlacklist,omitempty"`
// obsBlacklistSetCached is the lazily-built lowercase set for O(1) lookups.
obsBlacklistSetCached map[string]bool
obsBlacklistOnce sync.Once
// NeighborEdgesMaxAgeDays controls neighbor_edges row retention
// (#1287 — moved from cmd/server). 0 = default 5.
NeighborEdgesMaxAgeDays int `json:"neighborEdgesMaxAgeDays,omitempty"`
// IngestBufferSize caps the in-memory queue (number of MQTT messages) held
// while the single SQLite writer is blocked by startup migrations/prunes
// (#1608). Received messages are drained once the write path is ready.
// 0 / unset => default. Bounded memory.
IngestBufferSize int `json:"ingestBufferSize,omitempty"`
}
// NeighborEdgesDaysOrDefault reports the neighbor_edges pruning window in
// days. A nil receiver or a non-positive NeighborEdgesMaxAgeDays yields 5.
func (c *Config) NeighborEdgesDaysOrDefault() int {
	if c != nil && c.NeighborEdgesMaxAgeDays > 0 {
		return c.NeighborEdgesMaxAgeDays
	}
	return 5
}
// IngestBufferSizeOrDefault returns the ingest buffer capacity. Default 50000:
// at typical mesh rates (~1-2 msg/s) that is many minutes of headroom while a
// startup migration holds the writer; each queued item is a small closure, so
// worst-case memory stays in the tens of MB.
func (c *Config) IngestBufferSizeOrDefault() int {
if c.IngestBufferSize > 0 {
return c.IngestBufferSize
}
return 50000
GeoFilter *GeoFilterConfig `json:"geo_filter,omitempty"`
}
// GeoFilterConfig is an alias for the shared geofilter.Config type.
type GeoFilterConfig = geofilter.Config
// ForeignAdvertConfig selects what the ingestor does with ADVERTs whose GPS
// position lies outside the configured geofilter polygon (#730). Modes:
//   - "flag" (default): store the advert/node and tag it foreign for visibility.
//   - "drop": silently discard the advert (legacy behavior).
type ForeignAdvertConfig struct {
	Mode string `json:"mode,omitempty"`
}

// IsDropMode reports whether Mode is "drop", compared case-insensitively
// after trimming surrounding whitespace. A nil receiver or any other value
// (including empty) reports false, i.e. "flag" mode.
func (f *ForeignAdvertConfig) IsDropMode() bool {
	return f != nil && strings.EqualFold(strings.TrimSpace(f.Mode), "drop")
}
// RetentionConfig controls how long stale nodes are kept before being moved to inactive_nodes.
// It also carries the retention windows for observers, metrics and packets.
// All fields are serialized without omitempty, so zero values appear in JSON output.
type RetentionConfig struct {
// NodeDays is the node retention window (JSON key "nodeDays").
// NOTE(review): presumably expressed in days like the other fields — confirm against NodeDaysOrDefault.
NodeDays int `json:"nodeDays"`
// ObserverDays is the observer retention window. ObserverDaysOrDefault treats
// 0 as "unset" (default 14) and passes any other value through; -1 is
// documented there as "observers are never removed".
ObserverDays int `json:"observerDays"`
// MetricsDays is read from JSON key "metricsDays".
// NOTE(review): presumably the retention window for observer metrics — confirm against the pruning code.
MetricsDays int `json:"metricsDays"`
// PacketDays is the retention window for transmissions (#1283).
// Ownership moved from cmd/server to cmd/ingestor; 0 disables.
PacketDays int `json:"packetDays"`
}
// PacketDaysOrZero returns the configured retention.packetDays or 0
// (disabled) if not set.
func (c *Config) PacketDaysOrZero() int {
if c.Retention != nil && c.Retention.PacketDays > 0 {
return c.Retention.PacketDays
}
return 0
NodeDays int `json:"nodeDays"`
MetricsDays int `json:"metricsDays"`
}
// MetricsConfig controls observer metrics collection.
@@ -152,34 +54,6 @@ type MetricsConfig struct {
SampleIntervalSec int `json:"sampleIntervalSec"`
}
// RuntimeConfig holds Go runtime tuning knobs (#1010).
// It currently exposes a single setting, MaxMemoryMB.
type RuntimeConfig struct {
// MaxMemoryMB is the soft memory limit (GOMEMLIMIT) in MiB applied via
// runtime/debug.SetMemoryLimit at startup. The GOMEMLIMIT environment
// variable, when set, takes precedence over this value. 0/unset means
// no limit is applied and default Go runtime behavior is preserved.
// The JSON tag has no omitempty, so an unset value is written out as 0.
MaxMemoryMB int `json:"maxMemoryMB"`
}
// DBConfig is the shared SQLite vacuum/maintenance config (#919, #921).
// It is a type alias (declared with "=") for dbconfig.DBConfig, so both names
// refer to the same type and no conversion is needed between packages.
type DBConfig = dbconfig.DBConfig
// IncrementalVacuumPages returns the configured number of pages per
// incremental vacuum, or the default of 1024.
//
// The default applies when the receiver is nil, when no DB section is
// configured, or when DB.IncrementalVacuumPages is zero or negative. The nil
// receiver guard keeps this accessor consistent with the other nil-safe
// Config accessors (for example NeighborEdgesDaysOrDefault).
func (c *Config) IncrementalVacuumPages() int {
	const defaultPages = 1024
	if c == nil || c.DB == nil || c.DB.IncrementalVacuumPages <= 0 {
		return defaultPages
	}
	return c.DB.IncrementalVacuumPages
}
// ShouldValidateSignatures reports whether signature validation is enabled.
//
// Validation is on by default: the result is true unless ValidateSignatures
// is explicitly set to false. A nil receiver is treated like an empty config
// and therefore also returns true, matching the nil-safe behavior of the
// other Config accessors in this file.
func (c *Config) ShouldValidateSignatures() bool {
	if c == nil || c.ValidateSignatures == nil {
		return true
	}
	return *c.ValidateSignatures
}
// MetricsSampleInterval returns the configured sample interval or 300s default.
func (c *Config) MetricsSampleInterval() int {
if c.Metrics != nil && c.Metrics.SampleIntervalSec > 0 {
@@ -204,68 +78,16 @@ func (c *Config) NodeDaysOrDefault() int {
return 7
}
// ObserverDaysOrDefault returns the configured retention.observerDays, or 14
// when it is not set.
//
// "Not set" means a nil receiver, a missing retention section, or an
// ObserverDays of exactly 0. Every other value is returned unchanged, which
// includes negatives: a value of -1 means observers are never removed. The
// nil receiver guard keeps this accessor consistent with the other nil-safe
// Config accessors (for example NeighborEdgesDaysOrDefault).
func (c *Config) ObserverDaysOrDefault() int {
	const defaultDays = 14
	if c == nil || c.Retention == nil || c.Retention.ObserverDays == 0 {
		return defaultDays
	}
	return c.Retention.ObserverDays
}
// IsObserverBlacklisted reports whether id appears in ObserverBlacklist.
//
// Both the configured entries and id are compared after trimming surrounding
// whitespace and lower-casing. Blank entries are ignored. A nil receiver or an
// empty blacklist always yields false. The lookup set is built once, on the
// first call that reaches it, through obsBlacklistOnce and then reused.
func (c *Config) IsObserverBlacklisted(id string) bool {
	if c == nil {
		return false
	}
	if len(c.ObserverBlacklist) == 0 {
		return false
	}

	normalize := func(s string) string {
		return strings.ToLower(strings.TrimSpace(s))
	}

	c.obsBlacklistOnce.Do(func() {
		set := make(map[string]bool, len(c.ObserverBlacklist))
		for i := range c.ObserverBlacklist {
			key := normalize(c.ObserverBlacklist[i])
			if key == "" {
				continue
			}
			set[key] = true
		}
		c.obsBlacklistSetCached = set
	})

	return c.obsBlacklistSetCached[normalize(id)]
}
// IsObserverIATAAllowed reports whether the given IATA code is permitted.
//
// With a nil receiver or an empty ObserverIATAWhitelist every code is allowed,
// including the empty string. Otherwise the code must match a whitelist entry
// after both sides are trimmed of surrounding whitespace and upper-cased;
// blank whitelist entries are ignored. The lookup set is built once through
// obsIATAWhitelistOnce and reused on later calls.
func (c *Config) IsObserverIATAAllowed(iata string) bool {
	if c == nil {
		return true
	}
	if len(c.ObserverIATAWhitelist) == 0 {
		return true
	}

	normalize := func(s string) string {
		return strings.ToUpper(strings.TrimSpace(s))
	}

	c.obsIATAWhitelistOnce.Do(func() {
		allowed := make(map[string]bool, len(c.ObserverIATAWhitelist))
		for i := range c.ObserverIATAWhitelist {
			key := normalize(c.ObserverIATAWhitelist[i])
			if key == "" {
				continue
			}
			allowed[key] = true
		}
		c.obsIATAWhitelistCached = allowed
	})

	return c.obsIATAWhitelistCached[normalize(iata)]
}
// LoadConfig reads configuration from a JSON file, with env var overrides.
// If the config file does not exist, sensible defaults are used (zero-config startup).
func LoadConfig(path string) (*Config, error) {
var cfg Config
data, err := os.ReadFile(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
return nil, fmt.Errorf("reading config %s: %w", path, err)
}
// Config file doesn't exist — use defaults (zero-config mode)
log.Printf("config file %s not found, using sensible defaults", path)
} else {
if err := json.Unmarshal(data, &cfg); err != nil {
return nil, fmt.Errorf("parsing config %s: %w", path, err)
}
return nil, fmt.Errorf("reading config %s: %w", path, err)
}
var cfg Config
if err := json.Unmarshal(data, &cfg); err != nil {
return nil, fmt.Errorf("parsing config %s: %w", path, err)
}
// Env var overrides
@@ -299,38 +121,19 @@ func LoadConfig(path string) (*Config, error) {
}}
}
// Default MQTT source: connect to localhost broker when no sources configured
if len(cfg.MQTTSources) == 0 {
cfg.MQTTSources = []MQTTSource{{
Name: "local",
Broker: "mqtt://localhost:1883",
Topics: []string{"meshcore/#"},
}}
log.Printf("no MQTT sources configured, defaulting to mqtt://localhost:1883")
}
return &cfg, nil
}
// ResolvedSources returns the final list of MQTT sources to connect to,
// rewriting each broker URL scheme into the form paho expects.
//
// Scheme mapping:
//
//	mqtt://  → tcp://  (paho plain TCP)
//	mqtts:// → ssl://  (paho TLS over TCP)
//	ws://    (paho WebSocket — passed through, no mapping needed)
//	wss://   (paho WebSocket TLS — passed through, no mapping needed)
//
// The rewrite happens in place: c.MQTTSources itself is modified and the same
// slice is returned. Already-mapped brokers are left alone, so repeated calls
// give the same result.
func (c *Config) ResolvedSources() []MQTTSource {
	for idx := range c.MQTTSources {
		src := &c.MQTTSources[idx]
		// paho uses tcp:// and ssl:// for plain MQTT; ws:// and wss:// are
		// accepted natively, so any other scheme falls through untouched.
		switch {
		case strings.HasPrefix(src.Broker, "mqtt://"):
			src.Broker = "tcp://" + strings.TrimPrefix(src.Broker, "mqtt://")
		case strings.HasPrefix(src.Broker, "mqtts://"):
			src.Broker = "ssl://" + strings.TrimPrefix(src.Broker, "mqtts://")
		}
	}
	return c.MQTTSources
}
+5 -233
View File
@@ -32,25 +32,9 @@ func TestLoadConfigValidJSON(t *testing.T) {
}
func TestLoadConfigMissingFile(t *testing.T) {
t.Setenv("DB_PATH", "")
t.Setenv("MQTT_BROKER", "")
cfg, err := LoadConfig("/nonexistent/path/config.json")
if err != nil {
t.Fatalf("missing config should not error (zero-config mode), got: %v", err)
}
if cfg.DBPath != "data/meshcore.db" {
t.Errorf("dbPath=%s, want data/meshcore.db", cfg.DBPath)
}
// Should default to localhost MQTT
if len(cfg.MQTTSources) != 1 {
t.Fatalf("mqttSources len=%d, want 1", len(cfg.MQTTSources))
}
if cfg.MQTTSources[0].Broker != "mqtt://localhost:1883" {
t.Errorf("default broker=%s, want mqtt://localhost:1883", cfg.MQTTSources[0].Broker)
}
if cfg.MQTTSources[0].Name != "local" {
t.Errorf("default source name=%s, want local", cfg.MQTTSources[0].Name)
_, err := LoadConfig("/nonexistent/path/config.json")
if err == nil {
t.Error("expected error for missing file")
}
}
@@ -212,8 +196,8 @@ func TestLoadConfigLegacyMQTTEmptyBroker(t *testing.T) {
if err != nil {
t.Fatal(err)
}
if len(cfg.MQTTSources) != 1 || cfg.MQTTSources[0].Name != "local" {
t.Errorf("mqttSources should default to local broker when legacy broker is empty, got %v", cfg.MQTTSources)
if len(cfg.MQTTSources) != 0 {
t.Errorf("mqttSources should be empty when legacy broker is empty, got %d", len(cfg.MQTTSources))
}
}
@@ -284,215 +268,3 @@ func TestLoadConfigWithAllFields(t *testing.T) {
t.Errorf("iataFilter=%v", src.IATAFilter)
}
}
// TestConnectTimeoutOrDefault checks that MQTTSource.ConnectTimeoutOrDefault
// returns 30 when ConnectTimeoutSec is unset or zero, and the configured value
// otherwise.
func TestConnectTimeoutOrDefault(t *testing.T) {
	// Unset field falls back to the default.
	var src MQTTSource
	got := src.ConnectTimeoutOrDefault()
	if got != 30 {
		t.Errorf("default: got %d, want 30", got)
	}

	// An explicit value is returned as-is.
	src.ConnectTimeoutSec = 5
	got = src.ConnectTimeoutOrDefault()
	if got != 5 {
		t.Errorf("custom: got %d, want 5", got)
	}

	// Resetting to zero behaves like "unset" again.
	src.ConnectTimeoutSec = 0
	got = src.ConnectTimeoutOrDefault()
	if got != 30 {
		t.Errorf("zero: got %d, want 30", got)
	}
}
// TestConnectTimeoutFromJSON verifies that a connectTimeoutSec value set on an
// MQTT source in the JSON config survives LoadConfig and is what
// ConnectTimeoutOrDefault returns.
func TestConnectTimeoutFromJSON(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "config.json")
	fixture := []byte(`{"mqttSources":[{"name":"s1","broker":"tcp://b:1883","topics":["#"],"connectTimeoutSec":5}]}`)
	// Fail loudly if the fixture cannot be written; otherwise the assertions
	// below would be reported against a config that was never on disk.
	if err := os.WriteFile(cfgPath, fixture, 0644); err != nil {
		t.Fatalf("writing config fixture: %v", err)
	}

	cfg, err := LoadConfig(cfgPath)
	if err != nil {
		t.Fatal(err)
	}
	// Guard the index so a parsing regression fails the test instead of panicking.
	if len(cfg.MQTTSources) == 0 {
		t.Fatal("mqttSources is empty after LoadConfig, want 1 source")
	}
	if got := cfg.MQTTSources[0].ConnectTimeoutOrDefault(); got != 5 {
		t.Errorf("from JSON: got %d, want 5", got)
	}
}
// TestObserverIATAWhitelist exercises IsObserverIATAAllowed with a populated
// whitelist: matches are case-insensitive, unknown codes are rejected, and the
// empty string is rejected.
func TestObserverIATAWhitelist(t *testing.T) {
	cfg := Config{
		ObserverIATAWhitelist: []string{"ARN", "got"},
	}

	checks := []struct {
		iata    string
		want    bool
		failMsg string
	}{
		// Matching (case-insensitive)
		{"ARN", true, "ARN should be allowed"},
		{"arn", true, "arn (lowercase) should be allowed"},
		{"GOT", true, "GOT should be allowed"},
		// Non-matching
		{"SJC", false, "SJC should NOT be allowed"},
		// Empty string not allowed
		{"", false, "empty IATA should NOT be allowed"},
	}
	for _, chk := range checks {
		if cfg.IsObserverIATAAllowed(chk.iata) != chk.want {
			t.Error(chk.failMsg)
		}
	}
}
// TestObserverIATAWhitelistEmpty checks that a config without a whitelist
// allows every IATA code, the empty string included.
func TestObserverIATAWhitelistEmpty(t *testing.T) {
	var cfg Config // no whitelist = allow all

	allowedSJC := cfg.IsObserverIATAAllowed("SJC")
	if !allowedSJC {
		t.Error("with no whitelist, all IATAs should be allowed")
	}

	allowedEmpty := cfg.IsObserverIATAAllowed("")
	if !allowedEmpty {
		t.Error("with no whitelist, even empty IATA should be allowed")
	}
}
// TestObserverIATAWhitelistJSON verifies that observerIATAWhitelist is read
// from the JSON config file and is honored by IsObserverIATAAllowed.
func TestObserverIATAWhitelistJSON(t *testing.T) {
	// Named "fixture" rather than "json" so the encoding/json package name is
	// not shadowed inside this function.
	fixture := `{
"dbPath": "test.db",
"observerIATAWhitelist": ["ARN", "GOT"]
}`
	cfgPath := filepath.Join(t.TempDir(), "config.json")
	// Fail loudly if the fixture cannot be written instead of letting
	// LoadConfig run against a file that is not there.
	if err := os.WriteFile(cfgPath, []byte(fixture), 0644); err != nil {
		t.Fatalf("writing config fixture: %v", err)
	}

	cfg, err := LoadConfig(cfgPath)
	if err != nil {
		t.Fatal(err)
	}
	if len(cfg.ObserverIATAWhitelist) != 2 {
		t.Fatalf("expected 2 entries, got %d", len(cfg.ObserverIATAWhitelist))
	}
	if !cfg.IsObserverIATAAllowed("ARN") {
		t.Error("ARN should be allowed after loading from JSON")
	}
}
// TestMQTTSourceRegionField verifies that the "region" key of an MQTT source
// in the JSON config is loaded into MQTTSource.Region.
func TestMQTTSourceRegionField(t *testing.T) {
	cfgPath := filepath.Join(t.TempDir(), "config.json")
	fixture := []byte(`{
"dbPath": "/tmp/test.db",
"mqttSources": [
{"name": "cascadia", "broker": "tcp://localhost:1883", "topics": ["meshcore/#"], "region": "PDX"}
]
}`)
	// Fail loudly if the fixture cannot be written instead of letting
	// LoadConfig run against a file that is not there.
	if err := os.WriteFile(cfgPath, fixture, 0o644); err != nil {
		t.Fatalf("writing config fixture: %v", err)
	}

	cfg, err := LoadConfig(cfgPath)
	if err != nil {
		t.Fatal(err)
	}
	// Guard the index so a parsing regression fails the test instead of panicking.
	if len(cfg.MQTTSources) == 0 {
		t.Fatal("mqttSources is empty after LoadConfig, want 1 source")
	}
	if cfg.MQTTSources[0].Region != "PDX" {
		t.Fatalf("expected region PDX, got %q", cfg.MQTTSources[0].Region)
	}
}
// TestResolvedSourcesSchemeMapping verifies that mqtt:// and mqtts:// are translated
// to the paho-native tcp:// and ssl:// schemes, while ws:// and wss:// pass through
// unchanged (paho handles WebSocket connections natively).
func TestResolvedSourcesSchemeMapping(t *testing.T) {
tests := []struct {
input string
want string
}{
{"mqtt://host:1883", "tcp://host:1883"},
{"mqtts://host:8883", "ssl://host:8883"},
{"tcp://host:1883", "tcp://host:1883"},
{"ssl://host:8883", "ssl://host:8883"},
{"ws://host:9001", "ws://host:9001"},
{"wss://host:9001", "wss://host:9001"},
{"ws://host:9001/mqtt", "ws://host:9001/mqtt"},
{"wss://host:9001/mqtt", "wss://host:9001/mqtt"},
}
for _, tt := range tests {
cfg := &Config{
MQTTSources: []MQTTSource{
{Name: "test", Broker: tt.input, Topics: []string{"meshcore/#"}},
},
}
sources := cfg.ResolvedSources()
if got := sources[0].Broker; got != tt.want {
t.Errorf("ResolvedSources(%q) = %q, want %q", tt.input, got, tt.want)
}
}
}
// TestLoadConfigWSSource verifies that a WebSocket MQTT source round-trips
// through LoadConfig correctly: name, username and password are preserved and
// the wss:// scheme is left unchanged, both right after loading and after
// ResolvedSources has been applied.
func TestLoadConfigWSSource(t *testing.T) {
	// Clear the env overrides so only the file contents drive the result.
	t.Setenv("DB_PATH", "")
	t.Setenv("MQTT_BROKER", "")

	cfgPath := filepath.Join(t.TempDir(), "config.json")
	fixture := []byte(`{
"dbPath": "test.db",
"mqttSources": [
{
"name": "local-tcp",
"broker": "mqtt://localhost:1883",
"topics": ["meshcore/#"]
},
{
"name": "wsmqtt-ws",
"broker": "wss://wsmqtt.example.com/mqtt",
"username": "corescope",
"password": "s3cr3t",
"topics": ["meshcore/#"]
}
]
}`)
	// Fail loudly if the fixture cannot be written instead of letting
	// LoadConfig run against a file that is not there.
	if err := os.WriteFile(cfgPath, fixture, 0o644); err != nil {
		t.Fatalf("writing config fixture: %v", err)
	}

	cfg, err := LoadConfig(cfgPath)
	if err != nil {
		t.Fatal(err)
	}
	if len(cfg.MQTTSources) != 2 {
		t.Fatalf("mqttSources len=%d, want 2", len(cfg.MQTTSources))
	}

	tcp := cfg.MQTTSources[0]
	if tcp.Name != "local-tcp" {
		t.Errorf("name=%s, want local-tcp", tcp.Name)
	}

	ws := cfg.MQTTSources[1]
	if ws.Name != "wsmqtt-ws" {
		t.Errorf("name=%s, want wsmqtt-ws", ws.Name)
	}
	if ws.Broker != "wss://wsmqtt.example.com/mqtt" {
		t.Errorf("broker=%s, want wss://wsmqtt.example.com/mqtt", ws.Broker)
	}
	if ws.Username != "corescope" {
		t.Errorf("username=%s, want corescope", ws.Username)
	}
	if ws.Password != "s3cr3t" {
		t.Errorf("password=%s, want s3cr3t", ws.Password)
	}

	// The wss:// broker must also survive the scheme-mapping step untouched.
	sources := cfg.ResolvedSources()
	if sources[1].Broker != "wss://wsmqtt.example.com/mqtt" {
		t.Errorf("ResolvedSources wss broker=%s, want unchanged", sources[1].Broker)
	}
}
// TestIngestBufferSizeOrDefault checks that IngestBufferSizeOrDefault returns
// 50000 for an unset or negative IngestBufferSize and the configured value
// when it is positive.
func TestIngestBufferSizeOrDefault(t *testing.T) {
	unset := &Config{}
	got := unset.IngestBufferSizeOrDefault()
	if got != 50000 {
		t.Fatalf("default: want 50000, got %d", got)
	}

	overridden := &Config{IngestBufferSize: 10}
	got = overridden.IngestBufferSizeOrDefault()
	if got != 10 {
		t.Fatalf("override: want 10, got %d", got)
	}

	negative := &Config{IngestBufferSize: -5}
	got = negative.IngestBufferSizeOrDefault()
	if got != 50000 {
		t.Fatalf("invalid negative should fall back to default, got %d", got)
	}
}
+41 -225
View File
@@ -5,10 +5,7 @@ import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"os"
"path/filepath"
"testing"
"time"
)
// hmacSHA256 computes HMAC-SHA256 for test use.
@@ -160,7 +157,7 @@ func TestHandleMessageChannelMessage(t *testing.T) {
payload := []byte(`{"text":"Alice: Hello everyone","channel_idx":3,"SNR":5.0,"RSSI":-95,"score":10,"direction":"rx","sender_timestamp":1700000000}`)
msg := &mockMessage{topic: "meshcore/message/channel/2", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -206,13 +203,21 @@ func TestHandleMessageChannelMessage(t *testing.T) {
t.Errorf("direction=%v, want rx", direction)
}
// Sender node should NOT be created (see issue #665: synthetic "sender-" keys
// are unreachable from the claiming/health flow)
// Should create sender node
if err := store.db.QueryRow("SELECT COUNT(*) FROM nodes").Scan(&count); err != nil {
t.Fatal(err)
}
if count != 0 {
t.Errorf("nodes count=%d, want 0 (no phantom sender node)", count)
if count != 1 {
t.Errorf("nodes count=%d, want 1 (sender node)", count)
}
// Verify sender node name
var nodeName string
if err := store.db.QueryRow("SELECT name FROM nodes LIMIT 1").Scan(&nodeName); err != nil {
t.Fatal(err)
}
if nodeName != "Alice" {
t.Errorf("node name=%s, want Alice", nodeName)
}
}
@@ -220,7 +225,7 @@ func TestHandleMessageChannelMessageEmptyText(t *testing.T) {
store, source := newTestContext(t)
msg := &mockMessage{topic: "meshcore/message/channel/1", payload: []byte(`{"text":""}`)}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -235,7 +240,7 @@ func TestHandleMessageChannelNoSender(t *testing.T) {
store, source := newTestContext(t)
msg := &mockMessage{topic: "meshcore/message/channel/1", payload: []byte(`{"text":"no sender here"}`)}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM nodes").Scan(&count); err != nil {
@@ -252,7 +257,7 @@ func TestHandleMessageDirectMessage(t *testing.T) {
payload := []byte(`{"text":"Bob: Hey there","sender_timestamp":1700000000,"SNR":3.0,"rssi":-100,"Score":8,"Direction":"tx"}`)
msg := &mockMessage{topic: "meshcore/message/direct/abc123", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -296,7 +301,7 @@ func TestHandleMessageDirectMessageEmptyText(t *testing.T) {
store, source := newTestContext(t)
msg := &mockMessage{topic: "meshcore/message/direct/abc", payload: []byte(`{"text":""}`)}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -311,7 +316,7 @@ func TestHandleMessageDirectNoSender(t *testing.T) {
store, source := newTestContext(t)
msg := &mockMessage{topic: "meshcore/message/direct/xyz", payload: []byte(`{"text":"message with no colon"}`)}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -330,7 +335,7 @@ func TestHandleMessageUppercaseScoreDirection(t *testing.T) {
payload := []byte(`{"raw":"` + rawHex + `","Score":9.0,"Direction":"tx"}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var score *float64
var direction *string
@@ -351,7 +356,7 @@ func TestHandleMessageChannelLowercaseFields(t *testing.T) {
payload := []byte(`{"text":"Test: msg","snr":3.0,"rssi":-90,"Score":5,"Direction":"rx"}`)
msg := &mockMessage{topic: "meshcore/message/channel/0", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -367,7 +372,7 @@ func TestHandleMessageDirectLowercaseFields(t *testing.T) {
payload := []byte(`{"text":"Test: msg","snr":2.0,"rssi":-85,"score":7,"direction":"tx"}`)
msg := &mockMessage{topic: "meshcore/message/direct/xyz", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -390,7 +395,7 @@ func TestHandleMessageAdvertWithTelemetry(t *testing.T) {
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
// Should have created transmission, node, and observer
var txCount, nodeCount, obsCount int
@@ -430,12 +435,7 @@ func TestHandleMessageAdvertGeoFiltered(t *testing.T) {
topic: "meshcore/SJC/obs1/packets",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
// Legacy silent-drop behavior is now opt-in via ForeignAdverts.Mode="drop"
// (#730). The new default — flag — is covered by foreign_advert_test.go.
handleMessage(store, "test", source, msg, nil, nil, &Config{
GeoFilter: gf,
ForeignAdverts: &ForeignAdvertConfig{Mode: "drop"},
})
handleMessage(store, "test", source, msg, nil, gf)
// Geo-filtered adverts should not create nodes
var nodeCount int
@@ -443,7 +443,7 @@ func TestHandleMessageAdvertGeoFiltered(t *testing.T) {
t.Fatal(err)
}
if nodeCount != 0 {
t.Errorf("nodes=%d, want 0 (geo-filtered advert in drop mode should not create node)", nodeCount)
t.Errorf("nodes=%d, want 0 (geo-filtered advert should not create node)", nodeCount)
}
}
@@ -461,7 +461,7 @@ func TestDecodeAdvertLocationTruncated(t *testing.T) {
buf[100] = 0x11
// Only 4 bytes after flags — not enough for full location (needs 8)
p := decodeAdvert(buf[:105], false)
p := decodeAdvert(buf[:105])
if p.Error != "" {
t.Fatalf("error: %s", p.Error)
}
@@ -483,7 +483,7 @@ func TestDecodeAdvertFeat1Truncated(t *testing.T) {
buf[100] = 0x21
// Only 1 byte after flags — not enough for feat1 (needs 2)
p := decodeAdvert(buf[:102], false)
p := decodeAdvert(buf[:102])
if p.Feat1 != nil {
t.Error("feat1 should be nil with truncated data")
}
@@ -504,7 +504,7 @@ func TestDecodeAdvertFeat2Truncated(t *testing.T) {
buf[102] = 0x00
// Only 1 byte left — not enough for feat2
p := decodeAdvert(buf[:104], false)
p := decodeAdvert(buf[:104])
if p.Feat1 == nil {
t.Error("feat1 should be set")
}
@@ -544,7 +544,7 @@ func TestDecodeAdvertSensorBadTelemetry(t *testing.T) {
buf[105] = 0x20
buf[106] = 0x4E
p := decodeAdvert(buf[:107], false)
p := decodeAdvert(buf[:107])
if p.BatteryMv != nil {
t.Error("battery_mv=0 should be nil")
}
@@ -672,7 +672,7 @@ func TestHandleMessageCorruptedAdvertNoNode(t *testing.T) {
topic: "meshcore/SJC/obs1/packets",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM nodes").Scan(&count); err != nil {
@@ -694,7 +694,7 @@ func TestHandleMessageNonAdvertPacket(t *testing.T) {
topic: "meshcore/SJC/obs1/packets",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -740,7 +740,7 @@ func TestDecodeAdvertSensorNoName(t *testing.T) {
buf[103] = 0xC4
buf[104] = 0x09
p := decodeAdvert(buf[:105], false)
p := decodeAdvert(buf[:105])
if p.Error != "" {
t.Fatalf("error: %s", p.Error)
}
@@ -755,13 +755,8 @@ func TestDecodeAdvertSensorNoName(t *testing.T) {
// --- db.go: OpenStore error path (invalid dir) ---
func TestOpenStoreInvalidPath(t *testing.T) {
// Create a regular file then try to open a DB inside it — impossible on all platforms.
f, err := os.CreateTemp(t.TempDir(), "not-a-dir")
if err != nil {
t.Fatalf("setup: %v", err)
}
f.Close()
_, err = OpenStore(filepath.Join(f.Name(), "db.sqlite"))
// Path under /dev/null can't create directory
_, err := OpenStore("/dev/null/impossible/path/db.sqlite")
if err == nil {
t.Error("should error on impossible path")
}
@@ -840,7 +835,7 @@ func TestDecodePacketNoPathByteAfterHeader(t *testing.T) {
// Non-transport route, but only header byte (no path byte)
// Actually 0A alone = 1 byte, but we need >= 2
// Header + exactly at offset boundary
_, err := DecodePacket("0A", nil, false)
_, err := DecodePacket("0A", nil)
if err == nil {
t.Error("should error - too short")
}
@@ -861,7 +856,7 @@ func TestDecodeAdvertNameNoNull(t *testing.T) {
// Name without null terminator — goes to end of buffer
copy(buf[101:], []byte("LongNameNoNull"))
p := decodeAdvert(buf[:115], false)
p := decodeAdvert(buf[:115])
if p.Name != "LongNameNoNull" {
t.Errorf("name=%q, want LongNameNoNull", p.Name)
}
@@ -876,7 +871,7 @@ func TestHandleMessageChannelLongSender(t *testing.T) {
longText := "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA: msg"
payload := []byte(`{"text":"` + longText + `"}`)
msg := &mockMessage{topic: "meshcore/message/channel/1", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM nodes").Scan(&count); err != nil {
@@ -895,7 +890,7 @@ func TestHandleMessageDirectLongSender(t *testing.T) {
longText := "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB: msg"
payload := []byte(`{"text":"` + longText + `"}`)
msg := &mockMessage{topic: "meshcore/message/direct/abc", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -912,7 +907,7 @@ func TestHandleMessageDirectUppercaseScoreDirection(t *testing.T) {
payload := []byte(`{"text":"X: hi","Score":6,"Direction":"rx"}`)
msg := &mockMessage{topic: "meshcore/message/direct/d1", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -942,7 +937,7 @@ func TestHandleMessageChannelUppercaseScoreDirection(t *testing.T) {
payload := []byte(`{"text":"Y: hi","Score":4,"Direction":"tx"}`)
msg := &mockMessage{topic: "meshcore/message/channel/5", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
@@ -973,7 +968,7 @@ func TestHandleMessageRawLowercaseScore(t *testing.T) {
rawHex := "0A00D69FD7A5A7475DB07337749AE61FA53A4788E976"
payload := []byte(`{"raw":"` + rawHex + `","score":3.5}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var score *float64
if err := store.db.QueryRow("SELECT score FROM observations LIMIT 1").Scan(&score); err != nil {
@@ -992,7 +987,7 @@ func TestHandleMessageStatusNoOrigin(t *testing.T) {
topic: "meshcore/LAX/obs5/status",
payload: []byte(`{"model":"L1"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
if err := store.db.QueryRow("SELECT COUNT(*) FROM observers WHERE id = 'obs5'").Scan(&count); err != nil {
@@ -1151,182 +1146,3 @@ func TestDecodeTraceWithPath(t *testing.T) {
t.Errorf("flags=%v, want 3", p.TraceFlags)
}
}
// --- db.go: RemoveStaleObservers (soft-delete) ---
// TestRemoveStaleObservers checks the soft-delete path of
// RemoveStaleObservers(14): an observer last seen 30 days ago is counted as
// removed and flagged inactive, its row stays in the table, and a freshly
// upserted observer remains active.
func TestRemoveStaleObservers(t *testing.T) {
	store := newTestStore(t)

	// Seed an observer and backdate its last_seen to 30 days ago.
	if err := store.UpsertObserver("obs-old", "OldObserver", "LAX", nil); err != nil {
		t.Fatal(err)
	}
	staleSeen := time.Now().UTC().AddDate(0, 0, -30).Format(time.RFC3339)
	if _, err := store.db.Exec("UPDATE observers SET last_seen = ? WHERE id = ?", staleSeen, "obs-old"); err != nil {
		t.Fatal(err)
	}

	// Seed a second observer that keeps its fresh last_seen.
	if err := store.UpsertObserver("obs-new", "NewObserver", "NYC", nil); err != nil {
		t.Fatal(err)
	}

	removed, err := store.RemoveStaleObservers(14)
	if err != nil {
		t.Fatal(err)
	}
	if removed != 1 {
		t.Errorf("removed=%d, want 1", removed)
	}

	// Soft-delete: both rows must still be present.
	var total int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM observers").Scan(&total); err != nil {
		t.Fatal(err)
	}
	if total != 2 {
		t.Errorf("observers count=%d, want 2 (soft-delete preserves row)", total)
	}

	// inactiveFlag reads the inactive column for one observer id.
	inactiveFlag := func(id string) int {
		var flag int
		if err := store.db.QueryRow("SELECT inactive FROM observers WHERE id = ?", id).Scan(&flag); err != nil {
			t.Fatal(err)
		}
		return flag
	}

	if oldFlag := inactiveFlag("obs-old"); oldFlag != 1 {
		t.Errorf("obs-old inactive=%d, want 1", oldFlag)
	}
	if newFlag := inactiveFlag("obs-new"); newFlag != 0 {
		t.Errorf("obs-new inactive=%d, want 0", newFlag)
	}
}
// TestRemoveStaleObserversNone checks that RemoveStaleObservers(14) on a store
// with no observers succeeds and reports zero removals.
func TestRemoveStaleObserversNone(t *testing.T) {
	emptyStore := newTestStore(t)

	count, err := emptyStore.RemoveStaleObservers(14)
	if err != nil {
		t.Fatal(err)
	}
	if count != 0 {
		t.Errorf("removed=%d, want 0", count)
	}
}
// TestRemoveStaleObserversKeepForever checks that RemoveStaleObservers(-1)
// leaves even a year-old observer alone: nothing is reported as removed, the
// row is still present, and it is not flagged inactive.
func TestRemoveStaleObserversKeepForever(t *testing.T) {
	store := newTestStore(t)

	// Seed an observer and backdate its last_seen by 365 days.
	if err := store.UpsertObserver("obs-ancient", "AncientObserver", "LAX", nil); err != nil {
		t.Fatal(err)
	}
	ancientSeen := time.Now().UTC().AddDate(0, 0, -365).Format(time.RFC3339)
	if _, err := store.db.Exec("UPDATE observers SET last_seen = ? WHERE id = ?", ancientSeen, "obs-ancient"); err != nil {
		t.Fatal(err)
	}

	// observerDays = -1 means keep forever.
	removed, err := store.RemoveStaleObservers(-1)
	if err != nil {
		t.Fatal(err)
	}
	if removed != 0 {
		t.Errorf("removed=%d, want 0 (keep forever)", removed)
	}

	var total int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM observers").Scan(&total); err != nil {
		t.Fatal(err)
	}
	if total != 1 {
		t.Errorf("observers count=%d, want 1 (keep forever)", total)
	}

	// The observer must not have been flagged inactive either.
	var flag int
	if err := store.db.QueryRow("SELECT inactive FROM observers WHERE id = ?", "obs-ancient").Scan(&flag); err != nil {
		t.Fatal(err)
	}
	if flag != 0 {
		t.Errorf("obs-ancient inactive=%d, want 0 (keep forever)", flag)
	}
}
// TestRemoveStaleObserversReactivation checks the round trip of a soft-deleted
// observer: after RemoveStaleObservers(14) flags it inactive, a later
// UpsertObserver for the same id clears the inactive flag again.
func TestRemoveStaleObserversReactivation(t *testing.T) {
	store := newTestStore(t)

	// readInactive fetches the current inactive flag of obs-test.
	readInactive := func() int {
		var flag int
		if err := store.db.QueryRow("SELECT inactive FROM observers WHERE id = ?", "obs-test").Scan(&flag); err != nil {
			t.Fatal(err)
		}
		return flag
	}

	// Seed the observer and backdate its last_seen to 30 days ago.
	if err := store.UpsertObserver("obs-test", "TestObserver", "LAX", nil); err != nil {
		t.Fatal(err)
	}
	staleSeen := time.Now().UTC().AddDate(0, 0, -30).Format(time.RFC3339)
	if _, err := store.db.Exec("UPDATE observers SET last_seen = ? WHERE id = ?", staleSeen, "obs-test"); err != nil {
		t.Fatal(err)
	}

	removed, err := store.RemoveStaleObservers(14)
	if err != nil {
		t.Fatal(err)
	}
	if removed != 1 {
		t.Errorf("removed=%d, want 1", removed)
	}

	// The stale observer is now soft-deleted.
	if flag := readInactive(); flag != 1 {
		t.Errorf("inactive=%d, want 1 after soft-delete", flag)
	}

	// A fresh upsert for the same id should reactivate it.
	if err := store.UpsertObserver("obs-test", "TestObserver", "LAX", nil); err != nil {
		t.Fatal(err)
	}
	if flag := readInactive(); flag != 0 {
		t.Errorf("inactive=%d, want 0 after reactivation", flag)
	}
}
// TestObserverDaysOrDefault checks ObserverDaysOrDefault across the supported
// configurations: no retention section and a zero value both give 14, a
// positive value is returned as-is, and -1 (keep forever) is passed through.
func TestObserverDaysOrDefault(t *testing.T) {
	type scenario struct {
		name string
		cfg  *Config
		want int
	}
	scenarios := []scenario{
		{name: "nil retention", cfg: &Config{}, want: 14},
		{name: "zero observer days", cfg: &Config{Retention: &RetentionConfig{ObserverDays: 0}}, want: 14},
		{name: "positive value", cfg: &Config{Retention: &RetentionConfig{ObserverDays: 30}}, want: 30},
		{name: "keep forever", cfg: &Config{Retention: &RetentionConfig{ObserverDays: -1}}, want: -1},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.name, func(t *testing.T) {
			if got := sc.cfg.ObserverDaysOrDefault(); got != sc.want {
				t.Errorf("ObserverDaysOrDefault() = %d, want %d", got, sc.want)
			}
		})
	}
}
+75 -1266
View File
File diff suppressed because it is too large Load Diff
+20 -1098
View File
File diff suppressed because it is too large Load Diff
-115
View File
@@ -1,115 +0,0 @@
package main
import (
"database/sql"
"fmt"
"sync"
"testing"
"time"
)
// TestWriterStarvationVisibleInPerf reproduces the #1339 class of bug:
// one component (neighbor_builder) holds the writer connection for an
// extended period; a second component (mqtt_handler) firing concurrent
// writes must show observable wait_ms in the perf snapshot.
//
// This is the gate test for issue #1340: SQLite write-lock instrumentation
// per component. If the wait_ms percentile collapses to zero, the
// observability gap remains and the regression class is invisible again.
//
// Runs ~60s — guarded by testing.Short() so fast unit-test passes can
// skip it locally, but CI runs `go test ./...` without -short.
//
// If the blocker transaction fails before it signals that it holds the
// writer, the test fails immediately instead of waiting forever for a
// signal that will never arrive.
func TestWriterStarvationVisibleInPerf(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping 60s starvation test in short mode")
	}

	// Isolate from samples accumulated by earlier tests in the same
	// package run — without this the mqtt_handler component already
	// has ~thousand fast InsertTransmission samples and the 5 slow
	// follower samples can't move p99 above 50s.
	ResetWriterStatsForTest()

	s, err := OpenStore(tempDBPath(t))
	if err != nil {
		t.Fatal(err)
	}
	defer s.Close()

	const blockDur = 60 * time.Second

	// Blocker: acquire the writer via the wrapped Tx path, tag as
	// neighbor_builder, sleep 60s while holding the single conn,
	// then commit. This monopolises the writer for the duration.
	blockStarted := make(chan struct{})
	blockerDone := make(chan struct{})
	go func() {
		defer close(blockerDone)
		err := s.WriterTx("neighbor_builder", func(tx *sql.Tx) error {
			if _, err := tx.Exec(`UPDATE nodes SET name = name WHERE 0`); err != nil {
				return err
			}
			close(blockStarted)
			time.Sleep(blockDur)
			return nil
		})
		if err != nil {
			t.Errorf("blocker tx: %v", err)
		}
	}()

	// Wait for the blocker to be inside its transaction. blockStarted is
	// only closed on the success path, so also watch blockerDone: if the
	// blocker returns first it never acquired the writer and the rest of
	// the test would be meaningless (and a bare receive would hang).
	select {
	case <-blockStarted:
	case <-blockerDone:
		t.Fatal("blocker transaction ended before it held the writer")
	}
	// Small safety margin so the blocker is firmly holding the conn.
	time.Sleep(100 * time.Millisecond)

	// Now fire several mqtt_handler writes. Each will block on the
	// single writer connection until the blocker commits.
	const followers = 5
	var wg sync.WaitGroup
	wg.Add(followers)
	for i := 0; i < followers; i++ {
		i := i
		go func() {
			defer wg.Done()
			_, err := s.WriterExec(
				"mqtt_handler",
				`INSERT OR IGNORE INTO _migrations (name) VALUES (?)`,
				fmt.Sprintf("writer_starvation_test_%d", i),
			)
			if err != nil {
				t.Errorf("mqtt follower %d: %v", i, err)
			}
		}()
	}
	wg.Wait()
	<-blockerDone

	snap := s.WriterStatsSnapshot()
	mqtt, ok := snap["mqtt_handler"]
	if !ok {
		t.Fatalf("no perf snapshot for mqtt_handler component (got components: %v)", componentKeys(snap))
	}
	if mqtt.Count < followers {
		t.Fatalf("expected at least %d mqtt_handler samples, got %d", followers, mqtt.Count)
	}

	// This is the gate assertion. With instrumentation present the
	// follower writes should each register ~60s of wait_ms; p99 must
	// be well above 50_000ms. With instrumentation missing or broken
	// the percentile collapses to zero and this fails — which is the
	// exact regression class #1340 is meant to prevent.
	if mqtt.WaitMsP99 <= 50_000 {
		t.Fatalf("mqtt_handler wait_ms p99 = %.1fms, want > 50000ms; "+
			"writer starvation is invisible to /api/perf — issue #1340 not fixed",
			mqtt.WaitMsP99)
	}
}
// componentKeys returns the component names present in a writer-stats
// snapshot map. The order follows Go map iteration and is therefore
// unspecified; the result is only used to enrich a failure message.
func componentKeys(m map[string]WriterStatsSnapshot) []string {
	names := make([]string, len(m))
	next := 0
	for name := range m {
		names[next] = name
		next++
	}
	return names
}
-63
View File
@@ -1,63 +0,0 @@
package main
import (
"bytes"
"log"
"strings"
"testing"
)
// TestHandleMessageDecodeErrorLog_PII_Issue1211 — issue #1211 round-0 fix
// shipped without a test. Asserts the decode-error log line:
//
//	(a) includes structured fields: topic, observer prefix, payload length
//	(b) observer substring is at most 8 chars
//	(c) full observer ID is NOT present in the output
//
// A bare `log.Printf("... observer=%s ...", obs)` would leak the full ID.
func TestHandleMessageDecodeErrorLog_PII_Issue1211(t *testing.T) {
	store, source := newTestContext(t)

	// Use a 64-char observer ID; the prefix MUST be capped at 8 chars in logs.
	observerID := "abcdef0123456789aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"

	// Malformed raw — pathByte=0xF6 claims 216 path bytes in a tiny buffer.
	// This triggers the decode-error path under test.
	rawHex := "12F6AAAAAAAAAAAAAAAAAAAAAAAAAA"
	topic := "meshcore/SJC/" + observerID + "/packets"
	payload := []byte(`{"raw":"` + rawHex + `"}`)
	msg := &mockMessage{topic: topic, payload: payload}

	// Capture the standard logger's output for the duration of the call and
	// restore the previous writer afterwards.
	var buf bytes.Buffer
	orig := log.Writer()
	log.SetOutput(&buf)
	defer log.SetOutput(orig)

	handleMessage(store, "test", source, msg, nil, nil, &Config{})

	out := buf.String()
	if !strings.Contains(out, "decode error") {
		t.Fatalf("expected decode-error log; got:\n%s", out)
	}

	// (a) structured fields present
	if !strings.Contains(out, "topic=") {
		t.Errorf("log missing topic=; got:\n%s", out)
	}
	if !strings.Contains(out, "observer=") {
		t.Errorf("log missing observer=; got:\n%s", out)
	}
	if !strings.Contains(out, "rawHexLen=") {
		t.Errorf("log missing rawHexLen=; got:\n%s", out)
	}

	// (c) full observer ID must NOT appear
	if strings.Contains(out, observerID) {
		t.Errorf("log leaked full observer ID; got:\n%s", out)
	}

	// (b) observer substring capped at 8 chars — the 9th char ('2') must NOT
	// appear directly after the 8-char prefix. Checking exactly prefix+1
	// chars catches any leak longer than 8, including 9- and 10-char ones.
	if strings.Contains(out, "abcdef012") {
		t.Errorf("log observer field longer than 8 chars; got:\n%s", out)
	}

	// Positive: 8-char prefix must be present in the log
	if !strings.Contains(out, "abcdef01") {
		t.Errorf("log missing 8-char observer prefix; got:\n%s", out)
	}
}
+33 -461
View File
@@ -11,9 +11,6 @@ import (
"math"
"strings"
"unicode/utf8"
"github.com/meshcore-analyzer/packetpath"
"github.com/meshcore-analyzer/sigvalidate"
)
// Route type constants (header bits 1-0)
@@ -81,10 +78,9 @@ type TransportCodes struct {
// Path holds decoded path/hop information.
type Path struct {
HashSize int `json:"hashSize"`
HashCount int `json:"hashCount"`
Hops []string `json:"hops"`
HopsCompleted *int `json:"hopsCompleted,omitempty"`
HashSize int `json:"hashSize"`
HashCount int `json:"hashCount"`
Hops []string `json:"hops"`
}
// AdvertFlags holds decoded advert flag bits.
@@ -109,20 +105,10 @@ type Payload struct {
MAC string `json:"mac,omitempty"`
EncryptedData string `json:"encryptedData,omitempty"`
ExtraHash string `json:"extraHash,omitempty"`
// Extended ACK fields per firmware 1.16.0 (issue #1610) —
// firmware/src/helpers/BaseChatMesh.cpp:218-234. ACK payloads grew from
// always-4 bytes to 4/5/6 (4-byte truncated sha256 CRC, optional 1-byte
// attempt counter, optional 1-byte RNG byte added in commit a130a95a).
// AckLen is the wire payload length; AckAttempt/AckRand are surfaced
// only when the sender included them (legacy 4-byte ACKs leave them nil).
AckLen *int `json:"ackLen,omitempty"`
AckAttempt *int `json:"ackAttempt,omitempty"`
AckRand *int `json:"ackRand,omitempty"`
PubKey string `json:"pubKey,omitempty"`
Timestamp uint32 `json:"timestamp,omitempty"`
TimestampISO string `json:"timestampISO,omitempty"`
Signature string `json:"signature,omitempty"`
SignatureValid *bool `json:"signatureValid,omitempty"`
Flags *AdvertFlags `json:"flags,omitempty"`
Lat *float64 `json:"lat,omitempty"`
Lon *float64 `json:"lon,omitempty"`
@@ -135,45 +121,16 @@ type Payload struct {
ChannelHashHex string `json:"channelHashHex,omitempty"`
DecryptionStatus string `json:"decryptionStatus,omitempty"`
Channel string `json:"channel,omitempty"`
// GRP_DATA (PAYLOAD_TYPE_GRP_DATA=0x06) inner fields, decoded after
// channel decrypt per firmware/src/helpers/BaseChatMesh.cpp:382-385.
DataType *int `json:"dataType,omitempty"`
DataLen *int `json:"dataLen,omitempty"`
DecryptedBlob string `json:"decryptedBlob,omitempty"`
Text string `json:"text,omitempty"`
Sender string `json:"sender,omitempty"`
SenderTimestamp uint32 `json:"sender_timestamp,omitempty"`
EphemeralPubKey string `json:"ephemeralPubKey,omitempty"`
PathData string `json:"pathData,omitempty"`
SNRValues []float64 `json:"snrValues,omitempty"`
Tag uint32 `json:"tag,omitempty"`
AuthCode uint32 `json:"authCode,omitempty"`
TraceFlags *int `json:"traceFlags,omitempty"`
RawHex string `json:"raw,omitempty"`
Error string `json:"error,omitempty"`
// MULTIPART (PAYLOAD_TYPE_MULTIPART=0x0A) inner fields, decoded per
// firmware/src/Mesh.cpp:289 — byte0 = (remaining<<4) | inner_type.
Remaining *int `json:"remaining,omitempty"`
InnerType *int `json:"innerType,omitempty"`
InnerTypeName string `json:"innerTypeName,omitempty"`
InnerAckCrc string `json:"innerAckCrc,omitempty"`
// Extended ACK inner fields (issue #1610) — when the multipart inner
// blob is a v1.16+ extended ACK (5 or 6 bytes after the byte0 header),
// surface the same attempt/rand bytes as the top-level decoder.
InnerAckLen *int `json:"innerAckLen,omitempty"`
InnerAckAttempt *int `json:"innerAckAttempt,omitempty"`
InnerAckRand *int `json:"innerAckRand,omitempty"`
InnerPayload string `json:"innerPayload,omitempty"`
// CONTROL (PAYLOAD_TYPE_CONTROL=0x0B) byte0 flags, per
// firmware/src/Mesh.cpp:69 — byte0 high-bit marks zero-hop direct subset.
CtrlFlags string `json:"ctrlFlags,omitempty"`
CtrlZeroHop *bool `json:"ctrlZeroHop,omitempty"`
CtrlLength *int `json:"ctrlLength,omitempty"`
// RAW_CUSTOM (PAYLOAD_TYPE_RAW_CUSTOM=0x0F) — application-defined per
// firmware/src/Mesh.cpp:577 (createRawData). Exposes the bare envelope
// shape (length + leading tag) so consumers can triage by app id.
RawLength *int `json:"rawLength,omitempty"`
FirstByteTag string `json:"firstByteTag,omitempty"`
}
// DecodedPacket is the full decoded result.
@@ -183,8 +140,6 @@ type DecodedPacket struct {
Path Path `json:"path"`
Payload Payload `json:"payload"`
Raw string `json:"raw"`
Anomaly string `json:"anomaly,omitempty"`
payloadRaw []byte
}
func decodeHeader(b byte) Header {
@@ -210,35 +165,9 @@ func decodeHeader(b byte) Header {
}
}
// Size limits taken from the firmware headers (firmware/src/MeshCore.h:19,21).
const (
	maxPathSize      = 64  // MAX_PATH_SIZE — upper bound on total path bytes
	maxPacketPayload = 184 // MAX_PACKET_PAYLOAD — upper bound on raw payload bytes
)

// isValidPathLen reports whether a wire path byte describes an acceptable
// path, mirroring firmware Packet::isValidPathLen (firmware/src/Packet.cpp:13-18).
//
// The upper two bits encode hash_size-1 and the lower six bits encode
// hash_count. A hash_size of 4 is reserved and always rejected; otherwise the
// path is valid when hash_size*hash_count fits within maxPathSize.
func isValidPathLen(pathByte byte) bool {
	size := int(pathByte>>6) + 1
	if size == 4 {
		// Reserved encoding, rejected regardless of hop count.
		return false
	}
	hops := int(pathByte & 0x3F)
	return size*hops <= maxPathSize
}
func decodePath(pathByte byte, buf []byte, offset int) (Path, int, error) {
func decodePath(pathByte byte, buf []byte, offset int) (Path, int) {
hashSize := int(pathByte>>6) + 1
hashCount := int(pathByte & 0x3F)
// Exact mirror of firmware Packet::isValidPathLen (Packet.cpp:13-18).
// hash_size==4 is reserved and is rejected by firmware regardless of
// hash_count, so we must reject 0xC0 etc even on zero-hop packets —
// firmware never emits them, so an on-wire pathByte with the upper
// 2 bits set to 11 is by definition malformed/adversarial.
if !isValidPathLen(pathByte) {
return Path{}, 0, fmt.Errorf("invalid path encoding: pathByte 0x%02X (hash_size=%d hash_count=%d) violates firmware validity (Packet.cpp:13-18, MAX_PATH_SIZE=%d)", pathByte, hashSize, hashCount, maxPathSize)
}
totalBytes := hashSize * hashCount
hops := make([]string, 0, hashCount)
@@ -255,12 +184,11 @@ func decodePath(pathByte byte, buf []byte, offset int) (Path, int, error) {
HashSize: hashSize,
HashCount: hashCount,
Hops: hops,
}, totalBytes, nil
}, totalBytes
}
// isTransportRoute delegates to packetpath.IsTransportRoute.
func isTransportRoute(routeType int) bool {
return packetpath.IsTransportRoute(routeType)
return routeType == RouteTransportFlood || routeType == RouteTransportDirect
}
func decodeEncryptedPayload(typeName string, buf []byte) Payload {
@@ -281,30 +209,13 @@ func decodeAck(buf []byte) Payload {
return Payload{Type: "ACK", Error: "too short", RawHex: hex.EncodeToString(buf)}
}
checksum := binary.LittleEndian.Uint32(buf[0:4])
ackLen := len(buf)
if ackLen > 6 {
ackLen = 6
}
p := Payload{
return Payload{
Type: "ACK",
ExtraHash: fmt.Sprintf("%08x", checksum),
AckLen: &ackLen,
}
// Firmware 1.16.0 extended ACK (issue #1610): 5th byte is the attempt
// counter (commit f6e6fdaa), 6th byte is a random byte added so identical
// attempts still hash uniquely (commit a130a95a).
if len(buf) >= 5 {
attempt := int(buf[4])
p.AckAttempt = &attempt
}
if len(buf) >= 6 {
rnd := int(buf[5])
p.AckRand = &rnd
}
return p
}
func decodeAdvert(buf []byte, validateSignatures bool) Payload {
func decodeAdvert(buf []byte) Payload {
if len(buf) < 100 {
return Payload{Type: "ADVERT", Error: "too short for advert", RawHex: hex.EncodeToString(buf)}
}
@@ -322,16 +233,6 @@ func decodeAdvert(buf []byte, validateSignatures bool) Payload {
Signature: signature,
}
if validateSignatures {
valid, err := sigvalidate.ValidateAdvert(buf[0:32], buf[36:100], timestamp, appdata)
if err != nil {
f := false
p.SignatureValid = &f
} else {
p.SignatureValid = &valid
}
}
if len(appdata) > 0 {
flags := appdata[0]
advType := int(flags & 0x0F)
@@ -381,13 +282,6 @@ func decodeAdvert(buf []byte, validateSignatures bool) Payload {
}
name := string(appdata[off:nameEnd])
name = sanitizeName(name)
// Firmware writes the node name into a 32-byte buffer
// (MAX_ADVERT_DATA_SIZE, firmware/src/MeshCore.h:11). Truncate
// here so adversarial on-wire adverts can't pollute Payload.Name
// with bytes firmware would never emit.
if len(name) > 32 {
name = name[:32]
}
p.Name = name
off = nameEnd
// Skip null terminator(s)
@@ -398,17 +292,6 @@ func decodeAdvert(buf []byte, validateSignatures bool) Payload {
// Telemetry bytes after name: battery_mv(2 LE) + temperature_c(2 LE, signed, /100)
// Only sensor nodes (advType=4) carry telemetry bytes.
//
// Firmware derivation (see firmware/src/helpers/SensorMesh.h and the
// SensorHost::handleAdvert path in firmware/src/helpers/SensorMesh.cpp:
// the sensor builds appdata as <flags+adv_type><pubkey?><name\0>
// followed by two little-endian uint16 fields appended verbatim:
// appdata[name_end+0..1] = battery voltage in millivolts (uint16 LE,
// valid 0 < mv ≤ 10000)
// appdata[name_end+2..3] = temperature × 100 (int16 LE, divide by 100
// for °C; valid raw -5000..10000 → -50..100 °C)
// We accept only adverts whose flags.Sensor bit is set (firmware
// AdvertDataHelpers.h:7-12, ADV_TYPE_SENSOR=4) before parsing telemetry.
if p.Flags.Sensor && off+4 <= len(appdata) {
batteryMv := int(binary.LittleEndian.Uint16(appdata[off : off+2]))
tempRaw := int16(binary.LittleEndian.Uint16(appdata[off+2 : off+4]))
@@ -525,22 +408,6 @@ func decryptChannelMessage(ciphertextHex, macHex, channelKeyHex string) (*channe
return result, nil
}
// knownChannelCasing holds the canonical display name for each well-known
// channel, keyed by the lower-cased channel name. Channels that are not
// listed here are never renamed.
var knownChannelCasing = map[string]string{"public": "Public"}

// normalizeChannelName returns the canonical casing for a well-known channel
// name (e.g. "public" → "Public"), matching case-insensitively against
// knownChannelCasing. Any other name is returned unchanged, because the
// intended casing of a custom channel cannot be known.
func normalizeChannelName(name string) string {
	canonical, known := knownChannelCasing[strings.ToLower(name)]
	if !known {
		return name
	}
	return canonical
}
func decodeGrpTxt(buf []byte, channelKeys map[string]string) Payload {
if len(buf) < 3 {
return Payload{Type: "GRP_TXT", Error: "too short", RawHex: hex.EncodeToString(buf)}
@@ -565,7 +432,7 @@ func decodeGrpTxt(buf []byte, channelKeys map[string]string) Payload {
}
return Payload{
Type: "CHAN",
Channel: normalizeChannelName(name),
Channel: name,
ChannelHash: channelHash,
ChannelHashHex: channelHashHex,
DecryptionStatus: "decrypted",
@@ -594,200 +461,6 @@ func decodeGrpTxt(buf []byte, channelKeys map[string]string) Payload {
}
}
// decodeGrpData decodes a PAYLOAD_TYPE_GRP_DATA (0x06) payload.
//
// The outer envelope has the same shape as GRP_TXT — channel_hash(1) + MAC(2)
// + ciphertext (firmware/src/helpers/BaseChatMesh.cpp:476,500). Each entry of
// channelKeys (channel name → hex key) is tried in map-iteration order; the
// first key that decrypts determines the result. The decrypted inner block is
// parsed as data_type(uint16 LE) + data_len(1) + blob(data_len)
// (firmware/src/helpers/BaseChatMesh.cpp:382-385).
//
// DecryptionStatus of the result is one of:
//   - "no_key":            no keys supplied, or the ciphertext hex is under 10 chars
//   - "decryption_failed": keys were tried and none decrypted the block
//   - "decrypted":         a key matched; Error is set if the inner block is malformed
//
// Buffers shorter than 3 bytes yield a "too short" error payload.
func decodeGrpData(buf []byte, channelKeys map[string]string) Payload {
	if len(buf) < 3 {
		return Payload{Type: "GRP_DATA", Error: "too short", RawHex: hex.EncodeToString(buf)}
	}

	// Fields shared by every result from here on.
	envelope := Payload{
		Type:           "GRP_DATA",
		ChannelHash:    int(buf[0]),
		ChannelHashHex: fmt.Sprintf("%02X", buf[0]),
	}
	macHex := hex.EncodeToString(buf[1:3])
	cipherHex := hex.EncodeToString(buf[3:])

	// sealed builds the result for a payload that stays encrypted: it carries
	// the MAC and ciphertext through so consumers can retry later.
	sealed := func(status string) Payload {
		out := envelope
		out.DecryptionStatus = status
		out.MAC = macHex
		out.EncryptedData = cipherHex
		return out
	}

	if len(channelKeys) == 0 || len(cipherHex) < 10 {
		return sealed("no_key")
	}

	for name, key := range channelKeys {
		plain, err := decryptChannelBlock(cipherHex, macHex, key)
		if err != nil {
			continue // wrong key (or unusable key); try the next one
		}

		out := envelope
		out.Channel = name
		out.DecryptionStatus = "decrypted"

		// Inner: data_type(uint16 LE) + data_len(1) + blob (firmware:382-385).
		if len(plain) < 3 {
			out.Error = "inner too short"
			return out
		}
		dataType := int(binary.LittleEndian.Uint16(plain[0:2]))
		dataLen := int(plain[2])
		out.DataType = &dataType
		out.DataLen = &dataLen

		if 3+dataLen > len(plain) {
			out.Error = "inner data_len exceeds buffer"
			return out
		}
		out.DecryptedBlob = hex.EncodeToString(plain[3 : 3+dataLen])
		return out
	}

	return sealed("decryption_failed")
}
// decodeMultipart decodes a PAYLOAD_TYPE_MULTIPART (0x0A) payload per
// firmware/src/Mesh.cpp:287-310.
//
// byte0 packs (remaining<<4) | inner_type. When inner_type is PayloadACK and
// at least four bytes follow, those four bytes are read as a little-endian
// ack_crc, and any fifth/sixth byte is surfaced as the extended-ACK attempt
// counter and random byte (firmware 1.16.0, issue #1610). For every other
// case the bytes after byte0, if any, are exposed as hex in InnerPayload.
// An empty buffer yields a "too short" error payload.
func decodeMultipart(buf []byte) Payload {
	if len(buf) == 0 {
		return Payload{Type: "MULTIPART", Error: "too short", RawHex: hex.EncodeToString(buf)}
	}

	head, body := buf[0], buf[1:]
	remaining := int(head >> 4)
	innerType := int(head & 0x0F)

	out := Payload{Type: "MULTIPART", InnerTypeName: payloadTypeNames[innerType]}
	if out.InnerTypeName == "" {
		out.InnerTypeName = "UNKNOWN"
	}
	out.Remaining = &remaining
	out.InnerType = &innerType

	switch {
	case innerType == PayloadACK && len(body) >= 4:
		// ack_crc is little-endian on the wire; it is printed as the hex of
		// the decoded uint32, the same formatting used for ACK extraHash.
		out.InnerAckCrc = fmt.Sprintf("%08x", binary.LittleEndian.Uint32(body[:4]))

		// Extended ACK (issue #1610): the inner ACK blob may be 5 or 6 bytes
		// instead of always 4. The reported length is capped at 6.
		ackLen := len(body)
		if ackLen > 6 {
			ackLen = 6
		}
		out.InnerAckLen = &ackLen

		if len(body) >= 5 {
			attempt := int(body[4])
			out.InnerAckAttempt = &attempt
		}
		if len(body) >= 6 {
			rnd := int(body[5])
			out.InnerAckRand = &rnd
		}
	case len(body) > 0:
		out.InnerPayload = hex.EncodeToString(body)
	}
	return out
}
// decodeControl decodes a PAYLOAD_TYPE_CONTROL (0x0B) payload. Only byte0 is
// interpreted: its high bit marks the zero-hop direct subset
// (firmware/src/Mesh.cpp:69). The result also carries byte0 as two lower-case
// hex digits, the total payload length, and the full payload as hex.
// An empty buffer yields a "too short" error payload.
func decodeControl(buf []byte) Payload {
	size := len(buf)
	if size == 0 {
		return Payload{Type: "CONTROL", Error: "too short", RawHex: hex.EncodeToString(buf)}
	}

	flags := buf[0]
	direct := flags&0x80 != 0

	out := Payload{Type: "CONTROL", RawHex: hex.EncodeToString(buf)}
	out.CtrlFlags = fmt.Sprintf("%02x", flags)
	out.CtrlZeroHop = &direct
	out.CtrlLength = &size
	return out
}
// decodeRawCustom decodes a PAYLOAD_TYPE_RAW_CUSTOM (0x0F) payload. The
// content is application-defined (firmware/src/Mesh.cpp:577, createRawData),
// so only the envelope shape is surfaced: the total length, the full payload
// as hex and, when the payload is non-empty, its first byte as two upper-case
// hex digits.
func decodeRawCustom(buf []byte) Payload {
	size := len(buf)

	var tag string
	if size > 0 {
		tag = fmt.Sprintf("%02X", buf[0])
	}

	return Payload{
		Type:         "RAW_CUSTOM",
		RawLength:    &size,
		RawHex:       hex.EncodeToString(buf),
		FirstByteTag: tag,
	}
}
// decryptChannelBlock performs the MAC verify + AES-128-ECB decrypt step shared
// by GRP_TXT and GRP_DATA, returning the raw plaintext block (no further
// parsing). See firmware/src/helpers/BaseChatMesh.cpp:376-391.
//
// All three arguments are hex strings:
//   - ciphertextHex: non-empty ciphertext, a multiple of the AES block size
//   - macHex:        the 2-byte truncated MAC carried in the packet
//   - channelKeyHex: the 16-byte channel key
//
// The MAC is the first two bytes of HMAC-SHA256 over the ciphertext, keyed
// with the channel key zero-padded to 32 bytes. It is verified before any
// decryption takes place. An error is returned for malformed inputs, a MAC
// mismatch, or a ciphertext that is not block-aligned.
func decryptChannelBlock(ciphertextHex, macHex, channelKeyHex string) ([]byte, error) {
	channelKey, err := hex.DecodeString(channelKeyHex)
	if err != nil || len(channelKey) != 16 {
		return nil, fmt.Errorf("invalid channel key")
	}
	macBytes, err := hex.DecodeString(macHex)
	if err != nil || len(macBytes) != 2 {
		return nil, fmt.Errorf("invalid MAC")
	}
	ciphertext, err := hex.DecodeString(ciphertextHex)
	if err != nil || len(ciphertext) == 0 {
		return nil, fmt.Errorf("invalid ciphertext")
	}

	// HMAC key: the 16-byte channel key followed by 16 zero bytes.
	channelSecret := make([]byte, 32)
	copy(channelSecret, channelKey)
	h := hmac.New(sha256.New, channelSecret)
	h.Write(ciphertext)
	calc := h.Sum(nil)

	// Compare the truncated MAC with hmac.Equal so the comparison does not
	// leak timing information, as crypto/hmac recommends for MAC checks.
	if !hmac.Equal(calc[:len(macBytes)], macBytes) {
		return nil, fmt.Errorf("MAC verification failed")
	}

	if len(ciphertext)%aes.BlockSize != 0 {
		return nil, fmt.Errorf("ciphertext not aligned to AES block size")
	}
	block, err := aes.NewCipher(channelKey)
	if err != nil {
		return nil, err
	}

	// ECB: every 16-byte block is decrypted independently.
	plain := make([]byte, len(ciphertext))
	for i := 0; i < len(ciphertext); i += aes.BlockSize {
		block.Decrypt(plain[i:i+aes.BlockSize], ciphertext[i:i+aes.BlockSize])
	}
	return plain, nil
}
func decodeAnonReq(buf []byte) Payload {
if len(buf) < 35 {
return Payload{Type: "ANON_REQ", Error: "too short", RawHex: hex.EncodeToString(buf)}
@@ -833,7 +506,7 @@ func decodeTrace(buf []byte) Payload {
return p
}
func decodePayload(payloadType int, buf []byte, channelKeys map[string]string, validateSignatures bool) Payload {
func decodePayload(payloadType int, buf []byte, channelKeys map[string]string) Payload {
switch payloadType {
case PayloadREQ:
return decodeEncryptedPayload("REQ", buf)
@@ -844,30 +517,22 @@ func decodePayload(payloadType int, buf []byte, channelKeys map[string]string, v
case PayloadACK:
return decodeAck(buf)
case PayloadADVERT:
return decodeAdvert(buf, validateSignatures)
return decodeAdvert(buf)
case PayloadGRP_TXT:
return decodeGrpTxt(buf, channelKeys)
case PayloadGRP_DATA:
return decodeGrpData(buf, channelKeys)
case PayloadANON_REQ:
return decodeAnonReq(buf)
case PayloadPATH:
return decodePathPayload(buf)
case PayloadTRACE:
return decodeTrace(buf)
case PayloadMULTIPART:
return decodeMultipart(buf)
case PayloadCONTROL:
return decodeControl(buf)
case PayloadRAW_CUSTOM:
return decodeRawCustom(buf)
default:
return Payload{Type: "UNKNOWN", RawHex: hex.EncodeToString(buf)}
}
}
// DecodePacket decodes a hex-encoded MeshCore packet.
func DecodePacket(hexString string, channelKeys map[string]string, validateSignatures bool) (*DecodedPacket, error) {
func DecodePacket(hexString string, channelKeys map[string]string) (*DecodedPacket, error) {
hexString = strings.ReplaceAll(hexString, " ", "")
hexString = strings.ReplaceAll(hexString, "\n", "")
hexString = strings.ReplaceAll(hexString, "\r", "")
@@ -901,104 +566,39 @@ func DecodePacket(hexString string, channelKeys map[string]string, validateSigna
pathByte := buf[offset]
offset++
path, bytesConsumed, decodeErr := decodePath(pathByte, buf, offset)
if decodeErr != nil {
return nil, decodeErr
}
path, bytesConsumed := decodePath(pathByte, buf, offset)
offset += bytesConsumed
// Bounds check: pathByte is wire-supplied (hash_size in upper 2 bits,
// hash_count in lower 6 bits → up to 4*63=252 claimed path bytes). A
// malformed packet can claim more bytes than the buffer holds — without
// this guard `buf[offset:]` panics with `slice bounds out of range
// [offset:len(buf)]`. See issue #1211 (prod observed [218:15]).
if offset > len(buf) {
return nil, fmt.Errorf("packet path length (%d bytes claimed by pathByte 0x%02X) exceeds buffer (%d bytes)", bytesConsumed, pathByte, len(buf))
}
payloadBuf := buf[offset:]
// Firmware caps payload at MAX_PACKET_PAYLOAD=184 (firmware/src/MeshCore.h:19).
if len(payloadBuf) > maxPacketPayload {
return nil, fmt.Errorf("packet payload (%d bytes) exceeds firmware MAX_PACKET_PAYLOAD=%d (MeshCore.h:19)", len(payloadBuf), maxPacketPayload)
}
payload := decodePayload(header.PayloadType, payloadBuf, channelKeys, validateSignatures)
payload := decodePayload(header.PayloadType, payloadBuf, channelKeys)
// TRACE packets store hop IDs in the payload (buf[9:]) rather than the header
// path field. Firmware always sends TRACE as DIRECT (route_type 2 or 3);
// FLOOD-routed TRACEs are anomalous but handled gracefully (parsed, but
// flagged). The TRACE flags byte (payload offset 8) encodes path_sz in
// bits 0-1 as a power-of-two exponent: hash_bytes = 1 << path_sz.
// NOT the header path byte's hash_size bits. The header path contains SNR
// bytes — one per hop that actually forwarded.
// We expose hopsCompleted (count of SNR bytes) so consumers can distinguish
// how far the trace got vs the full intended route.
var anomaly string
if header.PayloadType == PayloadTRACE && payload.Error != "" {
anomaly = fmt.Sprintf("TRACE payload decode failed: %s", payload.Error)
}
// path field. The header path byte still encodes hashSize in bits 6-7, which
// we use to split the payload path data into individual hop prefixes.
if header.PayloadType == PayloadTRACE && payload.PathData != "" {
// Flag anomalous routing — firmware only sends TRACE as DIRECT
if header.RouteType != RouteDirect && header.RouteType != RouteTransportDirect {
anomaly = "TRACE packet with non-DIRECT routing (expected DIRECT or TRANSPORT_DIRECT)"
}
// The header path hops count represents SNR entries = completed hops
hopsCompleted := path.HashCount
// Extract per-hop SNR from header path bytes (int8, quarter-dB encoding).
// Mirrors cmd/server/decoder.go — must be done at ingest time so SNR
// values are persisted in decoded_json (server endpoint serves DB as-is).
if hopsCompleted > 0 && len(path.Hops) >= hopsCompleted {
snrVals := make([]float64, 0, hopsCompleted)
for i := 0; i < hopsCompleted; i++ {
b, err := hex.DecodeString(path.Hops[i])
if err == nil && len(b) == 1 {
snrVals = append(snrVals, float64(int8(b[0]))/4.0)
}
}
if len(snrVals) > 0 {
payload.SNRValues = snrVals
}
}
pathBytes, err := hex.DecodeString(payload.PathData)
if err == nil && payload.TraceFlags != nil {
// path_sz from flags byte is a power-of-two exponent per firmware:
// hash_bytes = 1 << (flags & 0x03)
pathSz := 1 << (*payload.TraceFlags & 0x03)
hops := make([]string, 0, len(pathBytes)/pathSz)
for i := 0; i+pathSz <= len(pathBytes); i += pathSz {
hops = append(hops, strings.ToUpper(hex.EncodeToString(pathBytes[i:i+pathSz])))
if err == nil && path.HashSize > 0 {
hops := make([]string, 0, len(pathBytes)/path.HashSize)
for i := 0; i+path.HashSize <= len(pathBytes); i += path.HashSize {
hops = append(hops, strings.ToUpper(hex.EncodeToString(pathBytes[i:i+path.HashSize])))
}
path.Hops = hops
path.HashCount = len(hops)
path.HashSize = pathSz
path.HopsCompleted = &hopsCompleted
}
}
// Zero-hop direct packets have hash_count=0 (lower 6 bits of pathByte),
// which makes the generic formula yield a bogus hashSize. Reset to 0
// (unknown) so API consumers get correct data. We mask with 0x3F to check
// only hash_count, matching the JS frontend approach — the upper hash_size
// bits are meaningless when there are no hops. Skip TRACE packets — they
// use hashSize to parse hops from the payload above.
if (header.RouteType == RouteDirect || header.RouteType == RouteTransportDirect) && pathByte&0x3F == 0 && header.PayloadType != PayloadTRACE {
path.HashSize = 0
}
return &DecodedPacket{
Header: header,
TransportCodes: tc,
Path: path,
Payload: payload,
Raw: strings.ToUpper(hexString),
Anomaly: anomaly,
payloadRaw: payloadBuf,
}, nil
}
// ComputeContentHash computes the SHA-256-based content hash (first 16 hex chars).
// It hashes the payload-type nibble + payload (skipping path bytes) to produce a
// route-independent identifier for the same logical packet. For TRACE packets,
// path_len is included in the hash to match firmware behavior.
// It hashes the header byte + payload (skipping path bytes) to produce a
// path-independent identifier for the same transmission.
func ComputeContentHash(rawHex string) string {
buf, err := hex.DecodeString(rawHex)
if err != nil || len(buf) < 2 {
@@ -1034,18 +634,7 @@ func ComputeContentHash(rawHex string) string {
}
payload := buf[payloadStart:]
// Hash payload-type byte only (bits 2-5 of header), not the full header.
// Firmware: SHA256(payload_type + [path_len for TRACE] + payload)
// Using the full header caused different hashes for the same logical packet
// when route type or version bits differed. See issue #786.
payloadType := (headerByte >> 2) & 0x0F
toHash := []byte{payloadType}
if int(payloadType) == PayloadTRACE {
// Firmware uses uint16_t path_len (2 bytes, little-endian)
toHash = append(toHash, pathByte, 0x00)
}
toHash = append(toHash, payload...)
toHash := append([]byte{headerByte}, payload...)
h := sha256.Sum256(toHash)
return hex.EncodeToString(h[:])[:16]
@@ -1109,13 +698,8 @@ func ValidateAdvert(p *Payload) (bool, string) {
if p.Flags != nil {
role := advertRole(p.Flags)
// Accept canonical labels plus "none" (ADV_TYPE_NONE=0) and the
// "type-N" placeholders we now return for ADV_TYPE 5-15 (FUTURE)
// — see firmware/src/helpers/AdvertDataHelpers.h:7-12.
validRoles := map[string]bool{
"repeater": true, "companion": true, "room": true, "sensor": true, "none": true,
}
if !validRoles[role] && !strings.HasPrefix(role, "type-") {
validRoles := map[string]bool{"repeater": true, "companion": true, "room": true, "sensor": true}
if !validRoles[role] {
return false, fmt.Sprintf("unknown role: %s", role)
}
}
@@ -1135,29 +719,17 @@ func sanitizeName(s string) string {
return b.String()
}
// advertRole returns a stable role label for an advert. Follows firmware
// ADV_TYPE_* constants in firmware/src/helpers/AdvertDataHelpers.h:7-12:
// 0 NONE, 1 CHAT, 2 REPEATER, 3 ROOM, 4 SENSOR, 5-15 FUTURE.
// Previously this coerced both 0 (NONE) and 5-15 (FUTURE) to "companion",
// silently relabelling unknown/reserved types — see issue #1279 P1 #3.
func advertRole(f *AdvertFlags) string {
if f == nil {
return "companion"
}
switch f.Type {
case 0:
return "none"
case 1:
return "companion"
case 2:
if f.Repeater {
return "repeater"
case 3:
return "room"
case 4:
return "sensor"
default:
return fmt.Sprintf("type-%d", f.Type)
}
if f.Room {
return "room"
}
if f.Sensor {
return "sensor"
}
return "companion"
}
func epochToISO(epoch uint32) string {
-97
View File
@@ -1,97 +0,0 @@
package main
import (
"encoding/hex"
"strings"
"testing"
)
// --- Issue #1211 round-1 protocol-correctness regressions ---
// See cmd/server/decoder_bounds_test.go for full firmware citations
// (firmware/src/Packet.cpp:13-18, firmware/src/MeshCore.h:19-21).
// pathByte=0xF6 encodes hash_size=4 (reserved) with hash_count=54. The buffer
// supplies all 216 claimed path bytes, so the out-of-bounds guard does NOT
// trigger and DecodePacket has to reject the encoding itself.
func TestDecodePacketRejectsReservedHashSize_Issue1211(t *testing.T) {
	pathHex := strings.Repeat("AB", 216)
	payloadHex := strings.Repeat("CD", 8)

	pkt, err := DecodePacket("12F6"+pathHex+payloadHex, nil, false)
	switch {
	case err == nil:
		t.Fatalf("expected error rejecting reserved hash_size=4 (firmware Packet.cpp:13-18); got nil, pkt=%+v", pkt)
	case !strings.Contains(err.Error(), "path"):
		t.Errorf("error should mention path; got %q", err)
	}
}
// pathByte=0xBF encodes hash_size=3 and hash_count=63: 189 path bytes, which
// is over MAX_PATH_SIZE=64. The buffer holds all of them, so only the path
// validity check can reject the packet.
func TestDecodePacketRejectsOversizedPath_Issue1211(t *testing.T) {
	parts := []string{"12BF", strings.Repeat("AB", 189), strings.Repeat("CD", 8)}
	if pkt, err := DecodePacket(strings.Join(parts, ""), nil, false); err == nil {
		t.Fatalf("expected error rejecting hash_count*hash_size > 64; got nil, pkt=%+v", pkt)
	}
}
// A zero-hop packet carrying 200 payload bytes exceeds MAX_PACKET_PAYLOAD
// (184) and must be rejected with an error that names the payload.
func TestDecodePacketRejectsOversizedPayload_Issue1211(t *testing.T) {
	const payloadBytes = 200

	pkt, err := DecodePacket("1200"+strings.Repeat("AA", payloadBytes), nil, false)
	if err == nil {
		t.Fatalf("expected error rejecting payload > MAX_PACKET_PAYLOAD=184 (firmware MeshCore.h:19); got nil, pkt=%+v", pkt)
	}
	if msg := err.Error(); !strings.Contains(msg, "payload") {
		t.Errorf("error should mention payload; got %q", err)
	}
}
// decodePath must refuse pathByte=0xF6 (reserved hash_size=4) even when the
// buffer is large enough to hold every claimed path byte.
func TestDecodePath_RejectsReservedHashSize_Issue1211(t *testing.T) {
	// 216 bytes of 0xAB: exactly what hash_size=4 * hash_count=54 claims.
	buf := []byte(strings.Repeat("\xAB", 216))

	if _, _, err := decodePath(0xF6, buf, 0); err == nil {
		t.Fatalf("decodePath should reject pathByte=0xF6 (hash_size=4 reserved); got nil err")
	}
}
// decodePath must refuse pathByte=0xBF, whose 3-byte hashes times 63 hops
// claim 189 path bytes, even when the buffer actually holds that many.
func TestDecodePath_RejectsOversizedPath_Issue1211(t *testing.T) {
	var backing [189]byte // zero-filled; content is irrelevant here

	if _, _, err := decodePath(0xBF, backing[:], 0); err == nil {
		t.Fatalf("decodePath should reject hash_count*hash_size=189 > MAX_PATH_SIZE=64; got nil err")
	}
}
// Guards against over-rejection: pathByte=0x05 (five 1-byte hops) is a valid
// encoding and must decode, consuming exactly five bytes.
func TestDecodePath_AcceptsValidEncodings_Issue1211(t *testing.T) {
	hops := []byte{0x01, 0x02, 0x03, 0x04, 0x05}

	path, consumed, err := decodePath(0x05, hops, 0)
	if err != nil {
		t.Fatalf("decodePath rejected valid encoding: %v", err)
	}
	if got := consumed; got != 5 {
		t.Errorf("consumed=%d, want 5", got)
	}
	if !(path.HashCount == 5 && path.HashSize == 1) {
		t.Errorf("decode wrong: hashCount=%d hashSize=%d", path.HashCount, path.HashSize)
	}
}
// Kent #1 — replaces a tautological assertion: a non-nil error alone is not
// enough, the message MUST contain both "path length" and "exceeds buffer".
// pathByte=0x0A is firmware-valid (ten 1-byte hops) but the buffer carries
// only five bytes, so the out-of-bounds guard fires rather than the path
// validity check.
func TestDecodePacketBoundsFromWireErrorPhrasing_Issue1211(t *testing.T) {
	_, err := DecodePacket("120A"+strings.Repeat("AA", 5), nil, false)
	if err == nil {
		t.Fatalf("expected error, got nil")
	}

	msg := err.Error()
	if !strings.Contains(msg, "path length") {
		t.Errorf("error missing 'path length'; got %q", err)
	}
	if !strings.Contains(msg, "exceeds buffer") {
		t.Errorf("error missing 'exceeds buffer'; got %q", err)
	}
}
// Blank reference so the encoding/hex import counts as used; no test in the
// visible part of this file calls the package directly.
var _ = hex.EncodeToString
+51 -618
View File
@@ -2,7 +2,6 @@ package main
import (
"crypto/aes"
"crypto/ed25519"
"crypto/hmac"
"crypto/sha256"
"encoding/binary"
@@ -10,9 +9,6 @@ import (
"math"
"strings"
"testing"
"github.com/meshcore-analyzer/packetpath"
"github.com/meshcore-analyzer/sigvalidate"
)
func TestDecodeHeaderRoutTypes(t *testing.T) {
@@ -59,7 +55,7 @@ func TestDecodeHeaderPayloadTypes(t *testing.T) {
func TestDecodePathZeroHops(t *testing.T) {
// 0x00: 0 hops, 1-byte hashes
pkt, err := DecodePacket("0500"+strings.Repeat("00", 10), nil, false)
pkt, err := DecodePacket("0500"+strings.Repeat("00", 10), nil)
if err != nil {
t.Fatal(err)
}
@@ -76,7 +72,7 @@ func TestDecodePathZeroHops(t *testing.T) {
func TestDecodePath1ByteHashes(t *testing.T) {
// 0x05: 5 hops, 1-byte hashes → 5 path bytes
pkt, err := DecodePacket("0505"+"AABBCCDDEE"+strings.Repeat("00", 10), nil, false)
pkt, err := DecodePacket("0505"+"AABBCCDDEE"+strings.Repeat("00", 10), nil)
if err != nil {
t.Fatal(err)
}
@@ -99,7 +95,7 @@ func TestDecodePath1ByteHashes(t *testing.T) {
func TestDecodePath2ByteHashes(t *testing.T) {
// 0x45: 5 hops, 2-byte hashes
pkt, err := DecodePacket("0545"+"AA11BB22CC33DD44EE55"+strings.Repeat("00", 10), nil, false)
pkt, err := DecodePacket("0545"+"AA11BB22CC33DD44EE55"+strings.Repeat("00", 10), nil)
if err != nil {
t.Fatal(err)
}
@@ -116,7 +112,7 @@ func TestDecodePath2ByteHashes(t *testing.T) {
func TestDecodePath3ByteHashes(t *testing.T) {
// 0x8A: 10 hops, 3-byte hashes
pkt, err := DecodePacket("058A"+strings.Repeat("AA11FF", 10)+strings.Repeat("00", 10), nil, false)
pkt, err := DecodePacket("058A"+strings.Repeat("AA11FF", 10)+strings.Repeat("00", 10), nil)
if err != nil {
t.Fatal(err)
}
@@ -135,7 +131,7 @@ func TestTransportCodes(t *testing.T) {
// Route type 0 (TRANSPORT_FLOOD) should have transport codes
// Firmware order: header + transport_codes(4) + path_len + path + payload
hex := "14" + "AABB" + "CCDD" + "00" + strings.Repeat("00", 10)
pkt, err := DecodePacket(hex, nil, false)
pkt, err := DecodePacket(hex, nil)
if err != nil {
t.Fatal(err)
}
@@ -153,7 +149,7 @@ func TestTransportCodes(t *testing.T) {
}
// Route type 1 (FLOOD) should NOT have transport codes
pkt2, err := DecodePacket("0500"+strings.Repeat("00", 10), nil, false)
pkt2, err := DecodePacket("0500"+strings.Repeat("00", 10), nil)
if err != nil {
t.Fatal(err)
}
@@ -173,7 +169,7 @@ func TestDecodeAdvertFull(t *testing.T) {
name := "546573744E6F6465" // "TestNode"
hex := "1200" + pubkey + timestamp + signature + flags + lat + lon + name
pkt, err := DecodePacket(hex, nil, false)
pkt, err := DecodePacket(hex, nil)
if err != nil {
t.Fatal(err)
}
@@ -231,7 +227,7 @@ func TestDecodeAdvertTypeEnums(t *testing.T) {
makeAdvert := func(flagsByte byte) *DecodedPacket {
hex := "1200" + strings.Repeat("AA", 32) + "00000000" + strings.Repeat("BB", 64) +
strings.ToUpper(string([]byte{hexDigit(flagsByte>>4), hexDigit(flagsByte & 0x0f)}))
pkt, err := DecodePacket(hex, nil, false)
pkt, err := DecodePacket(hex, nil)
if err != nil {
t.Fatal(err)
}
@@ -276,7 +272,7 @@ func hexDigit(v byte) byte {
func TestDecodeAdvertNoLocationNoName(t *testing.T) {
hex := "1200" + strings.Repeat("CC", 32) + "00000000" + strings.Repeat("DD", 64) + "02"
pkt, err := DecodePacket(hex, nil, false)
pkt, err := DecodePacket(hex, nil)
if err != nil {
t.Fatal(err)
}
@@ -295,7 +291,7 @@ func TestDecodeAdvertNoLocationNoName(t *testing.T) {
}
func TestGoldenFixtureTxtMsg(t *testing.T) {
pkt, err := DecodePacket("0A00D69FD7A5A7475DB07337749AE61FA53A4788E976", nil, false)
pkt, err := DecodePacket("0A00D69FD7A5A7475DB07337749AE61FA53A4788E976", nil)
if err != nil {
t.Fatal(err)
}
@@ -318,7 +314,7 @@ func TestGoldenFixtureTxtMsg(t *testing.T) {
func TestGoldenFixtureAdvert(t *testing.T) {
rawHex := "120046D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"
pkt, err := DecodePacket(rawHex, nil, false)
pkt, err := DecodePacket(rawHex, nil)
if err != nil {
t.Fatal(err)
}
@@ -341,7 +337,7 @@ func TestGoldenFixtureAdvert(t *testing.T) {
func TestGoldenFixtureUnicodeAdvert(t *testing.T) {
rawHex := "120073CFF971E1CB5754A742C152B2D2E0EB108A19B246D663ED8898A72C4A5AD86EA6768E66694B025EDF6939D5C44CFF719C5D5520E5F06B20680A83AD9C2C61C3227BBB977A85EE462F3553445FECF8EDD05C234ECE217272E503F14D6DF2B1B9B133890C923CDF3002F8FDC1F85045414BF09F8CB3"
pkt, err := DecodePacket(rawHex, nil, false)
pkt, err := DecodePacket(rawHex, nil)
if err != nil {
t.Fatal(err)
}
@@ -358,14 +354,14 @@ func TestGoldenFixtureUnicodeAdvert(t *testing.T) {
}
func TestDecodePacketTooShort(t *testing.T) {
_, err := DecodePacket("FF", nil, false)
_, err := DecodePacket("FF", nil)
if err == nil {
t.Error("expected error for 1-byte packet")
}
}
func TestDecodePacketInvalidHex(t *testing.T) {
_, err := DecodePacket("ZZZZ", nil, false)
_, err := DecodePacket("ZZZZ", nil)
if err == nil {
t.Error("expected error for invalid hex")
}
@@ -447,28 +443,6 @@ func TestValidateAdvert(t *testing.T) {
}
}
// TestDecodePacketPayloadRaw decodes a minimal TRANSPORT_FLOOD packet and
// checks that transport codes are present and that payloadRaw holds exactly
// the bytes that follow the path-length byte.
func TestDecodePacketPayloadRaw(t *testing.T) {
	want := []byte("hello")

	// Wire layout: header(1) + transport_codes(4) + path_len(1) + payload(N).
	// Header 0x00 = route_type TRANSPORT_FLOOD, payload_type 0, version 0;
	// codes 9A52 / 0000; path_len 0x00 (no hops).
	wire := append([]byte{0x00, 0x9A, 0x52, 0x00, 0x00, 0x00}, want...)

	pkt, err := DecodePacket(strings.ToUpper(hex.EncodeToString(wire)), nil, false)
	if err != nil {
		t.Fatalf("DecodePacket: %v", err)
	}
	if pkt.TransportCodes == nil {
		t.Fatal("expected TransportCodes, got nil")
	}
	if string(pkt.payloadRaw) != string(want) {
		t.Errorf("payloadRaw = %v, want %v", pkt.payloadRaw, want)
	}
}
func TestDecodeGrpTxtShort(t *testing.T) {
p := decodeGrpTxt([]byte{0x01, 0x02}, nil)
if p.Error != "too short" {
@@ -594,7 +568,7 @@ func TestDecodeTracePathParsing(t *testing.T) {
// Packet from issue #276: 260001807dca00000000007d547d
// Path byte 0x00 → hashSize=1, hops in payload at buf[9:] = 7d 54 7d
// Expected path: ["7D", "54", "7D"]
pkt, err := DecodePacket("260001807dca00000000007d547d", nil, false)
pkt, err := DecodePacket("260001807dca00000000007d547d", nil)
if err != nil {
t.Fatalf("DecodePacket error: %v", err)
}
@@ -616,7 +590,7 @@ func TestDecodeTracePathParsing(t *testing.T) {
}
func TestDecodeAdvertShort(t *testing.T) {
p := decodeAdvert(make([]byte, 50), false)
p := decodeAdvert(make([]byte, 50))
if p.Error != "too short for advert" {
t.Errorf("expected 'too short for advert' error, got %q", p.Error)
}
@@ -653,76 +627,69 @@ func TestDecodeEncryptedPayloadValid(t *testing.T) {
}
func TestDecodePayloadGRPData(t *testing.T) {
// GRP_DATA (0x06) decoder added for #1279 P0 #1 — envelope only when no
// channel key matches (firmware/src/helpers/BaseChatMesh.cpp:500).
buf := []byte{0x01, 0x02, 0x03}
p := decodePayload(PayloadGRP_DATA, buf, nil, false)
if p.Type != "GRP_DATA" {
t.Errorf("type=%s, want GRP_DATA", p.Type)
p := decodePayload(PayloadGRP_DATA, buf, nil)
if p.Type != "UNKNOWN" {
t.Errorf("type=%s, want UNKNOWN", p.Type)
}
if p.RawHex != "010203" {
t.Errorf("rawHex=%s, want 010203", p.RawHex)
}
}
func TestDecodePayloadRAWCustom(t *testing.T) {
// #1279 P2 #5: RAW_CUSTOM (0x0F) now exposes envelope shape (length +
// first-byte tag) per firmware/src/Mesh.cpp:577 (createRawData).
buf := []byte{0xFF, 0xFE}
p := decodePayload(PayloadRAW_CUSTOM, buf, nil, false)
if p.Type != "RAW_CUSTOM" {
t.Errorf("type=%s, want RAW_CUSTOM", p.Type)
}
if p.RawLength == nil || *p.RawLength != 2 {
t.Errorf("rawLength missing or wrong, want 2")
}
if p.FirstByteTag != "FF" {
t.Errorf("firstByteTag=%q, want FF", p.FirstByteTag)
p := decodePayload(PayloadRAW_CUSTOM, buf, nil)
if p.Type != "UNKNOWN" {
t.Errorf("type=%s, want UNKNOWN", p.Type)
}
}
func TestDecodePayloadAllTypes(t *testing.T) {
// REQ
p := decodePayload(PayloadREQ, make([]byte, 10), nil, false)
p := decodePayload(PayloadREQ, make([]byte, 10), nil)
if p.Type != "REQ" {
t.Errorf("REQ: type=%s", p.Type)
}
// RESPONSE
p = decodePayload(PayloadRESPONSE, make([]byte, 10), nil, false)
p = decodePayload(PayloadRESPONSE, make([]byte, 10), nil)
if p.Type != "RESPONSE" {
t.Errorf("RESPONSE: type=%s", p.Type)
}
// TXT_MSG
p = decodePayload(PayloadTXT_MSG, make([]byte, 10), nil, false)
p = decodePayload(PayloadTXT_MSG, make([]byte, 10), nil)
if p.Type != "TXT_MSG" {
t.Errorf("TXT_MSG: type=%s", p.Type)
}
// ACK
p = decodePayload(PayloadACK, make([]byte, 10), nil, false)
p = decodePayload(PayloadACK, make([]byte, 10), nil)
if p.Type != "ACK" {
t.Errorf("ACK: type=%s", p.Type)
}
// GRP_TXT
p = decodePayload(PayloadGRP_TXT, make([]byte, 10), nil, false)
p = decodePayload(PayloadGRP_TXT, make([]byte, 10), nil)
if p.Type != "GRP_TXT" {
t.Errorf("GRP_TXT: type=%s", p.Type)
}
// ANON_REQ
p = decodePayload(PayloadANON_REQ, make([]byte, 40), nil, false)
p = decodePayload(PayloadANON_REQ, make([]byte, 40), nil)
if p.Type != "ANON_REQ" {
t.Errorf("ANON_REQ: type=%s", p.Type)
}
// PATH
p = decodePayload(PayloadPATH, make([]byte, 10), nil, false)
p = decodePayload(PayloadPATH, make([]byte, 10), nil)
if p.Type != "PATH" {
t.Errorf("PATH: type=%s", p.Type)
}
// TRACE
p = decodePayload(PayloadTRACE, make([]byte, 20), nil, false)
p = decodePayload(PayloadTRACE, make([]byte, 20), nil)
if p.Type != "TRACE" {
t.Errorf("TRACE: type=%s", p.Type)
}
@@ -956,96 +923,9 @@ func TestComputeContentHashLongFallback(t *testing.T) {
}
}
// TestComputeContentHashRouteTypeIndependence checks that two packets which
// differ only in route type hash to the same value (issue #786).
//
// Both packets carry payload type 2 (TXT_MSG), path byte 0x00 and the same
// payload; only the header's route bits change: 0x09 is FLOOD (route 1) and
// 0x0A is DIRECT (route 2).
func TestComputeContentHashRouteTypeIndependence(t *testing.T) {
	const body = "D69FD7A5A7"

	viaFlood := ComputeContentHash("09" + "00" + body)
	viaDirect := ComputeContentHash("0A" + "00" + body)

	if viaFlood != viaDirect {
		t.Errorf("same payload with different route types produced different hashes: flood=%s direct=%s", viaFlood, viaDirect)
	}
}
// TestComputeContentHashTraceIncludesPathLen checks that two TRACE packets
// with the same payload but different path lengths hash differently, i.e.
// path_len takes part in the hash input for TRACE.
func TestComputeContentHashTraceIncludesPathLen(t *testing.T) {
	const (
		// 0x26 = payload_type 9 (TRACE) << 2 | route 2.
		traceHeader = "26"
		payload     = "DEADBEEF"
	)

	// Path byte 0x01: one 1-byte hop; path byte 0x02: two 1-byte hops.
	oneHop := ComputeContentHash(traceHeader + "01" + "AA" + payload)
	twoHops := ComputeContentHash(traceHeader + "02" + "AABB" + payload)

	if oneHop == twoHops {
		t.Error("TRACE packets with different path_len should produce different hashes (path_len is part of hash input)")
	}
}
// TestComputeContentHashMatchesFirmware recomputes the expected hash by hand
// as the first 16 hex characters of SHA256(payload_type_byte || payload) and
// compares it with ComputeContentHash's output.
func TestComputeContentHashMatchesFirmware(t *testing.T) {
	const payloadHex = "D69FD7A5A7475DB07337749AE61FA53A4788E976"

	// Header 0x0A → payload_type = (0x0A >> 2) & 0x0F = 2; path byte 0x00.
	got := ComputeContentHash("0A00" + payloadHex)

	body, _ := hex.DecodeString(payloadHex)
	digest := sha256.Sum256(append([]byte{0x02}, body...))
	want := hex.EncodeToString(digest[:])[:16]

	if got != want {
		t.Errorf("hash=%s, want %s (firmware-compatible)", got, want)
	}
}
// TestComputeContentHashTraceGoldenValue pins the TRACE hash to a fixed value
// so that any change to how path_len enters the hash input is caught.
//
// Packet 2502AABBDEADBEEF: header 0x25 (payload_type 9 = TRACE, route 1 =
// FLOOD), path byte 0x02 (two 1-byte hops AA, BB), payload DE AD BE EF.
// The golden value was pre-computed as the first 16 hex characters of
// SHA256(0x09 0x02 0x00 0xDE 0xAD 0xBE 0xEF), where 0x02 0x00 is path_len
// written as a little-endian uint16.
func TestComputeContentHashTraceGoldenValue(t *testing.T) {
	const (
		rawHex = "2502AABBDEADBEEF"
		golden = "b1baaf3bf0d0726c"
	)

	if hash := ComputeContentHash(rawHex); hash != golden {
		t.Errorf("TRACE golden hash = %s, want %s (2-byte path_len encoding)", hash, golden)
	}
}
func TestDecodePacketWithWhitespace(t *testing.T) {
raw := "0A 00 D6 9F D7 A5 A7 47 5D B0 73 37 74 9A E6 1F A5 3A 47 88 E9 76"
pkt, err := DecodePacket(raw, nil, false)
pkt, err := DecodePacket(raw, nil)
if err != nil {
t.Fatal(err)
}
@@ -1056,7 +936,7 @@ func TestDecodePacketWithWhitespace(t *testing.T) {
func TestDecodePacketWithNewlines(t *testing.T) {
raw := "0A00\nD69F\r\nD7A5A7475DB07337749AE61FA53A4788E976"
pkt, err := DecodePacket(raw, nil, false)
pkt, err := DecodePacket(raw, nil)
if err != nil {
t.Fatal(err)
}
@@ -1067,7 +947,7 @@ func TestDecodePacketWithNewlines(t *testing.T) {
func TestDecodePacketTransportRouteTooShort(t *testing.T) {
// TRANSPORT_FLOOD (route=0) but only 2 bytes total → too short for transport codes
_, err := DecodePacket("1400", nil, false)
_, err := DecodePacket("1400", nil)
if err == nil {
t.Error("expected error for transport route with too-short buffer")
}
@@ -1126,24 +1006,24 @@ func TestDecodeHeaderUnknownTypes(t *testing.T) {
}
func TestDecodePayloadMultipart(t *testing.T) {
// MULTIPART (0x0A) now decoded — #1279 P0 #2 (firmware/src/Mesh.cpp:289).
p := decodePayload(PayloadMULTIPART, []byte{0x01, 0x02}, nil, false)
if p.Type != "MULTIPART" {
t.Errorf("MULTIPART type=%s, want MULTIPART", p.Type)
// MULTIPART (0x0A) falls through to default → UNKNOWN
p := decodePayload(PayloadMULTIPART, []byte{0x01, 0x02}, nil)
if p.Type != "UNKNOWN" {
t.Errorf("MULTIPART type=%s, want UNKNOWN", p.Type)
}
}
func TestDecodePayloadControl(t *testing.T) {
// CONTROL (0x0B) now decoded — #1279 P1 #4 (firmware/src/Mesh.cpp:69).
p := decodePayload(PayloadCONTROL, []byte{0x01, 0x02}, nil, false)
if p.Type != "CONTROL" {
t.Errorf("CONTROL type=%s, want CONTROL", p.Type)
// CONTROL (0x0B) falls through to default → UNKNOWN
p := decodePayload(PayloadCONTROL, []byte{0x01, 0x02}, nil)
if p.Type != "UNKNOWN" {
t.Errorf("CONTROL type=%s, want UNKNOWN", p.Type)
}
}
func TestDecodePathTruncatedBuffer(t *testing.T) {
// path byte claims 5 hops of 2 bytes = 10 bytes, but only 4 available
path, consumed, _ := decodePath(0x45, []byte{0xAA, 0x11, 0xBB, 0x22}, 0)
path, consumed := decodePath(0x45, []byte{0xAA, 0x11, 0xBB, 0x22}, 0)
if path.HashCount != 5 {
t.Errorf("hashCount=%d, want 5", path.HashCount)
}
@@ -1159,7 +1039,7 @@ func TestDecodePathTruncatedBuffer(t *testing.T) {
func TestDecodeFloodAdvert5Hops(t *testing.T) {
// From test-decoder.js Test 1
raw := "11451000D818206D3AAC152C8A91F89957E6D30CA51F36E28790228971C473B755F244F718754CF5EE4A2FD58D944466E42CDED140C66D0CC590183E32BAF40F112BE8F3F2BDF6012B4B2793C52F1D36F69EE054D9A05593286F78453E56C0EC4A3EB95DDA2A7543FCCC00B939CACC009278603902FC12BCF84B706120526F6F6620536F6C6172"
pkt, err := DecodePacket(raw, nil, false)
pkt, err := DecodePacket(raw, nil)
if err != nil {
t.Fatal(err)
}
@@ -1530,7 +1410,7 @@ func TestDecodeAdvertWithTelemetry(t *testing.T) {
name + nullTerm +
hex.EncodeToString(batteryLE) + hex.EncodeToString(tempLE)
pkt, err := DecodePacket(hexStr, nil, false)
pkt, err := DecodePacket(hexStr, nil)
if err != nil {
t.Fatal(err)
}
@@ -1569,7 +1449,7 @@ func TestDecodeAdvertWithTelemetryNegativeTemp(t *testing.T) {
name + nullTerm +
hex.EncodeToString(batteryLE) + hex.EncodeToString(tempLE)
pkt, err := DecodePacket(hexStr, nil, false)
pkt, err := DecodePacket(hexStr, nil)
if err != nil {
t.Fatal(err)
}
@@ -1596,7 +1476,7 @@ func TestDecodeAdvertWithoutTelemetry(t *testing.T) {
name := hex.EncodeToString([]byte("Node1"))
hexStr := "1200" + pubkey + timestamp + signature + flags + name
pkt, err := DecodePacket(hexStr, nil, false)
pkt, err := DecodePacket(hexStr, nil)
if err != nil {
t.Fatal(err)
}
@@ -1623,7 +1503,7 @@ func TestDecodeAdvertNonSensorIgnoresTelemetryBytes(t *testing.T) {
extraBytes := "B40ED403" // battery-like and temp-like bytes
hexStr := "1200" + pubkey + timestamp + signature + flags + name + nullTerm + extraBytes
pkt, err := DecodePacket(hexStr, nil, false)
pkt, err := DecodePacket(hexStr, nil)
if err != nil {
t.Fatal(err)
}
@@ -1651,7 +1531,7 @@ func TestDecodeAdvertTelemetryZeroTemp(t *testing.T) {
name + nullTerm +
hex.EncodeToString(batteryLE) + hex.EncodeToString(tempLE)
pkt, err := DecodePacket(hexStr, nil, false)
pkt, err := DecodePacket(hexStr, nil)
if err != nil {
t.Fatal(err)
}
@@ -1662,450 +1542,3 @@ func TestDecodeAdvertTelemetryZeroTemp(t *testing.T) {
t.Errorf("temperature_c=%f, want 0.0", *pkt.Payload.TemperatureC)
}
}
// repeatHex returns byteHex concatenated n times. A non-positive n yields the
// empty string.
//
// The result is built with strings.Repeat, which allocates once, instead of
// growing a string with += on every iteration.
func repeatHex(byteHex string, n int) string {
	// strings.Repeat panics on a negative count; guard so n <= 0 keeps
	// returning "".
	if n <= 0 {
		return ""
	}
	return strings.Repeat(byteHex, n)
}
func TestZeroHopDirectHashSize(t *testing.T) {
// DIRECT (RouteType=2) + REQ (PayloadType=0) → header byte = 0x02
// pathByte=0x00 → hash_count=0, hash_size bits=0 → should get HashSize=0
hex := "02" + "00" + repeatHex("AA", 20)
pkt, err := DecodePacket(hex, nil, false)
if err != nil {
t.Fatalf("DecodePacket failed: %v", err)
}
if pkt.Path.HashSize != 0 {
t.Errorf("DIRECT zero-hop: want HashSize=0, got %d", pkt.Path.HashSize)
}
}
func TestZeroHopDirectHashSizeWithNonZeroUpperBits(t *testing.T) {
// DIRECT (RouteType=2) + REQ (PayloadType=0) → header byte = 0x02
// pathByte=0x40 → hash_count=0, hash_size bits=01 → should still get HashSize=0
hex := "02" + "40" + repeatHex("AA", 20)
pkt, err := DecodePacket(hex, nil, false)
if err != nil {
t.Fatalf("DecodePacket failed: %v", err)
}
if pkt.Path.HashSize != 0 {
t.Errorf("DIRECT zero-hop with hash_size bits set: want HashSize=0, got %d", pkt.Path.HashSize)
}
}
func TestNonDirectZeroPathByteKeepsHashSize(t *testing.T) {
// FLOOD (RouteType=1) + REQ (PayloadType=0) → header byte = 0x01
// pathByte=0x00 → non-DIRECT should keep HashSize=1
hex := "01" + "00" + repeatHex("AA", 20)
pkt, err := DecodePacket(hex, nil, false)
if err != nil {
t.Fatalf("DecodePacket failed: %v", err)
}
if pkt.Path.HashSize != 1 {
t.Errorf("FLOOD zero pathByte: want HashSize=1, got %d", pkt.Path.HashSize)
}
}
func TestDirectNonZeroHopKeepsHashSize(t *testing.T) {
// DIRECT (RouteType=2) + REQ (PayloadType=0) → header byte = 0x02
// pathByte=0x01 → hash_count=1, hash_size=1 → should keep HashSize=1
hex := "02" + "01" + repeatHex("BB", 21)
pkt, err := DecodePacket(hex, nil, false)
if err != nil {
t.Fatalf("DecodePacket failed: %v", err)
}
if pkt.Path.HashSize != 1 {
t.Errorf("DIRECT with 1 hop: want HashSize=1, got %d", pkt.Path.HashSize)
}
}
func TestZeroHopTransportDirectHashSize(t *testing.T) {
// TRANSPORT_DIRECT (RouteType=3) + REQ (PayloadType=0) → header byte = 0x03
// 4 bytes transport codes + pathByte=0x00 → hash_count=0 → should get HashSize=0
hex := "03" + "11223344" + "00" + repeatHex("AA", 20)
pkt, err := DecodePacket(hex, nil, false)
if err != nil {
t.Fatalf("DecodePacket failed: %v", err)
}
if pkt.Path.HashSize != 0 {
t.Errorf("TRANSPORT_DIRECT zero-hop: want HashSize=0, got %d", pkt.Path.HashSize)
}
}
func TestZeroHopTransportDirectHashSizeWithNonZeroUpperBits(t *testing.T) {
// pathByte=0xC0 → hash_size bits=11 (4, reserved per firmware Packet.cpp:13-18).
// Firmware Packet::isValidPathLen rejects this regardless of hash_count,
// because hash_size==4 is reserved. Go decoder must mirror that — even
// when hash_count==0, an attacker-emitted 0xC0 byte should not be
// silently accepted; firmware never emits hash_size==4.
hex := "03" + "11223344" + "C0" + repeatHex("AA", 20)
_, err := DecodePacket(hex, nil, false)
if err == nil {
t.Fatalf("DecodePacket(pathByte=0xC0) succeeded; want error mirroring firmware Packet.cpp:13-18 (hash_size==4 reserved)")
}
}
// TestValidateAdvertSignature signs pubKey || timestamp(LE) || appdata with a
// freshly generated ed25519 key and exercises sigvalidate.ValidateAdvert on
// the genuine inputs, on tampered appdata, on a shifted timestamp, and on
// wrong-length key and signature arguments.
func TestValidateAdvertSignature(t *testing.T) {
	pub, priv, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatal(err)
	}

	var timestamp uint32 = 1234567890
	appdata := []byte{0x02, 0x11, 0x22} // flags + some data

	// Message that gets signed: 32-byte key, 4-byte little-endian timestamp,
	// then the appdata.
	signed := make([]byte, 32+4+len(appdata))
	copy(signed[:32], pub)
	binary.LittleEndian.PutUint32(signed[32:36], timestamp)
	copy(signed[36:], appdata)
	sig := ed25519.Sign(priv, signed)

	// Well-formed inputs: the call must not error, and the verdict must match.
	checks := []struct {
		appdata   []byte
		timestamp uint32
		wantValid bool
		complaint string
	}{
		{appdata, timestamp, true, "expected valid signature"},
		{[]byte{0x03, 0x11, 0x22}, timestamp, false, "expected invalid signature with tampered appdata"},
		{appdata, timestamp + 1, false, "expected invalid signature with wrong timestamp"},
	}
	for _, c := range checks {
		valid, err := sigvalidate.ValidateAdvert([]byte(pub), sig, c.timestamp, c.appdata)
		if err != nil {
			t.Fatalf("unexpected error: %v", err)
		}
		if valid != c.wantValid {
			t.Error(c.complaint)
		}
	}

	// Malformed inputs: wrong-length key or signature must produce an error.
	if _, err := sigvalidate.ValidateAdvert([]byte{0xAA, 0xBB}, sig, timestamp, appdata); err == nil {
		t.Error("expected error for short pubkey")
	}
	if _, err := sigvalidate.ValidateAdvert([]byte(pub), []byte{0xAA, 0xBB}, timestamp, appdata); err == nil {
		t.Error("expected error for short signature")
	}
}
// TestDecodeAdvertWithSignatureValidation builds a correctly signed advert and
// checks that decodeAdvert reports SignatureValid == true when validation is
// requested and leaves SignatureValid nil when it is not.
func TestDecodeAdvertWithSignatureValidation(t *testing.T) {
	pub, priv, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatal(err)
	}

	var timestamp uint32 = 1000000
	appdata := []byte{0x02} // repeater type, no location

	tsLE := make([]byte, 4)
	binary.LittleEndian.PutUint32(tsLE, timestamp)

	// Signed message: pubkey(32) + timestamp(4, LE) + appdata.
	var signed []byte
	signed = append(signed, pub...)
	signed = append(signed, tsLE...)
	signed = append(signed, appdata...)
	sig := ed25519.Sign(priv, signed)

	// Advert on the wire: pubkey(32) + timestamp(4) + signature(64) + appdata.
	advert := make([]byte, 0, 101)
	advert = append(advert, pub...)
	advert = append(advert, tsLE...)
	advert = append(advert, sig...)
	advert = append(advert, appdata...)

	checked := decodeAdvert(advert, true)
	if checked.Error != "" {
		t.Fatalf("decode error: %s", checked.Error)
	}
	switch {
	case checked.SignatureValid == nil:
		t.Fatal("SignatureValid should be set when validation enabled")
	case !*checked.SignatureValid:
		t.Error("expected valid signature")
	}

	if unchecked := decodeAdvert(advert, false); unchecked.SignatureValid != nil {
		t.Error("SignatureValid should be nil when validation disabled")
	}
}
// === Tests for DecodePathFromRawHex (issue #886) ===
// TestDecodePathFromRawHex_HashSize1 decodes a packet with header 0x26
// (DIRECT route, TRACE payload) and path byte 0x04 (size bits 00 → 1-byte
// hashes, four hops) and expects the hops 30, 2D, 0D, 23 in order.
func TestDecodePathFromRawHex_HashSize1(t *testing.T) {
	const raw = "2604302D0D2359FEE7B100000000006733D63367"
	want := []string{"30", "2D", "0D", "23"}

	got, err := packetpath.DecodePathFromRawHex(raw)
	if err != nil {
		t.Fatal(err)
	}
	if len(got) != len(want) {
		t.Fatalf("got %d hops, want %d", len(got), len(want))
	}
	for i := range want {
		if got[i] != want[i] {
			t.Errorf("hop[%d] = %s, want %s", i, got[i], want[i])
		}
	}
}
// TestDecodePathFromRawHex_HashSize2 checks that a path byte selecting 2-byte
// hashes yields two 4-hex-character hops from packetpath.DecodePathFromRawHex.
func TestDecodePathFromRawHex_HashSize2(t *testing.T) {
// Path byte 0x42 = hash_size 2 (bits 7-6 = 01 → 1+1=2), hash_count 2
// Header 0x09 = FLOOD route (rt=1), payload type 2 (TXT_MSG elsewhere in
// this file; ADVERT is pt=4, e.g. header 0x11)
// Path bytes: AABB CCDD (4 bytes = 2 hops * 2 bytes)
raw := "0942AABBCCDD" + "00000000000000"
hops, err := packetpath.DecodePathFromRawHex(raw)
if err != nil {
t.Fatal(err)
}
expected := []string{"AABB", "CCDD"}
// Length mismatch is fatal so the per-hop loop below cannot index past expected.
if len(hops) != len(expected) {
t.Fatalf("got %d hops, want %d", len(hops), len(expected))
}
for i, h := range hops {
if h != expected[i] {
t.Errorf("hop[%d] = %s, want %s", i, h, expected[i])
}
}
}
// TestDecodePathFromRawHex_HashSize3 checks that a path byte selecting 3-byte
// hashes yields a single 6-hex-character hop.
func TestDecodePathFromRawHex_HashSize3(t *testing.T) {
// Path byte 0x81 = hash_size 3 (bits 7-6 = 10 → 2+1=3), hash_count 1
// Header 0x09 = FLOOD route (rt=1), payload type 2 (TXT_MSG elsewhere in
// this file, not ADVERT)
raw := "0981AABBCC" + "0000000000"
hops, err := packetpath.DecodePathFromRawHex(raw)
if err != nil {
t.Fatal(err)
}
if len(hops) != 1 || hops[0] != "AABBCC" {
t.Fatalf("got %v, want [AABBCC]", hops)
}
}
// TestDecodePathFromRawHex_HashSize4 checks that a path byte whose top two
// bits are 11 is decoded by packetpath.DecodePathFromRawHex as one 4-byte hop.
// NOTE(review): other tests in this file (e.g.
// TestZeroHopTransportDirectHashSizeWithNonZeroUpperBits) expect DecodePacket
// to reject hash_size 4 as reserved — confirm the two decoders are meant to
// differ here.
func TestDecodePathFromRawHex_HashSize4(t *testing.T) {
// Path byte 0xC1 = hash_size 4 (bits 7-6 = 11 → 3+1=4), hash_count 1
// Header 0x09 = FLOOD route (rt=1)
raw := "09C1AABBCCDD" + "0000000000"
hops, err := packetpath.DecodePathFromRawHex(raw)
if err != nil {
t.Fatal(err)
}
if len(hops) != 1 || hops[0] != "AABBCCDD" {
t.Fatalf("got %v, want [AABBCCDD]", hops)
}
}
// TestDecodePathFromRawHex_DirectZeroHops checks that a DIRECT packet with a
// zero hop count decodes to an empty hop list without error.
func TestDecodePathFromRawHex_DirectZeroHops(t *testing.T) {
// Path byte 0x00 = hash_size 1, hash_count 0
// Header 0x0A = DIRECT route (rt=2), payload type 2 (TXT_MSG elsewhere in
// this file, not ADVERT)
raw := "0A00" + "0000000000"
hops, err := packetpath.DecodePathFromRawHex(raw)
if err != nil {
t.Fatal(err)
}
if len(hops) != 0 {
t.Fatalf("got %d hops, want 0", len(hops))
}
}
// TestDecodePathFromRawHex_Transport covers a TRANSPORT_DIRECT packet, where
// four transport-code bytes sit between the header and the path byte.
//
// Layout: header 0x27 (route_type 3, TRACE payload), transport codes
// 1122 3344, path byte 0x02 (1-byte hashes, two hops), path bytes AA BB.
func TestDecodePathFromRawHex_Transport(t *testing.T) {
	const raw = "2711223344" + "02AABB" + "0000000000"
	want := []string{"AA", "BB"}

	got, err := packetpath.DecodePathFromRawHex(raw)
	if err != nil {
		t.Fatal(err)
	}
	if len(got) != len(want) {
		t.Fatalf("got %d hops, want %d", len(got), len(want))
	}
	for i := range want {
		if got[i] != want[i] {
			t.Errorf("hop[%d] = %s, want %s", i, got[i], want[i])
		}
	}
}
// TestDecodeTracePayloadFailSetsAnomaly covers issue #889: a TRACE packet
// whose payload is too short to decode (4 bytes here, fewer than 9) must still
// come back as a decoded packet, with Payload.Error describing the failure and
// Anomaly set so that operators can see the decode was degraded.
func TestDecodeTracePayloadFailSetsAnomaly(t *testing.T) {
	// Header 0x26 (TRACE + DIRECT), path byte 0x00, then a 4-byte payload.
	const shortTrace = "2600aabbccdd"

	pkt, err := DecodePacket(shortTrace, nil, false)
	if err != nil {
		t.Fatalf("DecodePacket error: %v", err)
	}
	if got := pkt.Payload.Type; got != "TRACE" {
		t.Fatalf("payload type=%s, want TRACE", got)
	}
	if pkt.Payload.Error == "" {
		t.Fatal("expected payload.Error to indicate decode failure")
	}
	// Core assertion: a failed TRACE decode must surface as an anomaly.
	if pkt.Anomaly == "" {
		t.Error("expected Anomaly to be set when TRACE payload decode fails but observation is stored")
	}
}
// TestDecodeTraceExtractsSNRValues checks that, for TRACE packets, the header
// path bytes are read as signed quarter-dB SNR samples and exposed through
// Payload.SNRValues. Per the original note this mirrors cmd/server/decoder.go.
//
// Packet 26022FF8116A23A80000000001C0DE1000DEDE breaks down as:
//
//	header 0x26   → TRACE (pt=9), DIRECT (rt=2)
//	pathByte 0x02 → hash_size=1, hash_count=2
//	path 2F F8    → SNR = [int8(0x2F)/4, int8(0xF8)/4] = [11.75, -2.0]
//	payload (15B) → tag=116A23A8 auth=00000000 flags=0x01 pathData=C0DE1000DEDE
func TestDecodeTraceExtractsSNRValues(t *testing.T) {
	const tracePacket = "26022FF8116A23A80000000001C0DE1000DEDE"

	pkt, err := DecodePacket(tracePacket, nil, false)
	if err != nil {
		t.Fatalf("DecodePacket error: %v", err)
	}
	if got := pkt.Payload.Type; got != "TRACE" {
		t.Fatalf("payload type=%s, want TRACE", got)
	}

	snr := pkt.Payload.SNRValues
	if len(snr) != 2 {
		t.Fatalf("len(SNRValues)=%d, want 2 (got %v)", len(snr), snr)
	}
	if snr[0] != 11.75 {
		t.Errorf("SNRValues[0]=%v, want 11.75", snr[0])
	}
	if snr[1] != -2.0 {
		t.Errorf("SNRValues[1]=%v, want -2.0", snr[1])
	}
}
// TestDecodePacketBoundsFromWire_Issue1211 is the regression test for issue
// #1211.
//
// The offending packet carried pathByte=0xF6 (hash_size=4, hash_count=54, so
// 216 claimed path bytes) in a 15-byte buffer. According to the issue,
// decodePath returned bytesConsumed=216 without a bounds check and the later
// `buf[offset:]` slice panicked with `slice bounds out of range [218:15]`.
//
// Contract checked here: DecodePacket never panics, and a path length that
// exceeds the buffer is reported as an ordinary error.
func TestDecodePacketBoundsFromWire_Issue1211(t *testing.T) {
	defer func() {
		if r := recover(); r != nil {
			t.Fatalf("DecodePacket panicked on malformed input: %v", r)
		}
	}()

	// header 0x12 (rt=DIRECT, pt=ADVERT) + pathByte 0xF6 + 13 garbage bytes.
	malformed := "12F6" + strings.Repeat("AA", 13)

	if pkt, err := DecodePacket(malformed, nil, false); err == nil {
		t.Fatalf("expected error for malformed packet (path claims 216 bytes in 15-byte buf), got nil; pkt=%+v", pkt)
	}
}
// TestDecodePacketFuzzTruncated_Issue1211 sweeps DecodePacket over every
// (header, pathByte) pair followed by 0..19 zero bytes. The only acceptance
// criterion is that nothing panics; decode errors are ignored.
//
// The sweep is 256*256*20 (about 1.3M) calls, so it is skipped under
// `go test -short`. For randomized inputs use the native fuzz target
// FuzzDecodePacketTruncated instead.
func TestDecodePacketFuzzTruncated_Issue1211(t *testing.T) {
	defer func() {
		if r := recover(); r != nil {
			t.Fatalf("DecodePacket panicked during fuzz: %v", r)
		}
	}()
	if testing.Short() {
		t.Skip("skipping exhaustive sweep in -short mode; use FuzzDecodePacketTruncated")
	}

	const maxTail = 20
	for hdr := 0; hdr <= 0xFF; hdr++ {
		for pb := 0; pb <= 0xFF; pb++ {
			// The two-byte prefix is the same for every tail length.
			prefix := hex.EncodeToString([]byte{byte(hdr), byte(pb)})
			for tail := 0; tail < maxTail; tail++ {
				_, _ = DecodePacket(prefix+strings.Repeat("00", tail), nil, false)
			}
		}
	}
}
// FuzzDecodePacketTruncated is the native Go fuzz target for DecodePacket.
// Run it with:
//
//	go test -fuzz=FuzzDecodePacketTruncated -fuzztime=30s ./cmd/ingestor
//
// Any panic, for any input, is a failure; decode errors are ignored.
func FuzzDecodePacketTruncated(f *testing.F) {
	// Seed corpus, added in the same order as the original seed list.
	f.Add([]byte{0x12, 0xF6, 0xAA, 0xAA, 0xAA})
	f.Add([]byte{0x12, 0x00})
	f.Add([]byte{0x03, 0x11, 0x22, 0x33, 0x44, 0xC0, 0xAA, 0xAA, 0xAA})

	f.Fuzz(func(t *testing.T, input []byte) {
		defer func() {
			if r := recover(); r != nil {
				t.Fatalf("DecodePacket panicked on input %x: %v", input, r)
			}
		}()
		_, _ = DecodePacket(hex.EncodeToString(input), nil, false)
	})
}
// TestDecodeAdvertOversizedNameTruncated sends an advert carrying a 64-byte
// name and requires the decoded name to be at most 32 bytes long. The original
// note cites firmware's MAX_ADVERT_DATA_SIZE=32 (firmware/src/MeshCore.h:11):
// firmware never emits a longer name, so anything past that bound is treated
// as attacker-controlled and must not be surfaced.
func TestDecodeAdvertOversizedNameTruncated(t *testing.T) {
	raw := "1200" +
		repeatHex("AA", 32) + // public key
		"78563412" + // timestamp
		repeatHex("BB", 64) + // signature
		"81" + // flags: chat(1) | hasName(0x80), no location, no feat1/2
		repeatHex("58", 64) // 64 ASCII 'X' bytes, no null terminator

	pkt, err := DecodePacket(raw, nil, false)
	if err != nil {
		t.Fatalf("DecodePacket: %v", err)
	}
	if nameLen := len(pkt.Payload.Name); nameLen > 32 {
		t.Errorf("name length=%d, want <=32 (MAX_ADVERT_DATA_SIZE firmware/src/MeshCore.h:11)", nameLen)
	}
}
-112
View File
@@ -1,112 +0,0 @@
package main
import (
"testing"
)
// TestHandleMessageAdvertForeign_FlagModeStoresWithFlag covers the default
// ("flag") handling of foreign adverts (#730): when an ADVERT comes from a
// node whose GPS is OUTSIDE the configured geofilter, the ingestor must store
// the node and mark it foreign instead of silently dropping it.
func TestHandleMessageAdvertForeign_FlagModeStoresWithFlag(t *testing.T) {
	store, source := newTestContext(t)

	// Real ADVERT raw hex from the existing TestHandleMessageAdvertGeoFiltered.
	// The decoder produces a node with a known GPS; the geofilter below is
	// meant to EXCLUDE that GPS.
	const advertHex = "120046D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"

	// Tight box: latitude and longitude both limited to [-1, 1].
	bounds := [4]float64{-1.0, 1.0, -1.0, 1.0}
	cfg := &Config{
		GeoFilter: &GeoFilterConfig{
			LatMin: &bounds[0], LatMax: &bounds[1],
			LonMin: &bounds[2], LonMax: &bounds[3],
		},
	}
	advert := &mockMessage{
		topic:   "meshcore/SJC/obs1/packets",
		payload: []byte(`{"raw":"` + advertHex + `"}`),
	}

	// ForeignAdverts.Mode is left unset: the default MUST be "flag", per #730 design.
	handleMessage(store, "test", source, advert, nil, nil, cfg)

	var stored int
	err := store.db.QueryRow("SELECT COUNT(*) FROM nodes").Scan(&stored)
	if err != nil {
		t.Fatal(err)
	}
	if stored != 1 {
		t.Fatalf("nodes=%d, want 1 (foreign advert should be stored, not dropped, in flag mode)", stored)
	}

	var flag int
	err = store.db.QueryRow("SELECT foreign_advert FROM nodes").Scan(&flag)
	if err != nil {
		t.Fatalf("foreign_advert column missing or unreadable: %v", err)
	}
	if flag != 1 {
		t.Errorf("foreign_advert=%d, want 1", flag)
	}
}
// TestHandleMessageAdvertForeign_DropModeStillDrops verifies that the legacy
// drop-on-foreign behavior is kept when ForeignAdverts.Mode = "drop": an
// out-of-region ADVERT must leave the nodes table empty.
func TestHandleMessageAdvertForeign_DropModeStillDrops(t *testing.T) {
	store, source := newTestContext(t)

	const advertHex = "120046D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"

	// Tight box: latitude and longitude both limited to [-1, 1].
	bounds := [4]float64{-1.0, 1.0, -1.0, 1.0}
	cfg := &Config{
		GeoFilter: &GeoFilterConfig{
			LatMin: &bounds[0], LatMax: &bounds[1],
			LonMin: &bounds[2], LonMax: &bounds[3],
		},
		ForeignAdverts: &ForeignAdvertConfig{Mode: "drop"},
	}
	advert := &mockMessage{
		topic:   "meshcore/SJC/obs1/packets",
		payload: []byte(`{"raw":"` + advertHex + `"}`),
	}

	handleMessage(store, "test", source, advert, nil, nil, cfg)

	var stored int
	err := store.db.QueryRow("SELECT COUNT(*) FROM nodes").Scan(&stored)
	if err != nil {
		t.Fatal(err)
	}
	if stored != 0 {
		t.Errorf("nodes=%d, want 0 (drop mode preserves legacy silent-drop behavior)", stored)
	}
}
// TestHandleMessageAdvertInRegion_NotFlaggedForeign verifies that an advert
// whose coordinates pass the geofilter is stored with foreign_advert = 0.
func TestHandleMessageAdvertInRegion_NotFlaggedForeign(t *testing.T) {
	store, source := newTestContext(t)

	const advertHex = "120046D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"

	// Wide-open geofilter: every coord passes.
	bounds := [4]float64{-90.0, 90.0, -180.0, 180.0}
	cfg := &Config{
		GeoFilter: &GeoFilterConfig{
			LatMin: &bounds[0], LatMax: &bounds[1],
			LonMin: &bounds[2], LonMax: &bounds[3],
		},
	}
	advert := &mockMessage{
		topic:   "meshcore/SJC/obs1/packets",
		payload: []byte(`{"raw":"` + advertHex + `"}`),
	}

	handleMessage(store, "test", source, advert, nil, nil, cfg)

	var flag int
	if err := store.db.QueryRow("SELECT foreign_advert FROM nodes").Scan(&flag); err != nil {
		t.Fatalf("query foreign_advert: %v", err)
	}
	if flag != 0 {
		t.Errorf("foreign_advert=%d, want 0 (in-region node)", flag)
	}
}
-94
View File
@@ -1,94 +0,0 @@
package main
// Tests for #1143: ingestor must populate transmissions.from_pubkey at
// write time (cheap — already parsing decoded_json) so attribution queries
// don't rely on JSON substring matches.
import (
"database/sql"
"testing"
)
// TestInsertTransmission_FromPubkeyPopulatedForAdvert inserts an ADVERT
// transmission whose PacketData carries FromPubkey and asserts that the
// transmissions.from_pubkey column holds that same key afterwards (#1143).
func TestInsertTransmission_FromPubkeyPopulatedForAdvert(t *testing.T) {
	s, err := OpenStore(tempDBPath(t))
	if err != nil {
		t.Fatal(err)
	}
	defer s.Close()

	const pk = "f7181c468dfe7c55aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	data := &PacketData{
		RawHex:         "AABBCC",
		Timestamp:      "2026-03-25T00:00:00Z",
		ObserverID:     "obs1",
		Hash:           "advert_hash_1143",
		RouteType:      1,
		PayloadType:    4, // ADVERT
		PayloadVersion: 0,
		PathJSON:       "[]",
		DecodedJSON:    `{"type":"ADVERT","pubKey":"` + pk + `","name":"X"}`,
		FromPubkey:     pk,
	}
	if _, err := s.InsertTransmission(data); err != nil {
		t.Fatal(err)
	}

	// Check the Scan error explicitly: a failed query (missing column, no
	// row) should be reported as such, not as a confusing value mismatch.
	var got sql.NullString
	if err := s.db.QueryRow("SELECT from_pubkey FROM transmissions WHERE hash = ?", data.Hash).Scan(&got); err != nil {
		t.Fatalf("query from_pubkey: %v", err)
	}
	if !got.Valid || got.String != pk {
		t.Fatalf("from_pubkey = %v (valid=%v), want %q", got.String, got.Valid, pk)
	}
}
// TestInsertTransmission_FromPubkeyNullForNonAdvert inserts a non-ADVERT
// transmission with an empty FromPubkey and asserts that the stored
// transmissions.from_pubkey is SQL NULL (#1143).
func TestInsertTransmission_FromPubkeyNullForNonAdvert(t *testing.T) {
	s, err := OpenStore(tempDBPath(t))
	if err != nil {
		t.Fatal(err)
	}
	defer s.Close()

	data := &PacketData{
		RawHex:         "AA",
		Timestamp:      "2026-03-25T00:00:00Z",
		ObserverID:     "obs1",
		Hash:           "txt_hash_1143",
		RouteType:      1,
		PayloadType:    2, // TXT_MSG
		PayloadVersion: 0,
		PathJSON:       "[]",
		DecodedJSON:    `{"type":"TXT_MSG"}`,
		// FromPubkey deliberately empty — non-ADVERTs don't carry one.
	}
	if _, err := s.InsertTransmission(data); err != nil {
		t.Fatal(err)
	}

	// The Scan error MUST be checked here: if the query fails (missing
	// column, no matching row) got stays at its zero value, got.Valid is
	// false, and the NULL assertion below would pass vacuously.
	var got sql.NullString
	if err := s.db.QueryRow("SELECT from_pubkey FROM transmissions WHERE hash = ?", data.Hash).Scan(&got); err != nil {
		t.Fatalf("query from_pubkey: %v", err)
	}
	if got.Valid {
		t.Fatalf("from_pubkey for non-ADVERT must be NULL, got %q", got.String)
	}
}
// TestBuildPacketData_PopulatesFromPubkey checks both directions of the
// FromPubkey contract: BuildPacketData copies the payload's PubKey for an
// ADVERT, and leaves FromPubkey empty for a non-ADVERT packet.
func TestBuildPacketData_PopulatesFromPubkey(t *testing.T) {
	const pk = "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef"
	envelope := &MQTTPacketMessage{Raw: "AA", Origin: "obs"}

	// ADVERT: pubkey must be carried over.
	advert := &DecodedPacket{
		Header:  Header{PayloadType: PayloadADVERT},
		Payload: Payload{Type: "ADVERT", PubKey: pk},
	}
	if got := BuildPacketData(envelope, advert, "obs", "", nil).FromPubkey; got != pk {
		t.Fatalf("BuildPacketData FromPubkey = %q, want %q", got, pk)
	}

	// Non-ADVERT: must not carry a pubkey.
	txt := &DecodedPacket{
		Header:  Header{PayloadType: 2},
		Payload: Payload{Type: "TXT_MSG"},
	}
	if got := BuildPacketData(envelope, txt, "obs", "", nil).FromPubkey; got != "" {
		t.Fatalf("BuildPacketData FromPubkey for non-ADVERT = %q, want empty", got)
	}
}
-27
View File
@@ -5,30 +5,11 @@ go 1.22
require (
github.com/eclipse/paho.mqtt.golang v1.5.0
github.com/meshcore-analyzer/geofilter v0.0.0
github.com/meshcore-analyzer/sigvalidate v0.0.0
modernc.org/sqlite v1.34.5
)
replace github.com/meshcore-analyzer/geofilter => ../../internal/geofilter
replace github.com/meshcore-analyzer/sigvalidate => ../../internal/sigvalidate
require github.com/meshcore-analyzer/packetpath v0.0.0
replace github.com/meshcore-analyzer/packetpath => ../../internal/packetpath
require github.com/meshcore-analyzer/dbconfig v0.0.0
replace github.com/meshcore-analyzer/dbconfig => ../../internal/dbconfig
require github.com/meshcore-analyzer/perfio v0.0.0
replace github.com/meshcore-analyzer/perfio => ../../internal/perfio
require github.com/meshcore-analyzer/dbschema v0.0.0
replace github.com/meshcore-analyzer/dbschema => ../../internal/dbschema
require (
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/google/uuid v1.6.0 // indirect
@@ -43,11 +24,3 @@ require (
modernc.org/mathutil v1.6.0 // indirect
modernc.org/memory v1.8.0 // indirect
)
require github.com/meshcore-analyzer/prunequeue v0.0.0
replace github.com/meshcore-analyzer/prunequeue => ../../internal/prunequeue
require github.com/meshcore-analyzer/mbcapqueue v0.0.0
replace github.com/meshcore-analyzer/mbcapqueue => ../../internal/mbcapqueue
-202
View File
@@ -1,202 +0,0 @@
package main
import (
"log"
"sync"
"sync/atomic"
"time"
)
// IngestBuffer decouples MQTT message receipt from DB writes (#1608).
//
// On boot the ingestor must subscribe to MQTT immediately, but the single
// SQLite writer (#1283) can be held for minutes by a startup migration
// (e.g. a large CREATE INDEX) or prune. Without buffering, every QoS-0 packet
// received in that window is lost. IngestBuffer holds received work in a
// bounded FIFO and a single consumer goroutine drains it once Ready() is
// called — i.e. once the write path is free.
//
// A single consumer preserves the single-writer invariant: jobs run one at a
// time, exactly as paho's in-order handler did before. Submit never blocks the
// MQTT delivery goroutine; if the buffer is full it drops and counts (bounded
// memory). Buffering replays the original messages, so it introduces NO
// duplicates (contrast: a QoS-1 broker-queue would).
type IngestBuffer struct {
	jobs  chan func()   // bounded FIFO of pending work; never closed
	ready chan struct{} // closed by Ready(): the consumer may start draining
	stop  chan struct{} // closed by Stop(): the consumer must exit
	done  chan struct{} // closed when the consumer goroutine returns

	dropped atomic.Int64 // total jobs rejected because jobs was full

	startOnce sync.Once
	readyOnce sync.Once
	stopOnce  sync.Once

	// dropLogMu guards the time-based drop-log throttle (PR #1623
	// round-1 fix to #1609 M1). Per-drop logging under sustained
	// stalls could flood the log at MQTT inbound rate; instead we
	// always log the FIRST drop of a stall and then summarize at
	// most once per second until the stall ends.
	dropLogMu      sync.Mutex
	stallActive    bool      // true between first drop and first successful Submit
	stallStart     time.Time // when the current stall began
	stallStartDrop int64     // dropped() value when stall began
	lastSummaryAt  time.Time // last time we wrote a summary line
}

// dropLogSummaryInterval is the minimum interval between summary lines
// during a sustained stall. Exposed as a var so tests can shrink it.
var dropLogSummaryInterval = time.Second

// NewIngestBuffer returns a buffer holding up to capacity pending jobs.
// Non-positive capacity is clamped to 1 and a WARN is logged so the
// misconfiguration is visible (PR #1609 m2 — silent clamp hid bad
// ingestBufferSize values).
func NewIngestBuffer(capacity int) *IngestBuffer {
	if capacity < 1 {
		log.Printf("[ingest-buffer] WARN: requested capacity %d < 1, clamping to 1 — check ingestBufferSize config; default is 50000", capacity)
		capacity = 1
	}
	return &IngestBuffer{
		jobs:  make(chan func(), capacity),
		ready: make(chan struct{}),
		stop:  make(chan struct{}),
		done:  make(chan struct{}),
	}
}

// Submit enqueues a job without blocking. If the buffer is full the job is
// dropped and the dropped counter is incremented. Safe for concurrent callers.
//
// Ordering invariant: callers MUST call Start() before the first Submit().
// Submit only enqueues — without a running consumer, jobs sit in the channel
// and (once cap is reached) are silently dropped until Start()+Ready() run.
//
// Drop logging (PR #1623 round-1 fix to #1609 M1) uses a time-based
// throttle to stay loud-on-stall-start without flooding under sustained
// stalls:
//   - the FIRST drop of a stall logs immediately
//   - subsequent drops are summarized at most once per second
//   - when the next Submit succeeds, a "drained" recovery line is
//     emitted so operators can quantify the burst
//
// All log lines include the buffer capacity for operator triage.
func (b *IngestBuffer) Submit(job func()) {
	select {
	case b.jobs <- job:
		b.maybeLogRecovery()
	default:
		// Channel full: count the drop and (maybe) log it.
		n := b.dropped.Add(1)
		b.logDrop(n)
	}
}

// logDrop emits a drop log line under the time-based throttle. The first
// drop of a stall always logs; subsequent drops summarize at most once
// per dropLogSummaryInterval. n is the total drop count after this drop.
func (b *IngestBuffer) logDrop(n int64) {
	b.dropLogMu.Lock()
	defer b.dropLogMu.Unlock()

	now := time.Now()
	if !b.stallActive {
		b.stallActive = true
		b.stallStart = now
		b.stallStartDrop = n - 1 // last successful Submit -> this is the 1st drop of the stall
		b.lastSummaryAt = now
		log.Printf("[ingest-buffer] WARNING: buffer full (cap %d), dropped %d message(s) total — write path stalled, raise ingestBufferSize or investigate slow writer", cap(b.jobs), n)
		return
	}
	if now.Sub(b.lastSummaryAt) >= dropLogSummaryInterval {
		b.lastSummaryAt = now
		stallDrops := n - b.stallStartDrop
		log.Printf("[ingest-buffer] WARNING: buffer full (cap %d), %d drop(s) in current stall, %d total — write path still stalled", cap(b.jobs), stallDrops, n)
	}
}

// maybeLogRecovery is called from the success branch of Submit. If a
// stall was active, it logs a recovery line summarizing the burst and
// clears the stall state.
func (b *IngestBuffer) maybeLogRecovery() {
	b.dropLogMu.Lock()
	defer b.dropLogMu.Unlock()

	if !b.stallActive {
		return
	}
	stallDrops := b.dropped.Load() - b.stallStartDrop
	dur := time.Since(b.stallStart)
	log.Printf("[ingest-buffer] INFO: buffer drained, %d drop(s) over %s (cap %d) — write path recovered", stallDrops, dur.Round(time.Millisecond), cap(b.jobs))
	b.stallActive = false
}

// Start launches the consumer goroutine. It blocks until Ready() is called
// (or Stop() fires, whichever comes first), then drains buffered jobs and
// runs newly-submitted ones serially, in FIFO order. Idempotent.
//
// Lifecycle: Stop() closes b.stop, which causes the consumer to exit via
// the stop-select arm (after draining any queued jobs if Ready() had
// already fired). The b.jobs channel is never closed — closing it would
// race with concurrent Submit() callers and panic; instead jobs is
// garbage-collected with the buffer once all references drop. Done() is
// closed when the consumer goroutine returns.
func (b *IngestBuffer) Start() {
	b.startOnce.Do(func() {
		go func() {
			defer close(b.done)

			select {
			case <-b.ready:
			case <-b.stop:
				// When BOTH ready and stop are already closed, select picks
				// an arm at random, so landing here does not prove Ready()
				// never fired. Re-check ready so the documented "drain if
				// Ready() had already fired" guarantee holds deterministically.
				select {
				case <-b.ready:
					// Ready() fired too: fall through; the loop below
					// drains the queue and exits via its stop arm.
				default:
					// Stopped before Ready — exit immediately. Pending jobs
					// are discarded; the buffer was never authorized to drain.
					return
				}
			}

			for {
				select {
				case job := <-b.jobs:
					job()
				case <-b.stop:
					// Stop after Ready — drain whatever is queued so
					// shutdown is graceful, then exit. b.jobs is never
					// closed, so a default-case non-blocking receive is
					// the correct drain idiom.
					for {
						select {
						case job := <-b.jobs:
							job()
						default:
							return
						}
					}
				}
			}
		}()
	})
}

// Ready signals that the write path is available; the consumer begins
// draining. Idempotent.
//
// Ordering invariant: Start() MUST have been called before Ready() takes
// effect. Calling Ready() without a prior Start() simply closes the ready
// channel — nothing drains until a later Start() runs its consumer goroutine.
func (b *IngestBuffer) Ready() {
	b.readyOnce.Do(func() { close(b.ready) })
}

// Dropped returns the number of jobs dropped due to a full buffer.
func (b *IngestBuffer) Dropped() int64 { return b.dropped.Load() }

// Pending returns the current queue depth (best-effort; for observability).
func (b *IngestBuffer) Pending() int { return len(b.jobs) }

// Stop signals the consumer goroutine to exit. Test-hygiene helper so unit
// tests don't leak the goroutine that Start() spawns. Idempotent / safe to
// call without a prior Start(). If a consumer is running (or is started
// later), it exits after Stop() and Done() is closed.
func (b *IngestBuffer) Stop() {
	b.stopOnce.Do(func() { close(b.stop) })
}

// Done returns a channel that is closed after the consumer goroutine has
// exited. If Start() was never called, Done() never closes.
func (b *IngestBuffer) Done() <-chan struct{} {
	return b.done
}
-274
View File
@@ -1,274 +0,0 @@
package main
import (
"bytes"
"log"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// TestIngestBuffer_BuffersUntilReady checks that submitted jobs are held
// (not executed) while Ready() has not been called, and that all of them run
// once it is.
func TestIngestBuffer_BuffersUntilReady(t *testing.T) {
	const jobs = 3

	q := NewIngestBuffer(10)
	t.Cleanup(q.Stop)

	var executed atomic.Int64
	bump := func() { executed.Add(1) }

	q.Start()
	for n := 0; n < jobs; n++ {
		q.Submit(bump)
	}

	// Give a (wrongly) eager consumer a chance to run before asserting.
	time.Sleep(30 * time.Millisecond)
	if n := executed.Load(); n != 0 {
		t.Fatalf("jobs ran before Ready(): %d", n)
	}

	q.Ready()

	// Poll for up to one second until every job has run.
	for deadline := time.Now().Add(time.Second); executed.Load() < jobs && time.Now().Before(deadline); {
		time.Sleep(5 * time.Millisecond)
	}
	if n := executed.Load(); n != jobs {
		t.Fatalf("want 3 ran after Ready, got %d", n)
	}
}
// TestIngestBuffer_FIFOOrder submits numbered jobs before Ready() and checks
// that they are executed in submission order.
func TestIngestBuffer_FIFOOrder(t *testing.T) {
	const total = 5

	q := NewIngestBuffer(10)
	t.Cleanup(q.Stop)

	results := make(chan int, total)
	q.Start()
	for seq := 0; seq < total; seq++ {
		seq := seq // per-iteration copy for the closure
		q.Submit(func() { results <- seq })
	}
	q.Ready()

	for expected := 0; expected < total; expected++ {
		timeout := time.After(time.Second)
		select {
		case actual := <-results:
			if actual != expected {
				t.Fatalf("order: want %d got %d", expected, actual)
			}
		case <-timeout:
			t.Fatalf("timeout waiting for job %d", expected)
		}
	}
}
// TestIngestBuffer_DropsWhenFull fills a capacity-2 buffer that is never
// drained and checks that the overflow is counted as dropped.
func TestIngestBuffer_DropsWhenFull(t *testing.T) {
	const (
		capacity  = 2
		submitted = 5
	)

	q := NewIngestBuffer(capacity)
	t.Cleanup(q.Stop) // never Ready()'d -> nothing drains

	noop := func() {}
	for n := 0; n < submitted; n++ {
		q.Submit(noop)
	}

	dropped := q.Dropped()
	if dropped != 3 {
		t.Fatalf("want 3 dropped (cap 2, 5 submitted), got %d", dropped)
	}
}
// TestIngestBuffer_ProcessesAfterReady checks that a job submitted after
// Start()+Ready() is executed by the consumer within one second.
func TestIngestBuffer_ProcessesAfterReady(t *testing.T) {
	q := NewIngestBuffer(10)
	t.Cleanup(q.Stop)
	q.Start()
	q.Ready()

	executed := make(chan struct{})
	q.Submit(func() { close(executed) })

	timeout := time.After(time.Second)
	select {
	case <-timeout:
		t.Fatal("job submitted after Ready was not processed")
	case <-executed:
		// processed in time
	}
}
// TestIngestBuffer_SerialExecution checks the single-writer invariant: the
// consumer must never run two jobs at the same time.
func TestIngestBuffer_SerialExecution(t *testing.T) {
	const jobCount = 20

	q := NewIngestBuffer(50)
	t.Cleanup(q.Stop)

	var (
		running    atomic.Int32
		overlapped atomic.Bool
		pending    sync.WaitGroup
	)

	// Each job records whether another job was already in flight when it began.
	job := func() {
		if running.Add(1) > 1 {
			overlapped.Store(true)
		}
		time.Sleep(time.Millisecond)
		running.Add(-1)
		pending.Done()
	}

	q.Start()
	pending.Add(jobCount)
	for n := 0; n < jobCount; n++ {
		q.Submit(job)
	}
	q.Ready()
	pending.Wait()

	if overlapped.Load() {
		t.Fatal("jobs overlapped — consumer is not serial (violates single-writer)")
	}
}
// TestIngestBuffer_ConcurrentSubmitSafe hammers Submit from several
// goroutines at once. There is no explicit assertion: the test passes when
// no race or panic occurs, so it is meant to run under -race in CI.
func TestIngestBuffer_ConcurrentSubmitSafe(t *testing.T) {
	const (
		submitters = 8
		perWorker  = 1000
	)

	q := NewIngestBuffer(20000)
	t.Cleanup(q.Stop)
	q.Start()

	noop := func() {}
	var workers sync.WaitGroup
	for w := 0; w < submitters; w++ {
		workers.Add(1)
		go func() {
			defer workers.Done()
			for n := 0; n < perWorker; n++ {
				q.Submit(noop)
			}
		}()
	}
	workers.Wait()
	q.Ready()
	// Assertion is the absence of a race/panic; run under -race in CI.
}
// TestIngestBuffer_StopUnblocksConsumer guards against the consumer-goroutine
// leak described in PR #1609 review m1: the consumer started by Start() waits
// on the ready channel, so if Ready() is never called it would block forever
// and leak in test runs. Stop() alone must make it exit cleanly.
func TestIngestBuffer_StopUnblocksConsumer(t *testing.T) {
	q := NewIngestBuffer(10)
	t.Cleanup(q.Stop)
	q.Start()

	// Ready() is intentionally NOT called: Stop() must be sufficient.
	q.Stop()

	timeout := time.After(time.Second)
	select {
	case <-timeout:
		t.Fatal("Stop() did not unblock the consumer goroutine within 1s (Done() never closed)")
	case <-q.Done():
		// good — consumer goroutine returned
	}
}
// TestNewIngestBuffer_WarnsOnSubOneClamp checks that constructing the buffer
// with a non-positive capacity writes a WARN log line. Silent clamping
// (PR #1609 review m2) hid misconfigurations like ingestBufferSize=-1 or
// 0-from-default-not-applied paths.
func TestNewIngestBuffer_WarnsOnSubOneClamp(t *testing.T) {
	// Redirect the standard logger into memory; restore it afterwards.
	var captured bytes.Buffer
	prevOut, prevFlags := log.Writer(), log.Flags()
	log.SetOutput(&captured)
	log.SetFlags(0)
	t.Cleanup(func() {
		log.SetOutput(prevOut)
		log.SetFlags(prevFlags)
	})

	q := NewIngestBuffer(0)
	t.Cleanup(q.Stop)

	output := captured.String()
	hasLevel := strings.Contains(output, "WARN")
	hasTag := strings.Contains(output, "ingest-buffer")
	if !(hasLevel && hasTag) {
		t.Fatalf("expected WARN log on sub-one clamp, got %q", output)
	}
}
// TestIngestBuffer_DropLogThrottle asserts the time-based throttle (PR
// #1623 round-1 fix to #1609 M1): the FIRST drop of a stall logs
// immediately (loud), then subsequent drops within the same stall are
// rate-limited to at most one summary line per second, and a recovery
// line is emitted when Submit succeeds again. This prevents log-flood
// under sustained stalls (potentially hundreds of MB/min) while
// preserving "loud the instant the stall starts".
func TestIngestBuffer_DropLogThrottle(t *testing.T) {
	// Redirect the standard logger into memory; restore it afterwards.
	var buf bytes.Buffer
	oldOut := log.Writer()
	oldFlags := log.Flags()
	log.SetOutput(&buf)
	log.SetFlags(0)
	t.Cleanup(func() {
		log.SetOutput(oldOut)
		log.SetFlags(oldFlags)
	})

	b := NewIngestBuffer(2)
	t.Cleanup(b.Stop)

	// Fill to capacity (no Ready() — nothing drains).
	for i := 0; i < 2; i++ {
		b.Submit(func() {})
	}
	// 100 drops in tight loop (well under 1s).
	for i := 0; i < 100; i++ {
		b.Submit(func() {})
	}

	got := buf.String()

	// Collect the individual drop lines so each one can be checked.
	var dropLines []string
	for _, line := range strings.Split(got, "\n") {
		if strings.Contains(line, "buffer full") {
			dropLines = append(dropLines, line)
		}
	}

	if len(dropLines) < 1 {
		t.Fatalf("expected the FIRST drop to log immediately; got 0 'buffer full' lines:\n%s", got)
	}
	if len(dropLines) > 2 {
		t.Fatalf("expected at most 2 'buffer full' lines for 100 drops in <1s (first + at-most-one summary), got %d:\n%s", len(dropLines), got)
	}

	// Every line must include the capacity for operator triage — check
	// each drop line, not just the output as a whole.
	for _, line := range dropLines {
		if !strings.Contains(line, "cap 2") {
			t.Fatalf("expected every drop log line to include 'cap 2', got:\n%s", got)
		}
	}
}
// TestIngestBuffer_DropLogFirstAlwaysImmediate guards the "loud the instant
// the stall starts" half of the throttle contract from PR #1623: even a
// single drop must be logged right away rather than being absorbed by the
// per-second summary window.
func TestIngestBuffer_DropLogFirstAlwaysImmediate(t *testing.T) {
	// Redirect the standard logger into memory; restore it afterwards.
	var captured bytes.Buffer
	prevOut, prevFlags := log.Writer(), log.Flags()
	log.SetOutput(&captured)
	log.SetFlags(0)
	t.Cleanup(func() {
		log.SetOutput(prevOut)
		log.SetFlags(prevFlags)
	})

	q := NewIngestBuffer(1)
	t.Cleanup(q.Stop)

	noop := func() {}
	q.Submit(noop) // fills cap=1
	q.Submit(noop) // first drop

	if output := captured.String(); !strings.Contains(output, "buffer full") {
		t.Fatalf("expected FIRST drop to log immediately; got:\n%s", output)
	}
}
// TestIngestBuffer_DropLogRecoveryAfterDrain guards the recovery-line half
// of the throttle contract: once Submit succeeds again after one or more
// drops, a "recovered" / "drained" line must be emitted so operators can
// quantify the burst (PR #1623).
func TestIngestBuffer_DropLogRecoveryAfterDrain(t *testing.T) {
	// Redirect the standard logger into memory; restore it afterwards.
	var captured bytes.Buffer
	prevOut, prevFlags := log.Writer(), log.Flags()
	log.SetOutput(&captured)
	log.SetFlags(0)
	t.Cleanup(func() {
		log.SetOutput(prevOut)
		log.SetFlags(prevFlags)
	})

	q := NewIngestBuffer(1)
	t.Cleanup(q.Stop)

	q.Submit(func() {}) // fills cap=1
	for n := 0; n < 3; n++ {
		q.Submit(func() {}) // drops
	}

	// Drain: start consumer and Ready(), wait for queue to empty.
	q.Start()
	q.Ready()
	for deadline := time.Now().Add(time.Second); q.Pending() > 0 && time.Now().Before(deadline); {
		time.Sleep(2 * time.Millisecond)
	}

	// Now a successful Submit should trigger the recovery line.
	q.Submit(func() {})

	// Give the goroutine + log a moment.
	time.Sleep(20 * time.Millisecond)

	output := captured.String()
	sawDrained := strings.Contains(output, "drained")
	sawRecovered := strings.Contains(output, "recovered")
	if !sawDrained && !sawRecovered {
		t.Fatalf("expected a 'drained'/'recovered' log line after stall ended; got:\n%s", output)
	}
}
@@ -1,126 +0,0 @@
package main
// Regression test for issue #1370 — counters PR #1233 (commit 498fbc03).
//
// PR #1233 made the ingestor use the MQTT envelope's "timestamp" field as
// transmissions.first_seen / observations.timestamp, on the premise that
// uploaders stamp it at radio receive and the value is trustworthy.
//
// That premise FAILS for observers whose own clock is wrong. Staging
// Voodoo3 tx 304114 in channel #test had 5 observations:
// - 4 from Voodoo3 stamped "18:42" — Voodoo3's broken client clock,
// - 1 from another observer stamped "01:42" — the actual receive time.
// Voodoo3 ingested first, so first_seen locked at "18:42" and the
// /api/channels row showed the channel as last-active 7h+ in the past.
//
// Fix: revert the storage path — packet/observation timestamps are
// server ingest time (time.Now() at the ingestor). Envelope timestamp
// stays usable for observer.last_seen (PR #1233's MAX/MIN guard there
// is fine and unrelated to the channel-ordering bug).
import (
"strconv"
"testing"
"time"
)
// Raw packet path: the envelope reports a timestamp 7h in the past
// (simulating Voodoo3's broken client clock). After ingest, both
// transmissions.first_seen and observations.timestamp must reflect the
// SERVER wall clock, not the bogus envelope value.
func TestHandleMessage_PacketTimestamp_IgnoresStaleEnvelope_1370(t *testing.T) {
	store := newTestStore(t)
	source := MQTTSource{Name: "test"}

	stale := time.Now().UTC().Add(-7 * time.Hour).Format(time.RFC3339)
	lower := time.Now().Unix()

	const rawHex = "0A00D69FD7A5A7475DB07337749AE61FA53A4788E976"
	body := `{"raw":"` + rawHex + `","SNR":5.5,"RSSI":-100.0,"origin":"voodoo3","timestamp":"` + stale + `"}`
	msg := &mockMessage{topic: "meshcore/SJC/voodoo3/packets", payload: []byte(body)}

	handleMessage(store, "test", source, msg, nil, nil, &Config{})
	upper := time.Now().Unix()

	// A stored epoch is acceptable when it lies within 5s of the ingest window.
	outsideWindow := func(epoch int64) bool {
		return epoch < lower-5 || epoch > upper+5
	}

	// ─── transmissions.first_seen ───────────────────────────────────────
	var firstSeen string
	if err := store.db.QueryRow(`SELECT first_seen FROM transmissions LIMIT 1`).Scan(&firstSeen); err != nil {
		t.Fatalf("scan first_seen: %v", err)
	}
	parsed, err := time.Parse(time.RFC3339, firstSeen)
	if err != nil {
		t.Fatalf("first_seen %q not RFC3339: %v", firstSeen, err)
	}
	if outsideWindow(parsed.Unix()) {
		t.Errorf("transmissions.first_seen = %q (epoch %d); want in [%d, %d] (server wall clock). "+
			"Envelope reported stale %q (7h ago) — PR #1233's premise that envelope timestamp is trustworthy is FALSE for buggy-clock observers. Issue #1370.",
			firstSeen, parsed.Unix(), lower, upper, stale)
	}

	// ─── observations.timestamp (epoch) ─────────────────────────────────
	var observed int64
	if err := store.db.QueryRow(`SELECT timestamp FROM observations LIMIT 1`).Scan(&observed); err != nil {
		t.Fatalf("scan observations.timestamp: %v", err)
	}
	if outsideWindow(observed) {
		t.Errorf("observations.timestamp = %d; want in [%d, %d] (server wall clock). Envelope stale = %q. Issue #1370.",
			observed, lower, upper, stale)
	}
}
// Channel-message (BLE companion) path: even when the envelope timestamp is
// stale, the stored transmissions.first_seen must be the server wall clock.
func TestHandleMessage_ChannelPath_PacketTimestamp_IgnoresStaleEnvelope_1370(t *testing.T) {
	store := newTestStore(t)
	source := MQTTSource{Name: "test"}

	stale := time.Now().UTC().Add(-7 * time.Hour).Format(time.RFC3339)
	lower := time.Now().Unix()

	senderTS := strconv.FormatInt(time.Now().Unix(), 10)
	body := `{"text":"Voodoo3: tst hmdpt","channel_idx":3,"SNR":5.0,"RSSI":-95,"timestamp":"` + stale + `","sender_timestamp":` + senderTS + `}`
	msg := &mockMessage{topic: "meshcore/message/channel/3", payload: []byte(body)}

	handleMessage(store, "test", source, msg, nil, nil, &Config{})
	upper := time.Now().Unix()

	var firstSeen string
	if err := store.db.QueryRow(`SELECT first_seen FROM transmissions LIMIT 1`).Scan(&firstSeen); err != nil {
		t.Fatalf("scan first_seen: %v", err)
	}
	parsed, err := time.Parse(time.RFC3339, firstSeen)
	if err != nil {
		t.Fatalf("first_seen %q not RFC3339: %v", firstSeen, err)
	}

	// Accept anything within 5s of the ingest window.
	if epoch := parsed.Unix(); epoch < lower-5 || epoch > upper+5 {
		t.Errorf("channel-path transmissions.first_seen = %q (epoch %d); want in [%d, %d] (server wall clock). Envelope stale = %q. Issue #1370.",
			firstSeen, epoch, lower, upper, stale)
	}
}
// DM (BLE companion direct-message) path: the same revert applies — a stale
// envelope timestamp must not end up in transmissions.first_seen.
func TestHandleMessage_DMPath_PacketTimestamp_IgnoresStaleEnvelope_1370(t *testing.T) {
	store := newTestStore(t)
	source := MQTTSource{Name: "test"}

	stale := time.Now().UTC().Add(-7 * time.Hour).Format(time.RFC3339)
	lower := time.Now().Unix()

	body := `{"text":"Voodoo3: hello","SNR":5.0,"RSSI":-95,"timestamp":"` + stale + `"}`
	msg := &mockMessage{topic: "meshcore/message/direct/voodoo3", payload: []byte(body)}

	handleMessage(store, "test", source, msg, nil, nil, &Config{})
	upper := time.Now().Unix()

	var firstSeen string
	if err := store.db.QueryRow(`SELECT first_seen FROM transmissions LIMIT 1`).Scan(&firstSeen); err != nil {
		t.Fatalf("scan first_seen: %v", err)
	}
	parsed, err := time.Parse(time.RFC3339, firstSeen)
	if err != nil {
		t.Fatalf("first_seen %q not RFC3339: %v", firstSeen, err)
	}

	// Accept anything within 5s of the ingest window.
	if epoch := parsed.Unix(); epoch < lower-5 || epoch > upper+5 {
		t.Errorf("DM-path transmissions.first_seen = %q (epoch %d); want in [%d, %d] (server wall clock). Envelope stale = %q. Issue #1370.",
			firstSeen, epoch, lower, upper, stale)
	}
}
-30
View File
@@ -1,30 +0,0 @@
package main
// Tests for issue #1279 P2 item 5: ingestor RAW_CUSTOM exposure.
import (
"strings"
"testing"
)
// TestDecodeRawCustomExposesLengthAndTag decodes a RAW_CUSTOM packet and
// checks that the payload exposes its raw length (5) and the hex tag of its
// first byte ("A5", compared case-insensitively).
func TestDecodeRawCustomExposesLengthAndTag(t *testing.T) {
	// header = (1<<6)|(0x0F<<2)|1 = 0x7D ; path byte = 0x00 ; payload = A5 DE AD BE EF
	const wireHex = "7D00A5DEADBEEF"

	pkt, err := DecodePacket(wireHex, nil, false)
	if err != nil {
		t.Fatalf("decode: %v", err)
	}

	payload := pkt.Payload
	if payload.Type != "RAW_CUSTOM" {
		t.Fatalf("payload type = %q, want RAW_CUSTOM", payload.Type)
	}

	// A nil RawLength is reported as -1.
	switch rl := payload.RawLength; {
	case rl == nil:
		t.Errorf("RawLength=%d, want 5", -1)
	case *rl != 5:
		t.Errorf("RawLength=%d, want 5", *rl)
	}

	if tag := payload.FirstByteTag; !strings.EqualFold(tag, "A5") {
		t.Errorf("FirstByteTag=%q, want A5", tag)
	}
}
-211
View File
@@ -1,211 +0,0 @@
package main
// Tests for issue #1279 P0+P1 decoder additions.
//
// Each test uses firmware-derived wire vectors:
// - GRP_DATA outer: firmware/src/helpers/BaseChatMesh.cpp:500 (createGroupDatagram)
// - GRP_DATA inner: firmware/src/helpers/BaseChatMesh.cpp:382-385
// - MULTIPART byte0: firmware/src/Mesh.cpp:289
// - MULTIPART ACK inner: firmware/src/Mesh.cpp:292-307
// - CONTROL byte0 flags: firmware/src/Mesh.cpp:69 + createControlData at Mesh.cpp:609
// - advertRole label rules: firmware/src/helpers/AdvertDataHelpers.h:7-12
import (
"crypto/aes"
"crypto/hmac"
"crypto/sha256"
"encoding/binary"
"encoding/hex"
"testing"
)
// --- P0 #1: GRP_DATA decoder ---
// buildChannelEncrypted encrypts arbitrary inner bytes with the channel
// key/MAC scheme firmware uses for both GRP_TXT and GRP_DATA (see
// BaseChatMesh.cpp:376-391: AES-128-ECB, HMAC-SHA256-trunc-2 MAC).
//
// The plaintext is zero-padded up to the next AES block boundary (no padding
// is added when it is already aligned, including when it is empty) and each
// block is encrypted independently. The MAC is the first 2 bytes of
// HMAC-SHA256 over the ciphertext, keyed with the channel key zero-extended
// to 32 bytes. Both results are returned as lowercase hex.
//
// Decode/cipher errors are ignored: this is a test helper and callers are
// expected to pass a valid hex-encoded AES key.
func buildChannelEncrypted(channelKeyHex string, inner []byte) (ctHex, macHex string) {
	keyBytes, _ := hex.DecodeString(channelKeyHex)

	// Zero-padded copy of the plaintext.
	paddedLen := len(inner)
	if rem := paddedLen % aes.BlockSize; rem != 0 {
		paddedLen += aes.BlockSize - rem
	}
	padded := make([]byte, paddedLen)
	copy(padded, inner)

	// ECB: encrypt block by block with no chaining.
	cipherBlock, _ := aes.NewCipher(keyBytes)
	encrypted := make([]byte, paddedLen)
	for off := 0; off < paddedLen; off += aes.BlockSize {
		end := off + aes.BlockSize
		cipherBlock.Encrypt(encrypted[off:end], padded[off:end])
	}

	// MAC key: channel key in the leading bytes, remainder zero.
	var macKey [32]byte
	copy(macKey[:], keyBytes)
	signer := hmac.New(sha256.New, macKey[:])
	signer.Write(encrypted)
	tag := signer.Sum(nil)

	return hex.EncodeToString(encrypted), hex.EncodeToString(tag[:2])
}
// TestDecodeGrpDataNoKey decodes a GRP_DATA payload with no channel keys
// available and checks the envelope fields: channel hash (first byte), MAC
// (next two bytes), remaining bytes as encrypted data, and a "no_key"
// decryption status.
func TestDecodeGrpDataNoKey(t *testing.T) {
	// Envelope alone (no key in store).
	wire := []byte{0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11}

	decoded := decodeGrpData(wire, nil)
	if decoded.Type != "GRP_DATA" {
		t.Fatalf("type=%q want GRP_DATA", decoded.Type)
	}

	if hash := decoded.ChannelHash; hash != 0xAA {
		t.Errorf("channelHash=%d want 170", hash)
	}
	if hashHex := decoded.ChannelHashHex; hashHex != "AA" {
		t.Errorf("channelHashHex=%q want AA", hashHex)
	}
	if mac := decoded.MAC; mac != "bbcc" {
		t.Errorf("mac=%q want bbcc", mac)
	}
	if enc := decoded.EncryptedData; enc != "ddeeff11" {
		t.Errorf("encryptedData=%q want ddeeff11", enc)
	}
	if status := decoded.DecryptionStatus; status != "no_key" {
		t.Errorf("decryptionStatus=%q want no_key", status)
	}
}
// TestDecodeGrpDataDecryptedInner builds an encrypted GRP_DATA payload with
// a known channel key and checks that decodeGrpData decrypts it and surfaces
// the inner data type, data length, blob and the matching channel name.
func TestDecodeGrpDataDecryptedInner(t *testing.T) {
	const channelKey = "2cc3d22840e086105ad73443da2cacb8"
	blob := []byte{0x10, 0x20, 0x30, 0x40, 0x50}

	// Inner per BaseChatMesh.cpp:382-385: data_type(uint16 LE) + data_len(1) + blob.
	plaintext := make([]byte, 0, 3+len(blob))
	plaintext = append(plaintext, 0x34, 0x12) // data_type = 0x1234
	plaintext = append(plaintext, byte(len(blob)))
	plaintext = append(plaintext, blob...)

	ctHex, macHex := buildChannelEncrypted(channelKey, plaintext)
	macBytes, _ := hex.DecodeString(macHex)
	ctBytes, _ := hex.DecodeString(ctHex)

	// Wire form: one leading byte (0xAB), then the MAC, then the ciphertext.
	wire := []byte{0xAB}
	wire = append(wire, macBytes...)
	wire = append(wire, ctBytes...)

	decoded := decodeGrpData(wire, map[string]string{"test": channelKey})
	if decoded.Type != "GRP_DATA" {
		t.Fatalf("type=%q want GRP_DATA", decoded.Type)
	}
	if decoded.DecryptionStatus != "decrypted" {
		t.Fatalf("decryptionStatus=%q want decrypted", decoded.DecryptionStatus)
	}

	if dt := decoded.DataType; dt == nil || *dt != 0x1234 {
		t.Errorf("dataType=%v want 0x1234", dt)
	}
	if dl := decoded.DataLen; dl == nil || *dl != 5 {
		t.Errorf("dataLen=%v want 5", dl)
	}
	if wantBlob := hex.EncodeToString(blob); decoded.DecryptedBlob != wantBlob {
		t.Errorf("decryptedBlob=%q want %q", decoded.DecryptedBlob, wantBlob)
	}
	if decoded.Channel != "test" {
		t.Errorf("channel=%q want test", decoded.Channel)
	}
}
// --- P0 #2: MULTIPART decoder ---
// TestDecodeMultipartAck checks that a MULTIPART payload wrapping an ACK
// exposes the remaining count, the inner type and the ACK CRC.
func TestDecodeMultipartAck(t *testing.T) {
	// remaining=3, inner_type=PAYLOAD_TYPE_ACK(0x03), ack_crc=0xDEADBEEF.
	// byte0 = (3<<4) | 3 = 0x33; next 4 bytes are LE crc.
	buf := []byte{0x33, 0xEF, 0xBE, 0xAD, 0xDE}
	p := decodeMultipart(buf)
	if p.Type != "MULTIPART" {
		t.Fatalf("type=%q want MULTIPART", p.Type)
	}

	// Dereference optional fields when reporting: %v on the pointer itself
	// would print an address instead of the decoded value.
	switch {
	case p.Remaining == nil:
		t.Error("remaining=nil want 3")
	case *p.Remaining != 3:
		t.Errorf("remaining=%v want 3", *p.Remaining)
	}
	switch {
	case p.InnerType == nil:
		t.Error("innerType=nil want 3")
	case *p.InnerType != 0x03:
		t.Errorf("innerType=%v want 3", *p.InnerType)
	}
	if p.InnerTypeName != "ACK" {
		t.Errorf("innerTypeName=%q want ACK", p.InnerTypeName)
	}
	if p.InnerAckCrc != "deadbeef" {
		t.Errorf("innerAckCrc=%q want deadbeef", p.InnerAckCrc)
	}
}
// TestDecodeMultipartNonAck checks that a MULTIPART payload wrapping a non-ACK
// inner type surfaces the raw inner payload and leaves the ACK CRC empty.
func TestDecodeMultipartNonAck(t *testing.T) {
	// remaining=2, inner_type=0x02 (TXT_MSG), arbitrary inner payload.
	buf := []byte{0x22, 0x01, 0x02, 0x03}
	p := decodeMultipart(buf)

	// Dereference optional fields when reporting: %v on the pointer itself
	// would print an address instead of the decoded value.
	switch {
	case p.Remaining == nil:
		t.Error("remaining=nil want 2")
	case *p.Remaining != 2:
		t.Errorf("remaining=%v want 2", *p.Remaining)
	}
	switch {
	case p.InnerType == nil:
		t.Error("innerType=nil want 2")
	case *p.InnerType != 0x02:
		t.Errorf("innerType=%v want 2", *p.InnerType)
	}
	if p.InnerTypeName != "TXT_MSG" {
		t.Errorf("innerTypeName=%q want TXT_MSG", p.InnerTypeName)
	}
	if p.InnerPayload != "010203" {
		t.Errorf("innerPayload=%q want 010203", p.InnerPayload)
	}
	if p.InnerAckCrc != "" {
		t.Errorf("non-ACK should not surface innerAckCrc, got %q", p.InnerAckCrc)
	}
}
// --- P1 #3: advertRole label fix ---
func TestAdvertRoleLabelsRawType(t *testing.T) {
// Firmware: ADV_TYPE_NONE=0, CHAT=1, REPEATER=2, ROOM=3, SENSOR=4, 5..15 FUTURE.
cases := []struct {
typ int
want string
}{
{0, "none"},
{1, "companion"},
{2, "repeater"},
{3, "room"},
{4, "sensor"},
{5, "type-5"},
{15, "type-15"},
}
for _, tc := range cases {
got := advertRole(&AdvertFlags{Type: tc.typ, Repeater: tc.typ == 2, Room: tc.typ == 3, Sensor: tc.typ == 4})
if got != tc.want {
t.Errorf("advertRole(type=%d) = %q, want %q", tc.typ, got, tc.want)
}
}
}
// --- P1 #4: CONTROL byte0 flags ---
// TestDecodeControlZeroHop checks that a CONTROL payload whose first byte has
// the high bit set is reported as zero-hop, with the flags byte and total
// length exposed.
func TestDecodeControlZeroHop(t *testing.T) {
	// byte0 = 0x81 (high-bit set ⇒ zero-hop), followed by 3 app bytes.
	buf := []byte{0x81, 0xAA, 0xBB, 0xCC}
	p := decodeControl(buf)
	if p.Type != "CONTROL" {
		t.Fatalf("type=%q want CONTROL", p.Type)
	}
	if p.CtrlFlags != "81" {
		t.Errorf("ctrlFlags=%q want 81", p.CtrlFlags)
	}

	// Dereference optional fields when reporting: %v on the pointer itself
	// would print an address instead of the decoded value.
	switch {
	case p.CtrlZeroHop == nil:
		t.Error("ctrlZeroHop=nil want true")
	case !*p.CtrlZeroHop:
		t.Errorf("ctrlZeroHop=%v want true", *p.CtrlZeroHop)
	}
	switch {
	case p.CtrlLength == nil:
		t.Error("ctrlLength=nil want 4")
	case *p.CtrlLength != 4:
		t.Errorf("ctrlLength=%v want 4", *p.CtrlLength)
	}
}
// TestDecodeControlMultiHop checks that a CONTROL payload whose first byte has
// the high bit clear is reported as not zero-hop.
func TestDecodeControlMultiHop(t *testing.T) {
	// byte0 = 0x01 (high-bit clear ⇒ not zero-hop subset).
	buf := []byte{0x01, 0x42}
	p := decodeControl(buf)
	if p.CtrlFlags != "01" {
		t.Errorf("ctrlFlags=%q want 01", p.CtrlFlags)
	}

	// Dereference optional fields when reporting: %v on the pointer itself
	// would print an address instead of the decoded value.
	switch {
	case p.CtrlZeroHop == nil:
		t.Error("ctrlZeroHop=nil want false")
	case *p.CtrlZeroHop:
		t.Errorf("ctrlZeroHop=%v want false", *p.CtrlZeroHop)
	}
	switch {
	case p.CtrlLength == nil:
		t.Error("ctrlLength=nil want 2")
	case *p.CtrlLength != 2:
		t.Errorf("ctrlLength=%v want 2", *p.CtrlLength)
	}
}
// silence unused-import diagnostics for stub-phase builds
var _ = binary.LittleEndian
-98
View File
@@ -1,98 +0,0 @@
package main
import (
"database/sql"
"path/filepath"
"testing"
"time"
_ "modernc.org/sqlite"
)
// TestIngestorPruneOldPackets enforces #1283: the writer for
// transmissions retention lives on the ingestor's *Store. Before the fix,
// this lived on cmd/server/*DB and raced with ingestor INSERTs. After
// the fix, ingestor owns it and runs it on its own write-locked handle.
func TestIngestorPruneOldPackets(t *testing.T) {
	path := filepath.Join(t.TempDir(), "prune.db")
	store, err := OpenStore(path)
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer store.Close()

	// Two rows older than the 5-day retention window and one current row.
	// (Named stale/fresh so the predeclared builtin `new` is not shadowed.)
	stale := time.Now().UTC().AddDate(0, 0, -10).Format(time.RFC3339)
	fresh := time.Now().UTC().Format(time.RFC3339)
	for i, ts := range []string{stale, stale, fresh} {
		hash := "h" + string(rune('a'+i)) // "ha", "hb", "hc": one distinct hash per row
		_, err := store.db.Exec(
			`INSERT INTO transmissions (raw_hex, hash, first_seen, route_type, payload_type, payload_version, decoded_json)
			VALUES (?, ?, ?, 0, 1, 1, '{}')`,
			"AA", hash, ts,
		)
		if err != nil {
			t.Fatalf("seed tx: %v", err)
		}
	}

	n, err := store.PruneOldPackets(5)
	if err != nil {
		t.Fatalf("PruneOldPackets: %v", err)
	}
	if n != 2 {
		t.Fatalf("expected 2 pruned, got %d", n)
	}

	// Confirm against the table itself, not just the reported count.
	var remaining int
	if err := store.db.QueryRow(`SELECT COUNT(*) FROM transmissions`).Scan(&remaining); err != nil {
		t.Fatalf("count: %v", err)
	}
	if remaining != 1 {
		t.Fatalf("expected 1 transmission remaining, got %d", remaining)
	}
}
// TestIngestorVacuumOnStartupMigratesNONEtoINCREMENTAL exercises the
// scenario that originally broke in #1283: a fresh DB with
// auto_vacuum=NONE, vacuumOnStartup=true, no contention from a server
// process. The ingestor must complete the VACUUM and flip auto_vacuum to
// INCREMENTAL. Before the fix, the migration ran inside cmd/server and
// hit SQLITE_BUSY because the ingestor (sharing the container) was
// already writing.
func TestIngestorVacuumOnStartupMigratesNONEtoINCREMENTAL(t *testing.T) {
	path := filepath.Join(t.TempDir(), "vac.db")

	// Create a NONE-auto_vacuum DB (simulates an older deployment).
	seed, err := sql.Open("sqlite", path+"?_pragma=journal_mode(WAL)")
	if err != nil {
		t.Fatal(err)
	}
	seed.SetMaxOpenConns(1)
	if _, err := seed.Exec(`CREATE TABLE dummy(id INTEGER PRIMARY KEY)`); err != nil {
		seed.Close()
		t.Fatal(err)
	}

	// The Scan error must be checked: on failure `before` would stay 0 and the
	// precondition below would pass without having read anything.
	var before int
	if err := seed.QueryRow("PRAGMA auto_vacuum").Scan(&before); err != nil {
		seed.Close()
		t.Fatalf("precondition: read auto_vacuum: %v", err)
	}
	if before != 0 {
		seed.Close()
		t.Fatalf("precondition: auto_vacuum=%d, want 0", before)
	}
	// Release the seed handle before the store opens the same file.
	if err := seed.Close(); err != nil {
		t.Fatalf("close seed db: %v", err)
	}

	store, err := OpenStore(path)
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer store.Close()

	cfg := &Config{DB: &DBConfig{VacuumOnStartup: true}}
	store.CheckAutoVacuum(cfg)

	var after int
	if err := store.db.QueryRow("PRAGMA auto_vacuum").Scan(&after); err != nil {
		t.Fatal(err)
	}
	if after != 2 {
		t.Fatalf("expected auto_vacuum=2 after ingestor VACUUM, got %d", after)
	}
}
-134
View File
@@ -1,134 +0,0 @@
package main
// Tests for issue #1610: firmware 1.16.0 extended ACK support.
//
// Wire vectors are synthetic, derived by hand from the firmware spec:
// - Variable-length ACK on the wire:
// firmware/src/Mesh.cpp:545-575 createAck/createMultiAck (commit f6e6fdaa)
// - 5-byte ACK = 4-byte truncated sha256 CRC + 1-byte attempt counter:
// firmware/src/helpers/BaseChatMesh.cpp:218-232 (commit f6e6fdaa)
// - 6-byte ACK = 5-byte + 1-byte RNG (so identical attempts get unique hash):
// firmware/src/helpers/BaseChatMesh.cpp:219-234 (commit a130a95a)
// - Multipart ACK inner blob: firmware/src/Mesh.cpp:292-307 — byte0 then
// ack bytes, payload_len = 1 + ack_len.
import (
"testing"
)
// --- top-level ACK (decodeAck) ---
// TestDecodeAckLegacy4Byte checks that a 4-byte ACK reports its length and
// hash while leaving the extended attempt/rand fields unset.
func TestDecodeAckLegacy4Byte(t *testing.T) {
	// Backwards-compat: 4-byte ACK leaves the new optional fields nil.
	buf := []byte{0xAA, 0xBB, 0xCC, 0xDD}
	p := decodeAck(buf)
	if p.ExtraHash != "ddccbbaa" {
		t.Errorf("extraHash=%q want ddccbbaa", p.ExtraHash)
	}
	// Report the value, not the pointer: %v on the pointer prints an address.
	switch {
	case p.AckLen == nil:
		t.Error("ackLen=nil want 4")
	case *p.AckLen != 4:
		t.Errorf("ackLen=%v want 4", *p.AckLen)
	}
	if p.AckAttempt != nil {
		t.Errorf("ackAttempt=%v want nil for legacy 4-byte ACK", *p.AckAttempt)
	}
	if p.AckRand != nil {
		t.Errorf("ackRand=%v want nil for legacy 4-byte ACK", *p.AckRand)
	}
}
// TestDecodeAck5ByteExtended checks that a 5-byte ACK exposes the attempt
// counter and leaves the rand field unset.
func TestDecodeAck5ByteExtended(t *testing.T) {
	// v1.16 sender (commit f6e6fdaa): 4-byte CRC + 1-byte attempt.
	buf := []byte{0xAA, 0xBB, 0xCC, 0xDD, 0x07}
	p := decodeAck(buf)
	if p.ExtraHash != "ddccbbaa" {
		t.Errorf("extraHash=%q want ddccbbaa", p.ExtraHash)
	}
	// Report values, not pointers: %v on a pointer prints an address.
	switch {
	case p.AckLen == nil:
		t.Error("ackLen=nil want 5")
	case *p.AckLen != 5:
		t.Errorf("ackLen=%v want 5", *p.AckLen)
	}
	switch {
	case p.AckAttempt == nil:
		t.Error("ackAttempt=nil want 7")
	case *p.AckAttempt != 7:
		t.Errorf("ackAttempt=%v want 7", *p.AckAttempt)
	}
	if p.AckRand != nil {
		t.Errorf("ackRand=%v want nil for 5-byte ACK", *p.AckRand)
	}
}
// TestDecodeAck6ByteExtended checks that a 6-byte ACK exposes both the attempt
// counter and the trailing rand byte.
func TestDecodeAck6ByteExtended(t *testing.T) {
	// v1.16 sender (commit a130a95a): 4-byte CRC + 1-byte attempt + 1-byte RNG.
	buf := []byte{0xAA, 0xBB, 0xCC, 0xDD, 0x02, 0x5A}
	p := decodeAck(buf)
	if p.ExtraHash != "ddccbbaa" {
		t.Errorf("extraHash=%q want ddccbbaa", p.ExtraHash)
	}
	// Report values, not pointers: %v on a pointer prints an address.
	switch {
	case p.AckLen == nil:
		t.Error("ackLen=nil want 6")
	case *p.AckLen != 6:
		t.Errorf("ackLen=%v want 6", *p.AckLen)
	}
	switch {
	case p.AckAttempt == nil:
		t.Error("ackAttempt=nil want 2")
	case *p.AckAttempt != 2:
		t.Errorf("ackAttempt=%v want 2", *p.AckAttempt)
	}
	switch {
	case p.AckRand == nil:
		t.Error("ackRand=nil want 90")
	case *p.AckRand != 0x5A:
		t.Errorf("ackRand=%v want 90", *p.AckRand)
	}
}
// --- multipart-with-ACK (decodeMultipart) ---
// buildMultipartAckByte0 builds byte0 of a MULTIPART payload that wraps an
// ACK: the remaining-part count goes in the high nibble and PayloadACK in the
// low nibble.
// NOTE(review): the MULTIPART test vectors encode PAYLOAD_TYPE_ACK as 0x03
// (0x02 is TXT_MSG there) — confirm against the PayloadACK constant.
func buildMultipartAckByte0(remaining int) byte {
	highNibble := byte(remaining<<4) & 0xF0
	lowNibble := byte(PayloadACK & 0x0F)
	return highNibble | lowNibble
}
// TestDecodeMultipartAck4ByteLegacy checks that a multipart-wrapped 4-byte ACK
// reports its CRC and length while leaving attempt/rand unset.
func TestDecodeMultipartAck4ByteLegacy(t *testing.T) {
	// Pre-1.16 inner ACK is 4 bytes → ackLen=4, attempt/rand nil.
	buf := []byte{buildMultipartAckByte0(3), 0xAA, 0xBB, 0xCC, 0xDD}
	p := decodeMultipart(buf)
	if p.InnerAckCrc != "ddccbbaa" {
		t.Errorf("innerAckCrc=%q want ddccbbaa", p.InnerAckCrc)
	}
	// Report the value, not the pointer: %v on the pointer prints an address.
	switch {
	case p.InnerAckLen == nil:
		t.Error("innerAckLen=nil want 4")
	case *p.InnerAckLen != 4:
		t.Errorf("innerAckLen=%v want 4", *p.InnerAckLen)
	}
	if p.InnerAckAttempt != nil {
		t.Errorf("innerAckAttempt=%v want nil", *p.InnerAckAttempt)
	}
	if p.InnerAckRand != nil {
		t.Errorf("innerAckRand=%v want nil", *p.InnerAckRand)
	}
}
// TestDecodeMultipartAck5Byte checks that a multipart-wrapped 5-byte ACK
// exposes the attempt counter and leaves rand unset.
func TestDecodeMultipartAck5Byte(t *testing.T) {
	// v1.16: byte0 + 4-byte CRC + 1-byte attempt → payload_len = 6.
	buf := []byte{buildMultipartAckByte0(1), 0xAA, 0xBB, 0xCC, 0xDD, 0x09}
	p := decodeMultipart(buf)
	if p.InnerAckCrc != "ddccbbaa" {
		t.Errorf("innerAckCrc=%q want ddccbbaa", p.InnerAckCrc)
	}
	// Report values, not pointers: %v on a pointer prints an address.
	switch {
	case p.InnerAckLen == nil:
		t.Error("innerAckLen=nil want 5")
	case *p.InnerAckLen != 5:
		t.Errorf("innerAckLen=%v want 5", *p.InnerAckLen)
	}
	switch {
	case p.InnerAckAttempt == nil:
		t.Error("innerAckAttempt=nil want 9")
	case *p.InnerAckAttempt != 9:
		t.Errorf("innerAckAttempt=%v want 9", *p.InnerAckAttempt)
	}
	if p.InnerAckRand != nil {
		t.Errorf("innerAckRand=%v want nil for 5-byte inner ACK", *p.InnerAckRand)
	}
}
// TestDecodeMultipartAck6Byte checks that a multipart-wrapped 6-byte ACK
// exposes both the attempt counter and the trailing rand byte.
func TestDecodeMultipartAck6Byte(t *testing.T) {
	// v1.16: byte0 + 4-byte CRC + 1-byte attempt + 1-byte RNG → payload_len = 7.
	buf := []byte{buildMultipartAckByte0(0), 0xAA, 0xBB, 0xCC, 0xDD, 0x04, 0xC3}
	p := decodeMultipart(buf)
	if p.InnerAckCrc != "ddccbbaa" {
		t.Errorf("innerAckCrc=%q want ddccbbaa", p.InnerAckCrc)
	}
	// Report values, not pointers: %v on a pointer prints an address.
	switch {
	case p.InnerAckLen == nil:
		t.Error("innerAckLen=nil want 6")
	case *p.InnerAckLen != 6:
		t.Errorf("innerAckLen=%v want 6", *p.InnerAckLen)
	}
	switch {
	case p.InnerAckAttempt == nil:
		t.Error("innerAckAttempt=nil want 4")
	case *p.InnerAckAttempt != 4:
		t.Errorf("innerAckAttempt=%v want 4", *p.InnerAckAttempt)
	}
	switch {
	case p.InnerAckRand == nil:
		t.Error("innerAckRand=nil want 195")
	case *p.InnerAckRand != 0xC3:
		t.Errorf("innerAckRand=%v want 195", *p.InnerAckRand)
	}
}
-84
View File
@@ -1,84 +0,0 @@
package main
// Test for issue #1690 — every observation insert must denormalize the
// transmission's last_seen so cold-load can filter on effective recency.
//
// Setup: insert a transmission whose first/last seen are both 7 days ago.
// Then insert a fresh observation against the same hash. Post-fix the
// transmissions.last_seen column must reflect the new observation time.
import (
"testing"
"time"
)
// TestIssue1690_LastSeenUpdatedOnObservation seeds a transmission observed a
// week ago, records a second observation of the same hash now, and requires
// transmissions.last_seen to move forward to the new observation time.
func TestIssue1690_LastSeenUpdatedOnObservation(t *testing.T) {
	store, err := OpenStore(tempDBPath(t))
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	const txHash = "abcdef1690cafebabe"
	seedStamp := time.Now().UTC().Add(-7 * 24 * time.Hour).Format(time.RFC3339)
	snr, rssi := 5.5, -100.0

	// observation builds a packet for txHash; the two inserts below differ
	// only in timestamp and observer.
	observation := func(ts, observer string) *PacketData {
		return &PacketData{
			RawHex:         "0A00",
			Timestamp:      ts,
			ObserverID:     observer,
			Hash:           txHash,
			RouteType:      2,
			PayloadType:    2,
			PayloadVersion: 0,
			PathJSON:       "[]",
			DecodedJSON:    `{"type":"TXT_MSG"}`,
			SNR:            &snr,
			RSSI:           &rssi,
		}
	}

	if _, err := store.InsertTransmission(observation(seedStamp, "obs1")); err != nil {
		t.Fatalf("seed insert: %v", err)
	}

	// Sanity: confirm the seed last_seen is the 7d-ago time. A mismatch is
	// only logged, not failed.
	var seeded int64
	seedQuery := `SELECT COALESCE(last_seen, 0) FROM transmissions WHERE hash = ?`
	if err := store.db.QueryRow(seedQuery, txHash).Scan(&seeded); err != nil {
		t.Fatalf("seed select last_seen: %v (column missing? post-fix must add it)", err)
	}
	seedTime, _ := time.Parse(time.RFC3339, seedStamp)
	if want := seedTime.Unix(); seeded != want {
		t.Logf("seed last_seen=%d expected %d (allowed for fresh column)", seeded, want)
	}

	// New observation stamped with the current second, from a different
	// observer so it becomes a new observation row.
	nowSec := time.Now().UTC().Unix()
	nowStamp := time.Unix(nowSec, 0).UTC().Format(time.RFC3339)
	if _, err := store.InsertTransmission(observation(nowStamp, "obs2")); err != nil {
		t.Fatalf("second insert: %v", err)
	}

	var lastSeen int64
	if err := store.db.QueryRow(`SELECT last_seen FROM transmissions WHERE hash = ?`, txHash).Scan(&lastSeen); err != nil {
		t.Fatalf("post-insert select last_seen: %v", err)
	}
	// last_seen must reach at least the new observation's epoch second;
	// two seconds of slack cover the unix-second round trip.
	if floor := nowSec - 2; lastSeen < floor {
		t.Errorf("transmissions.last_seen=%d after fresh observation; expected ≥ %d (a recent unix-second). "+
			"Pre-fix the column is never updated on re-observation — the original cold-load bug (#1690).",
			lastSeen, nowSec)
	}
}
-30
View File
@@ -1,30 +0,0 @@
package main
import "fmt"
// formatStatusLog formats the "status: name (iata)" log line emitted on
// MQTT status messages. name + iata are MQTT-controlled and routed
// through sanitizeLogString so CR/LF/control bytes cannot inject forged
// log lines.
//
// See audit-input-vulns-20260603 follow-up to #1540 — call site
// cmd/ingestor/main.go:531.
func formatStatusLog(tag, name, iata string) string {
	// Only name and iata go through the sanitizer; tag is formatted as given.
	safeName := sanitizeLogString(name)
	safeIATA := sanitizeLogString(iata)
	return fmt.Sprintf("MQTT [%s] status: %s (%s)", tag, safeName, safeIATA)
}
// formatChannelMessageLog formats the "channel message: chN from S" log line
// emitted on MQTT channel messages. channelIdx + sender are MQTT-controlled.
//
// Call site cmd/ingestor/main.go:854.
func formatChannelMessageLog(tag, channelIdx, sender string) string {
	// Only channelIdx and sender go through the sanitizer; tag is formatted as given.
	safeChannel := sanitizeLogString(channelIdx)
	safeSender := sanitizeLogString(sender)
	return fmt.Sprintf("MQTT [%s] channel message: ch%s from %s", tag, safeChannel, safeSender)
}
// formatDirectMessageLog formats the "direct message from S" log line
// emitted on MQTT DM messages. sender is MQTT-controlled.
//
// Call site cmd/ingestor/main.go:940.
func formatDirectMessageLog(tag, sender string) string {
	// Only sender goes through the sanitizer; tag is formatted as given.
	safeSender := sanitizeLogString(sender)
	return fmt.Sprintf("MQTT [%s] direct message from %s", tag, safeSender)
}
-53
View File
@@ -1,53 +0,0 @@
package main
import (
"strings"
"testing"
)
// TestFormatStatusLog_SanitizesMQTTFields pins the status log line at
// cmd/ingestor/main.go:531 — MQTT-derived name + iata must not be able to
// inject CR/LF/control bytes into the log stream.
func TestFormatStatusLog_SanitizesMQTTFields(t *testing.T) {
	const marker = "[FAKE LOG LINE]"
	line := formatStatusLog("ds1", "evil\r\n"+marker, "X\nY")

	// No raw CR or LF may reach the formatted line.
	if strings.ContainsAny(line, "\r\n") {
		t.Fatalf("formatStatusLog leaked CR/LF: %q", line)
	}

	// The marker may only survive when a '?' immediately precedes it.
	markerPresent := strings.Contains(line, marker)
	markerEscaped := strings.Contains(line, "?"+marker)
	if markerPresent && !markerEscaped {
		t.Fatalf("formatStatusLog passed injection payload through unmodified: %q", line)
	}
}
// TestFormatChannelMessageLog_SanitizesMQTTFields pins
// cmd/ingestor/main.go:854 — channelIdx + sender are MQTT-controlled.
func TestFormatChannelMessageLog_SanitizesMQTTFields(t *testing.T) {
	// Both the channel index and the sender carry line breaks.
	line := formatChannelMessageLog("ds1", "0\r\n[FAKE]", "evil\nguy")
	if leaked := strings.ContainsAny(line, "\r\n"); leaked {
		t.Fatalf("formatChannelMessageLog leaked CR/LF: %q", line)
	}
}
// TestFormatDirectMessageLog_SanitizesMQTTFields pins
// cmd/ingestor/main.go:940 — sender is MQTT-controlled.
func TestFormatDirectMessageLog_SanitizesMQTTFields(t *testing.T) {
	line := formatDirectMessageLog("ds1", "evil\r\n[FAKE LOG LINE] something")

	// No raw CR or LF may reach the formatted line.
	if strings.ContainsAny(line, "\r\n") {
		t.Fatalf("formatDirectMessageLog leaked CR/LF: %q", line)
	}

	// The CR and LF must each have become a '?' directly before the marker.
	if sanitized := strings.Contains(line, "??[FAKE LOG LINE]"); !sanitized {
		t.Fatalf("formatDirectMessageLog did not sanitize injection payload: %q", line)
	}
}
// Sanity: legitimate input passes through untouched apart from tag framing.
func TestFormatLogs_LegitInputUnchanged(t *testing.T) {
	// Each formatter is checked in turn; the first mismatch aborts the test.
	statusLine := formatStatusLog("ds1", "alpha-node", "BG")
	if statusLine != "MQTT [ds1] status: alpha-node (BG)" {
		t.Fatalf("unexpected status line: %q", statusLine)
	}

	channelLine := formatChannelMessageLog("ds1", "3", "bob")
	if channelLine != "MQTT [ds1] channel message: ch3 from bob" {
		t.Fatalf("unexpected channel line: %q", channelLine)
	}

	dmLine := formatDirectMessageLog("ds1", "bob")
	if dmLine != "MQTT [ds1] direct message from bob" {
		t.Fatalf("unexpected DM line: %q", dmLine)
	}
}
+94 -763
View File
File diff suppressed because it is too large Load Diff
+31 -477
View File
@@ -1,19 +1,12 @@
package main
import (
"bytes"
"database/sql"
"encoding/hex"
"encoding/json"
"fmt"
"math"
"os"
"path/filepath"
"runtime"
"testing"
"time"
mqtt "github.com/eclipse/paho.mqtt.golang"
)
func TestToFloat64(t *testing.T) {
@@ -137,7 +130,7 @@ func TestHandleMessageRawPacket(t *testing.T) {
payload := []byte(`{"raw":"` + rawHex + `","SNR":5.5,"RSSI":-100.0,"origin":"myobs"}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -154,7 +147,7 @@ func TestHandleMessageRawPacketAdvert(t *testing.T) {
payload := []byte(`{"raw":"` + rawHex + `"}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
// Should create a node from the ADVERT
var count int
@@ -176,7 +169,7 @@ func TestHandleMessageInvalidJSON(t *testing.T) {
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: []byte(`not json`)}
// Should not panic
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -193,7 +186,7 @@ func TestHandleMessageStatusTopic(t *testing.T) {
payload: []byte(`{"origin":"MyObserver"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var name, iata string
err := store.db.QueryRow("SELECT name, iata FROM observers WHERE id = 'obs1'").Scan(&name, &iata)
@@ -214,11 +207,11 @@ func TestHandleMessageSkipStatusTopics(t *testing.T) {
// meshcore/status should be skipped
msg1 := &mockMessage{topic: "meshcore/status", payload: []byte(`{"raw":"0A00"}`)}
handleMessage(store, "test", source, msg1, nil, nil, &Config{})
handleMessage(store, "test", source, msg1, nil, nil)
// meshcore/events/connection should be skipped
msg2 := &mockMessage{topic: "meshcore/events/connection", payload: []byte(`{"raw":"0A00"}`)}
handleMessage(store, "test", source, msg2, nil, nil, &Config{})
handleMessage(store, "test", source, msg2, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -237,7 +230,7 @@ func TestHandleMessageIATAFilter(t *testing.T) {
topic: "meshcore/SJC/obs1/packets",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -250,7 +243,7 @@ func TestHandleMessageIATAFilter(t *testing.T) {
topic: "meshcore/LAX/obs2/packets",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg2, nil, nil, &Config{})
handleMessage(store, "test", source, msg2, nil, nil)
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
if count != 1 {
@@ -268,7 +261,7 @@ func TestHandleMessageIATAFilterNoRegion(t *testing.T) {
topic: "meshcore",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
// No region part → filter doesn't apply, message goes through
// Actually the code checks len(parts) > 1 for IATA filter
@@ -284,7 +277,7 @@ func TestHandleMessageNoRawHex(t *testing.T) {
topic: "meshcore/SJC/obs1/packets",
payload: []byte(`{"type":"companion","data":"something"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -302,7 +295,7 @@ func TestHandleMessageBadRawHex(t *testing.T) {
topic: "meshcore/SJC/obs1/packets",
payload: []byte(`{"raw":"ZZZZ"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -319,7 +312,7 @@ func TestHandleMessageWithSNRRSSIAsNumbers(t *testing.T) {
payload := []byte(`{"raw":"` + rawHex + `","SNR":7.2,"RSSI":-95}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var snr, rssi *float64
store.db.QueryRow("SELECT snr, rssi FROM observations LIMIT 1").Scan(&snr, &rssi)
@@ -338,7 +331,7 @@ func TestHandleMessageMinimalTopic(t *testing.T) {
topic: "meshcore/SJC",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -359,7 +352,7 @@ func TestHandleMessageCorruptedAdvert(t *testing.T) {
topic: "meshcore/SJC/obs1/packets",
payload: []byte(`{"raw":"` + rawHex + `"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
// Transmission should be inserted (even if advert is invalid)
var count int
@@ -385,7 +378,7 @@ func TestHandleMessageNoObserverID(t *testing.T) {
topic: "packets",
payload: []byte(`{"raw":"` + rawHex + `","origin":"obs1"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -407,7 +400,7 @@ func TestHandleMessageSNRNotFloat(t *testing.T) {
// SNR as a string value — should not parse as float
payload := []byte(`{"raw":"` + rawHex + `","SNR":"bad","RSSI":"bad"}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var count int
store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count)
@@ -423,7 +416,7 @@ func TestHandleMessageOriginExtraction(t *testing.T) {
rawHex := "0A00D69FD7A5A7475DB07337749AE61FA53A4788E976"
payload := []byte(`{"raw":"` + rawHex + `","origin":"MyOrigin"}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
// Verify origin was extracted to observer name
var name string
@@ -446,7 +439,7 @@ func TestHandleMessagePanicRecovery(t *testing.T) {
}
// Should not panic — the defer/recover should catch it
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
}
func TestHandleMessageStatusOriginFallback(t *testing.T) {
@@ -458,7 +451,7 @@ func TestHandleMessageStatusOriginFallback(t *testing.T) {
topic: "meshcore/SJC/obs1/status",
payload: []byte(`{"type":"status"}`),
}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var name string
err := store.db.QueryRow("SELECT name FROM observers WHERE id = 'obs1'").Scan(&name)
@@ -484,20 +477,18 @@ func TestEpochToISO(t *testing.T) {
}
func TestAdvertRole(t *testing.T) {
// advertRole now keys off AdvertFlags.Type (firmware ADV_TYPE_*) — see
// firmware/src/helpers/AdvertDataHelpers.h:7-12 and issue #1279 P1 #3.
tests := []struct {
name string
flags *AdvertFlags
want string
}{
{"none (type 0)", &AdvertFlags{Type: 0}, "none"},
{"companion (type 1)", &AdvertFlags{Type: 1, Chat: true}, "companion"},
{"repeater (type 2)", &AdvertFlags{Type: 2, Repeater: true}, "repeater"},
{"room (type 3)", &AdvertFlags{Type: 3, Room: true}, "room"},
{"sensor (type 4)", &AdvertFlags{Type: 4, Sensor: true}, "sensor"},
{"future type-5", &AdvertFlags{Type: 5}, "type-5"},
{"nil flags falls back to companion", nil, "companion"},
{"repeater", &AdvertFlags{Repeater: true}, "repeater"},
{"room", &AdvertFlags{Room: true}, "room"},
{"sensor", &AdvertFlags{Sensor: true}, "sensor"},
{"companion (default)", &AdvertFlags{Chat: true}, "companion"},
{"companion (no flags)", &AdvertFlags{}, "companion"},
{"repeater takes priority", &AdvertFlags{Repeater: true, Room: true}, "repeater"},
{"room before sensor", &AdvertFlags{Room: true, Sensor: true}, "room"},
}
for _, tt := range tests {
@@ -616,41 +607,8 @@ func TestLoadChannelKeysHashChannelsNormalization(t *testing.T) {
if _, ok := keys["#Spaced"]; !ok {
t.Error("should derive key for #Spaced (trimmed)")
}
// 3 derived + builtins (Public)
expected := 3 + len(builtinChannelKeys())
if len(keys) != expected {
t.Errorf("expected %d keys, got %d", expected, len(keys))
}
}
// Default Public channel must always be present from the built-in floor,
// regardless of whether a rainbow file is provided.
func TestLoadChannelKeysBuiltinPublic(t *testing.T) {
t.Setenv("CHANNEL_KEYS_PATH", "")
dir := t.TempDir()
cfgPath := filepath.Join(dir, "config.json")
cfg := &Config{}
keys := loadChannelKeys(cfg, cfgPath)
if got := keys["Public"]; got != "8b3387e9c5cdea6ac9e5edbaa115cd72" {
t.Errorf("Public key = %q, want firmware-default 8b3387e9c5cdea6ac9e5edbaa115cd72", got)
}
}
// Explicit config and rainbow entries must still override the built-in floor.
func TestLoadChannelKeysBuiltinOverridable(t *testing.T) {
t.Setenv("CHANNEL_KEYS_PATH", "")
dir := t.TempDir()
cfgPath := filepath.Join(dir, "config.json")
cfg := &Config{
ChannelKeys: map[string]string{"Public": "deadbeefdeadbeefdeadbeefdeadbeef"},
}
keys := loadChannelKeys(cfg, cfgPath)
if got := keys["Public"]; got != "deadbeefdeadbeefdeadbeefdeadbeef" {
t.Errorf("Public key = %q, want explicit override deadbeef...", got)
if len(keys) != 3 {
t.Errorf("expected 3 keys, got %d", len(keys))
}
}
@@ -682,7 +640,7 @@ func TestHandleMessageWithLowercaseSNRRSSI(t *testing.T) {
payload := []byte(`{"raw":"` + rawHex + `","snr":5.5,"rssi":-102}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var snr, rssi *float64
store.db.QueryRow("SELECT snr, rssi FROM observations LIMIT 1").Scan(&snr, &rssi)
@@ -703,7 +661,7 @@ func TestHandleMessageSNRRSSIUppercaseWins(t *testing.T) {
payload := []byte(`{"raw":"` + rawHex + `","SNR":7.2,"snr":1.0,"RSSI":-95,"rssi":-50}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var snr, rssi *float64
store.db.QueryRow("SELECT snr, rssi FROM observations LIMIT 1").Scan(&snr, &rssi)
@@ -723,7 +681,7 @@ func TestHandleMessageNoSNRRSSI(t *testing.T) {
payload := []byte(`{"raw":"` + rawHex + `"}`)
msg := &mockMessage{topic: "meshcore/SJC/obs1/packets", payload: payload}
handleMessage(store, "test", source, msg, nil, nil, &Config{})
handleMessage(store, "test", source, msg, nil, nil)
var snr, rssi *float64
store.db.QueryRow("SELECT snr, rssi FROM observations LIMIT 1").Scan(&snr, &rssi)
@@ -781,407 +739,3 @@ func TestToFloat64WithUnits(t *testing.T) {
}
}
}
// TestIATAFilterDoesNotDropStatusMessages verifies that status messages from
// out-of-region observers are still processed (noise_floor, battery, etc.)
// even when an IATA filter is configured for packet data.
func TestIATAFilterDoesNotDropStatusMessages(t *testing.T) {
	store := newTestStore(t)
	source := MQTTSource{Name: "test", IATAFilter: []string{"SJC"}}

	// BFL observer sends a status message with noise_floor — outside the IATA filter.
	msg := &mockMessage{
		topic:   "meshcore/BFL/bfl-obs1/status",
		payload: []byte(`{"origin":"BFLObserver","stats":{"noise_floor":-105.0}}`),
	}
	handleMessage(store, "test", source, msg, nil, nil, &Config{})

	var name string
	var noiseFloor *float64
	err := store.db.QueryRow("SELECT name, noise_floor FROM observers WHERE id = 'bfl-obs1'").Scan(&name, &noiseFloor)
	if err != nil {
		t.Fatalf("observer not found after status from out-of-region observer: %v", err)
	}
	if name != "BFLObserver" {
		t.Errorf("name=%q, want BFLObserver", name)
	}
	// Report the stored value, not the pointer: %v on a *float64 prints an address.
	switch {
	case noiseFloor == nil:
		t.Error("noise_floor=NULL, want -105.0 — status message was dropped by IATA filter when it should not be")
	case *noiseFloor != -105.0:
		t.Errorf("noise_floor=%v, want -105.0 — status message was dropped by IATA filter when it should not be", *noiseFloor)
	}

	// Verify that a packet from BFL is still filtered.
	rawHex := "0A00D69FD7A5A7475DB07337749AE61FA53A4788E976"
	pktMsg := &mockMessage{
		topic:   "meshcore/BFL/bfl-obs1/packets",
		payload: []byte(`{"raw":"` + rawHex + `"}`),
	}
	handleMessage(store, "test", source, pktMsg, nil, nil, &Config{})

	// The Scan error must be checked: on failure count stays 0 and the
	// "still filtered" assertion would pass without reading the table.
	var count int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
		t.Fatalf("count transmissions: %v", err)
	}
	if count != 0 {
		t.Error("packet from out-of-region BFL should still be filtered by IATA")
	}
}
// TestLoadRegionKeys checks that loadRegionKeys normalizes, deduplicates and
// derives keys for the configured hash regions.
func TestLoadRegionKeys(t *testing.T) {
	cfg := &Config{HashRegions: []string{"#belgium", "eu", " #Test ", "", "#belgium"}}
	keys := loadRegionKeys(cfg)

	// Five inputs collapse to three: the empty entry and the duplicate drop out.
	if n := len(keys); n != 3 {
		t.Fatalf("len(keys) = %d, want 3", n)
	}

	// Pre-computed: SHA256("#belgium")[:16]. Hardcoded so a change to the key
	// derivation algorithm (hash function, truncation length) breaks this test
	// even if both sides were updated together.
	wantBelgium, _ := hex.DecodeString("7085b78ed010599094f8c8e7d1aa0e27")
	if gotBelgium := keys["#belgium"]; !bytes.Equal(gotBelgium, wantBelgium) {
		t.Errorf("#belgium key mismatch: got %x, want %x", gotBelgium, wantBelgium)
	}

	// "eu" must appear as "#eu" and " #Test " as "#Test".
	for _, region := range []string{"#eu", "#Test"} {
		if _, present := keys[region]; !present {
			t.Error("expected " + region + " key")
		}
	}
}
// TestMatchScope runs matchScope against fixed known-answer vectors and
// against codes that must not match any configured region.
func TestMatchScope(t *testing.T) {
	// Fixed known-answer vectors only — no in-test HMAC computation.
	// Keys and Code1 values are pre-computed externally so a wrong algorithm
	// that produces consistent wrong results on both sides would still fail.

	// Key = SHA256("#test")[:16]
	testKey, _ := hex.DecodeString("9cd8fcf22a47333b591d96a2b848b73f")
	// Key = SHA256("#belgium")[:16]
	belgiumKey, _ := hex.DecodeString("7085b78ed010599094f8c8e7d1aa0e27")
	testKeys := map[string][]byte{"#test": testKey}
	belgiumKeys := map[string][]byte{"#belgium": belgiumKey}

	// Every vector uses payloadType=5 and payload "hello".
	vectors := []struct {
		label    string
		keys     map[string][]byte
		code1    string
		want     string
		wantText string // how the expectation is spelled in the failure message
	}{
		{"#test vector", testKeys, "2AB5", "#test", "#test"},
		{"#belgium vector", belgiumKeys, "4A75", "#belgium", "#belgium"},
		// Code1=0000 (unscoped transport) → no region matched
		{"unscoped", belgiumKeys, "0000", "", "empty"},
		// Code1 present but matches no configured region → empty string
		{"no match", belgiumKeys, "BEEF", "", "empty"},
	}
	for _, v := range vectors {
		if got := matchScope(v.keys, 5, []byte("hello"), v.code1); got != v.want {
			t.Errorf("%s: matchScope = %q, want %s", v.label, got, v.wantText)
		}
	}
}
// TestBuildPacketDataScopeMatching decodes a fixed transport-scoped packet and
// checks that BuildPacketData resolves its scope name from the region keys.
func TestBuildPacketDataScopeMatching(t *testing.T) {
	// Fixed known-answer packet: TRANSPORT_FLOOD, payloadType=5, payload="hello",
	// Code1=2AB5 (pre-computed for region "#test").
	// Bytes: header=0x14 (route_type=0, payloadType=5 → 5<<2), Code1=[0x2A,0xB5],
	// Code2=[0,0], path_len=0, payload="hello" (68 65 6C 6C 6F).
	const rawHex = "142AB500000068656C6C6F"

	testKey, _ := hex.DecodeString("9cd8fcf22a47333b591d96a2b848b73f") // SHA256("#test")[:16]
	regionKeys := map[string][]byte{"#test": testKey}

	decoded, err := DecodePacket(rawHex, nil, false)
	if err != nil {
		t.Fatalf("DecodePacket: %v", err)
	}

	built := BuildPacketData(&MQTTPacketMessage{Raw: rawHex}, decoded, "obs1", "region1", regionKeys)
	if scope := built.ScopeName; scope != "#test" {
		t.Errorf("ScopeName = %q, want #test", scope)
	}
	if scoped := built.IsTransportScoped; !scoped {
		t.Error("IsTransportScoped should be true")
	}
}
// TestMQTTConnectRetryTimeoutDoesNotBlock guards #910: with ConnectRetry=true
// and an unreachable broker, token.WaitTimeout must come back within its
// deadline. The earlier code used token.Wait(), which blocks forever in this
// configuration.
func TestMQTTConnectRetryTimeoutDoesNotBlock(t *testing.T) {
	const (
		waitBudget = 3 * time.Second
		hardLimit  = 4 * time.Second
	)

	// Port 1 has nothing listening, so the dial is refused quickly.
	brokerOpts := mqtt.NewClientOptions().
		AddBroker("tcp://127.0.0.1:1").
		SetConnectRetry(true).
		SetAutoReconnect(true)

	c := mqtt.NewClient(brokerOpts)
	tok := c.Connect()
	defer c.Disconnect(100)

	began := time.Now()
	ok := tok.WaitTimeout(waitBudget)
	took := time.Since(began)

	if ok {
		t.Skip("port 1 unexpectedly accepted a connection — skipping")
	}
	if took > hardLimit {
		t.Errorf("WaitTimeout blocked for %v — token.Wait() would block forever with ConnectRetry=true", took)
	}
}
// TestBL1_GoroutineLeakOnHardFailure reproduces BLOCKER 1: without Disconnect()
// on the error path, Paho's internal retry goroutines leak when a client is
// discarded after Connect() with ConnectRetry=true.
//
// We prove the leak by creating N clients WITHOUT Disconnect — goroutines grow
// proportionally. The fix (client.Disconnect(0) before continue) prevents this.
//
// NOTE(review): as written this test has no failing path — it either skips
// (growth < 3) or logs; it never calls t.Error/t.Fatal. It documents the leak
// rather than guarding the Disconnect(0) call in main.go against removal.
func TestBL1_GoroutineLeakOnHardFailure(t *testing.T) {
// Sample the goroutine count before any client exists. The GC + short sleep
// presumably lets goroutines from earlier tests wind down first.
runtime.GC()
time.Sleep(100 * time.Millisecond)
baseline := runtime.NumGoroutine()
// Create multiple clients connected to unreachable broker, WITHOUT disconnecting.
// Each one spawns Paho retry goroutines that accumulate.
const numClients = 10
clients := make([]mqtt.Client, numClients)
for i := 0; i < numClients; i++ {
opts := mqtt.NewClientOptions().
AddBroker("tcp://127.0.0.1:1").
SetConnectRetry(true).
SetAutoReconnect(true).
SetConnectTimeout(500 * time.Millisecond)
c := mqtt.NewClient(opts)
tok := c.Connect()
// Result deliberately ignored: the broker is unreachable, we only need the
// connect attempt to have started.
tok.WaitTimeout(1 * time.Second)
clients[i] = c
}
// Second sample, taken while every client is still un-disconnected.
time.Sleep(200 * time.Millisecond)
leaked := runtime.NumGoroutine()
goroutineGrowth := leaked - baseline
// Clean up to not actually leak in test
for _, c := range clients {
c.Disconnect(0)
}
t.Logf("baseline=%d, after %d undisconnected clients=%d, growth=%d",
baseline, numClients, leaked, goroutineGrowth)
// With ConnectRetry=true, each Connect() spawns retry goroutines.
// Without Disconnect, these accumulate. Verify growth is meaningful.
if goroutineGrowth < 3 {
t.Skip("Connect didn't spawn enough extra goroutines to measure leak")
}
// The fix: calling client.Disconnect(0) on the error path prevents accumulation.
// Anti-tautology: removing the Disconnect(0) call from main.go's error path
// would cause goroutine accumulation proportional to failed broker count.
t.Logf("CONFIRMED: %d leaked goroutines from %d clients without Disconnect — fix adds Disconnect(0) on error path", goroutineGrowth, numClients)
}
// TestBL2_ZeroConnectedFatals covers BLOCKER 2: with every broker unreachable
// the ingestor must notice connectedCount==0. The test models the connection
// loop's outcome directly — a timed-out client is appended to the clients
// slice while connectedCount stays 0 — to show that the guard has to look at
// connectedCount rather than len(clients).
func TestBL2_ZeroConnectedFatals(t *testing.T) {
	// Outcome of the simulated connection loop: 1 timed-out client, 0 connected.
	var pending []mqtt.Client
	connectedCount := 0

	// An unreachable broker, so the connect token never completes in time.
	unreachable := mqtt.NewClientOptions().
		AddBroker("tcp://127.0.0.1:1").
		SetConnectRetry(true).
		SetAutoReconnect(true)
	c := mqtt.NewClient(unreachable)
	if tok := c.Connect(); !tok.WaitTimeout(2 * time.Second) {
		// Timed out — PR #926 appends to clients
		pending = append(pending, c)
	}
	defer func() {
		for _, p := range pending {
			p.Disconnect(0)
		}
	}()

	// OLD bug: len(clients) == 0 would be false (1 timed-out client in list)
	// → ingestor would silently run with zero connections.
	if len(pending) == 0 {
		t.Fatal("expected timed-out client to be in clients slice")
	}

	// NEW fix: connectedCount == 0 catches this.
	if connectedCount != 0 {
		t.Errorf("connectedCount should be 0, got %d", connectedCount)
	}

	// The real code does: if connectedCount == 0 { log.Fatal(...) }.
	// Here len(clients) > 0 while connectedCount == 0, which is exactly the
	// state the old guard would have missed.
	if connectedCount == 0 && len(pending) > 0 {
		t.Log("BL2 confirmed: old guard len(clients)==0 would NOT fatal; new guard connectedCount==0 correctly catches zero-connected state")
	}
}
// TestHandleMessageObserverIATAWhitelist sends one status message from a
// region outside Config.ObserverIATAWhitelist and one from a region inside it,
// then checks the observers table: the first must leave no row, the second
// must leave exactly one.
func TestHandleMessageObserverIATAWhitelist(t *testing.T) {
	store := newTestStore(t)
	source := MQTTSource{Name: "test"}
	cfg := &Config{
		ObserverIATAWhitelist: []string{"ARN"},
	}

	// observerCount fails the test on a query error instead of silently
	// reporting 0 — an unchecked Scan would make the "should be dropped"
	// assertion pass even when the query itself is broken.
	observerCount := func(id string) int {
		t.Helper()
		var n int
		if err := store.db.QueryRow("SELECT COUNT(*) FROM observers WHERE id = ?", id).Scan(&n); err != nil {
			t.Fatalf("count observers id=%q: %v", id, err)
		}
		return n
	}

	// Message from non-whitelisted region GOT — should be dropped
	handleMessage(store, "test", source, &mockMessage{
		topic:   "meshcore/GOT/obs1/status",
		payload: []byte(`{"origin":"node1","noise_floor":-110}`),
	}, nil, nil, cfg)
	if count := observerCount("obs1"); count != 0 {
		t.Error("observer from non-whitelisted IATA GOT should be dropped")
	}

	// Message from whitelisted region ARN — should be accepted
	handleMessage(store, "test", source, &mockMessage{
		topic:   "meshcore/ARN/obs2/status",
		payload: []byte(`{"origin":"node2","noise_floor":-105}`),
	}, nil, nil, cfg)
	if count := observerCount("obs2"); count != 1 {
		t.Errorf("observer from whitelisted IATA ARN should be accepted, got count=%d", count)
	}
}
// TestBuildPacketDataScopeMatchingNoMatch is the #1534 regression check at the
// BuildPacketData level: a transport-scoped packet from a region that is not
// configured comes back with IsTransportScoped=true and ScopeName="". The
// default_scope update guard has to skip such packets, otherwise a previously
// correct scope would be overwritten with the empty string.
func TestBuildPacketDataScopeMatchingNoMatch(t *testing.T) {
	// Code1=2AB5 is the precomputed code for region "#test" (payload="hello",
	// payloadType=5). Only a DIFFERENT region is configured below, so
	// matchScope() finds no match and returns "".
	const packetHex = "142AB500000068656C6C6F"
	unrelatedKey, _ := hex.DecodeString("aabbccddeeff00112233445566778899")

	pkt, err := DecodePacket(packetHex, nil, false)
	if err != nil {
		t.Fatalf("DecodePacket: %v", err)
	}

	got := BuildPacketData(
		&MQTTPacketMessage{Raw: packetHex},
		pkt,
		"obs1",
		"region1",
		map[string][]byte{"#other": unrelatedKey},
	)

	// Preconditions: scoped transport, but no region name resolved.
	switch {
	case !got.IsTransportScoped:
		t.Fatalf("precondition: IsTransportScoped should be true (Code1 != 0000)")
	case got.ScopeName != "":
		t.Fatalf("precondition: ScopeName should be empty (no region match), got %q", got.ScopeName)
	}

	// Regression assertion: with an empty ScopeName the guard must veto the
	// UpdateNodeDefaultScope call so "" never replaces a good default_scope.
	if shouldUpdateDefaultScope(got) {
		t.Errorf("shouldUpdateDefaultScope = true for empty ScopeName; want false (would overwrite default_scope with \"\")")
	}
}
// TestHandleMessageAdvert_EmptyScopeSkipsDefaultScopeUpdate is the call-site
// regression test for #1534. A transport-scoped ADVERT whose Code1 matches no
// configured region (ScopeName == "") is driven through handleMessage
// end-to-end, and the node's pre-existing default_scope must survive
// untouched. It anchors the call-site guard at main.go:720: dropping the
// `if shouldUpdateDefaultScope(...)` wrapper and calling
// `store.UpdateNodeDefaultScope(pubkey, pktData.ScopeName)` unconditionally
// would bring #1534 back and fail here.
func TestHandleMessageAdvert_EmptyScopeSkipsDefaultScopeUpdate(t *testing.T) {
	const (
		// Transport-scoped ADVERT: header byte 0x10 = route_type 0
		// (TRANSPORT_FLOOD) + payload_type 4 (ADVERT); Code1=AABB (non-zero,
		// so IsTransportScoped becomes true); Code2=0000; path_byte=00; then
		// the ADVERT payload (pubkey starting 46D62D…, timestamp, signature)
		// reused from TestHandleMessageAdvertWithTelemetry.
		rawHex = "10AABB00000046D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"
		pubkey = "46d62de27d4c5194d7821fc5a34a45565dcc2537b300b9ab6275255cefb65d84"
	)

	store := newTestStore(t)
	source := MQTTSource{Name: "test"}

	// Seed a non-empty default_scope so an erroneous overwrite with "" is
	// observable.
	_, err := store.db.Exec(`INSERT INTO nodes (public_key, name, default_scope) VALUES (?, 'Node1', '#belgium')`, pubkey)
	if err != nil {
		t.Fatalf("seed node: %v", err)
	}

	// Empty regionKeys → matchScope() returns "" for any Code1 → ScopeName "".
	noRegions := map[string][]byte{}
	advert := &mockMessage{
		topic:   "meshcore/SJC/obs1/packets",
		payload: []byte(`{"raw":"` + rawHex + `"}`),
	}
	handleMessage(store, "test", source, advert, nil, noRegions, &Config{})

	var scope sql.NullString
	err = store.db.QueryRow(`SELECT default_scope FROM nodes WHERE public_key = ?`, pubkey).Scan(&scope)
	if err != nil {
		t.Fatalf("read default_scope: %v", err)
	}
	if unchanged := scope.Valid && scope.String == "#belgium"; !unchanged {
		t.Errorf("default_scope after empty-scope advert = %q (valid=%v), want #belgium — call-site guard at main.go:720 is missing or broken (#1534)", scope.String, scope.Valid)
	}
}
// TestHandleMessageAdvert_MatchedScopeUpdatesDefaultScope is the positive
// counterpart of the empty-scope test: when a transport-scoped ADVERT carries
// a Code1 that matches a configured region key, default_scope MUST be updated
// to that region's name. Between the two tests both ScopeName states of the
// call-site branch are exercised.
func TestHandleMessageAdvert_MatchedScopeUpdatesDefaultScope(t *testing.T) {
	const (
		advertBytes = "46D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"
		pubkey      = "46d62de27d4c5194d7821fc5a34a45565dcc2537b300b9ab6275255cefb65d84"
	)

	store := newTestStore(t)
	source := MQTTSource{Name: "test"}

	// Searching for a key that yields a chosen Code1 is impractical, so the
	// test goes the other way: pick an arbitrary region key, compute the Code1
	// it produces for (payloadType=4, advert payload), and build the packet
	// around that Code1 so matchScope() returns "#de".
	advertRaw, _ := hex.DecodeString(advertBytes)
	regionKey, _ := hex.DecodeString("0123456789abcdef0123456789abcdef")
	digest := hmacSHA256(regionKey, append([]byte{4}, advertRaw...))

	// Per firmware (#1534 helper logic): Code1 is the first 2 bytes of the
	// HMAC, sentinel-shifted so 0x0000 → 0x0001 and 0xFFFF → 0xFFFE.
	code := uint16(digest[0]) | (uint16(digest[1]) << 8)
	switch code {
	case 0x0000:
		code = 0x0001
	case 0xFFFF:
		code = 0xFFFE
	}
	code1 := fmt.Sprintf("%02X%02X", byte(code&0xFF), byte(code>>8))

	// Header 0x10, Code1, then Code2=0000 and path_byte=00, then the payload.
	rawHex := "10" + code1 + "000000" + advertBytes

	_, err := store.db.Exec(`INSERT INTO nodes (public_key, name, default_scope) VALUES (?, 'Node1', '#old')`, pubkey)
	if err != nil {
		t.Fatalf("seed node: %v", err)
	}

	advert := &mockMessage{
		topic:   "meshcore/SJC/obs1/packets",
		payload: []byte(`{"raw":"` + rawHex + `"}`),
	}
	handleMessage(store, "test", source, advert, nil, map[string][]byte{"#de": regionKey}, &Config{})

	var scope sql.NullString
	err = store.db.QueryRow(`SELECT default_scope FROM nodes WHERE public_key = ?`, pubkey).Scan(&scope)
	if err != nil {
		t.Fatalf("read default_scope: %v", err)
	}
	if updated := scope.Valid && scope.String == "#de"; !updated {
		t.Errorf("default_scope after matched-scope advert = %q (valid=%v), want #de", scope.String, scope.Valid)
	}
}
-221
View File
@@ -1,221 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"fmt"
"log"
"time"
"github.com/meshcore-analyzer/dbschema"
)
// PruneOldPackets removes every transmission whose first_seen is more than
// `days` days in the past, together with its child observations, and returns
// how many transmissions were deleted. A non-positive `days` is a no-op.
//
// Owned by the ingestor per #1283: the writer process is the only one
// allowed to hold the DB write lock; previously this lived in
// cmd/server/db.go and raced ingestor INSERTs (SQLITE_BUSY).
func (s *Store) PruneOldPackets(days int) (int64, error) {
	if days <= 0 {
		return 0, nil
	}

	cutoff := time.Now().UTC().AddDate(0, 0, -days).Format(time.RFC3339)

	var deleted int64
	prune := func(tx *sql.Tx) error {
		// Children go first: there is no CASCADE in SQLite here.
		_, err := tx.Exec(`DELETE FROM observations WHERE transmission_id IN (
SELECT id FROM transmissions WHERE first_seen < ?
)`, cutoff)
		if err != nil {
			return fmt.Errorf("prune observations: %w", err)
		}

		res, err := tx.Exec(`DELETE FROM transmissions WHERE first_seen < ?`, cutoff)
		if err != nil {
			return fmt.Errorf("prune transmissions: %w", err)
		}
		deleted, _ = res.RowsAffected()
		return nil
	}

	// Tagged for writer-perf visibility (#1340).
	if err := s.WriterTx("prune_packets", prune); err != nil {
		return 0, err
	}

	if deleted > 0 {
		log.Printf("[prune] deleted %d transmissions older than %d days", deleted, days)
	}
	return deleted, nil
}
// SoftDeleteBlacklistedObservers delegates to
// dbschema.SoftDeleteBlacklistedObservers to mark the blacklisted observers as
// inactive=1 so they are hidden from API responses. Failures are logged as a
// warning rather than returned. Owned by ingestor per #1287. Runs once at
// startup.
func (s *Store) SoftDeleteBlacklistedObservers(blacklist []string) {
	affected, err := dbschema.SoftDeleteBlacklistedObservers(s.db, blacklist)
	switch {
	case err != nil:
		log.Printf("[observer-blacklist] warning: soft-delete failed: %v", err)
	case affected > 0:
		log.Printf("[observer-blacklist] soft-deleted %d blacklisted observer(s)", affected)
	}
}
// PruneNeighborEdges deletes rows from neighbor_edges whose last_seen is more
// than maxAgeDays days in the past and returns the number of DB rows deleted.
// A non-positive maxAgeDays is a no-op. Owned by the ingestor per #1287 (was
// in cmd/server).
func (s *Store) PruneNeighborEdges(maxAgeDays int) (int64, error) {
	if maxAgeDays <= 0 {
		return 0, nil
	}

	// AddDate is used instead of multiplying a time.Duration: for very large
	// retention values the int64 nanosecond product overflows, which can push
	// the cutoff into the future and delete every row. On a UTC time,
	// subtracting N calendar days equals subtracting N*24h, so the cutoff is
	// unchanged for ordinary values and now matches PruneOldPackets.
	cutoff := time.Now().UTC().AddDate(0, 0, -maxAgeDays).Format(time.RFC3339)

	res, err := s.db.Exec("DELETE FROM neighbor_edges WHERE last_seen < ?", cutoff)
	if err != nil {
		return 0, fmt.Errorf("prune neighbor_edges: %w", err)
	}

	// RowsAffected failure only affects the log line and the reported count;
	// the delete itself has already succeeded.
	n, _ := res.RowsAffected()
	if n > 0 {
		log.Printf("[neighbor-prune] removed %d DB rows older than %d days", n, maxAgeDays)
	}
	return n, nil
}
// ─── from_pubkey backfill (#1143) ──────────────────────────────────────────
//
// Moved from cmd/server/from_pubkey_migration.go in #1287. Runs from the
// ingestor's maintenance loop. Populates transmissions.from_pubkey for
// ADVERT rows whose value is still NULL, by parsing decoded_json.pubKey.
// FromPubkeyBackfillStats holds progress for /api/healthz exposure.
// The ingestor exposes these via stats_file.go so the server can read
// them without writing.
//
// NOTE(review): the fields presumably mirror the (total, processed, done)
// arguments of the BackfillFromPubkey progress callback — confirm at the
// call site that populates this struct.
type FromPubkeyBackfillStats struct {
// Total is serialized as "total"; presumably the number of rows the
// backfill set out to handle.
Total int64 `json:"total"`
// Processed is serialized as "processed"; presumably the number of rows
// handled so far.
Processed int64 `json:"processed"`
// Done is serialized as "done"; presumably true once the backfill has
// stopped running.
Done bool `json:"done"`
}
// BackfillFromPubkey scans transmissions where from_pubkey IS NULL and
// payload_type = 4 (ADVERT) and populates from_pubkey from decoded_json.
// Chunked + yields between batches. Safe to call repeatedly; once a row
// is set to either "" or hex it never matches the WHERE clause again.
//
// Parameters:
//   - chunkSize: maximum rows per SELECT/UPDATE batch; values <= 0 fall back
//     to 5000.
//   - yieldDuration: sleep between full batches; <= 0 disables the pause.
//   - progress: optional callback. It receives (total, 0, false) once the row
//     count is known, (total, processed, false) after each committed batch,
//     and exactly one final (total, processed, true) on every exit path,
//     including errors and recovered panics.
//
// Errors are logged and end the run; nothing is returned.
func (s *Store) BackfillFromPubkey(chunkSize int, yieldDuration time.Duration, progress func(total, processed int64, done bool)) {
	// Declared before the deferred reporter so the final done=true callback
	// carries the real counts. Reporting fixed zeros from the defer would
	// overwrite the counts of a successful run, because the defer runs last.
	var total, processed int64
	defer func() {
		if r := recover(); r != nil {
			log.Printf("[backfill] from_pubkey panic recovered: %v", r)
		}
		if progress != nil {
			progress(total, processed, true)
		}
	}()

	if chunkSize <= 0 {
		chunkSize = 5000
	}

	if err := s.db.QueryRow(
		"SELECT COUNT(*) FROM transmissions WHERE from_pubkey IS NULL AND payload_type = 4",
	).Scan(&total); err != nil {
		log.Printf("[backfill] from_pubkey count error: %v", err)
		return
	}
	if total == 0 {
		log.Println("[backfill] from_pubkey: nothing to do")
		return
	}
	if progress != nil {
		progress(total, 0, false)
	}
	log.Printf("[backfill] from_pubkey starting: %d ADVERT rows", total)

	stmt, err := s.db.Prepare("UPDATE transmissions SET from_pubkey = ? WHERE id = ?")
	if err != nil {
		log.Printf("[backfill] from_pubkey prepare: %v", err)
		return
	}
	defer stmt.Close()

	type row struct {
		id int64
		pk string
	}

	for {
		rows, err := s.db.Query(
			"SELECT id, decoded_json FROM transmissions WHERE from_pubkey IS NULL AND payload_type = 4 LIMIT ?",
			chunkSize)
		if err != nil {
			log.Printf("[backfill] from_pubkey select: %v", err)
			return
		}
		batch := make([]row, 0, chunkSize)
		for rows.Next() {
			var id int64
			var dj sql.NullString
			if err := rows.Scan(&id, &dj); err != nil {
				log.Printf("[backfill] from_pubkey scan: %v", err)
				continue
			}
			batch = append(batch, row{id: id, pk: extractPubkeyFromAdvertJSON(dj.String)})
		}
		// Read the iteration error before closing: a truncated result set
		// must not be mistaken for "no more rows".
		iterErr := rows.Err()
		rows.Close()
		if iterErr != nil {
			log.Printf("[backfill] from_pubkey rows: %v", iterErr)
			return
		}
		if len(batch) == 0 {
			break
		}

		tx, err := s.db.Begin()
		if err != nil {
			log.Printf("[backfill] from_pubkey begin tx: %v", err)
			return
		}
		txStmt := tx.Stmt(stmt)
		var updated int64
		for _, b := range batch {
			// Sentinel: "" = scanned-no-pubkey (so the WHERE clause
			// won't keep rescanning this row). hex = real pubkey.
			// b.pk is already "" when no pubkey was found.
			if _, err := txStmt.Exec(b.pk, b.id); err != nil {
				log.Printf("[backfill] from_pubkey update id=%d: %v", b.id, err)
				continue
			}
			updated++
		}
		if err := tx.Commit(); err != nil {
			log.Printf("[backfill] from_pubkey commit: %v", err)
			return
		}

		// Count only rows that were actually written; failed rows stay NULL
		// and would otherwise be counted again on the next pass.
		processed += updated
		if progress != nil {
			progress(total, processed, false)
		}
		if updated == 0 {
			// Every UPDATE in this batch failed, so the same rows would be
			// selected again forever. Stop instead of spinning.
			log.Printf("[backfill] from_pubkey aborting: none of %d selected rows could be updated", len(batch))
			return
		}
		if len(batch) < chunkSize {
			break
		}
		if yieldDuration > 0 {
			time.Sleep(yieldDuration)
		}
	}

	log.Printf("[backfill] from_pubkey complete: %d rows processed", processed)
}
// extractPubkeyFromAdvertJSON decodes an ADVERT decoded_json blob and returns
// the string stored under its top-level "pubKey" key. It returns "" when the
// input is empty, is not valid JSON for an object, lacks the key, or holds a
// non-string value there.
func extractPubkeyFromAdvertJSON(s string) string {
	if len(s) == 0 {
		return ""
	}

	var fields map[string]interface{}
	if json.Unmarshal([]byte(s), &fields) != nil {
		return ""
	}

	// A failed assertion leaves pk as "", which is the not-found result.
	pk, _ := fields["pubKey"].(string)
	return pk
}
-26
View File
@@ -1,26 +0,0 @@
package main
import "runtime/debug"
// applyMemoryLimit configures Go's soft memory limit (GOMEMLIMIT) for the
// ingestor process. See #1010.
//
// Precedence:
//  1. envSet (GOMEMLIMIT env var, parsed by the runtime at startup) — nothing
//     is overridden; the result is (0, "env").
//  2. runtimeMaxMB > 0 (from config runtime.maxMemoryMB) — a limit of
//     runtimeMaxMB MiB is installed via debug.SetMemoryLimit; the result is
//     (limit in bytes, "config").
//  3. Otherwise no limit is applied; the result is (0, "none").
//
// The first return value is the limit (bytes) this function set, or 0 if it
// did not set one.
func applyMemoryLimit(runtimeMaxMB int, envSet bool) (int64, string) {
	switch {
	case envSet:
		return 0, "env"
	case runtimeMaxMB <= 0:
		return 0, "none"
	}

	const bytesPerMiB = 1 << 20
	limit := int64(runtimeMaxMB) * bytesPerMiB
	debug.SetMemoryLimit(limit)
	return limit, "config"
}
-71
View File
@@ -1,71 +0,0 @@
package main
import (
"runtime/debug"
"testing"
)
// TestApplyMemoryLimit_FromEnv: when GOMEMLIMIT env var is set, the runtime
// already parsed it. Our function MUST NOT override and MUST report env source.
func TestApplyMemoryLimit_FromEnv(t *testing.T) {
	t.Setenv("GOMEMLIMIT", "850MiB")

	// SetMemoryLimit(-1) only reads the current limit; restoring requires
	// passing the saved value back in.
	before := debug.SetMemoryLimit(-1)
	defer debug.SetMemoryLimit(before)

	limit, source := applyMemoryLimit(512, true /* envSet */)
	if source != "env" {
		t.Fatalf("expected source=env, got %q", source)
	}
	if limit != 0 {
		t.Fatalf("expected limit=0 (not set by us), got %d", limit)
	}
	// "MUST NOT override": the runtime limit has to be exactly what it was.
	if after := debug.SetMemoryLimit(-1); after != before {
		t.Fatalf("runtime memory limit changed despite env precedence: before=%d after=%d", before, after)
	}
}
// TestApplyMemoryLimit_FromConfig: when env is unset and runtime.maxMemoryMB
// is set, derive a limit of exactly runtimeMaxMB * 1 MiB (no headroom — the
// ingestor's working set is bounded by MQTT batch decode, not packet store).
func TestApplyMemoryLimit_FromConfig(t *testing.T) {
	// Save the real previous limit and put it back afterwards. A deferred
	// SetMemoryLimit(-1) would only query the limit and leave the 512 MiB cap
	// installed for the rest of the test binary.
	prev := debug.SetMemoryLimit(-1)
	defer debug.SetMemoryLimit(prev)

	limit, source := applyMemoryLimit(512, false /* envSet */)
	if source != "config" {
		t.Fatalf("expected source=config, got %q", source)
	}
	want := int64(512) * 1024 * 1024
	if limit != want {
		t.Fatalf("expected limit=%d, got %d", want, limit)
	}
	cur := debug.SetMemoryLimit(-1)
	if cur != want {
		t.Fatalf("runtime memory limit not set: want=%d got=%d", want, cur)
	}
}
// TestApplyMemoryLimit_None: neither env nor config — no limit applied,
// default behavior preserved.
func TestApplyMemoryLimit_None(t *testing.T) {
	const noLimit = int64(1<<63 - 1) // math.MaxInt64 = "no limit"

	// SetMemoryLimit returns the previously set limit, so one call both
	// installs the "no limit" starting state and captures what to restore.
	prev := debug.SetMemoryLimit(noLimit)
	defer debug.SetMemoryLimit(prev)

	limit, source := applyMemoryLimit(0, false)
	if source != "none" {
		t.Fatalf("expected source=none, got %q", source)
	}
	if limit != 0 {
		t.Fatalf("expected limit=0, got %d", limit)
	}
	// "No limit applied": the runtime must still report the starting state.
	if cur := debug.SetMemoryLimit(-1); cur != noLimit {
		t.Fatalf("runtime memory limit changed with no env/config: want=%d got=%d", noLimit, cur)
	}
}
// TestApplyMemoryLimit_EnvWinsOverConfig: env set AND config set → env wins,
// our function does not override. Locks the precedence triage specified.
func TestApplyMemoryLimit_EnvWinsOverConfig(t *testing.T) {
	t.Setenv("GOMEMLIMIT", "1GiB")

	// SetMemoryLimit(-1) only reads the current limit; restoring requires
	// passing the saved value back in.
	before := debug.SetMemoryLimit(-1)
	defer debug.SetMemoryLimit(before)

	limit, source := applyMemoryLimit(512, true /* envSet */)
	if source != "env" {
		t.Fatalf("expected source=env when both set, got %q", source)
	}
	if limit != 0 {
		t.Fatalf("expected limit=0 when env wins, got %d", limit)
	}
	// Env wins: the config value must not have reached the runtime.
	if after := debug.SetMemoryLimit(-1); after != before {
		t.Fatalf("runtime memory limit changed although env wins: before=%d after=%d", before, after)
	}
}
-76
View File
@@ -1,76 +0,0 @@
package main
import (
"testing"
"time"
)
func TestBuildMQTTOpts_ReconnectSettings(t *testing.T) {
source := MQTTSource{
Broker: "tcp://localhost:1883",
Name: "test",
}
opts := buildMQTTOpts(source)
if opts.MaxReconnectInterval != 30*time.Second {
t.Errorf("MaxReconnectInterval = %v, want 30s", opts.MaxReconnectInterval)
}
if opts.ConnectTimeout != 10*time.Second {
t.Errorf("ConnectTimeout = %v, want 10s", opts.ConnectTimeout)
}
if opts.WriteTimeout != 10*time.Second {
t.Errorf("WriteTimeout = %v, want 10s", opts.WriteTimeout)
}
if !opts.AutoReconnect {
t.Error("AutoReconnect should be true")
}
if !opts.ConnectRetry {
t.Error("ConnectRetry should be true")
}
}
func TestBuildMQTTOpts_Credentials(t *testing.T) {
source := MQTTSource{
Broker: "tcp://broker:1883",
Username: "user1",
Password: "pass1",
}
opts := buildMQTTOpts(source)
if opts.Username != "user1" {
t.Errorf("Username = %q, want %q", opts.Username, "user1")
}
if opts.Password != "pass1" {
t.Errorf("Password = %q, want %q", opts.Password, "pass1")
}
}
func TestBuildMQTTOpts_TLS_InsecureSkipVerify(t *testing.T) {
f := false
source := MQTTSource{
Broker: "ssl://broker:8883",
RejectUnauthorized: &f,
}
opts := buildMQTTOpts(source)
if opts.TLSConfig == nil {
t.Fatal("TLSConfig should be set")
}
if !opts.TLSConfig.InsecureSkipVerify {
t.Error("InsecureSkipVerify should be true when RejectUnauthorized=false")
}
}
func TestBuildMQTTOpts_TLS_SSL_Prefix(t *testing.T) {
source := MQTTSource{
Broker: "ssl://broker:8883",
}
opts := buildMQTTOpts(source)
if opts.TLSConfig == nil {
t.Fatal("TLSConfig should be set for ssl:// brokers")
}
if opts.TLSConfig.InsecureSkipVerify {
t.Error("InsecureSkipVerify should be false by default")
}
}
-248
View File
@@ -1,248 +0,0 @@
package main
import (
"bytes"
"crypto/tls"
"log"
"net/url"
"runtime"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// TestBuildMQTTOpts_InstrumentsConnectionAttempt replaces a tautological
// assertion (PR #1216 r1 item 5, kent #1 / adv MAJOR-2) that only checked
// OnConnectAttempt != nil and therefore passed for a no-op handler. It now
// invokes the wired handler with log output captured and asserts the
// OBSERVABLE behaviour operators rely on during a #1212-class outage:
//   - the configured source tag appears in the log line
//   - the broker URL appears in the log line
//   - the per-source AttemptCount increments on every invocation (so the
//     handler is wired to the right state, not just a stub)
//   - the tlsCfg passed in is returned unchanged (no surprise TLS rewrite)
func TestBuildMQTTOpts_InstrumentsConnectionAttempt(t *testing.T) {
	defer snapshotAndResetRegistry(t)()

	src := MQTTSource{Broker: "tcp://localhost:1883", Name: "obs-tag"}
	opts := buildMQTTOpts(src)
	if opts.OnConnectAttempt == nil {
		t.Fatal("OnConnectAttempt must be wired in buildMQTTOpts (#1212 / PR #1216 r1)")
	}

	// Register the liveness state so the handler can find it and bump the
	// attempt counter (same wiring main.go does).
	state := &SourceLivenessState{Tag: "obs-tag", Broker: src.Broker}
	if err := registerLivenessState(state); err != nil {
		t.Fatalf("test setup: registerLivenessState: %v", err)
	}

	// Redirect the standard logger into a buffer, remembering the previous
	// writer and flags so later tests keep their output.
	var captured bytes.Buffer
	prevWriter, prevFlags := log.Writer(), log.Flags()
	log.SetOutput(&captured)
	log.SetFlags(0)
	defer func() {
		log.SetOutput(prevWriter)
		log.SetFlags(prevFlags)
	}()

	broker, err := url.Parse(src.Broker)
	if err != nil {
		t.Fatalf("test setup: parse broker url: %v", err)
	}
	sentinel := &tls.Config{ServerName: "sentinel.test"}

	// Two invocations — operators need to see attempt # increment per dial to
	// gauge backoff progress.
	first := opts.OnConnectAttempt(broker, sentinel)
	second := opts.OnConnectAttempt(broker, sentinel)
	if first != sentinel || second != sentinel {
		t.Errorf("OnConnectAttempt must pass tlsCfg through unchanged (got %p, %p; want %p)", first, second, sentinel)
	}

	out := captured.String()
	if !strings.Contains(out, "obs-tag") {
		t.Errorf("log output must include the source tag for operator grep; got %q", out)
	}
	if !strings.Contains(out, src.Broker) {
		t.Errorf("log output must include the broker URL so operators can correlate against config; got %q", out)
	}
	if hasBoth := strings.Contains(out, "#1") && strings.Contains(out, "#2"); !hasBoth {
		t.Errorf("log output must show attempt #1 and #2 across the two invocations (per-source counter); got %q", out)
	}
	if attempts := atomic.LoadInt64(&state.AttemptCount); attempts != 2 {
		t.Errorf("AttemptCount must increment per dial (got %d after 2 invocations, want 2)", attempts)
	}
}
// TestMQTTStallWatchdog_FiresOnSilentSource encodes the watchdog acceptance
// criterion from #1212: a source that reports connected but has delivered NO
// packets for longer than the threshold must be flagged as stalled. This is a
// separate detection layer for "silently dead" sockets (broker accepted TCP
// but stopped forwarding, half-open TCP, etc.).
func TestMQTTStallWatchdog_FiresOnSilentSource(t *testing.T) {
	now := time.Now()
	src := &SourceLivenessState{
		Tag:           "test",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	// Last message 10 minutes ago against a 5 minute threshold.
	atomic.StoreInt64(&src.LastMessageUnix, now.Add(-10*time.Minute).Unix())

	msg, kind := checkSourceLiveness(src, 5*time.Minute, now)
	if kind != LivenessStalled {
		t.Fatalf("watchdog should flag stall when source connected but no message for 10m (threshold 5m); got kind=%v msg=%q", kind, msg)
	}
	if !strings.Contains(msg, "no messages") {
		t.Errorf("stall message should mention 'no messages'; got %q", msg)
	}
	if !strings.Contains(msg, "test") {
		t.Errorf("stall message should include the source tag; got %q", msg)
	}
}
// TestMQTTStallWatchdog_QuietWhenRecent checks the healthy case: a connected
// source whose last message is 30s old must be reported as LivenessOK under a
// 5 minute threshold.
func TestMQTTStallWatchdog_QuietWhenRecent(t *testing.T) {
	now := time.Now()
	src := &SourceLivenessState{
		Tag:           "test",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	atomic.StoreInt64(&src.LastMessageUnix, now.Add(-30*time.Second).Unix())

	if _, kind := checkSourceLiveness(src, 5*time.Minute, now); kind != LivenessOK {
		t.Fatal("watchdog should NOT flag stall when last message was 30s ago and threshold is 5m")
	}
}
// TestMQTTStallWatchdog_QuietWhenDisconnected checks that a source reporting
// !IsConnected is classified as LivenessDisconnected even though its last
// message is an hour old.
func TestMQTTStallWatchdog_QuietWhenDisconnected(t *testing.T) {
	// While disconnected, paho's own reconnect logging covers the outage —
	// the watchdog should only fire for the silent-while-connected case.
	now := time.Now()
	src := &SourceLivenessState{
		Tag:           "test",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return false },
	}
	atomic.StoreInt64(&src.LastMessageUnix, now.Add(-1*time.Hour).Unix())

	if _, kind := checkSourceLiveness(src, 5*time.Minute, now); kind != LivenessDisconnected {
		t.Fatalf("watchdog must classify a !IsConnected source as LivenessDisconnected (silent state), not LivenessOK — r2 item 1 prevents disconnect→recovery mis-classification; got kind=%v", kind)
	}
}
// snapshotAndResetRegistry isolates the package-level livenessRegistry for a
// single test. Returns a restore func to defer. Without this, parallel or
// previously-registered sources leak into the watchdog goroutine under test.
func snapshotAndResetRegistry(t *testing.T) func() {
t.Helper()
livenessRegistryMu.Lock()
saved := livenessRegistry
livenessRegistry = map[string]*SourceLivenessState{}
livenessRegistryMu.Unlock()
return func() {
livenessRegistryMu.Lock()
livenessRegistry = saved
livenessRegistryMu.Unlock()
}
}
// RED-then-GREEN: the watchdog GOROUTINE (not just checkSourceLiveness) must
// fan out emits across the registry on each tick, AND must exit cleanly when
// the stop signal fires. Originally runLivenessWatchdog used `for range
// t.C` — ticker.Stop() does not close the channel, so the goroutine
// leaked past shutdown. This test asserts both:
//   - tick → emit for every stalled source in the registry
//   - stop → goroutine returns within a short bound
func TestMQTTStallWatchdog_LoopEmitsAndStopsCleanly(t *testing.T) {
	defer snapshotAndResetRegistry(t)()

	// Two connected sources whose last message is 10 minutes old.
	connected := func() bool { return true }
	staleUnix := time.Now().Add(-10 * time.Minute).Unix()
	sources := []*SourceLivenessState{
		{Tag: "alpha", Broker: "tcp://a:1883", IsConnectedFn: connected},
		{Tag: "beta", Broker: "tcp://b:1883", IsConnectedFn: connected},
	}
	for _, s := range sources {
		atomic.StoreInt64(&s.LastMessageUnix, staleUnix)
		// Fail fast on a registration error; otherwise it would only surface
		// later as a confusing "expected 2 stall emits" failure.
		if err := registerLivenessState(s); err != nil {
			t.Fatalf("test setup: registerLivenessState(%s): %v", s.Tag, err)
		}
	}

	tick := make(chan time.Time, 1)
	done := make(chan struct{})

	// emit records the first argument of every call when it is a string.
	var mu sync.Mutex
	var emits []string
	emit := func(args ...any) {
		mu.Lock()
		defer mu.Unlock()
		if len(args) > 0 {
			if s, ok := args[0].(string); ok {
				emits = append(emits, s)
			}
		}
	}

	exited := make(chan struct{})
	go func() {
		runLivenessWatchdogLoop(tick, done, 5*time.Minute, emit)
		close(exited)
	}()

	tick <- time.Now()

	// Drain: wait briefly for the emits to land. Polling instead of sleeping
	// keeps the test fast on a healthy machine.
	deadline := time.Now().Add(2 * time.Second)
	for time.Now().Before(deadline) {
		mu.Lock()
		n := len(emits)
		mu.Unlock()
		if n >= 2 {
			break
		}
		time.Sleep(10 * time.Millisecond)
	}

	mu.Lock()
	got := append([]string(nil), emits...)
	mu.Unlock()
	if len(got) != 2 {
		t.Fatalf("expected 2 stall emits (alpha+beta), got %d: %v", len(got), got)
	}

	close(done)
	select {
	case <-exited:
	case <-time.After(2 * time.Second):
		t.Fatal("watchdog goroutine did not exit within 2s of stop — ticker leak regression")
	}
}
// PR #1216 r1 item 6 (kent #2 / adv MAJOR-3): the original test had no
// assertions gating behaviour — it called stop() and trusted `-race` to
// catch leaks. `-race` does NOT detect goroutine leaks. This version
// samples runtime.NumGoroutine() before starting the watchdog, while it is
// running, and after stop(), and asserts the watchdog's goroutine actually
// exited.
//
// The pass condition requires the count to fall BELOW the mid-run sample as
// well as to within +1 of the baseline. The +1 slack alone (kept for unrelated
// runtime bookkeeping) is satisfied by precisely one leaked watchdog
// goroutine, which is the failure this test exists to catch.
func TestMQTTStallWatchdog_RunStopsCleanly(t *testing.T) {
	defer snapshotAndResetRegistry(t)()

	// Settle: let any prior-test goroutines finish before sampling baseline.
	runtime.GC()
	time.Sleep(50 * time.Millisecond)
	before := runtime.NumGoroutine()

	stop := runLivenessWatchdog(10*time.Millisecond, 5*time.Minute)

	// Let the watchdog run a few ticks so we're sure it's truly spawned.
	time.Sleep(50 * time.Millisecond)
	mid := runtime.NumGoroutine()
	if mid <= before {
		t.Fatalf("watchdog goroutine did not spawn: before=%d mid=%d", before, mid)
	}

	stop()

	// Poll until the count has dropped from the mid-run sample and is back
	// near the baseline.
	deadline := time.Now().Add(2 * time.Second)
	var after int
	for time.Now().Before(deadline) {
		runtime.Gosched()
		after = runtime.NumGoroutine()
		if after < mid && after <= before+1 {
			return
		}
		time.Sleep(10 * time.Millisecond)
	}
	t.Fatalf("watchdog goroutine leaked: before=%d mid=%d after=%d (delta %d) — stop() did not signal the loop to exit", before, mid, after, after-before)
}
-410
View File
@@ -1,410 +0,0 @@
package main
import (
"fmt"
"log"
"sync"
"sync/atomic"
"time"
)
// livenessHeartbeatInterval is how often the watchdog re-emits a still-stalled
// reminder once the initial WARN edge has fired. 1h matches the pager
// budget — frequent enough that an unattended stall is noticed within a
// shift, infrequent enough not to spam ops chat.
const livenessHeartbeatInterval = time.Hour
// forceReconnectThrottle is the minimum interval between forced
// reconnects on the SAME source. See processLivenessTransition.
const forceReconnectThrottle = 60 * time.Second
// LivenessKind enumerates the watchdog verdicts for a source. Edge-triggered
// transitions use this to decide whether to emit (and what severity).
type LivenessKind int
const (
// LivenessOK: nothing to report. checkSourceLiveness returns it for a
// nil/unwired state, during the grace window, and while messages are
// recent enough.
LivenessOK LivenessKind = iota
// LivenessStalled: connected, has received messages before, but none
// for at least the threshold.
LivenessStalled
// LivenessNeverReceived: connected, but no message has been recorded
// and the threshold has elapsed since FirstConnectedAt.
LivenessNeverReceived
// LivenessRecovered and LivenessHeartbeat are not returned by
// checkSourceLiveness; processLivenessTransition treats them as no-ops.
// NOTE(review): presumably reserved for callers/tests elsewhere — confirm.
LivenessRecovered
LivenessHeartbeat
// LivenessDisconnected (PR #1216 r2 item 1): paho reports !IsConnected.
// Distinct from LivenessOK so processLivenessTransition does NOT
// interpret a TCP drop as recovery and fire a spurious "messages
// flowing again" INFO when the source actually went from silently
// broken to overtly broken. paho's own reconnect logging already
// covers the disconnect — this kind exists solely to keep the
// transition engine from mis-classifying it.
LivenessDisconnected
)
// SourceLivenessState tracks per-source last-message timestamp and connection
// state for the stall watchdog (#1212). LastMessageUnix is updated by the
// message handler via atomic store; the watchdog reads it via atomic load.
// All int64 fields below are accessed with sync/atomic; the struct is
// shared by pointer through the liveness registry.
//
// PR #1216 r1 added:
// - StartedAt: re-stamped on reconnect to suppress transient-stall WARNs
// during paho's reconnect window.
// - LastAlertUnix: edge-trigger cooldown; prevents 60-per-hour re-emits
// of the same WARN.
//
// PR #1216 r2 added:
// - FirstConnectedAt: stamped ONCE at registration, never reset. The
// cold-start "NEVER received" alarm uses this so a broker that flaps
// in CONNECT → SUBSCRIBE-deny cannot indefinitely re-arm the grace
// window. r1's StartedAt-as-grace-clock conflated transient-stall
// suppression with cold-start grace; r2 separates them.
type SourceLivenessState struct {
// Tag is the registry key and the source label used in every
// watchdog log line.
Tag string
// Broker is included in alert and collision messages for context.
Broker string
LastMessageUnix int64 // atomic; unix seconds of last successfully WRITTEN MQTT message (handleMessage post-write)
// LastReceiptUnix (PR #1609 M1) is stamped at MQTT receipt time —
// BEFORE the message is handed to the buffer/writer. STUB: unused
// in production until the green commit wires MarkReceipt at the
// receipt callsite and surfaces it in stats/healthz.
LastReceiptUnix int64 // atomic; unix seconds of last RECEIPT (broker liveness)
// FirstConnectedAt (PR #1216 r2 item 2) is stamped ONCE at
// registerLivenessState time and never reset. Cold-start grace
// checks against this so a flapping broker (CONNECT ok, SUBSCRIBE
// ACL-denied — the #1212 shape) can no longer suppress the
// "NEVER received" alarm by re-stamping StartedAt on every reconnect.
FirstConnectedAt int64 // atomic; unix seconds of first registration
StartedAt int64 // atomic; unix seconds when the source was registered / last reconnected (transient-stall tracking)
LastAlertUnix int64 // atomic; unix seconds of last emit (WARN or heartbeat); 0 means quiet
// IsConnectedFn reports whether the client currently considers itself
// connected. When nil, checkSourceLiveness stays quiet (LivenessOK).
IsConnectedFn func() bool
// ForceReconnectFn (#1335) is called by the watchdog when a source
// transitions INTO LivenessStalled. It must force the paho client
// to drop its current TCP socket and re-establish (typically
// client.Disconnect(250) followed by client.Connect()). Half-open
// TCP sockets (Azure NAT idle timeout) report IsConnected==true so
// paho's own auto-reconnect never fires; this is the recovery path.
// May be nil (tests, or sources registered before wiring); the
// watchdog must treat that as a safe no-op. Invocations are
// throttled at forceReconnectThrottle per source so a
// stall→reconnect→re-stall loop self-recovers without hammering
// the broker.
ForceReconnectFn func()
// LastForceReconnectUnix is the unix-seconds timestamp of the most
// recent forced reconnect for this source; the watchdog reads it
// to enforce forceReconnectThrottle. atomic. 0 means no forced
// reconnect has been issued yet.
LastForceReconnectUnix int64
// AttemptCount is incremented on every TCP/TLS connection attempt. Used
// by ConnectionAttemptHandler to log attempt # independent of paho's
// internal reconnect-loop state. atomic.
AttemptCount int64
}
// MarkMessage atomically stamps LastMessageUnix with now (unix seconds).
// It is a single atomic store, so it is cheap enough for the
// message-handling hot path.
func (s *SourceLivenessState) MarkMessage(now time.Time) {
	stamp := now.Unix()
	atomic.StoreInt64(&s.LastMessageUnix, stamp)
}
// MarkReceipt atomically stamps LastReceiptUnix with now (unix seconds).
// PR #1609 M1: this clock is meant to be stamped at the paho receipt
// callback, BEFORE the message enters the ingest buffer, and is kept
// apart from LastMessageUnix so that "broker alive, write path stuck"
// (receipt fresh, message stale) can be told from "everything stalled"
// (both stale). A single atomic store; safe for the hot path.
func (s *SourceLivenessState) MarkReceipt(now time.Time) {
	stamp := now.Unix()
	atomic.StoreInt64(&s.LastReceiptUnix, stamp)
}
// MarkReconnected resets the per-connection liveness clocks after paho
// re-establishes the connection (PR #1216 r1 item 2), so a pre-outage
// timestamp cannot trigger a false stall alarm:
//   - LastMessageUnix is zeroed,
//   - StartedAt is re-stamped with now,
//   - LastAlertUnix is zeroed so the edge-trigger re-arms.
//
// PR #1216 r2 item 2: FirstConnectedAt is deliberately left alone. The
// cold-start "NEVER received" check reads FirstConnectedAt, so a broker
// that flaps (CONNECT ok, SUBSCRIBE ACL-denied — the #1212 class) cannot
// keep re-arming the grace window by reconnecting.
func (s *SourceLivenessState) MarkReconnected(now time.Time) {
	reconnectedAt := now.Unix()
	atomic.StoreInt64(&s.LastMessageUnix, 0)
	atomic.StoreInt64(&s.StartedAt, reconnectedAt)
	atomic.StoreInt64(&s.LastAlertUnix, 0)
}
// checkSourceLiveness classifies a source at time now and returns the
// alert text (empty when there is nothing to say) together with the
// verdict:
//
//   - s or s.IsConnectedFn is nil: LivenessOK (nothing to evaluate).
//   - IsConnectedFn() is false: LivenessDisconnected. This is a distinct,
//     silent kind so the transition engine does not mistake a disconnect
//     for recovery (PR #1216 r2 item 1).
//   - A message has been recorded (LastMessageUnix != 0): LivenessStalled
//     once the silence reaches threshold, LivenessOK otherwise.
//   - No message recorded yet (cold start, PR #1216 r1 item 1 / r2 item
//     2): LivenessNeverReceived once threshold has elapsed since
//     FirstConnectedAt; LivenessOK during that grace window or when
//     FirstConnectedAt was never stamped. FirstConnectedAt is used rather
//     than the reconnect-reset StartedAt so broker flap cannot silence
//     the alarm. The message wording is distinct ("NEVER received") so
//     operators can grep for the #1212 failure class separately from
//     generic stalls.
func checkSourceLiveness(s *SourceLivenessState, threshold time.Duration, now time.Time) (string, LivenessKind) {
	if s == nil {
		return "", LivenessOK
	}
	if s.IsConnectedFn == nil {
		return "", LivenessOK
	}
	if connected := s.IsConnectedFn(); !connected {
		return "", LivenessDisconnected
	}

	lastMsg := atomic.LoadInt64(&s.LastMessageUnix)
	if lastMsg != 0 {
		// Ordinary path: the source has delivered at least one message.
		quiet := now.Sub(time.Unix(lastMsg, 0))
		if quiet >= threshold {
			return fmt.Sprintf("MQTT [%s] WATCHDOG: client reports connected to %s but no messages received for %s (threshold %s) — possible half-open socket or upstream stall",
				s.Tag, s.Broker, quiet.Round(time.Second), threshold), LivenessStalled
		}
		return "", LivenessOK
	}

	// Cold-start path: nothing recorded yet.
	anchor := atomic.LoadInt64(&s.FirstConnectedAt)
	if anchor == 0 {
		// Registration never stamped the anchor; stay quiet rather than guess.
		return "", LivenessOK
	}
	elapsed := now.Sub(time.Unix(anchor, 0))
	if elapsed < threshold {
		return "", LivenessOK
	}
	return fmt.Sprintf("MQTT [%s] WATCHDOG: client reports connected to %s but has NEVER received a message in %s (threshold %s) — check channel hash / subscribe ACL / half-open TCP",
		s.Tag, s.Broker, elapsed.Round(time.Second), threshold), LivenessNeverReceived
}
// livenessRegistry is a package-level lookup so handleMessage (called with
// only `tag string`) can mark liveness without threading the state through
// every call site. Reads dominate (per message); writes happen once per
// source at startup. Every access to the map must hold livenessRegistryMu
// (read lock for lookups and scans, write lock for registration).
var (
livenessRegistry = map[string]*SourceLivenessState{}
livenessRegistryMu sync.RWMutex
)
// registerLivenessState adds s to the liveness registry under s.Tag.
//
// A tag that is already registered yields an error and the existing entry
// is left untouched (PR #1216 r1 item 4) — the first registration stays
// authoritative. Collisions are a realistic misconfiguration: two MQTT
// sources with an empty Name that fall back to Broker, duplicated Names,
// or copy-paste in config.json. The caller decides whether to abort or
// to log and skip.
//
// On success, StartedAt (transient-stall window) and FirstConnectedAt
// (cold-start grace anchor, never reset afterwards) are stamped with the
// current time unless the caller already set them to a non-zero value.
func registerLivenessState(s *SourceLivenessState) error {
	livenessRegistryMu.Lock()
	defer livenessRegistryMu.Unlock()

	prior, taken := livenessRegistry[s.Tag]
	if taken {
		return fmt.Errorf("liveness registry: duplicate tag %q (existing broker=%s, new broker=%s) — fix config so each MQTT source has a unique Name", s.Tag, prior.Broker, s.Broker)
	}

	// Only fill clocks that are still zero so pre-seeded values survive.
	stamp := time.Now().Unix()
	for _, clock := range []*int64{&s.StartedAt, &s.FirstConnectedAt} {
		if atomic.LoadInt64(clock) == 0 {
			atomic.StoreInt64(clock, stamp)
		}
	}

	livenessRegistry[s.Tag] = s
	return nil
}
// registerLivenessOrSkip (PR #1216 r2 item 3) wraps registerLivenessState
// for the main call site. A tag collision is logged as an ERROR and the
// duplicate is skipped instead of aborting the process: a fatal exit over
// a config typo would stop all ingestion, which is the #1212 failure
// class this watchdog exists to prevent. The skipped source still
// connects; it is simply not tracked by the watchdog.
//
// Returns true when the source was registered, false when it was skipped.
func registerLivenessOrSkip(s *SourceLivenessState) bool {
	err := registerLivenessState(s)
	if err == nil {
		return true
	}
	log.Printf("[ingestor] ERROR: source tag collision %q — skipping duplicate liveness registration, this source will connect but will not be tracked by the watchdog (%v)", s.Tag, err)
	return false
}
// markLivenessForTag is the hot-path hook for the post-write clock: it
// looks the tag up in the registry under a read lock and, if a state is
// registered, stamps its LastMessageUnix via MarkMessage. Unknown tags
// are ignored.
func markLivenessForTag(tag string, now time.Time) {
	livenessRegistryMu.RLock()
	state, known := livenessRegistry[tag]
	livenessRegistryMu.RUnlock()

	if !known || state == nil {
		return
	}
	state.MarkMessage(now)
}
// markReceiptForTag is the hot-path hook for the receipt clock, intended
// for the MQTT receipt point (BEFORE the message is buffered/written). It
// looks the tag up under a read lock and stamps only LastReceiptUnix via
// MarkReceipt; unknown tags are ignored. PR #1609 M1 — keeps the
// broker-liveness signal separate from write-path liveness so /healthz
// can show a stalled writer behind a live broker.
func markReceiptForTag(tag string, now time.Time) {
	livenessRegistryMu.RLock()
	state, known := livenessRegistry[tag]
	livenessRegistryMu.RUnlock()

	if !known || state == nil {
		return
	}
	state.MarkReceipt(now)
}
// SnapshotLivenessClocks copies, for every registered source, the receipt
// clock and the post-write clock into a fresh map keyed by tag. It only
// reads (registry read lock plus atomic loads), so it is safe to call
// from the stats-file writer. Returns nil when no source is registered.
// PR #1609 M1.
func SnapshotLivenessClocks() map[string]SourceLivenessSnapshot {
	livenessRegistryMu.RLock()
	defer livenessRegistryMu.RUnlock()

	count := len(livenessRegistry)
	if count == 0 {
		return nil
	}

	snap := make(map[string]SourceLivenessSnapshot, count)
	for tag, state := range livenessRegistry {
		var entry SourceLivenessSnapshot
		entry.LastReceiptUnix = atomic.LoadInt64(&state.LastReceiptUnix)
		entry.LastMessageUnix = atomic.LoadInt64(&state.LastMessageUnix)
		snap[tag] = entry
	}
	return snap
}
// runLivenessWatchdog starts a goroutine that scans the registry every
// `interval` and logs (via log.Print) for any source that has been silent
// while connected for more than `threshold`.
//
// The returned stop function halts the ticker AND signals the goroutine
// to exit. Both are needed: time.Ticker.Stop does NOT close the ticker
// channel, so a loop that only ranged over t.C would leak. stop is
// idempotent — calls after the first are no-ops — so a shutdown path that
// runs twice cannot panic with "close of closed channel".
//
// interval should be a fraction of threshold (e.g. threshold/5) so
// detection latency is bounded.
func runLivenessWatchdog(interval, threshold time.Duration) (stop func()) {
	t := time.NewTicker(interval)
	done := make(chan struct{})
	go runLivenessWatchdogLoop(t.C, done, threshold, log.Print)

	var once sync.Once
	return func() {
		once.Do(func() {
			t.Stop()
			close(done)
		})
	}
}
// runLivenessWatchdogLoop is the watchdog goroutine body. It is separate
// from runLivenessWatchdog so tests can feed it a synthetic tick channel
// and capture output through emit instead of racing a real ticker.
//
// The loop exits when done is closed or when tick is closed. On each tick
// it snapshots the registered states under the read lock, releases the
// lock, and then evaluates every state with checkSourceLiveness followed
// by processLivenessTransition, using the tick's timestamp as "now".
//
// Emission is edge-triggered (PR #1216 r1 item 3), implemented in
// processLivenessTransition:
//   - quiet → stalled / never-received: one WARN, LastAlertUnix recorded
//   - still stalled within the heartbeat interval: suppressed
//   - still stalled past the heartbeat interval: reminder, LastAlertUnix
//     refreshed
//   - stalled → flowing: one recovery INFO, LastAlertUnix cleared
//
// Without edge-triggering the same WARN would repeat on every tick — the
// kind of log flood that trains ops to mute alerts.
func runLivenessWatchdogLoop(tick <-chan time.Time, done <-chan struct{}, threshold time.Duration, emit func(...any)) {
	for {
		var now time.Time
		select {
		case <-done:
			return
		case ts, open := <-tick:
			if !open {
				return
			}
			now = ts
		}

		// Copy the pointers out so the per-source work runs without
		// holding the registry lock.
		livenessRegistryMu.RLock()
		snapshot := make([]*SourceLivenessState, 0, len(livenessRegistry))
		for _, st := range livenessRegistry {
			snapshot = append(snapshot, st)
		}
		livenessRegistryMu.RUnlock()

		for i := range snapshot {
			st := snapshot[i]
			msg, kind := checkSourceLiveness(st, threshold, now)
			processLivenessTransition(st, kind, msg, now, emit)
		}
	}
}
// processLivenessTransition applies the edge-trigger rules for one source
// and one verdict, updating LastAlertUnix as it goes. It is split out of
// the loop for testability.
//
//   - LivenessOK: if an alert is outstanding (LastAlertUnix != 0), emit a
//     single recovery INFO and clear LastAlertUnix; otherwise do nothing.
//   - LivenessStalled / LivenessNeverReceived: on first detection emit
//     msg as-is; on later scans emit a "heartbeat" reminder only once
//     livenessHeartbeatInterval has passed since the last emit. Every
//     emit refreshes LastAlertUnix.
//   - Any other kind, including LivenessDisconnected: silent, and
//     LastAlertUnix is preserved (PR #1216 r2 item 1 — a disconnect is
//     not a recovery, and clearing the cooldown would let a flapping
//     source re-fire the WARN every cycle).
//
// #1335: whenever a LivenessStalled verdict actually emits (first edge or
// heartbeat), maybeForceReconnect is invoked; it applies its own
// throttle. LivenessNeverReceived never forces a reconnect — that class
// is almost always an ACL deny or wrong channel hash, which a new TCP
// socket cannot fix.
func processLivenessTransition(s *SourceLivenessState, kind LivenessKind, msg string, now time.Time, emit func(...any)) {
	prevAlert := atomic.LoadInt64(&s.LastAlertUnix)

	if kind == LivenessOK {
		if prevAlert != 0 {
			// Recovered: announce once and re-arm the edge.
			emit(fmt.Sprintf("MQTT [%s] WATCHDOG INFO: messages flowing again (recovered)", s.Tag))
			atomic.StoreInt64(&s.LastAlertUnix, 0)
		}
		return
	}

	if kind != LivenessStalled && kind != LivenessNeverReceived {
		// Disconnected (and any non-alerting kind): stay silent and keep
		// the cooldown intact.
		return
	}

	firstDetection := prevAlert == 0
	if !firstDetection && now.Sub(time.Unix(prevAlert, 0)) < livenessHeartbeatInterval {
		// Already alerted and the heartbeat is not due yet.
		return
	}

	if firstDetection {
		emit(msg)
	} else {
		emit(fmt.Sprintf("MQTT [%s] WATCHDOG heartbeat: still stalled — %s", s.Tag, msg))
	}
	atomic.StoreInt64(&s.LastAlertUnix, now.Unix())

	// Only the half-open-socket shape benefits from a fresh connection.
	if kind == LivenessStalled {
		maybeForceReconnect(s, now, emit)
	}
}
// maybeForceReconnect calls s.ForceReconnectFn when one is wired and at
// least forceReconnectThrottle has passed since the previous forced
// reconnect for this source (or none has happened yet).
//
//   - ForceReconnectFn nil: returns silently.
//   - Inside the throttle window: emits a "suppressing" line and returns.
//   - Otherwise: records now in LastForceReconnectUnix, emits a "forcing
//     reconnect" line, then runs ForceReconnectFn on a new goroutine and
//     emits "reconnect attempt issued" when it returns.
//
// The emitted lines let operators correlate the forced reconnect with
// paho's own connection logging.
func maybeForceReconnect(s *SourceLivenessState, now time.Time, emit func(...any)) {
	if s.ForceReconnectFn == nil {
		return
	}

	if prev := atomic.LoadInt64(&s.LastForceReconnectUnix); prev != 0 {
		if sinceLast := now.Sub(time.Unix(prev, 0)); sinceLast < forceReconnectThrottle {
			emit(fmt.Sprintf("MQTT [%s] WATCHDOG suppressing forced reconnect (last attempt %s ago, throttle %s)",
				s.Tag, sinceLast.Round(time.Second), forceReconnectThrottle))
			return
		}
	}

	atomic.StoreInt64(&s.LastForceReconnectUnix, now.Unix())
	emit(fmt.Sprintf("MQTT [%s] WATCHDOG forcing reconnect (half-open TCP suspected — paho.IsConnected==true but no messages)", s.Tag))

	// ForceReconnectFn typically does client.Disconnect(250) (blocks up
	// to 250ms) followed by client.Connect() (can block for the connect
	// timeout). Running it asynchronously keeps one slow source from
	// stalling the watchdog's per-tick scan of the others.
	go func() {
		s.ForceReconnectFn()
		emit(fmt.Sprintf("MQTT [%s] WATCHDOG reconnect attempt issued", s.Tag))
	}()
}
@@ -1,174 +0,0 @@
package main
import (
"sync"
"sync/atomic"
"testing"
"time"
)
// Issue #1335 — staging's lincomatic source stalls: paho reports
// IsConnected==true but no messages arrive for 1h+. The PR #1216
// watchdog DETECTS this (LivenessStalled) but only LOGS — it never
// forces paho to drop the half-open TCP socket and reconnect, so the
// source stays silently broken until container restart.
//
// Fix: on transition INTO LivenessStalled, invoke a per-source
// ForceReconnectFn (wired in main.go to client.Disconnect(250) +
// client.Connect()). Throttled by forceReconnectThrottle so a
// stall→reconnect→re-stall loop self-recovers without hammering the
// broker.
// TestMQTTStallWatchdog_ForceReconnectOnStallEdge checks that the first
// LivenessStalled verdict for a source (the WARN edge) invokes its
// ForceReconnectFn exactly once. Before the #1335 fix the transition
// engine never called ForceReconnectFn, so this test was RED on master.
func TestMQTTStallWatchdog_ForceReconnectOnStallEdge(t *testing.T) {
	restore := snapshotAndResetRegistry(t)
	defer restore()

	now := time.Now()
	var reconnectCount atomic.Int32
	src := &SourceLivenessState{
		Tag:              "stalled-half-open",
		Broker:           "tcp://halfopen.example:1883",
		IsConnectedFn:    func() bool { return true },
		ForceReconnectFn: func() { reconnectCount.Add(1) },
	}
	atomic.StoreInt64(&src.LastMessageUnix, now.Add(-10*time.Minute).Unix())
	atomic.StoreInt64(&src.StartedAt, now.Add(-20*time.Minute).Unix())
	if err := registerLivenessState(src); err != nil {
		t.Fatalf("setup: %v", err)
	}

	// Collect string emits under a mutex; the reconnect goroutine also
	// emits.
	var mu sync.Mutex
	var emits []string
	emit := func(args ...any) {
		mu.Lock()
		defer mu.Unlock()
		if len(args) == 0 {
			return
		}
		if line, isString := args[0].(string); isString {
			emits = append(emits, line)
		}
	}

	processLivenessTransition(src, LivenessStalled, "10m silent", now, emit)

	// ForceReconnectFn runs on its own goroutine (production must not
	// block the watchdog tick on a slow Disconnect+Connect), so give it
	// a moment to land before asserting.
	waitForReconnect(t, &reconnectCount, 1, 2*time.Second)
	got := reconnectCount.Load()
	if got != 1 {
		t.Fatalf("LivenessStalled transition MUST force-reconnect exactly once; got %d invocations (emits=%v)", got, emits)
	}
}
// TestMQTTStallWatchdog_ForceReconnectThrottled checks the per-source
// throttle: a second stall edge inside forceReconnectThrottle must not
// trigger another forced reconnect (no broker hammering), while a stall
// edge after the window may fire again.
func TestMQTTStallWatchdog_ForceReconnectThrottled(t *testing.T) {
	restore := snapshotAndResetRegistry(t)
	defer restore()

	now := time.Now()
	var reconnectCount atomic.Int32
	src := &SourceLivenessState{
		Tag:              "throttled",
		Broker:           "tcp://x:1883",
		IsConnectedFn:    func() bool { return true },
		ForceReconnectFn: func() { reconnectCount.Add(1) },
	}
	if err := registerLivenessState(src); err != nil {
		t.Fatalf("setup: %v", err)
	}
	discard := func(args ...any) {}

	// Stall edge #1 fires the reconnect.
	processLivenessTransition(src, LivenessStalled, "stall 1", now, discard)
	waitForReconnect(t, &reconnectCount, 1, 2*time.Second)

	// Simulated paho reconnect: MarkReconnected clears the alert
	// cooldown, and the source is reported stalled again shortly after,
	// still inside the throttle window.
	src.MarkReconnected(now.Add(5 * time.Second))
	processLivenessTransition(src, LivenessStalled, "stall 2", now.Add(10*time.Second), discard)

	// Leave room for a stray goroutine to land; the throttle should
	// prevent one.
	time.Sleep(100 * time.Millisecond)
	if n := reconnectCount.Load(); n != 1 {
		t.Fatalf("force-reconnect MUST be throttled within %s; got %d invocations", forceReconnectThrottle, n)
	}

	// Past the throttle window a fresh stall edge is allowed to fire.
	src.MarkReconnected(now.Add(30 * time.Second))
	processLivenessTransition(src, LivenessStalled, "stall 3", now.Add(forceReconnectThrottle+30*time.Second), discard)
	waitForReconnect(t, &reconnectCount, 2, 2*time.Second)
	if n := reconnectCount.Load(); n != 2 {
		t.Fatalf("after throttle window, force-reconnect must re-arm; got %d invocations", n)
	}
}
// TestMQTTStallWatchdog_NoForceReconnectOnNeverReceived checks that a
// LivenessNeverReceived verdict (cold start, nothing ever flowed) does
// NOT invoke ForceReconnectFn. A SUBSCRIBE ACL deny is not cured by a
// new TCP socket; reconnecting would only churn the broker. Operators
// get the distinct "NEVER received" alarm instead.
func TestMQTTStallWatchdog_NoForceReconnectOnNeverReceived(t *testing.T) {
	restore := snapshotAndResetRegistry(t)
	defer restore()

	now := time.Now()
	var reconnectCount atomic.Int32
	src := &SourceLivenessState{
		Tag:              "acl-denied",
		Broker:           "tcp://x:1883",
		IsConnectedFn:    func() bool { return true },
		ForceReconnectFn: func() { reconnectCount.Add(1) },
	}
	if err := registerLivenessState(src); err != nil {
		t.Fatalf("setup: %v", err)
	}

	discard := func(args ...any) {}
	processLivenessTransition(src, LivenessNeverReceived, "no msgs ever", now, discard)

	// Let any (incorrect) reconnect goroutine run before counting.
	time.Sleep(100 * time.Millisecond)
	n := reconnectCount.Load()
	if n != 0 {
		t.Fatalf("LivenessNeverReceived must NOT force-reconnect (likely ACL deny — TCP churn won't help); got %d invocations", n)
	}
}
// TestMQTTStallWatchdog_NilForceReconnectFnIsSafe checks that a source
// without a ForceReconnectFn (tests, or a source registered before the
// wiring existed) does not panic when a LivenessStalled verdict is
// processed.
func TestMQTTStallWatchdog_NilForceReconnectFnIsSafe(t *testing.T) {
	restore := snapshotAndResetRegistry(t)
	defer restore()

	now := time.Now()
	// ForceReconnectFn is intentionally left unset.
	src := &SourceLivenessState{
		Tag:           "no-reconnect-fn",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	if err := registerLivenessState(src); err != nil {
		t.Fatalf("setup: %v", err)
	}

	// Turn a panic in the call below into a test failure with context.
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		t.Fatalf("nil ForceReconnectFn must be a safe no-op; panicked: %v", r)
	}()

	discard := func(args ...any) {}
	processLivenessTransition(src, LivenessStalled, "stalled", now, discard)
}
// waitForReconnect polls count every 5ms and returns as soon as it
// reaches want, or once timeout has elapsed. It does NOT fail the test
// on timeout; callers assert on the counter afterwards. Polling is needed
// because ForceReconnectFn runs on its own goroutine in production
// (Disconnect+Connect can block on broker IO), so the counter cannot be
// read synchronously.
func waitForReconnect(t *testing.T, count *atomic.Int32, want int32, timeout time.Duration) {
	t.Helper()
	stopAt := time.Now().Add(timeout)
	for {
		if !time.Now().Before(stopAt) {
			return
		}
		if count.Load() >= want {
			return
		}
		time.Sleep(5 * time.Millisecond)
	}
}
-43
View File
@@ -1,43 +0,0 @@
package main
import (
"sync/atomic"
"testing"
"time"
)
// TestSourceLivenessState_ReceiptVsWriteSeparate asserts that the receipt
// clock (LastReceiptUnix) and the post-write clock (LastMessageUnix) move
// independently (PR #1609 review MAJOR M1): MarkReceipt must not advance
// the post-write clock and MarkMessage must not touch the receipt clock.
// Without that separation /healthz would report "fresh" while the writer
// is stalled and the ingest buffer is filling.
func TestSourceLivenessState_ReceiptVsWriteSeparate(t *testing.T) {
	state := &SourceLivenessState{Tag: "t"}
	receiptAt := time.Now()

	// Phase 1: a message is received but never written (writer stalled).
	state.MarkReceipt(receiptAt)
	receipt := atomic.LoadInt64(&state.LastReceiptUnix)
	write := atomic.LoadInt64(&state.LastMessageUnix)
	if receipt != receiptAt.Unix() {
		t.Fatalf("LastReceiptUnix: want %d, got %d", receiptAt.Unix(), receipt)
	}
	if write != 0 {
		t.Fatalf("LastMessageUnix MUST stay 0 while writer stalled (only MarkReceipt called); got %d — receipt is double-stamping the write clock and /healthz will lie about ingestion freshness", write)
	}

	// Phase 2: the write completes later; only the post-write clock moves.
	writeAt := receiptAt.Add(5 * time.Second)
	state.MarkMessage(writeAt)
	receipt = atomic.LoadInt64(&state.LastReceiptUnix)
	write = atomic.LoadInt64(&state.LastMessageUnix)
	if receipt != receiptAt.Unix() {
		t.Fatalf("MarkMessage must not move LastReceiptUnix backwards or forwards; want %d, got %d", receiptAt.Unix(), receipt)
	}
	if write != writeAt.Unix() {
		t.Fatalf("LastMessageUnix after MarkMessage: want %d, got %d", writeAt.Unix(), write)
	}
}
-286
View File
@@ -1,286 +0,0 @@
package main
import (
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// PR #1216 round-1 review fixes. Tests are RED before the fix lands:
// - Item 1: cold-start blind spot — silent-from-start source never alarmed.
// - Item 2: reconnect reset — stale LastMessageUnix triggers false stall after recovery.
// - Item 3: log flood — every-60s rescan re-emits same WARN forever.
// - Item 4: tag collision in registerLivenessState silently overwrites prior state.
// waitFor polls every 10ms, reading len(*emits) under mu, and returns
// once at least want entries are present. If timeout elapses first it
// fails the test with the entries seen so far. Goroutine-driven tests use
// it to make sure one tick has been fully processed before they mutate
// state, which keeps the observed edge transitions deterministic.
func waitFor(t *testing.T, mu *sync.Mutex, emits *[]string, want int, timeout time.Duration) {
	t.Helper()

	seen := func() int {
		mu.Lock()
		defer mu.Unlock()
		return len(*emits)
	}

	for limit := time.Now().Add(timeout); time.Now().Before(limit); time.Sleep(10 * time.Millisecond) {
		if seen() >= want {
			return
		}
	}

	mu.Lock()
	defer mu.Unlock()
	t.Fatalf("timeout waiting for %d emits; got %d: %v", want, len(*emits), *emits)
}
// TestMQTTStallWatchdog_FiresOnSilentFromStart covers item 1: a source
// that reports connected but has never recorded a message
// (LastMessageUnix==0) must alarm once the threshold has elapsed since
// FirstConnectedAt. This is the #1212 failure class — wrong channel hash,
// ACL dropping SUBSCRIBE, half-open TCP after CONNECT. The alarm must be
// the distinct LivenessNeverReceived kind, mention "NEVER", and carry the
// source tag.
func TestMQTTStallWatchdog_FiresOnSilentFromStart(t *testing.T) {
	now := time.Now()
	tenMinutesAgo := now.Add(-10 * time.Minute).Unix()

	src := &SourceLivenessState{
		Tag:           "cold",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	atomic.StoreInt64(&src.StartedAt, tenMinutesAgo)
	atomic.StoreInt64(&src.FirstConnectedAt, tenMinutesAgo)
	// LastMessageUnix is left at 0: nothing was ever received.

	msg, kind := checkSourceLiveness(src, 5*time.Minute, now)
	if kind != LivenessNeverReceived {
		t.Fatalf("expected LivenessNeverReceived for silent-from-start source after threshold; got kind=%v msg=%q", kind, msg)
	}
	if mentionsNever := strings.Contains(strings.ToUpper(msg), "NEVER"); !mentionsNever {
		t.Errorf("cold-start alarm must mention NEVER received to distinguish from generic stall; got %q", msg)
	}
	if hasTag := strings.Contains(msg, "cold"); !hasTag {
		t.Errorf("alarm must include source tag; got %q", msg)
	}
}
// TestMQTTStallWatchdog_QuietDuringColdStartGrace checks the other side
// of the cold-start rule: a source that has never received a message but
// was first connected only 30s ago (threshold 5m) must stay LivenessOK.
func TestMQTTStallWatchdog_QuietDuringColdStartGrace(t *testing.T) {
	now := time.Now()
	thirtySecondsAgo := now.Add(-30 * time.Second).Unix()

	src := &SourceLivenessState{
		Tag:           "warming-up",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	atomic.StoreInt64(&src.StartedAt, thirtySecondsAgo)
	atomic.StoreInt64(&src.FirstConnectedAt, thirtySecondsAgo)

	if _, kind := checkSourceLiveness(src, 5*time.Minute, now); kind != LivenessOK {
		t.Fatalf("must NOT alarm during cold-start grace (30s in, threshold 5m); got kind=%v", kind)
	}
}
// TestMQTTStallWatchdog_OnReconnectResetsClocks covers item 2: after a
// long outage and a paho reconnect, a stale LastMessageUnix would make
// the watchdog report a multi-hour stall immediately. MarkReconnected
// must zero LastMessageUnix and LastAlertUnix and re-stamp StartedAt, and
// a liveness check right after it must stay quiet. (FirstConnectedAt is
// left at 0 in this fixture, so the cold-start branch also stays quiet.)
func TestMQTTStallWatchdog_OnReconnectResetsClocks(t *testing.T) {
	now := time.Now()
	src := &SourceLivenessState{
		Tag:           "flaky",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}

	// Pre-outage leftovers: a 2h-old message stamp, a 3h-old start stamp
	// and a 90m-old alert cooldown. None of them may survive reconnect.
	atomic.StoreInt64(&src.LastMessageUnix, now.Add(-2*time.Hour).Unix())
	atomic.StoreInt64(&src.StartedAt, now.Add(-3*time.Hour).Unix())
	atomic.StoreInt64(&src.LastAlertUnix, now.Add(-90*time.Minute).Unix())

	src.MarkReconnected(now)

	last := atomic.LoadInt64(&src.LastMessageUnix)
	if last != 0 {
		t.Errorf("LastMessageUnix must be cleared on reconnect so a stale pre-outage timestamp does not trip the watchdog; got %d", last)
	}
	started := atomic.LoadInt64(&src.StartedAt)
	if started != now.Unix() {
		t.Errorf("StartedAt must be re-stamped on reconnect so the cold-start grace window restarts; got %d want %d", started, now.Unix())
	}
	alert := atomic.LoadInt64(&src.LastAlertUnix)
	if alert != 0 {
		t.Errorf("LastAlertUnix must be cleared on reconnect so edge-trigger re-arms; got %d", alert)
	}

	// A scan one second after the reconnect must not alarm.
	justAfter := now.Add(1 * time.Second)
	if _, kind := checkSourceLiveness(src, 5*time.Minute, justAfter); kind != LivenessOK {
		t.Fatalf("watchdog must stay quiet immediately after MarkReconnected; got kind=%v", kind)
	}
}
// TestMQTTStallWatchdog_EdgeTriggeredEmitsOnlyOnce covers item 3: the
// watchdog must be edge-triggered. Three consecutive scans of a source
// that stays stalled, all inside the heartbeat interval, must produce
// exactly ONE emit (the WARN edge); the other two are suppressed.
//
// The tick channel is unbuffered and closed after the third send. Each
// send only completes once the loop has received it, and the loop exits
// on the closed channel only after finishing the third scan, so all three
// ticks are guaranteed to have been processed before the assertions run —
// no sleep-based draining, and no chance of the loop exiting with
// unprocessed ticks (which would let the test pass vacuously).
func TestMQTTStallWatchdog_EdgeTriggeredEmitsOnlyOnce(t *testing.T) {
	defer snapshotAndResetRegistry(t)()
	now := time.Now()
	s := &SourceLivenessState{
		Tag:           "stuck",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	atomic.StoreInt64(&s.LastMessageUnix, now.Add(-10*time.Minute).Unix())
	atomic.StoreInt64(&s.StartedAt, now.Add(-20*time.Minute).Unix())
	// An unregistered source would never be scanned; fail loudly instead
	// of silently testing nothing.
	if err := registerLivenessState(s); err != nil {
		t.Fatalf("setup: %v", err)
	}

	var mu sync.Mutex
	var emits []string
	emit := func(args ...any) {
		mu.Lock()
		defer mu.Unlock()
		if len(args) > 0 {
			if str, ok := args[0].(string); ok {
				emits = append(emits, str)
			}
		}
	}

	tick := make(chan time.Time) // unbuffered: a completed send means the loop took the tick
	done := make(chan struct{})
	exited := make(chan struct{})
	go func() {
		defer close(exited)
		runLivenessWatchdogLoop(tick, done, 5*time.Minute, emit)
	}()

	// Three back-to-back ticks within the heartbeat window. Only the first
	// should emit a WARN; the other two must be suppressed (edge-triggered).
	tick <- now
	tick <- now.Add(30 * time.Second)
	tick <- now.Add(60 * time.Second)

	// Closing tick makes the loop return after it finishes the third scan.
	close(tick)
	select {
	case <-exited:
	case <-time.After(2 * time.Second):
		close(done)
		t.Fatal("watchdog loop did not exit after the tick channel was closed")
	}

	mu.Lock()
	got := append([]string(nil), emits...)
	mu.Unlock()

	warns := 0
	for _, e := range got {
		if strings.Contains(e, "WATCHDOG") || strings.Contains(e, "stalled") || strings.Contains(strings.ToUpper(e), "WARN") {
			warns++
		}
	}
	if warns != 1 {
		t.Fatalf("expected exactly 1 stall WARN across 3 consecutive scans (edge-trigger); got %d: %v", warns, got)
	}
}
// TestMQTTStallWatchdog_RecoveryEmitOnce covers item 3 (recovery side): on
// a stalled→flowing transition a recovery INFO must fire exactly once,
// and later scans must stay silent until a new stall edge. Expected emit
// sequence: one WARN, one recovery INFO, nothing else.
//
// The tick channel is unbuffered and closed after the last send, so every
// tick — including the two that must stay silent — has been fully
// processed by the time the loop exits. This replaces a sleep-based
// settle that could let the loop exit with unprocessed ticks.
func TestMQTTStallWatchdog_RecoveryEmitOnce(t *testing.T) {
	defer snapshotAndResetRegistry(t)()
	now := time.Now()
	s := &SourceLivenessState{
		Tag:           "src-b",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	atomic.StoreInt64(&s.LastMessageUnix, now.Add(-10*time.Minute).Unix())
	atomic.StoreInt64(&s.StartedAt, now.Add(-20*time.Minute).Unix())
	// An unregistered source would never be scanned; fail loudly instead
	// of timing out later in waitFor.
	if err := registerLivenessState(s); err != nil {
		t.Fatalf("setup: %v", err)
	}

	var mu sync.Mutex
	var emits []string
	emit := func(args ...any) {
		mu.Lock()
		defer mu.Unlock()
		if len(args) > 0 {
			if str, ok := args[0].(string); ok {
				emits = append(emits, str)
			}
		}
	}

	tick := make(chan time.Time) // unbuffered: a completed send means the loop took the tick
	done := make(chan struct{})
	exited := make(chan struct{})
	go func() {
		defer close(exited)
		runLivenessWatchdogLoop(tick, done, 5*time.Minute, emit)
	}()

	tick <- now // → WARN
	// Wait for the WARN edge to be recorded before mutating state —
	// otherwise the first scan could observe the "recovered" timestamp
	// instead of the stall.
	waitFor(t, &mu, &emits, 1, 2*time.Second)

	// Source recovers: a recent message arrives.
	atomic.StoreInt64(&s.LastMessageUnix, now.Add(30*time.Second).Unix())
	tick <- now.Add(60 * time.Second) // → recovery INFO
	waitFor(t, &mu, &emits, 2, 2*time.Second)

	tick <- now.Add(120 * time.Second) // → silent
	tick <- now.Add(180 * time.Second) // → silent

	// Closing tick makes the loop return after it finishes the last scan,
	// so any (incorrect) extra emit has landed before we count.
	close(tick)
	select {
	case <-exited:
	case <-time.After(2 * time.Second):
		close(done)
		t.Fatal("watchdog loop did not exit after the tick channel was closed")
	}

	mu.Lock()
	got := append([]string(nil), emits...)
	mu.Unlock()

	infos := 0
	for _, e := range got {
		upper := strings.ToUpper(e)
		if strings.Contains(upper, "RECOVER") || strings.Contains(upper, "FLOWING") {
			infos++
		}
	}
	if len(got) != 2 {
		t.Fatalf("expected exactly 2 emits (1 WARN + 1 recovery INFO); got %d: %v", len(got), got)
	}
	if infos != 1 {
		t.Fatalf("expected exactly 1 recovery INFO emit; got %d (all=%v)", infos, got)
	}
}
// TestRegisterLivenessState_DetectsTagCollision covers Item 4:
// registerLivenessState must not silently overwrite on a tag collision
// (empty-Name + same broker, duplicate Name). It must detect and report
// the collision, and the first registration must stay in the registry.
func TestRegisterLivenessState_DetectsTagCollision(t *testing.T) {
	defer snapshotAndResetRegistry(t)()
	a := &SourceLivenessState{Tag: "dup", Broker: "tcp://a:1883"}
	b := &SourceLivenessState{Tag: "dup", Broker: "tcp://b:1883"}
	if err := registerLivenessState(a); err != nil {
		t.Fatalf("first registration must succeed; got %v", err)
	}
	if err := registerLivenessState(b); err == nil {
		t.Fatal("second registration with same tag must return a collision error (current behavior silently clobbers)")
	}
	// And the registry must still hold the FIRST registration — clobbering
	// AttemptCount/LastMessageUnix invisibly is the bug.
	livenessRegistryMu.RLock()
	got := livenessRegistry["dup"]
	livenessRegistryMu.RUnlock()
	switch {
	case got == a:
		// First registration is still authoritative.
	case got == nil:
		// Guard the nil case separately: formatting got.Broker below
		// would panic and hide the real failure.
		t.Errorf("on collision, first registration must remain authoritative (registry has no entry for tag %q)", "dup")
	default:
		t.Errorf("on collision, first registration must remain authoritative (got pointer for broker=%s)", got.Broker)
	}
}
-228
View File
@@ -1,228 +0,0 @@
package main
import (
"bytes"
"log"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
// PR #1216 round-2 review fixes. Tests RED before the fix lands.
//
// r1 closed the cold-start blind spot but introduced three new failure
// modes that r2 must eliminate:
//
// r2 #1 — checkSourceLiveness returns LivenessOK for BOTH "messages
// flowing" AND "disconnected/never-connected". A stalled source
// whose TCP eventually RSTs trips processLivenessTransition's
// recovery branch and emits "messages flowing again (recovered)"
// while going from silently broken to overtly broken. Fix: a
// distinct LivenessDisconnected kind that the transition
// function treats as a silent (no-emit) state, so the alert
// cooldown does not collapse on a non-event.
//
// r2 #2 — MarkReconnected re-stamps StartedAt on every reconnect, so
// the cold-start grace clock restarts forever under a broker
// flap (CONNECT ok, SUBSCRIBE ACL-denied — the exact #1212
// shape). The headline "NEVER received" alarm never fires.
// Fix: separate FirstConnectedAt (set once at registration,
// never reset) from StartedAt (free to reset on reconnect for
// transient-stall tracking). Cold-start grace must use
// FirstConnectedAt.
//
// r2 #3 — main.go calls log.Fatalf on a tag collision in the liveness
// registry, killing the entire ingestor over one config typo.
// That recreates the #1212 total-ingest-stop failure class
// this PR exists to prevent. Fix: log an ERROR and skip
// liveness registration for the duplicate — the MQTT source
// still attempts to connect, just isn't tracked by the
// watchdog (the first registration remains authoritative).
// r2 #1 RED: a stalled source whose connection then drops must NOT emit
// "recovered". The current code does — checkSourceLiveness returns
// LivenessOK for both genuine recovery and disconnection, so
// processLivenessTransition sees lastAlert!=0 + kind==LivenessOK and
// fires the recovery INFO. Operators reading the log think the source
// healed when it actually died.
//
// The test drives runLivenessWatchdogLoop with two manual ticks: the
// first while the source is connected but silent, the second after the
// connection flag has been flipped to false. It fails if any recorded
// emission contains "RECOVER" or "FLOWING AGAIN" (case-insensitive).
func TestMQTTStallWatchdog_NoFalseRecoveryOnDisconnect(t *testing.T) {
defer snapshotAndResetRegistry(t)()
now := time.Now()
// connected backs IsConnectedFn so the test can flip the reported
// connection state while the watchdog goroutine is running.
var connected atomic.Bool
connected.Store(true)
s := &SourceLivenessState{
Tag: "drops-after-stall",
Broker: "tcp://x:1883",
IsConnectedFn: func() bool { return connected.Load() },
}
// Last message 10m ago, started 20m ago; the loop below uses a 5m threshold.
atomic.StoreInt64(&s.LastMessageUnix, now.Add(-10*time.Minute).Unix())
atomic.StoreInt64(&s.StartedAt, now.Add(-20*time.Minute).Unix())
if err := registerLivenessState(s); err != nil {
t.Fatalf("setup: registerLivenessState: %v", err)
}
// emit records the first argument of each emission when it is a
// string; mu guards emits against the watchdog goroutine.
var mu sync.Mutex
var emits []string
emit := func(args ...any) {
mu.Lock()
defer mu.Unlock()
if len(args) > 0 {
if str, ok := args[0].(string); ok {
emits = append(emits, str)
}
}
}
// Buffer of 2 matches the two ticks sent below, so neither send blocks.
tick := make(chan time.Time, 2)
done := make(chan struct{})
exited := make(chan struct{})
go func() {
runLivenessWatchdogLoop(tick, done, 5*time.Minute, emit)
close(exited)
}()
// Tick 1: source connected + 10m silent → WARN edge.
tick <- now
waitFor(t, &mu, &emits, 1, 2*time.Second)
// The TCP socket RSTs — paho flips IsConnected to false. The watchdog
// must NOT interpret this as recovery; the source went from silently
// broken to overtly broken.
connected.Store(false)
tick <- now.Add(60 * time.Second)
// Settle so any (incorrect) extra emits land before we count.
time.Sleep(150 * time.Millisecond)
close(done)
<-exited
// Copy under the lock so the scan below works on a stable snapshot.
mu.Lock()
got := append([]string(nil), emits...)
mu.Unlock()
for _, e := range got {
upper := strings.ToUpper(e)
if strings.Contains(upper, "RECOVER") || strings.Contains(upper, "FLOWING AGAIN") {
t.Fatalf("watchdog must NOT emit recovery INFO when a stalled source disconnects; got %q (all=%v)", e, got)
}
}
}
// TestMQTTStallWatchdog_ColdStartSurvivesBrokerFlap is the r2 #2 RED
// test. A broker that ACKs CONNECT but denies SUBSCRIBE makes paho loop
// CONNECT → drop → CONNECT → drop, and every reconnect calls
// MarkReconnected. If the cold-start grace clock is driven by the most
// recent reconnect, a source that has NEVER received a message never
// trips the "NEVER received" alarm. The cold-start check must instead
// be anchored to the first connection (FirstConnectedAt).
//
// The test registers a source, simulates six reconnects one minute
// apart, then checks liveness 6m30s after the start with a 5m
// threshold and expects LivenessNeverReceived.
func TestMQTTStallWatchdog_ColdStartSurvivesBrokerFlap(t *testing.T) {
	defer snapshotAndResetRegistry(t)()

	const threshold = 5 * time.Minute
	start := time.Now()
	src := &SourceLivenessState{
		Tag:           "flapping-acl-deny",
		Broker:        "tcp://acl-denied:1883",
		IsConnectedFn: func() bool { return true },
	}

	// Registration happens right after start is captured.
	if regErr := registerLivenessState(src); regErr != nil {
		t.Fatalf("setup: registerLivenessState: %v", regErr)
	}

	// One reconnect per minute for six minutes; no message ever arrives.
	for minute := time.Duration(1); minute <= 6; minute++ {
		src.MarkReconnected(start.Add(minute * time.Minute))
	}

	// 6m30s after the first connection is past the 5m threshold, so the
	// headline alarm must fire.
	checkAt := start.Add(6*time.Minute + 30*time.Second)
	if _, kind := checkSourceLiveness(src, threshold, checkAt); kind != LivenessNeverReceived {
		t.Fatalf("under broker flap (#1212 ACL-deny class), cold-start alarm must fire based on FirstConnectedAt, not the most recent reconnect; got kind=%v", kind)
	}
}
// TestMQTTStallWatchdog_TransientReconnectDuringGraceStaysQuiet is the
// counterpart sanity check: one transient reconnect INSIDE the
// cold-start window must not trip the NeverReceived alarm early. It
// guards against an over-correction that anchors blindly to the first
// connection and ignores ordinary startup jitter.
func TestMQTTStallWatchdog_TransientReconnectDuringGraceStaysQuiet(t *testing.T) {
	defer snapshotAndResetRegistry(t)()

	const grace = 5 * time.Minute
	start := time.Now()
	src := &SourceLivenessState{
		Tag:           "transient-reconnect",
		Broker:        "tcp://x:1883",
		IsConnectedFn: func() bool { return true },
	}
	if regErr := registerLivenessState(src); regErr != nil {
		t.Fatalf("setup: registerLivenessState: %v", regErr)
	}

	// A single reconnect 30s in.
	src.MarkReconnected(start.Add(30 * time.Second))

	// One minute after registration is still well inside the 5m grace.
	_, kind := checkSourceLiveness(src, grace, start.Add(time.Minute))
	if kind == LivenessOK {
		return
	}
	t.Fatalf("during cold-start grace, transient reconnects must stay quiet; got kind=%v", kind)
}
// r2 #3 RED: tag collision must not kill the ingestor. main.go currently
// log.Fatalf's, which recreates the #1212 total-ingest-stop class this
// PR exists to prevent. registerLivenessOrSkip is the small helper main
// will call instead: log an ERROR + skip liveness registration for the
// duplicate, return false so the caller knows the source is connecting
// untracked. The first registration remains authoritative.
//
// The test redirects the standard logger into a buffer, registers two
// states with the same tag, and checks the helper's return values, the
// log text ("ERROR", the tag, "skip"), and the registry contents.
func TestRegisterLivenessOrSkip_LogsErrorAndDoesNotExitOnCollision(t *testing.T) {
	defer snapshotAndResetRegistry(t)()

	// Capture log output with flags off so the assertions see only the
	// message text; restore both settings when the test ends.
	var buf bytes.Buffer
	origOut := log.Writer()
	origFlags := log.Flags()
	log.SetOutput(&buf)
	log.SetFlags(0)
	defer func() {
		log.SetOutput(origOut)
		log.SetFlags(origFlags)
	}()

	a := &SourceLivenessState{Tag: "dup", Broker: "tcp://a:1883"}
	b := &SourceLivenessState{Tag: "dup", Broker: "tcp://b:1883"}
	if ok := registerLivenessOrSkip(a); !ok {
		t.Fatalf("first registration must succeed; helper returned false (log=%q)", buf.String())
	}
	if ok := registerLivenessOrSkip(b); ok {
		t.Fatalf("second registration with same tag must return false (skip); helper returned true (log=%q)", buf.String())
	}

	logOut := buf.String()
	if !strings.Contains(logOut, "ERROR") {
		t.Errorf("collision must be logged at ERROR severity so operators see it without it crashing the process; got %q", logOut)
	}
	if !strings.Contains(logOut, "dup") {
		t.Errorf("collision log must include the offending tag; got %q", logOut)
	}
	if !strings.Contains(strings.ToLower(logOut), "skip") {
		t.Errorf("collision log must say the duplicate is being skipped so operators know the source is untracked; got %q", logOut)
	}

	// And the registry still holds the FIRST registration.
	livenessRegistryMu.RLock()
	got := livenessRegistry["dup"]
	livenessRegistryMu.RUnlock()
	switch {
	case got == a:
		// First registration is still authoritative.
	case got == nil:
		// Guard the nil case separately: formatting got.Broker below
		// would panic and hide the real failure.
		t.Errorf("first registration must remain authoritative after collision-skip; registry has no entry for tag %q", "dup")
	default:
		t.Errorf("first registration must remain authoritative after collision-skip; got pointer for broker=%s", got.Broker)
	}
}
-221
View File
@@ -1,221 +0,0 @@
package main
import (
"encoding/json"
"errors"
"log"
"os"
"github.com/meshcore-analyzer/mbcapqueue"
)
// MultibyteCapPersistStats holds counts for /api/healthz exposure / logging.
// RunMultibyteCapPersist returns one value of this type per invocation;
// all fields are zero when the run was a no-op.
type MultibyteCapPersistStats struct {
ReadEntries int // entries read from snapshot (includes entries later skipped)
UpdatedActive int64 // rows updated in nodes
UpdatedInactive int64 // rows updated in inactive_nodes
Skipped int // entries skipped because their status maps to 0 (e.g. status=="unknown")
}
// RunMultibyteCapPersist consumes the latest multi-byte capability snapshot
// written by the server (internal/mbcapqueue) and persists it to nodes /
// inactive_nodes. Owned by the ingestor per #1287: the server is read-only
// since #1289 and cannot UPDATE these columns itself.
//
// INVARIANT (canonical owner): multibyte_sup / multibyte_evidence are
// derived/cached columns. The server COMPUTES the value during its
// analytics cycle (from observed packets) and writes a snapshot file;
// this function is the ONLY runtime path that mutates those columns
// (the schema itself is added by internal/dbschema). The server MUST
// NOT execute any UPDATE on nodes.multibyte_* — see
// cmd/server/readonly_invariant_test.go for the enforcement.
//
// Data-destruction guard: entries with Status=="unknown" (sup==0) are
// NEVER persisted — we never overwrite a previously confirmed/suspected
// DB value with a snapshot blank. Same guarantee the original
// server-side helper enforced before relocation.
//
// Safe to call from a ticker; no-op when no snapshot has been written
// (cold start), when the snapshot is empty, when the snapshot is
// malformed (#1386), or when running against a legacy DB that
// pre-dates the multibyte_sup migration (#1386).
//
// Returns the per-run counters and a non-nil error only when the
// transaction cannot be begun, prepared, or committed. Per-entry
// probe/UPDATE failures do not abort the run: the entry is left
// untouched, the failure is counted, and one summary line is logged.
func (s *Store) RunMultibyteCapPersist() (MultibyteCapPersistStats, error) {
	var stats MultibyteCapPersistStats

	snap, err := mbcapqueue.ReadSnapshot(s.path)
	if err != nil {
		// os.ErrNotExist is the steady state until the server's first
		// analytics cycle completes — silent no-op. A malformed file
		// is operator-actionable: log it (but still no-op, no error
		// surfaced to the ticker — a corrupt snapshot must not stop
		// the maintenance loop).
		if errors.Is(err, os.ErrNotExist) {
			return stats, nil
		}
		var jsonErr *json.SyntaxError
		if errors.As(err, &jsonErr) || isMalformedSnapshotErr(err) {
			log.Printf("[multibyte-persist] malformed snapshot on disk (no-op): %v", err)
			return stats, nil
		}
		log.Printf("[multibyte-persist] read snapshot: %v (no-op)", err)
		return stats, nil
	}

	stats.ReadEntries = len(snap.Entries)
	if len(snap.Entries) == 0 {
		return stats, nil
	}

	// Defensive schema check: a legacy DB that pre-dates the
	// multibyte_sup migration would fail at tx.Prepare with a SQL
	// error. Detect early and skip cleanly so the ticker keeps
	// running on heterogeneous deployments.
	if !s.hasMultibyteSupColumns() {
		log.Printf("[multibyte-persist] schema missing: nodes.multibyte_sup not present on this DB (legacy schema) — skipping %d entries", stats.ReadEntries)
		return stats, nil
	}

	tx, err := s.db.Begin()
	if err != nil {
		return stats, err
	}
	defer tx.Rollback() //nolint:errcheck

	// Combined dispatch: each pubkey lives in exactly one of nodes /
	// inactive_nodes. The pre-#1386 implementation issued one UPDATE
	// against each table per entry — 50% guaranteed-empty. We now
	// look up the table once, then issue the matching UPDATE.
	stmtN, err := tx.Prepare(`UPDATE nodes SET multibyte_sup=?, multibyte_evidence=? WHERE public_key=?`)
	if err != nil {
		return stats, err
	}
	defer stmtN.Close()
	stmtI, err := tx.Prepare(`UPDATE inactive_nodes SET multibyte_sup=?, multibyte_evidence=? WHERE public_key=?`)
	if err != nil {
		return stats, err
	}
	defer stmtI.Close()

	// Membership probe. EXISTS always yields exactly one row (0 or 1),
	// so a Scan error here means the probe itself failed — it can no
	// longer be confused with "key is not in nodes".
	stmtProbe, err := tx.Prepare(`SELECT EXISTS(SELECT 1 FROM nodes WHERE public_key=?)`)
	if err != nil {
		return stats, err
	}
	defer stmtProbe.Close()

	// Per-entry failures are tallied and reported once after the loop so
	// a bad batch cannot flood the log.
	var (
		failed   int
		firstErr error
	)
	noteFailure := func(e error) {
		failed++
		if firstErr == nil {
			firstErr = e
		}
	}

	for _, e := range snap.Entries {
		sup := multibyteStatusToInt(e.Status)
		if sup == 0 {
			// Data-destruction guard: never persist "unknown".
			stats.Skipped++
			continue
		}

		var inNodes int
		if probeErr := stmtProbe.QueryRow(e.PublicKey).Scan(&inNodes); probeErr != nil {
			// Table membership is unknown; do not guess a table.
			noteFailure(probeErr)
			continue
		}

		// Probe hit → UPDATE nodes; miss → UPDATE inactive_nodes.
		stmt, counter := stmtI, &stats.UpdatedInactive
		if inNodes != 0 {
			stmt, counter = stmtN, &stats.UpdatedActive
		}
		res, execErr := stmt.Exec(sup, e.Evidence, e.PublicKey)
		if execErr != nil {
			noteFailure(execErr)
			continue
		}
		// A RowsAffected error only loses the counter, not the write, so
		// it is deliberately not treated as an entry failure.
		if n, raErr := res.RowsAffected(); raErr == nil && n > 0 {
			*counter += n
		}
	}

	if failed > 0 {
		log.Printf("[multibyte-persist] %d of %d entries could not be applied (first error: %v)", failed, stats.ReadEntries, firstErr)
	}

	if err := tx.Commit(); err != nil {
		return stats, err
	}
	if stats.UpdatedActive+stats.UpdatedInactive > 0 {
		log.Printf("[multibyte-persist] applied snapshot: %d entries (%d skipped); updated %d active + %d inactive nodes",
			stats.ReadEntries, stats.Skipped, stats.UpdatedActive, stats.UpdatedInactive)
	}
	return stats, nil
}
// isMalformedSnapshotErr reports whether err's message looks like a JSON
// parse / truncation failure. It returns false for a nil error and true
// when the message contains, case-insensitively, any of "unmarshal",
// "invalid character" or "unexpected end of JSON". The match is on the
// rendered message text rather than on error types, so it still works
// when the underlying error has been wrapped with extra context.
func isMalformedSnapshotErr(err error) bool {
	if err == nil {
		return false
	}
	text := err.Error()
	return containsCI(text, "unmarshal") ||
		containsCI(text, "invalid character") ||
		containsCI(text, "unexpected end of JSON")
}
// containsCI reports whether sub occurs in s, ignoring ASCII letter
// case. An empty sub always matches. Only the bytes 'A'..'Z' are folded;
// all other bytes (including non-ASCII ones) are compared exactly.
//
// It is a local stand-in for a case-insensitive strings.Contains, kept
// here so this file does not need to import strings.
func containsCI(s, sub string) bool {
	// fold lower-cases a single ASCII upper-case byte.
	fold := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + ('a' - 'A')
		}
		return c
	}

	width := len(sub)
	if width == 0 {
		return true
	}

	// Slide a window of len(sub) bytes across s; the loop body never
	// runs when sub is longer than s.
candidates:
	for start := 0; start <= len(s)-width; start++ {
		for k := 0; k < width; k++ {
			if fold(s[start+k]) != fold(sub[k]) {
				continue candidates
			}
		}
		return true
	}
	return false
}
// hasMultibyteSupColumns reports whether the `nodes` table of the active
// DB has a column named multibyte_sup, by walking the rows of
// PRAGMA table_info(nodes). RunMultibyteCapPersist uses it to
// short-circuit on legacy DBs that pre-date the internal/dbschema
// migration (#1386).
//
// It returns false when the PRAGMA query fails or a row cannot be
// scanned, so callers cannot tell those cases apart from a column that
// is genuinely absent.
func (s *Store) hasMultibyteSupColumns() bool {
	cols, err := s.db.Query(`PRAGMA table_info(nodes)`)
	if err != nil {
		return false
	}
	defer cols.Close()

	// Scan targets for the six values scanned from each PRAGMA row; only
	// the column name is inspected.
	var (
		cid, notNull, pk int
		colName, colType string
		defaultVal       interface{}
	)
	for cols.Next() {
		if cols.Scan(&cid, &colName, &colType, &notNull, &defaultVal, &pk) != nil {
			return false
		}
		if colName == "multibyte_sup" {
			return true
		}
	}
	return false
}
// multibyteStatusToInt mirrors the mapping the server used before relocation.
// "confirmed" maps to 2 and "suspected" to 1. Every other status string
// maps to 0 (unknown), which is never persisted. The comparison is
// exact and case-sensitive.
func multibyteStatusToInt(status string) int {
	if status == "confirmed" {
		return 2
	}
	if status == "suspected" {
		return 1
	}
	return 0
}
@@ -1,54 +0,0 @@
package main
import (
"bytes"
"database/sql"
"log"
"strings"
"testing"
)
// captureLogs points the standard logger at a fresh in-memory buffer and
// returns that buffer. A t.Cleanup hook puts the previous writer and
// flags back when the test (or subtest) that called it finishes.
func captureLogs(t *testing.T) *bytes.Buffer {
	t.Helper()

	savedOut, savedFlags := log.Writer(), log.Flags()
	captured := new(bytes.Buffer)
	log.SetOutput(captured)

	t.Cleanup(func() {
		log.SetOutput(savedOut)
		log.SetFlags(savedFlags)
	})
	return captured
}
// logContains reports whether the text currently held in buf contains
// substr, comparing case-insensitively by lower-casing both sides.
func logContains(buf *bytes.Buffer, substr string) bool {
	haystack := strings.ToLower(buf.String())
	needle := strings.ToLower(substr)
	return strings.Contains(haystack, needle)
}
// columnExists reports whether the named column exists on the table,
// by walking the rows of PRAGMA table_info(<table>).
//
// table is concatenated into the statement text, so it must be a
// trusted identifier supplied by the test itself. Any query, scan, or
// iteration error fails the test via t.Fatalf rather than being
// reported as "column absent".
func columnExists(t *testing.T, db *sql.DB, table, col string) bool {
	t.Helper()
	rows, err := db.Query("PRAGMA table_info(" + table + ")")
	if err != nil {
		t.Fatalf("PRAGMA table_info(%s): %v", table, err)
	}
	defer rows.Close()
	for rows.Next() {
		var cid int
		var name, ctype string
		var notnull, pk int
		var dfltValue sql.NullString
		if err := rows.Scan(&cid, &name, &ctype, &notnull, &dfltValue, &pk); err != nil {
			t.Fatalf("scan PRAGMA: %v", err)
		}
		if name == col {
			return true
		}
	}
	// rows.Next returns false both at end-of-rows and on failure; only
	// rows.Err distinguishes the two.
	if err := rows.Err(); err != nil {
		t.Fatalf("iterate PRAGMA table_info(%s): %v", table, err)
	}
	return false
}
-369
View File
@@ -1,369 +0,0 @@
package main
import (
"os"
"path/filepath"
"testing"
"github.com/meshcore-analyzer/mbcapqueue"
)
// TestRunMultibyteCapPersist_AppliesSnapshot enforces the architectural
// invariant from #1289 + #1322 + #1324 follow-up: the multi-byte
// capability columns (multibyte_sup / multibyte_evidence) on
// nodes / inactive_nodes MUST be written by the ingestor, NEVER by the
// read-only server. The server publishes a snapshot file via
// internal/mbcapqueue; the ingestor's maintenance loop applies it here.
//
// Pre-relocation (PR #1324 as-shipped), the server held a write handle
// and executed UPDATE … nodes SET multibyte_sup directly — which is
// impossible after #1289 made the server's *sql.DB read-only. This test
// asserts the relocated path: snapshot in → UPDATEs out, from the
// ingestor side.
//
// Fixture: aa11 (active, sup=0), bb22 (inactive, sup=0) and cc33
// (active, already sup=2/'advert'). The snapshot marks aa11 confirmed,
// bb22 suspected and cc33 unknown; the test checks the returned stats
// and then reads each row back from the DB.
func TestRunMultibyteCapPersist_AppliesSnapshot(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "test.db")
store, err := OpenStore(dbPath)
if err != nil {
t.Fatalf("OpenStore: %v", err)
}
defer store.Close()
// Seed two nodes: one active, one inactive.
if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, role, last_seen, multibyte_sup, multibyte_evidence)
VALUES ('aa11', 'Alpha', 'repeater', '2026-01-01T00:00:00Z', 0, NULL)`); err != nil {
t.Fatalf("seed nodes: %v", err)
}
if _, err := store.db.Exec(`INSERT INTO inactive_nodes (public_key, name, role, last_seen, multibyte_sup, multibyte_evidence)
VALUES ('bb22', 'Bravo', 'repeater', '2025-01-01T00:00:00Z', 0, NULL)`); err != nil {
t.Fatalf("seed inactive_nodes: %v", err)
}
// Seed a third node already confirmed, then send "unknown" for it —
// the data-destruction guard must keep its DB value.
if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, role, last_seen, multibyte_sup, multibyte_evidence)
VALUES ('cc33', 'Charlie', 'repeater', '2026-01-01T00:00:00Z', 2, 'advert')`); err != nil {
t.Fatalf("seed cc33: %v", err)
}
snap := mbcapqueue.Snapshot{Entries: []mbcapqueue.Entry{
{PublicKey: "aa11", Status: "confirmed", Evidence: "advert"},
{PublicKey: "bb22", Status: "suspected", Evidence: "path"},
{PublicKey: "cc33", Status: "unknown"}, // must NOT overwrite
}}
if err := mbcapqueue.WriteSnapshot(dbPath, snap); err != nil {
t.Fatalf("WriteSnapshot: %v", err)
}
// Sanity: snapshot file landed where we expect.
if _, err := os.Stat(filepath.Join(filepath.Dir(dbPath), mbcapqueue.QueueDirName, mbcapqueue.SnapshotFileName)); err != nil {
t.Fatalf("snapshot not on disk: %v", err)
}
stats, err := store.RunMultibyteCapPersist()
if err != nil {
t.Fatalf("RunMultibyteCapPersist: %v", err)
}
// Stats: all three entries read, one (cc33) skipped, at least one
// row updated in each table.
if stats.ReadEntries != 3 {
t.Errorf("ReadEntries = %d, want 3", stats.ReadEntries)
}
if stats.Skipped != 1 {
t.Errorf("Skipped = %d, want 1 (the unknown entry)", stats.Skipped)
}
if stats.UpdatedActive == 0 {
t.Errorf("UpdatedActive = 0; expected aa11 to be updated in nodes")
}
if stats.UpdatedInactive == 0 {
t.Errorf("UpdatedInactive = 0; expected bb22 to be updated in inactive_nodes")
}
// Verify DB state.
var sup int
var evid string
if err := store.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM nodes WHERE public_key='aa11'`).Scan(&sup, &evid); err != nil {
t.Fatalf("read aa11: %v", err)
}
if sup != 2 || evid != "advert" {
t.Errorf("aa11 after persist: sup=%d evid=%q, want sup=2 evid=advert", sup, evid)
}
if err := store.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM inactive_nodes WHERE public_key='bb22'`).Scan(&sup, &evid); err != nil {
t.Fatalf("read bb22: %v", err)
}
if sup != 1 || evid != "path" {
t.Errorf("bb22 after persist: sup=%d evid=%q, want sup=1 evid=path", sup, evid)
}
// Data-destruction guard: cc33 must still be confirmed=2/'advert'.
if err := store.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM nodes WHERE public_key='cc33'`).Scan(&sup, &evid); err != nil {
t.Fatalf("read cc33: %v", err)
}
if sup != 2 || evid != "advert" {
t.Errorf("cc33 was overwritten by unknown entry: sup=%d evid=%q, want sup=2 evid=advert", sup, evid)
}
}
// TestRunMultibyteCapPersist_NoSnapshot_NoOp checks the cold-start path:
// when no snapshot file has been written yet (the server's analytics
// cycle takes ~15s after boot), the persist step must return no error
// and report zero entries read and zero rows updated.
func TestRunMultibyteCapPersist_NoSnapshot_NoOp(t *testing.T) {
	dbPath := filepath.Join(t.TempDir(), "test.db")

	store, openErr := OpenStore(dbPath)
	if openErr != nil {
		t.Fatalf("OpenStore: %v", openErr)
	}
	defer store.Close()

	// No WriteSnapshot call precedes this: the snapshot file is absent.
	stats, runErr := store.RunMultibyteCapPersist()
	if runErr != nil {
		t.Fatalf("RunMultibyteCapPersist (no snapshot): %v", runErr)
	}

	touched := stats.ReadEntries != 0 || stats.UpdatedActive != 0 || stats.UpdatedInactive != 0
	if touched {
		t.Errorf("expected zero-valued stats on cold start, got %+v", stats)
	}
}
// TestRunMultibyteCapPersist_RoundTrip exercises the full end-to-end
// contract claimed by PR #1324: the server writes a snapshot, the
// ingestor persists it, and after a simulated restart (close + reopen
// the store) the DB still carries the persisted state.
//
// The audit (#1386) flagged this as the #1 missing test: the two halves
// (persist / read-back) were each tested in isolation, but no single
// test proved the persist path produces a database state the loader
// can later consume — so a column-rename or snapshot-version drift
// would slip past.
//
// Phase 1 seeds dd44 (active) and ee55 (inactive), persists a snapshot
// and records the resulting column values. Phase 2 reopens the same DB
// file and checks the values against both the recorded ones and the
// literal expectations.
func TestRunMultibyteCapPersist_RoundTrip(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "test.db")
// --- Phase 1: open store, seed, persist snapshot ---
// No deferred Close here: the store is closed explicitly below to
// simulate the restart.
store, err := OpenStore(dbPath)
if err != nil {
t.Fatalf("OpenStore: %v", err)
}
if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, role, last_seen, multibyte_sup, multibyte_evidence)
VALUES ('dd44', 'Delta', 'repeater', '2026-01-01T00:00:00Z', 0, NULL)`); err != nil {
t.Fatalf("seed: %v", err)
}
if _, err := store.db.Exec(`INSERT INTO inactive_nodes (public_key, name, role, last_seen, multibyte_sup, multibyte_evidence)
VALUES ('ee55', 'Echo', 'companion', '2025-12-01T00:00:00Z', 0, NULL)`); err != nil {
t.Fatalf("seed inactive: %v", err)
}
snap := mbcapqueue.Snapshot{Entries: []mbcapqueue.Entry{
{PublicKey: "dd44", Status: "confirmed", Evidence: "advert"},
{PublicKey: "ee55", Status: "suspected", Evidence: "path"},
}}
if err := mbcapqueue.WriteSnapshot(dbPath, snap); err != nil {
t.Fatalf("WriteSnapshot: %v", err)
}
if _, err := store.RunMultibyteCapPersist(); err != nil {
t.Fatalf("RunMultibyteCapPersist: %v", err)
}
// Capture original state for round-trip comparison.
var origActiveSup, origInactiveSup int
var origActiveEvid, origInactiveEvid string
if err := store.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM nodes WHERE public_key='dd44'`).Scan(&origActiveSup, &origActiveEvid); err != nil {
t.Fatalf("read dd44 (phase1): %v", err)
}
if err := store.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM inactive_nodes WHERE public_key='ee55'`).Scan(&origInactiveSup, &origInactiveEvid); err != nil {
t.Fatalf("read ee55 (phase1): %v", err)
}
// Simulate restart: drop the in-memory Store entirely.
if err := store.Close(); err != nil {
t.Fatalf("Close: %v", err)
}
// --- Phase 2: fresh Store, verify persisted state survived ---
store2, err := OpenStore(dbPath)
if err != nil {
t.Fatalf("OpenStore (reopen): %v", err)
}
defer store2.Close()
var sup int
var evid string
if err := store2.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM nodes WHERE public_key='dd44'`).Scan(&sup, &evid); err != nil {
t.Fatalf("read dd44 after reopen: %v", err)
}
// First check: unchanged across the reopen. Second check: equal to
// the values the snapshot asked for.
if sup != origActiveSup || evid != origActiveEvid {
t.Errorf("dd44 after restart: sup=%d evid=%q, want sup=%d evid=%q", sup, evid, origActiveSup, origActiveEvid)
}
if sup != 2 || evid != "advert" {
t.Errorf("dd44 after restart: sup=%d evid=%q, want sup=2 evid=advert", sup, evid)
}
if err := store2.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM inactive_nodes WHERE public_key='ee55'`).Scan(&sup, &evid); err != nil {
t.Fatalf("read ee55 after reopen: %v", err)
}
if sup != origInactiveSup || evid != origInactiveEvid {
t.Errorf("ee55 after restart: sup=%d evid=%q, want sup=%d evid=%q", sup, evid, origInactiveSup, origInactiveEvid)
}
if sup != 1 || evid != "path" {
t.Errorf("ee55 after restart: sup=%d evid=%q, want sup=1 evid=path", sup, evid)
}
}
// TestRunMultibyteCapPersist_MalformedSnapshot verifies the persist
// path is safe against a corrupted/truncated snapshot file: it must
// return without error (no-op), MUST NOT crash, AND MUST log a warning
// distinguishing the malformed case from the steady-state "no
// snapshot yet" cold-start case.
//
// Audit (#1386, kent-beck) flagged: "Snapshot file malformed /
// truncated / wrong-version — RunMultibyteCapPersist error vs.
// silent-skip behavior is unspecified by any test."
//
// The log assertion accepts any of "malformed", "invalid" or "corrupt"
// (case-insensitive) in the captured output.
func TestRunMultibyteCapPersist_MalformedSnapshot(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "test.db")
store, err := OpenStore(dbPath)
if err != nil {
t.Fatalf("OpenStore: %v", err)
}
defer store.Close()
// Write malformed JSON directly to the snapshot path.
if err := mbcapqueue.EnsureDir(dbPath); err != nil {
t.Fatalf("EnsureDir: %v", err)
}
if err := os.WriteFile(mbcapqueue.SnapshotPath(dbPath), []byte("not-json{{{garbage"), 0o644); err != nil {
t.Fatalf("write malformed: %v", err)
}
// Capture log output to assert the warning is emitted.
logBuf := captureLogs(t)
// Must not panic. The deferred recover turns a panic in the code
// below into a test failure with a descriptive message.
defer func() {
if r := recover(); r != nil {
t.Fatalf("RunMultibyteCapPersist panicked on malformed snapshot: %v", r)
}
}()
stats, err := store.RunMultibyteCapPersist()
if err != nil {
t.Errorf("RunMultibyteCapPersist on malformed snapshot returned error %v; expected silent no-op", err)
}
if stats.ReadEntries != 0 || stats.UpdatedActive != 0 || stats.UpdatedInactive != 0 {
t.Errorf("expected zero-valued stats on malformed snapshot, got %+v", stats)
}
if !logContains(logBuf, "malformed") && !logContains(logBuf, "invalid") && !logContains(logBuf, "corrupt") {
t.Errorf("expected log to mention malformed/invalid/corrupt snapshot; got: %s", logBuf.String())
}
}
// TestRunMultibyteCapPersist_MissingSchemaColumns verifies the persist
// path is a clean no-op on a legacy DB that doesn't yet have the
// multibyte_sup / multibyte_evidence columns. Without an explicit
// schema check the persist would fail at tx.Prepare with a SQL error;
// the audit requires it skip cleanly instead.
//
// We simulate a legacy DB by DROPping the columns post-migration
// (SQLite ≥ 3.35 supports ALTER TABLE DROP COLUMN).
//
// The test requires no error, zero updates, and a log line mentioning
// "legacy", "schema" or "multibyte_sup" (case-insensitive).
func TestRunMultibyteCapPersist_MissingSchemaColumns(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "test.db")
store, err := OpenStore(dbPath)
if err != nil {
t.Fatalf("OpenStore: %v", err)
}
defer store.Close()
// Drop the multibyte columns from both tables to simulate a legacy DB.
for _, stmt := range []string{
`ALTER TABLE nodes DROP COLUMN multibyte_sup`,
`ALTER TABLE nodes DROP COLUMN multibyte_evidence`,
`ALTER TABLE inactive_nodes DROP COLUMN multibyte_sup`,
`ALTER TABLE inactive_nodes DROP COLUMN multibyte_evidence`,
} {
if _, err := store.db.Exec(stmt); err != nil {
t.Fatalf("simulate legacy DB (%q): %v", stmt, err)
}
}
// Confirm columns are gone.
if columnExists(t, store.db, "nodes", "multibyte_sup") {
t.Fatalf("setup failed: nodes.multibyte_sup still present after DROP")
}
// A non-empty snapshot with a persistable status, so the run gets
// past the empty-snapshot early return.
snap := mbcapqueue.Snapshot{Entries: []mbcapqueue.Entry{
{PublicKey: "ff66", Status: "confirmed", Evidence: "advert"},
}}
if err := mbcapqueue.WriteSnapshot(dbPath, snap); err != nil {
t.Fatalf("WriteSnapshot: %v", err)
}
logBuf := captureLogs(t)
// The deferred recover turns a panic in the code below into a test
// failure with a descriptive message.
defer func() {
if r := recover(); r != nil {
t.Fatalf("RunMultibyteCapPersist panicked on legacy DB: %v", r)
}
}()
stats, err := store.RunMultibyteCapPersist()
if err != nil {
t.Errorf("RunMultibyteCapPersist on legacy DB returned error %v; expected clean skip", err)
}
if stats.UpdatedActive != 0 || stats.UpdatedInactive != 0 {
t.Errorf("expected zero writes on legacy DB, got %+v", stats)
}
// Must explicitly detect + log the skip — otherwise the "clean skip"
// is silent UPDATE-affected-zero accident, not defensive code.
if !logContains(logBuf, "legacy") && !logContains(logBuf, "schema") && !logContains(logBuf, "multibyte_sup") {
t.Errorf("expected explicit log on missing schema columns; got: %s", logBuf.String())
}
}
// TestRunMultibyteCapPersist_PreservesConfirmedOnUnknown is the
// data-destruction guard the PR claims to enforce: a snapshot Entry
// with status="unknown" must NEVER overwrite an existing "confirmed"
// (or "suspected") DB row. The audit's mutation test: revert the
// `if sup == 0 { continue }` guard in multibyte_persist.go — this
// test must fail.
//
// Fixture: gg77 (active, sup=2/'advert') and hh88 (inactive,
// sup=1/'path'); the snapshot carries "unknown" for both.
func TestRunMultibyteCapPersist_PreservesConfirmedOnUnknown(t *testing.T) {
dir := t.TempDir()
dbPath := filepath.Join(dir, "test.db")
store, err := OpenStore(dbPath)
if err != nil {
t.Fatalf("OpenStore: %v", err)
}
defer store.Close()
// Seed a confirmed active node and a suspected inactive node.
if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, role, last_seen, multibyte_sup, multibyte_evidence)
VALUES ('gg77', 'Golf', 'repeater', '2026-01-01T00:00:00Z', 2, 'advert')`); err != nil {
t.Fatalf("seed gg77: %v", err)
}
if _, err := store.db.Exec(`INSERT INTO inactive_nodes (public_key, name, role, last_seen, multibyte_sup, multibyte_evidence)
VALUES ('hh88', 'Hotel', 'companion', '2025-12-01T00:00:00Z', 1, 'path')`); err != nil {
t.Fatalf("seed hh88: %v", err)
}
// Snapshot has only "unknown" entries for both — must skip both.
snap := mbcapqueue.Snapshot{Entries: []mbcapqueue.Entry{
{PublicKey: "gg77", Status: "unknown"},
{PublicKey: "hh88", Status: "unknown"},
}}
if err := mbcapqueue.WriteSnapshot(dbPath, snap); err != nil {
t.Fatalf("WriteSnapshot: %v", err)
}
stats, err := store.RunMultibyteCapPersist()
if err != nil {
t.Fatalf("RunMultibyteCapPersist: %v", err)
}
// Both entries must be counted as skipped and nothing written.
if stats.Skipped != 2 {
t.Errorf("Skipped = %d, want 2 (both unknown entries)", stats.Skipped)
}
if stats.UpdatedActive != 0 || stats.UpdatedInactive != 0 {
t.Errorf("expected zero updates, got %+v", stats)
}
// Verify the existing values were NOT clobbered.
var sup int
var evid string
if err := store.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM nodes WHERE public_key='gg77'`).Scan(&sup, &evid); err != nil {
t.Fatalf("read gg77: %v", err)
}
if sup != 2 || evid != "advert" {
t.Errorf("gg77 was clobbered by unknown snapshot: sup=%d evid=%q, want sup=2 evid=advert", sup, evid)
}
if err := store.db.QueryRow(`SELECT multibyte_sup, COALESCE(multibyte_evidence,'') FROM inactive_nodes WHERE public_key='hh88'`).Scan(&sup, &evid); err != nil {
t.Fatalf("read hh88: %v", err)
}
if sup != 1 || evid != "path" {
t.Errorf("hh88 was clobbered by unknown snapshot: sup=%d evid=%q, want sup=1 evid=path", sup, evid)
}
}
-335
View File
@@ -1,335 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"fmt"
"log"
"strings"
"sync"
"time"
)
const (
	// NeighborEdgesBuilderInterval is the default period between builder
	// ticks. Each tick rescans observations and refreshes neighbor_edges.
	// The server reads on the same 60s cadence (see
	// cmd/server/neighbor_recomputer.go), so a 60s pulse keeps the
	// snapshot fresh.
	NeighborEdgesBuilderInterval = 60 * time.Second

	// neighborBuilderMaxBatch is the maximum number of observation rows a
	// single delta tick may read (#1339). With max_open_conns=1 an
	// unbounded scan over a multi-million-row table holds the SQLite
	// write lock for minutes and starves MQTT ingest. Capping each tick
	// lets a backlog drain 50k rows at a time without blocking ingest
	// for long.
	neighborBuilderMaxBatch = 50000

	// neighborBuilderSlowTickThreshold is the wall-clock budget for one
	// tick. A tick that exceeds it is logged loudly so a regression of
	// #1339 is caught quickly. The full instrumentation framework is
	// tracked in #1340.
	neighborBuilderSlowTickThreshold = 5 * time.Second

	// payloadADVERT mirrors the constant in cmd/server/decoder.go. It is
	// duplicated instead of imported so the ingestor binary stays
	// independent of the server package.
	payloadADVERT = 0x04
)
// edgeRow is a single pending upsert into neighbor_edges. The node pair
// is stored in canonical order (a <= b).
type edgeRow struct {
	a  string // written to neighbor_edges.node_a
	b  string // written to neighbor_edges.node_b
	ts string // written to neighbor_edges.last_seen
}
// StartNeighborEdgesBuilder runs the neighbor_edges builder: first a
// synchronous warm-up, then a background goroutine that rebuilds every
// interval. The builder is the only writer to neighbor_edges (#1287).
//
// A non-positive interval falls back to NeighborEdgesBuilderInterval.
//
// Warm-up happens before this function returns so the server's first
// snapshot load sees real data instead of an empty table. On a fresh DB
// it is a full scan; when neighbor_edges already has rows the watermark
// turns it into a delta scan. The warm-up repeats the build until a
// call reports fewer than neighborBuilderMaxBatch upserts or fails.
//
// NOTE(review): the warm-up exit test compares the number of upserted
// edges against a cap that is applied to observation rows; the two are
// not the same quantity, so confirm the loop drains a backlog as
// intended.
//
// The returned closure signals the goroutine to stop and waits up to
// five seconds for it to exit. It is safe to call more than once.
func (s *Store) StartNeighborEdgesBuilder(interval time.Duration) func() {
	if interval <= 0 {
		interval = NeighborEdgesBuilderInterval
	}

	warmupBegan := time.Now()
	// Prime the prefix index (#1547) so the very first
	// InsertTransmission after startup can resolve hop prefixes.
	if err := s.RefreshPrefixIndex(); err != nil {
		log.Printf("[neighbor-build] initial prefix-index refresh error: %v", err)
	}
	// Prime the neighbor graph (#1560) so the context-aware resolver
	// has adjacency data on the very first InsertTransmission.
	if err := s.RefreshNeighborGraph(); err != nil {
		log.Printf("[neighbor-build] initial neighbor-graph refresh error: %v", err)
	}
	upserted := 0
	for draining := true; draining; {
		n, err := s.buildAndPersistNeighborEdges()
		if err != nil {
			log.Printf("[neighbor-build] initial build error: %v", err)
			break
		}
		upserted += n
		draining = n >= neighborBuilderMaxBatch
	}
	log.Printf("[neighbor-build] initial build: %d edges upserted in %s", upserted, time.Since(warmupBegan))

	// runTick performs one periodic rebuild: refresh the prefix index
	// (#1547), build edges, then refresh the neighbor-graph snapshot
	// (#1560) so newly persisted adjacencies are visible on the next
	// ingest.
	runTick := func() {
		began := time.Now()
		if err := s.RefreshPrefixIndex(); err != nil {
			log.Printf("[neighbor-build] prefix-index refresh error: %v", err)
		}
		n, buildErr := s.buildAndPersistNeighborEdges()
		if err := s.RefreshNeighborGraph(); err != nil {
			log.Printf("[neighbor-build] neighbor-graph refresh error: %v", err)
		}
		elapsed := time.Since(began)
		switch {
		case buildErr != nil:
			log.Printf("[neighbor-build] tick error after %s: %v", elapsed, buildErr)
		case n > 0:
			log.Printf("[neighbor-build] tick: %d edges in %s (delta from watermark)", n, elapsed)
		}
		if elapsed > neighborBuilderSlowTickThreshold {
			log.Printf("[neighbor-build] SLOW tick: %s — possible regression of #1339", elapsed)
		}
	}

	quit := make(chan struct{})
	finished := make(chan struct{})
	go func() {
		defer close(finished)
		ticker := time.NewTicker(interval)
		defer ticker.Stop()
		for {
			select {
			case <-quit:
				return
			case <-ticker.C:
				runTick()
			}
		}
	}()

	var once sync.Once
	return func() {
		// Closing quit twice would panic; the Once makes repeated
		// stop calls harmless.
		once.Do(func() { close(quit) })
		select {
		case <-finished:
		case <-time.After(5 * time.Second):
		}
	}
}
// buildAndPersistNeighborEdges scans transmissions + observations,
// extracts edge candidates (originator↔first-hop on ADVERTs;
// observer↔last-hop on all packet types; originator↔observer on
// ADVERTs with an empty path) and upserts them into neighbor_edges.
// It returns the number of attempted upserts, or 0 and an error if any
// step fails.
//
// Watermark / delta semantics (#1339): the watermark is derived from
// MAX(neighbor_edges.last_seen). On an empty edges table (fresh DB) it
// is 0 and the scan starts from the beginning. Otherwise the SELECT is
// restricted to observations whose timestamp is strictly greater than
// the watermark. Every call reads at most neighborBuilderMaxBatch
// rows. neighbor_edges itself is the persistence — no metadata table
// or in-memory state is needed, and restarts resume from whatever the
// table reflects.
//
// Trade-off (documented for #1340 follow-up): an anomalously old
// observation that arrives after the watermark has passed its
// timestamp is skipped. Acceptable for an approximate neighbor graph;
// a periodic full rebuild can be added later if needed.
//
// NOTE(review): the watermark only moves when an edge is written. If a
// full batch yields no edges, the next call reads the same rows again.
// Rows that share the timestamp at which a batch was cut can also be
// skipped by the strict `>` comparison. Confirm whether either matters
// at production volumes.
//
// Hop prefixes are resolved to full pubkeys through an index built
// from nodes.public_key at the start of each call. Prefixes with zero
// or multiple candidates are skipped (matches the conservative
// resolution rule in cmd/server/extractEdgesFromObs).
func (s *Store) buildAndPersistNeighborEdges() (int, error) {
	prefixIdx, err := buildPrefixIndex(s.db)
	if err != nil {
		return 0, fmt.Errorf("build prefix index: %w", err)
	}

	// Derive the watermark from the existing edges table. last_seen is
	// RFC3339 text; observations.timestamp is compared as an integer
	// epoch, so convert. An empty table leaves the watermark at zero.
	var watermarkRFC sql.NullString
	if err := s.db.QueryRow(`SELECT MAX(last_seen) FROM neighbor_edges`).Scan(&watermarkRFC); err != nil {
		return 0, fmt.Errorf("read watermark: %w", err)
	}
	var watermarkEpoch int64
	if watermarkRFC.Valid && watermarkRFC.String != "" {
		parsed, parseErr := time.Parse(time.RFC3339, watermarkRFC.String)
		if parseErr != nil {
			// Keep the fallback to epoch 0, but make it visible: a
			// corrupt last_seen would otherwise restart the scan from
			// the beginning on every call without a trace.
			log.Printf("[neighbor-build] unparseable watermark %q, scanning from epoch 0: %v", watermarkRFC.String, parseErr)
		} else {
			watermarkEpoch = parsed.Unix()
		}
	}

	rows, err := s.db.Query(`SELECT
t.payload_type,
t.decoded_json,
COALESCE(t.from_pubkey, ''),
COALESCE(o.path_json, ''),
COALESCE(obs.id, '') AS observer_id,
o.timestamp
FROM observations o
JOIN transmissions t ON t.id = o.transmission_id
LEFT JOIN observers obs ON obs.rowid = o.observer_idx
WHERE o.timestamp > ?
ORDER BY o.timestamp
LIMIT ?`, watermarkEpoch, neighborBuilderMaxBatch)
	if err != nil {
		return 0, fmt.Errorf("scan observations: %w", err)
	}
	defer rows.Close()

	var (
		edges        []edgeRow
		scanFailures int
		firstScanErr error
	)
	for rows.Next() {
		var payloadType sql.NullInt64
		var decodedJSON, fromPubkey, pathJSON, observerID string
		var epochTs int64
		if err := rows.Scan(&payloadType, &decodedJSON, &fromPubkey, &pathJSON, &observerID, &epochTs); err != nil {
			// Best effort: one unreadable row must not abort the tick,
			// but it is counted and reported after the loop.
			if firstScanErr == nil {
				firstScanErr = err
			}
			scanFailures++
			continue
		}

		// Prefer the stored originator; fall back to the pubkey
		// carried inside the decoded payload.
		fromNode := strings.ToLower(fromPubkey)
		if fromNode == "" {
			fromNode = strings.ToLower(extractPubkeyFromAdvertJSON(decodedJSON))
		}
		isAdvert := payloadType.Valid && payloadType.Int64 == int64(payloadADVERT)
		ts := time.Unix(epochTs, 0).UTC().Format(time.RFC3339)
		observerPK := strings.ToLower(observerID)

		path := parsePathArray(pathJSON)
		if len(path) == 0 {
			// No hops: an ADVERT links originator and observer
			// directly.
			if isAdvert && fromNode != "" && fromNode != observerPK && observerPK != "" {
				edges = append(edges, canonEdge(fromNode, observerPK, ts))
			}
			continue
		}
		// Originator ↔ first hop (ADVERTs only).
		if isAdvert && fromNode != "" {
			if resolved, ok := resolvePrefix(prefixIdx, path[0]); ok && resolved != fromNode {
				edges = append(edges, canonEdge(fromNode, resolved, ts))
			}
		}
		// Observer ↔ last hop (all packet types).
		if observerPK != "" {
			last := path[len(path)-1]
			if resolved, ok := resolvePrefix(prefixIdx, last); ok && resolved != observerPK {
				edges = append(edges, canonEdge(observerPK, resolved, ts))
			}
		}
	}
	// rows.Next returns false both at end-of-data and on a cursor
	// error. Without this check a failed read would look like a
	// complete scan and the watermark would advance past unread rows.
	if err := rows.Err(); err != nil {
		return 0, fmt.Errorf("iterate observations: %w", err)
	}
	if scanFailures > 0 {
		log.Printf("[neighbor-build] skipped %d observation rows that failed to scan (first error: %v)", scanFailures, firstScanErr)
	}
	if len(edges) == 0 {
		return 0, nil
	}

	// Wrap the whole edge-persist tx under writer-perf instrumentation
	// (#1340). Slow neighbor-builder ticks (the #1339 root cause) now
	// show up on /api/perf under component=neighbor_builder.
	var inserted int
	err = s.WriterTx("neighbor_builder", func(tx *sql.Tx) error {
		stmt, err := tx.Prepare(`INSERT INTO neighbor_edges (node_a, node_b, count, last_seen)
VALUES (?, ?, 1, ?)
ON CONFLICT(node_a, node_b) DO UPDATE SET
count = count + 1,
last_seen = MAX(last_seen, excluded.last_seen)`)
		if err != nil {
			return fmt.Errorf("prepare: %w", err)
		}
		defer stmt.Close()

		// Every edge is attempted; only the first failure is kept and
		// returned once the loop is done.
		var firstErr error
		for _, e := range edges {
			if _, err := stmt.Exec(e.a, e.b, e.ts); err != nil && firstErr == nil {
				firstErr = err
			}
		}
		if firstErr != nil {
			return fmt.Errorf("upsert: %w", firstErr)
		}
		inserted = len(edges)
		return nil
	})
	if err != nil {
		return 0, err
	}
	return inserted, nil
}
// canonEdge builds an edgeRow whose endpoints are in canonical order:
// the lexically smaller (or equal) key goes in a, the other in b. This
// matches the schema convention used by the loader and the bridge
// recomputer. ts is carried through unchanged.
func canonEdge(a, b, ts string) edgeRow {
	if a <= b {
		return edgeRow{a: a, b: b, ts: ts}
	}
	return edgeRow{a: b, b: a, ts: ts}
}
// parsePathArray decodes a path_json blob into its hop strings. An
// empty string, the literal "[]", and anything that does not decode as
// a JSON array of strings all yield nil.
func parsePathArray(s string) []string {
	switch s {
	case "", "[]":
		return nil
	}
	var hops []string
	if err := json.Unmarshal([]byte(s), &hops); err != nil {
		return nil
	}
	return hops
}
// prefixIndex maps a lowercase hop prefix to every full (lowercased)
// pubkey that starts with that prefix. A prefix whose slice holds more
// than one candidate is ambiguous, and one with no entry is unknown;
// resolvePrefix rejects both and only accepts exactly one candidate.
type prefixIndex map[string][]string
// buildPrefixIndex reads every nodes.public_key and returns a map from
// hop prefix to the lowercased pubkeys that start with it.
//
// Each key is indexed under its first 1, 2, 3, 4, 6 and 8 bytes (two
// hex characters per byte); lengths longer than the key itself are
// skipped. Memory cost is O(nodes × number of prefix lengths).
//
// Rows that cannot be scanned into a string are skipped. A failure of
// the query, or of the cursor while iterating, is returned as an error
// together with a nil index so callers never resolve hops against a
// partially built map.
func buildPrefixIndex(db *sql.DB) (prefixIndex, error) {
	rows, err := db.Query(`SELECT public_key FROM nodes`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	// Prefix lengths in hex characters, ascending.
	prefixLens := [...]int{1 * 2, 2 * 2, 3 * 2, 4 * 2, 6 * 2, 8 * 2}
	idx := make(prefixIndex, 1024)
	for rows.Next() {
		var pk string
		if err := rows.Scan(&pk); err != nil {
			// Best effort: one unreadable key must not discard the
			// whole index.
			continue
		}
		pkLower := strings.ToLower(pk)
		for _, n := range prefixLens {
			if len(pkLower) < n {
				// Lengths ascend, so every later one is too long as well.
				break
			}
			prefix := pkLower[:n]
			idx[prefix] = append(idx[prefix], pkLower)
		}
	}
	// rows.Next also returns false on a cursor error; without this
	// check a truncated read would be returned as a complete index.
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("iterate nodes: %w", err)
	}
	return idx, nil
}
// resolvePrefix looks up hop (case-insensitively) in idx. It returns
// the pubkey and true only when exactly one candidate is indexed under
// that prefix; with zero or several candidates it returns "" and
// false. This matches the conservative server-side resolver in
// cmd/server/extractEdgesFromObs.
func resolvePrefix(idx prefixIndex, hop string) (string, bool) {
	if matches := idx[strings.ToLower(hop)]; len(matches) == 1 {
		return matches[0], true
	}
	return "", false
}
-195
View File
@@ -1,195 +0,0 @@
package main
import (
"fmt"
"path/filepath"
"testing"
"time"
)
// TestNeighborEdgesBuilderDeltaScan enforces issue #1339: after the
// initial (warm-up) build, later calls to buildAndPersistNeighborEdges
// MUST scan only observations newer than the most recent edge already
// persisted. The watermark is derived from
// MAX(neighbor_edges.last_seen) — neighbor_edges itself is the
// persistence, no separate metadata table.
//
// RED expectations:
//  1. After a warm-up that produces edges, a second build with NO new
//     observations is a fast no-op (<1s) and writes nothing.
//  2. After inserting K observations with timestamps strictly newer
//     than the prior MAX(last_seen), the next build upserts exactly
//     2*K edges (two candidates per ADVERT observation) in <500ms.
//  3. The initial build (empty neighbor_edges) still scans everything
//     (warm-up preserved).
func TestNeighborEdgesBuilderDeltaScan(t *testing.T) {
	if testing.Short() {
		t.Skip("synthetic 100k-row benchmark; skipped in -short")
	}
	dir := t.TempDir()
	dbPath := filepath.Join(dir, "delta.db")
	store, err := OpenStore(dbPath)
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer store.Close()

	if _, err := store.db.Exec(
		`INSERT INTO nodes (public_key, name) VALUES (?, ?), (?, ?)`,
		"aaaaaaaaaa", "from-node",
		"bbbbbbbbbb", "first-hop",
	); err != nil {
		t.Fatal(err)
	}
	if _, err := store.db.Exec(
		`INSERT INTO observers (id, name) VALUES (?, ?)`,
		"obs-1", "observer-1",
	); err != nil {
		t.Fatal(err)
	}
	var obsRowid int64
	if err := store.db.QueryRow(`SELECT rowid FROM observers WHERE id = ?`, "obs-1").Scan(&obsRowid); err != nil {
		t.Fatal(err)
	}

	// insertAdverts writes count ADVERT transmissions from
	// 'aaaaaaaaaa', each with one observation by obs-1 over path
	// ["bb"], in a single transaction. Row i gets hash
	// "<hashPrefix><i>" and timestamp startTs+i.
	insertAdverts := func(hashPrefix string, startTs int64, count int) {
		t.Helper()
		tx, err := store.db.Begin()
		if err != nil {
			t.Fatal(err)
		}
		// Releases the transaction if a t.Fatal below aborts the
		// test; it has no effect once Commit has succeeded.
		defer tx.Rollback()
		txStmt, err := tx.Prepare(`INSERT INTO transmissions
(raw_hex, hash, first_seen, route_type, payload_type, payload_version, decoded_json, from_pubkey)
VALUES ('', ?, ?, 0, ?, 0, '{}', 'aaaaaaaaaa')`)
		if err != nil {
			t.Fatal(err)
		}
		obsStmt, err := tx.Prepare(`INSERT INTO observations
(transmission_id, observer_idx, path_json, timestamp) VALUES (?, ?, '["bb"]', ?)`)
		if err != nil {
			t.Fatal(err)
		}
		for i := 0; i < count; i++ {
			ts := startTs + int64(i)
			res, err := txStmt.Exec(fmt.Sprintf("%s%d", hashPrefix, i), ts, payloadADVERT)
			if err != nil {
				t.Fatal(err)
			}
			txID, err := res.LastInsertId()
			if err != nil {
				t.Fatal(err)
			}
			if _, err := obsStmt.Exec(txID, obsRowid, ts); err != nil {
				t.Fatal(err)
			}
		}
		if err := tx.Commit(); err != nil {
			t.Fatal(err)
		}
	}

	// Baseline timestamps: a contiguous block ending at baselineMaxTs.
	const baseline = 100_000
	const baselineStartTs int64 = 1735689600 // 2025-01-01 UTC
	baselineMaxTs := baselineStartTs + int64(baseline) - 1
	insertAdverts("h", baselineStartTs, baseline)

	// Initial warm-up: drain to completion, using the same exit rule
	// as StartNeighborEdgesBuilder. Called directly so the test does
	// not depend on the goroutine harness. Scanning everything is
	// allowed here because neighbor_edges starts empty.
	for {
		n, err := store.buildAndPersistNeighborEdges()
		if err != nil {
			t.Fatalf("warm-up build: %v", err)
		}
		if n < neighborBuilderMaxBatch {
			break
		}
	}
	var edgesAfterWarmup int
	if err := store.db.QueryRow(`SELECT COUNT(*) FROM neighbor_edges`).Scan(&edgesAfterWarmup); err != nil {
		t.Fatal(err)
	}
	if edgesAfterWarmup == 0 {
		t.Fatal("warm-up produced 0 edges; can't establish a watermark")
	}

	// Sanity: MAX(last_seen) should reflect the baseline tail timestamp.
	var maxLastSeen string
	if err := store.db.QueryRow(`SELECT MAX(last_seen) FROM neighbor_edges`).Scan(&maxLastSeen); err != nil {
		t.Fatal(err)
	}
	wantMax := time.Unix(baselineMaxTs, 0).UTC().Format(time.RFC3339)
	if maxLastSeen != wantMax {
		t.Fatalf("MAX(last_seen) after warm-up: want %s, got %s", wantMax, maxLastSeen)
	}

	// Tick #2: NO new observations. Expect no-op + fast.
	noopStart := time.Now()
	n2, err := store.buildAndPersistNeighborEdges()
	if err != nil {
		t.Fatalf("noop build: %v", err)
	}
	noopDur := time.Since(noopStart)
	if n2 != 0 {
		t.Fatalf("expected 0 edges on empty-delta tick; got %d (#1339)", n2)
	}
	if noopDur > time.Second {
		t.Fatalf("empty-delta build took %v; expected <1s — builder is "+
			"still doing a full table scan. (#1339)", noopDur)
	}

	// Tick #3: insert K observations with timestamps strictly newer
	// than baselineMaxTs.
	const delta = 100
	insertAdverts("d", baselineMaxTs+1, delta)

	deltaStart := time.Now()
	n3, err := store.buildAndPersistNeighborEdges()
	if err != nil {
		t.Fatalf("delta build: %v", err)
	}
	deltaDur := time.Since(deltaStart)
	// Each ADVERT observation with a non-empty path produces 2 edge
	// candidates (from↔hop[0] and observer↔hop[-1]). The watermark
	// must clamp the scan to the delta rows ONLY — anything more
	// proves the WHERE clause was bypassed.
	if n3 != delta*2 {
		t.Fatalf("expected %d edges upserted (delta only, 2 per advert obs); got %d. "+
			"Builder must only scan observations with timestamp > MAX(neighbor_edges.last_seen). (#1339)",
			delta*2, n3)
	}
	if deltaDur > 500*time.Millisecond {
		t.Fatalf("delta build of %d rows took %v; expected <500ms. (#1339)", delta, deltaDur)
	}

	// Sanity: MAX(last_seen) advanced.
	var maxLastSeen2 string
	if err := store.db.QueryRow(`SELECT MAX(last_seen) FROM neighbor_edges`).Scan(&maxLastSeen2); err != nil {
		t.Fatal(err)
	}
	if maxLastSeen2 <= maxLastSeen {
		t.Fatalf("MAX(last_seen) did not advance: was %s, now %s", maxLastSeen, maxLastSeen2)
	}
}
-87
View File
@@ -1,87 +0,0 @@
package main
import (
"path/filepath"
"testing"
)
// TestNeighborEdgesBuilderUpsertsFromObservations covers issue #1287
// Option 4: the INGESTOR derives neighbor_edges from raw
// observations/transmissions and persists them; the server only reads.
//
// The fixture is one ADVERT observation whose path[0] resolves to
// exactly one known node. The builder must write the matching edge.
func TestNeighborEdgesBuilderUpsertsFromObservations(t *testing.T) {
	// OpenStore is the ingestor's normal opener, so applySchema and
	// dbschema.Apply both run (the builder requires neighbor_edges +
	// observers.iata etc.).
	path := filepath.Join(t.TempDir(), "build.db")
	st, openErr := OpenStore(path)
	if openErr != nil {
		t.Fatalf("OpenStore: %v", openErr)
	}
	defer st.Close()

	// Two nodes; their pubkey prefixes serve as hops.
	_, execErr := st.db.Exec(
		`INSERT INTO nodes (public_key, name) VALUES (?, ?), (?, ?)`,
		"aaaaaaaaaa", "from-node",
		"bbbbbbbbbb", "first-hop",
	)
	if execErr != nil {
		t.Fatal(execErr)
	}

	// One observer, looked up again to learn its rowid.
	_, execErr = st.db.Exec(
		`INSERT INTO observers (id, name) VALUES (?, ?)`,
		"obs-1", "observer-1",
	)
	if execErr != nil {
		t.Fatal(execErr)
	}
	var observerRowid int64
	rowidQuery := st.db.QueryRow(`SELECT rowid FROM observers WHERE id = ?`, "obs-1")
	if scanErr := rowidQuery.Scan(&observerRowid); scanErr != nil {
		t.Fatal(scanErr)
	}

	// One ADVERT transmission with from_pubkey = aaaaa…
	inserted, insertErr := st.db.Exec(
		`INSERT INTO transmissions (raw_hex, hash, first_seen, route_type, payload_type, payload_version, decoded_json, from_pubkey)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
		"", "h1", "2026-01-01T00:00:00Z", 0, payloadADVERT, 0, "{}", "aaaaaaaaaa",
	)
	if insertErr != nil {
		t.Fatal(insertErr)
	}
	transmissionID, _ := inserted.LastInsertId()

	// One observation whose path[0] = "bb" (2-hex prefix unique to
	// bbbbb… in the nodes table). Expected edge: a↔b.
	_, execErr = st.db.Exec(
		`INSERT INTO observations (transmission_id, observer_idx, path_json, timestamp) VALUES (?, ?, ?, ?)`,
		transmissionID, observerRowid, `["bb"]`, int64(1735689600),
	)
	if execErr != nil {
		t.Fatal(execErr)
	}

	upserts, buildErr := st.buildAndPersistNeighborEdges()
	if buildErr != nil {
		t.Fatalf("buildAndPersistNeighborEdges: %v", buildErr)
	}
	if upserts == 0 {
		t.Fatal("expected at least 1 edge upserted, got 0")
	}

	var edgeCount int
	countQuery := st.db.QueryRow(`SELECT COUNT(*) FROM neighbor_edges WHERE node_a = ? AND node_b = ?`, "aaaaaaaaaa", "bbbbbbbbbb")
	if scanErr := countQuery.Scan(&edgeCount); scanErr != nil {
		t.Fatal(scanErr)
	}
	if edgeCount != 1 {
		t.Fatalf("expected the a↔b edge to be persisted; got %d rows", edgeCount)
	}
}
// (test ends here)
-97
View File
@@ -1,97 +0,0 @@
package main
import (
"testing"
)
// TestNormalizeChannelName checks that any casing of "public" becomes
// "Public", while hashtag channels, custom names and the empty string
// come back unchanged.
func TestNormalizeChannelName(t *testing.T) {
	type testCase struct {
		input    string
		expected string
	}
	cases := []testCase{
		// Known channel: every casing of "public" maps to "Public".
		{input: "public", expected: "Public"},
		{input: "Public", expected: "Public"},
		{input: "PUBLIC", expected: "Public"},
		// Hashtag channels pass through untouched.
		{input: "#LongFast", expected: "#LongFast"},
		{input: "#wardrive", expected: "#wardrive"},
		// Custom/unknown channels pass through untouched.
		{input: "myChannel", expected: "myChannel"},
		{input: "testchannel", expected: "testchannel"},
		// Empty string.
		{input: "", expected: ""},
	}
	for _, tc := range cases {
		if got := normalizeChannelName(tc.input); got != tc.expected {
			t.Errorf("normalizeChannelName(%q) = %q, want %q", tc.input, got, tc.expected)
		}
	}
}
func TestLoadChannelKeys_NormalizesKnownDisplayNames(t *testing.T) {
// Verify that known channel keys with wrong casing get normalized
cfg := &Config{
ChannelKeys: map[string]string{
"public": "8b3387e9c5cdea6ac9e5edbaa115cd72",
},
}
keys := loadChannelKeys(cfg, "/dev/null")
// Should have "Public" (normalized) not "public" (raw)
if _, ok := keys["public"]; ok {
t.Error("Expected 'public' to be normalized to 'Public'")
}
if _, ok := keys["Public"]; !ok {
t.Error("Expected 'Public' key to exist in loaded channel keys")
}
}
func TestLoadChannelKeys_LeavesCustomNamesUntouched(t *testing.T) {
// Verify that custom channel names are NOT normalized
cfg := &Config{
ChannelKeys: map[string]string{
"myCustomChannel": "deadbeef12345678",
},
}
keys := loadChannelKeys(cfg, "/dev/null")
// Should keep "myCustomChannel" as-is
if _, ok := keys["myCustomChannel"]; !ok {
t.Error("Expected 'myCustomChannel' to be left untouched")
}
// Should NOT have "MyCustomChannel"
if _, ok := keys["MyCustomChannel"]; ok {
t.Error("Custom channel names should NOT be auto-capitalized")
}
}
func TestLoadChannelKeys_DuplicateCasingLogsWarning(t *testing.T) {
// Verify that config with both "public" and "Public" resolves deterministically:
// the canonical (already-normalized) form should win.
cfg := &Config{
ChannelKeys: map[string]string{
"public": "8b3387e9c5cdea6ac9e5edbaa115cd72",
"Public": "differentkey1234567",
},
}
keys := loadChannelKeys(cfg, "/dev/null")
// After normalization, only one key should exist: "Public"
// The canonical form ("Public") should win over the lowercase form ("public")
if _, ok := keys["public"]; ok {
t.Error("Expected 'public' to be normalized away")
}
if _, ok := keys["Public"]; !ok {
t.Error("Expected 'Public' key to exist")
}
// Assert the canonical form's value won, not just any value
if keys["Public"] != "differentkey1234567" {
t.Errorf("Expected canonical 'Public' value to win, got %q", keys["Public"])
}
}
-43
View File
@@ -1,43 +0,0 @@
package main
import (
"testing"
)
// TestIngestorIsObserverBlacklisted checks IsObserverBlacklisted
// against a two-entry blacklist: listed IDs match regardless of
// casing, while an unlisted ID and the empty string do not.
func TestIngestorIsObserverBlacklisted(t *testing.T) {
	type testCase struct {
		id   string
		want bool
	}
	cases := []testCase{
		{id: "OBS1", want: true},
		{id: "obs1", want: true},
		{id: "OBS2", want: true},
		{id: "obs3", want: false},
		{id: "", want: false},
	}
	cfg := &Config{ObserverBlacklist: []string{"OBS1", "obs2"}}
	for _, tc := range cases {
		if got := cfg.IsObserverBlacklisted(tc.id); got != tc.want {
			t.Errorf("IsObserverBlacklisted(%q) = %v, want %v", tc.id, got, tc.want)
		}
	}
}
// TestIngestorIsObserverBlacklistedEmpty checks that a Config without
// any blacklist entries matches nothing.
func TestIngestorIsObserverBlacklistedEmpty(t *testing.T) {
	emptyCfg := &Config{}
	if matched := emptyCfg.IsObserverBlacklisted("anything"); matched {
		t.Error("empty blacklist should not match")
	}
}
// TestIngestorIsObserverBlacklistedNil checks that calling
// IsObserverBlacklisted on a nil *Config reports false.
func TestIngestorIsObserverBlacklistedNil(t *testing.T) {
	nilCfg := (*Config)(nil)
	if matched := nilCfg.IsObserverBlacklisted("anything"); matched {
		t.Error("nil config should not match")
	}
}
-109
View File
@@ -1,109 +0,0 @@
package main
// Regression tests for issue #1465 — observer.last_seen MUST always reflect
// ingest time (server wall clock), never the MQTT envelope timestamp. Observers
// with broken clocks (wrong TZ, RTC drift, replayed retained messages) must
// NOT be able to drag the analyzer's "last heard from" field into the past
// or future.
//
// Per-packet rxTime semantics (envelope time with naive-clamp from #1464)
// are out of scope here — those continue to use envelope time. This file
// asserts only the observer.last_seen path.
import (
"testing"
"time"
)
// Status path, stale envelope: the status payload carries a well-formed
// RFC3339 timestamp three hours in the past. The stored
// observer.last_seen must fall within the wall-clock window around the
// handleMessage call (±5s), not at the envelope value.
func TestStatusMessage_ObserverLastSeen_AlwaysIngestTime_PastEnvelope_1465(t *testing.T) {
	st := newTestStore(t)
	src := MQTTSource{Name: "test"}

	stale := time.Now().UTC().Add(-3 * time.Hour).Format(time.RFC3339)
	windowStart := time.Now().Unix()
	body := []byte(`{"status":"online","origin":"obs-past","timestamp":"` + stale + `"}`)
	handleMessage(st, "test", src, &mockMessage{topic: "meshcore/SJC/obs-past/status", payload: body}, nil, nil, &Config{})
	windowEnd := time.Now().Unix()

	var stored string
	row := st.db.QueryRow(`SELECT last_seen FROM observers WHERE id = ?`, "obs-past")
	if scanErr := row.Scan(&stored); scanErr != nil {
		t.Fatalf("scan last_seen: %v", scanErr)
	}
	parsed, parseErr := time.Parse(time.RFC3339, stored)
	if parseErr != nil {
		t.Fatalf("last_seen %q not RFC3339: %v", stored, parseErr)
	}
	// Allow five seconds of slack on either side of the ingest window.
	if epoch := parsed.Unix(); epoch < windowStart-5 || epoch > windowEnd+5 {
		t.Errorf("observer.last_seen = %q (epoch %d); want in [%d, %d] (server wall clock). "+
			"Envelope reported well-formed stale %q (3h ago) — must NOT drag last_seen into the past. Issue #1465.",
			stored, parsed.Unix(), windowStart, windowEnd, stale)
	}
}
// Status path, future envelope: the status payload carries a timestamp
// five minutes ahead of now. The stored observer.last_seen must still
// fall within the wall-clock window around the handleMessage call
// (±5s).
func TestStatusMessage_ObserverLastSeen_AlwaysIngestTime_FutureEnvelope_1465(t *testing.T) {
	st := newTestStore(t)
	src := MQTTSource{Name: "test"}

	future := time.Now().UTC().Add(5 * time.Minute).Format(time.RFC3339)
	windowStart := time.Now().Unix()
	body := []byte(`{"status":"online","origin":"obs-future","timestamp":"` + future + `"}`)
	handleMessage(st, "test", src, &mockMessage{topic: "meshcore/SJC/obs-future/status", payload: body}, nil, nil, &Config{})
	windowEnd := time.Now().Unix()

	var stored string
	row := st.db.QueryRow(`SELECT last_seen FROM observers WHERE id = ?`, "obs-future")
	if scanErr := row.Scan(&stored); scanErr != nil {
		t.Fatalf("scan last_seen: %v", scanErr)
	}
	parsed, parseErr := time.Parse(time.RFC3339, stored)
	if parseErr != nil {
		t.Fatalf("last_seen %q not RFC3339: %v", stored, parseErr)
	}
	// Allow five seconds of slack on either side of the ingest window.
	if epoch := parsed.Unix(); epoch < windowStart-5 || epoch > windowEnd+5 {
		t.Errorf("observer.last_seen = %q (epoch %d); want in [%d, %d] (server wall clock). "+
			"Envelope reported well-formed future %q (5 min ahead) — must NOT drag last_seen into the future. Issue #1465.",
			stored, parsed.Unix(), windowStart, windowEnd, future)
	}
}
// Packet path, stale envelope: a packet message whose envelope
// timestamp is three hours old MUST still move observer.last_seen to
// the server wall clock. The observer has just delivered a packet, so
// it is alive no matter what its own clock reports.
func TestPacketMessage_ObserverLastSeen_AlwaysIngestTime_PastEnvelope_1465(t *testing.T) {
	st := newTestStore(t)
	src := MQTTSource{Name: "test"}

	stale := time.Now().UTC().Add(-3 * time.Hour).Format(time.RFC3339)
	windowStart := time.Now().Unix()
	rawHex := "0A00D69FD7A5A7475DB07337749AE61FA53A4788E976"
	body := []byte(`{"raw":"` + rawHex + `","SNR":5.5,"RSSI":-100.0,"origin":"obs-pkt","timestamp":"` + stale + `"}`)
	handleMessage(st, "test", src, &mockMessage{topic: "meshcore/SJC/obs-pkt/packets", payload: body}, nil, nil, &Config{})
	windowEnd := time.Now().Unix()

	var stored string
	row := st.db.QueryRow(`SELECT last_seen FROM observers WHERE id = ?`, "obs-pkt")
	if scanErr := row.Scan(&stored); scanErr != nil {
		t.Fatalf("scan last_seen: %v", scanErr)
	}
	parsed, parseErr := time.Parse(time.RFC3339, stored)
	if parseErr != nil {
		t.Fatalf("last_seen %q not RFC3339: %v", stored, parseErr)
	}
	// Allow five seconds of slack on either side of the ingest window.
	if epoch := parsed.Unix(); epoch < windowStart-5 || epoch > windowEnd+5 {
		t.Errorf("packet-path observer.last_seen = %q (epoch %d); want in [%d, %d] (server wall clock). "+
			"Envelope stale = %q. Observer just delivered a packet; last_seen must be NOW. Issue #1465.",
			stored, parsed.Unix(), windowStart, windowEnd, stale)
	}
}
-96
View File
@@ -1,96 +0,0 @@
package main
import (
"encoding/json"
"testing"
)
// Regression test for #1044: observer metadata (model, firmware,
// battery_mv, noise_floor) was silently dropped when an MQTT status
// payload arrived, even though the same payload's `radio` and
// `client_version` fields WERE persisted.
//
// The payload below was captured from the production MQTT bridge:
//
//	{"status":"online","origin":"TestObserver","origin_id":"AABBCCDD",
//	 "radio":"910.5250244,62.5,7,5",
//	 "model":"Heltec V3",
//	 "firmware_version":"1.12.0-test",
//	 "client_version":"meshcoretomqtt/1.0.8.0",
//	 "stats":{"battery_mv":4209,"uptime_secs":75821,"noise_floor":-109,
//	          "tx_air_secs":80,"rx_air_secs":1903,"recv_errors":934}}
//
// The test first checks what extractObserverMeta pulls out of the
// payload, then pushes that meta through UpsertObserver and checks the
// stored observers row.
func TestStatusMessageMetadataPersisted_Issue1044(t *testing.T) {
	const payload = `{"status":"online","origin":"TestObserver","origin_id":"AABBCCDD","radio":"910.5250244,62.5,7,5","model":"Heltec V3","firmware_version":"1.12.0-test","client_version":"meshcoretomqtt/1.0.8.0","stats":{"battery_mv":4209,"uptime_secs":75821,"noise_floor":-109,"tx_air_secs":80,"rx_air_secs":1903,"recv_errors":934}}`

	var decoded map[string]interface{}
	if jsonErr := json.Unmarshal([]byte(payload), &decoded); jsonErr != nil {
		t.Fatalf("unmarshal: %v", jsonErr)
	}

	// Stage 1: extraction from the decoded payload.
	meta := extractObserverMeta(decoded)
	if meta == nil {
		t.Fatal("extractObserverMeta returned nil for a payload that contains model/firmware/battery_mv")
	}
	if !(meta.Model != nil && *meta.Model == "Heltec V3") {
		t.Errorf("meta.Model = %v, want \"Heltec V3\"", meta.Model)
	}
	if !(meta.Firmware != nil && *meta.Firmware == "1.12.0-test") {
		t.Errorf("meta.Firmware = %v, want \"1.12.0-test\"", meta.Firmware)
	}
	if !(meta.ClientVersion != nil && *meta.ClientVersion == "meshcoretomqtt/1.0.8.0") {
		t.Errorf("meta.ClientVersion = %v, want \"meshcoretomqtt/1.0.8.0\"", meta.ClientVersion)
	}
	if !(meta.Radio != nil && *meta.Radio == "910.5250244,62.5,7,5") {
		t.Errorf("meta.Radio = %v, want radio string", meta.Radio)
	}
	if !(meta.BatteryMv != nil && *meta.BatteryMv == 4209) {
		t.Errorf("meta.BatteryMv = %v, want 4209", meta.BatteryMv)
	}
	if !(meta.NoiseFloor != nil && *meta.NoiseFloor == -109) {
		t.Errorf("meta.NoiseFloor = %v, want -109", meta.NoiseFloor)
	}
	if !(meta.UptimeSecs != nil && *meta.UptimeSecs == 75821) {
		t.Errorf("meta.UptimeSecs = %v, want 75821", meta.UptimeSecs)
	}

	// Stage 2: persistence through UpsertObserver.
	st, openErr := OpenStore(tempDBPath(t))
	if openErr != nil {
		t.Fatal(openErr)
	}
	defer st.Close()
	if upsertErr := st.UpsertObserver("AABBCCDD", "TestObserver", "SJC", meta); upsertErr != nil {
		t.Fatalf("UpsertObserver: %v", upsertErr)
	}

	var dbModel, dbFirmware, dbClientVersion, dbRadio string
	var dbBattery int
	var dbUptime int64
	var dbNoise float64
	observerRow := st.db.QueryRow(`SELECT model, firmware, client_version, radio,
battery_mv, uptime_secs, noise_floor
FROM observers WHERE id = 'AABBCCDD'`)
	scanErr := observerRow.Scan(
		&dbModel, &dbFirmware, &dbClientVersion, &dbRadio,
		&dbBattery, &dbUptime, &dbNoise,
	)
	if scanErr != nil {
		t.Fatalf("scan observer row: %v", scanErr)
	}
	if dbModel != "Heltec V3" {
		t.Errorf("DB model = %q, want \"Heltec V3\"", dbModel)
	}
	if dbFirmware != "1.12.0-test" {
		t.Errorf("DB firmware = %q, want \"1.12.0-test\"", dbFirmware)
	}
	if dbBattery != 4209 {
		t.Errorf("DB battery_mv = %d, want 4209", dbBattery)
	}
	if dbUptime != 75821 {
		t.Errorf("DB uptime_secs = %d, want 75821", dbUptime)
	}
	if dbNoise != -109 {
		t.Errorf("DB noise_floor = %f, want -109", dbNoise)
	}
}
-225
View File
@@ -1,225 +0,0 @@
package main
import (
"database/sql"
"strings"
"sync/atomic"
)
// Context-aware hop resolver — full restore of pre-#1289 hop
// disambiguation semantics, ported into the ingestor (where the
// neighbor graph + node directory now live, per #1283).
//
// Why this exists (issues #1547 / #1560):
// The naive `resolvePath` only resolves hops whose prefix is unique
// in the node table. On a >2K-node mesh the dominant case is 1-byte
// prefix collisions (multiple candidates per prefix). Without
// adjacency disambiguation those hops always serialize as `nil`
// and the resolved_path remains effectively empty for the largest
// meshes — the very deployments that need it most.
//
// Algorithm (ported from cmd/server/store.go @ commit 450236d5
// `pm.resolveWithContext`, intersected with the disambiguation gating
// from PR #1144 / #1352):
//
// For each hop:
// 1. Collect candidate pubkeys by prefix-match (existing prefixIndex).
// 2. len==0 → nil.
// 3. len==1 → that pubkey.
// 4. len>1 → filter by NeighborGraph adjacency to the anchor:
// - hop 0 anchor = fromPubkey (ADVERT originator) if known;
// - hop i (i>0) anchor = previous resolved hop's pubkey;
// if the previous hop did not resolve, the chain breaks
// and subsequent >1-candidate hops fall to nil.
// Surviving candidates after filter:
// - exactly 1 → use it
// - 0 or >1 → nil (cannot disambiguate further)
//
// This is the conservative tier-1 variant. Pre-#1289 also carried
// tier-2 (geo proximity), tier-3 (GPS preference), tier-4 (obs-count
// fallback) — those were noisy in practice and are intentionally NOT
// ported here; this PR is a regression restore, not an enhancement.
// NeighborGraph is the in-memory adjacency snapshot used by the
// context-aware resolver. Node keys are lowercased on the way in, so
// lookups are case-insensitive. The zero value is an empty graph that
// is ready to use. The type performs no locking of its own.
type NeighborGraph struct {
	// adj maps a lowercased node key to the set of its neighbors.
	adj map[string]map[string]struct{}
}

// NewNeighborGraph returns an empty graph.
func NewNeighborGraph() *NeighborGraph {
	return &NeighborGraph{adj: make(map[string]map[string]struct{})}
}

// AddEdge adds an undirected adjacency a↔b. Self-loops and empty
// endpoints are ignored. Both endpoints are lowercased before storage.
func (g *NeighborGraph) AddEdge(a, b string) {
	a = strings.ToLower(a)
	b = strings.ToLower(b)
	if a == "" || b == "" || a == b {
		return
	}
	// Allocate lazily so a zero-value NeighborGraph does not panic on
	// its first insert (writing to a nil map panics in Go).
	if g.adj == nil {
		g.adj = make(map[string]map[string]struct{})
	}
	if g.adj[a] == nil {
		g.adj[a] = make(map[string]struct{})
	}
	if g.adj[b] == nil {
		g.adj[b] = make(map[string]struct{})
	}
	g.adj[a][b] = struct{}{}
	g.adj[b][a] = struct{}{}
}

// IsAdjacent reports whether a and b appear together in any neighbor edge.
// It is safe to call on a nil graph (always false), and returns false when
// either endpoint is empty.
func (g *NeighborGraph) IsAdjacent(a, b string) bool {
	if g == nil {
		return false
	}
	a = strings.ToLower(a)
	b = strings.ToLower(b)
	if a == "" || b == "" {
		return false
	}
	nbrs, ok := g.adj[a]
	if !ok {
		return false
	}
	_, present := nbrs[b]
	return present
}
// neighborGraphHolder keeps the most recently published NeighborGraph
// for the InsertTransmission hot path. Publication goes through an
// atomic.Value so the 60s rebuild can swap in a new graph without a
// read-side lock.
type neighborGraphHolder struct {
	v atomic.Value // holds *NeighborGraph
}

// load returns the last graph handed to store, or nil when nothing has
// been stored yet.
func (h *neighborGraphHolder) load() *NeighborGraph {
	// A failed assertion (nothing stored yet) leaves graph as nil.
	graph, _ := h.v.Load().(*NeighborGraph)
	return graph
}

// store publishes g as the current graph.
func (h *neighborGraphHolder) store(g *NeighborGraph) {
	h.v.Store(g)
}
// loadNeighborGraph reads every (node_a, node_b) pair from the
// neighbor_edges table and returns them as an in-memory adjacency
// snapshot. A table with no rows yields an empty graph.
//
// It returns an error when the query fails or when row iteration ends
// abnormally. In the latter case no graph is returned, so callers never
// publish a silently truncated snapshot.
func loadNeighborGraph(db *sql.DB) (*NeighborGraph, error) {
	rows, err := db.Query(`SELECT node_a, node_b FROM neighbor_edges`)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	g := NewNeighborGraph()
	for rows.Next() {
		var a, b string
		if err := rows.Scan(&a, &b); err != nil {
			// Best effort: one unscannable row must not discard the
			// rest of the graph, so skip it and keep going.
			continue
		}
		g.AddEdge(a, b)
	}
	// rows.Next returns false both at end-of-data and on error; only
	// rows.Err distinguishes the two.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return g, nil
}
// resolveHopWithContext maps one hop prefix to a full pubkey, using
// NeighborGraph adjacency to the anchor to break prefix collisions.
// It returns nil whenever the hop cannot be pinned to exactly one node.
//
// exclude is a set of pubkeys that must never be returned (typically
// the hops already resolved earlier on the path — a packet does not
// revisit a node). A nil exclude set excludes nothing.
//
// Outcome by number of prefix candidates:
//
//	0  → nil
//	1  → that candidate, unless it is excluded (then nil)
//	>1 → nil when anchor is "" or graph is nil; otherwise the single
//	     non-excluded candidate adjacent to anchor, or nil when zero
//	     or several such candidates exist
func resolveHopWithContext(hop string, anchor string, graph *NeighborGraph, idx prefixIndex, exclude map[string]struct{}) *string {
	if idx == nil {
		return nil
	}
	pool := idx[strings.ToLower(hop)]
	if len(pool) == 0 {
		return nil
	}
	if len(pool) == 1 {
		only := pool[0]
		if _, excluded := exclude[only]; excluded {
			return nil
		}
		return &only
	}

	// Collision: adjacency is the only disambiguator available.
	if anchor == "" || graph == nil {
		return nil
	}
	var picked *string
	for i := range pool {
		cand := pool[i]
		if _, excluded := exclude[cand]; excluded {
			continue
		}
		if !graph.IsAdjacent(anchor, cand) {
			continue
		}
		if picked != nil {
			// A second adjacent candidate means the hop stays ambiguous.
			return nil
		}
		picked = &cand
	}
	return picked
}
// resolvePathWithContext resolves every hop of a path in order. Hop 0
// is anchored on fromPubkey (the originator, for ADVERTs); each later
// hop is anchored on the pubkey the previous hop resolved to. When a
// hop fails to resolve the anchor is cleared, so following hops can
// only resolve through a unique prefix.
//
// The originator and every pubkey resolved so far are excluded from
// later candidate pools so the walk never revisits a node. The result
// has one entry per hop (nil = unresolved), which is the `[]*string`
// shape marshalResolvedPath expects; it is nil for an empty hop list.
func resolvePathWithContext(hops []string, fromPubkey string, graph *NeighborGraph, idx prefixIndex) []*string {
	n := len(hops)
	if n == 0 {
		return nil
	}
	resolved := make([]*string, n)
	if idx == nil {
		return resolved
	}

	anchor := strings.ToLower(fromPubkey)
	visited := make(map[string]struct{}, n+1)
	if anchor != "" {
		visited[anchor] = struct{}{}
	}
	for i := range hops {
		pk := resolveHopWithContext(hops[i], anchor, graph, idx, visited)
		resolved[i] = pk
		if pk == nil {
			// Chain broken: no anchor for the next hop.
			anchor = ""
			continue
		}
		anchor = strings.ToLower(*pk)
		visited[anchor] = struct{}{}
	}
	return resolved
}
// RefreshNeighborGraph rebuilds the adjacency snapshot from the
// neighbor_edges table and publishes it through s.neighborGraph.
// Called on startup and once per neighbor-edges builder tick (60s)
// alongside RefreshPrefixIndex. On a load error nothing is published
// and the error is returned.
func (s *Store) RefreshNeighborGraph() error {
	graph, loadErr := loadNeighborGraph(s.db)
	if loadErr == nil {
		s.neighborGraph.store(graph)
	}
	return loadErr
}
-106
View File
@@ -1,106 +0,0 @@
// Package main: ingestor-side processor for prune-request marker files
// written by the read-only server (see internal/prunequeue).
//
// The server cannot DELETE because it opens SQLite mode=ro (#1283/#1289).
// Instead, the server writes request-<id>.json under <dataDir>/prune-requests/
// and the ingestor consumes it here.
package main
import (
"fmt"
"log"
"os"
"strings"
"time"
"github.com/meshcore-analyzer/prunequeue"
)
// DeleteNodesByPubkeys deletes rows from the nodes table whose
// public_key is in pubkeys and returns how many rows were removed.
// Only the ingestor calls this (server has no write handle).
//
// Scope is deliberately conservative: only the `nodes` rows are
// deleted. Observation/transmission history that references the same
// pubkeys is retained for audit (the schema has no FK constraints, see
// the #669 review note); operators can age it out with the regular
// packet-retention prune. If a future schema introduces FKs, revisit.
//
// Batches run as independent statements. If one fails, the count
// deleted so far is returned together with an error naming the failing
// [start:end] slice of pubkeys.
func (s *Store) DeleteNodesByPubkeys(pubkeys []string) (int64, error) {
	// Chunk to keep statements under SQLite's variable limit (default 999).
	const batchSize = 500

	var deleted int64
	for lo := 0; lo < len(pubkeys); lo += batchSize {
		hi := lo + batchSize
		if hi > len(pubkeys) {
			hi = len(pubkeys)
		}
		keys := pubkeys[lo:hi]

		params := make([]interface{}, 0, len(keys))
		for _, k := range keys {
			params = append(params, k)
		}
		// One "?" per key, comma separated: "?,?,?".
		inList := strings.TrimSuffix(strings.Repeat("?,", len(keys)), ",")

		result, err := s.db.Exec("DELETE FROM nodes WHERE public_key IN ("+inList+")", params...)
		if err != nil {
			return deleted, fmt.Errorf("delete batch [%d:%d]: %w", lo, hi, err)
		}
		// The RowsAffected error is intentionally ignored: the delete
		// already ran, and the count is only used for reporting.
		affected, _ := result.RowsAffected()
		deleted += affected
	}
	return deleted, nil
}
// RunPendingPruneRequests processes every pending prune request that
// prunequeue.ListPending reports for this store's database path (the
// request-<id>.json markers written by the server).
//
// Each request is honored verbatim — the server is responsible for the
// TOCTOU snapshot (only pubkeys that were still outside the geofilter
// at confirm time). For every readable request the listed pubkeys are
// deleted and a result (including any delete error) is handed to
// prunequeue.WriteResult. This function does not remove the request
// file itself on that path; consuming it is left to WriteResult.
// A request file that cannot be read is logged and removed.
//
// Safe to call from a ticker — no-op when the queue is empty.
func (s *Store) RunPendingPruneRequests() {
	pending, err := prunequeue.ListPending(s.path)
	if err != nil {
		log.Printf("[prune-queue] list pending failed: %v", err)
		return
	}

	for _, reqPath := range pending {
		req, readErr := prunequeue.ReadRequest(reqPath)
		if readErr != nil {
			log.Printf("[prune-queue] read %s failed: %v — removing", reqPath, readErr)
			// Best effort: drop the unreadable marker so it is not retried forever.
			_ = os.Remove(reqPath)
			continue
		}

		log.Printf("[prune-queue] processing request %s: %d pubkey(s) (%s)",
			req.ID, len(req.Pubkeys), req.Reason)

		began := time.Now()
		count, delErr := s.DeleteNodesByPubkeys(req.Pubkeys)
		outcome := prunequeue.Result{
			ID:          req.ID,
			RequestedAt: req.RequestedAt,
			CompletedAt: time.Now().UTC(),
			Deleted:     count,
		}
		if delErr == nil {
			log.Printf("[prune-queue] request %s deleted %d node(s) in %s", req.ID, count, time.Since(began))
		} else {
			outcome.Error = delErr.Error()
			log.Printf("[prune-queue] request %s FAILED after %s: %v", req.ID, time.Since(began), delErr)
		}

		if writeErr := prunequeue.WriteResult(s.path, outcome); writeErr != nil {
			log.Printf("[prune-queue] write result for %s failed: %v", req.ID, writeErr)
		}
	}
}
-77
View File
@@ -1,77 +0,0 @@
package main
import (
"path/filepath"
"testing"
"time"
"github.com/meshcore-analyzer/prunequeue"
)
// TestRunPendingPruneRequests writes one prune request naming a single
// seeded node, runs the processor, and checks that the request was
// consumed, a clean result was written, and only that node was deleted.
func TestRunPendingPruneRequests(t *testing.T) {
	dir := t.TempDir()
	dbPath := filepath.Join(dir, "test.db")
	store, err := OpenStore(dbPath)
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer store.Close()

	// Seed two nodes; one will be pruned, one will be kept.
	if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, role, lat, lon, last_seen, first_seen)
VALUES ('aaaa', 'gone', 'companion', 1.0, 1.0, '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z'),
('bbbb', 'kept', 'companion', 2.0, 2.0, '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')`); err != nil {
		t.Fatalf("seed: %v", err)
	}

	id := prunequeue.NewID()
	if err := prunequeue.WriteRequest(dbPath, prunequeue.Request{
		ID:          id,
		RequestedAt: time.Now().UTC(),
		Reason:      "geo-prune-test",
		Pubkeys:     []string{"aaaa"},
	}); err != nil {
		t.Fatalf("WriteRequest: %v", err)
	}

	store.RunPendingPruneRequests()

	// Request file gone, result file present.
	if exists, _ := prunequeue.RequestExists(dbPath, id); exists {
		t.Error("request file should have been consumed")
	}
	res, err := prunequeue.ReadResult(dbPath, id)
	if err != nil || res == nil {
		t.Fatalf("ReadResult: res=%v err=%v", res, err)
	}
	if res.Deleted != 1 {
		t.Errorf("expected Deleted=1, got %d", res.Deleted)
	}
	if res.Error != "" {
		t.Errorf("unexpected error: %s", res.Error)
	}

	// Verify DB state: aaaa gone, bbbb kept. The Scan errors must be
	// checked: a failed query would leave the counter at zero and make
	// the "deleted" assertion pass without proving anything.
	var gone int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM nodes WHERE public_key='aaaa'").Scan(&gone); err != nil {
		t.Fatalf("count 'aaaa': %v", err)
	}
	if gone != 0 {
		t.Errorf("expected 'aaaa' deleted, got count=%d", gone)
	}
	var kept int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM nodes WHERE public_key='bbbb'").Scan(&kept); err != nil {
		t.Fatalf("count 'bbbb': %v", err)
	}
	if kept != 1 {
		t.Errorf("expected 'bbbb' kept, got count=%d", kept)
	}
}
// TestRunPendingPruneRequests_EmptyQueueIsNoop runs the processor
// against a freshly opened store with no request files written.
func TestRunPendingPruneRequests_EmptyQueueIsNoop(t *testing.T) {
	path := filepath.Join(t.TempDir(), "test.db")
	st, openErr := OpenStore(path)
	if openErr != nil {
		t.Fatalf("OpenStore: %v", openErr)
	}
	defer st.Close()

	// Must not panic / error on empty queue.
	st.RunPendingPruneRequests()
}
@@ -1,63 +0,0 @@
package main
import (
"database/sql"
"strings"
"testing"
)
// TestPublicKeyLowercaseNormalizationMigration covers #1483: the
// server's GetNodeLocationsByKeys lookup relies on stored public_key
// values being lowercase (LOWER(public_key) was dropped for perf), so
// the ingestor must normalize any legacy uppercase rows on boot.
//
// The test seeds an uppercase key directly, closes and reopens the
// store, and expects to find only the lowercase form afterwards.
func TestPublicKeyLowercaseNormalizationMigration(t *testing.T) {
	const upperKey = "AABBCCDDEEFF11223344"

	path := tempDBPath(t)
	first, err := OpenStore(path)
	if err != nil {
		t.Fatalf("first OpenStore: %v", err)
	}

	// Seed an uppercase row directly, bypassing UpsertNode's lowercase.
	_, err = first.db.Exec(
		`INSERT INTO nodes (public_key, name, role, last_seen, first_seen)
VALUES ('AABBCCDDEEFF11223344', 'mixed-case-node', 'companion', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')`,
	)
	if err != nil {
		t.Fatalf("seed uppercase row: %v", err)
	}

	// Sanity: verify the uppercase row is there pre-normalization.
	var seeded string
	err = first.db.QueryRow(`SELECT public_key FROM nodes WHERE public_key = 'AABBCCDDEEFF11223344'`).Scan(&seeded)
	if err != nil {
		t.Fatalf("pre-check select: %v", err)
	}
	if seeded != upperKey {
		t.Fatalf("pre-check: expected uppercase, got %s", seeded)
	}
	first.Close()

	// Reopen — the boot-time migration should normalize the row.
	second, err := OpenStore(path)
	if err != nil {
		t.Fatalf("reopen: %v", err)
	}
	defer second.Close()

	// The uppercase row should be gone.
	var upperCount int
	err = second.db.QueryRow(`SELECT COUNT(*) FROM nodes WHERE public_key = 'AABBCCDDEEFF11223344'`).Scan(&upperCount)
	if err != nil {
		t.Fatalf("post-check uppercase count: %v", err)
	}
	if upperCount != 0 {
		t.Fatalf("expected 0 uppercase rows after migration, got %d", upperCount)
	}

	// The lowercase form should match.
	var normalized string
	err = second.db.QueryRow(`SELECT public_key FROM nodes WHERE public_key = 'aabbccddeeff11223344'`).Scan(&normalized)
	switch {
	case err == sql.ErrNoRows:
		t.Fatalf("expected lowercase row to exist after migration")
	case err != nil:
		t.Fatalf("post-check lowercase select: %v", err)
	}
	if want := strings.ToLower(upperKey); normalized != want {
		t.Fatalf("got %s, want lowercase form", normalized)
	}
}
-113
View File
@@ -1,113 +0,0 @@
package main
import (
"encoding/json"
"strings"
"sync/atomic"
)
// Issue #1547 — resolved_path writer (ingestor-owned).
//
// Per the #1283 refactor (server is read-only; ingestor owns the
// neighbor graph + node directory), the writer that populated
// `observations.resolved_path` must live here in the ingestor. PR #1289
// removed the server-side writer without porting it — this restores it.
//
// Approach:
// - `resolvePath` is a pure function: hop prefixes → full pubkeys
// using the in-memory prefix index built from `nodes.public_key`.
// - Unique-prefix hops resolve to the full pubkey; ambiguous or
// unknown hops resolve to `nil`. The output shape is `[]*string`
// (with nulls for unresolved positions) — the JSON serialization
// matches what the server's `unmarshalResolvedPath` /
// frontend `getResolvedPath` already consume.
// - The prefix index is rebuilt on startup and once per neighbor-
// builder tick (60s) so new nodes start resolving within a minute
// without blocking the MQTT ingest path.
// resolvePath is the naive resolver: a hop becomes a full pubkey only
// when the (lowercased) prefix has exactly one candidate in idx; any
// other hop is left nil at its position. The result always has one
// entry per hop, except that an empty hop list yields nil.
func resolvePath(hops []string, idx prefixIndex) []*string {
	if len(hops) == 0 {
		return nil
	}
	resolved := make([]*string, len(hops))
	if idx == nil {
		return resolved
	}
	for pos := range hops {
		matches := idx[strings.ToLower(hops[pos])]
		if len(matches) != 1 {
			// Unknown or ambiguous prefix: leave this position nil.
			continue
		}
		full := matches[0]
		resolved[pos] = &full
	}
	return resolved
}
// marshalResolvedPath JSON-encodes a resolved path as an array of
// strings with null for unresolved hops. It returns "" when rp is
// empty, when every element is nil, or when encoding fails; the writer
// treats "" as SQL NULL.
//
// The all-nil case matters because of the UPSERT in InsertTransmission:
//
//	resolved_path = COALESCE(excluded.resolved_path, resolved_path)
//
// Emitting "[null,null]" would pass through nilIfEmpty() as a non-NULL
// string, and the COALESCE would then OVERWRITE a previously stored
// good resolved_path on re-ingest. Returning "" lets nilIfEmpty produce
// SQL NULL so the COALESCE falls through to the existing value.
// See issue #1547 / PR #1548 reviewer findings.
func marshalResolvedPath(rp []*string) string {
	// An empty slice has no resolved hop either, so this one scan
	// covers both the "empty" and the "all nil" case.
	anyResolved := false
	for i := range rp {
		if rp[i] != nil {
			anyResolved = true
			break
		}
	}
	if !anyResolved {
		return ""
	}

	encoded, err := json.Marshal(rp)
	if err != nil {
		return ""
	}
	return string(encoded)
}
// prefixIdxHolder keeps the most recently published prefix index for
// the InsertTransmission hot path. Publication goes through an
// atomic.Value so the 60s rebuild can happen without a lock on the
// read side.
type prefixIdxHolder struct {
	v atomic.Value // holds prefixIndex
}

// load returns the last index handed to store, or nil when nothing has
// been stored yet.
func (h *prefixIdxHolder) load() prefixIndex {
	// A failed assertion (nothing stored yet) leaves current as nil.
	current, _ := h.v.Load().(prefixIndex)
	return current
}

// store publishes idx as the current prefix index.
func (h *prefixIdxHolder) store(idx prefixIndex) {
	h.v.Store(idx)
}
// RefreshPrefixIndex rebuilds the in-memory prefix index via
// buildPrefixIndex and publishes it through s.prefixIdx. Called on
// startup and from the neighbor-edges builder tick (60s) so new nodes
// become resolvable without per-insert DB scans. On a build error
// nothing is published and the error is returned.
func (s *Store) RefreshPrefixIndex() error {
	fresh, buildErr := buildPrefixIndex(s.db)
	if buildErr == nil {
		s.prefixIdx.store(fresh)
	}
	return buildErr
}
-446
View File
@@ -1,446 +0,0 @@
package main
import (
"database/sql"
"encoding/json"
"path/filepath"
"testing"
)
// unmarshalResolvedPathLocal decodes a JSON array of nullable strings
// into a []*string. It returns nil for an empty input and for any
// input that fails to decode.
func unmarshalResolvedPathLocal(s string) []*string {
	var decoded []*string
	if s != "" {
		if err := json.Unmarshal([]byte(s), &decoded); err != nil {
			// Discard anything partially decoded before the error.
			return nil
		}
	}
	return decoded
}
// TestResolvePathPureFunction exercises the pure resolvePath helper
// against a hand-built index and checks that:
//   - a hop whose prefix has one candidate resolves to that pubkey
//   - a hop whose prefix has two candidates resolves to nil
//   - a hop whose prefix is absent from the index resolves to nil
//   - the result has one entry per input hop
//
// Regression gate for #1547 (resolved_path stopped being written).
func TestResolvePathPureFunction(t *testing.T) {
	index := prefixIndex{
		// unique prefixes
		"aa": {"aaaaaaaaaa"},
		"bb": {"bbbbbbbbbb"},
		// ambiguous prefix (2 candidates)
		"cc": {"cccccccccc", "ccdddddddd"},
		// full-key entries
		"aaaaaaaaaa": {"aaaaaaaaaa"},
		"bbbbbbbbbb": {"bbbbbbbbbb"},
		"cccccccccc": {"cccccccccc"},
	}

	resolved := resolvePath([]string{"aa", "cc", "ff", "bb"}, index)
	if n := len(resolved); n != 4 {
		t.Fatalf("expected len 4, got %d", n)
	}
	if first := resolved[0]; first == nil || *first != "aaaaaaaaaa" {
		t.Errorf("hop[0] aa: want aaaaaaaaaa, got %v", deref(first))
	}
	if second := resolved[1]; second != nil {
		t.Errorf("hop[1] cc: want nil (ambiguous), got %v", deref(second))
	}
	if third := resolved[2]; third != nil {
		t.Errorf("hop[2] ff: want nil (unknown), got %v", deref(third))
	}
	if fourth := resolved[3]; fourth == nil || *fourth != "bbbbbbbbbb" {
		t.Errorf("hop[3] bb: want bbbbbbbbbb, got %v", deref(fourth))
	}
}
// TestResolvePathEmptyHops asserts empty/no-path produces nil.
func TestResolvePathEmptyHops(t *testing.T) {
if got := resolvePath(nil, prefixIndex{}); got != nil {
t.Errorf("nil hops: want nil, got %v", got)
}
if got := resolvePath([]string{}, prefixIndex{}); got != nil {
t.Errorf("empty hops: want nil, got %v", got)
}
}
// TestMarshalResolvedPathRoundtrip asserts the JSON shape matches the
// server's marshal/unmarshal contract: `[]*string` with nulls for
// unresolved hops. It checks the exact encoded string and then decodes
// it again to confirm every position (including the nil) survives the
// round trip.
func TestMarshalResolvedPathRoundtrip(t *testing.T) {
	a := "aaaaaaaaaa"
	b := "bbbbbbbbbb"
	in := []*string{&a, nil, &b}

	s := marshalResolvedPath(in)
	want := `["aaaaaaaaaa",null,"bbbbbbbbbb"]`
	if s != want {
		t.Errorf("marshal: want %s, got %s", want, s)
	}

	// Decode half of the round trip.
	out := unmarshalResolvedPathLocal(s)
	if len(out) != len(in) {
		t.Fatalf("unmarshal: want %d elements, got %d (raw=%s)", len(in), len(out), s)
	}
	for i := range in {
		switch {
		case in[i] == nil:
			if out[i] != nil {
				t.Errorf("unmarshal[%d]: want <nil>, got %s", i, *out[i])
			}
		case out[i] == nil || *out[i] != *in[i]:
			t.Errorf("unmarshal[%d]: want %s, got %s", i, *in[i], deref(out[i]))
		}
	}
}
// TestInsertTransmissionWritesResolvedPath is the integration test that
// gates the regression introduced by PR #1289 (issue #1547).
//
// Setup: seed two nodes and one observer, refresh the prefix index,
// then call InsertTransmission with a PacketData whose PathJSON names
// one seeded node by its unique 1-byte (2-hex) prefix.
//
// Assert: the observations row for that transmission has a non-NULL
// resolved_path whose decoded length equals the hop count and whose
// single element is the seeded node's full pubkey.
func TestInsertTransmissionWritesResolvedPath(t *testing.T) {
	const selectResolved = `SELECT resolved_path FROM observations WHERE transmission_id = (SELECT id FROM transmissions WHERE hash = ?)`

	st, err := OpenStore(filepath.Join(t.TempDir(), "ingest.db"))
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer st.Close()

	// Seed nodes with unique 1-byte prefixes.
	_, err = st.db.Exec(
		`INSERT INTO nodes (public_key, name) VALUES (?, ?), (?, ?)`,
		"aaaaaaaaaa", "from-node",
		"bbbbbbbbbb", "first-hop",
	)
	if err != nil {
		t.Fatal(err)
	}

	// Seed one observer (needed so InsertTransmission resolves observer_idx).
	if err := st.UpsertObserver("obs-1", "observer-1", "", nil); err != nil {
		t.Fatalf("UpsertObserver: %v", err)
	}

	// Force the prefix index to be (re)built from the seeded nodes so
	// the InsertTransmission path has something to resolve against.
	if err := st.RefreshPrefixIndex(); err != nil {
		t.Fatalf("RefreshPrefixIndex: %v", err)
	}

	packet := &PacketData{
		Hash:        "h-1547",
		ObserverID:  "obs-1",
		FromPubkey:  "aaaaaaaaaa",
		PathJSON:    `["bb"]`,
		PayloadType: int(payloadADVERT),
		RouteType:   0,
		RawHex:      "deadbeef",
		DecodedJSON: "{}",
		Timestamp:   "2026-06-01T00:00:00Z",
	}
	if _, err := st.InsertTransmission(packet); err != nil {
		t.Fatalf("InsertTransmission: %v", err)
	}

	var stored sql.NullString
	if err := st.db.QueryRow(selectResolved, "h-1547").Scan(&stored); err != nil {
		t.Fatalf("query: %v", err)
	}
	if !stored.Valid || stored.String == "" {
		t.Fatalf("expected non-nil resolved_path, got NULL/empty (regression: #1547)")
	}

	hops := unmarshalResolvedPathLocal(stored.String)
	if len(hops) != 1 {
		t.Fatalf("resolved_path length: want 1, got %d (value=%s)", len(hops), stored.String)
	}
	if hop := hops[0]; hop == nil || *hop != "bbbbbbbbbb" {
		t.Errorf("resolved_path[0]: want bbbbbbbbbb, got %v (raw=%s)", deref(hop), stored.String)
	}
}
// deref renders a *string for test failure messages: the pointed-to
// value, or the literal "<nil>" for a nil pointer.
func deref(p *string) string {
	if p != nil {
		return *p
	}
	return "<nil>"
}
// ─── #1560: context-aware resolution tests ─────────────────────────────────
//
// These exercise the post-fix behavior of resolveHopWithContext +
// resolvePathWithContext. Until the green commit lands they MUST fail
// on assertions (the stub falls back to naive `len==1` and returns nil
// on every >1-candidate prefix), proving the gate is real.
// build5NodeAmbiguousIndex returns a five-node prefixIndex plus the
// five pubkeys it was built from. Nodes A, B and C share the 1-byte
// prefix 0x5c (a three-way collision); D and E have the unique
// prefixes 0xdd and 0xee. Every pubkey is also indexed under itself so
// exact-match lookups resolve. The pubkeys are:
//
//	A = "5c000000000000000000000000000000aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
//	B = "5c000000000000000000000000000000bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
//	C = "5c000000000000000000000000000000cccccccccccccccccccccccccccccccc"
//	D = "dd000000000000000000000000000000dddddddddddddddddddddddddddddddd"
//	E = "ee000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
func build5NodeAmbiguousIndex() (idx prefixIndex, A, B, C, D, E string) {
	// Colliding trio.
	A = "5c000000000000000000000000000000aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
	B = "5c000000000000000000000000000000bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
	C = "5c000000000000000000000000000000cccccccccccccccccccccccccccccccc"
	// Uniquely-prefixed pair.
	D = "dd000000000000000000000000000000dddddddddddddddddddddddddddddddd"
	E = "ee000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"

	idx = prefixIndex{
		// full-key entries (so exact-match lookups still resolve)
		A: {A},
		B: {B},
		C: {C},
		D: {D},
		E: {E},
		// 1-byte prefixes: 5c collides, dd and ee are unique
		"5c": {A, B, C},
		"dd": {D},
		"ee": {E},
	}
	return idx, A, B, C, D, E
}
// TestResolveHopWithContext_OneByteCollision_AdjacencyResolves covers
// the dominant production case (#1560): three nodes share the 1-byte
// prefix 0x5c and NeighborGraph adjacency must narrow them to one.
// With the chain A↔B↔C↔D↔E, anchoring on A leaves only B; anchoring
// on B leaves both A and C, which must stay unresolved.
func TestResolveHopWithContext_OneByteCollision_AdjacencyResolves(t *testing.T) {
	idx, A, B, C, D, E := build5NodeAmbiguousIndex()

	// chain: A↔B, B↔C, C↔D, D↔E
	graph := NewNeighborGraph()
	for _, edge := range [][2]string{{A, B}, {B, C}, {C, D}, {D, E}} {
		graph.AddEdge(edge[0], edge[1])
	}

	// Anchored on A, the only 5c neighbor of A is B.
	fromA := resolveHopWithContext("5c", A, graph, idx, nil)
	if fromA == nil {
		t.Fatalf("anchor=A, hop=5c: want B (%s), got <nil>", B)
	}
	if *fromA != B {
		t.Errorf("anchor=A, hop=5c: want %s, got %s", B, *fromA)
	}

	// Anchored on B, both A and C are adjacent 5c candidates and no
	// exclude set is supplied, so two survivors → nil.
	if fromB := resolveHopWithContext("5c", B, graph, idx, nil); fromB != nil {
		t.Errorf("anchor=B, hop=5c: ambiguous (A and C both adjacent); want <nil>, got %s", *fromB)
	}
}
// TestResolvePathWithContext_TwoHopChainAnchoredOnFromNode runs the
// canonical 1-byte collision case end to end: with edges A↔B and B↔C,
// path [5c, 5c] originating at A must resolve to [B, C].
func TestResolvePathWithContext_TwoHopChainAnchoredOnFromNode(t *testing.T) {
	idx, A, B, C, _, _ := build5NodeAmbiguousIndex()

	graph := NewNeighborGraph()
	graph.AddEdge(A, B)
	graph.AddEdge(B, C)

	path := resolvePathWithContext([]string{"5c", "5c"}, A, graph, idx)
	if n := len(path); n != 2 {
		t.Fatalf("len(got)=%d, want 2 (raw=%v)", n, path)
	}
	if first := path[0]; first == nil || *first != B {
		t.Errorf("hop[0]: want %s, got %v", B, deref(first))
	}
	if second := path[1]; second == nil || *second != C {
		t.Errorf("hop[1]: want %s, got %v", C, deref(second))
	}
}
// TestResolveHopWithContext_NoAdjacencyContext_ReturnsNil is the
// negative gate: with three candidates sharing prefix 5c and an
// edgeless graph, the hop must stay nil both without an anchor and
// with an anchor that is adjacent to none of them. Guards against an
// over-eager resolver that just picks the first candidate.
func TestResolveHopWithContext_NoAdjacencyContext_ReturnsNil(t *testing.T) {
	idx, _, _, _, _, _ := build5NodeAmbiguousIndex()
	emptyGraph := NewNeighborGraph() // no edges

	if r := resolveHopWithContext("5c", "", emptyGraph, idx, nil); r != nil {
		t.Errorf("no anchor + empty graph: want <nil>, got %s", *r)
	}

	// With an anchor that's not adjacent to any candidate, also nil.
	if r := resolveHopWithContext("5c", "deadbeefdeadbeef", emptyGraph, idx, nil); r != nil {
		t.Errorf("non-adjacent anchor: want <nil>, got %s", *r)
	}
}
// TestResolvePathWithContext_AdvertAnchoring checks ADVERT-style
// anchoring: from_pubkey A is the originator and B is its only
// neighbor among the 5c candidates, so hop[0]=5c must resolve to B.
func TestResolvePathWithContext_AdvertAnchoring(t *testing.T) {
	idx, A, B, _, _, _ := build5NodeAmbiguousIndex()

	graph := NewNeighborGraph()
	graph.AddEdge(A, B) // only B is adjacent to A among the 5c candidates

	path := resolvePathWithContext([]string{"5c"}, A, graph, idx)
	if n := len(path); n != 1 {
		t.Fatalf("len(got)=%d, want 1", n)
	}
	if hop := path[0]; hop == nil || *hop != B {
		t.Errorf("ADVERT anchored on A, hop=5c: want %s, got %v", B, deref(hop))
	}
}
// TestResolvePathWithContext_RegressionMultiByteStillWorks guards the
// unique-prefix path that PR #1548 already handled: a prefix with a
// single candidate must resolve with no graph and no anchor at all.
// The inputs used here are the unique 1-byte prefixes dd and ee.
func TestResolvePathWithContext_RegressionMultiByteStillWorks(t *testing.T) {
	idx, _, _, _, D, E := build5NodeAmbiguousIndex()

	// No originator, nil graph: only the naive unique-prefix rule applies.
	path := resolvePathWithContext([]string{"dd", "ee"}, "", nil, idx)
	if n := len(path); n != 2 {
		t.Fatalf("len(got)=%d, want 2", n)
	}
	if first := path[0]; first == nil || *first != D {
		t.Errorf("hop[0] dd: want %s, got %v", D, deref(first))
	}
	if second := path[1]; second == nil || *second != E {
		t.Errorf("hop[1] ee: want %s, got %v", E, deref(second))
	}
}
// TestResolvePathWithContext_AllNilContractPreserved checks that the
// all-nil → empty-string clobber-guard contract from PR #1548 holds
// for the context resolver too: a path where no hop resolves, fed to
// marshalResolvedPath, MUST yield "" (so nilIfEmpty → SQL NULL
// → COALESCE preserves existing).
func TestResolvePathWithContext_AllNilContractPreserved(t *testing.T) {
	// Empty index → every hop nil.
	path := resolvePathWithContext([]string{"5c", "dd"}, "", nil, prefixIndex{})
	if n := len(path); n != 2 {
		t.Fatalf("len(got)=%d, want 2", n)
	}
	for pos := range path {
		if hop := path[pos]; hop != nil {
			t.Errorf("hop[%d]: want <nil>, got %s", pos, *hop)
		}
	}

	encoded := marshalResolvedPath(path)
	if encoded != "" {
		t.Errorf("all-nil marshal: want \"\", got %q (clobber-guard regression)", encoded)
	}
}
// TestMarshalResolvedPathAllNilReturnsEmpty is a regression gate for
// the data-loss clobber bug surfaced in PR #1548 review.
//
// When no hop resolves (every element nil), marshalResolvedPath used
// to emit "[null,null,...]" — a non-empty string that bypassed
// nilIfEmpty and then OVERWROTE the existing resolved_path via the
// COALESCE(excluded, current) UPSERT on re-ingest. The fix returns ""
// so nilIfEmpty produces SQL NULL and the COALESCE preserves the
// existing good value. A partially resolved path must still serialize.
func TestMarshalResolvedPathAllNilReturnsEmpty(t *testing.T) {
	allNil := []struct {
		name string
		in   []*string
	}{
		{name: "one-nil", in: []*string{nil}},
		{name: "two-nils", in: []*string{nil, nil}},
		{name: "three-nils", in: []*string{nil, nil, nil}},
	}
	for _, tc := range allNil {
		t.Run(tc.name, func(t *testing.T) {
			if got := marshalResolvedPath(tc.in); got != "" {
				t.Errorf("all-nil input must return \"\" (so nilIfEmpty → SQL NULL → COALESCE preserves existing); got %q", got)
			}
		})
	}

	// Mixed (at least one non-nil) MUST still marshal normally so we
	// don't lose partial resolutions.
	resolved := "aaaaaaaaaa"
	if mixed := marshalResolvedPath([]*string{&resolved, nil}); mixed != `["aaaaaaaaaa",null]` {
		t.Errorf("partial resolution must still serialize; got %q", mixed)
	}
}
// TestInsertTransmissionDoesNotClobberResolvedPathOnAllNil is the
// integration-level regression test for the data-loss bug.
//
// Setup: ingest a transmission whose path resolves cleanly to a known
// pubkey. Then replace the prefix index with an empty one (so the next
// resolution is all-nil) and re-ingest the SAME transmission.
//
// Assert: the resolved_path stored by the first ingest is PRESERVED —
// neither NULL'd nor overwritten.
//
// Pre-fix behavior: marshalResolvedPath emitted "[null]", nilIfEmpty
// kept it non-NULL, and COALESCE(excluded.resolved_path, resolved_path)
// clobbered the original "bbbbbbbbbb".
func TestInsertTransmissionDoesNotClobberResolvedPathOnAllNil(t *testing.T) {
	const selectResolved = `SELECT resolved_path FROM observations WHERE transmission_id = (SELECT id FROM transmissions WHERE hash = ?)`

	st, err := OpenStore(filepath.Join(t.TempDir(), "ingest.db"))
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer st.Close()

	_, err = st.db.Exec(
		`INSERT INTO nodes (public_key, name) VALUES (?, ?), (?, ?)`,
		"aaaaaaaaaa", "from-node",
		"bbbbbbbbbb", "first-hop",
	)
	if err != nil {
		t.Fatal(err)
	}
	if err := st.UpsertObserver("obs-1", "observer-1", "", nil); err != nil {
		t.Fatalf("UpsertObserver: %v", err)
	}
	if err := st.RefreshPrefixIndex(); err != nil {
		t.Fatalf("RefreshPrefixIndex: %v", err)
	}

	packet := &PacketData{
		Hash:        "h-clobber",
		ObserverID:  "obs-1",
		FromPubkey:  "aaaaaaaaaa",
		PathJSON:    `["bb"]`,
		PayloadType: int(payloadADVERT),
		RouteType:   0,
		RawHex:      "deadbeef",
		DecodedJSON: "{}",
		Timestamp:   "2026-06-01T00:00:00Z",
	}
	if _, err := st.InsertTransmission(packet); err != nil {
		t.Fatalf("first InsertTransmission: %v", err)
	}

	// Sanity: first write populated resolved_path.
	var before sql.NullString
	if err := st.db.QueryRow(selectResolved, "h-clobber").Scan(&before); err != nil {
		t.Fatalf("first query: %v", err)
	}
	if !before.Valid || before.String == "" {
		t.Fatalf("precondition failed: first ingest left resolved_path NULL/empty; cannot test clobber")
	}
	preserved := before.String

	// Now wipe the prefix index so re-ingest produces an all-nil
	// resolution — exactly the scenario where the bug clobbers data.
	st.prefixIdx.store(prefixIndex{})
	if _, err := st.InsertTransmission(packet); err != nil {
		t.Fatalf("re-ingest InsertTransmission: %v", err)
	}

	var after sql.NullString
	if err := st.db.QueryRow(selectResolved, "h-clobber").Scan(&after); err != nil {
		t.Fatalf("post-reingest query: %v", err)
	}
	if !after.Valid {
		t.Fatalf("data loss: resolved_path was NULL'd by re-ingest (was %q)", preserved)
	}
	if after.String != preserved {
		t.Errorf("data loss: resolved_path was clobbered by all-nil re-ingest\n before: %s\n after: %s", preserved, after.String)
	}
}
-156
View File
@@ -1,156 +0,0 @@
package main
import (
"testing"
"time"
)
// TestParseEnvelopeTime checks, per input layout, whether parseEnvelopeTime
// accepts the string and whether it reports the value as naive (zone-less).
func TestParseEnvelopeTime(t *testing.T) {
	type envelopeCase struct {
		name      string
		in        string
		ok        bool
		wantNaive bool
	}
	tests := []envelopeCase{
		{name: "rfc3339 utc", in: "2026-05-16T10:00:00Z", ok: true},
		{name: "rfc3339 offset", in: "2026-05-16T12:00:00+02:00", ok: true},
		{name: "naive iso", in: "2026-05-16T10:00:00", ok: true, wantNaive: true},
		{name: "naive iso micros", in: "2026-05-16T10:00:00.123456", ok: true, wantNaive: true},
		{name: "garbage", in: "not-a-time"},
		{name: "empty", in: ""},
	}
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			_, gotNaive, err := parseEnvelopeTime(tc.in)
			accepted := err == nil
			if accepted != tc.ok {
				t.Fatalf("parseEnvelopeTime(%q): want ok=%v, got err=%v", tc.in, tc.ok, err)
			}
			if !accepted {
				// The naive flag is only meaningful for accepted inputs.
				return
			}
			if gotNaive != tc.wantNaive {
				t.Fatalf("parseEnvelopeTime(%q): want naive=%v, got %v", tc.in, tc.wantNaive, gotNaive)
			}
		})
	}
}
// TestResolveRxTime checks which zone-aware envelope timestamps resolveRxTime
// returns verbatim and which it replaces with (approximately) the current
// time: missing, unparseable, future, and older-than-30-days values.
func TestResolveRxTime(t *testing.T) {
	now := time.Now().UTC()

	// nearNow reports whether s (which must be RFC3339) lies within one
	// minute of now, in either direction.
	nearNow := func(s string) bool {
		t.Helper()
		parsed, err := time.Parse(time.RFC3339, s)
		if err != nil {
			t.Fatalf("result %q is not RFC3339: %v", s, err)
		}
		delta := parsed.Sub(now)
		if delta < 0 {
			delta = -delta
		}
		return delta <= time.Minute
	}
	stamp := func(offset time.Duration) string {
		return now.Add(offset).Format(time.RFC3339)
	}
	withTimestamp := func(ts string) map[string]interface{} {
		return map[string]interface{}{"timestamp": ts}
	}
	const day = 24 * time.Hour

	cases := []struct {
		label    string
		msg      map[string]interface{}
		verbatim string // non-empty: resolveRxTime must return exactly this
		note     string // suffix of the "expected ~now" failure text
	}{
		{label: "plausible past timestamp", msg: withTimestamp(stamp(-5 * time.Hour)), verbatim: stamp(-5 * time.Hour)},
		{label: "missing timestamp", msg: map[string]interface{}{}},
		{label: "garbage timestamp", msg: withTimestamp("garbage")},
		{label: "future timestamp", msg: withTimestamp(stamp(48 * time.Hour)), note: " (rejected)"},
		// RTC-reset node reporting a factory date — must not drag first_seen back.
		{label: "stale factory timestamp", msg: withTimestamp("2020-01-01T00:00:00Z"), note: " (rejected)"},
		// Just past the 30-day floor → rejected.
		{label: "stale timestamp >30d", msg: withTimestamp(stamp(-31 * day)), note: " (rejected)"},
		// Just inside the 30-day floor → used verbatim.
		{label: "recent timestamp <30d", msg: withTimestamp(stamp(-29 * day)), verbatim: stamp(-29 * day)},
	}
	for _, c := range cases {
		got, _ := resolveRxTime(c.msg, "test")
		if c.verbatim != "" {
			if got != c.verbatim {
				t.Errorf("%s: got %q want %q", c.label, got, c.verbatim)
			}
			continue
		}
		if !nearNow(got) {
			t.Errorf("%s: got %q, expected ~now%s", c.label, got, c.note)
		}
	}
}
// Regression for issue #1463: naive (zone-less) ISO timestamps from observers
// in negative-UTC-offset zones (e.g. California PDT, UTC-7) were read as UTC,
// giving rxTime values 7h in the past that poisoned `last_seen` and left the
// observer permanently "Stale" in the UI. The symmetric clamp collapses any
// naive timestamp more than 15 min away from server-now to `now()`, while
// zone-aware timestamps (RFC3339 with Z or an offset) are still honored
// regardless of skew, since those come from well-behaved observers.
func TestResolveRxTimeNaiveTimestampClamp(t *testing.T) {
	const (
		naiveLayout       = "2006-01-02T15:04:05"
		naiveMicrosLayout = "2006-01-02T15:04:05.000000"
	)
	now := time.Now().UTC()

	// nearNow reports whether s (which must be RFC3339) lies within one
	// minute of now, in either direction.
	nearNow := func(s string) bool {
		t.Helper()
		parsed, err := time.Parse(time.RFC3339, s)
		if err != nil {
			t.Fatalf("result %q is not RFC3339: %v", s, err)
		}
		delta := parsed.Sub(now)
		if delta < 0 {
			delta = -delta
		}
		return delta <= time.Minute
	}
	resolve := func(ts string) string {
		got, _ := resolveRxTime(map[string]interface{}{"timestamp": ts}, "test")
		return got
	}
	sevenHoursAgo := now.Add(-7 * time.Hour)

	// California observer (UTC-7) emitting a naive local-clock timestamp:
	// must NOT be stored verbatim 7h in the past — clamp to ~now.
	if got := resolve(sevenHoursAgo.Format(naiveLayout)); !nearNow(got) {
		t.Errorf("naive past timestamp (UTC-7 observer): got %q, expected ~now (clamped)", got)
	}

	// Naive future just minutes ahead (UTC+N observer, existing soft-clamp
	// behavior): still clamped to now.
	if got := resolve(now.Add(5 * time.Minute).Format(naiveLayout)); !nearNow(got) {
		t.Errorf("naive future timestamp: got %q, expected ~now (clamped)", got)
	}

	// Naive microsecond layout (python isoformat without tz) — same clamp.
	if got := resolve(sevenHoursAgo.Format(naiveMicrosLayout)); !nearNow(got) {
		t.Errorf("naive past timestamp w/ micros: got %q, expected ~now (clamped)", got)
	}

	// Well-behaved observer: a Z-suffixed past timestamp passes through
	// verbatim even when hours old (buffered uploads must be preserved).
	wantZ := sevenHoursAgo.Format(time.RFC3339)
	if got := resolve(wantZ); got != wantZ {
		t.Errorf("Z-suffixed past timestamp must pass through: got %q want %q", got, wantZ)
	}

	// Well-behaved observer with an explicit UTC-7 offset: the result is
	// canonicalized to UTC but must denote the same moment in time.
	inPDT := sevenHoursAgo.In(time.FixedZone("PDT", -7*3600))
	wantUTC := inPDT.UTC().Format(time.RFC3339)
	if got := resolve(inPDT.Format(time.RFC3339)); got != wantUTC {
		t.Errorf("offset-suffixed timestamp: got %q want %q", got, wantUTC)
	}

	// Naive timestamp 2 min in the past (an observer that happens to run in
	// UTC): inside the tolerance window, so it passes through verbatim.
	twoMinutesAgo := now.Add(-2 * time.Minute)
	wantClose := twoMinutesAgo.Format(time.RFC3339)
	if got := resolve(twoMinutesAgo.Format(naiveLayout)); got != wantClose {
		t.Errorf("naive timestamp within tolerance: got %q, expected %q (verbatim)", got, wantClose)
	}
}
-31
View File
@@ -1,31 +0,0 @@
package main
import "strings"
// sanitizeLogString neutralizes ASCII control characters so that a
// node-controlled string (advert name, observer origin, channel name) cannot
// forge extra lines in the log stream. Every rune below 0x20 — including
// CR (\r), LF (\n), TAB (\t) and NUL (\x00) — and 0x7F (DEL) becomes '?'.
//
// This is deliberately stricter than sanitizeName, which keeps \t and \n
// because they can occur in legitimately stored display names; log sinks
// want neither.
//
// The string is processed rune by rune, so multibyte UTF-8 (Cyrillic, emoji)
// is preserved. Bytes that are not valid UTF-8 come out as U+FFFD.
//
// See audit-input-vulns-20260603 (LOW — log injection via newline in advert
// name) and references at cmd/ingestor/main.go:659,689.
func sanitizeLogString(s string) string {
	return strings.Map(func(r rune) rune {
		switch {
		case r < 0x20, r == 0x7f:
			return '?'
		default:
			return r
		}
	}, s)
}
-32
View File
@@ -1,32 +0,0 @@
package main
import "testing"
// TestSanitizeLogString pins the log-injection defense introduced for
// audit-input-vulns-20260603 (LOW — log injection via newline in advert
// name): control characters become '?', everything else is untouched.
func TestSanitizeLogString(t *testing.T) {
	type sanitizeCase struct {
		name     string
		input    string
		expected string
	}
	tests := []sanitizeCase{
		{name: "plain ascii preserved", input: "alpha-node", expected: "alpha-node"},
		{name: "unicode preserved", input: "Иван привет 🦊", expected: "Иван привет 🦊"},
		{name: "lf stripped", input: "evil\n[security] forged-line", expected: "evil?[security] forged-line"},
		{name: "cr stripped", input: "evil\rfake-log", expected: "evil?fake-log"},
		{name: "crlf stripped", input: "a\r\nb", expected: "a??b"},
		{name: "tab stripped", input: "a\tb", expected: "a?b"},
		{name: "nul stripped", input: "a\x00b", expected: "a?b"},
		{name: "del stripped", input: "a\x7fb", expected: "a?b"},
		{name: "bell stripped", input: "a\x07b", expected: "a?b"},
		{name: "empty unchanged", input: "", expected: ""},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if actual := sanitizeLogString(tt.input); actual != tt.expected {
				t.Fatalf("sanitizeLogString(%q) = %q, want %q", tt.input, actual, tt.expected)
			}
		})
	}
}
-339
View File
@@ -1,339 +0,0 @@
package main
import (
"crypto/ed25519"
"encoding/binary"
"encoding/hex"
"strings"
"testing"
)
// buildAdvertHex returns the hex encoding of a complete ADVERT packet:
//
//	header(1) + pathByte(1) + pubkey(32) + timestamp(4, LE) + signature(64) + appdata
//
// The signature is produced with privKey over
// pubkey(32) + timestamp(4, LE) + appdata.
func buildAdvertHex(pubKey ed25519.PublicKey, privKey ed25519.PrivateKey, timestamp uint32, appdata []byte) string {
	const (
		advertFloodHeader = 0x11 // ADVERT (0x04 << 2) | FLOOD (1)
		noHopsPathByte    = 0x00 // pathByte=0: no hops
		keyLen            = 32
		tsLen             = 4
	)

	// Message covered by the signature.
	signed := make([]byte, keyLen+tsLen+len(appdata))
	copy(signed[:keyLen], pubKey)
	binary.LittleEndian.PutUint32(signed[keyLen:keyLen+tsLen], timestamp)
	copy(signed[keyLen+tsLen:], appdata)
	signature := ed25519.Sign(privKey, signed)

	// Wire packet: two header bytes followed by the payload fields. The
	// little-endian timestamp bytes are reused from the signed message.
	pkt := make([]byte, 0, 2+len(pubKey)+tsLen+len(signature)+len(appdata))
	pkt = append(pkt, advertFloodHeader, noHopsPathByte)
	pkt = append(pkt, pubKey...)
	pkt = append(pkt, signed[keyLen:keyLen+tsLen]...)
	pkt = append(pkt, signature...)
	pkt = append(pkt, appdata...)
	return hex.EncodeToString(pkt)
}
// makeAppdata builds minimal appdata: a flags byte, the name bytes, and a
// terminating NUL.
func makeAppdata(name string) []byte {
	const companionWithName = 0x81 // hasName=true, type=companion(1)

	out := make([]byte, 0, len(name)+2)
	out = append(out, companionWithName)
	out = append(out, name...)
	return append(out, 0x00) // null terminator
}
// TestSigValidation_ValidAdvertStored feeds handleMessage a correctly signed
// ADVERT and expects at least one row in transmissions.
//
// Key generation and the COUNT query are error-checked so that a broken
// fixture or schema is reported as such, not as "advert was not stored".
func TestSigValidation_ValidAdvertStored(t *testing.T) {
	dbPath := t.TempDir() + "/test.db"
	store, err := OpenStoreWithInterval(dbPath, 300)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	pub, priv, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	appdata := makeAppdata("TestNode")
	rawHex := buildAdvertHex(pub, priv, 1700000000, appdata)

	source := MQTTSource{Name: "test"}
	msg := newMockMsg("meshcore/US/obs1/packet", `{"raw":"`+rawHex+`","origin":"TestObs"}`)
	cfg := &Config{}
	handleMessage(store, "test", source, msg, nil, nil, cfg)

	// Verify packet was stored.
	var count int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&count); err != nil {
		t.Fatalf("count transmissions: %v", err)
	}
	if count == 0 {
		t.Fatal("valid advert should be stored, got 0 transmissions")
	}
}
// TestSigValidation_TamperedSignatureDropped corrupts one signature nibble of
// an otherwise valid ADVERT and expects handleMessage to keep it out of
// transmissions, record it in dropped_packets with the identifying fields,
// and increment the signature-drop counter.
//
// Every query error is checked: an ignored Scan error would leave the count
// at zero and let the "nothing stored" assertion pass vacuously.
func TestSigValidation_TamperedSignatureDropped(t *testing.T) {
	dbPath := t.TempDir() + "/test.db"
	store, err := OpenStoreWithInterval(dbPath, 300)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	pub, priv, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	appdata := makeAppdata("BadNode")
	rawHex := buildAdvertHex(pub, priv, 1700000000, appdata)

	// Tamper with the signature. It starts at packet offset
	// 2 (header+path) + 32 (pubkey) + 4 (timestamp) = 38, i.e. hex chars
	// 76-77; changing hex char 76 alters the first signature byte.
	rawBytes := []byte(rawHex)
	if rawBytes[76] == '0' {
		rawBytes[76] = 'f'
	} else {
		rawBytes[76] = '0'
	}
	tamperedHex := string(rawBytes)

	source := MQTTSource{Name: "test"}
	msg := newMockMsg("meshcore/US/obs1/packet", `{"raw":"`+tamperedHex+`","origin":"TestObs"}`)
	cfg := &Config{}
	handleMessage(store, "test", source, msg, nil, nil, cfg)

	// Verify packet was NOT stored in transmissions.
	var txCount int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&txCount); err != nil {
		t.Fatalf("count transmissions: %v", err)
	}
	if txCount != 0 {
		t.Fatalf("tampered advert should be dropped, got %d transmissions", txCount)
	}

	// Verify it was recorded in dropped_packets.
	var dropCount int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM dropped_packets").Scan(&dropCount); err != nil {
		t.Fatalf("count dropped_packets: %v", err)
	}
	if dropCount == 0 {
		t.Fatal("tampered advert should be recorded in dropped_packets")
	}

	// Verify drop counter incremented.
	if store.Stats.SignatureDrops.Load() != 1 {
		t.Fatalf("expected 1 signature drop, got %d", store.Stats.SignatureDrops.Load())
	}

	// Verify dropped_packets has correct fields.
	var reason, nodeKey, nodeName, obsID string
	err = store.db.QueryRow("SELECT reason, node_pubkey, node_name, observer_id FROM dropped_packets LIMIT 1").
		Scan(&reason, &nodeKey, &nodeName, &obsID)
	if err != nil {
		t.Fatalf("read dropped_packets row: %v", err)
	}
	if reason != "invalid signature" {
		t.Fatalf("expected reason 'invalid signature', got %q", reason)
	}
	if nodeKey == "" {
		t.Fatal("dropped packet should have node_pubkey")
	}
	if !strings.Contains(nodeName, "BadNode") {
		t.Fatalf("expected node_name to contain 'BadNode', got %q", nodeName)
	}
	if obsID != "obs1" {
		t.Fatalf("expected observer_id 'obs1', got %q", obsID)
	}
}
// TestSigValidation_TruncatedAppdataDropped signs an ADVERT over its full
// appdata, then removes the last two appdata bytes from the raw packet so
// the signature no longer matches, and expects nothing to be stored.
//
// The COUNT query error is checked: if it were ignored, a failing query
// would leave txCount at zero and the test would pass vacuously.
func TestSigValidation_TruncatedAppdataDropped(t *testing.T) {
	dbPath := t.TempDir() + "/test.db"
	store, err := OpenStoreWithInterval(dbPath, 300)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	pub, priv, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	appdata := makeAppdata("TruncNode")
	rawHex := buildAdvertHex(pub, priv, 1700000000, appdata)

	// The signature was computed with the full appdata. Dropping the last
	// 4 hex chars (2 bytes of appdata) makes it invalid.
	truncatedHex := rawHex[:len(rawHex)-4]

	source := MQTTSource{Name: "test"}
	msg := newMockMsg("meshcore/US/obs1/packet", `{"raw":"`+truncatedHex+`","origin":"TestObs"}`)
	cfg := &Config{}
	handleMessage(store, "test", source, msg, nil, nil, cfg)

	var txCount int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&txCount); err != nil {
		t.Fatalf("count transmissions: %v", err)
	}
	if txCount != 0 {
		t.Fatalf("truncated advert should be dropped, got %d transmissions", txCount)
	}
}
// TestSigValidation_DisabledByConfig sends an ADVERT with a corrupted
// signature while Config.ValidateSignatures is explicitly false and expects
// the packet to be stored anyway.
//
// Key generation and the COUNT query are error-checked so that a fixture or
// schema failure is not misreported as "tampered advert was not stored".
func TestSigValidation_DisabledByConfig(t *testing.T) {
	dbPath := t.TempDir() + "/test.db"
	store, err := OpenStoreWithInterval(dbPath, 300)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	pub, priv, err := ed25519.GenerateKey(nil)
	if err != nil {
		t.Fatalf("GenerateKey: %v", err)
	}
	appdata := makeAppdata("NoValNode")
	rawHex := buildAdvertHex(pub, priv, 1700000000, appdata)

	// Tamper with the signature: hex char 76 is the first nibble of packet
	// byte 38, where the signature begins.
	rawBytes := []byte(rawHex)
	if rawBytes[76] == '0' {
		rawBytes[76] = 'f'
	} else {
		rawBytes[76] = '0'
	}
	tamperedHex := string(rawBytes)

	source := MQTTSource{Name: "test"}
	msg := newMockMsg("meshcore/US/obs1/packet", `{"raw":"`+tamperedHex+`","origin":"TestObs"}`)
	falseVal := false
	cfg := &Config{ValidateSignatures: &falseVal}
	handleMessage(store, "test", source, msg, nil, nil, cfg)

	// With validation disabled, the tampered packet should be stored.
	var txCount int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM transmissions").Scan(&txCount); err != nil {
		t.Fatalf("count transmissions: %v", err)
	}
	if txCount == 0 {
		t.Fatal("with validateSignatures=false, tampered advert should be stored")
	}
}
// TestSigValidation_DropCounterIncrements sends three ADVERTs with corrupted
// signatures (distinct timestamps) and expects the signature-drop counter to
// reach exactly three.
func TestSigValidation_DropCounterIncrements(t *testing.T) {
	store, err := OpenStoreWithInterval(t.TempDir()+"/test.db", 300)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	pub, priv, _ := ed25519.GenerateKey(nil)
	var (
		source = MQTTSource{Name: "test"}
		cfg    = &Config{}
	)

	const attempts = 3
	for i := 0; i < attempts; i++ {
		forged := []byte(buildAdvertHex(pub, priv, uint32(1700000000+i), makeAppdata("Node")))
		// Corrupt hex char 76: first nibble of packet byte 38, where the
		// signature begins.
		switch forged[76] {
		case '0':
			forged[76] = 'f'
		default:
			forged[76] = '0'
		}
		payload := `{"raw":"` + string(forged) + `","origin":"Obs"}`
		handleMessage(store, "test", source, newMockMsg("meshcore/US/obs1/packet", payload), nil, nil, cfg)
	}

	if drops := store.Stats.SignatureDrops.Load(); drops != attempts {
		t.Fatalf("expected 3 signature drops, got %d", drops)
	}
}
// TestSigValidation_LogContainsFields verifies that the dropped_packets row
// written for a tampered ADVERT carries every required field: hash, reason,
// observer id and name, node pubkey and node name.
func TestSigValidation_LogContainsFields(t *testing.T) {
	store, err := OpenStoreWithInterval(t.TempDir()+"/test.db", 300)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	pub, priv, _ := ed25519.GenerateKey(nil)
	forged := []byte(buildAdvertHex(pub, priv, 1700000000, makeAppdata("LogTestNode")))
	// Corrupt hex char 76: first nibble of packet byte 38, where the
	// signature begins.
	if forged[76] != '0' {
		forged[76] = '0'
	} else {
		forged[76] = 'f'
	}

	payload := `{"raw":"` + string(forged) + `","origin":"MyObserver"}`
	handleMessage(store, "test", MQTTSource{Name: "test"}, newMockMsg("meshcore/US/obs1/packet", payload), nil, nil, &Config{})

	var row struct {
		hash, reason, observerID, observerName, pubkey, nodeName string
	}
	err = store.db.QueryRow("SELECT hash, reason, observer_id, observer_name, node_pubkey, node_name FROM dropped_packets LIMIT 1").
		Scan(&row.hash, &row.reason, &row.observerID, &row.observerName, &row.pubkey, &row.nodeName)
	if err != nil {
		t.Fatal(err)
	}

	if len(row.hash) == 0 {
		t.Error("dropped packet should have hash")
	}
	if want := "invalid signature"; row.reason != want {
		t.Errorf("expected reason 'invalid signature', got %q", row.reason)
	}
	if want := "obs1"; row.observerID != want {
		t.Errorf("expected observer_id 'obs1', got %q", row.observerID)
	}
	if want := "MyObserver"; row.observerName != want {
		t.Errorf("expected observer_name 'MyObserver', got %q", row.observerName)
	}
	if len(row.pubkey) == 0 {
		t.Error("dropped packet should have node_pubkey")
	}
	if !strings.Contains(row.nodeName, "LogTestNode") {
		t.Errorf("expected node_name containing 'LogTestNode', got %q", row.nodeName)
	}
}
// TestPruneDroppedPackets seeds one 60-day-old and one fresh dropped_packets
// row, prunes with a 30-day retention, and expects exactly the old row to be
// removed.
//
// The seeding INSERTs and the final COUNT are error-checked so that a schema
// or fixture failure is reported directly instead of surfacing as a
// misleading "expected 1 pruned" mismatch.
func TestPruneDroppedPackets(t *testing.T) {
	dbPath := t.TempDir() + "/test.db"
	store, err := OpenStoreWithInterval(dbPath, 300)
	if err != nil {
		t.Fatal(err)
	}
	defer store.Close()

	seed := []string{
		// Old row: outside the 30-day retention window.
		`INSERT INTO dropped_packets (hash, reason, dropped_at) VALUES ('old', 'test', datetime('now', '-60 days'))`,
		// Fresh row: must survive the prune.
		`INSERT INTO dropped_packets (hash, reason, dropped_at) VALUES ('new', 'test', datetime('now'))`,
	}
	for _, stmt := range seed {
		if _, err := store.db.Exec(stmt); err != nil {
			t.Fatalf("seeding dropped_packets: %v", err)
		}
	}

	n, err := store.PruneDroppedPackets(30)
	if err != nil {
		t.Fatal(err)
	}
	if n != 1 {
		t.Fatalf("expected 1 pruned, got %d", n)
	}

	var count int
	if err := store.db.QueryRow("SELECT COUNT(*) FROM dropped_packets").Scan(&count); err != nil {
		t.Fatalf("count dropped_packets: %v", err)
	}
	if count != 1 {
		t.Fatalf("expected 1 remaining, got %d", count)
	}
}
// TestShouldValidateSignatures_Default checks Config.ShouldValidateSignatures
// for an unset ValidateSignatures pointer (expected true) and for explicit
// false and true values.
func TestShouldValidateSignatures_Default(t *testing.T) {
	disabled, enabled := false, true
	checks := []struct {
		cfg     *Config
		want    bool
		failure string
	}{
		{cfg: &Config{}, want: true, failure: "default should be true"},
		{cfg: &Config{ValidateSignatures: &disabled}, want: false, failure: "explicit false should be false"},
		{cfg: &Config{ValidateSignatures: &enabled}, want: true, failure: "explicit true should be true"},
	}
	for _, c := range checks {
		if c.cfg.ShouldValidateSignatures() != c.want {
			t.Fatal(c.failure)
		}
	}
}
// newMockMsg returns a mockMessage carrying the given topic and payload, for
// use as a minimal mqtt.Message in tests.
func newMockMsg(topic, payload string) *mockMessage {
	m := new(mockMessage)
	m.topic = topic
	m.payload = []byte(payload)
	return m
}
-187
View File
@@ -1,187 +0,0 @@
package main
import (
"sync"
"sync/atomic"
"time"
)
// SourceStatusSnapshot is the per-MQTT-source connection state and counter
// view written to the ingestor stats file (under "source_statuses") and
// consumed by cmd/server's /api/mqtt/status handler (#1043).
//
// The Last*Unix fields are unix seconds (0 = "never"). ConnectCount,
// DisconnectCount and PacketsTotal are cumulative counters. PacketsLast5m is
// a sliding 5-minute count derived from a per-second ring buffer. LastError
// is omitted from the JSON when empty.
type SourceStatusSnapshot struct {
Name string `json:"name"`
// Broker is copied through unmodified from the in-memory state.
Broker string `json:"broker"`
Connected bool `json:"connected"`
LastConnectUnix int64 `json:"lastConnectUnix"`
LastDisconnectUnix int64 `json:"lastDisconnectUnix"`
LastPacketUnix int64 `json:"lastPacketUnix"`
ConnectCount int64 `json:"connectCount"`
DisconnectCount int64 `json:"disconnectCount"`
PacketsTotal int64 `json:"packetsTotal"`
PacketsLast5m int64 `json:"packetsLast5m"`
LastError string `json:"lastError,omitempty"`
}
// sourceStatusState is the in-memory per-source counter set. The scalar
// fields are typed atomics, so MarkConnect / MarkDisconnect / MarkPacket and
// snapshot update and read them without taking a lock for those fields.
//
// The 5-minute sliding window uses a 300-element per-second ring (one slot
// per second) guarded by ringMu. MarkPacket takes ringMu on every call to
// reset-or-increment the current second's slot, and sumLast5m takes it while
// summing, so the packet path is NOT lock-free.
//
// Memory: one state per source (typically 1-5 in production). Two parallel
// 300-element int64 arrays = 4.8KB/source — fine.
type sourceStatusState struct {
name string
broker string // raw broker URL — server-side handler masks the password
connected atomic.Bool
lastConnectUnix atomic.Int64
lastDisconnectUnix atomic.Int64
lastPacketUnix atomic.Int64
connectCount atomic.Int64
disconnectCount atomic.Int64
packetsTotal atomic.Int64
// 5-minute sliding window: per-second buckets keyed by unix second.
// Stored as parallel arrays so we can both zero-out a stale slot AND
// know whether a slot's contents are still inside the window.
ringMu sync.Mutex
ringSec [300]int64 // unix second this slot represents (0 = unused)
ringCount [300]int64 // packets received in that second
// lastError is rare-write/rare-read so a plain mutex is fine.
// It is set by MarkDisconnect (non-nil err) and cleared by MarkConnect.
errMu sync.RWMutex
lastError string
}
// MarkConnect records a successful (re)connection to the broker at now: it
// flags the source as connected, stores the connect time, and bumps the
// connect counter.
//
// It also wipes lastError. Without that, the UI would show connected=true
// next to a stale "connection refused" from the previous disconnect
// (#1682 munger review r1).
func (s *sourceStatusState) MarkConnect(now time.Time) {
	s.connected.Store(true)
	s.lastConnectUnix.Store(now.Unix())
	s.connectCount.Add(1)

	s.errMu.Lock()
	defer s.errMu.Unlock()
	s.lastError = ""
}
// MarkDisconnect records the broker dropping the connection at now: it flags
// the source as disconnected, stores the disconnect time, and bumps the
// disconnect counter. A non-nil err replaces lastError with err.Error(); a
// nil err leaves lastError untouched.
func (s *sourceStatusState) MarkDisconnect(now time.Time, err error) {
	s.connected.Store(false)
	s.lastDisconnectUnix.Store(now.Unix())
	s.disconnectCount.Add(1)

	if err == nil {
		return
	}
	reason := err.Error()
	s.errMu.Lock()
	defer s.errMu.Unlock()
	s.lastError = reason
}
// MarkPacket records receipt of an MQTT message at now. Hot path.
//
// It updates lastPacketUnix and packetsTotal atomically, then, under ringMu,
// counts the packet in the ring slot for now's unix second. A slot that
// still belongs to a different second is restarted at one.
func (s *sourceStatusState) MarkPacket(now time.Time) {
	sec := now.Unix()
	s.lastPacketUnix.Store(sec)
	s.packetsTotal.Add(1)

	idx := sec % int64(len(s.ringSec))
	s.ringMu.Lock()
	defer s.ringMu.Unlock()
	if s.ringSec[idx] == sec {
		s.ringCount[idx]++
		return
	}
	// Slot holds another second's data: claim it for sec.
	s.ringSec[idx] = sec
	s.ringCount[idx] = 1
}
// sumLast5m returns how many MarkPacket calls fall inside the window of
// len(ringSec) seconds (300) that ends at now, inclusive. Slots whose stored
// second lies outside that window are skipped, so stale counts never leak
// into the total.
func (s *sourceStatusState) sumLast5m(now time.Time) int64 {
	newest := now.Unix()
	oldest := newest - int64(len(s.ringSec)) + 1

	s.ringMu.Lock()
	defer s.ringMu.Unlock()

	var sum int64
	for i := range s.ringSec {
		if sec := s.ringSec[i]; sec < oldest || sec > newest {
			continue
		}
		sum += s.ringCount[i]
	}
	return sum
}
// snapshot copies the state into a serializable SourceStatusSnapshot.
// lastError is read under errMu, the scalar fields via individual atomic
// loads, and PacketsLast5m is computed by sumLast5m(now).
func (s *sourceStatusState) snapshot(now time.Time) SourceStatusSnapshot {
	var view SourceStatusSnapshot

	s.errMu.RLock()
	view.LastError = s.lastError
	s.errMu.RUnlock()

	view.Name = s.name
	view.Broker = s.broker
	view.Connected = s.connected.Load()
	view.LastConnectUnix = s.lastConnectUnix.Load()
	view.LastDisconnectUnix = s.lastDisconnectUnix.Load()
	view.LastPacketUnix = s.lastPacketUnix.Load()
	view.ConnectCount = s.connectCount.Load()
	view.DisconnectCount = s.disconnectCount.Load()
	view.PacketsTotal = s.packetsTotal.Load()
	view.PacketsLast5m = s.sumLast5m(now)
	return view
}
// sourceStatusRegistry holds one sourceStatusState per source. Keyed by
// tag (which is the source Name, or the Broker URL if the operator left
// the name blank).
//
// sourceStatusRegistryMu guards the map itself: the functions in this file
// take it for writing when adding or replacing entries and for reading when
// looking up or iterating.
var (
sourceStatusRegistryMu sync.RWMutex
sourceStatusRegistry = map[string]*sourceStatusState{}
)
// RegisterSourceStatus returns the state registered under tag, creating it
// with the given broker on first use. Safe for cold-start use and
// idempotent: registering an existing tag again hands back the same state
// (the broker argument is then ignored), so counters aren't reset across
// reconnects.
func RegisterSourceStatus(tag, broker string) *sourceStatusState {
	sourceStatusRegistryMu.Lock()
	defer sourceStatusRegistryMu.Unlock()

	state, found := sourceStatusRegistry[tag]
	if !found {
		state = &sourceStatusState{name: tag, broker: broker}
		sourceStatusRegistry[tag] = state
	}
	return state
}
// lookupSourceStatus returns the state registered under tag, or nil when no
// source has been registered with that tag.
func lookupSourceStatus(tag string) *sourceStatusState {
	sourceStatusRegistryMu.RLock()
	state := sourceStatusRegistry[tag]
	sourceStatusRegistryMu.RUnlock()
	return state
}
// SnapshotSourceStatuses returns one snapshot, taken at now, for every
// registered source. The result is surfaced via the ingestor stats file
// under "source_statuses" so /api/mqtt/status can serve it (#1043). The
// slice follows map iteration order, so its ordering is unspecified.
func SnapshotSourceStatuses(now time.Time) []SourceStatusSnapshot {
	sourceStatusRegistryMu.RLock()
	defer sourceStatusRegistryMu.RUnlock()

	snaps := make([]SourceStatusSnapshot, len(sourceStatusRegistry))
	next := 0
	for _, state := range sourceStatusRegistry {
		snaps[next] = state.snapshot(now)
		next++
	}
	return snaps
}
// resetSourceStatusRegistry clears the registry. Test-only helper.
func resetSourceStatusRegistry() {
sourceStatusRegistryMu.Lock()
defer sourceStatusRegistryMu.Unlock()
sourceStatusRegistry = map[string]*sourceStatusState{}
}
-116
View File
@@ -1,116 +0,0 @@
package main
import (
"errors"
"testing"
"time"
)
// TestSourceStatus_BasicLifecycle walks one source through register,
// connect and three packets, then checks the snapshot counters that back the
// server-side /api/mqtt/status endpoint (#1043), including the 5-minute
// window emptying after an idle period.
func TestSourceStatus_BasicLifecycle(t *testing.T) {
	resetSourceStatusRegistry()
	t.Cleanup(resetSourceStatusRegistry)

	const brokerURL = "mqtt://broker.example.com:1883"
	state := RegisterSourceStatus("local", brokerURL)
	if state == nil {
		t.Fatal("RegisterSourceStatus returned nil")
	}
	// Re-registration is idempotent.
	if again := RegisterSourceStatus("local", "mqtt://other"); again != state {
		t.Fatal("RegisterSourceStatus not idempotent")
	}

	start := time.Unix(1_700_000_000, 0)
	state.MarkConnect(start)
	// One packet in each of three consecutive seconds.
	for offset := 0; offset < 3; offset++ {
		state.MarkPacket(start.Add(time.Duration(offset) * time.Second))
	}

	live := state.snapshot(start.Add(3 * time.Second))
	if !live.Connected {
		t.Error("snapshot.Connected = false, want true after MarkConnect")
	}
	if got := live.PacketsTotal; got != 3 {
		t.Errorf("PacketsTotal = %d, want 3", got)
	}
	if got := live.PacketsLast5m; got != 3 {
		t.Errorf("PacketsLast5m = %d, want 3", got)
	}
	if got := live.ConnectCount; got != 1 {
		t.Errorf("ConnectCount = %d, want 1", got)
	}
	if want := start.Unix(); live.LastConnectUnix != want {
		t.Errorf("LastConnectUnix = %d, want %d", live.LastConnectUnix, want)
	}
	if live.Broker != brokerURL {
		t.Errorf("Broker = %q, want raw URL passthrough (server masks)", live.Broker)
	}

	// After 5 minutes idle, sliding window must be empty.
	idle := state.snapshot(start.Add(6 * time.Minute))
	if got := idle.PacketsLast5m; got != 0 {
		t.Errorf("PacketsLast5m after 6m idle = %d, want 0", got)
	}
	if got := idle.PacketsTotal; got != 3 {
		t.Errorf("PacketsTotal must be lifetime-cumulative, got %d", got)
	}
}
// TestSourceStatus_Disconnect checks that MarkDisconnect (with a nil error)
// flips Connected to false and increments DisconnectCount.
func TestSourceStatus_Disconnect(t *testing.T) {
	resetSourceStatusRegistry()
	t.Cleanup(resetSourceStatusRegistry)

	connectedAt := time.Unix(1_700_000_100, 0)
	state := RegisterSourceStatus("disco", "mqtt://x:1883")
	state.MarkConnect(connectedAt)
	state.MarkDisconnect(connectedAt.Add(time.Minute), nil)

	got := state.snapshot(connectedAt.Add(2 * time.Minute))
	if got.Connected {
		t.Error("snapshot.Connected = true after MarkDisconnect, want false")
	}
	if n := got.DisconnectCount; n != 1 {
		t.Errorf("DisconnectCount = %d, want 1", n)
	}
}
// TestSnapshotSourceStatuses_ReturnsAll registers two sources and expects
// SnapshotSourceStatuses to return one snapshot for each.
func TestSnapshotSourceStatuses_ReturnsAll(t *testing.T) {
	resetSourceStatusRegistry()
	t.Cleanup(resetSourceStatusRegistry)

	for _, tag := range []string{"a", "b"} {
		RegisterSourceStatus(tag, "mqtt://"+tag)
	}

	if got := len(SnapshotSourceStatuses(time.Now())); got != 2 {
		t.Errorf("len(snaps) = %d, want 2", got)
	}
}
// TestSourceStatus_MarkConnectClearsLastError asserts that MarkConnect wipes
// a sticky error left by an earlier MarkDisconnect (#1682 munger r1 review).
// Otherwise the UI would see connected=true alongside a stale
// "connection refused" string.
func TestSourceStatus_MarkConnectClearsLastError(t *testing.T) {
	resetSourceStatusRegistry()
	t.Cleanup(resetSourceStatusRegistry)

	state := RegisterSourceStatus("sticky", "mqtt://x:1883")
	base := time.Unix(1_700_000_200, 0)
	at := func(sec int) time.Time {
		return base.Add(time.Duration(sec) * time.Second)
	}

	state.MarkConnect(at(0))
	state.MarkDisconnect(at(1), errors.New("connection refused"))
	if failed := state.snapshot(at(2)); failed.LastError == "" {
		t.Fatalf("precondition: expected lastError after MarkDisconnect, got empty")
	}

	// Reconnect — lastError must clear.
	state.MarkConnect(at(3))
	recovered := state.snapshot(at(4))
	if recovered.LastError != "" {
		t.Errorf("snapshot.LastError = %q after MarkConnect, want empty (sticky-error regression)", recovered.LastError)
	}
	if !recovered.Connected {
		t.Errorf("snapshot.Connected = false after MarkConnect, want true")
	}
}
-274
View File
@@ -1,274 +0,0 @@
package main
import (
"bufio"
"bytes"
"encoding/json"
"log"
"os"
"time"
"github.com/meshcore-analyzer/perfio"
)
// PerfIOSample is the canonical per-process I/O rate sample. It is a type
// alias for perfio.Sample from the shared perfio package (imported here as
// github.com/meshcore-analyzer/perfio). The server consumes the same type
// when it reads this binary's stats file — sharing the type prevents silent
// JSON contract drift (#1167 follow-up).
type PerfIOSample = perfio.Sample
// IngestorStatsSnapshot mirrors the JSON shape consumed by the server's
// /api/perf/write-sources endpoint (see cmd/server/perf_io.go IngestorStats).
//
// The JSON keys mix snake_case and camelCase; they are part of the contract
// with the server, so do not normalize them.
//
// NOTE: each field below is sampled with an independent atomic.Load(), so the
// snapshot is EVENTUALLY-CONSISTENT — invariants like
// `walCommits >= tx_inserted` may be momentarily violated
// in a single sample. Consumers MUST NOT derive ratios on the assumption these
// counters were captured at the same instant; treat each field as an
// independent monotonically-increasing counter and look at deltas across
// multiple samples instead.
type IngestorStatsSnapshot struct {
SampledAt string `json:"sampledAt"`
TxInserted int64 `json:"tx_inserted"`
ObsInserted int64 `json:"obs_inserted"`
DuplicateTx int64 `json:"tx_dupes"`
NodeUpserts int64 `json:"node_upserts"`
ObserverUpserts int64 `json:"observer_upserts"`
WriteErrors int64 `json:"write_errors"`
SignatureDrops int64 `json:"sig_drops"`
WALCommits int64 `json:"walCommits"`
GroupCommitFlushes int64 `json:"groupCommitFlushes"` // always 0 — group commit reverted (refs #1129)
BackfillUpdates map[string]int64 `json:"backfillUpdates"`
// ProcIO is the ingestor's own /proc/self/io rate snapshot. Surfaced via
// the server's /api/perf/io endpoint under .ingestor (#1120 — "Both
// ingestor and server"). Optional; absent on non-Linux hosts.
ProcIO *PerfIOSample `json:"procIO,omitempty"`
// WriterPerf is the per-component SQLite writer-lock latency
// snapshot (#1340) — wait_ms / hold_ms / contention_total tagged
// by component (neighbor_builder, mqtt_handler, prune_packets,
// prune_observers, prune_metrics, vacuum). Surfaced by the server
// via /api/perf/write-sources under .writer_perf. Optional —
// older ingestor builds don't publish this field.
WriterPerf map[string]WriterStatsSnapshot `json:"writer_perf,omitempty"`
// SourceLiveness (PR #1609 M1) is the per-MQTT-source receipt vs
// write-path liveness snapshot. Keyed by source Tag. Surfaced by
// the server via /api/healthz under .ingest_liveness so operators
// can see "broker alive, write path stuck" (lastReceiptUnix recent,
// lastMessageUnix stale) distinct from "everything stalled" (both
// stale). Additive: omitempty so older server builds ignore it
// gracefully.
SourceLiveness map[string]SourceLivenessSnapshot `json:"source_liveness,omitempty"`
// SourceStatuses (#1043) is the per-MQTT-source connection state and
// counter view consumed by cmd/server's /api/mqtt/status handler.
// Additive; omitempty so older server builds ignore it.
SourceStatuses []SourceStatusSnapshot `json:"source_statuses,omitempty"`
}
// SourceLivenessSnapshot is the per-source two-clock view exposed for
// /api/healthz consumers. Both fields are unix seconds; 0 means "never".
// A recent LastReceiptUnix combined with a stale LastMessageUnix indicates
// "broker alive, write path stuck"; both stale means everything stalled.
type SourceLivenessSnapshot struct {
LastReceiptUnix int64 `json:"lastReceiptUnix"`
LastMessageUnix int64 `json:"lastMessageUnix"`
}
// statsFilePath returns the path the ingestor publishes its stats file to.
// A non-empty CORESCOPE_INGESTOR_STATS environment variable overrides the
// default (for tests / non-default deploys).
//
// SECURITY: the default lives in /tmp, which is world-writable. The writer
// uses O_NOFOLLOW + 0o600 so a pre-planted symlink cannot be used to clobber
// an arbitrary file via this path. Operators who want stronger guarantees
// should point CORESCOPE_INGESTOR_STATS at a private directory
// (e.g. /var/lib/corescope/).
func statsFilePath() string {
	const defaultStatsPath = "/tmp/corescope-ingestor-stats.json"

	path := os.Getenv("CORESCOPE_INGESTOR_STATS")
	if path == "" {
		path = defaultStatsPath
	}
	return path
}
// writeStatsAtomic publishes b at path by writing path+".tmp" and renaming it
// over path. It returns the first error from open, write, close or rename;
// whenever a step after the open fails, the temp file is removed (best effort).
//
// Symlink semantics (refs #1170):
//
//   - Temp file (path+".tmp"): opened with O_NOFOLLOW. A pre-planted symlink
//     there makes the open fail (ELOOP) instead of writing through it — the
//     protection that matters while the default path lives in world-writable
//     /tmp. O_TRUNC discards the content of an existing regular file and the
//     0o600 mode keeps the file private.
//
//   - Destination (path): NOT opened at all, so O_NOFOLLOW does not apply.
//     The function relies on os.Rename replacing whatever entry sits at path
//     (a symlink included) with the new regular file. Every byte was written
//     to the temp file beforehand, so a symlink's target is never written
//     through and keeps its contents.
//     TestWriteStatsAtomic_SymlinkAtDestIsReplaced pins this so that swapping
//     os.Rename for a primitive that follows the destination symlink (e.g.
//     open(path, O_WRONLY) without O_NOFOLLOW) fails loudly.
func writeStatsAtomic(path string, b []byte) error {
	tmpPath := path + ".tmp"
	out, err := os.OpenFile(tmpPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC|oNoFollow, 0o600)
	if err != nil {
		// Nothing was created by us on this path; no cleanup needed.
		return err
	}

	// Always close the handle. A write error takes precedence over a close
	// error, and the rename is only attempted when both succeeded.
	_, err = out.Write(b)
	if closeErr := out.Close(); err == nil {
		err = closeErr
	}
	if err == nil {
		err = os.Rename(tmpPath, path)
	}
	if err != nil {
		os.Remove(tmpPath)
		return err
	}
	return nil
}
// procIOSnapshot is the raw counter snapshot used to compute per-second rates
// across two consecutive ticks of the stats-file writer.
//
// The counter fields are absolute values copied from the parsed counters;
// procIORate turns the difference between two snapshots into rates.
type procIOSnapshot struct {
	// at is when the snapshot was taken; procIORate uses cur.at - prev.at as
	// the rate denominator.
	at time.Time
	// readBytes, writeBytes and cancelledWrite are the parsed byte counters.
	readBytes      int64
	writeBytes     int64
	cancelledWrite int64
	// syscR and syscW are the parsed read / write syscall counters.
	syscR int64
	syscW int64
	// ok is false for the zero value and whenever parsing did not succeed;
	// procIORate refuses to compute a rate unless both snapshots have ok=true.
	ok bool
}
// readProcSelfIOFn is the indirection the writer loop calls to sample
// /proc/self/io. It points at readProcSelfIO by default; tests replace it to
// feed deterministic counter snapshots without needing a Linux kernel that
// exposes /proc/self/io (CONFIG_TASK_IO_ACCOUNTING).
var readProcSelfIOFn = readProcSelfIO

// readProcSelfIO samples /proc/self/io. When the file cannot be opened
// (e.g. non-Linux hosts) it returns the zero-value snapshot, whose ok=false
// makes the caller skip the procIO block. Otherwise the snapshot is stamped
// with the current time and filled in by parseProcSelfIOInto, which also
// decides the final value of ok.
func readProcSelfIO() procIOSnapshot {
	var snap procIOSnapshot
	file, err := os.Open("/proc/self/io")
	if err != nil {
		return snap
	}
	defer file.Close()

	snap.at = time.Now()
	parseProcSelfIOInto(bufio.NewScanner(file), &snap)
	return snap
}
// parseProcSelfIOInto consumes /proc/self/io-shaped key:value lines from sc
// and copies the parsed byte / syscall counters onto out. out.ok mirrors the
// parser's verdict, so it is true only when at least one expected key was
// parsed (#1167 must-fix #3). out.at is left untouched.
//
// The parsing itself lives in perfio.ParseProcIO so the ingestor and the
// server share exactly one parser (Carmack must-fix #7).
func parseProcSelfIOInto(sc *bufio.Scanner, out *procIOSnapshot) {
	var counters perfio.Counters
	parsed := perfio.ParseProcIO(sc, &counters)

	out.syscW = counters.SyscW
	out.syscR = counters.SyscR
	out.cancelledWrite = counters.CancelledWriteBytes
	out.writeBytes = counters.WriteBytes
	out.readBytes = counters.ReadBytes
	out.ok = parsed
}
// procIORate converts two counter snapshots into a per-second rate sample.
//
// stamp is copied verbatim into the result's SampledAt (Carmack must-fix #5):
// the writer formats time.Now() once per tick and hands the same RFC3339
// string down, so the snapshot's top-level SampledAt and the inner procIO
// SampledAt cannot drift apart.
//
// It returns nil when either snapshot has ok=false, or when the time between
// them (cur.at - prev.at) is under one millisecond — which also covers a zero
// or negative interval.
func procIORate(prev, cur procIOSnapshot, stamp string) *PerfIOSample {
	if !(prev.ok && cur.ok) {
		return nil
	}
	elapsed := cur.at.Sub(prev.at).Seconds()
	if elapsed < 0.001 {
		return nil
	}

	// perSec turns a counter pair into a rate over the elapsed interval.
	perSec := func(before, after int64) float64 {
		return float64(after-before) / elapsed
	}

	sample := new(PerfIOSample)
	sample.ReadBytesPerSec = perSec(prev.readBytes, cur.readBytes)
	sample.WriteBytesPerSec = perSec(prev.writeBytes, cur.writeBytes)
	sample.CancelledWriteBytesPerSec = perSec(prev.cancelledWrite, cur.cancelledWrite)
	sample.SyscallsRead = perSec(prev.syscR, cur.syscR)
	sample.SyscallsWrite = perSec(prev.syscW, cur.syscW)
	sample.SampledAt = stamp
	return sample
}
// StartStatsFileWriter launches a background goroutine that publishes the
// current stats snapshot to disk every `interval`, so the server can serve it
// at /api/perf/write-sources. A non-positive interval is treated as one
// second. Encode and write failures are logged for that tick and never fatal.
//
// The destination is resolved through statsFilePath() once, when the writer
// loop starts; CORESCOPE_INGESTOR_STATS is therefore only re-read on process
// restart, not per tick.
//
// The goroutine loops on the ticker channel and has no stop hook.
func StartStatsFileWriter(s *Store, interval time.Duration) {
	if interval <= 0 {
		interval = time.Second
	}

	loop := func() {
		ticker := time.NewTicker(interval)
		defer ticker.Stop()

		dest := statsFilePath()

		// Baseline /proc/self/io sample; each tick reports the delta against
		// the previous one (#1120 follow-up: ingestor /proc/self/io exposure).
		lastIO := readProcSelfIOFn()

		// One buffer + encoder for the lifetime of the loop (Carmack
		// must-fix #4): the snapshot shape is stable, so the buffer grows
		// once and is reused instead of allocating per tick.
		var scratch bytes.Buffer
		encoder := json.NewEncoder(&scratch)

		for range ticker.C {
			// time.Now() is captured ONCE per tick (Carmack must-fix #5) so
			// snapshot.SampledAt and procIO.SampledAt are the same string and
			// the freshness guard validates the timestamp consumers render.
			now := time.Now().UTC()
			stamp := now.Format(time.RFC3339)

			thisIO := readProcSelfIOFn()
			rate := procIORate(lastIO, thisIO, stamp)
			lastIO = thisIO

			snapshot := IngestorStatsSnapshot{
				SampledAt:          stamp,
				TxInserted:         s.Stats.TransmissionsInserted.Load(),
				ObsInserted:        s.Stats.ObservationsInserted.Load(),
				DuplicateTx:        s.Stats.DuplicateTransmissions.Load(),
				NodeUpserts:        s.Stats.NodeUpserts.Load(),
				ObserverUpserts:    s.Stats.ObserverUpserts.Load(),
				WriteErrors:        s.Stats.WriteErrors.Load(),
				SignatureDrops:     s.Stats.SignatureDrops.Load(),
				WALCommits:         s.Stats.WALCommits.Load(),
				GroupCommitFlushes: 0, // group commit reverted (refs #1129)
				BackfillUpdates:    s.Stats.SnapshotBackfills(),
				ProcIO:             rate,
				WriterPerf:         s.WriterStatsSnapshot(),
				SourceLiveness:     SnapshotLivenessClocks(),
				SourceStatuses:     SnapshotSourceStatuses(now),
			}

			scratch.Reset()
			if err := encoder.Encode(&snapshot); err != nil {
				log.Printf("[stats-file] encode: %v", err)
				continue
			}

			// Encoder.Encode terminates its output with '\n'. Drop it so the
			// on-disk bytes stay identical to what json.Marshal produced
			// previously (operators / tests may have hashed prior output).
			payload := scratch.Bytes()
			if last := len(payload) - 1; last >= 0 && payload[last] == '\n' {
				payload = payload[:last]
			}

			if err := writeStatsAtomic(dest, payload); err != nil {
				log.Printf("[stats-file] write %s: %v", dest, err)
			}
		}
	}
	go loop()
}
-98
View File
@@ -1,98 +0,0 @@
package main
import (
"bufio"
"bytes"
"encoding/json"
"strings"
"sync/atomic"
"testing"
"time"
)
// benchProcSelfIOSample is a /proc/self/io-shaped payload (one "key: value"
// pair per line) shared by the sanity test and the parser benchmark in this
// file. The sanity test expects read_bytes to parse as 4096000.
const benchProcSelfIOSample = `rchar: 12345678
wchar: 87654321
syscr: 12345
syscw: 67890
read_bytes: 4096000
write_bytes: 8192000
cancelled_write_bytes: 12345
`
// TestStatsFileWriterBench_Sanity is a deliberately small, non-benchmark test.
// It runs the shared bench payload through the parser and asserts on the
// result, which also gives the preflight scanner at least one
// t.Error*/t.Fatal* call in this file (the scanner does not count b.Fatal in
// benchmarks as an assertion).
func TestStatsFileWriterBench_Sanity(t *testing.T) {
	var parsed procIOSnapshot
	scanner := bufio.NewScanner(strings.NewReader(benchProcSelfIOSample))
	parseProcSelfIOInto(scanner, &parsed)

	if !parsed.ok {
		t.Fatalf("expected bench sample to parse ok=true")
	}
	if got := parsed.readBytes; got != 4096000 {
		t.Errorf("readBytes = %d, want 4096000", got)
	}
}
// BenchmarkParseProcSelfIOInto measures the ingestor-side /proc/self/io
// parser on a representative payload (Carmack must-fix #3). Tracks
// allocations to verify the shared perfio.ParseProcIO path doesn't
// regress vs. the previous in-package implementation.
func BenchmarkParseProcSelfIOInto(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		// The reader, scanner and snapshot are rebuilt on every iteration,
		// so their construction is part of the measured (and
		// allocation-counted) work, not just the parse itself.
		var s procIOSnapshot
		parseProcSelfIOInto(bufio.NewScanner(strings.NewReader(benchProcSelfIOSample)), &s)
	}
}
// BenchmarkStatsFileWriter_Tick simulates the body of one writer tick
// (snapshot construction + JSON encode into the reused buffer) WITHOUT the
// disk write. Carmack must-fix #3 + #4 — it tracks the per-tick allocation
// budget of the marshaling step on a ticker that runs forever.
//
// An encode failure aborts the benchmark instead of being discarded, so a
// broken snapshot shape cannot silently turn this into a benchmark of the
// error path.
//
// NOTE(review): the snapshot built here does not populate WriterPerf,
// SourceLiveness or SourceStatuses, which StartStatsFileWriter does set on
// every tick — the measured payload is therefore smaller than production's.
func BenchmarkStatsFileWriter_Tick(b *testing.B) {
	// Mirror the writer loop's reused buffer + encoder.
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)

	// A representative non-empty BackfillUpdates map. The same map value is
	// reused for every iteration so the benchmark measures the encode path
	// rather than map allocation.
	backfills := map[string]int64{"path_a": 100, "path_b": 200}

	stamp := time.Now().UTC().Format(time.RFC3339)
	ioSample := &PerfIOSample{
		ReadBytesPerSec:           100,
		WriteBytesPerSec:          200,
		CancelledWriteBytesPerSec: 0,
		SyscallsRead:              5,
		SyscallsWrite:             6,
		SampledAt:                 stamp,
	}

	// Stand-in atomic counter (StartStatsFileWriter loads from a real Store;
	// the benchmark only needs the Load calls and concrete values).
	var n atomic.Int64
	n.Store(123456)

	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		snap := IngestorStatsSnapshot{
			SampledAt:          stamp,
			TxInserted:         n.Load(),
			ObsInserted:        n.Load(),
			DuplicateTx:        n.Load(),
			NodeUpserts:        n.Load(),
			ObserverUpserts:    n.Load(),
			WriteErrors:        n.Load(),
			SignatureDrops:     n.Load(),
			WALCommits:         n.Load(),
			GroupCommitFlushes: 0,
			BackfillUpdates:    backfills,
			ProcIO:             ioSample,
		}
		buf.Reset()
		if err := enc.Encode(&snap); err != nil {
			b.Fatalf("encode: %v", err)
		}
	}
}
-9
View File
@@ -1,9 +0,0 @@
//go:build !windows
package main
import "syscall"
// oNoFollow is syscall.O_NOFOLLOW on platforms that define it (all non-Windows targets).
// On Windows this constant does not exist; see stats_file_nofollow_windows.go.
// writeStatsAtomic ORs it into the flags used to open its temp file.
const oNoFollow = syscall.O_NOFOLLOW
@@ -1,8 +0,0 @@
//go:build windows
package main
// oNoFollow is 0 on Windows: O_NOFOLLOW is not defined in the Windows syscall
// package. The ingestor is only deployed on Linux where the flag is enforced;
// on Windows the flag is a no-op so the binary compiles and tests run.
// Being zero, it contributes nothing when OR-ed into the open flags.
const oNoFollow = 0
-51
View File
@@ -1,51 +0,0 @@
package main
import (
"bufio"
"strings"
"testing"
)
// TestParseProcSelfIO_EmptyDoesNotMarkOK — #1167 must-fix #3. Parsing an empty
// input (the same applies to one with no recognised keys) MUST leave ok=false.
// If it were marked valid, the next tick would diff real counters against
// zeros and publish a phantom write spike as the first rate.
func TestParseProcSelfIO_EmptyDoesNotMarkOK(t *testing.T) {
	var parsed procIOSnapshot
	emptyInput := bufio.NewScanner(strings.NewReader(""))
	parseProcSelfIOInto(emptyInput, &parsed)
	if parsed.ok {
		t.Errorf("empty input must produce ok=false, got ok=true (phantom-spike risk)")
	}
}
// TestParseProcSelfIO_NoKnownKeysDoesNotMarkOK feeds the parser well-formed
// lines whose keys it does not recognise (as a future /proc schema change
// might produce). Such input MUST NOT be reported as a valid sample.
func TestParseProcSelfIO_NoKnownKeysDoesNotMarkOK(t *testing.T) {
	const unknownKeys = "garbage_key: 42\nother: 99\n"
	var parsed procIOSnapshot
	parseProcSelfIOInto(bufio.NewScanner(strings.NewReader(unknownKeys)), &parsed)
	if parsed.ok {
		t.Errorf("input without recognised keys must produce ok=false, got ok=true")
	}
}
// TestParseProcSelfIO_ValidSampleMarksOK is the positive counterpart of the
// two tests above: a realistic /proc/self/io-shaped input MUST yield ok=true
// and carry the parsed byte counters.
func TestParseProcSelfIO_ValidSampleMarksOK(t *testing.T) {
	const sample = `rchar: 1024
wchar: 2048
syscr: 10
syscw: 20
read_bytes: 4096
write_bytes: 8192
cancelled_write_bytes: 1234
`
	var parsed procIOSnapshot
	scanner := bufio.NewScanner(strings.NewReader(sample))
	parseProcSelfIOInto(scanner, &parsed)

	if !parsed.ok {
		t.Fatalf("valid sample must produce ok=true")
	}
	countersMatch := parsed.readBytes == 4096 && parsed.writeBytes == 8192 && parsed.cancelledWrite == 1234
	if !countersMatch {
		t.Errorf("unexpected parsed counters: %+v", parsed)
	}
}
-168
View File
@@ -1,168 +0,0 @@
package main
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"time"
)
// TestProcIORate_ZeroValuePrevSuppressesRate guards against the phantom-delta
// regression from #1169. When os.Open("/proc/self/io") fails, readProcSelfIO
// returns a zero-value procIOSnapshot (ok=false, zero time.Time). procIORate
// must answer nil for such a prev, so the next successful read does not show
// up as an inflated rate spike.
func TestProcIORate_ZeroValuePrevSuppressesRate(t *testing.T) {
	var failedRead procIOSnapshot // zero-value: ok=false, at=zero
	goodRead := procIOSnapshot{
		at:        time.Now(),
		readBytes: 1024 * 1024 * 100,
		ok:        true,
	}

	rate := procIORate(failedRead, goodRead, "2026-01-01T00:00:00Z")
	if rate != nil {
		t.Fatalf("expected nil rate when prev is zero-value (os.Open failed), got %+v", rate)
	}
}
// TestProcIORate_NormalPath checks the happy path: two valid snapshots taken
// one second apart yield a non-nil sample whose read rate equals the byte
// delta.
func TestProcIORate_NormalPath(t *testing.T) {
	start := time.Now()
	before := procIOSnapshot{at: start, readBytes: 0, ok: true}
	after := procIOSnapshot{at: start.Add(time.Second), readBytes: 1024, ok: true}

	rate := procIORate(before, after, "2026-01-01T00:00:01Z")
	if rate == nil {
		t.Fatal("expected non-nil rate for valid prev/cur pair")
	}
	if readRate := rate.ReadBytesPerSec; readRate != 1024.0 {
		t.Errorf("ReadBytesPerSec: want 1024.0, got %v", readRate)
	}
}
// TestStatsFileWriter_PublishesProcIO asserts the ingestor's published
// stats snapshot includes a `procIO` block with the per-process I/O rate
// fields required by issue #1120 ("Both ingestor and server").
//
// It drives the real writer against the host's /proc/self/io and is skipped
// when that file is unavailable.
//
// NOTE(review): StartStatsFileWriter has no stop hook, so the goroutine
// started here keeps ticking after this test returns and after store.Close()
// has run — worth confirming this is benign (and clean under -race).
func TestStatsFileWriter_PublishesProcIO(t *testing.T) {
	if _, err := os.Stat("/proc/self/io"); err != nil {
		t.Skip("skip: /proc/self/io unavailable on this host")
	}
	dir := t.TempDir()
	statsPath := filepath.Join(dir, "ingestor-stats.json")
	// Must be set before the writer starts: the writer resolves its path
	// once, at loop start.
	t.Setenv("CORESCOPE_INGESTOR_STATS", statsPath)
	store, err := OpenStore(filepath.Join(dir, "test.db"))
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer store.Close()
	StartStatsFileWriter(store, 50*time.Millisecond)
	// Wait for at least 2 ticks so the writer has had a chance to populate
	// procIO rates from a delta.
	deadline := time.Now().Add(3 * time.Second)
	var snap map[string]interface{}
	for time.Now().Before(deadline) {
		time.Sleep(75 * time.Millisecond)
		// A missing or not-yet-parseable file just means "poll again".
		b, err := os.ReadFile(statsPath)
		if err != nil {
			continue
		}
		if err := json.Unmarshal(b, &snap); err != nil {
			continue
		}
		if _, ok := snap["procIO"]; ok {
			break
		}
	}
	// If the deadline expired without a procIO block, this assertion is what
	// fails the test.
	pio, ok := snap["procIO"].(map[string]interface{})
	if !ok {
		t.Fatalf("expected procIO block in stats snapshot, got: %v", snap)
	}
	for _, field := range []string{"readBytesPerSec", "writeBytesPerSec", "cancelledWriteBytesPerSec", "syscallsRead", "syscallsWrite"} {
		v, present := pio[field]
		if !present {
			t.Errorf("procIO missing field %q", field)
			continue
		}
		// #1167 must-fix #5: assert the field actually decodes as a JSON
		// number, not just that the key exists. An empty PerfIOSample{}
		// substruct would still serialise the keys since the inner numeric
		// fields lack omitempty — without this Kind check the test would
		// silently pass on an empty struct regression.
		if _, isFloat := v.(float64); !isFloat {
			t.Errorf("procIO[%q] expected JSON number (float64), got %T (%v)", field, v, v)
		}
	}
}
// TestWriteStatsAtomic_SymlinkAtDestIsReplaced is the regression guardrail for
// #1170. writeStatsAtomic protects its temp file (path+".tmp") with
// O_NOFOLLOW, but the rename destination (`path`) has no such protection.
// Safety there comes from os.Rename replacing whatever entry exists at the
// destination — a symlink included — with the new regular file, so the
// symlink's target is never written through (all writes went to the temp
// file).
//
// The test plants a symlink at `path` that points at an unrelated file and
// then checks that:
//
//	(a) after the write, path is a regular file rather than a symlink, and
//	(b) the unrelated target still holds its original bytes.
//
// A refactor that replaces os.Rename with something that follows the
// destination symlink (e.g. ioutil.WriteFile, or an open(path, O_WRONLY)
// without O_NOFOLLOW) makes this test fail loudly.
func TestWriteStatsAtomic_SymlinkAtDestIsReplaced(t *testing.T) {
	workDir := t.TempDir()

	// The bystander file carries sentinel bytes; following the symlink at
	// the destination would overwrite them.
	bystander := filepath.Join(workDir, "unrelated-target.bin")
	sentinel := []byte("DO-NOT-OVERWRITE-ME-#1170")
	if err := os.WriteFile(bystander, sentinel, 0o600); err != nil {
		t.Fatalf("seed target: %v", err)
	}

	// Plant the symlink where the stats file is about to be written.
	dest := filepath.Join(workDir, "stats.json")
	if err := os.Symlink(bystander, dest); err != nil {
		t.Fatalf("symlink: %v", err)
	}

	payload := []byte(`{"sampledAt":"2026-01-01T00:00:00Z"}`)
	if err := writeStatsAtomic(dest, payload); err != nil {
		t.Fatalf("writeStatsAtomic: %v", err)
	}

	// (a) the destination must now be a plain regular file.
	destInfo, err := os.Lstat(dest)
	if err != nil {
		t.Fatalf("lstat path: %v", err)
	}
	mode := destInfo.Mode()
	if mode&os.ModeSymlink != 0 {
		t.Errorf("post-write path is still a symlink (mode=%v); os.Rename should have atomically replaced it with a regular file", mode)
	}
	if !mode.IsRegular() {
		t.Errorf("post-write path is not a regular file (mode=%v)", mode)
	}

	// ... and it must hold exactly the payload that was written.
	written, err := os.ReadFile(dest)
	if err != nil {
		t.Fatalf("read path: %v", err)
	}
	if string(written) != string(payload) {
		t.Errorf("path contents: want %q, got %q", payload, written)
	}

	// (b) the file the symlink pointed at must be untouched.
	bystanderNow, err := os.ReadFile(bystander)
	if err != nil {
		t.Fatalf("read target: %v", err)
	}
	if string(bystanderNow) != string(sentinel) {
		t.Errorf("symlink target was clobbered: want %q, got %q", sentinel, bystanderNow)
	}
}
-106
View File
@@ -1,106 +0,0 @@
package main
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"time"
)
// TestStatsFileWriter_SampledAtMatchesProcIOSampledAt drives the real
// StartStatsFileWriter and asserts the byte-equal invariant established
// by #1167 Carmack must-fix #5: the writer captures time.Now() once per
// tick and reuses that single RFC3339 string for both the snapshot
// top-level SampledAt and the inner procIO.SampledAt. If a future change
// reintroduces two independent time.Now() calls — or, equivalently,
// reverts procIORate to format procIO.SampledAt from its own
// (independently-sampled) `cur.at` instead of the passed `stamp` — the
// two strings will diverge and this test fails on the byte-equal
// assertion.
//
// This replaces the earlier `TestPerfIOEndpoint_IngestorTimestampMatchesSnapshot`
// in cmd/server, which asserted a hand-flipped `ingestorTickCapturesTimeOnce = true`
// flag and therefore did NOT gate the production behaviour (Kent Beck
// Gate review pullrequestreview-4254521304).
//
// Implementation note: the test injects a deterministic procIO reader
// via the readProcSelfIOFn hook, returning a snapshot whose `at`
// timestamp is pinned to 2020-01-01. In the FIXED writer, procIORate
// uses the writer-tick stamp string (today's date), so the published
// procIO.SampledAt equals snap.SampledAt byte-for-byte. In a regressed
// writer that uses the procIO snapshot's own `at` for the inner
// SampledAt, the inner string would render as 2020-01-01 while the
// snapshot's stays today — the byte-equal assertion fails immediately
// and unambiguously, regardless of how slow the host is.
//
// NOTE(review): StartStatsFileWriter has no stop hook, so the writer
// goroutine is still calling readProcSelfIOFn when the cleanup below restores
// the hook, and that write is not synchronised with the goroutine's reads.
// Worth confirming under -race.
func TestStatsFileWriter_SampledAtMatchesProcIOSampledAt(t *testing.T) {
	dir := t.TempDir()
	statsPath := filepath.Join(dir, "ingestor-stats.json")
	// Must be set before the writer starts: the writer resolves its path
	// once, at loop start.
	t.Setenv("CORESCOPE_INGESTOR_STATS", statsPath)
	store, err := OpenStore(filepath.Join(dir, "test.db"))
	if err != nil {
		t.Fatalf("OpenStore: %v", err)
	}
	defer store.Close()
	// Inject a deterministic procIO reader. `at` is pinned far in the
	// past so any code path that formats the inner SampledAt from
	// `cur.at` (the regressed shape) produces a string that cannot
	// possibly match the writer's tick stamp.
	origFn := readProcSelfIOFn
	t.Cleanup(func() { readProcSelfIOFn = origFn })
	pinnedAt := time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
	// calls is only touched from inside the injected reader.
	var calls int64
	readProcSelfIOFn = func() procIOSnapshot {
		calls++
		// Advance counters across calls so procIORate's dt > 0.001
		// gate passes and a non-nil PerfIOSample is published. Each
		// call's `at` is one second later than the previous call's, so
		// the computed dt is positive and stable.
		return procIOSnapshot{
			at:             pinnedAt.Add(time.Duration(calls) * time.Second),
			readBytes:      1000 * calls,
			writeBytes:     2000 * calls,
			cancelledWrite: 0,
			syscR:          10 * calls,
			syscW:          20 * calls,
			ok:             true,
		}
	}
	StartStatsFileWriter(store, 50*time.Millisecond)
	// Wait for the file to land with a populated procIO block.
	deadline := time.Now().Add(3 * time.Second)
	var snap map[string]interface{}
	for time.Now().Before(deadline) {
		time.Sleep(75 * time.Millisecond)
		// A missing or not-yet-parseable file just means "poll again".
		b, err := os.ReadFile(statsPath)
		if err != nil {
			continue
		}
		if err := json.Unmarshal(b, &snap); err != nil {
			continue
		}
		if _, ok := snap["procIO"].(map[string]interface{}); ok {
			break
		}
	}
	// If the deadline expired first, the assertions below report what is
	// missing.
	topSampledAt, ok := snap["sampledAt"].(string)
	if !ok || topSampledAt == "" {
		t.Fatalf("expected snapshot.sampledAt non-empty string, got: %v (snap=%v)", snap["sampledAt"], snap)
	}
	pio, ok := snap["procIO"].(map[string]interface{})
	if !ok {
		t.Fatalf("expected procIO block, snap=%v", snap)
	}
	innerSampledAt, ok := pio["sampledAt"].(string)
	if !ok || innerSampledAt == "" {
		t.Fatalf("expected procIO.sampledAt non-empty string, got: %v", pio["sampledAt"])
	}
	// The invariant under test: both timestamps are the same string.
	if topSampledAt != innerSampledAt {
		t.Errorf("snapshot.sampledAt != procIO.sampledAt (writer reverted to two independent timestamps?)\n top: %q\n inner: %q", topSampledAt, innerSampledAt)
	}
}
@@ -1,21 +0,0 @@
// Fixture: migration block WITHOUT an async annotation and WITHOUT being
// wrapped in the async-migration helper. This file exists ONLY so that
// ~/.openclaw/skills/pr-preflight/scripts/check-async-migrations.sh
// has a known-bad sample to test against (the script is invoked with
// BASE pointing at master and FIXTURE_DIR pointing here).
//
// DO NOT add a PREFLIGHT annotation to this file. DO NOT wrap the
// migration via the async helper. The check script's correctness
// depends on this staying BAD.
//
// IMPORTANT: this file must NOT contain the literal identifier of the
// async-helper function anywhere (comments, strings, identifiers). The
// preflight gate greps a window of lines above the migration for that
// identifier as an "OK" signal, so mentioning it here would cause the
// gate to *pass* this fixture — defeating its purpose. Refer to the
// helper only obliquely as "the async-migration helper" in prose.
package fixtures
const _ = `
CREATE INDEX idx_observations_bad_sync_v1 ON observations(observer_idx, timestamp);
`
@@ -1,9 +0,0 @@
// Fixture: migration block WITH an async annotation. Companion to
// bad_sync_migration.go. The preflight check script must accept this
// because of the PREFLIGHT line directly above the migration.
package fixtures
// PREFLIGHT: async=true reason="fixture-only — ALTER ADD COLUMN is O(1) in sqlite"
const _ = `
ALTER TABLE observations ADD COLUMN annotated_good_fixture_col INTEGER DEFAULT 0;
`
-22
View File
@@ -1,22 +0,0 @@
module github.com/corescope/migrate
go 1.22
require (
github.com/meshcore-analyzer/dbschema v0.0.0
modernc.org/sqlite v1.34.5
)
replace github.com/meshcore-analyzer/dbschema => ../../internal/dbschema
require (
github.com/dustin/go-humanize v1.0.1 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/sys v0.22.0 // indirect
modernc.org/libc v1.55.3 // indirect
modernc.org/mathutil v1.6.0 // indirect
modernc.org/memory v1.8.0 // indirect
)
-43
View File
@@ -1,43 +0,0 @@
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd h1:gbpYu9NMq8jhDVbvlGkMFWCjLFlqqEZjEmObmhUy6Vo=
github.com/google/pprof v0.0.0-20240409012703-83162a5b38cd/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4=
github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic=
golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw=
golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc=
modernc.org/cc/v4 v4.21.4 h1:3Be/Rdo1fpr8GrQ7IVw9OHtplU4gWbb+wNgeoBMmGLQ=
modernc.org/cc/v4 v4.21.4/go.mod h1:HM7VJTZbUCR3rV8EYBi9wxnJ0ZBRiGE5OeGXNA0IsLQ=
modernc.org/ccgo/v4 v4.19.2 h1:lwQZgvboKD0jBwdaeVCTouxhxAyN6iawF3STraAal8Y=
modernc.org/ccgo/v4 v4.19.2/go.mod h1:ysS3mxiMV38XGRTTcgo0DQTeTmAO4oCmJl1nX9VFI3s=
modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE=
modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ=
modernc.org/gc/v2 v2.4.1 h1:9cNzOqPyMJBvrUipmynX0ZohMhcxPtMccYgGOJdOiBw=
modernc.org/gc/v2 v2.4.1/go.mod h1:wzN5dK1AzVGoH6XOzc3YZ+ey/jPgYHLuVckd62P0GYU=
modernc.org/libc v1.55.3 h1:AzcW1mhlPNrRtjS5sS+eW2ISCgSOLLNyFzRh/V3Qj/U=
modernc.org/libc v1.55.3/go.mod h1:qFXepLhz+JjFThQ4kzwzOjA/y/artDeg+pcYnY+Q83w=
modernc.org/mathutil v1.6.0 h1:fRe9+AmYlaej+64JsEEhoWuAYBkOtQiMEU7n/XgfYi4=
modernc.org/mathutil v1.6.0/go.mod h1:Ui5Q9q1TR2gFm0AQRqQUaBWFLAhQpCwNcuhBOSedWPo=
modernc.org/memory v1.8.0 h1:IqGTL6eFMaDZZhEWwcREgeMXYwmW83LYW8cROZYkg+E=
modernc.org/memory v1.8.0/go.mod h1:XPZ936zp5OMKGWPqbD3JShgd/ZoQ7899TUuQqxY+peU=
modernc.org/opt v0.1.3 h1:3XOZf2yznlhC+ibLltsDGzABUGVx8J6pnFMS3E4dcq4=
modernc.org/opt v0.1.3/go.mod h1:WdSiB5evDcignE70guQKxYUl14mgWtbClRi5wmkkTX0=
modernc.org/sortutil v1.2.0 h1:jQiD3PfS2REGJNzNCMMaLSp/wdMNieTbKX920Cqdgqc=
modernc.org/sortutil v1.2.0/go.mod h1:TKU2s7kJMf1AE84OoiGppNHJwvB753OYfNl2WRb++Ss=
modernc.org/sqlite v1.34.5 h1:Bb6SR13/fjp15jt70CL4f18JIN7p7dnMExd+UFnF15g=
modernc.org/sqlite v1.34.5/go.mod h1:YLuNmX9NKs8wRNK2ko1LW1NGYcc9FkBO69JOt1AR9JE=
modernc.org/strutil v1.2.0 h1:agBi9dp1I+eOnxXeiZawM8F4LawKv4NzGWSaLfyeNZA=
modernc.org/strutil v1.2.0/go.mod h1:/mdcBmfOibveCTBxUl5B5l6W+TTH1FXPLHZE6bTosX0=
modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y=
modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM=
-55
View File
@@ -1,55 +0,0 @@
// Command migrate runs all dbschema migrations against a SQLite
// CoreScope database and exits. Used by CI / one-shot tooling to bring
// an unmigrated fixture (or a fresh DB) up to the schema shape the
// read-only server (cmd/server) requires via dbschema.AssertReady.
//
// In production the ingestor (cmd/ingestor) runs dbschema.Apply at
// startup before subscribing to MQTT — this binary exists so CI's E2E
// job can migrate the e2e-fixture.db without booting the full ingestor
// (which needs MQTT brokers).
//
// Usage:
//
// migrate -db path/to/file.db
package main
import (
"database/sql"
"flag"
"log"
"github.com/meshcore-analyzer/dbschema"
_ "modernc.org/sqlite"
)
// main parses the -db flag, opens the SQLite database, applies every
// dbschema migration and then verifies the result with dbschema.AssertReady.
// Any failure is logged and terminates the process with a non-zero status;
// on success a single "OK" line is logged.
func main() {
	dbPath := flag.String("db", "", "path to SQLite database to migrate (required)")
	flag.Parse()
	if *dbPath == "" {
		// The log prefix is not configured yet, so it is spelled out here.
		log.Fatalf("[migrate] -db is required")
	}
	log.SetFlags(log.LstdFlags | log.Lmsgprefix)
	log.SetPrefix("[migrate] ")

	db, err := sql.Open("sqlite", *dbPath)
	if err != nil {
		log.Fatalf("open %s: %v", *dbPath, err)
	}
	// Covers the success path only: log.Fatalf exits through os.Exit, which
	// does not run deferred calls.
	defer db.Close()

	// fatalf closes the database handle before exiting, so failure paths
	// release it explicitly instead of relying on the skipped defer.
	fatalf := func(format string, args ...any) {
		db.Close()
		log.Fatalf(format, args...)
	}

	if err := db.Ping(); err != nil {
		fatalf("ping %s: %v", *dbPath, err)
	}
	if err := dbschema.Apply(db, log.Printf); err != nil {
		fatalf("dbschema.Apply: %v", err)
	}
	if err := dbschema.AssertReady(db); err != nil {
		fatalf("dbschema.AssertReady after Apply: %v (this is a bug — Apply did not produce a ready schema)", err)
	}
	log.Printf("OK: %s is migrated and ready", *dbPath)
}
-84
View File
@@ -1,84 +0,0 @@
// Test that the migrate binary brings the e2e fixture DB up to the
// shape required by cmd/server's dbschema.AssertReady. Regression test
// for PR #1289 / fix for the CI "Server failed to start within 30s"
// failure: AssertReady fired against the unmigrated fixture and the
// server fatal-logged before opening its HTTP listener.
package main
import (
"database/sql"
"io"
"os"
"path/filepath"
"testing"
"github.com/meshcore-analyzer/dbschema"
_ "modernc.org/sqlite"
)
// fixtureCandidates lists the places the committed e2e fixture DB may live,
// relative to this test's package directory. Paths are resolved against the
// runtime cwd, which is cmd/migrate when `go test` runs.
var fixtureCandidates = []string{
	"../../test-fixtures/e2e-fixture.db",
}

// locateFixture returns the absolute path of the first entry in
// fixtureCandidates that exists on disk. When none exists the calling test
// is skipped.
func locateFixture(t *testing.T) string {
	t.Helper()
	for i := range fixtureCandidates {
		candidate := fixtureCandidates[i]
		if _, statErr := os.Stat(candidate); statErr != nil {
			continue
		}
		resolved, _ := filepath.Abs(candidate)
		return resolved
	}
	t.Skipf("e2e fixture not found (looked in: %v)", fixtureCandidates)
	return ""
}
// copyFile copies the file at src to dst (created or truncated) and fails the
// test on any error. The destination's Close error is checked as well: a
// failed close can mean the final bytes never reached the file, and a
// silently truncated database copy would surface later as a confusing
// migration failure.
func copyFile(t *testing.T, src, dst string) {
	t.Helper()
	in, err := os.Open(src)
	if err != nil {
		t.Fatalf("open src: %v", err)
	}
	defer in.Close()

	out, err := os.Create(dst)
	if err != nil {
		t.Fatalf("create dst: %v", err)
	}
	if _, err := io.Copy(out, in); err != nil {
		out.Close() // best effort; the copy error is the one worth reporting
		t.Fatalf("copy: %v", err)
	}
	if err := out.Close(); err != nil {
		t.Fatalf("close dst: %v", err)
	}
}
// TestMigrateBringsFixtureToReady is the gate test for the CI bug. Before the
// fix, AssertReady against the committed fixture returned an error
// ("missing: inactive_nodes.foreign_advert" etc.). After Apply(), AssertReady
// must return nil. The test works on a copy so the committed fixture is never
// modified.
func TestMigrateBringsFixtureToReady(t *testing.T) {
	fixture := locateFixture(t)
	workCopy := filepath.Join(t.TempDir(), "fixture-copy.db")
	copyFile(t, fixture, workCopy)

	db, err := sql.Open("sqlite", workCopy)
	if err != nil {
		t.Fatalf("open: %v", err)
	}
	defer db.Close()

	// Sanity: the committed fixture is expected to lack at least one
	// migration column. If it already passes, either someone pre-migrated
	// the fixture (and this test no longer protects #1289) or AssertReady's
	// required set changed. That situation is only logged, not failed.
	if preErr := dbschema.AssertReady(db); preErr == nil {
		t.Logf("note: fixture already passes AssertReady; skipping pre-condition assertion")
	}

	if applyErr := dbschema.Apply(db, t.Logf); applyErr != nil {
		t.Fatalf("Apply: %v", applyErr)
	}
	if readyErr := dbschema.AssertReady(db); readyErr != nil {
		t.Fatalf("AssertReady after Apply: %v", readyErr)
	}
}
-293
View File
@@ -1,293 +0,0 @@
// Package main: analytics recomputer (issue #1240).
//
// Steady-state background recompute loop for expensive analytics
// endpoints. Reads always hit an atomic-pointer cache; compute runs
// on a fixed ticker in a goroutine. This eliminates the on-request
// compute-then-cache pattern where the first reader after expiry pays
// the full compute cost and blocks under writer contention.
//
// See issue #1240 and AGENTS.md "Performance is a feature".
package main
import (
"sync"
"sync/atomic"
"time"
)
// analyticsRecomputer holds the latest snapshot of an analytics result
// in an atomic.Value, refreshed periodically by a background goroutine.
//
// Lifecycle:
// 1. Construct via newAnalyticsRecomputer(...)
// 2. Call Start() — runs initial compute synchronously, then launches
// the recompute goroutine. Initial compute is synchronous so the
// first Load() after Start returns sees a populated cache, provided
// that compute returned a non-nil result without panicking.
// 3. Call Load() any number of times concurrently — never blocks
// beyond an atomic-pointer load.
// 4. Call Stop() to terminate the background goroutine cleanly.
//
// Compute func is called WITHOUT any lock held by this struct, so it
// may freely take any application-level locks it needs.
type analyticsRecomputer struct {
// name is a human-readable label set at construction (e.g. "topology").
name string
// interval is the ticker period used by loop().
interval time.Duration
// compute produces a fresh snapshot; runOnce is a no-op when it is nil.
compute func() interface{}
cache atomic.Value // holds interface{} — the latest snapshot
// stop is closed by Stop() to ask loop() to return.
stop chan struct{}
// done is closed by loop() when it returns.
done chan struct{}
// startOnce makes Start() effective only once; stopOnce guards the
// close of stop so repeated Stop() calls do not panic.
startOnce sync.Once
stopOnce sync.Once
// Stats (atomic). Both are updated by runOnce only after compute
// returns; a panicking compute leaves them untouched.
computeRuns atomic.Int64
lastComputeNs atomic.Int64 // duration of last compute in nanoseconds
// Issue #1659 (PR #1688 r1) — warmup gate state, inlined here so
// hot-path readers (IsWarmingUp_1659) do lock-free atomic loads
// only (replaces the r0 package-level map + chanLock). See
// analytics_warmup_1659.go for full design notes.
//
// firstPassDoneNs and warmupStartedNs are UnixNano timestamps; zero
// means "not set yet".
firstPassDoneNs atomic.Int64
warmupStartedNs atomic.Int64
warmupReadyGate atomic.Value // *func() bool — gate must return true for markFirstPassDone to take effect
}
// newAnalyticsRecomputer builds a recomputer that has not been started.
//
// A non-positive interval is replaced by a 5-minute default. compute is
// stored as given; a nil compute makes every pass a no-op (see runOnce).
// The stop and done channels are allocated here so Stop can be called
// at any point after construction.
func newAnalyticsRecomputer(name string, interval time.Duration, compute func() interface{}) *analyticsRecomputer {
	const fallbackInterval = 5 * time.Minute

	rc := new(analyticsRecomputer)
	rc.name = name
	rc.compute = compute
	rc.interval = interval
	if rc.interval <= 0 {
		rc.interval = fallbackInterval
	}
	rc.stop = make(chan struct{})
	rc.done = make(chan struct{})
	return rc
}
// Start performs the first compute on the calling goroutine and then
// spawns the periodic recompute loop. Because the first pass is
// synchronous, a Load issued after Start returns sees that pass's
// snapshot (unless compute returned nil or panicked).
//
// Only the first call does anything; later calls are no-ops.
func (r *analyticsRecomputer) Start() {
	launch := func() {
		// Issue #1659 (#1688 munger #2): stamp warmup-start BEFORE the
		// first compute so the IsWarmingUp_1659 fallback timeout runs
		// from "recomputer started" rather than "first pass returned" —
		// the latter never happens if compute() hangs.
		r.noteWarmupStart_1659()

		// Synchronous initial pass: the first read must not observe
		// empty or uninitialized data (acceptance criterion #1240).
		r.runOnce()

		go r.loop()
	}
	r.startOnce.Do(launch)
}
// loop is the background goroutine body: it runs one compute pass per
// ticker interval until the stop channel is closed, then closes done
// so Stop can observe the exit.
func (r *analyticsRecomputer) loop() {
	defer close(r.done)

	ticker := time.NewTicker(r.interval)
	defer ticker.Stop()

	for {
		select {
		case <-r.stop:
			return
		case <-ticker.C:
			r.runOnce()
		}
	}
}
// runOnce executes a single compute pass and publishes its result.
//
// It is a no-op when compute is nil. A panic raised by compute is
// recovered and discarded; in that case none of the statements after
// the compute call run, so the stats, the cache and the first-pass
// marker all keep their previous values. A nil result updates the
// stats and may mark the first pass done, but leaves the cached
// snapshot untouched.
func (r *analyticsRecomputer) runOnce() {
if r.compute == nil {
return
}
defer func() {
// Don't let a compute panic kill the background goroutine.
// The previous snapshot remains valid. Even on panic, we
// still want IsWarmingUp_1659's fallback timeout to be the
// safety net (a perpetually panicking compute would never
// reach markFirstPassDone otherwise).
_ = recover()
}()
// Time only the compute call itself.
t0 := time.Now()
result := r.compute()
r.lastComputeNs.Store(int64(time.Since(t0)))
r.computeRuns.Add(1)
// Keep serving the previous snapshot when this pass produced nothing.
if result != nil {
r.cache.Store(result)
}
// Issue #1659: mark the first-pass clock so the warmup gate
// in GetAnalyticsRFWithWindow / Topology / Channels handlers
// can flip from 503-Retry-After to serving the cache.
//
// PR #1688 r1: called on EVERY successful pass (even nil
// result) so a compute that returns nil but doesn't panic
// still lifts the gate — banner-stuck-forever fix (munger #2).
// The markFirstPassDone helper is idempotent and additionally
// consults the chunked-loader readiness gate (munger #5).
r.markFirstPassDone_1659()
}
// Load hands back the latest stored snapshot. The result is nil when
// nothing has been stored yet — Start was never called, or no compute
// pass has produced a non-nil result so far. The only synchronization
// performed is one atomic load.
func (r *analyticsRecomputer) Load() interface{} {
	if snapshot := r.cache.Load(); snapshot != nil {
		return snapshot
	}
	return nil
}
// Stop signals the background goroutine to exit and waits for it.
// Safe to call multiple times. Safe to call before Start, in which
// case it closes the stop channel and returns immediately.
//
// The wait for the goroutine is bounded at 5 seconds as a defensive
// measure.
func (r *analyticsRecomputer) Stop() {
	r.stopOnce.Do(func() {
		close(r.stop)
	})
	// Only wait if the recomputer was actually started. Start stamps
	// warmupStartedNs (via noteWarmupStart_1659) before doing anything
	// else, so zero here means loop() was never launched and done will
	// never be closed — waiting would just burn the full timeout. If a
	// Start races in afterwards, its loop sees the closed stop channel
	// and exits on its own.
	if r.warmupStartedNs.Load() == 0 {
		return
	}
	select {
	case <-r.done:
	case <-time.After(5 * time.Second):
		// Defensive timeout: shouldn't happen in practice.
	}
}
// LastComputeDuration reports how long the most recent compute call
// that returned took. It is zero until a compute has returned once.
func (r *analyticsRecomputer) LastComputeDuration() time.Duration {
	nanos := r.lastComputeNs.Load()
	return time.Duration(nanos)
}
// ComputeRuns reports how many compute invocations have returned so
// far. Passes that ended in a recovered panic are not counted.
func (r *analyticsRecomputer) ComputeRuns() int64 {
	total := r.computeRuns.Load()
	return total
}
// AnalyticsRecomputeIntervals lets callers (main.go) override the
// per-endpoint recompute interval from config.json. Zero values fall
// back to the defaultInterval passed to StartAnalyticsRecomputers.
//
// Each field maps to one recomputer created by
// StartAnalyticsRecomputers; the value is resolved through
// pickInterval, so any non-positive duration means "use the default".
type AnalyticsRecomputeIntervals struct {
Topology time.Duration
RF time.Duration
Distance time.Duration
Channels time.Duration
HashCollisions time.Duration
HashSizes time.Duration
Roles time.Duration
ObserversClockSkew time.Duration
NodesClockSkew time.Duration
}
// pickInterval resolves a per-endpoint interval: a strictly positive
// override wins; otherwise def is returned unchanged.
func pickInterval(override, def time.Duration) time.Duration {
	chosen := def
	if override > 0 {
		chosen = override
	}
	return chosen
}
// StartAnalyticsRecomputers wires each analytics endpoint to a
// background recompute goroutine. Each runs an initial compute
// synchronously (so the first read after startup is a cache hit, never
// cold) and then refreshes on a ticker. The initial computes run one
// after another on the calling goroutine, so this call does not return
// until all of them have finished.
//
// All recomputers serve the DEFAULT query shape only: region="" and
// zero-window (no ?since= / ?until= params). Region-keyed or windowed
// queries continue to use the legacy on-request compute + TTL cache —
// the recomputer count would explode if we maintained one per
// (endpoint × region × window) combination, and region filtering is
// fast read-time work anyway.
//
// defaultInterval applies to every recomputer without a positive
// override; a non-positive defaultInterval becomes 5 minutes. Only the
// first element of overrides is consulted.
//
// Returns a stop closure that calls Stop on every recomputer in turn.
// Safe to call once per PacketStore. If the recomputers are already
// registered (s.recompTopology is non-nil), nothing is started and the
// returned closure is a no-op — it does NOT stop the recomputers
// started by the earlier call.
func (s *PacketStore) StartAnalyticsRecomputers(defaultInterval time.Duration, overrides ...AnalyticsRecomputeIntervals) func() {
if defaultInterval <= 0 {
defaultInterval = 5 * time.Minute
}
var ov AnalyticsRecomputeIntervals
if len(overrides) > 0 {
ov = overrides[0]
}
s.analyticsRecomputerMu.Lock()
if s.recompTopology != nil {
// Already started; return a no-op so the caller's defer is harmless.
s.analyticsRecomputerMu.Unlock()
return func() {}
}
// Each recomputer wraps the underlying compute* function with the
// default arguments. We use computeAnalytics* (not GetAnalytics*) to
// bypass the legacy TTL cache layer — the recomputer IS the cache.
s.recompTopology = newAnalyticsRecomputer(
"topology", pickInterval(ov.Topology, defaultInterval),
func() interface{} { return s.computeAnalyticsTopology("", "", TimeWindow{}) },
)
s.recompRF = newAnalyticsRecomputer(
"rf", pickInterval(ov.RF, defaultInterval),
func() interface{} { return s.computeAnalyticsRF("", "", TimeWindow{}) },
)
s.recompDistance = newAnalyticsRecomputer(
"distance", pickInterval(ov.Distance, defaultInterval),
func() interface{} { return s.computeAnalyticsDistance("", "") },
)
s.recompChannels = newAnalyticsRecomputer(
"channels", pickInterval(ov.Channels, defaultInterval),
func() interface{} { return s.computeAnalyticsChannels("", "", TimeWindow{}) },
)
s.recompHashCollisions = newAnalyticsRecomputer(
"hash-collisions", pickInterval(ov.HashCollisions, defaultInterval),
func() interface{} { return s.computeHashCollisions("", "") },
)
s.recompHashSizes = newAnalyticsRecomputer(
"hash-sizes", pickInterval(ov.HashSizes, defaultInterval),
func() interface{} { return s.computeAnalyticsHashSizesWithCapability("", "") },
)
s.recompRoles = newAnalyticsRecomputer(
"roles", pickInterval(ov.Roles, defaultInterval),
func() interface{} { return s.computeAnalyticsRoles() },
)
s.recompObserversClockSkew = newAnalyticsRecomputer(
"observers-clock-skew", pickInterval(ov.ObserversClockSkew, defaultInterval),
func() interface{} { return s.computeObserverCalibrations() },
)
s.recompNodesClockSkew = newAnalyticsRecomputer(
"nodes-clock-skew", pickInterval(ov.NodesClockSkew, defaultInterval),
func() interface{} { return s.computeFleetClockSkew() },
)
// Snapshot the full set while still holding the lock; the Start and
// Stop loops below iterate this local slice, not the struct fields.
all := []*analyticsRecomputer{
s.recompTopology, s.recompRF, s.recompDistance,
s.recompChannels, s.recompHashCollisions, s.recompHashSizes,
s.recompRoles,
s.recompObserversClockSkew, s.recompNodesClockSkew,
}
// The lock is released before Start so the (potentially slow)
// initial computes do not run with analyticsRecomputerMu held.
s.analyticsRecomputerMu.Unlock()
// Issue #1659 (PR #1688 r1, munger #5): wire the chunked-loader
// readiness gate on the three warmup-gated recomputers (RF,
// Topology, Channels). markFirstPassDone_1659 will refuse to
// flip first-pass-done until s.LoadComplete() reports true —
// i.e. the cold-load has populated all observations. Otherwise
// the FIRST recomputer pass runs against the post-restart in-RAM
// slice and the gate opens on partial data (the original #1659
// bug class).
loadCompleteGate := s.LoadComplete
s.recompRF.setWarmupReadyGate_1659(loadCompleteGate)
s.recompTopology.setWarmupReadyGate_1659(loadCompleteGate)
s.recompChannels.setWarmupReadyGate_1659(loadCompleteGate)
for _, rc := range all {
rc.Start()
}
return func() {
for _, rc := range all {
rc.Stop()
}
}
}
-174
View File
@@ -1,174 +0,0 @@
package main
import (
"runtime"
"sort"
"sync"
"sync/atomic"
"testing"
"time"
)
// numGoroutinesForTest reports the current goroutine count as seen by
// the Go runtime.
func numGoroutinesForTest() int {
	count := runtime.NumGoroutine()
	return count
}
// TestAnalyticsRecomputerSteadyStateLatency asserts that issue #1240's
// steady-state background recompute is in place: reads of the common
// analytics endpoints (region="") return from cache in <50ms p99 even
// under simulated ingest load.
//
// On master (pre-fix), GetAnalyticsTopology holds s.mu.RLock for the
// entire compute. Concurrent ingest writers (s.mu.Lock) starve readers
// or vice versa, producing per-read latencies in the hundreds of
// milliseconds. The cache TTL doesn't help: after every expiry one
// reader still pays the full compute cost.
//
// Post-fix, GetAnalyticsTopology with region="" and zero window must
// Load() from the background-refreshed atomic snapshot — never blocking
// under writer contention.
//
// NOTE(review): the narrative above talks about the topology endpoint,
// but the call actually timed below is GetAnalyticsDistance("", "")
// over the seeded distHops slice — presumably it follows the same
// recomputer path; confirm.
//
// Skipped under -short because it is timing-based.
func TestAnalyticsRecomputerSteadyStateLatency(t *testing.T) {
if testing.Short() {
t.Skip("skipping latency timing test in -short mode")
}
db := setupTestDB(t)
defer db.Close()
store := NewPacketStore(db, nil)
// Populate with enough records to make on-request compute non-trivial.
const N = 20000
hops := make([]distHopRecord, N)
for i := 0; i < N; i++ {
hops[i] = distHopRecord{
FromName: "A", FromPk: "aa",
ToName: "B", ToPk: "bb",
Dist: float64(i%500) + 0.5,
Type: []string{"R↔R", "C↔R", "C↔C"}[i%3],
Hash: "h",
Timestamp: "2024-01-01T00:00:00Z",
HourBucket: "2024-01-01-00",
}
}
store.mu.Lock()
store.distHops = hops
store.mu.Unlock()
// Start the recomputer infrastructure. On master this method
// doesn't exist, so this test won't compile until the GREEN commit
// lands; the RED commit lands the test + a stub. Stub returns
// without wiring background recompute, so the test still fails on
// the latency assertion below.
stop := store.StartAnalyticsRecomputers(10 * time.Millisecond)
defer stop()
// Give the initial compute a moment to populate.
time.Sleep(50 * time.Millisecond)
// Simulated writer: contend for s.mu.Lock. This is what makes the
// non-recomputer path miss the latency target — the old
// GetAnalyticsTopology grabs s.mu.RLock for the entire compute and
// blocks behind every writer cycle.
var stopWriters atomic.Bool
var writerWg sync.WaitGroup
const Writers = 4
writerWg.Add(Writers)
for w := 0; w < Writers; w++ {
go func() {
defer writerWg.Done()
for !stopWriters.Load() {
store.mu.Lock()
// Trivial mutation: extend distHops by one and shrink back.
store.distHops = append(store.distHops, distHopRecord{
Dist: 1, Hash: "x", Timestamp: "2024-01-01T00:00:00Z",
})
store.distHops = store.distHops[:len(store.distHops)-1]
store.mu.Unlock()
// Brief pause to keep the lock-cycle rate realistic.
time.Sleep(100 * time.Microsecond)
}
}()
}
// 100 concurrent reads. Each reader writes only its own slot of
// latencies, so the slice needs no extra synchronization beyond
// rwg.Wait().
const Readers = 100
latencies := make([]time.Duration, Readers)
var rwg sync.WaitGroup
rwg.Add(Readers)
for i := 0; i < Readers; i++ {
i := i
go func() {
defer rwg.Done()
t0 := time.Now()
r := store.GetAnalyticsDistance("", "")
latencies[i] = time.Since(t0)
if r == nil {
t.Errorf("reader %d got nil result", i)
}
}()
}
rwg.Wait()
stopWriters.Store(true)
writerWg.Wait()
sort.Slice(latencies, func(i, j int) bool { return latencies[i] < latencies[j] })
p50 := latencies[Readers/2]
// With Readers = 100 this index is 99, i.e. the slowest sample.
p99 := latencies[(Readers*99)/100]
t.Logf("analytics distance read latency: p50=%v p99=%v max=%v",
p50, p99, latencies[Readers-1])
// p99 budget: 50ms. Atomic-pointer load + JSON-shape map return
// should be sub-millisecond; 50ms leaves margin for goroutine
// scheduling jitter under concurrent test runs.
const budget = 50 * time.Millisecond
if p99 > budget {
t.Fatalf("p99 read latency %v exceeds %v budget (issue #1240 not in effect)", p99, budget)
}
}
// TestAnalyticsRecomputerShutdownNoLeak asserts the background
// goroutines started by StartAnalyticsRecomputers exit cleanly when
// the returned stop function is called — no leak across server
// shutdown (issue #1240 acceptance criterion).
//
// The check is deliberately loose: it compares process-wide goroutine
// counts taken while the recomputers run and after stop(), and only
// requires the count to have gone down.
func TestAnalyticsRecomputerShutdownNoLeak(t *testing.T) {
db := setupTestDB(t)
defer db.Close()
store := NewPacketStore(db, nil)
// Use a tight tick so we know recompute is actually running (not
// just blocked on the ticker).
stop := store.StartAnalyticsRecomputers(20 * time.Millisecond)
// Snapshot active goroutines a beat after start.
time.Sleep(80 * time.Millisecond)
startGoroutines := runtimeNumGoroutine()
stop()
// After stop returns, give the scheduler a beat to reap exits.
deadline := time.Now().Add(2 * time.Second)
var endGoroutines int
for time.Now().Before(deadline) {
endGoroutines = runtimeNumGoroutine()
// Early exit once at least 5 goroutines are gone. This threshold
// dates from when 6 recomputers were registered;
// StartAnalyticsRecomputers now builds 9, so it is a loose lower
// bound rather than an exact count.
if endGoroutines <= startGoroutines-5 { // we started 6 recomputers
break
}
time.Sleep(20 * time.Millisecond)
}
// We expect fewer goroutines than the snapshot taken DURING
// recompute (one loop goroutine per registered recomputer should
// have exited). Only a strict decrease is asserted, since test
// runners can have flaky goroutine counts.
if endGoroutines >= startGoroutines {
t.Fatalf("goroutine leak after stop: %d → %d (expected fewer)",
startGoroutines, endGoroutines)
}
t.Logf("goroutines: during=%d after=%d (Δ=%d)",
startGoroutines, endGoroutines, startGoroutines-endGoroutines)
}
// runtimeNumGoroutine is a thin indirection over numGoroutinesForTest,
// used by the shutdown-leak test to sample the goroutine count.
func runtimeNumGoroutine() int {
	n := numGoroutinesForTest()
	return n
}
-212
View File
@@ -1,212 +0,0 @@
// Package main: issue #1659 — analytics warmup gating.
//
// Problem: after server restart, recompRF (and recompTopology /
// recompChannels) cache the FIRST computation, which immediately after
// boot is just the small in-RAM-observations slice (background
// chunk-loader has not yet backfilled history). The recomputer then
// serves that small slice from GetAnalyticsRFWithWindow's default
// shortcut for an entire recompute interval, while the client pins it
// via CLIENT_TTL.analyticsRF. UX: cards show a tiny "post-restart"
// window even when the user selects "All data".
//
// Fix (r1 — addresses #1688 review munger #5):
//
// The first-pass-done signal is NOT enough on its own — the FIRST
// recomputer pass at boot can complete against the post-restart slice
// BEFORE the chunked loader (#1008 / chunked_load.go) has populated
// the full observation set. Marking the gate ready in that window
// reproduces the original #1659 bug.
//
// Two correctness invariants:
//
// 1. (#1688 munger #5) Only mark first-pass-done when BOTH:
// a. a recomputer pass has completed, AND
// b. the chunked loader has finished (s.LoadComplete()).
// The gate's `readyGate` callback is wired by
// StartAnalyticsRecomputers to `store.LoadComplete`. Passes that
// complete while loadComplete is still false leave the gate in
// the warming-up state; the NEXT pass after loadComplete flips
// true is the one that opens the gate.
//
// 2. (#1688 munger #2 + kent-beck #2) The gate MUST lift in bounded
// time. If compute() panics on every pass, hangs indefinitely,
// or returns nil forever, an unguarded gate would leave the
// 503 banner permanent. Two safeguards:
// a. compute() panics are already caught by runOnce()'s
// defer recover(); we additionally call markFirstPassDone
// on EVERY pass (even nil-result), so a recomputer that
// returns nil but doesn't panic still flips the gate.
// b. A hard fallback timeout (warmupForceTimeout, 60s by
// default) measured from the moment the recomputer was
// started (Start() records it via noteWarmupStart_1659)
// forces IsWarmingUp_1659() to false — degraded mode
// (serve whatever cache exists, possibly empty) is
// strictly better than a permanent 503.
//
// Concurrency (#1688 munger #3):
//
// The previous r0 design used a package-level map keyed by recomputer
// pointer, guarded by a global chanLock. Every default-shape analytics
// request acquired that lock — a serialization point on a hot path.
//
// r1 inlines the warmup fields directly on `analyticsRecomputer`:
// - firstPassDoneNs atomic.Int64
// - warmupStartedNs atomic.Int64
// - warmupReadyGate atomic.Value (holds *func() bool; a nil pointer means "no gate")
//
// Reads on the hot path are lock-free atomic loads. No package-level
// state, no map lookups, no mutex.
//
// Tests: analytics_warmup_1659_test.go.
package main
import (
"net/http"
"time"
)
// warmupForceTimeout is the deadline after which IsWarmingUp_1659()
// flips false regardless of whether a successful first pass has run.
// It is measured from the warmup-start timestamp recorded by
// noteWarmupStart_1659; while that timestamp is unset the timeout
// does not apply.
// Operators get degraded analytics (possibly empty until the next
// successful compute) instead of a permanent 503 banner.
//
// Var (not const) so tests can shorten it.
var warmupForceTimeout = 60 * time.Second
// setWarmupReadyGate_1659 installs the readiness callback consulted by
// markFirstPassDone_1659. While the callback reports false, a finished
// compute pass does not end the warmup state — only a pass completing
// AFTER the callback turns true counts.
//
// A nil gate stores a typed nil pointer, which readers treat as "no
// extra gating" (legacy behavior). A nil receiver is a no-op.
//
// Called from StartAnalyticsRecomputers; safe to call before Start().
func (r *analyticsRecomputer) setWarmupReadyGate_1659(gate func() bool) {
	if r == nil {
		return
	}
	// Always store the same concrete type (*func() bool) so the
	// atomic.Value never sees inconsistent types across calls.
	var stored *func() bool
	if gate != nil {
		stored = &gate
	}
	r.warmupReadyGate.Store(stored)
}
// loadWarmupReadyGate_1659 returns the readiness callback previously
// installed by setWarmupReadyGate_1659, or nil when none was stored,
// a nil gate was stored, or the stored value has an unexpected type.
func (r *analyticsRecomputer) loadWarmupReadyGate_1659() func() bool {
	if ptr, isGate := r.warmupReadyGate.Load().(*func() bool); isGate && ptr != nil {
		return *ptr
	}
	return nil
}
// markFirstPassDone_1659 records the first qualifying compute pass.
// runOnce invokes it after every compute that returns (nil result
// included); a panicking compute never gets this far.
//
// When a readiness gate is installed, the timestamp is only written if
// the gate reports true — the munger #5 rule that first-pass-done needs
// BOTH a finished pass AND a finished chunked load.
//
// Idempotent: once firstPassDoneNs is non-zero, further calls return
// without touching it, and the compare-and-swap makes sure only one
// concurrent caller sets it.
func (r *analyticsRecomputer) markFirstPassDone_1659() {
	alreadyDone := r.firstPassDoneNs.Load() != 0
	if alreadyDone {
		return
	}

	gate := r.loadWarmupReadyGate_1659()
	if gate != nil {
		if ready := gate(); !ready {
			return
		}
	}

	now := time.Now().UnixNano()
	r.firstPassDoneNs.CompareAndSwap(0, now)
}
// FirstPassDoneAt_1659 returns when the first qualifying compute pass
// was recorded (see markFirstPassDone_1659). The zero time.Time is
// returned for a nil receiver or when no such pass has been recorded.
func (r *analyticsRecomputer) FirstPassDoneAt_1659() time.Time {
	var unset time.Time
	if r == nil {
		return unset
	}
	switch ns := r.firstPassDoneNs.Load(); ns {
	case 0:
		return unset
	default:
		return time.Unix(0, ns)
	}
}
// IsWarmingUp_1659 tells handlers whether the default-shape request
// should still be answered with 503 + Retry-After: 5.
//
// It is true only while no qualifying first pass has been recorded AND
// the fallback timeout has not yet expired. The timeout
// (warmupForceTimeout, counted from the recorded warmup start) keeps a
// perpetually panicking, nil-returning or hanging compute from causing
// a permanent 503. If no warmup start was ever recorded, the timeout
// cannot fire and the recomputer is reported as warming up.
//
// A nil receiver means no recomputer is registered: reported as ready
// so the handler falls through to the legacy compute path.
//
// Lock-free: atomic loads only.
func (r *analyticsRecomputer) IsWarmingUp_1659() bool {
	switch {
	case r == nil:
		return false
	case r.firstPassDoneNs.Load() != 0:
		return false
	}

	startedNs := r.warmupStartedNs.Load()
	if startedNs == 0 {
		return true
	}

	// Past the deadline the gate is forced open: stop serving 503 and
	// let the handler serve whatever the cache holds (possibly empty).
	elapsed := time.Since(time.Unix(0, startedNs))
	return elapsed < warmupForceTimeout
}
// noteWarmupStart_1659 stamps the warmup-start time used by
// IsWarmingUp_1659 for its fallback timeout. Start calls it before the
// first compute. Only the first call has an effect; a nil receiver is
// a no-op.
func (r *analyticsRecomputer) noteWarmupStart_1659() {
	if r != nil {
		r.warmupStartedNs.CompareAndSwap(0, time.Now().UnixNano())
	}
}
// writeAnalyticsWarmup503 sends the standard "still warming up" reply:
// status 503, Retry-After: 5, and a JSON body carrying an error string
// plus a retry_after_s integer, the shape clients are documented to
// expect. Write errors are ignored.
func writeAnalyticsWarmup503(w http.ResponseWriter) {
	const body = `{"error":"analytics warming up","retry_after_s":5}`

	hdr := w.Header()
	hdr.Set("Content-Type", "application/json")
	hdr.Set("Retry-After", "5")

	w.WriteHeader(http.StatusServiceUnavailable)
	_, _ = w.Write([]byte(body))
}
// installWarmupBlocker_1659 is a test-only helper. It replaces the RF,
// topology and channels recomputers with unstarted ones whose compute
// function blocks on the supplied channel and then returns nil.
//
// StartAnalyticsRecomputers is bypassed on purpose and Start() is never
// called here, so no compute runs and no background goroutine is
// launched: firstPassDoneNs stays zero, which is exactly the
// post-restart warmup state the tests want. The tests only need the
// *analyticsRecomputer pointers to be non-nil.
func (s *PacketStore) installWarmupBlocker_1659(block <-chan struct{}) {
	const tickInterval = time.Hour
	waitForRelease := func() interface{} {
		<-block
		return nil
	}

	s.analyticsRecomputerMu.Lock()
	defer s.analyticsRecomputerMu.Unlock()

	s.recompRF = newAnalyticsRecomputer("rf-test-block", tickInterval, waitForRelease)
	s.recompTopology = newAnalyticsRecomputer("topo-test-block", tickInterval, waitForRelease)
	s.recompChannels = newAnalyticsRecomputer("chan-test-block", tickInterval, waitForRelease)
}
-330
View File
@@ -1,330 +0,0 @@
// Package main: issue #1659 — analytics warmup gating.
//
// After a server restart, the analytics recomputer caches the FIRST
// computation (a small in-RAM slice) and serves it via the default
// region="", zero-window shortcut in GetAnalyticsRFWithWindow until the
// next periodic recompute fires. The client-side CLIENT_TTL.analyticsRF
// then pins that small slice on the page even after the server flips
// to steady-state.
//
// Fix: each recomputer carries a firstPassDoneAt timestamp set ONLY
// after a full-range compute completes. While firstPassDoneAt is zero
// AND the request is the default-shape (region="" && area="" &&
// window.IsZero()), the handler returns 503 + Retry-After: 5 with a
// JSON body the client recognizes and retries with backoff.
//
// These tests are the RED contract: they must FAIL on the assertion
// (not a build error) when the warmup gate is absent, and PASS once
// the fix lands.
package main
import (
"encoding/json"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/gorilla/mux"
)
// TestAnalyticsRF_WarmupReturns503 covers the core acceptance criterion
// (c) of #1659: while no analytics recomputer has completed a first
// full-range pass, GET /api/analytics/rf answers 503 with
// Retry-After: 5 and the JSON body
// {"error":"analytics warming up","retry_after_s":5}.
func TestAnalyticsRF_WarmupReturns503(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	// Install recomputers that never complete a first pass, so the
	// first-pass marker stays unset for the whole test.
	release := make(chan struct{})
	defer close(release)
	store := NewPacketStore(db, nil)
	store.installWarmupBlocker_1659(release) // helper added in GREEN

	srv := NewServer(db, &Config{Port: 3000}, NewHub())
	srv.store = store
	router := mux.NewRouter()
	srv.RegisterRoutes(router)

	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, httptest.NewRequest("GET", "/api/analytics/rf", nil))

	if rec.Code != http.StatusServiceUnavailable {
		t.Fatalf("expected 503 during warmup, got %d (body=%s)", rec.Code, rec.Body.String())
	}
	if got := rec.Header().Get("Retry-After"); got != "5" {
		t.Fatalf("expected Retry-After: 5, got %q", got)
	}

	var body map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatalf("invalid JSON body: %v (raw=%s)", err, rec.Body.String())
	}
	if body["error"] != "analytics warming up" {
		t.Fatalf("expected error='analytics warming up', got %v", body["error"])
	}
	retry, isNumber := body["retry_after_s"].(float64)
	if !isNumber || retry != 5 {
		t.Fatalf("expected retry_after_s=5, got %v", body["retry_after_s"])
	}
}
// TestAnalyticsRF_AfterFirstPassReturns200 covers the post-warmup happy
// path: once the RF recomputer has recorded its first pass, the handler
// answers 200 with a non-empty JSON object and no Retry-After header.
func TestAnalyticsRF_AfterFirstPassReturns200(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()
	store := NewPacketStore(db, nil)

	// #1688 r1: the warmup gate also requires LoadComplete() to be true
	// before first-pass-done flips (munger #5). This test does not run
	// the chunked loader, so flip the flag by hand to model a server
	// that has finished cold-loading.
	store.loadComplete.Store(true)

	stop := store.StartAnalyticsRecomputers(50 * time.Millisecond)
	defer stop()

	// Start() runs the initial compute synchronously, so the marker
	// should already be set; poll briefly anyway to stay robust to
	// scheduling.
	firstPassDone := func() bool {
		return store.recompRF != nil && !store.recompRF.FirstPassDoneAt_1659().IsZero()
	}
	for deadline := time.Now().Add(3 * time.Second); time.Now().Before(deadline) && !firstPassDone(); {
		time.Sleep(10 * time.Millisecond)
	}
	if !firstPassDone() {
		t.Fatal("recompRF.firstPassDoneAt never flipped after Start()")
	}

	srv := NewServer(db, &Config{Port: 3000}, NewHub())
	srv.store = store
	router := mux.NewRouter()
	srv.RegisterRoutes(router)

	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, httptest.NewRequest("GET", "/api/analytics/rf", nil))

	if rec.Code != http.StatusOK {
		t.Fatalf("expected 200 after first pass, got %d (body=%s)", rec.Code, rec.Body.String())
	}
	if got := rec.Header().Get("Retry-After"); got != "" {
		t.Fatalf("expected no Retry-After header on 200, got %q", got)
	}

	// The body must decode as a JSON object (the RF analytics map).
	var body map[string]interface{}
	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
		t.Fatalf("invalid JSON body: %v", err)
	}
	if len(body) == 0 {
		t.Fatal("expected non-empty RF analytics response after first pass")
	}
}
// TestAnalyticsRF_WindowedRequestNotGated checks that the warmup gate
// applies only to the default-shape request. A request carrying an
// explicit window (or region/area filter) bypasses the recomputer and
// uses the legacy compute-then-cache path, so it must not get a 503
// even while the recomputers are still warming up.
func TestAnalyticsRF_WindowedRequestNotGated(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	release := make(chan struct{})
	defer close(release)
	store := NewPacketStore(db, nil)
	store.installWarmupBlocker_1659(release)

	srv := NewServer(db, &Config{Port: 3000}, NewHub())
	srv.store = store
	router := mux.NewRouter()
	srv.RegisterRoutes(router)

	// Explicit window: expected to skip the warmup gate.
	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, httptest.NewRequest("GET", "/api/analytics/rf?window=1h", nil))

	if gated := rec.Code == http.StatusServiceUnavailable; gated {
		t.Fatalf("windowed request must NOT be gated by warmup (got 503)")
	}
}
// === PR #1688 r1 — new test cases ===
// TestAnalyticsTopology_WarmupReturns503 — kent-beck #1: the topology
// endpoint is gated the same way as RF, so it must honour the same
// 503 + Retry-After: 5 contract during warmup.
func TestAnalyticsTopology_WarmupReturns503(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	release := make(chan struct{})
	defer close(release)
	store := NewPacketStore(db, nil)
	store.installWarmupBlocker_1659(release)

	srv := NewServer(db, &Config{Port: 3000}, NewHub())
	srv.store = store
	router := mux.NewRouter()
	srv.RegisterRoutes(router)

	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, httptest.NewRequest("GET", "/api/analytics/topology", nil))

	if rec.Code != http.StatusServiceUnavailable {
		t.Fatalf("topology: expected 503 during warmup, got %d", rec.Code)
	}
	if got := rec.Header().Get("Retry-After"); got != "5" {
		t.Fatalf("topology: expected Retry-After: 5, got %q", got)
	}
}
// TestAnalyticsChannels_WarmupReturns503 — kent-beck #1: the channels
// endpoint is gated the same way as RF, so it must honour the same
// 503 + Retry-After: 5 contract during warmup.
func TestAnalyticsChannels_WarmupReturns503(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()

	release := make(chan struct{})
	defer close(release)
	store := NewPacketStore(db, nil)
	store.installWarmupBlocker_1659(release)

	srv := NewServer(db, &Config{Port: 3000}, NewHub())
	srv.store = store
	router := mux.NewRouter()
	srv.RegisterRoutes(router)

	rec := httptest.NewRecorder()
	router.ServeHTTP(rec, httptest.NewRequest("GET", "/api/analytics/channels", nil))

	if rec.Code != http.StatusServiceUnavailable {
		t.Fatalf("channels: expected 503 during warmup, got %d", rec.Code)
	}
	if got := rec.Header().Get("Retry-After"); got != "5" {
		t.Fatalf("channels: expected Retry-After: 5, got %q", got)
	}
}
// TestWarmup_GateBlockedUntilLoadComplete — munger #5 correctness:
// chunked-loader readiness gates first-pass-done. A pass finishing
// while LoadComplete() is false must leave the recomputer warming up;
// a later pass, after LoadComplete() turns true, must lift the gate.
func TestWarmup_GateBlockedUntilLoadComplete(t *testing.T) {
	db := setupTestDB(t)
	defer db.Close()
	// A fresh store reports LoadComplete() == false: loader still running.
	store := NewPacketStore(db, nil)

	passes := make(chan struct{}, 16)
	compute := func() interface{} {
		passes <- struct{}{}
		return map[string]int{"x": 1}
	}
	rc := newAnalyticsRecomputer("test-rf", time.Hour, compute)
	rc.setWarmupReadyGate_1659(store.LoadComplete)
	rc.Start()
	defer rc.Stop()

	// Start() already ran the first pass synchronously. With the loader
	// unfinished the recomputer must still report warming-up.
	<-passes
	if warming := rc.IsWarmingUp_1659(); !warming {
		t.Fatalf("expected IsWarmingUp_1659=true while LoadComplete()=false (munger #5 bug)")
	}
	if doneAt := rc.FirstPassDoneAt_1659(); !doneAt.IsZero() {
		t.Fatalf("expected FirstPassDoneAt zero while LoadComplete()=false")
	}

	// Finish the load, then force one more pass.
	store.loadComplete.Store(true)
	rc.runOnce()
	if rc.IsWarmingUp_1659() {
		t.Fatalf("expected gate to lift after LoadComplete()=true + another pass")
	}
}
// TestWarmup_NilResultStillLiftsGate — munger #2 / kent-beck #2: when
// compute returns nil without panicking, the warmup gate must still
// lift after the first pass. The cache stays empty, but the banner
// does not get stuck.
func TestWarmup_NilResultStillLiftsGate(t *testing.T) {
	alwaysNil := func() interface{} { return nil }
	rc := newAnalyticsRecomputer("test-nil", time.Hour, alwaysNil)
	rc.Start()
	defer rc.Stop()

	if stillWarming := rc.IsWarmingUp_1659(); stillWarming {
		t.Fatalf("nil-result compute must still lift warmup gate after first pass")
	}
}
// TestWarmup_PanicEventuallyLiftsGate — munger #2 / kent-beck #2.
//
// A compute function that panics on every call must not keep the warmup
// gate closed forever. The package-level warmupForceTimeout is shortened
// for the duration of the test and acts as the safety net.
func TestWarmup_PanicEventuallyLiftsGate(t *testing.T) {
	// Shrink the fallback timeout and restore it when the test ends.
	saved := warmupForceTimeout
	warmupForceTimeout = 50 * time.Millisecond
	defer func() { warmupForceTimeout = saved }()

	alwaysPanics := func() interface{} {
		panic("compute boom")
	}
	recomputer := newAnalyticsRecomputer("test-panic", time.Hour, alwaysPanics)
	recomputer.Start()
	defer recomputer.Stop()

	// The panicking pass must not have recorded a first-pass timestamp.
	if doneAt := recomputer.FirstPassDoneAt_1659(); !doneAt.IsZero() {
		t.Fatalf("panicking compute should not have set firstPassDoneNs")
	}

	// Sleep past the shortened timeout; the gate has to be open afterwards.
	time.Sleep(80 * time.Millisecond)
	if stillWarming := recomputer.IsWarmingUp_1659(); stillWarming {
		t.Fatalf("expected fallback timeout to lift gate after warmupForceTimeout (got still-warming)")
	}
}
// TestWarmup_TimeoutLiftsHangingCompute — munger #2 / kent-beck #2.
//
// A compute function that blocks indefinitely must not translate into a
// permanent warmup state (and therefore a permanent 503). The shortened
// warmupForceTimeout is expected to lift the gate on its own.
func TestWarmup_TimeoutLiftsHangingCompute(t *testing.T) {
	// Shrink the fallback timeout and restore it when the test ends.
	saved := warmupForceTimeout
	warmupForceTimeout = 50 * time.Millisecond
	defer func() { warmupForceTimeout = saved }()

	// The compute blocks on this channel; closing it at test exit
	// releases the background goroutine started below.
	release := make(chan struct{})
	defer close(release)

	hangingCompute := func() interface{} {
		<-release
		return nil
	}
	recomputer := newAnalyticsRecomputer("test-hang", time.Hour, hangingCompute)

	// Start() is deliberately not called: its initial compute would block
	// this goroutine. Instead, record the warmup start manually and let
	// the hanging pass run in the background.
	recomputer.noteWarmupStart_1659()
	go recomputer.runOnce()

	if warming := recomputer.IsWarmingUp_1659(); !warming {
		t.Fatalf("expected initial state to be warming-up")
	}

	time.Sleep(80 * time.Millisecond)
	if warming := recomputer.IsWarmingUp_1659(); warming {
		t.Fatalf("expected fallback timeout to lift hung-compute warmup")
	}
}
-111
View File
@@ -1,111 +0,0 @@
package main
import (
"net/http"
"net/http/httptest"
"testing"
)
// TestIsWeakAPIKey pins the classification rules of IsWeakAPIKey:
//   - well-known placeholder/default keys are weak, case-insensitively;
//   - any non-empty key shorter than 16 characters is weak;
//   - the empty key is NOT weak (it is treated separately as "disabled");
//   - keys of 16 characters or more that are not known defaults are strong.
func TestIsWeakAPIKey(t *testing.T) {
	// Known defaults must be detected.
	for _, weak := range []string{
		"your-secret-api-key-here", "change-me", "example", "test",
		"password", "admin", "apikey", "api-key", "secret", "default",
	} {
		if !IsWeakAPIKey(weak) {
			t.Errorf("expected %q to be weak", weak)
		}
	}

	// Matching against the known defaults is case-insensitive.
	if !IsWeakAPIKey("Password") {
		t.Error("expected case-insensitive match for Password")
	}
	if !IsWeakAPIKey("YOUR-SECRET-API-KEY-HERE") {
		t.Error("expected case-insensitive match")
	}

	// Short keys (<16 chars) are weak.
	if !IsWeakAPIKey("short") {
		t.Error("expected short key to be weak")
	}
	if !IsWeakAPIKey("exactly15chars!") { // 15 chars: just below the limit
		t.Error("expected 15-char key to be weak")
	}

	// Empty key is NOT weak (handled separately as "disabled").
	if IsWeakAPIKey("") {
		t.Error("empty key should not be flagged as weak")
	}

	// Strong keys pass.
	if IsWeakAPIKey("a-very-strong-key-1234") {
		t.Error("expected strong key to pass")
	}
	// This key is exactly 16 characters long, i.e. the shortest length
	// that is not rejected by the "<16 chars" rule.
	if IsWeakAPIKey("xK9!mP2@nL5#qR8$") {
		t.Error("expected 16-char random key to pass")
	}
}
// TestRequireAPIKey_RejectsWeakKey verifies that a server configured with
// a weak API key ("test") answers 403 Forbidden even when the request
// presents that same key.
func TestRequireAPIKey_RejectsWeakKey(t *testing.T) {
	srv := &Server{cfg: &Config{APIKey: "test"}}

	// Downstream handler that would answer 200 if the middleware let the
	// request through.
	next := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	guarded := srv.requireAPIKey(next)

	request := httptest.NewRequest("POST", "/api/packets", nil)
	request.Header.Set("X-API-Key", "test")
	recorder := httptest.NewRecorder()
	guarded.ServeHTTP(recorder, request)

	if got := recorder.Code; got != http.StatusForbidden {
		t.Errorf("expected 403 for weak key, got %d", got)
	}
}
// TestRequireAPIKey_AcceptsStrongKey verifies that a request carrying the
// configured strong API key passes the middleware and reaches the wrapped
// handler (200 OK).
func TestRequireAPIKey_AcceptsStrongKey(t *testing.T) {
	const key = "a-very-strong-key-1234"
	srv := &Server{cfg: &Config{APIKey: key}}

	// Downstream handler; a 200 proves the middleware forwarded the call.
	next := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	guarded := srv.requireAPIKey(next)

	request := httptest.NewRequest("POST", "/api/packets", nil)
	request.Header.Set("X-API-Key", key)
	recorder := httptest.NewRecorder()
	guarded.ServeHTTP(recorder, request)

	if got := recorder.Code; got != http.StatusOK {
		t.Errorf("expected 200 for strong key, got %d", got)
	}
}
// TestRequireAPIKey_EmptyKeyDisablesEndpoints verifies that with an empty
// configured API key the protected endpoint is disabled: a request without
// any X-API-Key header is answered with 403 Forbidden.
func TestRequireAPIKey_EmptyKeyDisablesEndpoints(t *testing.T) {
	srv := &Server{cfg: &Config{APIKey: ""}}

	// Downstream handler that would answer 200 if it were ever reached.
	next := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	guarded := srv.requireAPIKey(next)

	request := httptest.NewRequest("POST", "/api/packets", nil)
	recorder := httptest.NewRecorder()
	guarded.ServeHTTP(recorder, request)

	if got := recorder.Code; got != http.StatusForbidden {
		t.Errorf("expected 403 for empty key, got %d", got)
	}
}
// TestRequireAPIKey_WrongKeyUnauthorized verifies that a request whose
// X-API-Key header does not match the configured strong key is answered
// with 401 Unauthorized.
func TestRequireAPIKey_WrongKeyUnauthorized(t *testing.T) {
	srv := &Server{cfg: &Config{APIKey: "a-very-strong-key-1234"}}

	// Downstream handler that would answer 200 if it were ever reached.
	next := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusOK)
	})
	guarded := srv.requireAPIKey(next)

	request := httptest.NewRequest("POST", "/api/packets", nil)
	request.Header.Set("X-API-Key", "wrong-key-entirely-here")
	recorder := httptest.NewRecorder()
	guarded.ServeHTTP(recorder, request)

	if got := recorder.Code; got != http.StatusUnauthorized {
		t.Errorf("expected 401 for wrong key, got %d", got)
	}
}
-400
View File
@@ -1,400 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"net/http"
"net/http/httptest"
"sync"
"testing"
"time"
"github.com/gorilla/mux"
)
// mustExecDB runs the SQL statement q on db's underlying connection and
// aborts the calling test immediately if execution returns an error.
func mustExecDB(t *testing.T, db *DB, q string) {
	t.Helper()
	_, err := db.conn.Exec(q)
	if err == nil {
		return
	}
	t.Fatalf("exec %q: %v", q, err)
}
// TestAreaEntryParsing checks that the "areas" section of the JSON config
// unmarshals into Config.Areas, covering both supported shapes: a polygon
// definition ("BEL") and a lat/lon bounding box ("BOX").
func TestAreaEntryParsing(t *testing.T) {
	const raw = `{
"port": 3000,
"areas": {
"BEL": {
"label": "Belgium",
"polygon": [[50.0, 2.5], [51.5, 2.5], [51.5, 6.4], [50.0, 6.4]]
},
"BOX": {
"label": "Bounding Box Area",
"latMin": 50.0, "latMax": 51.5, "lonMin": 2.5, "lonMax": 6.4
}
}
}`

	var parsed Config
	err := json.Unmarshal([]byte(raw), &parsed)
	if err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	if n := len(parsed.Areas); n != 2 {
		t.Fatalf("want 2 areas, got %d", n)
	}

	// Polygon-style entry.
	belgium := parsed.Areas["BEL"]
	if belgium.Label != "Belgium" {
		t.Errorf("label: want Belgium, got %q", belgium.Label)
	}
	if n := len(belgium.Polygon); n != 4 {
		t.Errorf("polygon: want 4 points, got %d", n)
	}

	// Bounding-box entry: the bound is a pointer, so it must be both
	// present and equal to the configured value.
	bbox := parsed.Areas["BOX"]
	if latMin := bbox.LatMin; latMin == nil || *latMin != 50.0 {
		t.Error("LatMin not parsed")
	}
}
// TestGetNodePubkeysInArea_Polygon seeds four nodes — one inside the
// Belgium polygon, one south of it, one with NULL coordinates and one at
// (0, 0) — and expects GetNodePubkeysInArea to return only the inside one.
func TestGetNodePubkeysInArea_Polygon(t *testing.T) {
	db := setupTestDBv2(t)

	seed := []string{
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('pk-inside', 50.85, 4.35)`,
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('pk-outside', 48.0, 4.35)`,
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('pk-nogps', NULL, NULL)`,
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('pk-zero', 0.0, 0.0)`,
	}
	for _, stmt := range seed {
		mustExecDB(t, db, stmt)
	}

	belgium := AreaEntry{
		Label:   "Belgium",
		Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}},
	}

	got, err := db.GetNodePubkeysInArea(belgium)
	if err != nil {
		t.Fatalf("GetNodePubkeysInArea: %v", err)
	}
	if onlyInside := len(got) == 1 && got[0] == "pk-inside"; !onlyInside {
		t.Errorf("want [pk-inside], got %v", got)
	}
}
// newTestStoreWithDB returns a PacketStore bound to db and cfg with every
// index and cache map pre-allocated (empty) and a 15-second RF cache TTL.
// It fills the struct directly rather than going through a constructor.
func newTestStoreWithDB(t *testing.T, db *DB, cfg *Config) *PacketStore {
	t.Helper()

	store := &PacketStore{
		db:         db,
		config:     cfg,
		rfCacheTTL: 15 * time.Second,
	}

	// Packet / observation indexes.
	store.byNode = make(map[string][]*StoreTx)
	store.byTxID = make(map[int]*StoreTx)
	store.byObsID = make(map[int]*StoreObs)
	store.byObserver = make(map[string][]*StoreObs)
	store.byHash = make(map[string]*StoreTx)
	store.byPayloadType = make(map[int][]*StoreTx)
	store.nodeHashes = make(map[string]map[string]bool)
	store.byPathHop = make(map[string][]*StoreTx)
	store.advertPubkeys = make(map[string]int)

	// Analytics result caches.
	store.rfCache = make(map[string]*cachedResult)
	store.topoCache = make(map[string]*cachedResult)
	store.hashCache = make(map[string]*cachedResult)
	store.collisionCache = make(map[string]*cachedResult)
	store.chanCache = make(map[string]*cachedResult)
	store.distCache = make(map[string]*cachedResult)
	store.subpathCache = make(map[string]*cachedResult)

	// Region / area membership caches.
	store.regionObsCache = make(map[string]map[string]bool)
	store.areaNodeCache = make(map[string]map[string]bool)
	store.areaNodeCacheTimes = make(map[string]time.Time)

	return store
}
// TestResolveAreaNodes_UnknownKey expects resolveAreaNodes to return nil
// for an area key that is not present in the configuration.
func TestResolveAreaNodes_UnknownKey(t *testing.T) {
	db := setupTestDBv2(t)

	belgium := AreaEntry{
		Label:   "Belgium",
		Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}},
	}
	cfg := &Config{Areas: map[string]AreaEntry{"BEL": belgium}}
	store := newTestStoreWithDB(t, db, cfg)

	if got := store.resolveAreaNodes("UNKNOWN"); got != nil {
		t.Errorf("want nil for unknown area, got %v", got)
	}
}
// TestResolveAreaNodes_CacheHit resolves area "BEL" once, deletes the only
// matching node from the database, and resolves again. The second result
// must still contain the node, which shows it was not re-read from the DB.
func TestResolveAreaNodes_CacheHit(t *testing.T) {
	db := setupTestDBv2(t)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('pk1', 50.85, 4.35)`)

	belgium := AreaEntry{
		Label:   "Belgium",
		Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}},
	}
	cfg := &Config{Areas: map[string]AreaEntry{"BEL": belgium}}
	store := newTestStoreWithDB(t, db, cfg)

	first := store.resolveAreaNodes("BEL")
	if found := first["pk1"]; !found {
		t.Fatal("pk1 should be in area BEL on first call")
	}

	// Remove the node so that a live DB query would come back empty; the
	// second lookup therefore has to be answered from the cache.
	mustExecDB(t, db, `DELETE FROM nodes WHERE public_key = 'pk1'`)

	second := store.resolveAreaNodes("BEL")
	if found := second["pk1"]; !found {
		t.Fatal("cache hit should still return pk1 after DB delete")
	}
}
// ingestAdvert registers a synthetic ADVERT transmission in the store's
// in-memory state: it is appended to s.packets and indexed in s.byHash and
// s.byPayloadType, all while holding s.mu for writing. The packet carries
// a fixed FirstSeen timestamp and the supplied decoded JSON.
func ingestAdvert(t *testing.T, s *PacketStore, hash, decodedJSON string) {
	t.Helper()

	// PayloadType is a pointer field, so take the address of a local copy.
	payloadType := PayloadADVERT
	advert := &StoreTx{
		Hash:        hash,
		FirstSeen:   "2026-01-01T00:00:00Z",
		PayloadType: &payloadType,
		DecodedJSON: decodedJSON,
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	s.packets = append(s.packets, advert)
	s.byHash[hash] = advert
	adverts := s.byPayloadType[PayloadADVERT]
	s.byPayloadType[PayloadADVERT] = append(adverts, advert)
}
// TestFilterPacketsByArea ingests one ADVERT from a node inside the "BEL"
// polygon and one from a node outside it, then expects QueryPackets with
// Area "BEL" to report exactly one packet.
func TestFilterPacketsByArea(t *testing.T) {
	db := setupTestDBv2(t)
	for _, stmt := range []string{
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('inside-node', 50.85, 4.35)`,
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('outside-node', 48.0, 4.35)`,
	} {
		mustExecDB(t, db, stmt)
	}

	belgium := AreaEntry{
		Label:   "Belgium",
		Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}},
	}
	store := newTestStoreWithDB(t, db, &Config{Areas: map[string]AreaEntry{"BEL": belgium}})

	ingestAdvert(t, store, "hash-in", `{"public_key":"inside-node","name":"Inside"}`)
	ingestAdvert(t, store, "hash-out", `{"public_key":"outside-node","name":"Outside"}`)

	query := PacketQuery{Limit: 50, Area: "BEL"}
	page := store.QueryPackets(query)
	if page.Total != 1 {
		t.Fatalf("want 1 packet in area BEL, got %d (packets: %v)", page.Total, page.Packets)
	}
}
// TestAnalyticsRFAreaFilter ingests one ADVERT from a node inside the
// "BEL" polygon and one from a node outside it, then expects the RF
// analytics restricted to area "BEL" to count exactly one transmission.
func TestAnalyticsRFAreaFilter(t *testing.T) {
	db := setupTestDBv2(t)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('inside-node', 50.85, 4.35)`)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('outside-node', 48.0, 4.35)`)
	cfg := &Config{Areas: map[string]AreaEntry{
		"BEL": {Label: "Belgium", Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}}},
	}}
	s := newTestStoreWithDB(t, db, cfg)
	ingestAdvert(t, s, "hash-rf-in", `{"public_key":"inside-node","name":"Inside"}`)
	ingestAdvert(t, s, "hash-rf-out", `{"public_key":"outside-node","name":"Outside"}`)

	result := s.GetAnalyticsRF("", "BEL")
	if result == nil {
		t.Fatal("GetAnalyticsRF returned nil")
	}

	// Check the type assertion explicitly: a missing key or a non-int
	// value would otherwise silently read as 0 and produce a misleading
	// "got 0" failure.
	rawTotal := result["totalTransmissions"]
	total, ok := rawTotal.(int)
	if !ok {
		t.Fatalf("totalTransmissions: want an int value, got %T (%v)", rawTotal, rawTotal)
	}
	if total != 1 {
		t.Errorf("want totalTransmissions=1 for BEL, got %d", total)
	}
}
// ingestChanMsg registers a synthetic GRP_TXT transmission in the store's
// in-memory state. Its decoded JSON carries the sender public key and the
// channel hash; the packet is appended to s.packets and indexed in
// s.byHash and s.byPayloadType while holding s.mu for writing.
func ingestChanMsg(t *testing.T, s *PacketStore, hash, senderPK string, chanHash int) {
	t.Helper()

	// PayloadType is a pointer field, so take the address of a local copy.
	payloadType := PayloadGRP_TXT
	msg := &StoreTx{
		Hash:        hash,
		FirstSeen:   "2026-01-01T00:00:00Z",
		PayloadType: &payloadType,
		DecodedJSON: fmt.Sprintf(`{"public_key":%q,"channelHash":%d}`, senderPK, chanHash),
	}

	s.mu.Lock()
	defer s.mu.Unlock()

	s.packets = append(s.packets, msg)
	s.byHash[hash] = msg
	groupTexts := s.byPayloadType[PayloadGRP_TXT]
	s.byPayloadType[PayloadGRP_TXT] = append(groupTexts, msg)
}
// TestAnalyticsChannelsAreaFilter ingests two channel messages on
// different channel hashes — one sent by a node inside the "BEL" polygon,
// one by a node outside it — and expects the channel analytics to report
// two active channels without an area filter and one with area "BEL".
func TestAnalyticsChannelsAreaFilter(t *testing.T) {
	db := setupTestDBv2(t)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('inside-node', 50.85, 4.35)`)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('outside-node', 48.0, 4.35)`)
	cfg := &Config{Areas: map[string]AreaEntry{
		"BEL": {Label: "Belgium", Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}}},
	}}
	s := newTestStoreWithDB(t, db, cfg)

	// inside-node sends on channel hash 42, outside-node on channel hash 99.
	ingestChanMsg(t, s, "ch-in", "inside-node", 42)
	ingestChanMsg(t, s, "ch-out", "outside-node", 99)

	unfiltered := s.GetAnalyticsChannels("", "")
	filtered := s.GetAnalyticsChannels("", "BEL")
	if filtered == nil {
		t.Fatal("GetAnalyticsChannels returned nil")
	}

	// Check both type assertions explicitly: a missing key or a non-int
	// value would otherwise silently read as 0 and hide the real cause
	// behind a "got 0" message.
	rawUnfiltered := unfiltered["activeChannels"]
	unfilteredCount, ok := rawUnfiltered.(int)
	if !ok {
		t.Fatalf("unfiltered activeChannels: want an int value, got %T (%v)", rawUnfiltered, rawUnfiltered)
	}
	rawFiltered := filtered["activeChannels"]
	filteredCount, ok := rawFiltered.(int)
	if !ok {
		t.Fatalf("filtered activeChannels: want an int value, got %T (%v)", rawFiltered, rawFiltered)
	}

	if unfilteredCount != 2 {
		t.Errorf("want 2 active channels unfiltered, got %d", unfilteredCount)
	}
	if filteredCount != 1 {
		t.Errorf("want 1 active channel for BEL, got %d", filteredCount)
	}
}
// TestGetNodePubkeysInArea_BoundingBox seeds one node inside a lat/lon
// bounding box and one north of it, and expects GetNodePubkeysInArea to
// return only the inside node for a bounds-only AreaEntry.
func TestGetNodePubkeysInArea_BoundingBox(t *testing.T) {
	db := setupTestDBv2(t)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('in', 50.5, 5.0)`)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('out', 52.0, 5.0)`)

	// The bounds are pointer fields, so they need addressable locals.
	var (
		south = 50.0
		north = 51.5
		west  = 2.5
		east  = 6.4
	)
	box := AreaEntry{LatMin: &south, LatMax: &north, LonMin: &west, LonMax: &east}

	got, err := db.GetNodePubkeysInArea(box)
	if err != nil {
		t.Fatalf("%v", err)
	}
	if onlyIn := len(got) == 1 && got[0] == "in"; !onlyIn {
		t.Errorf("want [in], got %v", got)
	}
}
// TestHandleConfigAreas serves GET /api/config/areas for a config with two
// areas and expects a 200 response whose JSON body is an array of two
// objects, each with a non-empty "label" and the keys "BEL" and "MST"
// between them.
func TestHandleConfigAreas(t *testing.T) {
	db := setupTestDBv2(t)
	cfg := &Config{Areas: map[string]AreaEntry{
		"BEL": {Label: "Belgium", Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}}},
		"MST": {Label: "Maastricht"},
	}}
	server := &Server{db: db, cfg: cfg}

	router := mux.NewRouter()
	router.HandleFunc("/api/config/areas", server.handleConfigAreas).Methods("GET")

	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, httptest.NewRequest(http.MethodGet, "/api/config/areas", nil))
	if recorder.Code != http.StatusOK {
		t.Fatalf("want 200, got %d", recorder.Code)
	}

	var areas []map[string]string
	if err := json.NewDecoder(recorder.Body).Decode(&areas); err != nil {
		t.Fatalf("decode: %v", err)
	}
	if n := len(areas); n != 2 {
		t.Fatalf("want 2 areas, got %d", n)
	}

	// Collect the returned keys and check every entry carries a label.
	seen := map[string]bool{}
	for _, area := range areas {
		key := area["key"]
		seen[key] = true
		if area["label"] == "" {
			t.Errorf("missing label for key %q", key)
		}
	}
	if !(seen["BEL"] && seen["MST"]) {
		t.Errorf("expected BEL and MST, got %v", seen)
	}
}
// TestHandleConfigAreasEmpty serves GET /api/config/areas for a config
// without any areas and expects the JSON body to decode into a list with
// zero elements.
func TestHandleConfigAreasEmpty(t *testing.T) {
	db := setupTestDBv2(t)
	server := &Server{db: db, cfg: &Config{}}

	router := mux.NewRouter()
	router.HandleFunc("/api/config/areas", server.handleConfigAreas).Methods("GET")

	recorder := httptest.NewRecorder()
	router.ServeHTTP(recorder, httptest.NewRequest(http.MethodGet, "/api/config/areas", nil))

	var areas []interface{}
	err := json.NewDecoder(recorder.Body).Decode(&areas)
	if err != nil {
		t.Fatalf("decode: %v", err)
	}
	if len(areas) > 0 {
		t.Errorf("want empty array, got %v", areas)
	}
}
// TestResolveAreaNodes_CalledBeforeRLock is a deadlock regression test for
// the anti-pattern where resolveAreaNodes (which does a DB query) is
// called while holding s.mu.RLock(). It issues several concurrent
// GetBulkHealth calls with an area filter and requires all of them to
// return.
//
// The wait is bounded: if the calls do not finish in time the test fails
// with an explicit message instead of hanging the whole test binary until
// the global `go test` timeout fires.
func TestResolveAreaNodes_CalledBeforeRLock(t *testing.T) {
	db := setupTestDBv2(t)
	mustExecDB(t, db, `INSERT INTO nodes (public_key, lat, lon) VALUES ('n1', 50.85, 4.35)`)
	cfg := &Config{Areas: map[string]AreaEntry{
		"BEL": {Label: "Belgium", Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}}},
	}}
	s := newTestStoreWithDB(t, db, cfg)
	ingestAdvert(t, s, "h1", `{"public_key":"n1","name":"N1"}`)

	var wg sync.WaitGroup
	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			s.GetBulkHealth(10, "", "BEL")
		}()
	}

	// Turn wg.Wait() into a channel so it can be raced against a timer.
	done := make(chan struct{})
	go func() {
		wg.Wait()
		close(done)
	}()

	select {
	case <-done:
		// All concurrent calls returned: no deadlock.
	case <-time.After(10 * time.Second):
		t.Fatal("concurrent GetBulkHealth calls with area filter did not return within 10s (possible deadlock)")
	}
}
// TestResolveAreaNodes_PerKeyTTL resolves two areas ("BEL", then "NL") so
// both results are cached, empties the nodes table, and resolves both
// again. Each area must still return its node, i.e. resolving one key must
// not evict the cached result of the other.
func TestResolveAreaNodes_PerKeyTTL(t *testing.T) {
	db := setupTestDBv2(t)
	for _, stmt := range []string{
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('bel-node', 50.85, 4.35)`,
		`INSERT INTO nodes (public_key, lat, lon) VALUES ('nl-node', 52.4, 4.9)`,
	} {
		mustExecDB(t, db, stmt)
	}

	areas := map[string]AreaEntry{
		"BEL": {Label: "Belgium", Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}}},
		"NL":  {Label: "Netherlands", Polygon: [][2]float64{{51.5, 3.4}, {53.6, 3.4}, {53.6, 7.2}, {51.5, 7.2}}},
	}
	store := newTestStoreWithDB(t, db, &Config{Areas: areas})

	// Warm the cache for both keys.
	if bel := store.resolveAreaNodes("BEL"); !bel["bel-node"] {
		t.Fatal("bel-node should be in BEL")
	}
	if nl := store.resolveAreaNodes("NL"); !nl["nl-node"] {
		t.Fatal("nl-node should be in NL")
	}

	// With the table emptied, any hit below can only come from the cache.
	mustExecDB(t, db, `DELETE FROM nodes`)

	// BEL must have survived the intervening NL lookup.
	if bel := store.resolveAreaNodes("BEL"); !bel["bel-node"] {
		t.Error("BEL cache was evicted by NL query (global TTL bug)")
	}
	// NL must still be cached as well.
	if nl := store.resolveAreaNodes("NL"); !nl["nl-node"] {
		t.Error("NL cache was evicted unexpectedly")
	}
}
// TestGetBulkHealth_AreaBypassesCap seeds 510 nodes at the same position
// inside the "BEL" polygon and calls GetBulkHealth with limit 10 and area
// "BEL". The result is expected to contain all 510 nodes, i.e. the area
// filter takes precedence over the limit.
func TestGetBulkHealth_AreaBypassesCap(t *testing.T) {
	db := setupTestDBv2(t)

	// Insert 510 nodes inside BEL — all at 50.85, 4.35.
	for n := 0; n < 510; n++ {
		stmt := fmt.Sprintf(
			`INSERT INTO nodes (public_key, lat, lon) VALUES ('node-%d', 50.85, 4.35)`, n,
		)
		mustExecDB(t, db, stmt)
	}

	belgium := AreaEntry{
		Label:   "Belgium",
		Polygon: [][2]float64{{50.0, 2.5}, {51.5, 2.5}, {51.5, 6.4}, {50.0, 6.4}},
	}
	store := newTestStoreWithDB(t, db, &Config{Areas: map[string]AreaEntry{"BEL": belgium}})

	// limit=10 is far below the number of in-area nodes on purpose.
	health := store.GetBulkHealth(10, "", "BEL")
	if got := len(health); got != 510 {
		t.Errorf("want 510 nodes from area BEL, got %d", got)
	}
}

Some files were not shown because too many files have changed in this diff Show More