docs: rename v3.8.4 → v3.9.0 (tag v3.8.4 reserved by immutable-releases)

Release notes — v3.8.4 (#1666)
Release notes for v3.8.4 — the "Phosphor migration" release. Six PRs (#1649–#1654, tracking #1648) plus three followup fixes (#1659/#1660/#1665) replaced all decorative emoji in the UI with Phosphor sprites and added a lint gate to prevent regression. ## Verification summary Test plan: `workspace-meshcore/test-plans/v3.8.4-cdp-test-plan.md` (93 tests, 16 sections). - Initial run (pre-#1665): 56 pass / 22 partial / 5 fail / 14 skipped. Two BLOCKER lint-gate breaches in observers and analytics Channels. - Final run (post-#1665, hot-patched to staging): both blockers ✅ — v384-1.2 (11 chips, 11 sprites, 0 emoji), v384-12.18 (315 lock sprites, 0 🔒 emoji). - 22 partials are plan selector drift, not code regressions; deferred to v3.8.5. ## Tagging Per the notes file, this is ready for `git tag -a v3.8.4 037dc8c4 -m "v3.8.4"` after merge — **not executed by this PR**. ## Review Draft for user review. Will be marked ready / merged before tag. --------- Co-authored-by: meshcore-bot <bot@meshcore.dev>
2026-06-13 00:51:39 +00:00 · 2026-06-12 02:55:15 +00:00 · 2026-06-11 19:36:47 -07:00 · 2026-06-11 18:29:30 -07:00 · 2026-06-11 15:44:33 -07:00 · 2026-06-11 11:51:03 -07:00
315 changed files with 44151 additions and 2786 deletions
@@ -1 +1 @@
-{"schemaVersion":1,"label":"e2e tests","message":"717 passed","color":"brightgreen"}
+{"schemaVersion":1,"label":"e2e tests","message":"821 passed","color":"brightgreen"}
@@ -1 +1 @@
-{"schemaVersion":1,"label":"frontend coverage","message":"37.73%","color":"red"}
+{"schemaVersion":1,"label":"frontend coverage","message":"36.64%","color":"red"}
@@ -42,6 +42,7 @@
    "PULL_THRESHOLD_PX": "readonly",
    "PacketFilter": "readonly",
    "PathInspector": "readonly",
+    "PrefixReserved": "readonly",
    "QRCode": "readonly",
    "ROLE_COLORS": "readonly",
    "ROLE_EMOJI": "readonly",
@@ -51,6 +52,7 @@
    "ROLE_STYLE": "readonly",
    "ROUTE_TYPES": "readonly",
    "RegionFilter": "readonly",
+    "RegionShowAll": "readonly",
    "SITE_CONFIG": "readonly",
    "SKEW_SEVERITY_COLORS": "readonly",
    "SKEW_SEVERITY_LABELS": "readonly",
@@ -59,6 +61,12 @@
    "SlideOver": "readonly",
    "TILE_DARK": "readonly",
    "TILE_LIGHT": "readonly",
+    "MC_TILE_PROVIDERS": "readonly",
+    "MC_setDarkTileProvider": "readonly",
+    "MC_getDarkTileProvider": "readonly",
+    "MC_setServerDefaultTileProvider": "readonly",
+    "MC_applyTileFilter": "readonly",
+    "MC_DARK_TILE_DEFAULT": "readonly",
    "TYPE_COLORS": "readonly",
    "TableResponsive": "readonly",
    "TableSort": "readonly",
@@ -201,20 +209,19 @@
    "escapeHtml": "readonly",
    "exports": "readonly",
    "favStar": "readonly",
+    "fetchAllNodes": "readonly",
    "filterPacketsByRoute": "readonly",
    "formatAbsoluteTimestamp": "readonly",
    "formatChartAxisLabel": "readonly",
    "formatDistance": "readonly",
    "formatDistanceRound": "readonly",
    "formatDrift": "readonly",
-    "formatEngineBadge": "readonly",
    "formatHex": "readonly",
    "formatIsoLike": "readonly",
    "formatSkew": "readonly",
    "formatTimestamp": "readonly",
    "formatTimestampCustom": "readonly",
    "formatTimestampWithTooltip": "readonly",
-    "formatVersionBadge": "readonly",
    "getDistanceUnit": "readonly",
    "getFavorites": "readonly",
    "getHashParams": "readonly",
@@ -252,6 +259,7 @@
    "pullReconnect": "readonly",
    "qrcode": "readonly",
    "registerPage": "readonly",
+    "renderVersionCard": "readonly",
    "renderSkewBadge": "readonly",
    "renderSkewSparkline": "readonly",
    "require": "readonly",
@@ -14,7 +14,7 @@ permissions:

 concurrency:
  group: ci-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

 env:
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
@@ -57,7 +57,7 @@ jobs:
          go build .
          # -race gates PR #1208's atomic.Pointer migration: the race-detector
          # is what makes path_inspect_atomic_race_test.go actually assert.
-          go test -race -coverprofile=server-coverage.out ./... 2>&1 | tee server-test.log
+          go test -timeout 15m -race -coverprofile=server-coverage.out ./... 2>&1 | tee server-test.log
          echo "--- Go Server Coverage ---"
          go tool cover -func=server-coverage.out | tail -1

@@ -66,7 +66,7 @@ jobs:
          set -e -o pipefail
          cd cmd/ingestor
          go build .
-          go test -coverprofile=ingestor-coverage.out ./... 2>&1 | tee ingestor-test.log
+          go test -timeout 15m -coverprofile=ingestor-coverage.out ./... 2>&1 | tee ingestor-test.log
          echo "--- Go Ingestor Coverage ---"
          go tool cover -func=ingestor-coverage.out | tail -1

@@ -81,6 +81,9 @@ jobs:
          go test ./...
          echo "--- Decrypt CLI tests passed ---"

+      - name: Verify Dockerfile COPY invariants (issue #1316)
+        run: bash scripts/check-dockerfile-internal-pkgs.sh
+
      - name: Lint CSS variables (issue #1128)
        run: |
          set -e
@@ -92,6 +95,8 @@ jobs:
          set -e
          node test-packet-filter.js
          node test-packet-filter-time.js
+          node test-channels-merge-1498-unit.js
+          node test-issue-1518-home-url.js
          node test-channel-decrypt-insecure-context.js
          node test-live-region-filter.js
          node test-issue-1136-observer-iata-map.js
@@ -99,6 +104,7 @@ jobs:
          node test-channel-qr-wiring.js
          node test-channel-modal-ux.js
          node test-channel-issue-1087.js
+          node test-issue-1409-no-encrypted-flood.js
          node test-channel-issue-1101.js
          node test-observer-iata-1188.js
          node test-pull-to-reconnect-1091.js
@@ -111,7 +117,47 @@ jobs:
          node test-issue-1364-pill-no-clamp.js
          node test-issue-1375-scope-stats-fetch.js
          node test-issue-1361-cb-presets.js
+          node test-issue-1380-cb-sim-overlay.js
+          node test-issue-1380-cb-reset-button.js
+          node test-issue-1407-cb-preset-propagation.js
+          node test-issue-1412-customizer-no-override.js
+          node test-issue-1418-raw-hex-extraction.js
+          node test-issue-1418-edge-weights.js
+          node test-issue-1418-cb-preset-ramp.js
+          node test-issue-1418-spider-fan.js
+          node test-issue-1418-deeplink-hops-channels.js
+          node test-issue-1418-polish-review.js
+          node test-issue-1420-tile-providers.js
+          node test-issue-1614-tile-url-function.js
+          node test-issue-1438-marker-css-vars.js
+          node test-issue-1562-observers-summary.js
+          node test-issue-1509-nav-active-bg.js
+          node test-issue-1509-detect-preset.js
          node test-live.js
+          node test-issue-1532-live-fullscreen.js
+          node test-issue-1619-feed-detail-card-draggable.js
+          node test-xss-escape-sinks.js
+          node test-preflight-xss-gate.js
+          node test-traces.js
+          node test-issue-1648-m4-emoji-scan.js
+
+      - name: 🛡️ Preflight XSS gate — actual --diff check (PR only)
+        # The fixture self-test above (test-preflight-xss-gate.js) only
+        # asserts the script's behavior against fixtures. It does NOT scan
+        # the PR's own changes. This step closes that gap by running the
+        # gate against added lines in public/**/*.{js,html} on the PR.
+        # Gate is PR-scoped only (per djb finding: merge commits would
+        # slip an opt-out otherwise). Master pushes skip this step.
+        if: github.event_name == 'pull_request'
+        env:
+          PR_BODY: ${{ github.event.pull_request.body }}
+          PREFLIGHT_PR_LABELS: ${{ join(github.event.pull_request.labels.*.name, ' ') }}
+        run: |
+          set -e
+          git fetch origin master --depth=50 2>&1 | tail -3 || true
+          # Materialize PR body to a file for the opt-out parser.
+          printf '%s' "$PR_BODY" > /tmp/pr-body.md
+          PREFLIGHT_PR_BODY=/tmp/pr-body.md bash scripts/check-xss-sinks.sh --diff origin/master

      - name: 🧹 Frontend lint (eslint no-undef) — issue #1342
        run: |
@@ -227,6 +273,54 @@ jobs:
      - name: Freshen fixture timestamps
        run: bash tools/freshen-fixture.sh test-fixtures/e2e-fixture.db

+      - name: Seed grouped-packet row for #1486 collapse test
+        # The committed fixture has 499 packets, each with exactly ONE
+        # observation, so the packets-page renders only flat
+        # (select-hash) rows. The #1486 repro needs at least one grouped
+        # (toggle-select) row. Insert a NEW transmission with 3
+        # observations.
+        #
+        # The server's async hash-migrate (cmd/server/hash_migrate.go)
+        # recomputes `transmissions.hash` from `raw_hex` via
+        # ComputeContentHash(), so the inserted hash MUST equal that
+        # function's output for the chosen raw_hex — otherwise the row
+        # gets relabelled and the E2E can't find it.
+        #
+        # raw_hex 15000102030405060708090a0b0c0d0e0f
+        #   → header=0x15 (route_type=1, payload_type=5)
+        #   → ComputeContentHash(...) = fae0c9e6d357a814
+        #
+        # The first_seen / observation timestamps are pinned to a date
+        # within retentionHours but outside the default 15-min UI
+        # window so the row is hidden in the default view (keeping
+        # test-e2e-playwright's first-10-rows hex-pane test
+        # unaffected) and reachable via the explicit ?timeWindow=0
+        # deep-link the #1486 test uses.
+        run: |
+          sqlite3 test-fixtures/e2e-fixture.db <<'SQL'
+          -- Sort the seeded row LAST in BOTH default packets views:
+          --   • flat view sorts by transmissions.id DESC → id=0 puts it last
+          --   • grouped view (#default for the packets page) sorts by
+          --     MAX(observations.timestamp) DESC → we must keep our obs
+          --     timestamps OLDER than every other fixture observation.
+          -- Fixture (after freshen) has obs timestamps spanning
+          --   2026-05-17 16:01:39Z .. 2026-05-28 00:00:00Z (max).
+          --   Note: freshen only shifts transmissions.first_seen forward
+          --   to ~now; observation.timestamp is left alone except for
+          --   the timestamp=0 case.
+          -- Use 2026-05-15 (~2 days older than the oldest fixture obs)
+          -- so our row sorts LAST in the grouped view too, keeping
+          -- test-e2e-playwright's first-10-rows hex-pane test
+          -- unaffected. The #1486 test still reaches the row via the
+          -- explicit hash + ?timeWindow=0 deep-link.
+          INSERT INTO transmissions(id,raw_hex,hash,first_seen,route_type,payload_type,payload_version,decoded_json,channel_hash,from_pubkey)
+            VALUES (0,'15000102030405060708090a0b0c0d0e0f','fae0c9e6d357a814','2026-05-15T00:00:00Z',1,5,0,'{"type":"CHAN","channel":"#test","text":"#1486 fixture"}',NULL,NULL);
+          INSERT INTO observations(transmission_id,observer_idx,direction,snr,rssi,score,path_json,timestamp,resolved_path) VALUES
+            (0,1,'rx',5.0,-95,0,'["AA"]',CAST(strftime('%s','2026-05-15T00:00:00Z') AS INTEGER),'["aa00000000000000000000000000000000000000000000000000000000000000"]'),
+            (0,2,'rx',5.5,-92,0,'["BB"]',CAST(strftime('%s','2026-05-15T00:00:00Z') AS INTEGER),'["bb00000000000000000000000000000000000000000000000000000000000000"]'),
+            (0,3,'rx',6.0,-90,0,'["CC"]',CAST(strftime('%s','2026-05-15T00:00:00Z') AS INTEGER),'["cc00000000000000000000000000000000000000000000000000000000000000"]');
+          SQL
+
      - name: Migrate fixture DB to current schema (#1287)
        # Server now ASSERTs schema is migrated and refuses to start
        # otherwise (cmd/server/main.go: dbschema.AssertReady). In prod
@@ -261,11 +355,15 @@ jobs:
          BASE_URL=http://localhost:13581 node test-channel-issue-1087-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-channel-issue-1111-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-map-modal-fluid-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-map-nodes-pagination-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-observer-iata-1188-e2e.js 2>&1 | tee -a e2e-output.txt
+          BASE_URL=http://localhost:13581 node test-issue-1639-observers-sort-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-fluid-1055-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-priority-1102-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-priority-1311-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-priority-1391-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1413-nav-overlap-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1400-nav-vertical-clip.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-nav-more-floor-1139-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-bottom-nav-1061-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-gestures-1062-e2e.js 2>&1 | tee -a e2e-output.txt
@@ -286,6 +384,7 @@ jobs:
          BASE_URL=http://localhost:13581 node test-issue-1146-path-link-contrast-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-issue-1147-section-order-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-issue-1151-orphan-separators-e2e.js 2>&1 | tee -a e2e-output.txt
+          BASE_URL=http://localhost:13581 node test-issue-1486-collapse-reopens-detail-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-logo-rebrand-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-logo-theme-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-logo-default-sage-teal-e2e.js 2>&1 | tee -a e2e-output.txt
@@ -297,6 +396,14 @@ jobs:
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1234-live-chrome-pass2-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1206-vcr-overlap-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1244-live-vcr-row-hints-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1510-live-nav-pin-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-live-fullscreen-1572-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1599-replay-freeze-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m1-icons-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m2-icons-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m3-icons-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1648-m4-icons-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1657-analytics-channels-group-sprites-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-issue-1224-channels-mobile-ux-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-issue-1367-channels-chat-app-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-issue-1236-map-mobile-e2e.js 2>&1 | tee -a e2e-output.txt
@@ -318,8 +425,38 @@ jobs:
          BASE_URL=http://localhost:13581 node test-customize-display-e2e.js 2>&1 | tee -a e2e-output.txt
          BASE_URL=http://localhost:13581 node test-customize-export-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-drag-manager-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1567-corner-clears-drag-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1306-collisions-terminology-e2e.js 2>&1 | tee -a e2e-output.txt
          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1374-route-map-a11y-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-list-render-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-selection-flow-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-add-modal-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-share-color-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-ws-batch-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-channels-ws-race-1498-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1487-byop-modal-layout-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1630-reach-mobile-e2e.js 2>&1 | tee -a e2e-output.txt
+          CHROMIUM_REQUIRE=1 BASE_URL=http://localhost:13581 node test-issue-1640-compare-discovery-e2e.js 2>&1 | tee -a e2e-output.txt
+
+      # #1616: slide-over focus-restore flake-gate. Runs the slide-over
+      # E2E 20 consecutive times against the SAME backend instance so
+      # the Chromium-headless focus race documented in #1172/#1616 has
+      # a 20× shot at firing. Any single non-zero exit aborts. This is
+      # the architectural-fix gate — if it ever turns red post-merge,
+      # the focused-but-hidden state has crept back in.
+      #
+      # PERMANENT step. Adds ~3-4 min to the e2e-test job in exchange
+      # for closing out a flake family that was blocking ~8 unrelated
+      # PRs at a time. If profiling pressures the budget later, drop
+      # repeat count first; do not delete.
+      - name: Slide-over E2E flake-gate (#1616, --repeat-each=20)
+        run: |
+          # pipefail is required: each run is piped through `tee`, and
+          # without it the pipeline's exit status is tee's (always 0), so
+          # `set -e` would never see a failing node run and the gate
+          # could not go red.
+          set -e -o pipefail
+          for i in $(seq 1 20); do
+            echo "--- slide-over E2E run $i/20 ---"
+            BASE_URL=http://localhost:13581 node test-slideover-1056-e2e.js 2>&1 | tee -a slideover-repeat-output.txt
+          done
+          echo "20 passed"

      - name: Collect frontend coverage (parallel)
        if: success() && github.event_name == 'push'
@@ -2,7 +2,22 @@

 ## [Unreleased]

-### 📝 Documentation Corrections
+## [3.9.0] — 2026-06-12
+
+See [docs/release-notes/v3.9.0.md](docs/release-notes/v3.9.0.md) for the full notes. 257 commits since v3.8.3 (72 substantive + 185 coverage bumps).
+
+### ✨ Highlights
+- **Relay timelines survive an ingestor restart** (#1643) — relay-hop attribution is rebuilt from `path_json` on cold load.
+- **Observer Compare is first-class** (#1642, #1645, #1647) — three new entry points + Tufte-grade compare page with state-preserving multi-select.
+- **Emoji → Phosphor icon migration** (#1648, #1649–#1654) — every UI emoji replaced with theme-tinted Phosphor sprites, lint-gated.
+- **Per-node Reach page + API** (#1627) — `GET /api/nodes/{pubkey}/reach` with cache invalidation on blacklist changes (#1636).
+- **Hashtag channels catalogue integration** (#1656) — public hashtag channels appear without manual config.
+- **Operator-customizable name-prefix hiding** (#1655) — new `hiddenNamePrefixes` config (default `["🚫"]`).
+
+### ⚙️ Config
+- New: `hiddenNamePrefixes`, `liveMap.maxNodes`, `runtime.maxMemoryMB`, configurable observer-health thresholds, `branding.homeUrl`, customizer disabled-tabs.
+
+### 📝 Documentation Corrections (carried from prior [Unreleased])
 - **PR #1324 historical record correction** (#1387) — the merged PR #1324 body referenced four tests that do NOT exist in master: `TestMultibyteCapPersistRoundTrip`, `TestMultibyteCapPersistSkipsUnknown`, `TestMaybePersistCoalesces`, and a `TryLock` coalescing test. The actual tests that landed are `TestRunMultibyteCapPersist_AppliesSnapshot` and `TestRunMultibyteCapPersist_NoSnapshot_NoOp`. See issue #1386 for the corrective test additions (round-trip, unknown-key skip, coalescing).

 ## [3.7.2] — 2026-05-06
@@ -0,0 +1,142 @@
+# MIGRATIONS — async vs sync policy
+
+CoreScope's ingestor applies schema/data migrations inline at boot in
+`cmd/ingestor/db.go`. Every migration that runs synchronously blocks the
+ingestor from accepting packets until it returns. On a dev DB that's
+milliseconds; at prod scale (1.9M+ observations, 80K+ adverts, 2600+ nodes
+on Cascadia) it can pin the boot for minutes and trigger restart loops —
+the "upgrade broke prod" failure class (#791, #1483, and others).
+
+## The rule
+
+**Any new `CREATE INDEX`, `ALTER TABLE`, or data-rewriting `UPDATE`/`DELETE`
+in a migration file MUST do ONE of the following:**
+
+### Option 1 — Run via `Store.RunAsyncMigration` (preferred for backfills)
+
+```go
+// Scheduled in OpenStore() AFTER the *Store is constructed.
+if err := s.RunAsyncMigration(ctx, "my_migration_v1",
+    func(ctx context.Context, db *sql.DB) error {
+        _, err := db.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS ...`)
+        return err
+    }); err != nil {
+    log.Printf("[migration/async] scheduling failed: %v", err)
+}
+```
+
+- The migration is recorded as `pending_async` in the `_async_migrations`
+  table **immediately** — the ingestor boots and starts ingesting.
+- `fn` runs in a goroutine; the WaitGroup is shared with the rest of the
+  ingestor (`Store.WaitForAsyncMigrations()` waits for everything).
+- On success the row flips to `done`; on error/panic to `failed` with the
+  error message captured.
+- Idempotent: rows in `done` state short-circuit; `failed`/`pending_async`
+  rows are retried on the next boot.
+
+Reference implementations: `Store.BackfillPathJSONAsync` (path_json
+backfill) and the converted `obs_observer_ts_idx_v1` index build in
+`OpenStore`.
+
+### Option 2 — Annotate as preflight-cheap
+
+Some migrations are genuinely cheap at any scale (e.g. `ALTER TABLE ADD
+COLUMN`, `CREATE INDEX` on a table you know is bounded to a few thousand
+rows). Annotate the migration block with a comment **on the line
+immediately above the migration block** so the preflight gate recognises
+the opt-out:
+
+```go
+// PREFLIGHT: async=true reason="ALTER ADD COLUMN — O(1) sqlite operation"
+if r := db.QueryRow("SELECT 1 FROM _migrations WHERE name = 'foo_v1'"); ...
+```
+
+The reason MUST be a real one-line justification you can defend in
+review. "It's fine" is not a reason.
+
+### Option 3 — Opt out per PR
+
+If the migration is genuinely safe and you don't want to add an inline
+annotation, put a single line in the PR body:
+
+```
+PREFLIGHT-MIGRATION-SCALE: <30s N=80K verified on Cascadia staging snapshot
+```
+
+This must include both `<30s` and `N=<some scale>` so a reviewer can
+challenge the measurement.
+
+## The gate
+
+`~/.openclaw/skills/pr-preflight/scripts/check-async-migrations.sh` runs
+on every PR via the preflight orchestrator. It greps the diff for new or
+modified migration blocks (files matching `cmd/ingestor/db.go`,
+`cmd/ingestor/maintenance.go`, `internal/dbschema/**`, `**/migrations/**`,
+`**/*.sql`, plus any Go file touching `CREATE INDEX` / `ALTER TABLE` /
+`CREATE UNIQUE INDEX`). For each hit it requires one of the three
+options above. Hard-fail (exit 1) — no warning-only mode.
+
+## Concurrency model
+
+CoreScope runs **one ingestor process** per deployment (`cmd/ingestor/`,
+single binary, single `*Store`). There is no cluster mode, no leader
+election, no second writer. SQLite is opened with `SetMaxOpenConns(1)`
+and a 5s `busy_timeout`; all writes (live MQTT ingest + async migration
+goroutines + maintenance backfills) serialize through the one connection
+in a single process.
+
+What this means for async migrations:
+
+- **No cross-process race** to worry about. Two ingestor instances
+  running against the same DB is not a supported deployment shape.
+- **Within a single process**, concurrent `RunAsyncMigration(name=X)`
+  callers race the initial `SELECT status` → `UPDATE/INSERT` step. The
+  current implementation re-schedules `fn` on a pending/failed row so a
+  duplicate caller may legitimately re-run it; once status is `done` all
+  further calls short-circuit. See
+  `TestRunAsyncMigration_ConcurrentSameNameSerialized` for the contract.
+- **`fn` runs concurrently with live ingest writers.** Because
+  `MaxOpenConns=1`, a long `CREATE INDEX` will serialize behind / ahead
+  of insert batches via SQLite's busy-timeout. This is acceptable for
+  index builds (the boot path is unblocked, which was the whole point),
+  but it means long migrations DO add latency to live writes. Document
+  expected runtime in the `reason=` annotation and prefer batched/chunked
+  fn implementations for multi-minute work (see `BackfillPathJSONAsync`
+  for the canonical batched pattern with inter-batch `time.Sleep`).
+
+## Scale budgets
+
+Per-migration target: **<30s** at current prod scale (Cascadia: ~2,600
+nodes, ~80K adverts, ~1.9M observations).
+
+Worked example (#1483, `obs_observer_ts_idx_v1`): composite index build
+on `observations(observer_idx, timestamp)`. At ~1.9M rows the sync build
+pinned ingestor boot for several minutes → restart loop. Converted to
+async via `RunAsyncMigration` in `OpenStore` so boot returns immediately
+and the index materializes in the background; the existing `_migrations`
+short-circuit at the top of the migration block ensures DBs that already
+completed the sync v3.8.3 build do NOT re-run it through the goroutine
+path on subsequent boots.
+
+If you cannot meet the <30s budget, document the expected upper bound
+and operator runbook expectation (e.g. "index build expected ~10 min on
+a 5M-row table; ingestor remains responsive; monitor via
+`SELECT status, error FROM _async_migrations WHERE name = ...`").
+
+## Why this exists
+
+Pattern that keeps repeating:
+
+1. Author writes `CREATE INDEX foo ON observations(...)` in a migration.
+2. Local dev DB has ~100 rows. Migration returns in 1ms. CI is green.
+3. Reviewer focuses on plan correctness, not scale.
+4. Ship.
+5. Prod boots, sqlite scans 1.9M rows, the ingestor sits at `[migration]
+   Adding index...` for 8 minutes, healthcheck times out, container
+   restarts, loops.
+6. Operator pages. Hotfix. Apology.
+
+The gate doesn't try to detect table size (undecidable from a diff). It
+enforces **annotation discipline**: every author who adds a migration
+must consciously decide which bucket it falls into and write that down.
+That is the cheapest possible intervention that breaks the cycle.
@@ -21,6 +21,7 @@ The Go backend serves all 40+ API endpoints from an in-memory packet store with
 | Memory (56K packets) | **~300 MB** (vs 1.3 GB on Node.js) |
 | WebSocket broadcast | **Real-time** to all connected browsers |
 | Channel decryption | **AES-128-ECB** with rainbow table |
+| GOMEMLIMIT (memory-constrained hosts) | **set to ≥1.5× working set** (e.g. 1536 MiB on a 2 GB Pi for a ~1 GB store). Lower values trigger a GC death-spiral. Configure via the `GOMEMLIMIT` env var or `runtime.maxMemoryMB` in `config.json`; env wins. Applies to both server and ingestor. See [#1010](https://github.com/Kpa-clawbot/CoreScope/issues/1010). |

 See [PERFORMANCE.md](PERFORMANCE.md) for full benchmarks.

@@ -294,5 +294,6 @@
  "#colombia": "bea223a8c1d13ed9638ee000ea3a6aca",
  "#bogota": "6d0864985b64350ce4cbfebf4979e970",
  "#peru": "7e6fc347bf29a4c128ac3156865bd521",
-  "#lima": "5f167ce354eca08ab742463df10ef255"
-}
+  "#lima": "5f167ce354eca08ab742463df10ef255",
+  "Public": "8b3387e9c5cdea6ac9e5edbaa115cd72"
+}
@@ -0,0 +1,148 @@
+// Async migration helper — runs schema/backfill work that may take minutes on
+// large prod tables WITHOUT blocking ingestor startup.
+//
+// MIGRATION ANNOTATION CONVENTION (read this before touching migrations):
+//
+//   Sync schema/data migrations (CREATE INDEX, ALTER TABLE, UPDATE ... WHERE)
+//   that run inline during OpenStore() block the ingestor from accepting
+//   packets until they finish. On an empty dev DB they return in milliseconds;
+//   at prod scale (1.9M+ observations, 80K+ adverts) they can pin the boot
+//   for minutes and trigger restart loops. This regression class has bitten us
+//   repeatedly (#791 resolved_path backfill, #1483 obs_observer_ts_idx_v1).
+//
+//   ANY new CREATE INDEX / ALTER TABLE / data-rewrite migration MUST EITHER:
+//     1. Run via Store.RunAsyncMigration(...) below (preferred for backfills
+//        and any work that may touch >1K rows). The migration is recorded as
+//        `pending_async` immediately, returns to the caller (boot proceeds),
+//        and completes in a goroutine. Status flips to `done` (or `failed`
+//        with an error message) when fn returns.
+//     2. Carry the preflight annotation comment immediately above the
+//        migration block, e.g.
+//             // PREFLIGHT: async=true reason="<one-line justification>"
+//        Use this for migrations that are genuinely cheap at any scale
+//        (e.g. ALTER TABLE ADD COLUMN, CREATE INDEX on a known-bounded
+//        table). The annotation is grepped by
+//        ~/.openclaw/skills/pr-preflight/scripts/check-async-migrations.sh
+//        — its absence on a touched migration block is a hard-fail gate.
+//
+//   See MIGRATIONS.md in the repo root for the full policy and examples.
+
+package main
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"log"
+)
+
+// ensureAsyncMigrationsTable creates the bookkeeping table used by
+// RunAsyncMigration / AsyncMigrationStatus. Idempotent.
+func ensureAsyncMigrationsTable(db *sql.DB) error {
+	_, err := db.Exec(`
+		CREATE TABLE IF NOT EXISTS _async_migrations (
+			name       TEXT PRIMARY KEY,
+			status     TEXT NOT NULL,             -- pending_async | done | failed
+			started_at TEXT NOT NULL DEFAULT (datetime('now')),
+			ended_at   TEXT,
+			error      TEXT
+		)
+	`)
+	return err
+}
+
+// RunAsyncMigration registers `name` as a pending async migration and
+// schedules `fn` to run in a background goroutine. It returns to the caller
+// immediately so the ingestor can keep booting.
+//
+// Contract (pinned by async_migration_test.go):
+//   - status is `pending_async` IMMEDIATELY after this returns.
+//   - fn runs in a goroutine; on success status becomes `done`, on error or
+//     panic status becomes `failed` and the error is recorded.
+//   - Idempotent: if a row with the same name already exists in `done`
+//     state, fn is NOT re-run. If in `failed` or `pending_async` state,
+//     fn IS re-scheduled (a previous run may have crashed mid-flight).
+//   - The Store's backfill WaitGroup tracks the goroutine so tests/shutdown
+//     can wait via Store.WaitForAsyncMigrations().
+func (s *Store) RunAsyncMigration(ctx context.Context, name string, fn func(context.Context, *sql.DB) error) error {
+	if err := ensureAsyncMigrationsTable(s.db); err != nil {
+		return fmt.Errorf("ensure _async_migrations: %w", err)
+	}
+
+	var existing string
+	row := s.db.QueryRow(`SELECT status FROM _async_migrations WHERE name = ?`, name)
+	switch err := row.Scan(&existing); err {
+	case nil:
+		if existing == "done" {
+			return nil // already complete, nothing to do
+		}
+		// pending_async or failed → reset and retry.
+		if _, err := s.db.Exec(`
+			UPDATE _async_migrations
+			SET status = 'pending_async', started_at = datetime('now'), ended_at = NULL, error = NULL
+			WHERE name = ?`, name); err != nil {
+			return fmt.Errorf("reset async migration %q: %w", name, err)
+		}
+	case sql.ErrNoRows:
+		if _, err := s.db.Exec(`
+			INSERT INTO _async_migrations (name, status) VALUES (?, 'pending_async')`,
+			name); err != nil {
+			return fmt.Errorf("register async migration %q: %w", name, err)
+		}
+	default:
+		return fmt.Errorf("lookup async migration %q: %w", name, err)
+	}
+
+	s.backfillWg.Add(1)
+	go func() {
+		defer s.backfillWg.Done()
+		var runErr error
+		defer func() {
+			if r := recover(); r != nil {
+				runErr = fmt.Errorf("panic: %v", r)
+				log.Printf("[async-migration] %q panic recovered: %v", name, r)
+			}
+			if runErr != nil {
+				if _, err := s.db.Exec(`
+					UPDATE _async_migrations
+					SET status = 'failed', ended_at = datetime('now'), error = ?
+					WHERE name = ?`, runErr.Error(), name); err != nil {
+					log.Printf("[async-migration] failed to record failure for %q: %v", name, err)
+				}
+				log.Printf("[async-migration] %q FAILED: %v", name, runErr)
+				return
+			}
+			if _, err := s.db.Exec(`
+				UPDATE _async_migrations
+				SET status = 'done', ended_at = datetime('now'), error = NULL
+				WHERE name = ?`, name); err != nil {
+				log.Printf("[async-migration] failed to mark %q done: %v", name, err)
+				return
+			}
+			log.Printf("[async-migration] %q done", name)
+		}()
+		log.Printf("[async-migration] %q starting (boot continues)", name)
+		runErr = fn(ctx, s.db)
+	}()
+
+	return nil
+}
+
+// AsyncMigrationStatus returns the current status of an async migration
+// (one of "pending_async", "done", "failed") or sql.ErrNoRows if no such
+// migration has been registered.
+func (s *Store) AsyncMigrationStatus(name string) (string, error) {
+	if err := ensureAsyncMigrationsTable(s.db); err != nil {
+		return "", err
+	}
+	var status string
+	err := s.db.QueryRow(`SELECT status FROM _async_migrations WHERE name = ?`, name).Scan(&status)
+	return status, err
+}
+
+// WaitForAsyncMigrations blocks until all currently-scheduled async migrations
+// finish. Intended for tests + graceful shutdown; production boot path does NOT
+// call this (that's the whole point).
+func (s *Store) WaitForAsyncMigrations() {
+	s.backfillWg.Wait()
+}
@@ -0,0 +1,299 @@
+package main
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// waitForStatus polls AsyncMigrationStatus until it equals `want`; if `timeout` elapses first it fails the test.
+func waitForStatus(t *testing.T, s *Store, name, want string, timeout time.Duration) string {
+	t.Helper()
+	deadline := time.Now().Add(timeout)
+	var status string
+	var err error
+	for time.Now().Before(deadline) {
+		status, err = s.AsyncMigrationStatus(name)
+		if err == nil && status == want {
+			return status
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+	t.Fatalf("status never reached %q within %s: got %q (err=%v)", want, timeout, status, err)
+	return status
+}
+
+// TestRunAsyncMigration_PendingThenDone pins the contract for RunAsyncMigration:
+//
+//   1. After calling, the migration name MUST be queryable in the migrations
+//      table with status `pending_async` IMMEDIATELY (no waiting for fn).
+//   2. After fn returns, the status MUST transition to `done`.
+//   3. RunAsyncMigration MUST return without blocking on fn.
+//
+// This is the regression test for the recurring "sync migration on large
+// table blocks ingestor startup" class (#791, #1483, ...). If this test
+// fails the contract is broken — do not relax it; fix the runner.
+func TestRunAsyncMigration_PendingThenDone(t *testing.T) {
+	s := newTestStore(t)
+	ctx := context.Background()
+
+	started := make(chan struct{})
+	release := make(chan struct{})
+
+	const name = "test_async_migration_v1"
+	if err := s.RunAsyncMigration(ctx, name, func(ctx context.Context, db *sql.DB) error {
+		close(started)
+		<-release
+		return nil
+	}); err != nil {
+		t.Fatalf("RunAsyncMigration returned error: %v", err)
+	}
+
+	// Wait for the goroutine to actually start before checking status; this
+	// proves RunAsyncMigration did not block on fn and that fn is running
+	// concurrently.
+	select {
+	case <-started:
+	case <-time.After(2 * time.Second):
+		t.Fatal("async migration fn did not start within 2s — RunAsyncMigration may have blocked or never scheduled")
+	}
+
+	status, err := s.AsyncMigrationStatus(name)
+	if err != nil {
+		t.Fatalf("AsyncMigrationStatus while running: %v", err)
+	}
+	if status != "pending_async" {
+		t.Fatalf("status while fn running: got %q, want %q", status, "pending_async")
+	}
+
+	close(release)
+
+	// Poll for transition to done.
+	deadline := time.Now().Add(2 * time.Second)
+	for time.Now().Before(deadline) {
+		status, err = s.AsyncMigrationStatus(name)
+		if err == nil && status == "done" {
+			return
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+	t.Fatalf("status never transitioned to done within 2s: got %q (err=%v)", status, err)
+}
+
+// TestRunAsyncMigration_PanicCapture proves that a panic inside fn does NOT
+// leak past the recover, AND that the migration row transitions to
+// "failed" with the panic message captured — NOT silently to "done".
+// Operator visibility into mid-migration crashes is the whole point.
+func TestRunAsyncMigration_PanicCapture(t *testing.T) {
+	s := newTestStore(t)
+	const name = "test_panic_capture_v1"
+
+	if err := s.RunAsyncMigration(context.Background(), name,
+		func(ctx context.Context, db *sql.DB) error {
+			panic("synthetic boom")
+		}); err != nil {
+		t.Fatalf("RunAsyncMigration returned error: %v", err)
+	}
+
+	s.WaitForAsyncMigrations()
+
+	status, err := s.AsyncMigrationStatus(name)
+	if err != nil {
+		t.Fatalf("status lookup: %v", err)
+	}
+	if status != "failed" {
+		t.Fatalf("status after panic: got %q, want %q (silent-done would be catastrophic)", status, "failed")
+	}
+
+	var errMsg sql.NullString
+	if err := s.db.QueryRow(`SELECT error FROM _async_migrations WHERE name = ?`, name).Scan(&errMsg); err != nil {
+		t.Fatalf("error column lookup: %v", err)
+	}
+	if !errMsg.Valid || errMsg.String == "" {
+		t.Fatalf("error column empty after panic — operator has no clue what failed")
+	}
+}
+
+// TestRunAsyncMigration_IdempotentSecondCallNoOps verifies that calling
+// RunAsyncMigration a second time with the same name AFTER it has reached
+// "done" status does NOT re-run fn. This protects the prod path: ingestor
+// restarts must not rebuild already-built indexes.
+func TestRunAsyncMigration_IdempotentSecondCallNoOps(t *testing.T) {
+	s := newTestStore(t)
+	const name = "test_idempotent_v1"
+
+	var calls int32
+	fn := func(ctx context.Context, db *sql.DB) error {
+		atomic.AddInt32(&calls, 1)
+		return nil
+	}
+
+	if err := s.RunAsyncMigration(context.Background(), name, fn); err != nil {
+		t.Fatalf("first call: %v", err)
+	}
+	s.WaitForAsyncMigrations()
+	waitForStatus(t, s, name, "done", 2*time.Second)
+
+	// Second call must short-circuit; fn must not be invoked again.
+	if err := s.RunAsyncMigration(context.Background(), name, fn); err != nil {
+		t.Fatalf("second call: %v", err)
+	}
+	s.WaitForAsyncMigrations()
+
+	if got := atomic.LoadInt32(&calls); got != 1 {
+		t.Fatalf("fn invoked %d times, want 1 (done-state row must short-circuit)", got)
+	}
+}
+
+// TestRunAsyncMigration_RestartSafetyFailedIsRetried simulates a crashed
+// previous run: a row exists in `failed` state from a prior boot. The next
+// RunAsyncMigration call MUST re-schedule fn (reset to pending_async, then
+// run it), not leave the migration stuck in `failed` forever.
+func TestRunAsyncMigration_RestartSafetyFailedIsRetried(t *testing.T) {
+	s := newTestStore(t)
+	const name = "test_restart_failed_v1"
+
+	if err := ensureAsyncMigrationsTable(s.db); err != nil {
+		t.Fatalf("ensure table: %v", err)
+	}
+	if _, err := s.db.Exec(`INSERT INTO _async_migrations (name, status, error) VALUES (?, 'failed', 'simulated prior crash')`, name); err != nil {
+		t.Fatalf("seed failed row: %v", err)
+	}
+
+	var calls int32
+	if err := s.RunAsyncMigration(context.Background(), name,
+		func(ctx context.Context, db *sql.DB) error {
+			atomic.AddInt32(&calls, 1)
+			return nil
+		}); err != nil {
+		t.Fatalf("RunAsyncMigration on failed row: %v", err)
+	}
+	s.WaitForAsyncMigrations()
+	waitForStatus(t, s, name, "done", 2*time.Second)
+
+	if got := atomic.LoadInt32(&calls); got != 1 {
+		t.Fatalf("fn invoked %d times, want 1 (failed-state row must be retried)", got)
+	}
+
+	// And the error column must be cleared on success.
+	var errCol sql.NullString
+	if err := s.db.QueryRow(`SELECT error FROM _async_migrations WHERE name = ?`, name).Scan(&errCol); err != nil {
+		t.Fatalf("error col: %v", err)
+	}
+	if errCol.Valid && errCol.String != "" {
+		t.Fatalf("error column not cleared on retry success: %q", errCol.String)
+	}
+}
+
+// TestRunAsyncMigration_RestartSafetyPendingIsRetried simulates the
+// ingestor crashing while a migration was still in `pending_async` (the
+// goroutine never finished). On next boot the migration MUST be re-picked-up
+// — leaving it stuck in pending forever would be a silent prod outage.
+func TestRunAsyncMigration_RestartSafetyPendingIsRetried(t *testing.T) {
+	s := newTestStore(t)
+	const name = "test_restart_pending_v1"
+
+	if err := ensureAsyncMigrationsTable(s.db); err != nil {
+		t.Fatalf("ensure table: %v", err)
+	}
+	if _, err := s.db.Exec(`INSERT INTO _async_migrations (name, status) VALUES (?, 'pending_async')`, name); err != nil {
+		t.Fatalf("seed pending row: %v", err)
+	}
+
+	var calls int32
+	if err := s.RunAsyncMigration(context.Background(), name,
+		func(ctx context.Context, db *sql.DB) error {
+			atomic.AddInt32(&calls, 1)
+			return nil
+		}); err != nil {
+		t.Fatalf("RunAsyncMigration on pending row: %v", err)
+	}
+	s.WaitForAsyncMigrations()
+	waitForStatus(t, s, name, "done", 2*time.Second)
+
+	if got := atomic.LoadInt32(&calls); got != 1 {
+		t.Fatalf("fn invoked %d times, want 1 (pending row must be retried after crash)", got)
+	}
+}
+
+// TestRunAsyncMigration_FnErrorRecorded covers the non-panic failure path:
+// fn returns an error → status MUST be "failed" with the error captured.
+func TestRunAsyncMigration_FnErrorRecorded(t *testing.T) {
+	s := newTestStore(t)
+	const name = "test_fn_error_v1"
+
+	if err := s.RunAsyncMigration(context.Background(), name,
+		func(ctx context.Context, db *sql.DB) error {
+			return fmt.Errorf("simulated migration error")
+		}); err != nil {
+		t.Fatalf("RunAsyncMigration: %v", err)
+	}
+	s.WaitForAsyncMigrations()
+
+	status, err := s.AsyncMigrationStatus(name)
+	if err != nil {
+		t.Fatalf("status: %v", err)
+	}
+	if status != "failed" {
+		t.Fatalf("status: got %q, want failed", status)
+	}
+
+	var errCol sql.NullString
+	if err := s.db.QueryRow(`SELECT error FROM _async_migrations WHERE name = ?`, name).Scan(&errCol); err != nil {
+		t.Fatalf("error col: %v", err)
+	}
+	if !errCol.Valid || errCol.String == "" {
+		t.Fatalf("error column empty after fn error")
+	}
+}
+
+// TestRunAsyncMigration_ConcurrentSameNameSerialized validates the
+// single-process-instance assumption: ingestor has only one *Store, and
+// concurrent RunAsyncMigration(name=X) calls on the SAME *Store must run
+// fn a bounded number of times (1..caller count) and finish in `done`.
+// (CoreScope does not support multi-ingestor / cluster mode — see
+// MIGRATIONS.md "Concurrency" note — so cross-process races are out of scope.)
+func TestRunAsyncMigration_ConcurrentSameNameSerialized(t *testing.T) {
+	s := newTestStore(t)
+	const name = "test_concurrent_serialize_v1"
+
+	var calls int32
+	fn := func(ctx context.Context, db *sql.DB) error {
+		atomic.AddInt32(&calls, 1)
+		time.Sleep(20 * time.Millisecond)
+		return nil
+	}
+
+	var wg sync.WaitGroup
+	for i := 0; i < 5; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			// All concurrent callers use the SAME name. Each is allowed
+			// to no-op (status==done short-circuit), schedule a re-run,
+			// or return an error (deliberately ignored here); the only
+			// invariant asserted below is that fn runs between 1 and 5
+			// times and the row ends in `done`.
+			_ = s.RunAsyncMigration(context.Background(), name, fn)
+		}()
+	}
+	wg.Wait()
+	s.WaitForAsyncMigrations()
+	waitForStatus(t, s, name, "done", 2*time.Second)
+
+	// The contract per the helper's docstring + Idempotent test is: once
+	// status is `done`, subsequent calls short-circuit. Concurrent calls
+	// that lose the race to set up the pending_async row may legitimately
+	// re-schedule fn (the comment "previous run may have crashed
+	// mid-flight" justifies retry on pending_async). The hard bound is
+	// "fn runs at most ONCE PER pending->done transition" — for this
+	// test we assert fn ran at least once and at most a small bounded
+	// number (5 callers, each may have scheduled before any reached done).
+	if got := atomic.LoadInt32(&calls); got < 1 || got > 5 {
+		t.Fatalf("fn invoked %d times, want 1..5 inclusive (bounded by caller count)", got)
+	}
+}
@@ -53,6 +53,7 @@ type Config struct {
 	HashRegions     []string          `json:"hashRegions,omitempty"`
 	Retention       *RetentionConfig  `json:"retention,omitempty"`
 	Metrics         *MetricsConfig    `json:"metrics,omitempty"`
+	Runtime         *RuntimeConfig    `json:"runtime,omitempty"`
 	GeoFilter            *GeoFilterConfig     `json:"geo_filter,omitempty"`
 	ForeignAdverts       *ForeignAdvertConfig `json:"foreignAdverts,omitempty"`
 	ValidateSignatures   *bool             `json:"validateSignatures,omitempty"`
@@ -80,6 +81,12 @@ type Config struct {
 	// NeighborEdgesMaxAgeDays controls neighbor_edges row retention
 	// (#1287 — moved from cmd/server). 0 = default 5.
 	NeighborEdgesMaxAgeDays int `json:"neighborEdgesMaxAgeDays,omitempty"`
+
+	// IngestBufferSize caps the in-memory queue (number of MQTT messages) held
+	// while the single SQLite writer is blocked by startup migrations/prunes
+	// (#1608). Received messages are drained once the write path is ready.
+	// 0 / unset / negative => default. Bounded memory.
+	IngestBufferSize int `json:"ingestBufferSize,omitempty"`
 }

 // NeighborEdgesDaysOrDefault returns the configured pruning window or 5.
@@ -90,6 +97,17 @@ func (c *Config) NeighborEdgesDaysOrDefault() int {
 	return c.NeighborEdgesMaxAgeDays
 }

+// IngestBufferSizeOrDefault returns the ingest buffer capacity. Default 50000:
+// at typical mesh rates (~1-2 msg/s) that is many minutes of headroom while a
+// startup migration holds the writer; each queued item is a small closure, so
+// worst-case memory stays in the tens of MB.
+func (c *Config) IngestBufferSizeOrDefault() int {
+	if c.IngestBufferSize > 0 {
+		return c.IngestBufferSize
+	}
+	return 50000
+}
+
 // GeoFilterConfig is an alias for the shared geofilter.Config type.
 type GeoFilterConfig = geofilter.Config

@@ -134,6 +152,15 @@ type MetricsConfig struct {
 	SampleIntervalSec int `json:"sampleIntervalSec"`
 }

+// RuntimeConfig holds Go runtime tuning knobs (#1010).
+type RuntimeConfig struct {
+	// MaxMemoryMB is the soft memory limit (GOMEMLIMIT) in MiB applied via
+	// runtime/debug.SetMemoryLimit at startup. The GOMEMLIMIT environment
+	// variable, when set, takes precedence over this value. 0/unset means
+	// no limit is applied and default Go runtime behavior is preserved.
+	MaxMemoryMB int `json:"maxMemoryMB"`
+}
+
 // DBConfig is the shared SQLite vacuum/maintenance config (#919, #921).
 type DBConfig = dbconfig.DBConfig

@@ -286,15 +313,24 @@ func LoadConfig(path string) (*Config, error) {
 }

 // ResolvedSources returns the final list of MQTT sources to connect to.
+//
+// Scheme mapping:
+//
+//	mqtt://  → tcp://   (paho plain TCP)
+//	mqtts:// → ssl://   (paho TLS over TCP)
+//	ws://               (paho WebSocket — passed through, no mapping needed)
+//	wss://              (paho WebSocket TLS — passed through, no mapping needed)
 func (c *Config) ResolvedSources() []MQTTSource {
 	for i := range c.MQTTSources {
-		// paho uses tcp:// and ssl:// not mqtt:// and mqtts://
+		// paho uses tcp:// and ssl:// for plain MQTT; ws:// and wss:// are accepted natively.
 		b := c.MQTTSources[i].Broker
 		if strings.HasPrefix(b, "mqtt://") {
 			c.MQTTSources[i].Broker = "tcp://" + b[7:]
 		} else if strings.HasPrefix(b, "mqtts://") {
 			c.MQTTSources[i].Broker = "ssl://" + b[8:]
 		}
+		// ws:// and wss:// pass through unchanged — paho handles WebSocket
+		// connections natively via gorilla/websocket.
 	}
 	return c.MQTTSources
 }
@@ -394,3 +394,105 @@ func TestMQTTSourceRegionField(t *testing.T) {
 		t.Fatalf("expected region PDX, got %q", cfg.MQTTSources[0].Region)
 	}
 }
+
+// TestResolvedSourcesSchemeMapping verifies that mqtt:// and mqtts:// are translated
+// to the paho-native tcp:// and ssl:// schemes, while ws:// and wss:// pass through
+// unchanged (paho handles WebSocket connections natively).
+func TestResolvedSourcesSchemeMapping(t *testing.T) {
+	tests := []struct {
+		input string
+		want  string
+	}{
+		{"mqtt://host:1883", "tcp://host:1883"},
+		{"mqtts://host:8883", "ssl://host:8883"},
+		{"tcp://host:1883", "tcp://host:1883"},
+		{"ssl://host:8883", "ssl://host:8883"},
+		{"ws://host:9001", "ws://host:9001"},
+		{"wss://host:9001", "wss://host:9001"},
+		{"ws://host:9001/mqtt", "ws://host:9001/mqtt"},
+		{"wss://host:9001/mqtt", "wss://host:9001/mqtt"},
+	}
+
+	for _, tt := range tests {
+		cfg := &Config{
+			MQTTSources: []MQTTSource{
+				{Name: "test", Broker: tt.input, Topics: []string{"meshcore/#"}},
+			},
+		}
+		sources := cfg.ResolvedSources()
+		if got := sources[0].Broker; got != tt.want {
+			t.Errorf("ResolvedSources(%q) = %q, want %q", tt.input, got, tt.want)
+		}
+	}
+}
+
+// TestLoadConfigWSSource verifies that a WebSocket MQTT source round-trips through
+// LoadConfig correctly — username/password preserved, scheme unchanged.
+func TestLoadConfigWSSource(t *testing.T) {
+	t.Setenv("DB_PATH", "")
+	t.Setenv("MQTT_BROKER", "")
+
+	dir := t.TempDir()
+	cfgPath := filepath.Join(dir, "config.json")
+	os.WriteFile(cfgPath, []byte(`{
+		"dbPath": "test.db",
+		"mqttSources": [
+			{
+				"name": "local-tcp",
+				"broker": "mqtt://localhost:1883",
+				"topics": ["meshcore/#"]
+			},
+			{
+				"name": "wsmqtt-ws",
+				"broker": "wss://wsmqtt.example.com/mqtt",
+				"username": "corescope",
+				"password": "s3cr3t",
+				"topics": ["meshcore/#"]
+			}
+		]
+	}`), 0o644)
+
+	cfg, err := LoadConfig(cfgPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(cfg.MQTTSources) != 2 {
+		t.Fatalf("mqttSources len=%d, want 2", len(cfg.MQTTSources))
+	}
+
+	tcp := cfg.MQTTSources[0]
+	if tcp.Name != "local-tcp" {
+		t.Errorf("name=%s, want local-tcp", tcp.Name)
+	}
+
+	ws := cfg.MQTTSources[1]
+	if ws.Name != "wsmqtt-ws" {
+		t.Errorf("name=%s, want wsmqtt-ws", ws.Name)
+	}
+	if ws.Broker != "wss://wsmqtt.example.com/mqtt" {
+		t.Errorf("broker=%s, want wss://wsmqtt.example.com/mqtt", ws.Broker)
+	}
+	if ws.Username != "corescope" {
+		t.Errorf("username=%s, want corescope", ws.Username)
+	}
+	if ws.Password != "s3cr3t" {
+		t.Errorf("password=%s, want s3cr3t", ws.Password)
+	}
+
+	sources := cfg.ResolvedSources()
+	if sources[1].Broker != "wss://wsmqtt.example.com/mqtt" {
+		t.Errorf("ResolvedSources wss broker=%s, want unchanged", sources[1].Broker)
+	}
+}
+
+func TestIngestBufferSizeOrDefault(t *testing.T) {
+	if got := (&Config{}).IngestBufferSizeOrDefault(); got != 50000 {
+		t.Fatalf("default: want 50000, got %d", got)
+	}
+	if got := (&Config{IngestBufferSize: 10}).IngestBufferSizeOrDefault(); got != 10 {
+		t.Fatalf("override: want 10, got %d", got)
+	}
+	if got := (&Config{IngestBufferSize: -5}).IngestBufferSizeOrDefault(); got != 50000 {
+		t.Fatalf("invalid negative should fall back to default, got %d", got)
+	}
+}
@@ -1,12 +1,14 @@
 package main

 import (
+	"context"
 	"database/sql"
 	"encoding/json"
 	"fmt"
 	"log"
 	"os"
 	"path/filepath"
+	"sort"
 	"strings"
 	"sync"
 	"sync/atomic"
@@ -80,6 +82,16 @@ type Store struct {

 	sampleIntervalSec int
 	backfillWg        sync.WaitGroup
+
+	// prefixIdx holds the prefix → pubkey index used by the
+	// resolved_path writer (#1547). Rebuilt on startup and once per
+	// neighbor-edges builder tick (60s).
+	prefixIdx prefixIdxHolder
+
+	// neighborGraph holds the in-memory NeighborGraph snapshot used
+	// by the context-aware resolver (#1560). Rebuilt on startup and
+	// once per neighbor-edges builder tick (60s).
+	neighborGraph neighborGraphHolder
 }

 // OpenStore opens or creates a SQLite DB at the given path, applying the
@@ -124,6 +136,27 @@ func OpenStoreWithInterval(dbPath string, sampleIntervalSec int) (*Store, error)
 		return nil, fmt.Errorf("preparing statements: %w", err)
 	}

+	// Schedule async migrations. These must NOT block boot. See
+	// async_migration.go for the convention.
+	// PREFLIGHT: async=true reason="composite index build on observations (1.9M+ rows in prod) — converted from sync after v3.8.3"
+	var idxDone int
+	if s.db.QueryRow("SELECT 1 FROM _migrations WHERE name = 'obs_observer_ts_idx_v1'").Scan(&idxDone) != nil {
+		if err := s.RunAsyncMigration(context.Background(), "obs_observer_ts_idx_v1",
+			func(ctx context.Context, d *sql.DB) error {
+				log.Println("[migration/async] Building (observer_idx, timestamp) composite index on observations...")
+				if _, err := d.ExecContext(ctx, `CREATE INDEX IF NOT EXISTS idx_observations_observer_idx_timestamp ON observations(observer_idx, timestamp)`); err != nil {
+					return err
+				}
+				if _, err := d.ExecContext(ctx, `INSERT OR IGNORE INTO _migrations (name) VALUES ('obs_observer_ts_idx_v1')`); err != nil {
+					return err
+				}
+				log.Println("[migration/async] observations(observer_idx, timestamp) index created")
+				return nil
+			}); err != nil {
+			log.Printf("[migration/async] scheduling obs_observer_ts_idx_v1 failed: %v", err)
+		}
+	}
+
 	return s, nil
 }

@@ -161,7 +194,12 @@ func applySchema(db *sql.DB) error {
 			uptime_secs INTEGER,
 			noise_floor REAL,
 			inactive INTEGER DEFAULT 0,
-			last_packet_at TEXT DEFAULT NULL
+			last_packet_at TEXT DEFAULT NULL,
+			clock_skew_seconds INTEGER DEFAULT NULL,
+			clock_skew_count_24h INTEGER DEFAULT 0,
+			clock_last_naive_at TEXT DEFAULT NULL,
+			can_relay INTEGER DEFAULT 1,
+			can_relay_seen INTEGER DEFAULT 0
 		);

 		CREATE INDEX IF NOT EXISTS idx_nodes_last_seen ON nodes(last_seen);
@@ -360,6 +398,39 @@ func applySchema(db *sql.DB) error {
 		log.Println("[migration] observations timestamp index created")
 	}

+	// #1481 P0-3: covering index for GetObserverPacketCounts. The query
+	// joins observations → observers and GROUP BYs observer_idx with a
+	// timestamp WHERE filter; a composite (observer_idx, timestamp)
+	// index lets SQLite resolve the grouping + range filter from the
+	// index alone instead of a 1.9M-row scan.
+	//
+	// CONVERTED TO ASYNC (preflight-async-migration-gate). Scheduling
+	// happens in OpenStore() once the real *Store exists so the
+	// backfill WaitGroup is shared with the rest of the ingestor.
+	// The legacy `_migrations` gate is preserved by the async fn so
+	// DBs that already completed the sync build stay no-op.
+
+	// #1483: normalize nodes.public_key to lowercase. The server's
+	// GetNodeLocationsByKeys lookup dropped LOWER(public_key) for perf
+	// (#1481 P0-3) and now relies on stored keys being lowercase. The
+	// decoder writes lowercase today, but legacy/admin/API inserts may
+	// have left mixed-case rows. Idempotent: counts and lowers any
+	// non-lowercase rows on every boot (this block has no _migrations
+	// gate). After the first pass, later boots only run the COUNT(*)
+	// check and skip the UPDATE because zero rows match.
+	if r := db.QueryRow("SELECT COUNT(*) FROM nodes WHERE public_key != lower(public_key)"); r != nil {
+		var n int64
+		_ = r.Scan(&n)
+		if n > 0 {
+			log.Printf("[migration] Normalizing %d nodes.public_key row(s) to lowercase (#1483)...", n)
+			if _, err := db.Exec(`UPDATE nodes SET public_key = lower(public_key) WHERE public_key != lower(public_key)`); err != nil {
+				log.Printf("[migration] public_key lowercase normalize failed: %v", err)
+			} else {
+				log.Printf("[migration] public_key lowercase normalize complete (%d rows)", n)
+			}
+		}
+	}
+
 	// observer_metrics table for RF health dashboard
 	row = db.QueryRow("SELECT 1 FROM _migrations WHERE name = 'observer_metrics_v1'")
 	if row.Scan(&migDone) != nil {
@@ -497,6 +568,28 @@ func applySchema(db *sql.DB) error {
 		log.Println("[migration] observers.last_packet_at column added")
 	}

+	// Migration: per-observer naive-clock skew tracking (#1478).
+	// When the ingestor clamps a packet's envelope timestamp because the
+	// observer emitted a zone-less local-time string off from UTC by >15min
+	// (resolveRxTime in main.go), we record the event here so the UI can
+	// surface a ⚠️ chip + banner. Decays after 24h via server-side read sweep.
+	row = db.QueryRow("SELECT 1 FROM _migrations WHERE name = 'observers_clock_naive_v1'")
+	if row.Scan(&migDone) != nil {
+		log.Println("[migration] Adding clock-naive columns to observers (#1478)...")
+		// Each ALTER is independent — ignore "duplicate column" so reruns are safe.
+		for _, stmt := range []string{
+			`ALTER TABLE observers ADD COLUMN clock_skew_seconds INTEGER DEFAULT NULL`,
+			`ALTER TABLE observers ADD COLUMN clock_skew_count_24h INTEGER DEFAULT 0`,
+			`ALTER TABLE observers ADD COLUMN clock_last_naive_at TEXT DEFAULT NULL`,
+		} {
+			if _, err := db.Exec(stmt); err != nil && !strings.Contains(err.Error(), "duplicate column") {
+				return fmt.Errorf("clock_naive migration: %w", err)
+			}
+		}
+		db.Exec(`INSERT INTO _migrations (name) VALUES ('observers_clock_naive_v1')`)
+		log.Println("[migration] observers.clock_naive columns added")
+	}
+
 	// Migration: backfill observations.path_json from raw_hex (#888)
 	// NOTE: This runs ASYNC via BackfillPathJSONAsync() to avoid blocking MQTT startup.
 	// See staging outage where ~502K rows blocked ingest for 15+ hours.
@@ -601,13 +694,14 @@ func (s *Store) prepareStatements() error {
 	}

 	s.stmtInsertObservation, err = s.db.Prepare(`
-		INSERT INTO observations (transmission_id, observer_idx, direction, snr, rssi, score, path_json, timestamp, raw_hex)
-		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+		INSERT INTO observations (transmission_id, observer_idx, direction, snr, rssi, score, path_json, timestamp, raw_hex, resolved_path)
+		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
 		ON CONFLICT(transmission_id, observer_idx, COALESCE(path_json, '')) DO UPDATE SET
-			snr     = COALESCE(excluded.snr,     snr),
-			rssi    = COALESCE(excluded.rssi,    rssi),
-			score   = COALESCE(excluded.score,   score),
-			raw_hex = COALESCE(excluded.raw_hex, raw_hex)
+			snr           = COALESCE(excluded.snr,           snr),
+			rssi          = COALESCE(excluded.rssi,          rssi),
+			score         = COALESCE(excluded.score,         score),
+			raw_hex       = COALESCE(excluded.raw_hex,       raw_hex),
+			resolved_path = COALESCE(excluded.resolved_path, resolved_path)
 	`)
 	if err != nil {
 		return err
@@ -635,8 +729,8 @@ func (s *Store) prepareStatements() error {
 	}

 	s.stmtUpsertObserver, err = s.db.Prepare(`
-		INSERT INTO observers (id, name, iata, last_seen, first_seen, packet_count, model, firmware, client_version, radio, battery_mv, uptime_secs, noise_floor)
-		VALUES (?, ?, ?, ?, ?, 1, ?, ?, ?, ?, ?, ?, ?)
+		INSERT INTO observers (id, name, iata, last_seen, first_seen, packet_count, model, firmware, client_version, radio, battery_mv, uptime_secs, noise_floor, can_relay, can_relay_seen)
+		VALUES (?, ?, ?, ?, ?, 1, ?, ?, ?, ?, ?, ?, ?, COALESCE(?, 1), CASE WHEN ? IS NULL THEN 0 ELSE 1 END)
 		ON CONFLICT(id) DO UPDATE SET
 			name = COALESCE(?, name),
 			iata = COALESCE(?, iata),
@@ -648,7 +742,9 @@ func (s *Store) prepareStatements() error {
 			radio = COALESCE(?, radio),
 			battery_mv = COALESCE(?, battery_mv),
 			uptime_secs = COALESCE(?, uptime_secs),
-			noise_floor = COALESCE(?, noise_floor)
+			noise_floor = COALESCE(?, noise_floor),
+			can_relay = COALESCE(?, can_relay),
+			can_relay_seen = CASE WHEN ? IS NULL THEN can_relay_seen ELSE 1 END
 	`)
 	if err != nil {
 		return err
@@ -700,6 +796,21 @@ func (s *Store) InsertTransmission(data *PacketData) (bool, error) {
 		return false, nil
 	}

+	// Wait/hold instrumentation (#1340). The hot path uses prepared
+	// statements that auto-commit; gate the whole function under
+	// writerMu so concurrent mqtt_handler inserts queue behind any
+	// other writer (vacuum, prune, neighbor-builder) and the wait is
+	// Go-visible.
+	mqttWaitStart := time.Now()
+	writerMu.Lock()
+	mqttWait := time.Since(mqttWaitStart)
+	mqttHoldStart := time.Now()
+	defer func() {
+		mqttHold := time.Since(mqttHoldStart)
+		writerMu.Unlock()
+		recordWriterTiming("mqtt_handler", mqttWait, mqttHold, "InsertTransmission")
+	}()
+
 	rxTime := data.Timestamp
 	ingestNow := time.Now().UTC().Format(time.RFC3339)
 	if rxTime == "" {
@@ -748,9 +859,11 @@ func (s *Store) InsertTransmission(data *PacketData) (bool, error) {
 		err := s.stmtGetObserverRowid.QueryRow(data.ObserverID).Scan(&rowid)
 		if err == nil {
 			observerIdx = &rowid
-			// Update observer last_seen and last_packet_at on every packet to prevent
-			// low-traffic observers from appearing offline (#463)
-			_, _ = s.stmtUpdateObserverLastSeen.Exec(ingestNow, rxTime, ingestNow, rxTime, rowid)
+			// observer.last_seen and last_packet_at answer "when did the analyzer
+			// last hear from this observer" — both are ingest-time questions.
+			// Per-packet rxTime is stored separately on observations/transmissions
+			// using envelope time (rxTime, taken from data.Timestamp earlier in this function). See #1465.
+			_, _ = s.stmtUpdateObserverLastSeen.Exec(ingestNow, ingestNow, ingestNow, ingestNow, rowid)
 		}
 	}

@@ -760,10 +873,25 @@ func (s *Store) InsertTransmission(data *PacketData) (bool, error) {
 		epochTs = t.Unix()
 	}

+	// Resolve hop prefixes to full pubkeys for `observations.resolved_path`.
+	// Per #1547: this writer was lost in the #1289 refactor and lives in
+	// the ingestor now. Per #1560: use the context-aware resolver so
+	// 1-byte prefix collisions are disambiguated via NeighborGraph
+	// adjacency (anchored on from_pubkey for ADVERTs, previous hop
+	// otherwise). Empty resolved JSON → NULL via nilIfEmpty.
+	resolved := resolvePathWithContext(
+		parsePathArray(data.PathJSON),
+		strings.ToLower(data.FromPubkey),
+		s.neighborGraph.load(),
+		s.prefixIdx.load(),
+	)
+	resolvedJSON := marshalResolvedPath(resolved)
+
 	_, err = s.stmtInsertObservation.Exec(
 		txID, observerIdx, data.Direction,
 		data.SNR, data.RSSI, data.Score,
 		data.PathJSON, epochTs, nilIfEmpty(data.RawHex),
+		nilIfEmpty(resolvedJSON),
 	)
 	if err != nil {
 		s.Stats.WriteErrors.Add(1)
@@ -849,6 +977,13 @@ type ObserverMeta struct {
 	RecvErrors    *int     // cumulative CRC/decode failures since boot
 	PacketsSent   *int     // cumulative packets sent since boot
 	PacketsRecv   *int     // cumulative packets received since boot
+	// CanRelay reflects the firmware 1.16 /status `repeat` flag (#1290).
+	// nil means the firmware did not send the field — caller must
+	// preserve the existing observers.can_relay value (default 1).
+	// true → relay-capable (`repeat:on`); false → listener-only
+	// (`repeat:off`), which causes the server-side disambiguator to
+	// exclude this observer's pubkey from path-hop candidate sets.
+	CanRelay *bool
 }

 // UpsertObserver inserts or updates an observer using the current wall-clock
@@ -871,7 +1006,7 @@ func (s *Store) UpsertObserverAt(id, name, iata string, meta *ObserverMeta, last
 	normalizedIATA := strings.TrimSpace(strings.ToUpper(iata))

 	var model, firmware, clientVersion, radio interface{}
-	var batteryMv, uptimeSecs, noiseFloor interface{}
+	var batteryMv, uptimeSecs, noiseFloor, canRelay interface{}
 	if meta != nil {
 		if meta.Model != nil {
 			model = *meta.Model
@@ -894,11 +1029,22 @@ func (s *Store) UpsertObserverAt(id, name, iata string, meta *ObserverMeta, last
 		if meta.NoiseFloor != nil {
 			noiseFloor = *meta.NoiseFloor
 		}
+		// Issue #1290: nil → leave DB column unchanged (COALESCE in
+		// the prepared stmt); 0/1 written when firmware provided
+		// the `repeat` field. INSERT branch defaults to 1 via the
+		// COALESCE in the VALUES clause.
+		if meta.CanRelay != nil {
+			if *meta.CanRelay {
+				canRelay = 1
+			} else {
+				canRelay = 0
+			}
+		}
 	}

 	_, err := s.stmtUpsertObserver.Exec(
-		id, name, normalizedIATA, lastSeen, lastSeen, model, firmware, clientVersion, radio, batteryMv, uptimeSecs, noiseFloor,
-		name, normalizedIATA, ingestNow, lastSeen, model, firmware, clientVersion, radio, batteryMv, uptimeSecs, noiseFloor,
+		id, name, normalizedIATA, lastSeen, lastSeen, model, firmware, clientVersion, radio, batteryMv, uptimeSecs, noiseFloor, canRelay, canRelay,
+		name, normalizedIATA, ingestNow, lastSeen, model, firmware, clientVersion, radio, batteryMv, uptimeSecs, noiseFloor, canRelay, canRelay,
 	)
 	if err != nil {
 		s.Stats.WriteErrors.Add(1)
@@ -980,7 +1126,8 @@ func (s *Store) InsertMetrics(data *MetricsData) error {
 // PruneOldMetrics deletes observer_metrics rows older than retentionDays.
 func (s *Store) PruneOldMetrics(retentionDays int) (int64, error) {
 	cutoff := time.Now().UTC().AddDate(0, 0, -retentionDays).Format(time.RFC3339)
-	result, err := s.db.Exec(`DELETE FROM observer_metrics WHERE timestamp < ?`, cutoff)
+	// Tagged for /api/perf writer-lock visibility (#1340).
+	result, err := s.instrumentedExec("prune_metrics", `DELETE FROM observer_metrics WHERE timestamp < ?`, cutoff)
 	if err != nil {
 		return 0, fmt.Errorf("prune metrics: %w", err)
 	}
@@ -1021,11 +1168,11 @@ func (s *Store) CheckAutoVacuum(cfg *Config) {
 		log.Printf("[db] vacuumOnStartup=true — starting one-time full VACUUM (ensure 2x DB size free disk space)...")
 		start := time.Now()

-		if _, err := s.db.Exec("PRAGMA auto_vacuum = INCREMENTAL"); err != nil {
+		if _, err := s.instrumentedExec("vacuum", "PRAGMA auto_vacuum = INCREMENTAL"); err != nil {
 			log.Printf("[db] VACUUM failed: could not set auto_vacuum: %v", err)
 			return
 		}
-		if _, err := s.db.Exec("VACUUM"); err != nil {
+		if _, err := s.instrumentedExec("vacuum", "VACUUM"); err != nil {
 			log.Printf("[db] VACUUM failed: %v", err)
 			return
 		}
@@ -1038,19 +1185,26 @@ func (s *Store) CheckAutoVacuum(cfg *Config) {
 // RunIncrementalVacuum returns free pages to the OS (#919).
 // Safe to call on auto_vacuum=NONE databases (noop).
 func (s *Store) RunIncrementalVacuum(pages int) {
-	if _, err := s.db.Exec(fmt.Sprintf("PRAGMA incremental_vacuum(%d)", pages)); err != nil {
+	// Tagged for /api/perf writer-lock visibility (#1340).
+	if _, err := s.instrumentedExec("vacuum", fmt.Sprintf("PRAGMA incremental_vacuum(%d)", pages)); err != nil {
 		log.Printf("[vacuum] incremental_vacuum error: %v", err)
 	}
 }

-// Checkpoint forces a WAL checkpoint to release the WAL lock file,
-// preventing lock contention with a new process starting up.
-func (s *Store) Checkpoint() {
-	if _, err := s.db.Exec("PRAGMA wal_checkpoint(TRUNCATE)"); err != nil {
+// Checkpoint runs a WAL checkpoint (TRUNCATE mode).
+// Returns the number of WAL frames checkpointed (0 if the WAL was already empty or the checkpoint failed).
+// TRUNCATE resets the WAL file to zero bytes when all frames are checkpointed;
+// if active readers hold frames, it checkpoints what it can and leaves the rest.
+func (s *Store) Checkpoint() int {
+	var busy, walFrames, checkpointed int
+	if err := s.db.QueryRow("PRAGMA wal_checkpoint(TRUNCATE)").Scan(&busy, &walFrames, &checkpointed); err != nil {
 		log.Printf("[db] WAL checkpoint error: %v", err)
-	} else {
-		log.Println("[db] WAL checkpoint complete")
+		return 0
 	}
+	if walFrames > 0 {
+		log.Printf("[db] WAL checkpoint: %d/%d frames checkpointed (blocked=%v)", checkpointed, walFrames, busy != 0)
+	}
+	return checkpointed
 }

 // BackfillPathJSONAsync launches the path_json backfill in a background goroutine.
@@ -1247,14 +1401,15 @@ func (s *Store) RemoveStaleObservers(observerDays int) (int64, error) {
 		return 0, nil // keep forever
 	}
 	cutoff := time.Now().UTC().AddDate(0, 0, -observerDays).Format(time.RFC3339)
-	result, err := s.db.Exec(`UPDATE observers SET inactive = 1 WHERE last_seen < ? AND (inactive IS NULL OR inactive = 0)`, cutoff)
+	// Tagged for /api/perf writer-lock visibility (#1340).
+	result, err := s.instrumentedExec("prune_observers", `UPDATE observers SET inactive = 1 WHERE last_seen < ? AND (inactive IS NULL OR inactive = 0)`, cutoff)
 	if err != nil {
 		return 0, fmt.Errorf("mark stale observers inactive: %w", err)
 	}
 	removed, _ := result.RowsAffected()
 	if removed > 0 {
 		// Clean up orphaned metrics for now-inactive observers
-		s.db.Exec(`DELETE FROM observer_metrics WHERE observer_id IN (SELECT id FROM observers WHERE inactive = 1)`)
+		_, _ = s.instrumentedExec("prune_observers", `DELETE FROM observer_metrics WHERE observer_id IN (SELECT id FROM observers WHERE inactive = 1)`)
 		log.Printf("Marked %d observer(s) as inactive (not seen in %d days)", removed, observerDays)
 	}
 	return removed, nil
@@ -1349,7 +1504,15 @@ func scopeNameForDB(data *PacketData) *string {
 // node. Skips the UPDATE when the stored value already matches to avoid
 // redundant writes on the hot MQTT ingest path. Updates both nodes and
 // inactive_nodes to stay consistent.
+//
+// Defense-in-depth (#1534): an empty scope is treated as a no-op. The call
+// site at handleMessage is the primary guard (shouldUpdateDefaultScope),
+// but this layer refuses the invalid write so a future caller cannot
+// reintroduce the bug by passing "" directly.
 func (s *Store) UpdateNodeDefaultScope(pubkey, scope string) error {
+	if scope == "" {
+		return nil
+	}
 	// Short-circuit: skip if already stored.
 	var cur sql.NullString
 	row := s.db.QueryRow(`SELECT default_scope FROM nodes WHERE public_key = ?`, pubkey)
@@ -1364,6 +1527,39 @@ func (s *Store) UpdateNodeDefaultScope(pubkey, scope string) error {
 	return err
 }

+// RecordNaiveSkew is called when resolveRxTime() clamps a packet's envelope
+// timestamp because the observer is emitting a zone-less local-time string
+// off from UTC by more than 15 min (issue #1478). Stamps the observer's
+// clock_skew_seconds / clock_skew_count_24h / clock_last_naive_at so the
+// server can surface a ⚠️ chip + banner in the UI.
+//
+// The count is reset to 1 (not incremented) if no event has been recorded in
+// the past 24h, otherwise incremented. deltaSec is signed: negative = observer
+// clock is behind UTC, positive = ahead.
+func (s *Store) RecordNaiveSkew(observerID string, deltaSec int64, now time.Time) error {
+	if observerID == "" {
+		return nil
+	}
+	nowStr := now.UTC().Format(time.RFC3339)
+	cutoff := now.Add(-24 * time.Hour).UTC().Format(time.RFC3339)
+	// One INSERT-or-UPDATE round trip. ON CONFLICT path resets the rolling
+	// counter when the previous event is older than the 24h window, otherwise
+	// increments it.
+	_, err := s.db.Exec(`
+		INSERT INTO observers (id, clock_skew_seconds, clock_skew_count_24h, clock_last_naive_at)
+		VALUES (?, ?, 1, ?)
+		ON CONFLICT(id) DO UPDATE SET
+			clock_skew_seconds = excluded.clock_skew_seconds,
+			clock_last_naive_at = excluded.clock_last_naive_at,
+			clock_skew_count_24h = CASE
+				WHEN clock_last_naive_at IS NULL OR clock_last_naive_at < ?
+					THEN 1
+				ELSE COALESCE(clock_skew_count_24h, 0) + 1
+			END
+	`, observerID, deltaSec, nowStr, cutoff)
+	return err
+}
+
 // MQTTPacketMessage is the JSON payload from an MQTT raw packet message.
 type MQTTPacketMessage struct {
 	Raw       string   `json:"raw"`
@@ -1453,3 +1649,292 @@ func BuildPacketData(msg *MQTTPacketMessage, decoded *DecodedPacket, observerID,

 	return pd
 }
+
+
+// ─── Writer-lock instrumentation (issue #1340) ────────────────────────────
+//
+// Make SQLite writer-lock starvation visible to operators. Per-component
+// wait_ms / hold_ms / contention_total histograms, surfaced via
+// /api/perf/write-sources under the "writer_perf" key. Component tags:
+// neighbor_builder, mqtt_handler, prune_packets, prune_observers,
+// prune_metrics, mbcap_persist (deferred — see PR body), vacuum.
+//
+// The single writer connection (SetMaxOpenConns(1)) means writes serialise
+// inside the driver and the wait is invisible to Go. writerMu measures the
+// wait Go can see (everyone queueing behind the current holder) by gating
+// every wrapped call site through the same package-level mutex.
+
+// WriterStatsSnapshot is a per-component wait/hold latency snapshot
+// surfaced via /api/perf to make SQLite writer-lock starvation visible
+// to operators (issue #1340). Times are in milliseconds.
+type WriterStatsSnapshot struct {
+	Count           int64   `json:"count"`
+	ContentionTotal int64   `json:"contention_total"`
+	WaitMsP50       float64 `json:"wait_ms_p50"`
+	WaitMsP95       float64 `json:"wait_ms_p95"`
+	WaitMsP99       float64 `json:"wait_ms_p99"`
+	WaitMsMax       float64 `json:"wait_ms_max"`
+	HoldMsP50       float64 `json:"hold_ms_p50"`
+	HoldMsP95       float64 `json:"hold_ms_p95"`
+	HoldMsP99       float64 `json:"hold_ms_p99"`
+	HoldMsMax       float64 `json:"hold_ms_max"`
+}
+
+const (
+	// writerSampleWindow bounds the per-component rolling window so a
+	// long-running ingestor doesn't grow this unbounded.
+	writerSampleWindow = 1024
+	// contentionThresholdMs: wait_ms above this counts as a "contended"
+	// write (per #1340 spec).
+	contentionThresholdMs = 100.0
+	defaultSlowWriterMs   = 500.0
+)
+
+// slowWriterThresholdMsAtomic — hold_ms threshold above which writes
+// emit a [db-slow-writer] log line. Read on the hot path; written once
+// at startup by SetSlowWriterThresholdMs.
+var slowWriterThresholdMsAtomic atomic.Uint64
+
+// SetSlowWriterThresholdMs sets the [db-slow-writer] log threshold in whole ms (env override: see initSlowWriterFromEnv).
+func SetSlowWriterThresholdMs(ms float64) {
+	if ms <= 0 {
+		ms = defaultSlowWriterMs // non-positive restores the 500ms default
+	} else if ms < 1 {
+		ms = 1 // uint64(ms) would store 0, which getSlowWriterThresholdMs reads back as the default
+	}
+	slowWriterThresholdMsAtomic.Store(uint64(ms))
+}
+
+func getSlowWriterThresholdMs() float64 {
+	if stored := slowWriterThresholdMsAtomic.Load(); stored != 0 {
+		return float64(stored)
+	}
+	// Zero means no threshold has been stored yet: fall back to the default.
+	return defaultSlowWriterMs
+}
+
+// initSlowWriterFromEnv is called once from package init so operators can
+// override the threshold via CORESCOPE_DB_SLOW_WRITER_MS without a
+// Go-side Config change.
+func initSlowWriterFromEnv() {
+	v := os.Getenv("CORESCOPE_DB_SLOW_WRITER_MS")
+	if v == "" {
+		return
+	}
+	var ms float64
+	if _, err := fmt.Sscanf(v, "%f", &ms); err == nil && ms > 0 {
+		SetSlowWriterThresholdMs(ms)
+	}
+}
+
+func init() { initSlowWriterFromEnv() }
+
+type writerComponentStats struct {
+	mu              sync.Mutex
+	count           int64
+	contentionTotal int64
+	waitMs          []float64
+	holdMs          []float64
+	waitMax         float64
+	holdMax         float64
+}
+
+func (c *writerComponentStats) record(waitMs, holdMs float64) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.count++
+	if waitMs > contentionThresholdMs {
+		c.contentionTotal++
+	}
+	if waitMs > c.waitMax {
+		c.waitMax = waitMs
+	}
+	if holdMs > c.holdMax {
+		c.holdMax = holdMs
+	}
+	c.waitMs = appendBoundedFloat(c.waitMs, waitMs, writerSampleWindow)
+	c.holdMs = appendBoundedFloat(c.holdMs, holdMs, writerSampleWindow)
+}
+
+func appendBoundedFloat(s []float64, v float64, max int) []float64 {
+	if len(s) < max || len(s) == 0 { // an empty s never shifts: s[1:] would panic when max <= 0
+		return append(s, v)
+	}
+	copy(s, s[1:]) // window full: drop the oldest sample, reusing the backing array
+	s[len(s)-1] = v
+	return s
+}
+
+// snapshot copies the counters, maxima and both sample windows while holding
+// c.mu, then releases it before sorting the copies and deriving p50/p95/p99,
+// so concurrent record calls are not blocked by the sort.
+func (c *writerComponentStats) snapshot() WriterStatsSnapshot {
+	c.mu.Lock()
+	out := WriterStatsSnapshot{Count: c.count, ContentionTotal: c.contentionTotal}
+	out.WaitMsMax, out.HoldMsMax = c.waitMax, c.holdMax
+	waits := make([]float64, len(c.waitMs))
+	holds := make([]float64, len(c.holdMs))
+	copy(waits, c.waitMs)
+	copy(holds, c.holdMs)
+	c.mu.Unlock()
+
+	// Sort the private copies; the live windows stay in insertion order.
+	sort.Float64s(waits)
+	sort.Float64s(holds)
+	out.WaitMsP50, out.HoldMsP50 = nearestRankPercentile(waits, 0.50), nearestRankPercentile(holds, 0.50)
+	out.WaitMsP95, out.HoldMsP95 = nearestRankPercentile(waits, 0.95), nearestRankPercentile(holds, 0.95)
+	out.WaitMsP99, out.HoldMsP99 = nearestRankPercentile(waits, 0.99), nearestRankPercentile(holds, 0.99)
+	return out
+}
+
+// nearestRankPercentile returns sorted[int(p*(n-1)+0.5)] with the index clamped to [0, n-1]; 0 when empty.
+func nearestRankPercentile(sorted []float64, p float64) float64 {
+	last := len(sorted) - 1
+	switch {
+	case last < 0:
+		return 0
+	case last == 0:
+		return sorted[0]
+	}
+	pos := int(p*float64(last) + 0.5)
+	if pos < 0 {
+		pos = 0
+	} else if pos > last {
+		pos = last
+	}
+	return sorted[pos]
+}
+
+type writerStatsAggregator struct {
+	mu         sync.Mutex
+	components map[string]*writerComponentStats
+}
+
+var writerStatsAgg = &writerStatsAggregator{
+	components: make(map[string]*writerComponentStats),
+}
+
+// get returns the stats bucket for component, creating it on first use.
+func (a *writerStatsAggregator) get(component string) *writerComponentStats {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	if existing, found := a.components[component]; found {
+		return existing
+	}
+	a.components[component] = new(writerComponentStats)
+	return a.components[component]
+}
+
+// reset clears all per-component samples. Test-only: lets a single
+// scenario assert against a clean aggregator without prior-test noise
+// in the same package run (TestWriterStarvationVisibleInPerf would
+// otherwise mix this run's 5 starved samples with thousands of fast
+// InsertTransmission samples from earlier tests and the p99 would
+// collapse below the 50s threshold).
+func (a *writerStatsAggregator) reset() {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.components = make(map[string]*writerComponentStats)
+}
+
+// ResetWriterStatsForTest wipes the per-component writer stats
+// aggregator. Test-only; not safe to call from production code paths.
+func ResetWriterStatsForTest() { writerStatsAgg.reset() }
+
+// snapshot copies the component map under a.mu, then snapshots each component
+// after releasing it, so a.mu is not held while the per-component locks are taken.
+func (a *writerStatsAggregator) snapshot() map[string]WriterStatsSnapshot {
+	a.mu.Lock()
+	pending := make(map[string]*writerComponentStats, len(a.components))
+	for name, stats := range a.components {
+		pending[name] = stats
+	}
+	a.mu.Unlock()
+	result := make(map[string]WriterStatsSnapshot, len(pending))
+	for name, stats := range pending {
+		result[name] = stats.snapshot()
+	}
+	return result
+}
+
+// WriterStatsSnapshot returns a per-component wait/hold/contention
+// snapshot for exposure on /api/perf/write-sources (issue #1340).
+func (s *Store) WriterStatsSnapshot() map[string]WriterStatsSnapshot {
+	return writerStatsAgg.snapshot()
+}
+
+// recordWriterTiming stores one wait/hold sample for component and logs a
+// [db-slow-writer] line when the hold time exceeds the configured threshold
+// (500ms unless overridden). Only the first 200 bytes of queryForLog are logged.
+func recordWriterTiming(component string, wait, hold time.Duration, queryForLog string) {
+	const nsPerMs = 1e6
+	holdMs := float64(hold.Nanoseconds()) / nsPerMs
+	writerStatsAgg.get(component).record(float64(wait.Nanoseconds())/nsPerMs, holdMs)
+	if holdMs <= getSlowWriterThresholdMs() {
+		return
+	}
+	if len(queryForLog) > 200 {
+		queryForLog = queryForLog[:200]
+	}
+	log.Printf("[db-slow-writer] component=%s duration=%.1fms query=%s", component, holdMs, queryForLog)
+}
+
+// writerMu serialises every wrapped writer call so the wait the next
+// caller sees is the wait the perf snapshot can attribute. The
+// SQLite driver also enforces serial writes (SetMaxOpenConns(1)),
+// but the wait inside the driver is invisible to Go — writerMu makes
+// it Go-visible.
+var writerMu sync.Mutex
+
+// WriterExec wraps s.db.Exec with per-component wait/hold/contention
+// instrumentation (issue #1340). The deferred unlock runs even if Exec panics.
+func (s *Store) WriterExec(component, query string, args ...interface{}) (sql.Result, error) {
+	waitStart := time.Now()
+	writerMu.Lock()
+	wait, holdStart := time.Since(waitStart), time.Now()
+	defer func() {
+		hold := time.Since(holdStart)
+		writerMu.Unlock()
+		recordWriterTiming(component, wait, hold, query)
+	}()
+	return s.db.Exec(query, args...)
+}
+
+// WriterTx wraps Begin → fn → Commit under component tagging; hold_ms covers the
+// whole tx. If fn errors or panics the tx is rolled back and writerMu released.
+func (s *Store) WriterTx(component string, fn func(*sql.Tx) error) (err error) {
+	waitStart := time.Now()
+	writerMu.Lock()
+	wait := time.Since(waitStart)
+	holdStart := time.Now()
+	stage := "BEGIN" // phase reached so far; doubles as the log tag
+	var tx *sql.Tx
+	defer func() {
+		if stage == "tx-body" {
+			_ = tx.Rollback() // fn returned an error or panicked
+		}
+		hold := time.Since(holdStart)
+		writerMu.Unlock()
+		recordWriterTiming(component, wait, hold, stage)
+	}()
+	if tx, err = s.db.Begin(); err != nil {
+		return err
+	}
+	stage = "tx-body"
+	if err = fn(tx); err != nil {
+		return err
+	}
+	stage = "COMMIT"
+	return tx.Commit()
+}
+
+// Wrap helpers below tag existing call sites with the canonical
+// component names so the call sites read naturally. These keep the
+// instrumentation out of the hot-path business logic.
+
+// instrumentedExec is the package-internal pass-through used by call
+// sites already inside db.go (PruneOldMetrics, RemoveStaleObservers,
+// vacuum). Equivalent to WriterExec, kept short for readability.
+func (s *Store) instrumentedExec(component, query string, args ...interface{}) (sql.Result, error) {
+	return s.WriterExec(component, query, args...)
+}
@@ -554,18 +554,26 @@ func TestInsertTransmissionUpdatesObserverLastSeen(t *testing.T) {
 		PathJSON:    "[]",
 		DecodedJSON: `{"type":"TXT_MSG"}`,
 	}
+	before := time.Now().Unix()
 	if _, err := s.InsertTransmission(data); err != nil {
 		t.Fatal(err)
 	}
+	after := time.Now().Unix()

-	// Verify last_seen was updated
+	// Verify last_seen was updated to INGEST time, not envelope time (#1465).
 	var lastSeenAfter string
 	s.db.QueryRow("SELECT last_seen FROM observers WHERE id = ?", "obs1").Scan(&lastSeenAfter)
 	if lastSeenAfter == oldTime {
 		t.Error("observer last_seen was NOT updated after packet insertion — low-traffic observers will appear offline")
 	}
-	if lastSeenAfter != "2026-03-25T01:00:00Z" {
-		t.Errorf("expected last_seen=2026-03-25T01:00:00Z, got %s", lastSeenAfter)
+	ls, err := time.Parse(time.RFC3339, lastSeenAfter)
+	if err != nil {
+		t.Fatalf("last_seen %q not RFC3339: %v", lastSeenAfter, err)
+	}
+	if ls.Unix() < before-5 || ls.Unix() > after+5 {
+		t.Errorf("expected last_seen ≈ server now (in [%d, %d]), got %s (epoch %d). "+
+			"observer.last_seen must use ingest time, not envelope time (#1465).",
+			before, after, lastSeenAfter, ls.Unix())
 	}
 }

@@ -598,18 +606,26 @@ func TestLastPacketAtUpdatedOnPacketOnly(t *testing.T) {
 		PathJSON:    "[]",
 		DecodedJSON: `{"type":"TXT_MSG"}`,
 	}
+	before := time.Now().Unix()
 	if _, err := s.InsertTransmission(data); err != nil {
 		t.Fatal(err)
 	}
+	after := time.Now().Unix()

 	s.db.QueryRow("SELECT last_packet_at FROM observers WHERE id = ?", "obs1").Scan(&lastPacketAt)
 	if !lastPacketAt.Valid {
 		t.Fatal("expected last_packet_at to be non-NULL after InsertTransmission")
 	}
-	// InsertTransmission uses `now = data.Timestamp || time.Now()`, so last_packet_at
-	// should match the packet's Timestamp when provided (same source-of-truth as last_seen).
-	if lastPacketAt.String != "2026-04-24T12:00:00Z" {
-		t.Errorf("expected last_packet_at=2026-04-24T12:00:00Z, got %s", lastPacketAt.String)
+	// last_packet_at, like last_seen, is "when did the analyzer last receive a
+	// packet from this observer" — an ingest-time question, independent of the
+	// envelope timestamp. See #1465.
+	lp, err := time.Parse(time.RFC3339, lastPacketAt.String)
+	if err != nil {
+		t.Fatalf("last_packet_at %q not RFC3339: %v", lastPacketAt.String, err)
+	}
+	if lp.Unix() < before-5 || lp.Unix() > after+5 {
+		t.Errorf("expected last_packet_at ≈ server now (in [%d, %d]), got %s (epoch %d)",
+			before, after, lastPacketAt.String, lp.Unix())
 	}

 	// UpsertObserver again (status path) — last_packet_at should NOT change
@@ -2901,3 +2917,46 @@ func TestSchemaMultibyteSupColumns(t *testing.T) {
 	}
 	store2.Close()
 }
+
+// TestUpdateNodeDefaultScope_EmptyScopeIsNoop is the DB-layer defense-in-depth
+// regression test for #1534. Even if the call-site guard at main.go:720 is
+// later removed or refactored, the DB function MUST refuse to overwrite a
+// previously-correct default_scope with the empty string. This is the
+// belt-and-braces guard recommended by adversarial review (MAJOR-2) and
+// dijkstra review (MINOR-2).
+func TestUpdateNodeDefaultScope_EmptyScopeIsNoop(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "test.db")
+	store, err := OpenStore(dbPath)
+	if err != nil {
+		t.Fatalf("OpenStore: %v", err)
+	}
+	defer store.Close()
+
+	if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, default_scope) VALUES ('pk1', 'Node1', '#belgium')`); err != nil {
+		t.Fatalf("insert node: %v", err)
+	}
+	if _, err := store.db.Exec(`INSERT INTO inactive_nodes (public_key, name, default_scope) VALUES ('pk1', 'Node1', '#belgium')`); err != nil {
+		t.Fatalf("insert inactive node: %v", err)
+	}
+
+	// Empty-scope call must be a silent no-op (return nil), NOT overwrite.
+	if err := store.UpdateNodeDefaultScope("pk1", ""); err != nil {
+		t.Fatalf("UpdateNodeDefaultScope(\"\") returned error: %v (want nil)", err)
+	}
+
+	var got string
+	if err := store.db.QueryRow(`SELECT default_scope FROM nodes WHERE public_key = 'pk1'`).Scan(&got); err != nil {
+		t.Fatalf("read nodes.default_scope: %v", err)
+	}
+	if got != "#belgium" {
+		t.Errorf("nodes.default_scope after empty-scope call = %q, want #belgium (DB-layer guard missing — #1534)", got)
+	}
+	var gotInactive string
+	if err := store.db.QueryRow(`SELECT default_scope FROM inactive_nodes WHERE public_key = 'pk1'`).Scan(&gotInactive); err != nil {
+		t.Fatalf("read inactive_nodes.default_scope: %v", err)
+	}
+	if gotInactive != "#belgium" {
+		t.Errorf("inactive_nodes.default_scope after empty-scope call = %q, want #belgium (DB-layer guard missing — #1534)", gotInactive)
+	}
+}
@@ -0,0 +1,115 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"sync"
+	"testing"
+	"time"
+)
+
+// TestWriterStarvationVisibleInPerf reproduces the #1339 class of bug:
+// one component (neighbor_builder) holds the writer connection for an
+// extended period; a second component (mqtt_handler) firing concurrent
+// writes must show observable wait_ms in the perf snapshot.
+//
+// This is the gate test for issue #1340: SQLite write-lock instrumentation
+// per component. If the wait_ms percentile collapses to zero, the
+// observability gap remains and the regression class is invisible again.
+//
+// Runs ~60s — guarded by testing.Short() so fast unit-test passes can
+// skip it locally, but CI runs `go test ./...` without -short.
+func TestWriterStarvationVisibleInPerf(t *testing.T) {
+	if testing.Short() {
+		t.Skip("skipping 60s starvation test in short mode")
+	}
+
+	// Isolate from samples accumulated by earlier tests in the same
+	// package run — without this the mqtt_handler component already
+	// has ~thousand fast InsertTransmission samples and the 5 slow
+	// follower samples can't move p99 above 50s.
+	ResetWriterStatsForTest()
+
+	s, err := OpenStore(tempDBPath(t))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer s.Close()
+
+	const blockDur = 60 * time.Second
+
+	// Blocker: acquire the writer via the wrapped Tx path, tag as
+	// neighbor_builder, sleep 60s while holding the single conn,
+	// then commit. This monopolises the writer for the duration.
+	blockStarted := make(chan struct{})
+	blockerDone := make(chan struct{})
+	go func() {
+		defer close(blockerDone)
+		err := s.WriterTx("neighbor_builder", func(tx *sql.Tx) error {
+			if _, err := tx.Exec(`UPDATE nodes SET name = name WHERE 0`); err != nil {
+				return err
+			}
+			close(blockStarted)
+			time.Sleep(blockDur)
+			return nil
+		})
+		if err != nil {
+			t.Errorf("blocker tx: %v", err)
+		}
+	}()
+
+	// Wait for the blocker to be inside its transaction.
+	<-blockStarted
+	// Small safety margin so the blocker is firmly holding the conn.
+	time.Sleep(100 * time.Millisecond)
+
+	// Now fire several mqtt_handler writes. Each will block in WriterExec
+	// (on the shared writer lock) until the blocker's transaction finishes.
+	const followers = 5
+	var wg sync.WaitGroup
+	wg.Add(followers)
+	for i := 0; i < followers; i++ {
+		i := i
+		go func() {
+			defer wg.Done()
+			_, err := s.WriterExec(
+				"mqtt_handler",
+				`INSERT OR IGNORE INTO _migrations (name) VALUES (?)`,
+				fmt.Sprintf("writer_starvation_test_%d", i),
+			)
+			if err != nil {
+				t.Errorf("mqtt follower %d: %v", i, err)
+			}
+		}()
+	}
+
+	wg.Wait()
+	<-blockerDone
+
+	snap := s.WriterStatsSnapshot()
+	mqtt, ok := snap["mqtt_handler"]
+	if !ok {
+		t.Fatalf("no perf snapshot for mqtt_handler component (got components: %v)", componentKeys(snap))
+	}
+	if mqtt.Count < followers {
+		t.Fatalf("expected at least %d mqtt_handler samples, got %d", followers, mqtt.Count)
+	}
+	// This is the gate assertion. With instrumentation present the
+	// follower writes should each register ~60s of wait_ms; p99 must
+	// be well above 50_000ms. With instrumentation missing or broken
+	// the percentile collapses to zero and this fails — which is the
+	// exact regression class #1340 is meant to prevent.
+	if mqtt.WaitMsP99 <= 50_000 {
+		t.Fatalf("mqtt_handler wait_ms p99 = %.1fms, want > 50000ms; "+
+			"writer starvation is invisible to /api/perf — issue #1340 not fixed",
+			mqtt.WaitMsP99)
+	}
+}
+
+func componentKeys(m map[string]WriterStatsSnapshot) []string {
+	out := make([]string, 0, len(m))
+	for k := range m {
+		out = append(out, k)
+	}
+	return out
+}
@@ -109,6 +109,15 @@ type Payload struct {
 	MAC           string       `json:"mac,omitempty"`
 	EncryptedData string       `json:"encryptedData,omitempty"`
 	ExtraHash     string       `json:"extraHash,omitempty"`
+	// Extended ACK fields per firmware 1.16.0 (issue #1610) —
+	// firmware/src/helpers/BaseChatMesh.cpp:218-234. ACK payloads grew from
+	// always-4 bytes to 4/5/6 (4-byte truncated sha256 CRC, optional 1-byte
+	// attempt counter, optional 1-byte RNG byte added in commit a130a95a).
+	// AckLen is the wire payload length (capped at 6); AckAttempt/AckRand are
+	// surfaced only when the sender included them (legacy 4-byte ACKs leave them nil).
+	AckLen        *int   `json:"ackLen,omitempty"`
+	AckAttempt    *int   `json:"ackAttempt,omitempty"`
+	AckRand       *int   `json:"ackRand,omitempty"`
 	PubKey        string       `json:"pubKey,omitempty"`
 	Timestamp     uint32       `json:"timestamp,omitempty"`
 	TimestampISO  string       `json:"timestampISO,omitempty"`
@@ -148,6 +157,12 @@ type Payload struct {
 	InnerType     *int    `json:"innerType,omitempty"`
 	InnerTypeName string  `json:"innerTypeName,omitempty"`
 	InnerAckCrc   string  `json:"innerAckCrc,omitempty"`
+	// Extended ACK inner fields (issue #1610) — when the multipart inner
+	// blob is a v1.16+ extended ACK (5 or 6 bytes after the byte0 header),
+	// surface the same attempt/rand bytes as the top-level decoder.
+	InnerAckLen     *int  `json:"innerAckLen,omitempty"`
+	InnerAckAttempt *int  `json:"innerAckAttempt,omitempty"`
+	InnerAckRand    *int  `json:"innerAckRand,omitempty"`
 	InnerPayload  string  `json:"innerPayload,omitempty"`
 	// CONTROL (PAYLOAD_TYPE_CONTROL=0x0B) byte0 flags, per
 	// firmware/src/Mesh.cpp:69 — byte0 high-bit marks zero-hop direct subset.
@@ -266,10 +281,27 @@ func decodeAck(buf []byte) Payload {
 		return Payload{Type: "ACK", Error: "too short", RawHex: hex.EncodeToString(buf)}
 	}
 	checksum := binary.LittleEndian.Uint32(buf[0:4])
-	return Payload{
+	ackLen := len(buf)
+	if ackLen > 6 {
+		ackLen = 6
+	}
+	p := Payload{
 		Type:      "ACK",
 		ExtraHash: fmt.Sprintf("%08x", checksum),
+		AckLen:    &ackLen,
 	}
+	// Firmware 1.16.0 extended ACK (issue #1610): 5th byte is the attempt
+	// counter (commit f6e6fdaa), 6th byte is a random byte added so identical
+	// attempts still hash uniquely (commit a130a95a).
+	if len(buf) >= 5 {
+		attempt := int(buf[4])
+		p.AckAttempt = &attempt
+	}
+	if len(buf) >= 6 {
+		rnd := int(buf[5])
+		p.AckRand = &rnd
+	}
+	return p
 }

 func decodeAdvert(buf []byte, validateSignatures bool) Payload {
@@ -664,6 +696,21 @@ func decodeMultipart(buf []byte) Payload {
 		// to match decodeAck's extraHash convention.
 		crc := binary.LittleEndian.Uint32(buf[1:5])
 		p.InnerAckCrc = fmt.Sprintf("%08x", crc)
+		// Firmware 1.16.0 extended ACK (issue #1610): inner ACK blob may be
+		// 5 or 6 bytes (payload_len = 1 + ack_len) instead of always 4.
+		ackLen := len(buf) - 1
+		if ackLen > 6 {
+			ackLen = 6
+		}
+		p.InnerAckLen = &ackLen
+		if len(buf) >= 6 {
+			attempt := int(buf[5])
+			p.InnerAckAttempt = &attempt
+		}
+		if len(buf) >= 7 {
+			rnd := int(buf[6])
+			p.InnerAckRand = &rnd
+		}
 	} else if len(buf) > 1 {
 		p.InnerPayload = hex.EncodeToString(buf[1:])
 	}
@@ -0,0 +1,202 @@
+package main
+
+import (
+	"log"
+	"sync"
+	"sync/atomic"
+	"time"
+)
+
+// IngestBuffer decouples MQTT message receipt from DB writes (#1608).
+//
+// On boot the ingestor must subscribe to MQTT immediately, but the single
+// SQLite writer (#1283) can be held for minutes by a startup migration
+// (e.g. a large CREATE INDEX) or prune. Without buffering, every QoS-0 packet
+// received in that window is lost. IngestBuffer holds received work in a
+// bounded FIFO and a single consumer goroutine drains it once Ready() is
+// called — i.e. once the write path is free.
+//
+// A single consumer preserves the single-writer invariant: jobs run one at a
+// time, exactly as paho's in-order handler did before. Submit never blocks the
+// MQTT delivery goroutine; if the buffer is full it drops and counts (bounded
+// memory). Buffering replays the original messages, so it introduces NO
+// duplicates (contrast: a QoS-1 broker-queue would).
+type IngestBuffer struct {
+	jobs      chan func()   // bounded FIFO of pending jobs; never closed
+	ready     chan struct{} // closed by Ready() to let the consumer start draining
+	stop      chan struct{} // closed by Stop() to make the consumer exit
+	done      chan struct{} // closed when the consumer goroutine returns
+	dropped   atomic.Int64  // jobs rejected by Submit because jobs was full
+	startOnce sync.Once     // Start() spawns at most one consumer
+	readyOnce sync.Once     // guards close(ready)
+	stopOnce  sync.Once     // guards close(stop)
+
+	// dropLogMu guards the time-based drop-log throttle (PR #1623
+	// round-1 fix to #1609 M1). Per-drop logging under sustained
+	// stalls could flood the log at MQTT inbound rate; instead we
+	// always log the FIRST drop of a stall and then summarize at
+	// most once per second until the stall ends.
+	dropLogMu      sync.Mutex
+	stallActive    bool      // true between first drop and first successful Submit
+	stallStart     time.Time // when the current stall began
+	stallStartDrop int64     // dropped total just before the stall's first drop
+	lastSummaryAt  time.Time // last time we wrote a drop line (first drop or summary)
+}
+
+// dropLogSummaryInterval is the minimum interval between summary lines
+// during a sustained stall. Exposed as a var so tests can shrink it.
+var dropLogSummaryInterval = time.Second
+
+// NewIngestBuffer returns a buffer holding up to capacity pending jobs.
+// Non-positive capacity is clamped to 1 and a WARN is logged so the
+// misconfiguration is visible (PR #1609 m2 — silent clamp hid bad
+// ingestBufferSize values).
+func NewIngestBuffer(capacity int) *IngestBuffer {
+	if capacity < 1 {
+		log.Printf("[ingest-buffer] WARN: requested capacity %d < 1, clamping to 1 — check ingestBufferSize config; default is 50000", capacity)
+		capacity = 1
+	}
+	return &IngestBuffer{
+		jobs:  make(chan func(), capacity),
+		ready: make(chan struct{}),
+		stop:  make(chan struct{}),
+		done:  make(chan struct{}),
+	}
+}
+
+// Submit enqueues a job without blocking. If the buffer is full the job is
+// dropped and the dropped counter is incremented. Safe for concurrent callers.
+//
+// Ordering invariant: callers MUST call Start() before the first Submit().
+// Submit only enqueues — without a running consumer, jobs sit in the channel
+// and (once cap is reached) are dropped, counted and logged, until Start()+Ready() run.
+//
+// Drop logging (PR #1623 round-1 fix to #1609 M1) uses a time-based
+// throttle to stay loud-on-stall-start without flooding under sustained
+// stalls:
+//   - the FIRST drop of a stall logs immediately
+//   - subsequent drops are summarized at most once per dropLogSummaryInterval
+//   - when the next Submit succeeds, a "drained" recovery line is
+//     emitted so operators can quantify the burst
+//
+// All log lines include the buffer capacity for operator triage.
+func (b *IngestBuffer) Submit(job func()) {
+	select {
+	case b.jobs <- job:
+		b.maybeLogRecovery() // enqueue succeeded: close out any active stall
+	default: // queue full: count the drop, then log under the throttle
+		n := b.dropped.Add(1)
+		b.logDrop(n)
+	}
+}
+
+// logDrop emits a drop log line under the time-based throttle. The first
+// drop of a stall always logs; subsequent drops summarize at most once
+// per dropLogSummaryInterval. n is the dropped total including this drop.
+func (b *IngestBuffer) logDrop(n int64) {
+	b.dropLogMu.Lock()
+	defer b.dropLogMu.Unlock()
+	now := time.Now()
+	if !b.stallActive {
+		b.stallActive = true
+		b.stallStart = now
+		b.stallStartDrop = n - 1 // baseline: total before this stall (n already counts its first drop)
+		b.lastSummaryAt = now
+		log.Printf("[ingest-buffer] WARNING: buffer full (cap %d), dropped %d message(s) total — write path stalled, raise ingestBufferSize or investigate slow writer", cap(b.jobs), n)
+		return
+	}
+	if now.Sub(b.lastSummaryAt) >= dropLogSummaryInterval {
+		b.lastSummaryAt = now
+		stallDrops := n - b.stallStartDrop
+		log.Printf("[ingest-buffer] WARNING: buffer full (cap %d), %d drop(s) in current stall, %d total — write path still stalled", cap(b.jobs), stallDrops, n)
+	}
+}
+
+// maybeLogRecovery is called from the success branch of Submit (so it takes
+// dropLogMu on every accepted job). If a stall was active it logs the
+// recovery line with the stall's drop count and duration, then clears the flag.
+func (b *IngestBuffer) maybeLogRecovery() {
+	b.dropLogMu.Lock()
+	defer b.dropLogMu.Unlock()
+	if !b.stallActive {
+		return
+	}
+	stallDrops := b.dropped.Load() - b.stallStartDrop
+	dur := time.Since(b.stallStart)
+	log.Printf("[ingest-buffer] INFO: buffer drained, %d drop(s) over %s (cap %d) — write path recovered", stallDrops, dur.Round(time.Millisecond), cap(b.jobs))
+	b.stallActive = false // a later drop starts a fresh stall; one free slot was enough to get here
+}
+
+// Start launches the consumer goroutine and returns immediately. The
+// consumer waits until Ready() is called (or Stop() fires, whichever comes
+// first), then runs buffered and newly-submitted jobs serially, in FIFO order. Idempotent.
+//
+// Lifecycle: Stop() closes b.stop, which causes the consumer to exit via
+// the stop-select arm (after draining any queued jobs if Ready() had
+// already fired). The b.jobs channel is never closed — closing it would
+// race with concurrent Submit() callers and panic; instead jobs is
+// garbage-collected with the buffer once all references drop. Done() is
+// closed when the consumer goroutine returns.
+func (b *IngestBuffer) Start() {
+	b.startOnce.Do(func() {
+		go func() {
+			defer close(b.done)
+			select {
+			case <-b.ready:
+			case <-b.stop:
+				// Stopped before Ready — exit immediately. Pending jobs
+				// are discarded; the buffer was never authorized to drain.
+				return
+			}
+			for {
+				select {
+				case job := <-b.jobs:
+					job()
+				case <-b.stop:
+					// Stop after Ready — drain whatever is queued so
+					// shutdown is graceful, then exit. b.jobs is never
+					// closed (see Start godoc), so a default-case
+					// non-blocking receive is the correct drain idiom.
+					for {
+						select {
+						case job := <-b.jobs:
+							job()
+						default:
+							return
+						}
+					}
+				}
+			}
+		}()
+	})
+}
+
+// Ready signals that the write path is available; the consumer begins
+// draining. Idempotent.
+//
+// Ordering invariant: Start() MUST have been called before Ready() takes
+// effect. Calling Ready() without a prior Start() simply closes the ready
+// channel — nothing drains until a later Start() runs its consumer goroutine.
+func (b *IngestBuffer) Ready() {
+	b.readyOnce.Do(func() { close(b.ready) })
+}
+
+// Dropped returns the number of jobs dropped due to a full buffer.
+func (b *IngestBuffer) Dropped() int64 { return b.dropped.Load() }
+
+// Pending returns the current queue depth (best-effort; for observability).
+func (b *IngestBuffer) Pending() int { return len(b.jobs) }
+
+// Stop signals the consumer goroutine to exit: immediately if Ready() never
+// fired, otherwise after running whatever is still queued. Idempotent and
+// safe to call without a prior Start(). Tests use it so the goroutine that
+// Start() spawns is not leaked; Done() is closed once the consumer exits.
+func (b *IngestBuffer) Stop() {
+	b.stopOnce.Do(func() { close(b.stop) })
+}
+
+// Done returns a channel that is closed after the consumer goroutine has
+// exited. If Start() was never called, Done() never closes.
+func (b *IngestBuffer) Done() <-chan struct{} {
+	return b.done
+}
@@ -0,0 +1,274 @@
+package main
+
+import (
+	"bytes"
+	"log"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+func TestIngestBuffer_BuffersUntilReady(t *testing.T) {
+	b := NewIngestBuffer(10)
+	t.Cleanup(b.Stop)
+	var ran atomic.Int64
+	b.Start()
+	for i := 0; i < 3; i++ {
+		b.Submit(func() { ran.Add(1) })
+	}
+	time.Sleep(30 * time.Millisecond)
+	if ran.Load() != 0 {
+		t.Fatalf("jobs ran before Ready(): %d", ran.Load())
+	}
+	b.Ready()
+	deadline := time.Now().Add(time.Second)
+	for ran.Load() < 3 && time.Now().Before(deadline) {
+		time.Sleep(5 * time.Millisecond)
+	}
+	if ran.Load() != 3 {
+		t.Fatalf("want 3 ran after Ready, got %d", ran.Load())
+	}
+}
+
+func TestIngestBuffer_FIFOOrder(t *testing.T) {
+	b := NewIngestBuffer(10)
+	t.Cleanup(b.Stop)
+	out := make(chan int, 5)
+	b.Start()
+	for i := 0; i < 5; i++ {
+		i := i
+		b.Submit(func() { out <- i })
+	}
+	b.Ready()
+	for want := 0; want < 5; want++ {
+		select {
+		case got := <-out:
+			if got != want {
+				t.Fatalf("order: want %d got %d", want, got)
+			}
+		case <-time.After(time.Second):
+			t.Fatalf("timeout waiting for job %d", want)
+		}
+	}
+}
+
+func TestIngestBuffer_DropsWhenFull(t *testing.T) {
+	b := NewIngestBuffer(2)
+	t.Cleanup(b.Stop) // never Start()ed or Ready()'d -> nothing drains
+	for i := 0; i < 5; i++ {
+		b.Submit(func() {})
+	}
+	if got := b.Dropped(); got != 3 {
+		t.Fatalf("want 3 dropped (cap 2, 5 submitted), got %d", got)
+	}
+}
+
+func TestIngestBuffer_ProcessesAfterReady(t *testing.T) {
+	b := NewIngestBuffer(10)
+	t.Cleanup(b.Stop)
+	b.Start()
+	b.Ready()
+	done := make(chan struct{})
+	b.Submit(func() { close(done) })
+	select {
+	case <-done:
+	case <-time.After(time.Second):
+		t.Fatal("job submitted after Ready was not processed")
+	}
+}
+
+func TestIngestBuffer_SerialExecution(t *testing.T) {
+	b := NewIngestBuffer(50)
+	t.Cleanup(b.Stop)
+	var inFlight atomic.Int32
+	var overlap atomic.Bool
+	var wg sync.WaitGroup
+	b.Start()
+	const n = 20
+	wg.Add(n)
+	for i := 0; i < n; i++ {
+		b.Submit(func() {
+			if inFlight.Add(1) > 1 {
+				overlap.Store(true)
+			}
+			time.Sleep(time.Millisecond)
+			inFlight.Add(-1)
+			wg.Done()
+		})
+	}
+	b.Ready()
+	wg.Wait()
+	if overlap.Load() {
+		t.Fatal("jobs overlapped — consumer is not serial (violates single-writer)")
+	}
+}
+
+func TestIngestBuffer_ConcurrentSubmitSafe(t *testing.T) {
+	b := NewIngestBuffer(20000)
+	t.Cleanup(b.Stop)
+	b.Start()
+	var wg sync.WaitGroup
+	for g := 0; g < 8; g++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			for i := 0; i < 1000; i++ {
+				b.Submit(func() {})
+			}
+		}()
+	}
+	wg.Wait()
+	b.Ready()
+	// Assertion is the absence of a race/panic; run under -race in CI.
+}
+
+// TestIngestBuffer_StopUnblocksConsumer guards the consumer-goroutine leak
+// described in PR #1609 review m1: the goroutine spawned by Start() blocks on
+// <-b.ready forever if Ready() is never called, leaking it in test runs. Stop()
+// must signal the consumer to exit cleanly without requiring Ready().
+func TestIngestBuffer_StopUnblocksConsumer(t *testing.T) {
+	b := NewIngestBuffer(10)
+	t.Cleanup(b.Stop) // second Stop() at cleanup is harmless: guarded by stopOnce
+	b.Start()
+	// Do NOT call Ready(). The consumer must exit purely because of Stop().
+	b.Stop()
+	select {
+	case <-b.Done():
+		// good — consumer goroutine returned
+	case <-time.After(time.Second):
+		t.Fatal("Stop() did not unblock the consumer goroutine within 1s (Done() never closed)")
+	}
+}
+
+// TestNewIngestBuffer_WarnsOnSubOneClamp asserts that constructing the
+// buffer with a non-positive capacity emits a WARN log line. Silent
+// clamping (PR #1609 review m2) hid misconfigurations like
+// ingestBufferSize=-1 or 0-from-default-not-applied paths.
+func TestNewIngestBuffer_WarnsOnSubOneClamp(t *testing.T) {
+	var buf bytes.Buffer
+	oldOut := log.Writer()
+	oldFlags := log.Flags()
+	log.SetOutput(&buf)
+	log.SetFlags(0)
+	t.Cleanup(func() {
+		log.SetOutput(oldOut)
+		log.SetFlags(oldFlags)
+	})
+
+	b := NewIngestBuffer(0)
+	t.Cleanup(b.Stop)
+
+	got := buf.String()
+	if !strings.Contains(got, "WARN") || !strings.Contains(got, "ingest-buffer") {
+		t.Fatalf("expected WARN log on sub-one clamp, got %q", got)
+	}
+}
+
+// TestIngestBuffer_DropLogThrottle asserts the time-based throttle (PR
+// #1623 round-1 fix to #1609 M1): the FIRST drop of a stall logs
+// immediately (loud), then subsequent drops within the same stall are
+// rate-limited to at most one summary line per second, and a recovery
+// line is emitted when Submit succeeds again. This prevents log-flood
+// under sustained stalls (potentially hundreds of MB/min) while
+// preserving "loud the instant the stall starts".
+func TestIngestBuffer_DropLogThrottle(t *testing.T) {
+	var buf bytes.Buffer
+	oldOut := log.Writer()
+	oldFlags := log.Flags()
+	log.SetOutput(&buf)
+	log.SetFlags(0)
+	t.Cleanup(func() {
+		log.SetOutput(oldOut)
+		log.SetFlags(oldFlags)
+	})
+
+	b := NewIngestBuffer(2)
+	t.Cleanup(b.Stop)
+	// Fill to capacity (no Start()/Ready() — nothing drains).
+	for i := 0; i < 2; i++ {
+		b.Submit(func() {})
+	}
+	// 100 drops in tight loop (well under 1s).
+	for i := 0; i < 100; i++ {
+		b.Submit(func() {})
+	}
+
+	got := buf.String()
+	lines := strings.Count(got, "buffer full")
+	if lines < 1 {
+		t.Fatalf("expected the FIRST drop to log immediately; got 0 'buffer full' lines:\n%s", got)
+	}
+	if lines > 2 {
+		t.Fatalf("expected at most 2 'buffer full' lines for 100 drops in <1s (first + at-most-one summary), got %d:\n%s", lines, got)
+	}
+	// Capacity must appear in the output (this only proves at least one line carries it).
+	if !strings.Contains(got, "cap 2") {
+		t.Fatalf("expected every drop log line to include 'cap 2', got:\n%s", got)
+	}
+}
+
+// TestIngestBuffer_DropLogFirstAlwaysImmediate guards the "loud the
+// instant the stall starts" half of the throttle contract from PR
+// #1623: even a single drop must log immediately, not be silently
+// absorbed by the per-second summary window.
+func TestIngestBuffer_DropLogFirstAlwaysImmediate(t *testing.T) {
+	var buf bytes.Buffer
+	oldOut := log.Writer()
+	oldFlags := log.Flags()
+	log.SetOutput(&buf)
+	log.SetFlags(0)
+	t.Cleanup(func() {
+		log.SetOutput(oldOut)
+		log.SetFlags(oldFlags)
+	})
+
+	b := NewIngestBuffer(1)
+	t.Cleanup(b.Stop)
+	b.Submit(func() {}) // fills cap=1
+	b.Submit(func() {}) // first drop
+	got := buf.String()
+	if !strings.Contains(got, "buffer full") {
+		t.Fatalf("expected FIRST drop to log immediately; got:\n%s", got)
+	}
+}
+
+// TestIngestBuffer_DropLogRecoveryAfterDrain guards the recovery-line
+// half of the throttle contract: once Submit succeeds again after one
+// or more drops, a "recovered" / "drained" line must be emitted so
+// operators can quantify the burst (PR #1623).
+func TestIngestBuffer_DropLogRecoveryAfterDrain(t *testing.T) {
+	var buf bytes.Buffer
+	oldOut := log.Writer()
+	oldFlags := log.Flags()
+	log.SetOutput(&buf)
+	log.SetFlags(0)
+	t.Cleanup(func() {
+		log.SetOutput(oldOut)
+		log.SetFlags(oldFlags)
+	})
+
+	b := NewIngestBuffer(1)
+	t.Cleanup(b.Stop)
+	b.Submit(func() {}) // fills cap=1
+	for i := 0; i < 3; i++ {
+		b.Submit(func() {}) // drops
+	}
+	// Drain: start consumer and Ready(), wait for queue to empty.
+	b.Start()
+	b.Ready()
+	deadline := time.Now().Add(time.Second)
+	for b.Pending() > 0 && time.Now().Before(deadline) {
+		time.Sleep(2 * time.Millisecond)
+	}
+	// Now a successful Submit should trigger the recovery line.
+	b.Submit(func() {})
+	// Submit logs the recovery line synchronously; this sleep is only slack.
+	time.Sleep(20 * time.Millisecond)
+
+	got := buf.String()
+	if !strings.Contains(got, "drained") && !strings.Contains(got, "recovered") {
+		t.Fatalf("expected a 'drained'/'recovered' log line after stall ended; got:\n%s", got)
+	}
+}
@@ -0,0 +1,134 @@
+package main
+
+// Tests for issue #1610: firmware 1.16.0 extended ACK support.
+//
+// Wire vectors are synthetic, derived by hand from the firmware spec:
+//   - Variable-length ACK on the wire:
+//       firmware/src/Mesh.cpp:545-575 createAck/createMultiAck (commit f6e6fdaa)
+//   - 5-byte ACK = 4-byte truncated sha256 CRC + 1-byte attempt counter:
+//       firmware/src/helpers/BaseChatMesh.cpp:218-232 (commit f6e6fdaa)
+//   - 6-byte ACK = 5-byte + 1-byte RNG (so identical attempts get unique hash):
+//       firmware/src/helpers/BaseChatMesh.cpp:219-234 (commit a130a95a)
+//   - Multipart ACK inner blob: firmware/src/Mesh.cpp:292-307 — byte0 then
+//       ack bytes, payload_len = 1 + ack_len.
+
+import (
+	"testing"
+)
+
+// --- top-level ACK (decodeAck) ---
+
+func TestDecodeAckLegacy4Byte(t *testing.T) {
+	// Backwards-compat: 4-byte ACK leaves the new optional fields nil.
+	buf := []byte{0xAA, 0xBB, 0xCC, 0xDD}
+	p := decodeAck(buf)
+	if p.ExtraHash != "ddccbbaa" {
+		t.Errorf("extraHash=%q want ddccbbaa", p.ExtraHash)
+	}
+	if p.AckLen == nil || *p.AckLen != 4 {
+		t.Errorf("ackLen=%v want 4", p.AckLen)
+	}
+	if p.AckAttempt != nil {
+		t.Errorf("ackAttempt=%v want nil for legacy 4-byte ACK", *p.AckAttempt)
+	}
+	if p.AckRand != nil {
+		t.Errorf("ackRand=%v want nil for legacy 4-byte ACK", *p.AckRand)
+	}
+}
+
+func TestDecodeAck5ByteExtended(t *testing.T) {
+	// v1.16 sender (commit f6e6fdaa): 4-byte CRC + 1-byte attempt.
+	buf := []byte{0xAA, 0xBB, 0xCC, 0xDD, 0x07}
+	p := decodeAck(buf)
+	if p.ExtraHash != "ddccbbaa" {
+		t.Errorf("extraHash=%q want ddccbbaa", p.ExtraHash)
+	}
+	if p.AckLen == nil || *p.AckLen != 5 {
+		t.Errorf("ackLen=%v want 5", p.AckLen)
+	}
+	if p.AckAttempt == nil || *p.AckAttempt != 7 {
+		t.Errorf("ackAttempt=%v want 7", p.AckAttempt)
+	}
+	if p.AckRand != nil {
+		t.Errorf("ackRand=%v want nil for 5-byte ACK", *p.AckRand)
+	}
+}
+
+func TestDecodeAck6ByteExtended(t *testing.T) {
+	// v1.16 sender (commit a130a95a): 4-byte CRC + 1-byte attempt + 1-byte RNG.
+	buf := []byte{0xAA, 0xBB, 0xCC, 0xDD, 0x02, 0x5A}
+	p := decodeAck(buf)
+	if p.ExtraHash != "ddccbbaa" {
+		t.Errorf("extraHash=%q want ddccbbaa", p.ExtraHash)
+	}
+	if p.AckLen == nil || *p.AckLen != 6 {
+		t.Errorf("ackLen=%v want 6", p.AckLen)
+	}
+	if p.AckAttempt == nil || *p.AckAttempt != 2 {
+		t.Errorf("ackAttempt=%v want 2", p.AckAttempt)
+	}
+	if p.AckRand == nil || *p.AckRand != 0x5A {
+		t.Errorf("ackRand=%v want 90", p.AckRand)
+	}
+}
+
+// --- multipart-with-ACK (decodeMultipart) ---
+
+// buildMultipartAckByte0: remaining<<4 | PayloadACK (0x02).
+func buildMultipartAckByte0(remaining int) byte {
+	return byte((remaining<<4)&0xF0) | byte(PayloadACK&0x0F)
+}
+
+func TestDecodeMultipartAck4ByteLegacy(t *testing.T) {
+	// Pre-1.16 inner ACK is 4 bytes → ackLen=4, attempt/rand nil.
+	buf := []byte{buildMultipartAckByte0(3), 0xAA, 0xBB, 0xCC, 0xDD}
+	p := decodeMultipart(buf)
+	if p.InnerAckCrc != "ddccbbaa" {
+		t.Errorf("innerAckCrc=%q want ddccbbaa", p.InnerAckCrc)
+	}
+	if p.InnerAckLen == nil || *p.InnerAckLen != 4 {
+		t.Errorf("innerAckLen=%v want 4", p.InnerAckLen)
+	}
+	if p.InnerAckAttempt != nil {
+		t.Errorf("innerAckAttempt=%v want nil", *p.InnerAckAttempt)
+	}
+	if p.InnerAckRand != nil {
+		t.Errorf("innerAckRand=%v want nil", *p.InnerAckRand)
+	}
+}
+
+func TestDecodeMultipartAck5Byte(t *testing.T) {
+	// v1.16: byte0 + 4-byte CRC + 1-byte attempt → payload_len = 6.
+	buf := []byte{buildMultipartAckByte0(1), 0xAA, 0xBB, 0xCC, 0xDD, 0x09}
+	p := decodeMultipart(buf)
+	if p.InnerAckCrc != "ddccbbaa" {
+		t.Errorf("innerAckCrc=%q want ddccbbaa", p.InnerAckCrc)
+	}
+	if p.InnerAckLen == nil || *p.InnerAckLen != 5 {
+		t.Errorf("innerAckLen=%v want 5", p.InnerAckLen)
+	}
+	if p.InnerAckAttempt == nil || *p.InnerAckAttempt != 9 {
+		t.Errorf("innerAckAttempt=%v want 9", p.InnerAckAttempt)
+	}
+	if p.InnerAckRand != nil {
+		t.Errorf("innerAckRand=%v want nil for 5-byte inner ACK", *p.InnerAckRand)
+	}
+}
+
+func TestDecodeMultipartAck6Byte(t *testing.T) {
+	// v1.16: byte0 + 4-byte CRC + 1-byte attempt + 1-byte RNG → payload_len = 7.
+	buf := []byte{buildMultipartAckByte0(0), 0xAA, 0xBB, 0xCC, 0xDD, 0x04, 0xC3}
+	p := decodeMultipart(buf)
+	if p.InnerAckCrc != "ddccbbaa" {
+		t.Errorf("innerAckCrc=%q want ddccbbaa", p.InnerAckCrc)
+	}
+	if p.InnerAckLen == nil || *p.InnerAckLen != 6 {
+		t.Errorf("innerAckLen=%v want 6", p.InnerAckLen)
+	}
+	if p.InnerAckAttempt == nil || *p.InnerAckAttempt != 4 {
+		t.Errorf("innerAckAttempt=%v want 4", p.InnerAckAttempt)
+	}
+	if p.InnerAckRand == nil || *p.InnerAckRand != 0xC3 {
+		t.Errorf("innerAckRand=%v want 195", p.InnerAckRand)
+	}
+}
@@ -0,0 +1,30 @@
+package main
+
+import "fmt"
+
+// formatStatusLog formats the "status: name (iata)" log line emitted on
+// MQTT status messages. name + iata are MQTT-controlled and routed
+// through sanitizeLogString so CR/LF/control bytes cannot inject forged
+// log lines. tag is interpolated unsanitized — NOTE(review): presumably
+// config-derived rather than broker-controlled; confirm at the call site.
+// See audit-input-vulns-20260603 follow-up to #1540 — call site
+// cmd/ingestor/main.go:531.
+func formatStatusLog(tag, name, iata string) string {
+	return fmt.Sprintf("MQTT [%s] status: %s (%s)", tag, sanitizeLogString(name), sanitizeLogString(iata))
+}
+
+// formatChannelMessageLog formats the "channel message: chN from S" log line
+// emitted on MQTT channel messages. channelIdx + sender are MQTT-controlled.
+// Both go through sanitizeLogString; tag is interpolated as-is (TODO confirm it is trusted).
+// Call site cmd/ingestor/main.go:854.
+func formatChannelMessageLog(tag, channelIdx, sender string) string {
+	return fmt.Sprintf("MQTT [%s] channel message: ch%s from %s", tag, sanitizeLogString(channelIdx), sanitizeLogString(sender))
+}
+
+// formatDirectMessageLog formats the "direct message from S" log line
+// emitted on MQTT DM messages. sender is MQTT-controlled.
+// It goes through sanitizeLogString; tag is interpolated as-is (TODO confirm it is trusted).
+// Call site cmd/ingestor/main.go:940.
+func formatDirectMessageLog(tag, sender string) string {
+	return fmt.Sprintf("MQTT [%s] direct message from %s", tag, sanitizeLogString(sender))
+}
@@ -0,0 +1,53 @@
+package main
+
+import (
+	"strings"
+	"testing"
+)
+
+// TestFormatStatusLog_SanitizesMQTTFields pins the status log line at
+// cmd/ingestor/main.go:531 — MQTT-derived name + iata must not be able to
+// inject CR/LF/control bytes into the log stream.
+func TestFormatStatusLog_SanitizesMQTTFields(t *testing.T) {
+	got := formatStatusLog("ds1", "evil\r\n[FAKE LOG LINE]", "X\nY")
+	if strings.ContainsAny(got, "\r\n") {
+		t.Fatalf("formatStatusLog leaked CR/LF: %q", got)
+	}
+	if strings.Contains(got, "[FAKE LOG LINE]") && !strings.Contains(got, "?[FAKE LOG LINE]") {
+		t.Fatalf("formatStatusLog passed injection payload through unmodified: %q", got)
+	}
+}
+
+// TestFormatChannelMessageLog_SanitizesMQTTFields pins
+// cmd/ingestor/main.go:854 — channelIdx + sender are MQTT-controlled.
+func TestFormatChannelMessageLog_SanitizesMQTTFields(t *testing.T) {
+	got := formatChannelMessageLog("ds1", "0\r\n[FAKE]", "evil\nguy")
+	if strings.ContainsAny(got, "\r\n") {
+		t.Fatalf("formatChannelMessageLog leaked CR/LF: %q", got)
+	}
+}
+
+// TestFormatDirectMessageLog_SanitizesMQTTFields pins
+// cmd/ingestor/main.go:940 — sender is MQTT-controlled.
+func TestFormatDirectMessageLog_SanitizesMQTTFields(t *testing.T) {
+	got := formatDirectMessageLog("ds1", "evil\r\n[FAKE LOG LINE] something")
+	if strings.ContainsAny(got, "\r\n") {
+		t.Fatalf("formatDirectMessageLog leaked CR/LF: %q", got)
+	}
+	if !strings.Contains(got, "??[FAKE LOG LINE]") {
+		t.Fatalf("formatDirectMessageLog did not sanitize injection payload: %q", got)
+	}
+}
+
+// Sanity: legitimate input passes through untouched apart from tag framing.
+func TestFormatLogs_LegitInputUnchanged(t *testing.T) {
+	if got := formatStatusLog("ds1", "alpha-node", "BG"); got != "MQTT [ds1] status: alpha-node (BG)" {
+		t.Fatalf("unexpected status line: %q", got)
+	}
+	if got := formatChannelMessageLog("ds1", "3", "bob"); got != "MQTT [ds1] channel message: ch3 from bob" {
+		t.Fatalf("unexpected channel line: %q", got)
+	}
+	if got := formatDirectMessageLog("ds1", "bob"); got != "MQTT [ds1] direct message from bob" {
+		t.Fatalf("unexpected DM line: %q", got)
+	}
+}
@@ -51,6 +51,25 @@ func main() {
 		log.Fatalf("config: %v", err)
 	}

+	// Apply Go runtime soft memory limit (GOMEMLIMIT). See #1010.
+	// Precedence: GOMEMLIMIT env > runtime.maxMemoryMB > unset (default).
+	{
+		_, envSet := os.LookupEnv("GOMEMLIMIT")
+		runtimeMaxMB := 0
+		if cfg.Runtime != nil {
+			runtimeMaxMB = cfg.Runtime.MaxMemoryMB
+		}
+		limit, source := applyMemoryLimit(runtimeMaxMB, envSet)
+		switch source {
+		case "env":
+			log.Printf("[memlimit] using GOMEMLIMIT from environment (%s)", os.Getenv("GOMEMLIMIT"))
+		case "config":
+			log.Printf("[memlimit] runtime.maxMemoryMB=%d → SetMemoryLimit(%d MiB)", runtimeMaxMB, limit/(1024*1024))
+		default:
+			log.Printf("[memlimit] unset → default (no soft memory limit; recommend setting GOMEMLIMIT or runtime.maxMemoryMB to ≥1.5× working set to avoid OOM-kill)")
+		}
+	}
+
 	sources := cfg.ResolvedSources()

 	store, err := OpenStoreWithInterval(cfg.DBPath, cfg.MetricsSampleInterval())
@@ -75,154 +94,6 @@ func main() {
 	// Check auto_vacuum mode and optionally migrate (#919)
 	store.CheckAutoVacuum(cfg)

-	// Node retention: move stale nodes to inactive_nodes on startup
-	nodeDays := cfg.NodeDaysOrDefault()
-	store.MoveStaleNodes(nodeDays)
-
-	// Observer retention: remove stale observers on startup
-	observerDays := cfg.ObserverDaysOrDefault()
-	store.RemoveStaleObservers(observerDays)
-
-	// Metrics retention: prune old metrics on startup
-	metricsDays := cfg.MetricsRetentionDays()
-	store.PruneOldMetrics(metricsDays)
-	store.PruneDroppedPackets(metricsDays)
-
-	// Packet (transmissions) retention: previously lived in cmd/server,
-	// moved to ingestor in #1283 to eliminate cross-process write
-	// contention (SQLITE_BUSY). 0 = disabled.
-	packetDays := cfg.PacketDaysOrZero()
-	if packetDays > 0 {
-		if n, err := store.PruneOldPackets(packetDays); err != nil {
-			log.Printf("[prune] error: %v", err)
-		} else if n > 0 {
-			log.Printf("[prune] startup pruned %d transmissions older than %d days", n, packetDays)
-		}
-	}
-
-	vacuumPages := cfg.IncrementalVacuumPages()
-	store.RunIncrementalVacuum(vacuumPages)
-
-	// Daily ticker for node retention
-	retentionTicker := time.NewTicker(1 * time.Hour)
-	go func() {
-		for range retentionTicker.C {
-			store.MoveStaleNodes(nodeDays)
-			store.RunIncrementalVacuum(vacuumPages)
-		}
-	}()
-
-	// Daily ticker for observer retention (every 24h, staggered 90s after startup)
-	observerRetentionTicker := time.NewTicker(24 * time.Hour)
-	go func() {
-		time.Sleep(90 * time.Second) // stagger after metrics prune
-		store.RemoveStaleObservers(observerDays)
-		store.RunIncrementalVacuum(vacuumPages)
-		for range observerRetentionTicker.C {
-			store.RemoveStaleObservers(observerDays)
-			store.RunIncrementalVacuum(vacuumPages)
-		}
-	}()
-
-	// Daily ticker for metrics retention (every 24h)
-	metricsRetentionTicker := time.NewTicker(24 * time.Hour)
-	go func() {
-		for range metricsRetentionTicker.C {
-			store.PruneOldMetrics(metricsDays)
-			store.PruneDroppedPackets(metricsDays)
-			store.RunIncrementalVacuum(vacuumPages)
-		}
-	}()
-
-	// Daily ticker for transmission retention (#1283).
-	var packetRetentionTicker *time.Ticker
-	if packetDays > 0 {
-		packetRetentionTicker = time.NewTicker(24 * time.Hour)
-		go func() {
-			for range packetRetentionTicker.C {
-				if n, err := store.PruneOldPackets(packetDays); err != nil {
-					log.Printf("[prune] error: %v", err)
-				} else if n > 0 {
-					store.RunIncrementalVacuum(vacuumPages)
-				}
-			}
-		}()
-		log.Printf("[prune] auto-prune enabled: packets older than %d days will be removed daily", packetDays)
-	}
-
-	// Daily neighbor_edges retention (#1287 — moved from cmd/server).
-	{
-		nDays := cfg.NeighborEdgesDaysOrDefault()
-		neighborPruneTicker := time.NewTicker(24 * time.Hour)
-		go func() {
-			time.Sleep(4 * time.Minute) // stagger
-			if n, err := store.PruneNeighborEdges(nDays); err != nil {
-				log.Printf("[neighbor-prune] error: %v", err)
-			} else if n > 0 {
-				log.Printf("[neighbor-prune] startup pruned %d edges older than %d days", n, nDays)
-			}
-			for range neighborPruneTicker.C {
-				if n, err := store.PruneNeighborEdges(nDays); err != nil {
-					log.Printf("[neighbor-prune] error: %v", err)
-				} else if n > 0 {
-					log.Printf("[neighbor-prune] pruned %d edges older than %d days", n, nDays)
-				}
-			}
-		}()
-		log.Printf("[neighbor-prune] auto-prune enabled: edges older than %d days", nDays)
-	}
-
-	// Periodic stats logging (every 5 minutes)
-	statsTicker := time.NewTicker(5 * time.Minute)
-	go func() {
-		for range statsTicker.C {
-			store.LogStats()
-		}
-	}()
-
-	// Prune-request queue (#669 M4 / #738): the read-only server enqueues
-	// geo-prune requests as marker files; the ingestor (which holds the
-	// write handle) executes the DELETEs. Process on startup, then every
-	// 15 seconds — short enough for a one-click UX, long enough to avoid
-	// useless wake-ups.
-	store.RunPendingPruneRequests()
-	pruneQueueTicker := time.NewTicker(15 * time.Second)
-	go func() {
-		for range pruneQueueTicker.C {
-			store.RunPendingPruneRequests()
-		}
-	}()
-
-	// Per-second stats file writer for the server's /api/perf/write-sources
-	// endpoint (#1120). Best-effort; never fatal.
-	StartStatsFileWriter(store, time.Second)
-
-	// Multi-byte capability persister (#1324 follow-up): the server's
-	// analytics cycle publishes a snapshot file via internal/mbcapqueue
-	// (it cannot UPDATE itself, mode=ro since #1289). The ingestor
-	// applies the snapshot here every 5 minutes — derived/cached
-	// columns, ingestor owns the write.
-	multibytePersistTicker := time.NewTicker(5 * time.Minute)
-	go func() {
-		time.Sleep(2 * time.Minute) // stagger after analytics warmup
-		if _, err := store.RunMultibyteCapPersist(); err != nil {
-			log.Printf("[multibyte-persist] error: %v", err)
-		}
-		for range multibytePersistTicker.C {
-			if _, err := store.RunMultibyteCapPersist(); err != nil {
-				log.Printf("[multibyte-persist] error: %v", err)
-			}
-		}
-	}()
-	log.Printf("[multibyte-persist] enabled (interval=5m)")
-
-	// Neighbor-edges builder (#1287 — Option 4): ingestor owns
-	// neighbor_edges writes. Runs every 60s. Server reads the snapshot
-	// via cmd/server/neighbor_recomputer.go on the same cadence.
-	stopNeighborBuilder := store.StartNeighborEdgesBuilder(NeighborEdgesBuilderInterval)
-	defer stopNeighborBuilder()
-	log.Printf("[neighbor-build] enabled (interval=%s)", NeighborEdgesBuilderInterval)
-
 	channelKeys := loadChannelKeys(cfg, *configPath)
 	if len(channelKeys) > 0 {
 		log.Printf("Loaded %d channel keys for GRP_TXT decryption", len(channelKeys))
@@ -233,6 +104,13 @@ func main() {
 	regionKeys := loadRegionKeys(cfg)
 	store.BackfillDefaultScopeAsync(regionKeys)

+	// Subscribe-early + buffer (#1608): the MQTT subscription is brought up
+	// before startup maintenance so no packets are missed while the single
+	// SQLite writer is blocked (e.g. a large CREATE INDEX migration). Received
+	// messages are buffered here and drained once Ready() is called below.
+	ingestBuffer := NewIngestBuffer(cfg.IngestBufferSizeOrDefault())
+	ingestBuffer.Start()
+
 	// Connect to each MQTT source
 	var clients []mqtt.Client
 	connectedCount := 0
@@ -287,7 +165,15 @@ func main() {
 		// Capture source for closure
 		src := source
 		opts.SetDefaultPublishHandler(func(c mqtt.Client, m mqtt.Message) {
-			handleMessage(store, tag, src, m, channelKeys, regionKeys, cfg)
+			// PR #1609 M1: stamp the RECEIPT clock here (broker liveness)
+			// independently of the post-write clock that handleMessage
+			// stamps. Without separation the watchdog/healthz could
+			// report "fresh" while the writer was stalled and the
+			// buffer was filling.
+			markReceiptForTag(tag, time.Now())
+			ingestBuffer.Submit(func() {
+				handleMessage(store, tag, src, m, channelKeys, regionKeys, cfg)
+			})
 		})

 		client := mqtt.NewClient(opts)
@@ -354,6 +240,184 @@ func main() {
 		log.Printf("Running — %d MQTT source(s) connected", connectedCount)
 	}

+	// Node retention: move stale nodes to inactive_nodes on startup
+	nodeDays := cfg.NodeDaysOrDefault()
+	store.MoveStaleNodes(nodeDays)
+
+	// Observer retention: remove stale observers on startup
+	observerDays := cfg.ObserverDaysOrDefault()
+	store.RemoveStaleObservers(observerDays)
+
+	// Metrics retention: prune old metrics on startup
+	metricsDays := cfg.MetricsRetentionDays()
+	store.PruneOldMetrics(metricsDays)
+	store.PruneDroppedPackets(metricsDays)
+
+	// Packet (transmissions) retention: previously lived in cmd/server,
+	// moved to ingestor in #1283 to eliminate cross-process write
+	// contention (SQLITE_BUSY). 0 = disabled.
+	packetDays := cfg.PacketDaysOrZero()
+	if packetDays > 0 {
+		if n, err := store.PruneOldPackets(packetDays); err != nil {
+			log.Printf("[prune] error: %v", err)
+		} else if n > 0 {
+			log.Printf("[prune] startup pruned %d transmissions older than %d days", n, packetDays)
+		}
+	}
+
+	vacuumPages := cfg.IncrementalVacuumPages()
+	store.RunIncrementalVacuum(vacuumPages)
+
+	// Gate open: the synchronous startup writes above cannot return until the
+	// single SQLite writer is free, which means any blocking async migration
+	// (e.g. the CREATE INDEX) has finished. WaitForAsyncMigrations() makes that
+	// explicit. Now drain everything the subscription buffered during startup.
+	store.WaitForAsyncMigrations()
+	ingestBuffer.Ready()
+	if d := ingestBuffer.Dropped(); d > 0 {
+		log.Printf("[ingest-buffer] write path ready; draining backlog (dropped %d during startup — consider raising ingestBufferSize)", d)
+	} else {
+		log.Printf("[ingest-buffer] write path ready; draining backlog (0 dropped)")
+	}
+
+	// Hourly ticker for node retention
+	retentionTicker := time.NewTicker(1 * time.Hour)
+	go func() {
+		for range retentionTicker.C {
+			store.MoveStaleNodes(nodeDays)
+			store.RunIncrementalVacuum(vacuumPages)
+		}
+	}()
+
+	// Daily ticker for observer retention (every 24h, staggered 90s after startup)
+	observerRetentionTicker := time.NewTicker(24 * time.Hour)
+	go func() {
+		time.Sleep(90 * time.Second) // stagger after metrics prune
+		store.RemoveStaleObservers(observerDays)
+		store.RunIncrementalVacuum(vacuumPages)
+		for range observerRetentionTicker.C {
+			store.RemoveStaleObservers(observerDays)
+			store.RunIncrementalVacuum(vacuumPages)
+		}
+	}()
+
+	// Daily ticker for metrics retention (every 24h)
+	metricsRetentionTicker := time.NewTicker(24 * time.Hour)
+	go func() {
+		for range metricsRetentionTicker.C {
+			store.PruneOldMetrics(metricsDays)
+			store.PruneDroppedPackets(metricsDays)
+			store.RunIncrementalVacuum(vacuumPages)
+		}
+	}()
+
+	// Daily ticker for transmission retention (#1283).
+	var packetRetentionTicker *time.Ticker
+	if packetDays > 0 {
+		packetRetentionTicker = time.NewTicker(24 * time.Hour)
+		go func() {
+			for range packetRetentionTicker.C {
+				if n, err := store.PruneOldPackets(packetDays); err != nil {
+					log.Printf("[prune] error: %v", err)
+				} else if n > 0 {
+					store.RunIncrementalVacuum(vacuumPages)
+				}
+			}
+		}()
+		log.Printf("[prune] auto-prune enabled: packets older than %d days will be removed daily", packetDays)
+	}
+
+	// Hourly WAL checkpoint to prevent unbounded WAL growth.
+	// TRUNCATE resets the WAL file to zero bytes when all frames are flushed;
+	// if the server's read connection holds frames, remaining pages stay in the
+	// WAL until the next tick. Staggered 30s after startup to avoid competing
+	// with the initial burst of ingest writes.
+	walCheckpointTicker := time.NewTicker(1 * time.Hour)
+	go func() {
+		time.Sleep(30 * time.Second)
+		store.Checkpoint()
+		for range walCheckpointTicker.C {
+			store.Checkpoint()
+		}
+	}()
+	log.Printf("[db] WAL checkpoint scheduled every 1h")
+
+	// Daily neighbor_edges retention (#1287 — moved from cmd/server).
+	{
+		nDays := cfg.NeighborEdgesDaysOrDefault()
+		neighborPruneTicker := time.NewTicker(24 * time.Hour)
+		go func() {
+			time.Sleep(4 * time.Minute) // stagger
+			if n, err := store.PruneNeighborEdges(nDays); err != nil {
+				log.Printf("[neighbor-prune] error: %v", err)
+			} else if n > 0 {
+				log.Printf("[neighbor-prune] startup pruned %d edges older than %d days", n, nDays)
+			}
+			for range neighborPruneTicker.C {
+				if n, err := store.PruneNeighborEdges(nDays); err != nil {
+					log.Printf("[neighbor-prune] error: %v", err)
+				} else if n > 0 {
+					log.Printf("[neighbor-prune] pruned %d edges older than %d days", n, nDays)
+				}
+			}
+		}()
+		log.Printf("[neighbor-prune] auto-prune enabled: edges older than %d days", nDays)
+	}
+
+	// Periodic stats logging (every 5 minutes)
+	statsTicker := time.NewTicker(5 * time.Minute)
+	go func() {
+		for range statsTicker.C {
+			store.LogStats()
+			if d := ingestBuffer.Dropped(); d > 0 || ingestBuffer.Pending() > 0 {
+				log.Printf("[ingest-buffer] pending=%d dropped_total=%d", ingestBuffer.Pending(), d)
+			}
+		}
+	}()
+
+	// Prune-request queue (#669 M4 / #738): the read-only server enqueues
+	// geo-prune requests as marker files; the ingestor (which holds the
+	// write handle) executes the DELETEs. Process on startup, then every
+	// 15 seconds — short enough for a one-click UX, long enough to avoid
+	// useless wake-ups.
+	store.RunPendingPruneRequests()
+	pruneQueueTicker := time.NewTicker(15 * time.Second)
+	go func() {
+		for range pruneQueueTicker.C {
+			store.RunPendingPruneRequests()
+		}
+	}()
+
+	// Per-second stats file writer for the server's /api/perf/write-sources
+	// endpoint (#1120). Best-effort; never fatal.
+	StartStatsFileWriter(store, time.Second)
+
+	// Multi-byte capability persister (#1324 follow-up): the server's
+	// analytics cycle publishes a snapshot file via internal/mbcapqueue
+	// (it cannot UPDATE itself, mode=ro since #1289). The ingestor
+	// applies the snapshot here every 5 minutes — derived/cached
+	// columns, ingestor owns the write.
+	multibytePersistTicker := time.NewTicker(5 * time.Minute)
+	go func() {
+		time.Sleep(2 * time.Minute) // stagger after analytics warmup
+		if _, err := store.RunMultibyteCapPersist(); err != nil {
+			log.Printf("[multibyte-persist] error: %v", err)
+		}
+		for range multibytePersistTicker.C {
+			if _, err := store.RunMultibyteCapPersist(); err != nil {
+				log.Printf("[multibyte-persist] error: %v", err)
+			}
+		}
+	}()
+	log.Printf("[multibyte-persist] enabled (interval=5m)")
+
+	// Neighbor-edges builder (#1287 — Option 4): ingestor owns
+	// neighbor_edges writes. Runs every 60s. Server reads the snapshot
+	// via cmd/server/neighbor_recomputer.go on the same cadence.
+	stopNeighborBuilder := store.StartNeighborEdgesBuilder(NeighborEdgesBuilderInterval)
+	defer stopNeighborBuilder()
+	log.Printf("[neighbor-build] enabled (interval=%s)", NeighborEdgesBuilderInterval)
+
 	// #1212: per-source stall watchdog. Detects "silently dead" sources
 	// where the client reports connected but no messages have flowed. Logs
 	// a WARN line every minute for any source silent for >5m. Scan every
@@ -373,6 +437,7 @@ func main() {
 	}
 	statsTicker.Stop()
 	pruneQueueTicker.Stop()
+	walCheckpointTicker.Stop()
 	stopWatchdog()
 	store.LogStats() // final stats on shutdown
 	for _, c := range clients {
@@ -436,7 +501,9 @@ func buildMQTTOpts(source MQTTSource) *mqtt.ClientOptions {
 	}
 	if source.RejectUnauthorized != nil && !*source.RejectUnauthorized {
 		opts.SetTLSConfig(&tls.Config{InsecureSkipVerify: true})
-	} else if strings.HasPrefix(source.Broker, "ssl://") {
+	} else if strings.HasPrefix(source.Broker, "ssl://") || strings.HasPrefix(source.Broker, "wss://") {
+		// TLS with system CA pool — valid for ssl:// MQTT brokers and
+		// wss:// WebSocket brokers behind a publicly-trusted certificate.
 		opts.SetTLSConfig(&tls.Config{})
 	}
 	return opts
@@ -487,7 +554,11 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 		name, _ := msg["origin"].(string)
 		iata := parts[1]
 		meta := extractObserverMeta(msg)
-		if err := store.UpsertObserverAt(observerID, name, iata, meta, resolveRxTime(msg, tag)); err != nil {
+		// observer.last_seen is "when did the analyzer last hear from this
+		// observer" — fundamentally an ingest-time question. Passing "" makes
+		// UpsertObserverAt use time.Now(), independent of the envelope timestamp
+		// (which can be stale/skewed even when well-formed). See #1465.
+		if err := store.UpsertObserverAt(observerID, name, iata, meta, ""); err != nil {
 			log.Printf("MQTT [%s] observer status error: %v", tag, err)
 		}
 		// Insert metrics sample from status message
@@ -506,7 +577,7 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 				log.Printf("MQTT [%s] metrics insert error: %v", tag, err)
 			}
 		}
-		log.Printf("MQTT [%s] status: %s (%s)", tag, firstNonEmpty(name, observerID), iata)
+		log.Print(formatStatusLog(tag, firstNonEmpty(name, observerID), iata))
 		return
 	}

@@ -571,7 +642,14 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 		}

 		mqttMsg := &MQTTPacketMessage{Raw: rawHex}
-		mqttMsg.Timestamp = resolveRxTime(msg, tag)
+		var naiveSkewSec int64
+		mqttMsg.Timestamp, naiveSkewSec = resolveRxTime(msg, tag)
+		if naiveSkewSec != 0 && observerID != "" {
+			// Issue #1478: record so /api/observers can surface ⚠️ chip.
+			if err := store.RecordNaiveSkew(observerID, naiveSkewSec, time.Now()); err != nil {
+				log.Printf("MQTT [%s] RecordNaiveSkew(%s): %v", tag, observerID, err)
+			}
+		}
 		// Parse optional region from JSON payload (#788)
 		if v, ok := msg["region"].(string); ok && v != "" {
 			mqttMsg.Region = v
@@ -628,7 +706,7 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 					truncPK = truncPK[:16]
 				}
 				log.Printf("MQTT [%s] DROPPED invalid signature: hash=%s name=%s observer=%s pubkey=%s",
-					tag, hash, decoded.Payload.Name, firstNonEmpty(mqttMsg.Origin, observerID), truncPK)
+					tag, hash, sanitizeLogString(decoded.Payload.Name), sanitizeLogString(firstNonEmpty(mqttMsg.Origin, observerID)), truncPK)
 				store.InsertDroppedPacket(&DroppedPacket{
 					Hash:         hash,
 					RawHex:       rawHex,
@@ -658,7 +736,7 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 					truncPK = truncPK[:16]
 				}
 				log.Printf("MQTT [%s] foreign advert: node=%s name=%s lat=%.4f lon=%.4f observer=%s",
-					tag, truncPK, decoded.Payload.Name, lat, lon, firstNonEmpty(mqttMsg.Origin, observerID))
+					tag, truncPK, sanitizeLogString(decoded.Payload.Name), lat, lon, sanitizeLogString(firstNonEmpty(mqttMsg.Origin, observerID)))
 			}
 			pktData := BuildPacketData(mqttMsg, decoded, observerID, region, regionKeys)
 			pktData.Foreign = foreign
@@ -686,8 +764,8 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 					log.Printf("MQTT [%s] node telemetry update error: %v", tag, err)
 				}
 			}
-			// Update default_scope when advert carries a matched transport scope (#899)
-			if pktData.IsTransportScoped {
+			// Update default_scope when advert carries a matched transport scope (#899, #1534)
+			if shouldUpdateDefaultScope(pktData) {
 				if err := store.UpdateNodeDefaultScope(decoded.Payload.PubKey, pktData.ScopeName); err != nil {
 					log.Printf("MQTT [%s] node default_scope update error: %v", tag, err)
 				}
@@ -709,7 +787,10 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 			if mqttMsg.Region != "" {
 				effectiveRegion = mqttMsg.Region
 			}
-			if err := store.UpsertObserverAt(observerID, origin, effectiveRegion, nil, mqttMsg.Timestamp); err != nil {
+			// Same as the status-path call above: observer.last_seen is ingest
+			// time, not envelope time. Per-packet rxTime (stored in observations
+			// via InsertTransmission) still uses envelope time. See #1465.
+			if err := store.UpsertObserverAt(observerID, origin, effectiveRegion, nil, ""); err != nil {
 				log.Printf("MQTT [%s] observer upsert error: %v", tag, err)
 			}
 		}
@@ -819,7 +900,7 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 		// used for claiming/health lookups. The node will get a proper entry when it
 		// sends an advert. See issue #665.

-		log.Printf("MQTT [%s] channel message: ch%s from %s", tag, channelIdx, firstNonEmpty(sender, "unknown"))
+		log.Print(formatChannelMessageLog(tag, channelIdx, firstNonEmpty(sender, "unknown")))
 		return
 	}

@@ -905,7 +986,7 @@ func handleMessage(store *Store, tag string, source MQTTSource, m mqtt.Message,
 			log.Printf("MQTT [%s] DM insert error: %v", tag, err)
 		}

-		log.Printf("MQTT [%s] direct message from %s", tag, firstNonEmpty(sender, "unknown"))
+		log.Print(formatDirectMessageLog(tag, firstNonEmpty(sender, "unknown")))
 		return
 	}
 }
@@ -1043,6 +1124,37 @@ func extractObserverMeta(msg map[string]interface{}) *ObserverMeta {
 		}
 	}

+	// Issue #1290: firmware 1.16 publishes a `repeat` flag at the top
+	// level of the /status JSON (MQTTMessageBuilder.cpp:58 — see
+	// agessaman/MeshCore mqtt-bridge-implementation-flex). Accept
+	// a boolean, a number (non-zero = on), or a case-insensitive
+	// `on|off|yes|no|true|false|1|0` string. Missing/unrecognized → leave
+	// CanRelay nil; the writer preserves the prior column value (default 1, back-compat).
+	if v, ok := msg["repeat"]; ok && v != nil {
+		switch t := v.(type) {
+		case bool:
+			b := t
+			meta.CanRelay = &b
+			hasData = true
+		case string:
+			s := strings.ToLower(strings.TrimSpace(t))
+			switch s {
+			case "on", "true", "1", "yes":
+				b := true
+				meta.CanRelay = &b
+				hasData = true
+			case "off", "false", "0", "no":
+				b := false
+				meta.CanRelay = &b
+				hasData = true
+			}
+		case float64:
+			b := t != 0
+			meta.CanRelay = &b
+			hasData = true
+		}
+	}
+
 	if !hasData {
 		return nil
 	}
@@ -1080,22 +1192,28 @@ func firstNonEmpty(vals ...string) string {
 // the frame, not when the MQTT message is published — so a buffered packet
 // uploaded hours late still carries its true receive time. Using ingest time
 // (time.Now()) here mis-dated such packets by the upload delay.
-func resolveRxTime(msg map[string]interface{}, tag string) string {
+//
+// The returned naiveSkewSec is 0 unless a naive (zone-less) timestamp had to
+// be clamped because it was off from server-now by >15min — in which case it
+// is the signed offset in seconds (negative = observer behind UTC, positive =
+// ahead). Caller records this via Store.RecordNaiveSkew so the UI can flag
+// the observer (#1478).
+func resolveRxTime(msg map[string]interface{}, tag string) (string, int64) {
 	now := time.Now().UTC()
 	raw, _ := msg["timestamp"].(string)
 	if raw == "" {
-		return now.Format(time.RFC3339)
+		return now.Format(time.RFC3339), 0
 	}
-	t, err := parseEnvelopeTime(raw)
+	t, naive, err := parseEnvelopeTime(raw)
 	if err != nil {
 		log.Printf("MQTT [%s] unparseable timestamp %q, using ingest time", tag, raw)
-		return now.Format(time.RFC3339)
+		return now.Format(time.RFC3339), 0
 	}
 	// Hard reject: > 14h ahead is a genuine clock error (UTC+14 is the maximum
 	// standard offset, so nothing valid should be further ahead than that).
 	if t.After(now.Add(14 * time.Hour)) {
 		log.Printf("MQTT [%s] future timestamp %q, using ingest time", tag, raw)
-		return now.Format(time.RFC3339)
+		return now.Format(time.RFC3339), 0
 	}
 	// Hard reject: > 30 days in the past is an RTC-reset node reporting a
 	// factory date (e.g. 2020-01-01). Such a value would permanently drag
@@ -1103,37 +1221,61 @@ func resolveRxTime(msg map[string]interface{}, tag string) string {
 	// InsertTransmission. No legitimate buffered upload is that stale.
 	if t.Before(now.Add(-30 * 24 * time.Hour)) {
 		log.Printf("MQTT [%s] stale timestamp %q (>30d old), using ingest time", tag, raw)
-		return now.Format(time.RFC3339)
+		return now.Format(time.RFC3339), 0
 	}
-	// Soft clamp: naive local-clock timestamps from UTC+N observers are parsed
-	// as-if UTC, making them appear N hours in the future. A UTC+2 observer's
-	// live packet looks 2h ahead, but it is NOT a buffered packet — the whole
-	// point of using rxTime is to preserve the past timestamp for packets that
-	// were buffered offline. If rxTime is ahead of now, the packet is live and
-	// ingest time is the correct value. This also prevents storing future
-	// timestamps that would show ⚠️ in the UI for every packet from UTC+N nodes.
+	// Symmetric naive-timestamp clamp (issue #1463). Naive (zone-less) ISO
+	// values from observers in non-UTC zones are parsed as-if UTC, leaving a
+	// residual offset equal to the observer's UTC offset:
+	//   - UTC+N observer → value appears N hours in the future
+	//   - UTC-N observer → value appears N hours in the past
+	// The past case was silently stored verbatim, poisoning last_seen and
+	// rendering UTC-N observers perpetually "Stale" in the UI. Collapse any
+	// naive value more than 15 min off server-now to now() — well-behaved
+	// observers (Z-suffixed or explicit offset) are untouched regardless of
+	// skew so legitimate buffered uploads remain accurate.
+	const naiveTolerance = 15 * time.Minute
+	if naive {
+		signed := t.Sub(now) // signed: positive = ahead, negative = behind
+		abs := signed
+		if abs < 0 {
+			abs = -abs
+		}
+		if abs > naiveTolerance {
+			// Issue #1478: surface to UI via RecordNaiveSkew (called by handler).
+			// Per-message log was silenced in #1479 — chip + banner in the UI
+			// replace it.
+			deltaSec := int64(signed / time.Second)
+			return now.Format(time.RFC3339), deltaSec
+		}
+	}
+	// Legacy soft clamp for any remaining future value (zone-aware, or naive
+	// within tolerance): the observer clock is merely skewed — collapse to now
+	// so we don't render ⚠️ in the UI for live packets from those nodes.
 	if t.After(now) {
-		return now.Format(time.RFC3339)
+		return now.Format(time.RFC3339), 0
 	}
-	return t.UTC().Format(time.RFC3339)
+	return t.UTC().Format(time.RFC3339), 0
 }

 // parseEnvelopeTime parses the MQTT envelope timestamp. Two on-wire forms
 // occur: zone-aware ISO8601 (RFC3339), and a naive local-clock ISO string
 // with no zone (python datetime.isoformat()). Zone-aware layouts are tried
-// first; naive layouts are assumed UTC, leaving a bounded residual offset
-// equal to the observer's UTC offset for naive-timestamp uploaders.
-func parseEnvelopeTime(s string) (time.Time, error) {
+// first; naive layouts are assumed UTC but the caller is informed via the
+// returned `naive` flag so it can apply a symmetric clamp (see issue #1463).
+func parseEnvelopeTime(s string) (time.Time, bool, error) {
+	// Zone-aware first — RFC3339 demands Z or ±HH:MM.
+	if t, err := time.Parse(time.RFC3339, s); err == nil {
+		return t, false, nil
+	}
 	for _, layout := range []string{
-		time.RFC3339,                 // 2026-05-16T10:00:00Z / +02:00
 		"2006-01-02T15:04:05.999999", // python isoformat w/ microseconds
 		"2006-01-02T15:04:05",        // naive ISO
 	} {
 		if t, err := time.Parse(layout, s); err == nil {
-			return t, nil
+			return t, true, nil
 		}
 	}
-	return time.Time{}, fmt.Errorf("unrecognized timestamp layout: %q", s)
+	return time.Time{}, false, fmt.Errorf("unrecognized timestamp layout: %q", s)
 }

 // deriveHashtagChannelKey derives an AES-128 key from a channel name.
@@ -1143,12 +1285,29 @@ func deriveHashtagChannelKey(channelName string) string {
 	return hex.EncodeToString(h[:16])
 }

+// builtinChannelKeys returns channel keys that are part of the MeshCore firmware
+// defaults and should always be available, regardless of the rainbow file or config.
+// Adding new entries here is the right move when a key is part of the protocol spec
+// (not a community-named hashtag channel).
+func builtinChannelKeys() map[string]string {
+	return map[string]string{
+		// Default Public channel — well-known PSK from the MeshCore companion
+		// protocol spec. Channel-hash byte = 0x11.
+		"Public": "8b3387e9c5cdea6ac9e5edbaa115cd72",
+	}
+}
+
 // loadChannelKeys loads channel decryption keys from config and/or a JSON file.
-// Merge priority: rainbow (lowest) → derived from hashChannels → explicit config (highest).
+// Merge priority: builtin (lowest) → rainbow → derived from hashChannels → explicit config (highest).
 func loadChannelKeys(cfg *Config, configPath string) map[string]string {
 	keys := make(map[string]string)

-	// 1. Rainbow table keys (lowest priority)
+	// 0. Built-in firmware-default keys (lowest priority — overridable by everything else)
+	for k, v := range builtinChannelKeys() {
+		keys[k] = v
+	}
+
+	// 1. Rainbow table keys
 	keysPath := os.Getenv("CHANNEL_KEYS_PATH")
 	if keysPath == "" {
 		keysPath = cfg.ChannelKeysPath
@@ -1277,3 +1436,11 @@ func init() {
 		os.Exit(0)
 	}
 }
+
+// shouldUpdateDefaultScope returns true when the packet carries a transport
+// scope whose region key matched (#1534). Without the ScopeName non-empty
+// guard, transport-scoped adverts from non-matching regions would overwrite
+// previously-correct default_scope values with the empty string.
+func shouldUpdateDefaultScope(pktData *PacketData) bool {
+	return pktData.IsTransportScoped && pktData.ScopeName != ""
+}
@@ -2,8 +2,10 @@ package main

 import (
 	"bytes"
+	"database/sql"
 	"encoding/hex"
 	"encoding/json"
+	"fmt"
 	"math"
 	"os"
 	"path/filepath"
@@ -614,8 +616,41 @@ func TestLoadChannelKeysHashChannelsNormalization(t *testing.T) {
 	if _, ok := keys["#Spaced"]; !ok {
 		t.Error("should derive key for #Spaced (trimmed)")
 	}
-	if len(keys) != 3 {
-		t.Errorf("expected 3 keys, got %d", len(keys))
+	// 3 derived + builtins (Public)
+	expected := 3 + len(builtinChannelKeys())
+	if len(keys) != expected {
+		t.Errorf("expected %d keys, got %d", expected, len(keys))
+	}
+}
+
+// Default Public channel must always be present from the built-in floor,
+// regardless of whether a rainbow file is provided.
+func TestLoadChannelKeysBuiltinPublic(t *testing.T) {
+	t.Setenv("CHANNEL_KEYS_PATH", "")
+	dir := t.TempDir()
+	cfgPath := filepath.Join(dir, "config.json")
+	cfg := &Config{}
+
+	keys := loadChannelKeys(cfg, cfgPath)
+
+	if got := keys["Public"]; got != "8b3387e9c5cdea6ac9e5edbaa115cd72" {
+		t.Errorf("Public key = %q, want firmware-default 8b3387e9c5cdea6ac9e5edbaa115cd72", got)
+	}
+}
+
+// Explicit config and rainbow entries must still override the built-in floor.
+func TestLoadChannelKeysBuiltinOverridable(t *testing.T) {
+	t.Setenv("CHANNEL_KEYS_PATH", "")
+	dir := t.TempDir()
+	cfgPath := filepath.Join(dir, "config.json")
+	cfg := &Config{
+		ChannelKeys: map[string]string{"Public": "deadbeefdeadbeefdeadbeefdeadbeef"},
+	}
+
+	keys := loadChannelKeys(cfg, cfgPath)
+
+	if got := keys["Public"]; got != "deadbeefdeadbeefdeadbeefdeadbeef" {
+		t.Errorf("Public key = %q, want explicit override deadbeef...", got)
 	}
 }

@@ -1020,3 +1055,133 @@ func TestHandleMessageObserverIATAWhitelist(t *testing.T) {
 		t.Errorf("observer from whitelisted IATA ARN should be accepted, got count=%d", count)
 	}
 }
+
+// TestBuildPacketDataScopeMatchingNoMatch covers the #1534 regression: a
+// transport-scoped advert from a non-matching region carries
+// IsTransportScoped=true and ScopeName="". The default_scope update guard
+// must skip these packets so previously-correct scopes aren't overwritten
+// with the empty string.
+func TestBuildPacketDataScopeMatchingNoMatch(t *testing.T) {
+	// Code1=2AB5 is the precomputed code for region "#test" (payload="hello",
+	// payloadType=5). Build a region-key map for a DIFFERENT region so
+	// matchScope() finds no match and returns "".
+	const rawHex = "142AB500000068656C6C6F"
+	otherKey, _ := hex.DecodeString("aabbccddeeff00112233445566778899")
+	regionKeys := map[string][]byte{"#other": otherKey}
+
+	decoded, err := DecodePacket(rawHex, nil, false)
+	if err != nil {
+		t.Fatalf("DecodePacket: %v", err)
+	}
+	msg := &MQTTPacketMessage{Raw: rawHex}
+	pktData := BuildPacketData(msg, decoded, "obs1", "region1", regionKeys)
+
+	if !pktData.IsTransportScoped {
+		t.Fatalf("precondition: IsTransportScoped should be true (Code1 != 0000)")
+	}
+	if pktData.ScopeName != "" {
+		t.Fatalf("precondition: ScopeName should be empty (no region match), got %q", pktData.ScopeName)
+	}
+
+	// Regression assertion: when ScopeName is empty, the guard must skip the
+	// UpdateNodeDefaultScope call so an empty value never overwrites a
+	// previously-correct default_scope (#1534).
+	if shouldUpdateDefaultScope(pktData) {
+		t.Errorf("shouldUpdateDefaultScope = true for empty ScopeName; want false (would overwrite default_scope with \"\")")
+	}
+}
+
+// TestHandleMessageAdvert_EmptyScopeSkipsDefaultScopeUpdate is the call-site
+// regression test for #1534. It drives a transport-scoped ADVERT whose
+// region key does NOT match any configured region (so ScopeName=="") through
+// handleMessage end-to-end and asserts that a pre-existing default_scope on
+// the node is NOT overwritten with the empty string. This anchors the
+// call-site guard at main.go:720 — a future refactor that drops the
+// `if shouldUpdateDefaultScope(...)` wrapper and calls
+// `store.UpdateNodeDefaultScope(pubkey, pktData.ScopeName)` unconditionally
+// would re-introduce the #1534 bug and fail this test.
+func TestHandleMessageAdvert_EmptyScopeSkipsDefaultScopeUpdate(t *testing.T) {
+	store := newTestStore(t)
+	source := MQTTSource{Name: "test"}
+
+	// A transport-scoped ADVERT: header byte 0x10 = route_type 0
+	// (TRANSPORT_FLOOD) + payload_type 4 (ADVERT). Code1=AABB (non-zero, so
+	// IsTransportScoped becomes true), Code2=0000, path_byte=00, then a
+	// 100-byte ADVERT payload (32-byte pubkey starting 46D62D… + 4-byte ts
+	// + 64-byte signature) reused from TestHandleMessageAdvertWithTelemetry.
+	const rawHex = "10AABB00000046D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"
+	const pubkey = "46d62de27d4c5194d7821fc5a34a45565dcc2537b300b9ab6275255cefb65d84"
+
+	// Pre-seed the node with a non-empty default_scope so we can detect an
+	// erroneous overwrite with "".
+	if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, default_scope) VALUES (?, 'Node1', '#belgium')`, pubkey); err != nil {
+		t.Fatalf("seed node: %v", err)
+	}
+
+	// Empty regionKeys → matchScope() returns "" for any Code1 → ScopeName "".
+	msg := &mockMessage{
+		topic:   "meshcore/SJC/obs1/packets",
+		payload: []byte(`{"raw":"` + rawHex + `"}`),
+	}
+	handleMessage(store, "test", source, msg, nil, map[string][]byte{}, &Config{})
+
+	var got sql.NullString
+	if err := store.db.QueryRow(`SELECT default_scope FROM nodes WHERE public_key = ?`, pubkey).Scan(&got); err != nil {
+		t.Fatalf("read default_scope: %v", err)
+	}
+	if !got.Valid || got.String != "#belgium" {
+		t.Errorf("default_scope after empty-scope advert = %q (valid=%v), want #belgium — call-site guard at main.go:720 is missing or broken (#1534)", got.String, got.Valid)
+	}
+}
+
+// TestHandleMessageAdvert_MatchedScopeUpdatesDefaultScope is the positive
+// counterpart: a transport-scoped ADVERT whose Code1 matches a configured
+// region key MUST cause default_scope to be updated to the matched region
+// name. Together with the empty-scope test above this proves the call-site
+// branch routes correctly for both ScopeName states.
+func TestHandleMessageAdvert_MatchedScopeUpdatesDefaultScope(t *testing.T) {
+	store := newTestStore(t)
+	source := MQTTSource{Name: "test"}
+
+	// Same ADVERT bytes; this time we compute the matching region key for
+	// the (payloadType=4, payload=<advert bytes>) tuple so matchScope() will
+	// return "#de".
+	const advertBytes = "46D62DE27D4C5194D7821FC5A34A45565DCC2537B300B9AB6275255CEFB65D840CE5C169C94C9AED39E8BCB6CB6EB0335497A198B33A1A610CD3B03D8DCFC160900E5244280323EE0B44CACAB8F02B5B38B91CFA18BD067B0B5E63E94CFC85F758A8530B9240933402E0E6B8F84D5252322D52"
+	const pubkey = "46d62de27d4c5194d7821fc5a34a45565dcc2537b300b9ab6275255cefb65d84"
+
+	advertRaw, _ := hex.DecodeString(advertBytes)
+	// Searching for a region key whose HMAC yields a pre-chosen Code1 is
+	// impractical, so work the other way round: pick an arbitrary key,
+	// compute Code1 from it over (payload-type byte || advert bytes), then
+	// build the packet header around that Code1.
+	regionKey, _ := hex.DecodeString("0123456789abcdef0123456789abcdef")
+	mac := hmacSHA256(regionKey, append([]byte{4}, advertRaw...))
+	// Per firmware (#1534 helper logic): Code1 is the first 2 bytes of the
+	// HMAC, sentinel-shifted so 0x0000 → 0x0001 and 0xFFFF → 0xFFFE.
+	code := uint16(mac[0]) | (uint16(mac[1]) << 8)
+	if code == 0x0000 {
+		code = 0x0001
+	} else if code == 0xFFFF {
+		code = 0xFFFE
+	}
+	code1 := fmt.Sprintf("%02X%02X", byte(code&0xFF), byte(code>>8))
+	rawHex := "10" + code1 + "000000" + advertBytes
+
+	if _, err := store.db.Exec(`INSERT INTO nodes (public_key, name, default_scope) VALUES (?, 'Node1', '#old')`, pubkey); err != nil {
+		t.Fatalf("seed node: %v", err)
+	}
+
+	msg := &mockMessage{
+		topic:   "meshcore/SJC/obs1/packets",
+		payload: []byte(`{"raw":"` + rawHex + `"}`),
+	}
+	handleMessage(store, "test", source, msg, nil, map[string][]byte{"#de": regionKey}, &Config{})
+
+	var got sql.NullString
+	if err := store.db.QueryRow(`SELECT default_scope FROM nodes WHERE public_key = ?`, pubkey).Scan(&got); err != nil {
+		t.Fatalf("read default_scope: %v", err)
+	}
+	if !got.Valid || got.String != "#de" {
+		t.Errorf("default_scope after matched-scope advert = %q (valid=%v), want #de", got.String, got.Valid)
+	}
+}
@@ -22,26 +22,25 @@ func (s *Store) PruneOldPackets(days int) (int64, error) {
 	}
 	cutoff := time.Now().UTC().AddDate(0, 0, -days).Format(time.RFC3339)

-	tx, err := s.db.Begin()
-	if err != nil {
-		return 0, fmt.Errorf("prune begin: %w", err)
-	}
-	defer tx.Rollback()
+	// Tagged for writer-perf visibility (#1340).
+	var n int64
+	err := s.WriterTx("prune_packets", func(tx *sql.Tx) error {
+		// Delete child observations first (no CASCADE in SQLite).
+		if _, err := tx.Exec(`DELETE FROM observations WHERE transmission_id IN (
+			SELECT id FROM transmissions WHERE first_seen < ?
+		)`, cutoff); err != nil {
+			return fmt.Errorf("prune observations: %w", err)
+		}

-	// Delete child observations first (no CASCADE in SQLite).
-	if _, err := tx.Exec(`DELETE FROM observations WHERE transmission_id IN (
-		SELECT id FROM transmissions WHERE first_seen < ?
-	)`, cutoff); err != nil {
-		return 0, fmt.Errorf("prune observations: %w", err)
-	}
-
-	res, err := tx.Exec(`DELETE FROM transmissions WHERE first_seen < ?`, cutoff)
+		res, err := tx.Exec(`DELETE FROM transmissions WHERE first_seen < ?`, cutoff)
+		if err != nil {
+			return fmt.Errorf("prune transmissions: %w", err)
+		}
+		n, _ = res.RowsAffected()
+		return nil
+	})
 	if err != nil {
-		return 0, fmt.Errorf("prune transmissions: %w", err)
-	}
-	n, _ := res.RowsAffected()
-	if err := tx.Commit(); err != nil {
-		return 0, fmt.Errorf("prune commit: %w", err)
+		return 0, err
 	}
 	if n > 0 {
 		log.Printf("[prune] deleted %d transmissions older than %d days", n, days)
@@ -0,0 +1,26 @@
+package main
+
+import "runtime/debug"
+
+// applyMemoryLimit configures Go's soft memory limit (GOMEMLIMIT) for the
+// ingestor process. See #1010.
+//
+// Precedence:
+//  1. GOMEMLIMIT env var (parsed by the runtime at startup) — we do not
+//     override; report source="env" with limit=0.
+//  2. runtimeMaxMB > 0 (from config runtime.maxMemoryMB) — set limit of
+//     runtimeMaxMB MiB via debug.SetMemoryLimit; source="config".
+//  3. Otherwise no limit applied; source="none" (default behavior).
+//
+// Returns the limit (bytes) we set, or 0 if we did not set one.
+func applyMemoryLimit(runtimeMaxMB int, envSet bool) (int64, string) {
+	if envSet {
+		return 0, "env"
+	}
+	if runtimeMaxMB <= 0 {
+		return 0, "none"
+	}
+	limit := int64(runtimeMaxMB) * 1024 * 1024
+	debug.SetMemoryLimit(limit)
+	return limit, "config"
+}
@@ -0,0 +1,71 @@
+package main
+
+import (
+	"runtime/debug"
+	"testing"
+)
+
+// TestApplyMemoryLimit_FromEnv: when GOMEMLIMIT env var is set, the runtime
+// already parsed it. Our function MUST NOT override and MUST report env source.
+func TestApplyMemoryLimit_FromEnv(t *testing.T) {
+	t.Setenv("GOMEMLIMIT", "850MiB")
+	// debug.SetMemoryLimit(-1) only reads the current limit. Capture it up
+	// front: it is both the value to restore afterwards and the baseline
+	// that proves applyMemoryLimit left the runtime alone.
+	before := debug.SetMemoryLimit(-1)
+	t.Cleanup(func() { debug.SetMemoryLimit(before) })
+
+	limit, source := applyMemoryLimit(512, true /* envSet */)
+	if source != "env" {
+		t.Fatalf("expected source=env, got %q", source)
+	}
+	if limit != 0 {
+		t.Fatalf("expected limit=0 (not set by us), got %d", limit)
+	}
+	if after := debug.SetMemoryLimit(-1); after != before {
+		t.Fatalf("runtime memory limit was overridden despite env source: before=%d after=%d", before, after)
+	}
+}
+
+// TestApplyMemoryLimit_FromConfig: when env is unset and runtime.maxMemoryMB
+// is set, derive a limit of exactly runtimeMaxMB * 1 MiB (no headroom — the
+// ingestor's working set is bounded by MQTT batch decode, not packet store).
+func TestApplyMemoryLimit_FromConfig(t *testing.T) {
+	// A negative argument only queries the limit, so a bare
+	// SetMemoryLimit(-1) in a defer restores nothing. Remember the real
+	// previous value and put it back so the 512 MiB limit installed below
+	// does not leak into the rest of the test binary.
+	prev := debug.SetMemoryLimit(-1)
+	t.Cleanup(func() { debug.SetMemoryLimit(prev) })
+
+	limit, source := applyMemoryLimit(512, false /* envSet */)
+	if source != "config" {
+		t.Fatalf("expected source=config, got %q", source)
+	}
+	want := int64(512) * 1024 * 1024
+	if limit != want {
+		t.Fatalf("expected limit=%d, got %d", want, limit)
+	}
+	cur := debug.SetMemoryLimit(-1)
+	if cur != want {
+		t.Fatalf("runtime memory limit not set: want=%d got=%d", want, cur)
+	}
+}
+
+// TestApplyMemoryLimit_None: neither env nor config — no limit applied,
+// default behavior preserved.
+func TestApplyMemoryLimit_None(t *testing.T) {
+	// SetMemoryLimit returns the previous limit, so this single call both
+	// installs the "no limit" baseline and captures the value to restore.
+	noLimit := int64(1<<63 - 1) // math.MaxInt64 = "no limit"
+	prev := debug.SetMemoryLimit(noLimit)
+	t.Cleanup(func() { debug.SetMemoryLimit(prev) })
+
+	limit, source := applyMemoryLimit(0, false)
+	if source != "none" {
+		t.Fatalf("expected source=none, got %q", source)
+	}
+	if limit != 0 {
+		t.Fatalf("expected limit=0, got %d", limit)
+	}
+	// The baseline must survive the call untouched.
+	if cur := debug.SetMemoryLimit(-1); cur != noLimit {
+		t.Fatalf("runtime memory limit changed with no env/config: want=%d got=%d", noLimit, cur)
+	}
+}
+
+// TestApplyMemoryLimit_EnvWinsOverConfig: env set AND config set → env wins,
+// our function does not override. Locks the precedence triage specified.
+func TestApplyMemoryLimit_EnvWinsOverConfig(t *testing.T) {
+	t.Setenv("GOMEMLIMIT", "1GiB")
+	// SetMemoryLimit(-1) is a pure query; keep the value so it can be
+	// restored and compared against after the call.
+	before := debug.SetMemoryLimit(-1)
+	t.Cleanup(func() { debug.SetMemoryLimit(before) })
+
+	limit, source := applyMemoryLimit(512, true /* envSet */)
+	if source != "env" {
+		t.Fatalf("expected source=env when both set, got %q", source)
+	}
+	if limit != 0 {
+		t.Fatalf("expected limit=0 when env wins, got %d", limit)
+	}
+	if after := debug.SetMemoryLimit(-1); after != before {
+		t.Fatalf("config overrode the runtime memory limit although env wins: before=%d after=%d", before, after)
+	}
+}
@@ -57,7 +57,12 @@ const (
 type SourceLivenessState struct {
 	Tag    string
 	Broker string
-	LastMessageUnix int64 // atomic; unix seconds of last successfully received MQTT message
+	LastMessageUnix int64 // atomic; unix seconds of last successfully WRITTEN MQTT message (handleMessage post-write)
+	// LastReceiptUnix (PR #1609 M1) is stamped at MQTT receipt time —
+	// BEFORE the message is handed to the buffer/writer. STUB: unused
+	// in production until the green commit wires MarkReceipt at the
+	// receipt callsite and surfaces it in stats/healthz.
+	LastReceiptUnix int64 // atomic; unix seconds of last RECEIPT (broker liveness)
 	// FirstConnectedAt (PR #1216 r2 item 2) is stamped ONCE at
 	// registerLivenessState time and never reset. Cold-start grace
 	// checks against this so a flapping broker (CONNECT ok, SUBSCRIBE
@@ -95,6 +100,16 @@ func (s *SourceLivenessState) MarkMessage(now time.Time) {
 	atomic.StoreInt64(&s.LastMessageUnix, now.Unix())
 }

+// MarkReceipt atomically stores now (as unix seconds) into
+// LastReceiptUnix. It is meant to be stamped at the paho receipt callback,
+// BEFORE the message enters the ingest buffer. PR #1609 M1: this clock is
+// kept apart from LastMessageUnix so the watchdog/healthz can tell
+// "broker alive, write path stuck" (LastReceiptUnix fresh, LastMessageUnix
+// stale) from "everything stalled" (both stale). It performs a single
+// atomic store, so it is cheap enough for the message-handling hot path.
+func (s *SourceLivenessState) MarkReceipt(now time.Time) {
+	receiptUnix := now.Unix()
+	atomic.StoreInt64(&s.LastReceiptUnix, receiptUnix)
+}
+
 // MarkReconnected clears stale liveness state so the watchdog does not
 // false-alarm on a pre-outage timestamp after paho re-establishes the
 // connection (PR #1216 r1 item 2). Resets LastMessageUnix, re-stamps
@@ -217,7 +232,8 @@ func registerLivenessOrSkip(s *SourceLivenessState) bool {
 }

 // markLivenessForTag is the hot-path entry point: O(1) map lookup +
-// atomic store. Safe to call for unknown tags (no-op).
+// atomic store. Safe to call for unknown tags (no-op). Updates
+// LastMessageUnix (post-write clock).
 func markLivenessForTag(tag string, now time.Time) {
 	livenessRegistryMu.RLock()
 	s := livenessRegistry[tag]
@@ -227,6 +243,38 @@ func markLivenessForTag(tag string, now time.Time) {
 	}
 }

+// markReceiptForTag is the hot-path entry point used at MQTT receipt
+// (BEFORE the message is buffered/written). It looks the tag up in the
+// liveness registry under the read lock and, if a state is registered,
+// stamps its LastReceiptUnix only; unknown tags are a no-op.
+// PR #1609 M1 — separates the broker-liveness signal from write-path
+// liveness so /healthz can show a stalled writer with a live broker.
+func markReceiptForTag(tag string, now time.Time) {
+	livenessRegistryMu.RLock()
+	state := livenessRegistry[tag]
+	livenessRegistryMu.RUnlock()
+
+	if state == nil {
+		return
+	}
+	state.MarkReceipt(now)
+}
+
+// SnapshotLivenessClocks returns the per-source receipt vs write-path
+// liveness pair for every registered source. Read-only; safe to call
+// from the stats-file writer. PR #1609 M1.
+func SnapshotLivenessClocks() map[string]SourceLivenessSnapshot {
+	livenessRegistryMu.RLock()
+	defer livenessRegistryMu.RUnlock()
+	if len(livenessRegistry) == 0 {
+		return nil
+	}
+	out := make(map[string]SourceLivenessSnapshot, len(livenessRegistry))
+	for tag, s := range livenessRegistry {
+		out[tag] = SourceLivenessSnapshot{
+			LastReceiptUnix: atomic.LoadInt64(&s.LastReceiptUnix),
+			LastMessageUnix: atomic.LoadInt64(&s.LastMessageUnix),
+		}
+	}
+	return out
+}
+
 // runLivenessWatchdog starts a goroutine that scans the registry every
 // `interval` and logs a warning for any source that has been silent while
 // connected for more than `threshold`. Returns a stop function that halts
@@ -0,0 +1,43 @@
+package main
+
+import (
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// TestSourceLivenessState_ReceiptVsWriteSeparate pins the independence of
+// the two liveness clocks (PR #1609 review MAJOR M1). A receipt stamp must
+// leave the post-write clock alone, and a post-write stamp must leave the
+// receipt clock alone, so the watchdog/healthz can distinguish "broker
+// alive, write path stuck" from "everything fine". If the clocks were
+// coupled, /healthz would report "fresh" while the writer is stalled and
+// the ingest buffer is filling.
+func TestSourceLivenessState_ReceiptVsWriteSeparate(t *testing.T) {
+	state := &SourceLivenessState{Tag: "t"}
+	receiptAt := time.Now()
+	wantReceipt := receiptAt.Unix()
+
+	// Phase 1: receipt at T0, the writer never finishes (stalled).
+	state.MarkReceipt(receiptAt)
+
+	if got := atomic.LoadInt64(&state.LastReceiptUnix); got != wantReceipt {
+		t.Fatalf("LastReceiptUnix: want %d, got %d", wantReceipt, got)
+	}
+	if got := atomic.LoadInt64(&state.LastMessageUnix); got != 0 {
+		t.Fatalf("LastMessageUnix MUST stay 0 while writer stalled (only MarkReceipt called); got %d — receipt is double-stamping the write clock and /healthz will lie about ingestion freshness", got)
+	}
+
+	// Phase 2: the write lands later; only MarkMessage may move LastMessageUnix.
+	writeAt := receiptAt.Add(5 * time.Second)
+	state.MarkMessage(writeAt)
+
+	if got := atomic.LoadInt64(&state.LastReceiptUnix); got != wantReceipt {
+		t.Fatalf("MarkMessage must not move LastReceiptUnix backwards or forwards; want %d, got %d", wantReceipt, got)
+	}
+	if got := atomic.LoadInt64(&state.LastMessageUnix); got != writeAt.Unix() {
+		t.Fatalf("LastMessageUnix after MarkMessage: want %d, got %d", writeAt.Unix(), got)
+	}
+}
@@ -63,6 +63,16 @@ func (s *Store) StartNeighborEdgesBuilder(interval time.Duration) func() {
 	// returning — first server load needs a fully-populated table.
 	wuStart := time.Now()
 	var wuTotal int
+	// Prime the prefix index (#1547) so the very first
+	// InsertTransmission after startup can resolve hop prefixes.
+	if err := s.RefreshPrefixIndex(); err != nil {
+		log.Printf("[neighbor-build] initial prefix-index refresh error: %v", err)
+	}
+	// Prime the neighbor graph (#1560) so the context-aware resolver
+	// has adjacency data on the very first InsertTransmission.
+	if err := s.RefreshNeighborGraph(); err != nil {
+		log.Printf("[neighbor-build] initial neighbor-graph refresh error: %v", err)
+	}
 	for {
 		n, err := s.buildAndPersistNeighborEdges()
 		if err != nil {
@@ -85,7 +95,18 @@ func (s *Store) StartNeighborEdgesBuilder(interval time.Duration) func() {
 			select {
 			case <-t.C:
 				start := time.Now()
+				// Refresh the prefix index alongside the edges build
+				// (#1547) so new nodes become resolvable within a tick.
+				if err := s.RefreshPrefixIndex(); err != nil {
+					log.Printf("[neighbor-build] prefix-index refresh error: %v", err)
+				}
 				n, err := s.buildAndPersistNeighborEdges()
+				// Refresh the neighbor-graph snapshot after the edges
+				// build (#1560) so the context-aware resolver picks up
+				// newly persisted adjacencies on the next ingest.
+				if grErr := s.RefreshNeighborGraph(); grErr != nil {
+					log.Printf("[neighbor-build] neighbor-graph refresh error: %v", grErr)
+				}
 				dur := time.Since(start)
 				if err != nil {
 					log.Printf("[neighbor-build] tick error after %s: %v", dur, err)
@@ -213,33 +234,36 @@ func (s *Store) buildAndPersistNeighborEdges() (int, error) {
 		return 0, nil
 	}

-	tx, err := s.db.Begin()
-	if err != nil {
-		return 0, fmt.Errorf("begin: %w", err)
-	}
-	defer tx.Rollback()
-	stmt, err := tx.Prepare(`INSERT INTO neighbor_edges (node_a, node_b, count, last_seen)
-		VALUES (?, ?, 1, ?)
-		ON CONFLICT(node_a, node_b) DO UPDATE SET
-		  count = count + 1,
-		  last_seen = MAX(last_seen, excluded.last_seen)`)
-	if err != nil {
-		return 0, fmt.Errorf("prepare: %w", err)
-	}
-	defer stmt.Close()
-	var firstErr error
-	for _, e := range edges {
-		if _, err := stmt.Exec(e.a, e.b, e.ts); err != nil && firstErr == nil {
-			firstErr = err
+	// Wrap the whole edge-persist tx under writer-perf instrumentation
+	// (#1340). Slow neighbor-builder ticks (the #1339 root cause) now
+	// show up on /api/perf under component=neighbor_builder.
+	var inserted int
+	err = s.WriterTx("neighbor_builder", func(tx *sql.Tx) error {
+		stmt, err := tx.Prepare(`INSERT INTO neighbor_edges (node_a, node_b, count, last_seen)
+			VALUES (?, ?, 1, ?)
+			ON CONFLICT(node_a, node_b) DO UPDATE SET
+			  count = count + 1,
+			  last_seen = MAX(last_seen, excluded.last_seen)`)
+		if err != nil {
+			return fmt.Errorf("prepare: %w", err)
 		}
+		defer stmt.Close()
+		var firstErr error
+		for _, e := range edges {
+			if _, err := stmt.Exec(e.a, e.b, e.ts); err != nil && firstErr == nil {
+				firstErr = err
+			}
+		}
+		if firstErr != nil {
+			return fmt.Errorf("upsert: %w", firstErr)
+		}
+		inserted = len(edges)
+		return nil
+	})
+	if err != nil {
+		return 0, err
 	}
-	if firstErr != nil {
-		return 0, fmt.Errorf("upsert: %w", firstErr)
-	}
-	if err := tx.Commit(); err != nil {
-		return 0, fmt.Errorf("commit: %w", err)
-	}
-	return len(edges), nil
+	return inserted, nil
 }

 // canonEdge orders the pair so node_a <= node_b (matches the existing
@@ -0,0 +1,109 @@
+package main
+
+// Regression tests for issue #1465 — observer.last_seen MUST always reflect
+// ingest time (server wall clock), never the MQTT envelope timestamp. Observers
+// with broken clocks (wrong TZ, RTC drift, replayed retained messages) must
+// NOT be able to drag the analyzer's "last heard from" field into the past
+// or future.
+//
+// Per-packet rxTime semantics (envelope time with naive-clamp from #1464)
+// are out of scope here — those continue to use envelope time. This file
+// asserts only the observer.last_seen path.
+
+import (
+	"testing"
+	"time"
+)
+
+// Status path with a well-formed RFC3339 envelope timestamp 3h in the past.
+// The stored observer.last_seen must fall inside the wall-clock window
+// measured around the handleMessage call (±5s), not at the envelope value.
+func TestStatusMessage_ObserverLastSeen_AlwaysIngestTime_PastEnvelope_1465(t *testing.T) {
+	st := newTestStore(t)
+	src := MQTTSource{Name: "test"}
+
+	envelopeTS := time.Now().UTC().Add(-3 * time.Hour).Format(time.RFC3339)
+	windowStart := time.Now().Unix()
+
+	body := []byte(`{"status":"online","origin":"obs-past","timestamp":"` + envelopeTS + `"}`)
+	incoming := &mockMessage{topic: "meshcore/SJC/obs-past/status", payload: body}
+
+	handleMessage(st, "test", src, incoming, nil, nil, &Config{})
+	windowEnd := time.Now().Unix()
+
+	var stored string
+	row := st.db.QueryRow(`SELECT last_seen FROM observers WHERE id = ?`, "obs-past")
+	if err := row.Scan(&stored); err != nil {
+		t.Fatalf("scan last_seen: %v", err)
+	}
+	parsed, err := time.Parse(time.RFC3339, stored)
+	if err != nil {
+		t.Fatalf("last_seen %q not RFC3339: %v", stored, err)
+	}
+	epoch := parsed.Unix()
+	inWindow := epoch >= windowStart-5 && epoch <= windowEnd+5
+	if !inWindow {
+		t.Errorf("observer.last_seen = %q (epoch %d); want in [%d, %d] (server wall clock). "+
+			"Envelope reported well-formed stale %q (3h ago) — must NOT drag last_seen into the past. Issue #1465.",
+			stored, epoch, windowStart, windowEnd, envelopeTS)
+	}
+}
+
+// Status path with an envelope timestamp 5 minutes in the future. The
+// stored observer.last_seen must still fall inside the wall-clock window
+// measured around the handleMessage call (±5s).
+func TestStatusMessage_ObserverLastSeen_AlwaysIngestTime_FutureEnvelope_1465(t *testing.T) {
+	st := newTestStore(t)
+	src := MQTTSource{Name: "test"}
+
+	envelopeTS := time.Now().UTC().Add(5 * time.Minute).Format(time.RFC3339)
+	windowStart := time.Now().Unix()
+
+	body := []byte(`{"status":"online","origin":"obs-future","timestamp":"` + envelopeTS + `"}`)
+	incoming := &mockMessage{topic: "meshcore/SJC/obs-future/status", payload: body}
+
+	handleMessage(st, "test", src, incoming, nil, nil, &Config{})
+	windowEnd := time.Now().Unix()
+
+	var stored string
+	row := st.db.QueryRow(`SELECT last_seen FROM observers WHERE id = ?`, "obs-future")
+	if err := row.Scan(&stored); err != nil {
+		t.Fatalf("scan last_seen: %v", err)
+	}
+	parsed, err := time.Parse(time.RFC3339, stored)
+	if err != nil {
+		t.Fatalf("last_seen %q not RFC3339: %v", stored, err)
+	}
+	epoch := parsed.Unix()
+	inWindow := epoch >= windowStart-5 && epoch <= windowEnd+5
+	if !inWindow {
+		t.Errorf("observer.last_seen = %q (epoch %d); want in [%d, %d] (server wall clock). "+
+			"Envelope reported well-formed future %q (5 min ahead) — must NOT drag last_seen into the future. Issue #1465.",
+			stored, epoch, windowStart, windowEnd, envelopeTS)
+	}
+}
+
+// Packet path: the envelope timestamp on a packet message is 3h in the
+// past. Ingesting the packet MUST still bump observer.last_seen to the
+// server wall clock — we just received a packet from this observer, so it
+// is alive no matter what its clock claims.
+func TestPacketMessage_ObserverLastSeen_AlwaysIngestTime_PastEnvelope_1465(t *testing.T) {
+	st := newTestStore(t)
+	src := MQTTSource{Name: "test"}
+
+	envelopeTS := time.Now().UTC().Add(-3 * time.Hour).Format(time.RFC3339)
+	windowStart := time.Now().Unix()
+
+	rawHex := "0A00D69FD7A5A7475DB07337749AE61FA53A4788E976"
+	body := []byte(`{"raw":"` + rawHex + `","SNR":5.5,"RSSI":-100.0,"origin":"obs-pkt","timestamp":"` + envelopeTS + `"}`)
+	incoming := &mockMessage{topic: "meshcore/SJC/obs-pkt/packets", payload: body}
+
+	handleMessage(st, "test", src, incoming, nil, nil, &Config{})
+	windowEnd := time.Now().Unix()
+
+	var stored string
+	row := st.db.QueryRow(`SELECT last_seen FROM observers WHERE id = ?`, "obs-pkt")
+	if err := row.Scan(&stored); err != nil {
+		t.Fatalf("scan last_seen: %v", err)
+	}
+	parsed, err := time.Parse(time.RFC3339, stored)
+	if err != nil {
+		t.Fatalf("last_seen %q not RFC3339: %v", stored, err)
+	}
+	epoch := parsed.Unix()
+	inWindow := epoch >= windowStart-5 && epoch <= windowEnd+5
+	if !inWindow {
+		t.Errorf("packet-path observer.last_seen = %q (epoch %d); want in [%d, %d] (server wall clock). "+
+			"Envelope stale = %q. Observer just delivered a packet; last_seen must be NOW. Issue #1465.",
+			stored, epoch, windowStart, windowEnd, envelopeTS)
+	}
+}
@@ -0,0 +1,225 @@
+package main
+
+import (
+	"database/sql"
+	"strings"
+	"sync/atomic"
+)
+
+// Context-aware hop resolver — full restore of pre-#1289 hop
+// disambiguation semantics, ported into the ingestor (where the
+// neighbor graph + node directory now live, per #1283).
+//
+// Why this exists (issues #1547 / #1560):
+//   The naive `resolvePath` only resolves hops whose prefix is unique
+//   in the node table. On a >2K-node mesh the dominant case is 1-byte
+//   prefix collisions (multiple candidates per prefix). Without
+//   adjacency disambiguation those hops always serialize as `nil`
+//   and the resolved_path remains effectively empty for the largest
+//   meshes — the very deployments that need it most.
+//
+// Algorithm (ported from cmd/server/store.go @ commit 450236d5
+// `pm.resolveWithContext`, intersected with the disambiguation gating
+// from PR #1144 / #1352):
+//
+//   For each hop:
+//     1. Collect candidate pubkeys by prefix-match (existing prefixIndex).
+//     2. len==0 → nil.
+//     3. len==1 → that pubkey.
+//     4. len>1 → filter by NeighborGraph adjacency to the anchor:
+//          - hop 0 anchor = fromPubkey (ADVERT originator) if known;
+//          - hop i (i>0) anchor = previous resolved hop's pubkey;
+//            if the previous hop did not resolve, the chain breaks
+//            and subsequent >1-candidate hops fall to nil.
+//        Surviving candidates after filter:
+//          - exactly 1 → use it
+//          - 0 or >1   → nil (cannot disambiguate further)
+//
+// This is the conservative tier-1 variant. Pre-#1289 also carried
+// tier-2 (geo proximity), tier-3 (GPS preference), tier-4 (obs-count
+// fallback) — those were noisy in practice and are intentionally NOT
+// ported here; this PR is a regression restore, not an enhancement.
+
+// NeighborGraph is the in-memory adjacency snapshot used by the
+// context-aware resolver. Endpoints are lowercased on the way in and on
+// lookup. The zero value is an empty graph that is ready to use.
+type NeighborGraph struct {
+	// adj maps a lowercased endpoint to the set of endpoints it shares an
+	// edge with; every edge is recorded in both directions.
+	adj map[string]map[string]struct{}
+}
+
+// NewNeighborGraph returns an empty graph.
+func NewNeighborGraph() *NeighborGraph {
+	return &NeighborGraph{adj: make(map[string]map[string]struct{})}
+}
+
+// AddEdge adds an undirected adjacency a↔b. Self-loops and empty
+// endpoints are ignored.
+func (g *NeighborGraph) AddEdge(a, b string) {
+	a = strings.ToLower(a)
+	b = strings.ToLower(b)
+	if a == "" || b == "" || a == b {
+		return
+	}
+	// Allocate lazily so a zero-value NeighborGraph (not built through
+	// NewNeighborGraph) does not panic on its first write to a nil map.
+	if g.adj == nil {
+		g.adj = make(map[string]map[string]struct{})
+	}
+	if g.adj[a] == nil {
+		g.adj[a] = make(map[string]struct{})
+	}
+	if g.adj[b] == nil {
+		g.adj[b] = make(map[string]struct{})
+	}
+	g.adj[a][b] = struct{}{}
+	g.adj[b][a] = struct{}{}
+}
+
+// IsAdjacent reports whether a and b appear together in any neighbor edge.
+// It returns false for a nil graph and for empty endpoints.
+func (g *NeighborGraph) IsAdjacent(a, b string) bool {
+	if g == nil {
+		return false
+	}
+	a = strings.ToLower(a)
+	b = strings.ToLower(b)
+	if a == "" || b == "" {
+		return false
+	}
+	// Indexing a missing key (or a nil adj map) yields a nil inner map,
+	// and a lookup on a nil map simply reports "absent".
+	_, present := g.adj[a][b]
+	return present
+}
+
+// neighborGraphHolder keeps the most recently published *NeighborGraph
+// for the InsertTransmission hot path. It wraps an atomic.Value so the
+// 60s rebuild can publish a new snapshot without a read-side lock.
+type neighborGraphHolder struct {
+	v atomic.Value // holds *NeighborGraph
+}
+
+// load returns the published graph, or nil when nothing has been stored.
+func (h *neighborGraphHolder) load() *NeighborGraph {
+	current := h.v.Load()
+	if current == nil {
+		return nil
+	}
+	return current.(*NeighborGraph)
+}
+
+// store publishes g as the current graph.
+func (h *neighborGraphHolder) store(g *NeighborGraph) {
+	h.v.Store(g)
+}
+
+// loadNeighborGraph reads every (node_a, node_b) pair from neighbor_edges
+// and returns them as an in-memory adjacency snapshot. An empty table
+// yields an empty graph.
+//
+// It returns an error when the query fails or when row iteration stops
+// early, so a truncated result set is never handed back as a complete
+// snapshot.
+func loadNeighborGraph(db *sql.DB) (*NeighborGraph, error) {
+	rows, err := db.Query(`SELECT node_a, node_b FROM neighbor_edges`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+	g := NewNeighborGraph()
+	for rows.Next() {
+		var a, b string
+		// Best-effort per row: one row that cannot be scanned into two
+		// strings is skipped rather than failing the whole refresh.
+		if err := rows.Scan(&a, &b); err != nil {
+			continue
+		}
+		g.AddEdge(a, b)
+	}
+	// rows.Next returns false both on exhaustion and on failure; only
+	// rows.Err tells the two apart.
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return g, nil
+}
+
+// resolveHopWithContext resolves one hop prefix to a full pubkey, using
+// NeighborGraph adjacency to the anchor to break ties between multiple
+// candidates. It returns nil whenever the hop cannot be pinned to exactly
+// one pubkey.
+//
+// exclude is a set of pubkeys that must not be chosen (typically the hops
+// already resolved earlier on the path — a packet does not revisit a
+// node).
+//
+// Behavior matrix:
+//   len(candidates) | anchor          | graph | result
+//   0               | —               | —     | nil
+//   1               | —               | —     | candidates[0] (nil if excluded)
+//   >1              | "" or no graph  | —     | nil
+//   >1              | non-empty       | set   | the single non-excluded
+//                                               candidate adjacent to anchor
+//                                               (nil if 0 or >1 qualify)
+func resolveHopWithContext(hop string, anchor string, graph *NeighborGraph, idx prefixIndex, exclude map[string]struct{}) *string {
+	if idx == nil {
+		return nil
+	}
+
+	pool := idx[strings.ToLower(hop)]
+	if len(pool) == 0 {
+		return nil
+	}
+	if len(pool) == 1 {
+		only := pool[0]
+		if _, excluded := exclude[only]; excluded {
+			return nil
+		}
+		return &only
+	}
+
+	// Ambiguous prefix: adjacency needs both a graph and an anchor.
+	if graph == nil || anchor == "" {
+		return nil
+	}
+
+	var picked *string
+	for i := range pool {
+		cand := pool[i]
+		if _, excluded := exclude[cand]; excluded {
+			continue
+		}
+		if !graph.IsAdjacent(anchor, cand) {
+			continue
+		}
+		if picked != nil {
+			// A second adjacent candidate means the tie is unbreakable.
+			return nil
+		}
+		picked = &cand
+	}
+	return picked
+}
+
+// resolvePathWithContext resolves a whole hop list in order. Hop 0 is
+// anchored on fromPubkey (for ADVERTs); every later hop is anchored on
+// the pubkey the previous hop resolved to, and the anchor is cleared when
+// the previous hop did not resolve. The originator and every pubkey
+// resolved so far are excluded from later candidate pools so the walk
+// does not revisit a node.
+//
+// It returns nil for an empty hop list; otherwise a slice with one entry
+// per hop (nil where unresolved), the `[]*string` shape consumed by
+// marshalResolvedPath (and the all-nil clobber-guard from PR #1548).
+func resolvePathWithContext(hops []string, fromPubkey string, graph *NeighborGraph, idx prefixIndex) []*string {
+	if len(hops) == 0 {
+		return nil
+	}
+	resolved := make([]*string, len(hops))
+	if idx == nil {
+		return resolved
+	}
+
+	anchor := strings.ToLower(fromPubkey)
+	visited := make(map[string]struct{}, len(hops)+1)
+	if anchor != "" {
+		visited[anchor] = struct{}{}
+	}
+
+	for pos := range hops {
+		pk := resolveHopWithContext(hops[pos], anchor, graph, idx, visited)
+		resolved[pos] = pk
+		if pk == nil {
+			// Chain broken: the next hop has nothing to anchor on.
+			anchor = ""
+			continue
+		}
+		anchor = strings.ToLower(*pk)
+		visited[anchor] = struct{}{}
+	}
+	return resolved
+}
+
+// RefreshNeighborGraph rebuilds the adjacency snapshot from
+// neighbor_edges and publishes it atomically. When loading fails the
+// previously published graph is left in place and the error is returned.
+// Called on startup and once per neighbor-edges builder tick (60s)
+// alongside RefreshPrefixIndex.
+func (s *Store) RefreshNeighborGraph() error {
+	snapshot, loadErr := loadNeighborGraph(s.db)
+	if loadErr == nil {
+		s.neighborGraph.store(snapshot)
+	}
+	return loadErr
+}
@@ -0,0 +1,63 @@
+package main
+
+import (
+	"database/sql"
+	"strings"
+	"testing"
+)
+
+// #1483: the server's GetNodeLocationsByKeys lookup relies on stored
+// public_key values being lowercase (LOWER(public_key) was dropped for
+// perf). This test seeds a legacy uppercase row, reopens the store, and
+// checks that opening it again rewrote the row to its lowercase form.
+func TestPublicKeyLowercaseNormalizationMigration(t *testing.T) {
+	dbPath := tempDBPath(t)
+	first, err := OpenStore(dbPath)
+	if err != nil {
+		t.Fatalf("first OpenStore: %v", err)
+	}
+	// Insert straight through SQL so UpsertNode's lowercasing is bypassed.
+	_, err = first.db.Exec(
+		`INSERT INTO nodes (public_key, name, role, last_seen, first_seen)
+		 VALUES ('AABBCCDDEEFF11223344', 'mixed-case-node', 'companion', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')`,
+	)
+	if err != nil {
+		t.Fatalf("seed uppercase row: %v", err)
+	}
+	// Sanity check: the uppercase row exists before normalization.
+	var seeded string
+	if err := first.db.QueryRow(`SELECT public_key FROM nodes WHERE public_key = 'AABBCCDDEEFF11223344'`).Scan(&seeded); err != nil {
+		t.Fatalf("pre-check select: %v", err)
+	}
+	if seeded != "AABBCCDDEEFF11223344" {
+		t.Fatalf("pre-check: expected uppercase, got %s", seeded)
+	}
+	first.Close()
+
+	// Second open: the boot-time migration should normalize the row.
+	reopened, err := OpenStore(dbPath)
+	if err != nil {
+		t.Fatalf("reopen: %v", err)
+	}
+	defer reopened.Close()
+
+	// No row may still carry the uppercase key.
+	var upperCount int
+	if err := reopened.db.QueryRow(`SELECT COUNT(*) FROM nodes WHERE public_key = 'AABBCCDDEEFF11223344'`).Scan(&upperCount); err != nil {
+		t.Fatalf("post-check uppercase count: %v", err)
+	}
+	if upperCount != 0 {
+		t.Fatalf("expected 0 uppercase rows after migration, got %d", upperCount)
+	}
+
+	// The lowercase key must now be present.
+	var normalized string
+	err = reopened.db.QueryRow(`SELECT public_key FROM nodes WHERE public_key = 'aabbccddeeff11223344'`).Scan(&normalized)
+	switch {
+	case err == sql.ErrNoRows:
+		t.Fatalf("expected lowercase row to exist after migration")
+	case err != nil:
+		t.Fatalf("post-check lowercase select: %v", err)
+	}
+	if want := strings.ToLower("AABBCCDDEEFF11223344"); normalized != want {
+		t.Fatalf("got %s, want lowercase form", normalized)
+	}
+}
@@ -0,0 +1,113 @@
+package main
+
+import (
+	"encoding/json"
+	"strings"
+	"sync/atomic"
+)
+
+// Issue #1547 — resolved_path writer (ingestor-owned).
+//
+// Per the #1283 refactor (server is read-only; ingestor owns the
+// neighbor graph + node directory), the writer that populated
+// `observations.resolved_path` must live here in the ingestor. PR #1289
+// removed the server-side writer without porting it — this restores it.
+//
+// Approach:
+//   - `resolvePath` is a pure function: hop prefixes → full pubkeys
+//     using the in-memory prefix index built from `nodes.public_key`.
+//   - Unique-prefix hops resolve to the full pubkey; ambiguous or
+//     unknown hops resolve to `nil`. The output shape is `[]*string`
+//     (with nulls for unresolved positions) — the JSON serialization
+//     matches what the server's `unmarshalResolvedPath` /
+//     frontend `getResolvedPath` already consume.
+//   - The prefix index is rebuilt on startup and once per neighbor-
+//     builder tick (60s) so new nodes start resolving within a minute
+//     without blocking the MQTT ingest path.
+
+// resolvePath looks each hop prefix up (lowercased) in idx and fills the
+// matching output slot with the full pubkey when the index holds exactly
+// one candidate for it. Slots for unknown or ambiguous prefixes stay nil.
+// An empty or nil hop list yields nil; a nil index yields an all-nil
+// slice of the same length as hops.
+func resolvePath(hops []string, idx prefixIndex) []*string {
+	if len(hops) == 0 {
+		return nil
+	}
+	resolved := make([]*string, len(hops))
+	if idx == nil {
+		return resolved
+	}
+	for pos := range hops {
+		matches := idx[strings.ToLower(hops[pos])]
+		if len(matches) != 1 {
+			continue
+		}
+		full := matches[0]
+		resolved[pos] = &full
+	}
+	return resolved
+}
+
+// marshalResolvedPath JSON-encodes a resolved path. It returns "" when
+// the input is empty, when every element is nil, or when encoding fails
+// (the writer treats "" as SQL NULL).
+//
+// The all-nil case exists because of the UPSERT in InsertTransmission:
+//
+//	resolved_path = COALESCE(excluded.resolved_path, resolved_path)
+//
+// Emitting "[null,null]" would pass through nilIfEmpty() as a non-NULL
+// string, and the COALESCE would then OVERWRITE a previously stored good
+// resolved_path on re-ingest. Returning "" lets nilIfEmpty produce SQL
+// NULL so the COALESCE falls through to the existing value.
+// See issue #1547 / PR #1548 reviewer findings.
+func marshalResolvedPath(rp []*string) string {
+	// An empty slice never enters the loop, so it is covered by the same
+	// "nothing resolved" exit as the all-nil case.
+	hasResolved := false
+	for i := range rp {
+		if rp[i] != nil {
+			hasResolved = true
+			break
+		}
+	}
+	if !hasResolved {
+		return ""
+	}
+
+	encoded, err := json.Marshal(rp)
+	if err != nil {
+		return ""
+	}
+	return string(encoded)
+}
+
+// prefixIdxHolder keeps the most recently published prefixIndex for the
+// InsertTransmission hot path. It wraps an atomic.Value so the 60s
+// rebuild can swap in a new index without a lock on the read side.
+type prefixIdxHolder struct {
+	v atomic.Value // holds prefixIndex
+}
+
+// load returns the published index, or nil when nothing has been stored.
+func (h *prefixIdxHolder) load() prefixIndex {
+	current := h.v.Load()
+	if current == nil {
+		return nil
+	}
+	return current.(prefixIndex)
+}
+
+// store publishes idx as the current index.
+func (h *prefixIdxHolder) store(idx prefixIndex) {
+	h.v.Store(idx)
+}
+
+// RefreshPrefixIndex rebuilds the in-memory prefix index via
+// buildPrefixIndex and publishes it atomically. When the rebuild fails
+// the previously published index is left in place and the error is
+// returned. Called on startup and from the neighbor-edges builder tick
+// (60s) so new nodes become resolvable without per-insert DB scans.
+func (s *Store) RefreshPrefixIndex() error {
+	rebuilt, buildErr := buildPrefixIndex(s.db)
+	if buildErr == nil {
+		s.prefixIdx.store(rebuilt)
+	}
+	return buildErr
+}
@@ -0,0 +1,446 @@
+package main
+
+import (
+	"database/sql"
+	"encoding/json"
+	"path/filepath"
+	"testing"
+)
+
+// unmarshalResolvedPathLocal decodes a stored resolved_path JSON array
+// into a []*string, where a JSON null becomes a nil element. It returns
+// nil for the empty string and for input that is not valid JSON of that
+// shape.
+func unmarshalResolvedPathLocal(s string) []*string {
+	var decoded []*string
+	if s == "" || json.Unmarshal([]byte(s), &decoded) != nil {
+		return nil
+	}
+	return decoded
+}
+
+// TestResolvePathPureFunction unit-tests the pure resolvePath helper.
+// It checks that:
+//   - a hop whose prefix has exactly one candidate resolves to that pubkey
+//   - a hop whose prefix has several candidates resolves to nil
+//   - a hop whose prefix is absent from the index resolves to nil
+//   - the result has one element per input hop
+//
+// Regression gate for #1547 (resolved_path stopped being written).
+func TestResolvePathPureFunction(t *testing.T) {
+	idx := prefixIndex{
+		// "aa" → exactly one pubkey
+		"aa":         {"aaaaaaaaaa"},
+		"aaaaaaaaaa": {"aaaaaaaaaa"},
+		// "bb" → exactly one pubkey
+		"bb":         {"bbbbbbbbbb"},
+		"bbbbbbbbbb": {"bbbbbbbbbb"},
+		// "cc" → ambiguous (2 candidates)
+		"cc":         {"cccccccccc", "ccdddddddd"},
+		"cccccccccc": {"cccccccccc"},
+	}
+
+	cases := []struct {
+		hop      string
+		resolved string // expected pubkey; "" means the hop must stay nil
+		wantText string // text printed after "want " on failure
+	}{
+		{"aa", "aaaaaaaaaa", "aaaaaaaaaa"},
+		{"cc", "", "nil (ambiguous)"},
+		{"ff", "", "nil (unknown)"},
+		{"bb", "bbbbbbbbbb", "bbbbbbbbbb"},
+	}
+	hops := make([]string, len(cases))
+	for i, c := range cases {
+		hops[i] = c.hop
+	}
+
+	got := resolvePath(hops, idx)
+	if len(got) != 4 {
+		t.Fatalf("expected len 4, got %d", len(got))
+	}
+	for i, c := range cases {
+		ok := got[i] == nil
+		if c.resolved != "" {
+			ok = got[i] != nil && *got[i] == c.resolved
+		}
+		if !ok {
+			t.Errorf("hop[%d] %s: want %s, got %v", i, c.hop, c.wantText, deref(got[i]))
+		}
+	}
+}
+
+// TestResolvePathEmptyHops asserts that a missing path — a nil or a
+// zero-length hop slice — resolves to a nil result.
+func TestResolvePathEmptyHops(t *testing.T) {
+	inputs := []struct {
+		label string
+		hops  []string
+	}{
+		{"nil hops", nil},
+		{"empty hops", []string{}},
+	}
+	for _, in := range inputs {
+		if got := resolvePath(in.hops, prefixIndex{}); got != nil {
+			t.Errorf("%s: want nil, got %v", in.label, got)
+		}
+	}
+}
+
+// TestMarshalResolvedPathRoundtrip asserts the JSON shape matches the
+// server's marshal/unmarshal contract: `[]*string` with nulls for
+// unresolved hops.
+//
+// It checks both directions: the exact encoded string, and that
+// decoding that string yields the original sequence, with nil hops
+// still nil and resolved hops unchanged.
+func TestMarshalResolvedPathRoundtrip(t *testing.T) {
+	a := "aaaaaaaaaa"
+	b := "bbbbbbbbbb"
+	in := []*string{&a, nil, &b}
+	s := marshalResolvedPath(in)
+	want := `["aaaaaaaaaa",null,"bbbbbbbbbb"]`
+	if s != want {
+		t.Errorf("marshal: want %s, got %s", want, s)
+	}
+
+	// Decode half of the round trip.
+	out := unmarshalResolvedPathLocal(s)
+	if len(out) != len(in) {
+		t.Fatalf("roundtrip: want %d elements, got %d (raw=%s)", len(in), len(out), s)
+	}
+	for i := range in {
+		// Compare nil-ness first so a nil hop is never dereferenced.
+		if (out[i] == nil) != (in[i] == nil) {
+			t.Errorf("roundtrip[%d]: want %v, got %v", i, deref(in[i]), deref(out[i]))
+			continue
+		}
+		if in[i] != nil && *out[i] != *in[i] {
+			t.Errorf("roundtrip[%d]: want %s, got %s", i, *in[i], *out[i])
+		}
+	}
+}
+
+// TestInsertTransmissionWritesResolvedPath is the integration test that
+// gates the regression introduced by PR #1289 (issue #1547).
+//
+// Setup: seed two nodes + one observer + invoke InsertTransmission with
+// a PacketData whose PathJSON references one of the seeded nodes by
+// unique 1-byte (2-hex) prefix.
+//
+// Assert: the inserted observations row has a non-NULL resolved_path
+// whose JSON-decoded length equals the hop count, and the resolved
+// element matches the seeded node's full pubkey.
+//
+// The steps are order-dependent: the nodes must be seeded before
+// RefreshPrefixIndex, and the index must be refreshed before
+// InsertTransmission.
+func TestInsertTransmissionWritesResolvedPath(t *testing.T) {
+	// Fresh on-disk database under a per-test temp directory.
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "ingest.db")
+
+	store, err := OpenStore(dbPath)
+	if err != nil {
+		t.Fatalf("OpenStore: %v", err)
+	}
+	defer store.Close()
+
+	// Seed nodes with unique 1-byte prefixes.
+	if _, err := store.db.Exec(
+		`INSERT INTO nodes (public_key, name) VALUES (?, ?), (?, ?)`,
+		"aaaaaaaaaa", "from-node",
+		"bbbbbbbbbb", "first-hop",
+	); err != nil {
+		t.Fatal(err)
+	}
+
+	// Seed one observer (needed so InsertTransmission resolves observer_idx).
+	if err := store.UpsertObserver("obs-1", "observer-1", "", nil); err != nil {
+		t.Fatalf("UpsertObserver: %v", err)
+	}
+
+	// Force the prefix index to be (re)built from the seeded nodes so
+	// the InsertTransmission path has something to resolve against.
+	if err := store.RefreshPrefixIndex(); err != nil {
+		t.Fatalf("RefreshPrefixIndex: %v", err)
+	}
+
+	// Single-hop path: "bb" is the 1-byte prefix of the seeded
+	// "first-hop" node; FromPubkey is the seeded "from-node".
+	pkt := &PacketData{
+		RawHex:      "deadbeef",
+		Timestamp:   "2026-06-01T00:00:00Z",
+		ObserverID:  "obs-1",
+		Hash:        "h-1547",
+		RouteType:   0,
+		PayloadType: int(payloadADVERT),
+		PathJSON:    `["bb"]`,
+		DecodedJSON: "{}",
+		FromPubkey:  "aaaaaaaaaa",
+	}
+	if _, err := store.InsertTransmission(pkt); err != nil {
+		t.Fatalf("InsertTransmission: %v", err)
+	}
+
+	// Read resolved_path back through the transmission's hash;
+	// sql.NullString distinguishes SQL NULL from an empty string.
+	var rp sql.NullString
+	if err := store.db.QueryRow(
+		`SELECT resolved_path FROM observations WHERE transmission_id = (SELECT id FROM transmissions WHERE hash = ?)`,
+		"h-1547",
+	).Scan(&rp); err != nil {
+		t.Fatalf("query: %v", err)
+	}
+	if !rp.Valid || rp.String == "" {
+		t.Fatalf("expected non-nil resolved_path, got NULL/empty (regression: #1547)")
+	}
+	// One hop in → exactly one element out, resolved to the full pubkey.
+	got := unmarshalResolvedPathLocal(rp.String)
+	if len(got) != 1 {
+		t.Fatalf("resolved_path length: want 1, got %d (value=%s)", len(got), rp.String)
+	}
+	if got[0] == nil || *got[0] != "bbbbbbbbbb" {
+		t.Errorf("resolved_path[0]: want bbbbbbbbbb, got %v (raw=%s)", deref(got[0]), rp.String)
+	}
+}
+
+// deref renders p for failure messages: the pointed-to string, or the
+// literal "<nil>" when p is nil.
+func deref(p *string) string {
+	if p != nil {
+		return *p
+	}
+	return "<nil>"
+}
+
+// ─── #1560: context-aware resolution tests ─────────────────────────────────
+//
+// These exercise the post-fix behavior of resolveHopWithContext +
+// resolvePathWithContext. Until the green commit lands they MUST fail
+// on assertions (the stub falls back to naive `len==1` and returns nil
+// on every >1-candidate prefix), proving the gate is real.
+
+// build5NodeAmbiguousIndex returns a prefixIndex where 3 of 5 nodes
+// share the 1-byte prefix 0x5c. Pubkeys are the "fingerprints":
+//
+//	A = "5c000000000000000000000000000000aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+//	B = "5c000000000000000000000000000000bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+//	C = "5c000000000000000000000000000000cccccccccccccccccccccccccccccccc"
+//	D = "dd000000000000000000000000000000dddddddddddddddddddddddddddddddd"
+//	E = "ee000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
+func build5NodeAmbiguousIndex() (idx prefixIndex, A, B, C, D, E string) {
+	A = "5c000000000000000000000000000000aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+	B = "5c000000000000000000000000000000bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"
+	C = "5c000000000000000000000000000000cccccccccccccccccccccccccccccccc"
+	D = "dd000000000000000000000000000000dddddddddddddddddddddddddddddddd"
+	E = "ee000000000000000000000000000000eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
+	idx = prefixIndex{
+		// 1-byte: 5c → A,B,C (collision); dd → D; ee → E
+		"5c": {A, B, C},
+		"dd": {D},
+		"ee": {E},
+		// full-key entries (so exact-match lookups still resolve)
+		A: {A}, B: {B}, C: {C}, D: {D}, E: {E},
+	}
+	return
+}
+
+// TestResolveHopWithContext_OneByteCollision_AdjacencyResolves covers
+// the dominant production case (#1560): three nodes share the 1-byte
+// prefix 0x5c, and NeighborGraph adjacency to the anchor leaves exactly
+// one of them. The naive resolver returns nil for such a hop; the
+// context-aware resolver MUST return the adjacent pubkey. When
+// adjacency leaves two candidates, the result must still be nil.
+func TestResolveHopWithContext_OneByteCollision_AdjacencyResolves(t *testing.T) {
+	idx, A, B, C, D, E := build5NodeAmbiguousIndex()
+	g := NewNeighborGraph()
+	// chain: A↔B, B↔C, C↔D, D↔E
+	for _, edge := range [][2]string{{A, B}, {B, C}, {C, D}, {D, E}} {
+		g.AddEdge(edge[0], edge[1])
+	}
+
+	// Anchored on A, the only 5c neighbor of A is B.
+	switch got := resolveHopWithContext("5c", A, g, idx, nil); {
+	case got == nil:
+		t.Fatalf("anchor=A, hop=5c: want B (%s), got <nil>", B)
+	case *got != B:
+		t.Errorf("anchor=A, hop=5c: want %s, got %s", B, *got)
+	}
+
+	// Anchored on B, both A and C are adjacent 5c candidates. Two
+	// surviving candidates cannot be disambiguated further, so the
+	// hop must stay unresolved.
+	if got := resolveHopWithContext("5c", B, g, idx, nil); got != nil {
+		t.Errorf("anchor=B, hop=5c: ambiguous (A and C both adjacent); want <nil>, got %s", *got)
+	}
+}
+
+// TestResolvePathWithContext_TwoHopChainAnchoredOnFromNode runs the
+// canonical 1-byte collision case end-to-end: with edges A↔B and B↔C,
+// path = [5c, 5c] and from_node = A must resolve to [B, C].
+func TestResolvePathWithContext_TwoHopChainAnchoredOnFromNode(t *testing.T) {
+	idx, A, B, C, _, _ := build5NodeAmbiguousIndex()
+	g := NewNeighborGraph()
+	g.AddEdge(A, B)
+	g.AddEdge(B, C)
+
+	want := []string{B, C}
+	got := resolvePathWithContext([]string{"5c", "5c"}, A, g, idx)
+	if len(got) != 2 {
+		t.Fatalf("len(got)=%d, want 2 (raw=%v)", len(got), got)
+	}
+	for i, pubkey := range want {
+		if got[i] == nil || *got[i] != pubkey {
+			t.Errorf("hop[%d]: want %s, got %v", i, pubkey, deref(got[i]))
+		}
+	}
+}
+
+// TestResolveHopWithContext_NoAdjacencyContext_ReturnsNil is the
+// negative gate: three nodes share the 5c prefix, the graph has no
+// edges, and the anchor is either empty or not adjacent to any
+// candidate. Both cases must yield nil. Guards against an over-eager
+// resolver that just picks the first candidate.
+func TestResolveHopWithContext_NoAdjacencyContext_ReturnsNil(t *testing.T) {
+	idx, _, _, _, _, _ := build5NodeAmbiguousIndex()
+	emptyGraph := NewNeighborGraph() // no edges at all
+
+	scenarios := []struct {
+		label  string
+		anchor string
+	}{
+		{"no anchor + empty graph", ""},
+		{"non-adjacent anchor", "deadbeefdeadbeef"},
+	}
+	for _, sc := range scenarios {
+		if got := resolveHopWithContext("5c", sc.anchor, emptyGraph, idx, nil); got != nil {
+			t.Errorf("%s: want <nil>, got %s", sc.label, *got)
+		}
+	}
+}
+
+// TestResolvePathWithContext_AdvertAnchoring checks ADVERT-style
+// anchoring: from_pubkey (A) is the originator, and hop[0] carries the
+// colliding 5c prefix. B is the only 5c candidate adjacent to A, so the
+// hop must resolve to B.
+func TestResolvePathWithContext_AdvertAnchoring(t *testing.T) {
+	idx, A, B, _, _, _ := build5NodeAmbiguousIndex()
+	graph := NewNeighborGraph()
+	graph.AddEdge(A, B) // only B is adjacent to A among the 5c candidates
+
+	resolved := resolvePathWithContext([]string{"5c"}, A, graph, idx)
+	if n := len(resolved); n != 1 {
+		t.Fatalf("len(got)=%d, want 1", n)
+	}
+	if first := resolved[0]; first == nil || *first != B {
+		t.Errorf("ADVERT anchored on A, hop=5c: want %s, got %v", B, deref(first))
+	}
+}
+
+// TestResolvePathWithContext_RegressionMultiByteStillWorks asserts the
+// unique-prefix path that PR #1548 already handled has not regressed:
+// a prefix with a single candidate resolves with no anchor and a nil
+// graph. Despite the name, the fixture exercises this with the unique
+// 1-byte prefixes dd and ee.
+func TestResolvePathWithContext_RegressionMultiByteStillWorks(t *testing.T) {
+	idx, _, _, _, D, E := build5NodeAmbiguousIndex()
+	expected := []struct {
+		hop    string
+		pubkey string
+	}{
+		{"dd", D},
+		{"ee", E},
+	}
+
+	// No anchor, no graph: resolution must rest on prefix uniqueness alone.
+	got := resolvePathWithContext([]string{"dd", "ee"}, "", nil, idx)
+	if len(got) != 2 {
+		t.Fatalf("len(got)=%d, want 2", len(got))
+	}
+	for i, exp := range expected {
+		if got[i] == nil || *got[i] != exp.pubkey {
+			t.Errorf("hop[%d] %s: want %s, got %v", i, exp.hop, exp.pubkey, deref(got[i]))
+		}
+	}
+}
+
+// TestResolvePathWithContext_AllNilContractPreserved asserts the
+// all-nil → empty-string clobber-guard contract from PR #1548 still
+// holds when the path comes from the context resolver: a fully
+// unresolved result, fed to marshalResolvedPath, MUST yield "" (so
+// nilIfEmpty → SQL NULL → COALESCE preserves existing).
+func TestResolvePathWithContext_AllNilContractPreserved(t *testing.T) {
+	// An empty index leaves no hop with any candidate.
+	unresolved := resolvePathWithContext([]string{"5c", "dd"}, "", nil, prefixIndex{})
+	if n := len(unresolved); n != 2 {
+		t.Fatalf("len(got)=%d, want 2", n)
+	}
+	for i := range unresolved {
+		if hop := unresolved[i]; hop != nil {
+			t.Errorf("hop[%d]: want <nil>, got %s", i, *hop)
+		}
+	}
+
+	encoded := marshalResolvedPath(unresolved)
+	if encoded != "" {
+		t.Errorf("all-nil marshal: want \"\", got %q (clobber-guard regression)", encoded)
+	}
+}
+
+// TestMarshalResolvedPathAllNilReturnsEmpty is a regression gate for
+// the data-loss clobber bug surfaced in PR #1548 review.
+//
+// When resolvePath fails to resolve ANY hop (every element nil),
+// marshalResolvedPath previously emitted "[null,null,...]" — a
+// non-empty string that bypassed nilIfEmpty and then OVERWROTE the
+// existing resolved_path via the COALESCE(excluded, current) UPSERT
+// on re-ingest. The fix returns "" so nilIfEmpty produces SQL NULL and
+// the COALESCE preserves the existing good value.
+func TestMarshalResolvedPathAllNilReturnsEmpty(t *testing.T) {
+	cases := []struct {
+		name string
+		in   []*string
+	}{
+		{"one-nil", []*string{nil}},
+		{"two-nils", []*string{nil, nil}},
+		{"three-nils", []*string{nil, nil, nil}},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := marshalResolvedPath(tc.in)
+			if got != "" {
+				t.Errorf("all-nil input must return \"\" (so nilIfEmpty → SQL NULL → COALESCE preserves existing); got %q", got)
+			}
+		})
+	}
+
+	// Mixed (at least one non-nil) MUST still marshal normally so we
+	// don't lose partial resolutions.
+	a := "aaaaaaaaaa"
+	mixed := marshalResolvedPath([]*string{&a, nil})
+	if mixed != `["aaaaaaaaaa",null]` {
+		t.Errorf("partial resolution must still serialize; got %q", mixed)
+	}
+}
+
+// TestInsertTransmissionDoesNotClobberResolvedPathOnAllNil is the
+// integration-level regression test for the data-loss bug.
+//
+// Setup: insert a transmission whose first ingest resolves cleanly to
+// a known pubkey. Then re-ingest the SAME transmission after the
+// prefix index has been cleared (simulating an empty NeighborGraph /
+// all-nil resolution path) and assert the previously stored
+// resolved_path is PRESERVED (NOT overwritten to "[null]" or NULL).
+//
+// Pre-fix behavior: marshalResolvedPath emitted "[null]", nilIfEmpty
+// kept it non-NULL, and COALESCE(excluded.resolved_path, resolved_path)
+// clobbered the original "bbbbbbbbbb".
+//
+// The steps are order-dependent: the first ingest must run with a
+// populated index, and the re-ingest only after the index is wiped.
+func TestInsertTransmissionDoesNotClobberResolvedPathOnAllNil(t *testing.T) {
+	// Fresh on-disk database under a per-test temp directory.
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "ingest.db")
+
+	store, err := OpenStore(dbPath)
+	if err != nil {
+		t.Fatalf("OpenStore: %v", err)
+	}
+	defer store.Close()
+
+	// Seed the originator and the single hop node, then an observer,
+	// then build the prefix index from those nodes.
+	if _, err := store.db.Exec(
+		`INSERT INTO nodes (public_key, name) VALUES (?, ?), (?, ?)`,
+		"aaaaaaaaaa", "from-node",
+		"bbbbbbbbbb", "first-hop",
+	); err != nil {
+		t.Fatal(err)
+	}
+	if err := store.UpsertObserver("obs-1", "observer-1", "", nil); err != nil {
+		t.Fatalf("UpsertObserver: %v", err)
+	}
+	if err := store.RefreshPrefixIndex(); err != nil {
+		t.Fatalf("RefreshPrefixIndex: %v", err)
+	}
+
+	// The same packet value is ingested twice below.
+	pkt := &PacketData{
+		RawHex:      "deadbeef",
+		Timestamp:   "2026-06-01T00:00:00Z",
+		ObserverID:  "obs-1",
+		Hash:        "h-clobber",
+		RouteType:   0,
+		PayloadType: int(payloadADVERT),
+		PathJSON:    `["bb"]`,
+		DecodedJSON: "{}",
+		FromPubkey:  "aaaaaaaaaa",
+	}
+	if _, err := store.InsertTransmission(pkt); err != nil {
+		t.Fatalf("first InsertTransmission: %v", err)
+	}
+
+	// Sanity: first write populated resolved_path.
+	var first sql.NullString
+	if err := store.db.QueryRow(
+		`SELECT resolved_path FROM observations WHERE transmission_id = (SELECT id FROM transmissions WHERE hash = ?)`,
+		"h-clobber",
+	).Scan(&first); err != nil {
+		t.Fatalf("first query: %v", err)
+	}
+	if !first.Valid || first.String == "" {
+		t.Fatalf("precondition failed: first ingest left resolved_path NULL/empty; cannot test clobber")
+	}
+	// Baseline the re-ingest must leave untouched.
+	wantPreserved := first.String
+
+	// Now wipe the prefix index so re-ingest produces an all-nil
+	// resolution — exactly the scenario where the bug clobbers data.
+	store.prefixIdx.store(prefixIndex{})
+
+	if _, err := store.InsertTransmission(pkt); err != nil {
+		t.Fatalf("re-ingest InsertTransmission: %v", err)
+	}
+
+	// Re-read the column: it must be neither NULL'd nor rewritten.
+	var after sql.NullString
+	if err := store.db.QueryRow(
+		`SELECT resolved_path FROM observations WHERE transmission_id = (SELECT id FROM transmissions WHERE hash = ?)`,
+		"h-clobber",
+	).Scan(&after); err != nil {
+		t.Fatalf("post-reingest query: %v", err)
+	}
+	if !after.Valid {
+		t.Fatalf("data loss: resolved_path was NULL'd by re-ingest (was %q)", wantPreserved)
+	}
+	if after.String != wantPreserved {
+		t.Errorf("data loss: resolved_path was clobbered by all-nil re-ingest\n  before: %s\n  after:  %s", wantPreserved, after.String)
+	}
+}
@@ -7,23 +7,27 @@ import (

 func TestParseEnvelopeTime(t *testing.T) {
 	cases := []struct {
-		name string
-		in   string
-		ok   bool
+		name      string
+		in        string
+		ok        bool
+		wantNaive bool
 	}{
-		{"rfc3339 utc", "2026-05-16T10:00:00Z", true},
-		{"rfc3339 offset", "2026-05-16T12:00:00+02:00", true},
-		{"naive iso", "2026-05-16T10:00:00", true},
-		{"naive iso micros", "2026-05-16T10:00:00.123456", true},
-		{"garbage", "not-a-time", false},
-		{"empty", "", false},
+		{"rfc3339 utc", "2026-05-16T10:00:00Z", true, false},
+		{"rfc3339 offset", "2026-05-16T12:00:00+02:00", true, false},
+		{"naive iso", "2026-05-16T10:00:00", true, true},
+		{"naive iso micros", "2026-05-16T10:00:00.123456", true, true},
+		{"garbage", "not-a-time", false, false},
+		{"empty", "", false, false},
 	}
 	for _, c := range cases {
 		t.Run(c.name, func(t *testing.T) {
-			_, err := parseEnvelopeTime(c.in)
+			_, naive, err := parseEnvelopeTime(c.in)
 			if (err == nil) != c.ok {
 				t.Fatalf("parseEnvelopeTime(%q): want ok=%v, got err=%v", c.in, c.ok, err)
 			}
+			if err == nil && naive != c.wantNaive {
+				t.Fatalf("parseEnvelopeTime(%q): want naive=%v, got %v", c.in, c.wantNaive, naive)
+			}
 		})
 	}
 }
@@ -48,33 +52,105 @@ func TestResolveRxTime(t *testing.T) {
 	}

 	rx := now.Add(-5 * time.Hour).Format(time.RFC3339)
-	if got := resolveRxTime(map[string]interface{}{"timestamp": rx}, "test"); got != rx {
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": rx}, "test"); got != rx {
 		t.Errorf("plausible past timestamp: got %q want %q", got, rx)
 	}
-	if got := resolveRxTime(map[string]interface{}{}, "test"); !nearNow(got) {
+	if got, _ := resolveRxTime(map[string]interface{}{}, "test"); !nearNow(got) {
 		t.Errorf("missing timestamp: got %q, expected ~now", got)
 	}
-	if got := resolveRxTime(map[string]interface{}{"timestamp": "garbage"}, "test"); !nearNow(got) {
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": "garbage"}, "test"); !nearNow(got) {
 		t.Errorf("garbage timestamp: got %q, expected ~now", got)
 	}
 	future := now.Add(48 * time.Hour).Format(time.RFC3339)
-	if got := resolveRxTime(map[string]interface{}{"timestamp": future}, "test"); !nearNow(got) {
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": future}, "test"); !nearNow(got) {
 		t.Errorf("future timestamp: got %q, expected ~now (rejected)", got)
 	}

 	// RTC-reset node reporting a factory date — must not drag first_seen back.
 	factory := "2020-01-01T00:00:00Z"
-	if got := resolveRxTime(map[string]interface{}{"timestamp": factory}, "test"); !nearNow(got) {
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": factory}, "test"); !nearNow(got) {
 		t.Errorf("stale factory timestamp: got %q, expected ~now (rejected)", got)
 	}
 	// Just past the 30-day floor → rejected.
 	stale := now.Add(-31 * 24 * time.Hour).Format(time.RFC3339)
-	if got := resolveRxTime(map[string]interface{}{"timestamp": stale}, "test"); !nearNow(got) {
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": stale}, "test"); !nearNow(got) {
 		t.Errorf("stale timestamp >30d: got %q, expected ~now (rejected)", got)
 	}
 	// Just inside the 30-day floor → used verbatim.
 	recent := now.Add(-29 * 24 * time.Hour).Format(time.RFC3339)
-	if got := resolveRxTime(map[string]interface{}{"timestamp": recent}, "test"); got != recent {
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": recent}, "test"); got != recent {
 		t.Errorf("recent timestamp <30d: got %q want %q", got, recent)
 	}
 }
+
+// Regression: issue #1463 — naive (zone-less) ISO timestamps from observers
+// in negative-UTC-offset zones (e.g. California PDT, UTC−7) were interpreted
+// as UTC, producing rxTime values 7h in the past that poisoned `last_seen`
+// and rendered the observer perpetually "Stale" in the UI. The symmetric
+// clamp now collapses any naive timestamp more than 15 min off server-now to
+// `now()`, while zone-aware timestamps (RFC3339 with Z or offset) are still
+// honored verbatim regardless of skew (those are well-behaved observers).
+func TestResolveRxTimeNaiveTimestampClamp(t *testing.T) {
+	now := time.Now().UTC()
+
+	mustParse := func(s string) time.Time {
+		t.Helper()
+		parsed, err := time.Parse(time.RFC3339, s)
+		if err != nil {
+			t.Fatalf("result %q is not RFC3339: %v", s, err)
+		}
+		return parsed
+	}
+	nearNow := func(s string) bool {
+		d := mustParse(s).Sub(now)
+		if d < 0 {
+			d = -d
+		}
+		return d <= time.Minute
+	}
+
+	// California observer (UTC-7) emitting a naive local-clock timestamp:
+	// must NOT be stored verbatim 7h in the past — clamp to ~now.
+	naivePast := now.Add(-7 * time.Hour).Format("2006-01-02T15:04:05")
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": naivePast}, "test"); !nearNow(got) {
+		t.Errorf("naive past timestamp (UTC-7 observer): got %q, expected ~now (clamped)", got)
+	}
+
+	// Naive future just minutes ahead (UTC+N observer, existing soft-clamp
+	// behavior): still clamped to now.
+	naiveFuture := now.Add(5 * time.Minute).Format("2006-01-02T15:04:05")
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": naiveFuture}, "test"); !nearNow(got) {
+		t.Errorf("naive future timestamp: got %q, expected ~now (clamped)", got)
+	}
+
+	// Naive microsecond layout (python isoformat without tz) — same clamp.
+	naivePastMicros := now.Add(-7 * time.Hour).Format("2006-01-02T15:04:05.000000")
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": naivePastMicros}, "test"); !nearNow(got) {
+		t.Errorf("naive past timestamp w/ micros: got %q, expected ~now (clamped)", got)
+	}
+
+	// Well-behaved observer: Z-suffixed past timestamp passes through verbatim
+	// even if it's hours old (legitimate buffered uploads must be preserved).
+	zPast := now.Add(-7 * time.Hour).Format(time.RFC3339)
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": zPast}, "test"); got != zPast {
+		t.Errorf("Z-suffixed past timestamp must pass through: got %q want %q", got, zPast)
+	}
+
+	// Well-behaved observer with explicit offset (UTC-7) — canonicalize to UTC
+	// but preserve the moment in time. Must equal the same moment in UTC.
+	offsetLoc := time.FixedZone("PDT", -7*3600)
+	offsetMoment := now.Add(-7 * time.Hour).In(offsetLoc)
+	offsetStr := offsetMoment.Format(time.RFC3339)
+	wantUTC := offsetMoment.UTC().Format(time.RFC3339)
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": offsetStr}, "test"); got != wantUTC {
+		t.Errorf("offset-suffixed timestamp: got %q want %q", got, wantUTC)
+	}
+
+	// Naive timestamp within tolerance window (2 min in past, observer that
+	// happens to be in UTC) — within tolerance, passes through verbatim.
+	naiveCloseStr := now.Add(-2 * time.Minute).Format("2006-01-02T15:04:05")
+	naiveCloseWant := now.Add(-2 * time.Minute).Format(time.RFC3339)
+	if got, _ := resolveRxTime(map[string]interface{}{"timestamp": naiveCloseStr}, "test"); got != naiveCloseWant {
+		t.Errorf("naive timestamp within tolerance: got %q, expected %q (verbatim)", got, naiveCloseWant)
+	}
+}
@@ -0,0 +1,31 @@
+package main
+
+import "strings"
+
+// sanitizeLogString neutralizes characters that would otherwise let a
+// node-controlled string (advert name, observer origin, channel name)
+// inject fake lines into the log stream. Each of the following runes is
+// replaced with a single '?':
+//
+//   - C0 controls (< 0x20), including CR (\r), LF (\n), TAB (\t), NUL (\x00)
+//   - DEL (0x7F)
+//   - C1 controls (U+0080..U+009F), including NEL (U+0085) and the
+//     single-rune CSI (U+009B)
+//   - LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029)
+//
+// The C1 and U+2028/U+2029 cases close a gap in the original filter:
+// NEL, LS and PS are line breaks in Unicode, and U+009B can start a
+// terminal escape sequence, so a name carrying them could still split
+// or corrupt a log line.
+//
+// All other runes, including multibyte UTF-8 (Cyrillic, emoji), are
+// kept. Bytes that are not valid UTF-8 come out as U+FFFD, because
+// ranging over a string decodes each invalid byte to that rune.
+//
+// This is intentionally narrower than sanitizeName: sanitizeName preserves
+// \t and \n because they may appear in legitimately-stored display names.
+// Log sinks want neither.
+//
+// See audit-input-vulns-20260603 (LOW — log injection via newline in advert
+// name) and references at cmd/ingestor/main.go:659,689.
+func sanitizeLogString(s string) string {
+	if s == "" {
+		return s
+	}
+	// Iterate over runes, not bytes, so multibyte characters are
+	// classified and copied whole.
+	var b strings.Builder
+	b.Grow(len(s))
+	for _, r := range s {
+		switch {
+		case r < 0x20, // C0 controls
+			r >= 0x7f && r <= 0x9f, // DEL and C1 controls
+			r == 0x2028, r == 0x2029: // Unicode line/paragraph separators
+			b.WriteByte('?')
+		default:
+			b.WriteRune(r)
+		}
+	}
+	return b.String()
+}
@@ -0,0 +1,32 @@
+package main
+
+import "testing"
+
+// TestSanitizeLogString covers the log-injection defense added to fix
+// audit-input-vulns-20260603 (LOW — log injection via newline in advert name).
+func TestSanitizeLogString(t *testing.T) {
+	cases := []struct {
+		name string
+		in   string
+		want string
+	}{
+		{"plain ascii preserved", "alpha-node", "alpha-node"},
+		{"unicode preserved", "Иван привет 🦊", "Иван привет 🦊"},
+		{"lf stripped", "evil\n[security] forged-line", "evil?[security] forged-line"},
+		{"cr stripped", "evil\rfake-log", "evil?fake-log"},
+		{"crlf stripped", "a\r\nb", "a??b"},
+		{"tab stripped", "a\tb", "a?b"},
+		{"nul stripped", "a\x00b", "a?b"},
+		{"del stripped", "a\x7fb", "a?b"},
+		{"bell stripped", "a\x07b", "a?b"},
+		{"empty unchanged", "", ""},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			got := sanitizeLogString(tc.in)
+			if got != tc.want {
+				t.Fatalf("sanitizeLogString(%q) = %q, want %q", tc.in, got, tc.want)
+			}
+		})
+	}
+}
@@ -43,6 +43,28 @@ type IngestorStatsSnapshot struct {
 	// the server's /api/perf/io endpoint under .ingestor (#1120 — "Both
 	// ingestor and server"). Optional; absent on non-Linux hosts.
 	ProcIO *PerfIOSample `json:"procIO,omitempty"`
+	// WriterPerf is the per-component SQLite writer-lock latency
+	// snapshot (#1340) — wait_ms / hold_ms / contention_total tagged
+	// by component (neighbor_builder, mqtt_handler, prune_packets,
+	// prune_observers, prune_metrics, vacuum). Surfaced by the server
+	// via /api/perf/write-sources under .writer_perf. Optional —
+	// older ingestor builds don't publish this field.
+	WriterPerf map[string]WriterStatsSnapshot `json:"writer_perf,omitempty"`
+	// SourceLiveness (PR #1609 M1) is the per-MQTT-source receipt vs
+	// write-path liveness snapshot. Keyed by source Tag. Surfaced by
+	// the server via /api/healthz under .ingest_liveness so operators
+	// can see "broker alive, write path stuck" (lastReceiptUnix recent,
+	// lastMessageUnix stale) distinct from "everything stalled" (both
+	// stale). Additive: omitempty so older server builds ignore it
+	// gracefully.
+	SourceLiveness map[string]SourceLivenessSnapshot `json:"source_liveness,omitempty"`
+}
+
+// SourceLivenessSnapshot is the per-source two-clock view exposed for
+// /api/healthz consumers. Both fields are Unix timestamps in seconds;
+// 0 means "never".
+//
+// Per the SourceLiveness field comment on IngestorStatsSnapshot, a
+// recent LastReceiptUnix with a stale LastMessageUnix reads as "broker
+// alive, write path stuck", and both stale as "everything stalled".
+type SourceLivenessSnapshot struct {
+	// NOTE(review): presumably the last time a message was received
+	// from the source's broker — confirm against the writer of this clock.
+	LastReceiptUnix int64 `json:"lastReceiptUnix"`
+	// NOTE(review): presumably the last time a message made it through
+	// the write path — confirm against the writer of this clock.
+	LastMessageUnix int64 `json:"lastMessageUnix"`
 }

 // statsFilePath returns the writable path the ingestor will publish stats to.
@@ -61,6 +83,25 @@ func statsFilePath() string {

 // writeStatsAtomic writes b to path via a tmp-then-rename, refusing to follow
 // symlinks on the tmp file. Returns nil on success, an error otherwise.
+//
+// Symlink semantics (refs #1170):
+//
+//   - tmp side (path+".tmp"): protected by O_NOFOLLOW below. If tmp is a
+//     pre-planted symlink, openat fails with ELOOP instead of writing
+//     through it. This is the defensive-coding path that matters when the
+//     default stats path lives under world-writable /tmp.
+//
+//   - rename side (path): NOT protected by O_NOFOLLOW. Instead, os.Rename's
+//     semantics are relied upon — rename atomically replaces any existing
+//     entry at path (including a symlink) with the new regular file. The
+//     symlink's target is NEVER written through, because all writes happened
+//     to the unrelated tmp file before rename. Post-rename, path is a
+//     regular file (not a symlink) and any prior symlink target's contents
+//     are unchanged. The regression guardrail
+//     TestWriteStatsAtomic_SymlinkAtDestIsReplaced pins this behavior so a
+//     future refactor that swaps os.Rename for a destination-symlink-
+//     following primitive (e.g. an open(path, O_WRONLY) without O_NOFOLLOW)
+//     fails loudly.
 func writeStatsAtomic(path string, b []byte) error {
 	tmp := path + ".tmp"
 	// O_NOFOLLOW: if tmp is a pre-existing symlink, openat fails with ELOOP
@@ -107,12 +148,12 @@ var readProcSelfIOFn = readProcSelfIO
 // readProcSelfIO parses /proc/self/io. Returns ok=false on non-Linux hosts or
 // any read/parse failure (caller skips the procIO block in that case).
 func readProcSelfIO() procIOSnapshot {
-	out := procIOSnapshot{at: time.Now()}
 	f, err := os.Open("/proc/self/io")
 	if err != nil {
-		return out
+		return procIOSnapshot{}
 	}
 	defer f.Close()
+	out := procIOSnapshot{at: time.Now()}
 	parseProcSelfIOInto(bufio.NewScanner(f), &out)
 	return out
 }
@@ -204,6 +245,8 @@ func StartStatsFileWriter(s *Store, interval time.Duration) {
 				GroupCommitFlushes: 0, // group commit reverted (refs #1129)
 				BackfillUpdates:    s.Stats.SnapshotBackfills(),
 				ProcIO:             ioRate,
+				WriterPerf:         s.WriterStatsSnapshot(),
+				SourceLiveness:     SnapshotLivenessClocks(),
 			}
 			buf.Reset()
 			if err := enc.Encode(&snap); err != nil {
@@ -8,6 +8,37 @@ import (
 	"time"
 )

+// TestProcIORate_ZeroValuePrevSuppressesRate guards against the phantom-delta
+// regression from #1169: when os.Open("/proc/self/io") fails, readProcSelfIO
+// now returns a zero-value procIOSnapshot (ok=false, zero time.Time). This
+// asserts procIORate returns nil so no inflated rate spike appears for the
+// next successful read.
+func TestProcIORate_ZeroValuePrevSuppressesRate(t *testing.T) {
+	prev := procIOSnapshot{} // zero-value: ok=false, at=zero
+	cur := procIOSnapshot{
+		at:        time.Now(),
+		readBytes: 1024 * 1024 * 100,
+		ok:        true,
+	}
+	if got := procIORate(prev, cur, "2026-01-01T00:00:00Z"); got != nil {
+		t.Fatalf("expected nil rate when prev is zero-value (os.Open failed), got %+v", got)
+	}
+}
+
+// TestProcIORate_NormalPath asserts that two valid snapshots taken one
+// second apart produce a non-nil rate, and that 1024 bytes read over
+// that second is reported as 1024 bytes/sec.
+func TestProcIORate_NormalPath(t *testing.T) {
+	start := time.Now()
+	earlier := procIOSnapshot{at: start, readBytes: 0, ok: true}
+	later := procIOSnapshot{at: start.Add(time.Second), readBytes: 1024, ok: true}
+
+	rate := procIORate(earlier, later, "2026-01-01T00:00:01Z")
+	if rate == nil {
+		t.Fatal("expected non-nil rate for valid prev/cur pair")
+	}
+	if observed := rate.ReadBytesPerSec; observed != 1024.0 {
+		t.Errorf("ReadBytesPerSec: want 1024.0, got %v", observed)
+	}
+}
+
 // TestStatsFileWriter_PublishesProcIO asserts the ingestor's published
 // stats snapshot includes a `procIO` block with the per-process I/O rate
 // fields required by issue #1120 ("Both ingestor and server").
@@ -65,3 +96,73 @@ func TestStatsFileWriter_PublishesProcIO(t *testing.T) {
 		}
 	}
 }
+
+// TestWriteStatsAtomic_SymlinkAtDestIsReplaced is a regression guardrail for
+// #1170. writeStatsAtomic protects its temporary file (path+".tmp") with
+// O_NOFOLLOW, so a pre-planted symlink there cannot redirect the write. The
+// rename destination (`path` itself) has no such flag; it relies on
+// os.Rename atomically replacing whatever entry exists at the destination —
+// a symlink included — with the new regular file. Because the bytes were
+// written to the unrelated tmp file, the old symlink's target is never
+// written through.
+//
+// The test plants a symlink at `path` that points at an unrelated file and
+// then asserts:
+//   (a) after the write, path is a regular file rather than a symlink, and
+//   (b) the file the symlink pointed at still holds its original bytes.
+//
+// A future refactor that replaces os.Rename with something that follows the
+// destination symlink (for example a plain WriteFile, or open(path, O_WRONLY)
+// without O_NOFOLLOW) makes this test fail loudly.
+func TestWriteStatsAtomic_SymlinkAtDestIsReplaced(t *testing.T) {
+	tmp := t.TempDir()
+	victim := filepath.Join(tmp, "unrelated-target.bin")
+	statsPath := filepath.Join(tmp, "stats.json")
+	sentinel := []byte("DO-NOT-OVERWRITE-ME-#1170")
+	payload := []byte(`{"sampledAt":"2026-01-01T00:00:00Z"}`)
+
+	// Seed the unrelated file. Had writeStatsAtomic followed the symlink at
+	// statsPath, these sentinel bytes would be overwritten.
+	if err := os.WriteFile(victim, sentinel, 0o600); err != nil {
+		t.Fatalf("seed target: %v", err)
+	}
+	// Plant the symlink at the destination before writing.
+	if err := os.Symlink(victim, statsPath); err != nil {
+		t.Fatalf("symlink: %v", err)
+	}
+	if err := writeStatsAtomic(statsPath, payload); err != nil {
+		t.Fatalf("writeStatsAtomic: %v", err)
+	}
+
+	// (a) Lstat (not Stat) so the entry itself is inspected, not its target.
+	fi, err := os.Lstat(statsPath)
+	if err != nil {
+		t.Fatalf("lstat path: %v", err)
+	}
+	mode := fi.Mode()
+	if mode&os.ModeSymlink != 0 {
+		t.Errorf("post-write path is still a symlink (mode=%v); os.Rename should have atomically replaced it with a regular file", mode)
+	}
+	if !mode.IsRegular() {
+		t.Errorf("post-write path is not a regular file (mode=%v)", mode)
+	}
+
+	// The destination must now hold exactly the new payload.
+	written, err := os.ReadFile(statsPath)
+	if err != nil {
+		t.Fatalf("read path: %v", err)
+	}
+	if string(written) != string(payload) {
+		t.Errorf("path contents: want %q, got %q", payload, written)
+	}
+
+	// (b) The old symlink target must be untouched.
+	leftover, err := os.ReadFile(victim)
+	if err != nil {
+		t.Fatalf("read target: %v", err)
+	}
+	if string(leftover) != string(sentinel) {
+		t.Errorf("symlink target was clobbered: want %q, got %q", sentinel, leftover)
+	}
+}
@@ -0,0 +1,21 @@
+// Fixture: migration block WITHOUT an async annotation and WITHOUT being
+// wrapped in the async-migration helper. This file exists ONLY so that
+// ~/.openclaw/skills/pr-preflight/scripts/check-async-migrations.sh
+// has a known-bad sample to test against (the script is invoked with
+// BASE pointing at master and FIXTURE_DIR pointing here).
+//
+// DO NOT add a PREFLIGHT annotation to this file. DO NOT wrap the
+// migration via the async helper. The check script's correctness
+// depends on this staying BAD.
+//
+// IMPORTANT: this file must NOT contain the literal identifier of the
+// async-helper function anywhere (comments, strings, identifiers). The
+// preflight gate greps a window of lines above the migration for that
+// identifier as an "OK" signal, so mentioning it here would cause the
+// gate to *pass* this fixture — defeating its purpose. Refer to the
+// helper only obliquely as "the async-migration helper" in prose.
+package fixtures
+
+const _ = `
+CREATE INDEX idx_observations_bad_sync_v1 ON observations(observer_idx, timestamp);
+`
@@ -0,0 +1,9 @@
+// Fixture: migration block WITH an async annotation. Companion to
+// bad_sync_migration.go. The preflight check script must accept this
+// because of the PREFLIGHT line directly above the migration.
+package fixtures
+
+// PREFLIGHT: async=true reason="fixture-only — ALTER ADD COLUMN is O(1) in sqlite"
+const _ = `
+ALTER TABLE observations ADD COLUMN annotated_good_fixture_col INTEGER DEFAULT 0;
+`
@@ -0,0 +1,98 @@
+package main
+
+// Issue #1551: /api/* responses must emit Cache-Control: no-store so
+// CDNs (Cloudflare, nginx, Varnish) do not cache JSON. Static assets
+// (app.js, /, etc.) intentionally remain CDN-cacheable.
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"testing"
+
+	"github.com/gorilla/mux"
+)
+
+// TestAPIRoutesEmitNoStoreCacheControl drives a sample of /api/* endpoints
+// through the real router returned by setupTestServer and requires each one
+// to answer 200 with Cache-Control set to exactly "no-store". It is a
+// black-box check of whatever middleware chain RegisterRoutes wires up.
+func TestAPIRoutesEmitNoStoreCacheControl(t *testing.T) {
+	_, router := setupTestServer(t)
+
+	for _, endpoint := range []string{
+		"/api/stats",
+		"/api/observers",
+		"/api/packets?limit=10",
+		"/api/nodes?limit=10",
+	} {
+		t.Run(endpoint, func(t *testing.T) {
+			rec := httptest.NewRecorder()
+			router.ServeHTTP(rec, httptest.NewRequest("GET", endpoint, nil))
+
+			if rec.Code != http.StatusOK {
+				t.Fatalf("%s: expected 200, got %d (body: %s)", endpoint, rec.Code, rec.Body.String())
+			}
+			if got := rec.Header().Get("Cache-Control"); got != "no-store" {
+				t.Errorf("%s: expected Cache-Control: no-store, got %q", endpoint, got)
+			}
+		})
+	}
+}
+
+// TestStaticAssetsDoNotEmitBareNoStore guards against scope creep: the
+// no-store middleware must be scoped to /api/* only. Static assets
+// (HTML, JS, CSS) keep their existing browser-cache headers
+// ("no-cache, no-store, must-revalidate" today via spaHandler) and
+// must NOT be downgraded to bare "no-store" by the API middleware —
+// i.e. the API middleware must not run on these paths. If a future
+// change moves static assets behind no-store middleware, CDN caching
+// of immutable hashed assets breaks; assert the contract explicitly.
+//
+// Each case reports at most one failure: a bare "no-store" is called out
+// as a middleware leak, any other mismatch as a plain header mismatch.
+func TestStaticAssetsDoNotEmitBareNoStore(t *testing.T) {
+	// Build a temp public dir so spaHandler has real files to serve.
+	dir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(dir, "index.html"), []byte("<html>SPA</html>"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(filepath.Join(dir, "app.js"), []byte("console.log('app')"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	_, router := setupTestServer(t)
+	// Wire the SPA handler exactly the way main.go does for non-/api paths.
+	fs := http.FileServer(http.Dir(dir))
+	router.PathPrefix("/").Handler(spaHandler(dir, fs))
+
+	cases := []struct {
+		path        string
+		wantCacheCC string
+	}{
+		// spaHandler sets this exact value for HTML/JS/CSS.
+		{"/app.js", "no-cache, no-store, must-revalidate"},
+		{"/", "no-cache, no-store, must-revalidate"},
+	}
+
+	for _, c := range cases {
+		t.Run(c.path, func(t *testing.T) {
+			req := httptest.NewRequest("GET", c.path, nil)
+			w := httptest.NewRecorder()
+			router.ServeHTTP(w, req)
+
+			switch cc := w.Header().Get("Cache-Control"); {
+			case cc == "no-store":
+				// Bare "no-store" is the API middleware's signature value,
+				// so name the leak instead of a generic mismatch.
+				t.Errorf("%s: API no-store middleware leaked onto static asset (got bare %q, expected %q)", c.path, cc, c.wantCacheCC)
+			case cc != c.wantCacheCC:
+				t.Errorf("%s: expected Cache-Control %q, got %q", c.path, c.wantCacheCC, cc)
+			}
+		})
+	}
+}
+
+// Keep the gorilla/mux import in use: nothing else in this file refers to
+// the package, and Go rejects an imported-but-unused package at compile time.
+// NOTE(review): presumably retained so the file keeps compiling if
+// setupTestServer's signature changes — confirm the import is still needed.
+var _ = mux.NewRouter
@@ -0,0 +1,87 @@
+package main
+
+// Issue #1561: detect CDN-fronted deployments and warn ONCE.
+//
+// When operators put CoreScope behind Cloudflare/Fastly without
+// configuring a /api/* cache bypass, dashboards go stale — the origin
+// emits Cache-Control: no-store (#1551), but the CDN's zone-level
+// caching policy can still cache JSON responses for hours
+// (cf-cache-status: HIT, age > 0). We can't fix the CDN config from
+// the server side; the best we can do is detect the situation and
+// warn the operator loudly in the logs.
+//
+// Detection: presence of any CDN-specific request header
+// (CF-Connecting-IP, CF-Ray, Fastly-Client-IP, True-Client-IP).
+// We deliberately exclude X-Forwarded-For and X-Real-IP: every
+// generic reverse proxy (nginx, Caddy, Traefik, k8s ingress) sets
+// those, so including them would warn operators who aren't behind
+// a CDN at all and train them to ignore the warning entirely
+// (defeating the point of #1561).
+//
+// Side effects: a single log line per process boot — never blocks
+// the request, never modifies the response, never logs again.
+
+import (
+	"log"
+	"net/http"
+	"sync"
+	"sync/atomic"
+)
+
+// cdnWarnOnce guards the CDN warning in cdnDetectionMiddleware so the log
+// line is emitted a single time even when several first requests arrive
+// concurrently.
+var cdnWarnOnce sync.Once
+
+// cdnWarned is set true after the first CDN-fronted request has been
+// observed and logged. Subsequent requests short-circuit before the
+// per-request header scan in firstCDNHeader — a hot-path optimization
+// for the steady state (warning already emitted, every /api request
+// otherwise pays for 4 http.Header.Get lookups forever).
+var cdnWarned atomic.Bool
+
+// cdnHeaders are HTTP request headers injected ONLY by CDNs
+// (Cloudflare, Fastly, Akamai) — never by a generic reverse proxy.
+// Detected case-insensitively by http.Header.Get.
+//
+// X-Forwarded-For / X-Real-IP are intentionally NOT in this list:
+// every nginx/Caddy/Traefik/k8s-ingress deployment sets them, so
+// using them as a CDN signal produces a false positive on every
+// reverse-proxied install (issue #1561 round-1 review).
+//
+// firstCDNHeader scans this slice in order, so the first entry that is
+// present on a request is the one named in the warning.
+var cdnHeaders = []string{
+	"CF-Connecting-IP", // Cloudflare
+	"CF-Ray",           // Cloudflare
+	"Fastly-Client-IP", // Fastly
+	"True-Client-IP",   // Akamai (also set by Cloudflare Enterprise)
+}
+
+// cdnDetectionMiddleware wraps next with a CDN detector. While no warning
+// has been logged yet, each request's headers are checked against
+// cdnHeaders; the first match logs one warning that points the operator at
+// docs/deployment-behind-cdn.md and sets cdnWarned. Once cdnWarned is set
+// the header scan is skipped. next is always invoked — the middleware never
+// blocks or rewrites the request.
+func cdnDetectionMiddleware(next http.Handler) http.Handler {
+	inspect := func(w http.ResponseWriter, r *http.Request) {
+		// Scan headers only while the warning is still pending; in the
+		// steady state of a CDN-fronted deploy this branch is skipped.
+		if !cdnWarned.Load() {
+			if name := firstCDNHeader(r.Header); name != "" {
+				cdnWarnOnce.Do(func() {
+					log.Printf("[security] WARNING: detected request via CDN (%s header present). "+
+						"Ensure /api/* is bypassed in your CDN config — see docs/deployment-behind-cdn.md. "+
+						"Cached API responses cause observer-flap and incorrect dashboards.", name)
+					cdnWarned.Store(true)
+				})
+			}
+		}
+		next.ServeHTTP(w, r)
+	}
+	return http.HandlerFunc(inspect)
+}
+
+// firstCDNHeader returns the first name in cdnHeaders (in slice order) that
+// has a non-empty value in h, or "" when none of them is set.
+func firstCDNHeader(h http.Header) string {
+	for i := range cdnHeaders {
+		candidate := cdnHeaders[i]
+		if h.Get(candidate) == "" {
+			continue
+		}
+		return candidate
+	}
+	return ""
+}
@@ -0,0 +1,276 @@
+package main
+
+// Issue #1561: When the server is fronted by a CDN (Cloudflare, Fastly,
+// Akamai) we cannot guarantee /api/* responses are not cached unless
+// the operator configures a bypass rule. Detect CDN-specific request
+// headers at the first such request and log a one-shot warning
+// pointing the operator at the bypass doc.
+//
+// Contract:
+//   - Warning logs ONLY when a CDN-specific header is present
+//     (CF-Connecting-IP, CF-Ray, Fastly-Client-IP, True-Client-IP).
+//   - Generic reverse-proxy headers (X-Forwarded-For, X-Real-IP) MUST
+//     NOT trigger the warning — every nginx/Caddy/Traefik/k8s install
+//     sets those, so warning on them defeats the entire signal.
+//   - Warning logs at most ONCE per process boot (sync.Once), even
+//     under concurrent first-request load.
+//   - Middleware NEVER blocks the request — it always calls
+//     next.ServeHTTP.
+
+import (
+	"bytes"
+	"log"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+)
+
+// resetCDNDetectionOnce puts the package-level detection state back to
+// "have not warned yet": it clears the cdnWarned fast-path flag and swaps
+// in a fresh sync.Once, so each test starts from a clean slate.
+func resetCDNDetectionOnce() {
+	cdnWarned.Store(false)
+	cdnWarnOnce = sync.Once{}
+}
+
+// runWithCDNMiddleware sends req through cdnDetectionMiddleware wrapped
+// around a sentinel handler and returns (captured log output, whether the
+// sentinel ran). The standard logger is redirected to a buffer for the
+// duration of the call and restored afterwards. The sentinel answers 200,
+// so any other recorded status fails the test: it would mean the
+// middleware dropped or rewrote the request.
+func runWithCDNMiddleware(t *testing.T, req *http.Request) (string, bool) {
+	t.Helper()
+
+	original := log.Writer()
+	var captured bytes.Buffer
+	log.SetOutput(&captured)
+	defer log.SetOutput(original)
+
+	var reached bool
+	sentinel := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		reached = true
+		w.WriteHeader(http.StatusOK)
+	})
+
+	rec := httptest.NewRecorder()
+	cdnDetectionMiddleware(sentinel).ServeHTTP(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("middleware must not block request; got status %d", rec.Code)
+	}
+	return captured.String(), reached
+}
+
+// TestCDNDetection_LogsOnCFRayHeader verifies that a request carrying a
+// CF-Ray header still reaches the wrapped handler and produces a warning
+// that mentions both the CDN detection and the bypass doc.
+func TestCDNDetection_LogsOnCFRayHeader(t *testing.T) {
+	resetCDNDetectionOnce()
+
+	request := httptest.NewRequest("GET", "/api/observers", nil)
+	request.Header.Set("CF-Ray", "abc123-LAX")
+	logged, reached := runWithCDNMiddleware(t, request)
+	if !reached {
+		t.Fatal("middleware did not call next handler")
+	}
+
+	if !strings.Contains(logged, "detected request via CDN") {
+		t.Errorf("expected log to contain 'detected request via CDN', got: %q", logged)
+	}
+	if !strings.Contains(logged, "deployment-behind-cdn") {
+		t.Errorf("expected log to reference deployment-behind-cdn doc, got: %q", logged)
+	}
+}
+
+// TestCDNDetection_SilentWithoutCDNHeader verifies that a request with no
+// CDN-typical headers passes through to the wrapped handler without any
+// CDN warning being logged.
+func TestCDNDetection_SilentWithoutCDNHeader(t *testing.T) {
+	resetCDNDetectionOnce()
+
+	// Deliberately header-free request.
+	plain := httptest.NewRequest("GET", "/api/observers", nil)
+	logged, reached := runWithCDNMiddleware(t, plain)
+	if !reached {
+		t.Fatal("middleware did not call next handler")
+	}
+	if strings.Contains(logged, "detected request via CDN") {
+		t.Errorf("expected no CDN warning without CDN headers, got: %q", logged)
+	}
+}
+
+// TestCDNDetection_SilentOnReverseProxyHeadersAlone is the regression test
+// for the round-1 adversarial finding: generic reverse-proxy headers must
+// NOT trigger the warning. Every nginx/Caddy/Traefik/k8s-ingress proxy sets
+// X-Forwarded-For and X-Real-IP, so flagging them would false-positive on
+// every reverse-proxied install and train operators to ignore the warning.
+func TestCDNDetection_SilentOnReverseProxyHeadersAlone(t *testing.T) {
+	proxyOnly := []struct {
+		label string
+		key   string
+	}{
+		{label: "x-forwarded-for-alone", key: "X-Forwarded-For"},
+		{label: "x-real-ip-alone", key: "X-Real-IP"},
+	}
+	for _, pc := range proxyOnly {
+		t.Run(pc.label, func(t *testing.T) {
+			resetCDNDetectionOnce()
+
+			// Only the generic reverse-proxy header is set — nothing
+			// CDN-specific accompanies it.
+			request := httptest.NewRequest("GET", "/api/observers", nil)
+			request.Header.Set(pc.key, "10.0.0.1")
+
+			logged, reached := runWithCDNMiddleware(t, request)
+			if !reached {
+				t.Fatal("middleware did not call next handler")
+			}
+			if strings.Contains(logged, "detected request via CDN") {
+				t.Errorf("header %s alone must NOT trigger CDN warning (would false-positive every nginx/k8s deploy); got: %q", pc.key, logged)
+			}
+		})
+	}
+}
+
+// TestCDNDetection_LogsWhenCDNHeaderAccompaniesProxyHeaders covers the
+// common Cloudflare → nginx → app chain: generic proxy headers are present
+// together with a CDN-specific one, and the warning must still fire.
+func TestCDNDetection_LogsWhenCDNHeaderAccompaniesProxyHeaders(t *testing.T) {
+	resetCDNDetectionOnce()
+
+	request := httptest.NewRequest("GET", "/api/observers", nil)
+	for _, kv := range [][2]string{
+		{"X-Forwarded-For", "10.0.0.1"},
+		{"X-Real-IP", "10.0.0.1"},
+		{"CF-Connecting-IP", "1.2.3.4"},
+	} {
+		request.Header.Set(kv[0], kv[1])
+	}
+
+	logged, reached := runWithCDNMiddleware(t, request)
+	if !reached {
+		t.Fatal("middleware did not call next handler")
+	}
+	if !strings.Contains(logged, "detected request via CDN") {
+		t.Errorf("expected CDN warning when CF-Connecting-IP present alongside proxy headers; got: %q", logged)
+	}
+}
+
+// TestCDNDetection_LogsOnlyOnce sends three sequential CF-Ray requests
+// through a single middleware instance and requires that the wrapped
+// handler runs for all three while the warning appears exactly once.
+func TestCDNDetection_LogsOnlyOnce(t *testing.T) {
+	resetCDNDetectionOnce()
+
+	original := log.Writer()
+	var captured bytes.Buffer
+	log.SetOutput(&captured)
+	defer log.SetOutput(original)
+
+	var calls int
+	handler := cdnDetectionMiddleware(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		calls++
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	for attempt := 0; attempt < 3; attempt++ {
+		request := httptest.NewRequest("GET", "/api/observers", nil)
+		request.Header.Set("CF-Ray", "abc123")
+		handler.ServeHTTP(httptest.NewRecorder(), request)
+	}
+
+	if calls != 3 {
+		t.Fatalf("middleware must call next on every request; got %d calls, want 3", calls)
+	}
+	if warnings := strings.Count(captured.String(), "detected request via CDN"); warnings != 1 {
+		t.Errorf("expected CDN warning exactly once across multiple requests; got %d in output: %q", warnings, captured.String())
+	}
+}
+
+// TestCDNDetection_RecognizesAllCommonCDNHeaders checks that every
+// genuinely CDN-specific header trips the detector when it is the only
+// header on the request. X-Forwarded-For / X-Real-IP are NOT in this set —
+// see the negative test TestCDNDetection_SilentOnReverseProxyHeadersAlone.
+func TestCDNDetection_RecognizesAllCommonCDNHeaders(t *testing.T) {
+	for _, name := range []string{
+		"CF-Connecting-IP",
+		"CF-Ray",
+		"Fastly-Client-IP",
+		"True-Client-IP",
+	} {
+		t.Run(name, func(t *testing.T) {
+			resetCDNDetectionOnce()
+
+			request := httptest.NewRequest("GET", "/api/observers", nil)
+			request.Header.Set(name, "1.2.3.4")
+
+			logged, reached := runWithCDNMiddleware(t, request)
+			if !reached {
+				t.Fatal("middleware did not call next handler")
+			}
+			if !strings.Contains(logged, "detected request via CDN") {
+				t.Errorf("header %s should trip CDN detection; log was: %q", name, logged)
+			}
+		})
+	}
+}
+
+// TestCDNDetectionMiddlewareConcurrentFirstRequestLogsOnce covers round-1
+// KB finding #2: sync.Once is what keeps the warning from spamming, so it
+// must hold when many "first" requests arrive at the same time. CI runs
+// `go test -race`, which additionally stresses the primitive for data
+// races; without -race the count assertion still catches a plain bool /
+// non-atomic implementation.
+func TestCDNDetectionMiddlewareConcurrentFirstRequestLogsOnce(t *testing.T) {
+	resetCDNDetectionOnce()
+
+	// log.Printf can be called from many goroutines here; writes to the
+	// buffer are serialized so they never race the assertion read below.
+	var (
+		logMu  sync.Mutex
+		logBuf bytes.Buffer
+	)
+	original := log.Writer()
+	log.SetOutput(writerFunc(func(p []byte) (int, error) {
+		logMu.Lock()
+		defer logMu.Unlock()
+		return logBuf.Write(p)
+	}))
+	defer log.SetOutput(original)
+
+	var served atomic.Int64
+	handler := cdnDetectionMiddleware(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
+		served.Add(1)
+		w.WriteHeader(http.StatusOK)
+	}))
+
+	const n = 50
+	var wg sync.WaitGroup
+	for i := 0; i < n; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			request := httptest.NewRequest("GET", "/api/observers", nil)
+			request.Header.Set("CF-Ray", "abc123-LAX")
+			handler.ServeHTTP(httptest.NewRecorder(), request)
+		}()
+	}
+	wg.Wait()
+
+	if got := served.Load(); got != n {
+		t.Fatalf("middleware must call next on every concurrent request; got %d, want %d", got, n)
+	}
+
+	logMu.Lock()
+	logged := logBuf.String()
+	logMu.Unlock()
+	if warnings := strings.Count(logged, "detected request via CDN"); warnings != 1 {
+		t.Errorf("expected sync.Once to admit exactly ONE warning under %d concurrent first-requests; got %d. Output:\n%s", n, warnings, logged)
+	}
+}
+
+// writerFunc adapts a plain function to io.Writer so a closure can be
+// handed to log.SetOutput (used by the concurrent test to serialize
+// writes into its buffer).
+type writerFunc func(p []byte) (int, error)
+
+// Write implements io.Writer by calling f with p and returning its result.
+func (f writerFunc) Write(p []byte) (int, error) { return f(p) }
+
+// TestCDNDetection_CdnWarnedFlagSet covers the round-2 MAJOR finding:
+// sync.Once only short-circuits the log.Printf, not the per-request header
+// scan, so firstCDNHeader would keep doing 4 http.Header.Get lookups on
+// every /api request after the warning fired. The fix is an atomic.Bool
+// fast path checked BEFORE firstCDNHeader. This test gates that the flag
+// really is set by the first CDN request — otherwise the middleware would
+// have nothing to short-circuit on and the optimization would be a dead
+// store.
+func TestCDNDetection_CdnWarnedFlagSet(t *testing.T) {
+	resetCDNDetectionOnce()
+
+	request := httptest.NewRequest("GET", "/api/x", nil)
+	request.Header.Set("CF-Ray", "x")
+
+	_, reached := runWithCDNMiddleware(t, request)
+	if !reached {
+		t.Fatal("middleware did not call next handler")
+	}
+	if warned := cdnWarned.Load(); !warned {
+		t.Fatal("cdnWarned must be true after first CDN request (fast-path flag not set)")
+	}
+}
@@ -0,0 +1,507 @@
+package main
+
+// Chunked startup load + early HTTP readiness for issue #1009.
+//
+// Design:
+//   * LoadChunked paginates transmissions in id-ordered chunks of
+//     `chunkSize` (default 10000 via Config.DBLoadChunkSize). After the
+//     first chunk is merged into the store, FirstChunkReady is closed.
+//     main.go binds the HTTP listener on that signal and serves
+//     partial data while remaining chunks stream in the background.
+//   * loadStatusMiddleware stamps X-CoreScope-Load-Status on every
+//     response: "loading; progress=<rows>" until LoadComplete()
+//     reports true, then "ready". Dashboards and probes can read the
+//     header without parsing JSON.
+//   * OnChunkLoaded registers a per-chunk callback for progress
+//     logging / tests.
+//
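+// Concurrency: s.mu.Lock() is taken once per chunk, inside
+// scanAndMergeChunk, while that chunk's rows are scanned and merged into
+// store-shared maps. The chunk query itself is issued without holding
+// s.mu and the lock is released between chunks, so HTTP handlers (which
+// take s.mu.RLock) stay responsive.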
+
+import (
+	"database/sql"
+	"fmt"
+	"log"
+	"net/http"
+	"sort"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/meshcore-analyzer/dbconfig"
+)
+
+// dbLoadConfig is the server-package alias for dbconfig.LoadConfig (#1009).
+// It is declared with `=`, so it is the same type as dbconfig.LoadConfig
+// and values need no conversion between the two names.
+type dbLoadConfig = dbconfig.LoadConfig
+
+// DBLoadChunkSize returns the configured chunk size for chunked
+// startup load (config: db.load.chunkSize), or 10000 default (#1009).
+// It delegates to c.DB.GetLoadChunkSize; the fallback to the default is
+// implemented there, not in this method.
+func (c *Config) DBLoadChunkSize() int {
+	return c.DB.GetLoadChunkSize()
+}
+
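+// Note: the chunkedLoadState runtime gates used below (firstChunkReady,
+// firstChunkSignaled, chunkInitOnce, loadComplete, loadProgressRows,
+// chunkCBMu, chunkCallbacks) are not declared in this file. They are
+// reached as fields of PacketStore — see the store.go additions in the
+// same commit.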
+
+// FirstChunkReady returns a channel closed once the first chunk has
+// been merged into the store, signalling the HTTP listener can bind.
+// The channel is created lazily on first use, so it is safe to call this
+// before LoadChunked starts. LoadChunked also closes the channel when it
+// returns (including on error), so a receiver is never left blocked by an
+// empty or failed load.
+func (s *PacketStore) FirstChunkReady() <-chan struct{} {
+	s.chunkedLoadInit()
+	return s.firstChunkReady
+}
+
+// LoadComplete reports whether LoadChunked has finished all chunks.
+// The flag is reset to false when LoadChunked starts and set to true only
+// at the end of its success path, so it stays false after a failed load.
+func (s *PacketStore) LoadComplete() bool {
+	return s.loadComplete.Load()
+}
+
+// LoadProgress reports the number of transmission rows processed by
+// the in-flight (or completed) LoadChunked call. The counter is reset to 0
+// when LoadChunked starts and updated after each chunk has been merged.
+func (s *PacketStore) LoadProgress() int64 {
+	return s.loadProgressRows.Load()
+}
+
+// OnChunkLoaded registers a callback fired once per chunk after that
+// chunk has been merged into the store. The callback receives the
+// number of transmission rows in that chunk and the running total.
+// Multiple registrations chain: callbacks run in registration order.
+//
+// Callbacks are invoked from LoadChunked via fireChunkCallbacks, which
+// calls them without holding chunkCBMu and recovers (and logs) a panic in
+// any individual callback.
+func (s *PacketStore) OnChunkLoaded(fn func(rowsThisChunk, totalRows int)) {
+	s.chunkedLoadInit()
+	// chunkCBMu guards the slice against concurrent registration and the
+	// snapshot taken in fireChunkCallbacks.
+	s.chunkCBMu.Lock()
+	defer s.chunkCBMu.Unlock()
+	s.chunkCallbacks = append(s.chunkCallbacks, fn)
+}
+
+// chunkedLoadInit lazily creates the firstChunkReady channel. The work is
+// wrapped in chunkInitOnce (a sync.Once), so concurrent first callers don't
+// race and the channel is created exactly once. The callback slice needs
+// no initialisation here: OnChunkLoaded appends to it directly.
+func (s *PacketStore) chunkedLoadInit() {
+	s.chunkInitOnce.Do(func() {
+		s.firstChunkReady = make(chan struct{})
+	})
+}
+
+// signalFirstChunk closes the firstChunkReady channel, releasing anything
+// waiting on FirstChunkReady. It is idempotent: the CompareAndSwap on
+// firstChunkSignaled lets only the first caller perform the close, so
+// repeated calls (one per chunk plus the deferred call in LoadChunked)
+// cannot double-close the channel.
+func (s *PacketStore) signalFirstChunk() {
+	// Ensure the channel exists before closing it: close on a nil channel
+	// panics, and this method must stay safe even if a caller reaches it
+	// before FirstChunkReady / OnChunkLoaded / LoadChunked ran the lazy init.
+	s.chunkedLoadInit()
+	if s.firstChunkSignaled.CompareAndSwap(false, true) {
+		close(s.firstChunkReady)
+	}
+}
+
+// fireChunkCallbacks invokes every callback registered via OnChunkLoaded
+// with the row count of the chunk just merged and the running total. The
+// callback list is copied under chunkCBMu and the callbacks are then run
+// without the lock held. A panic in one callback is recovered and logged,
+// and the remaining callbacks still run.
+func (s *PacketStore) fireChunkCallbacks(rowsThisChunk, totalRows int) {
+	s.chunkCBMu.Lock()
+	snapshot := make([]func(int, int), len(s.chunkCallbacks))
+	copy(snapshot, s.chunkCallbacks)
+	s.chunkCBMu.Unlock()
+
+	// invoke isolates one callback so its panic cannot escape.
+	invoke := func(cb func(int, int)) {
+		defer func() {
+			if cause := recover(); cause != nil {
+				log.Printf("[store] OnChunkLoaded callback panic: %v", cause)
+			}
+		}()
+		cb(rowsThisChunk, totalRows)
+	}
+	for i := range snapshot {
+		invoke(snapshot[i])
+	}
+}
+
+// LoadChunked streams transmissions + observations from SQLite into
+// the in-memory store in id-ordered chunks of `chunkSize` rows. Pass
+// 0 to use the default (10000).
+//
+// After the first chunk is merged, FirstChunkReady is closed and the
+// HTTP listener may bind. Remaining chunks stream while handlers run
+// against partially-populated data; loadStatusMiddleware advertises
+// loading status until LoadComplete() returns true.
+//
+// Returns nil on success. If a chunk query or scan fails it returns an
+// error wrapping the cause and naming the chunk index; chunks merged
+// before the failure stay in the store and LoadComplete() remains false.
+// Panics if the neighbor_edges check passes while s.graph is still nil
+// (startup-ordering invariant, see below).
+//
+// Re-entrancy: LoadChunked is NOT safe to call concurrently with
+// itself on the same PacketStore — it resets loadComplete /
+// loadProgressRows and mutates store-shared maps under s.mu. In
+// production it is invoked exactly once from main.go boot. Tests that
+// open a fresh store per test are also safe. If a future caller needs
+// repeat or concurrent loads, add a top-level mutex first.
+func (s *PacketStore) LoadChunked(chunkSize int) error {
+	if chunkSize <= 0 {
+		chunkSize = 10000
+	}
+	// Startup-ordering invariant (PR #1643 R1 munger #2). Mirror the
+	// guard in Load() so the production async path also fast-fails when
+	// neighbor_edges has rows but the graph is missing. See Load() for
+	// the full rationale.
+	if neighborEdgesTableExists(s.db.conn) && s.graph.Load() == nil {
+		panic("packet store LoadChunked(): neighbor_edges table has rows but s.graph is nil — graph must be loaded before packet load (see main.go #1643 invariant)")
+	}
+	s.chunkedLoadInit()
+	// Reset state for repeat calls in tests.
+	s.loadComplete.Store(false)
+	s.loadProgressRows.Store(0)
+
+	// On any return — error OR success — unblock listeners that gate on
+	// the readiness signal so an empty/failed DB does not deadlock the
+	// caller. Note: loadComplete is set on the success path only (see
+	// the end of this function) so probes do NOT see ready=true after a
+	// failed load.
+	defer s.signalFirstChunk()
+
+	t0 := time.Now()
+
+	// Build the retention/memory filter the legacy Load() uses so
+	// behavior is preserved when callers migrate from Load → LoadChunked.
+	// Built against the `t2` alias used inside the chunk subquery so we
+	// don't need brittle post-hoc string rewrites.
+	var loadConditions []string
+	// hotStartupHours, when set, takes precedence over retentionHours.
+	hotCutoffHours := s.retentionHours
+	if s.hotStartupHours > 0 {
+		hotCutoffHours = s.hotStartupHours
+	}
+	var hotCutoffStr string
+	if hotCutoffHours > 0 {
+		hotCutoffStr = time.Now().UTC().Add(-time.Duration(hotCutoffHours * float64(time.Hour))).Format(time.RFC3339)
+		loadConditions = append(loadConditions, fmt.Sprintf("t2.first_seen >= '%s'", hotCutoffStr))
+	}
+
+	// COUNT honours the same retention/hot-startup filter the chunk
+	// loop applies, so the logged "DB total" matches the rows the
+	// loop will actually walk. Use a `t2` alias to share the WHERE
+	// builder above. If the count fails (e.g. empty DB, locked WAL),
+	// fall through with -1 — it's only used for the post-load log line.
+	totalInDB := -1
+	countSQL := "SELECT COUNT(*) FROM transmissions t2"
+	if len(loadConditions) > 0 {
+		countSQL += " WHERE " + strings.Join(loadConditions, " AND ")
+	}
+	if err := s.db.conn.QueryRow(countSQL).Scan(&totalInDB); err != nil {
+		totalInDB = -1
+	}
+
+	// Memory cap honoured by clamping the maximum cursor walk.
+	// maxPackets stays 0 (no cap) when maxMemoryMB is unset; otherwise it
+	// is the budget in bytes divided by a per-transmission size estimate
+	// of at least 1000 bytes, with a floor of 1000 packets.
+	var maxPackets int64
+	if s.maxMemoryMB > 0 {
+		avgBytes := int64(1000)
+		if sample := estimateStoreTxBytesTypical(10); sample > avgBytes {
+			avgBytes = sample
+		}
+		maxPackets = (int64(s.maxMemoryMB) * 1048576) / avgBytes
+		if maxPackets < 1000 {
+			maxPackets = 1000
+		}
+	}
+
+	chunkIdx := 0
+	totalLoaded := 0
+	// Start the id cursor BELOW the minimum possible row id so the
+	// first chunk's `t2.id > cursorID` predicate includes id=0. The
+	// e2e fixture seed for issue #1486 inserts the grouped-packet row
+	// with id=0 (so it sorts LAST in the default packets view via
+	// `ORDER BY id DESC` / oldest first_seen). Seeding the cursor at
+	// 0 silently excluded that row, leaving the page with no
+	// tr[data-hash] and timing out the playwright wait. Legacy Load()
+	// had no id cursor and loaded id=0 unconditionally — we restore
+	// that semantic by starting one below SQLite's minimum rowid (-1).
+	var cursorID int64 = -1
+
+	// Relay-hop fallback inputs, fetched ONCE before the chunk-query loop.
+	// getCachedNodesAndPM issues its own DB query, so calling it while a
+	// chunk cursor is open would deadlock on a single-connection SQLite
+	// pool. resolved_path is never persisted post-#1287, so scanAndMergeChunk
+	// re-resolves relay hops from path_json using these snapshots.
+	// PR #1643 R1 munger #1: cold load uses unique_prefix-only gate, so
+	// the neighbor graph is no longer consulted here (affinity-tier
+	// resolution against ≤168h-old observations would silently mis-attribute).
+	s.mu.RLock()
+	_, relayPM := s.getCachedNodesAndPM()
+	s.mu.RUnlock()
+	var coldLoadAmbiguousHopsSkipped int
+
+	for {
+		// Per-chunk WHERE: the shared retention filter plus the keyset
+		// cursor. Copy loadConditions so the shared slice is not mutated.
+		conds := append([]string{}, loadConditions...)
+		conds = append(conds, fmt.Sprintf("t2.id > %d", cursorID))
+		whereClause := "WHERE " + strings.Join(conds, " AND ")
+
+		// Optional observation columns are selected only when the
+		// schema flags on s.db say they exist.
+		rpCol := ""
+		if s.db.hasResolvedPath {
+			rpCol = ", o.resolved_path"
+		}
+		obsRawHexCol := ""
+		if s.db.hasObsRawHex {
+			obsRawHexCol = ", o.raw_hex"
+		}
+
+		// The inner subquery pages transmissions by id (LIMIT chunkSize);
+		// the outer joins attach every observation of those transmissions,
+		// so one chunk can return more rows than chunkSize.
+		var chunkSQL string
+		if s.db.isV3 {
+			chunkSQL = `SELECT t.id, t.raw_hex, t.hash, t.first_seen, t.route_type,
+					t.payload_type, t.payload_version, t.decoded_json,
+					o.id, obs.id, obs.name, COALESCE(obs.iata, ''), o.direction,
+					o.snr, o.rssi, o.score, o.path_json, strftime('%Y-%m-%dT%H:%M:%fZ', o.timestamp, 'unixepoch')` + obsRawHexCol + rpCol + `
+				FROM (SELECT * FROM transmissions t2 ` + whereClause + ` ORDER BY t2.id ASC LIMIT ` + fmt.Sprintf("%d", chunkSize) + `) AS t
+				LEFT JOIN observations o ON o.transmission_id = t.id
+				LEFT JOIN observers obs ON obs.rowid = o.observer_idx
+				ORDER BY t.id ASC, o.timestamp DESC`
+		} else {
+			chunkSQL = `SELECT t.id, t.raw_hex, t.hash, t.first_seen, t.route_type,
+					t.payload_type, t.payload_version, t.decoded_json,
+					o.id, o.observer_id, o.observer_name, COALESCE(obs.iata, ''), o.direction,
+					o.snr, o.rssi, o.score, o.path_json, o.timestamp` + obsRawHexCol + rpCol + `
+				FROM (SELECT * FROM transmissions t2 ` + whereClause + ` ORDER BY t2.id ASC LIMIT ` + fmt.Sprintf("%d", chunkSize) + `) AS t
+				LEFT JOIN observations o ON o.transmission_id = t.id
+				LEFT JOIN observers obs ON obs.id = o.observer_id
+				ORDER BY t.id ASC, o.timestamp DESC`
+		}
+
+		rows, err := s.db.conn.Query(chunkSQL)
+		if err != nil {
+			return fmt.Errorf("chunk %d: query: %w", chunkIdx, err)
+		}
+
+		chunkTxCount, lastID, err := s.scanAndMergeChunk(rows, relayPM, &coldLoadAmbiguousHopsSkipped)
+		// Close explicitly rather than with defer: a defer inside this
+		// loop would keep every chunk's rows open until the function
+		// returns.
+		rows.Close()
+		if err != nil {
+			return fmt.Errorf("chunk %d: scan: %w", chunkIdx, err)
+		}
+
+		// No transmissions past the cursor: the walk is finished.
+		if chunkTxCount == 0 {
+			break
+		}
+
+		cursorID = lastID
+		totalLoaded += chunkTxCount
+		chunkIdx++
+		s.loadProgressRows.Store(int64(totalLoaded))
+		s.signalFirstChunk()
+		s.fireChunkCallbacks(chunkTxCount, totalLoaded)
+
+		// The cap is checked after the chunk has been merged, so the
+		// store can exceed maxPackets by up to one chunk.
+		// NOTE(review): chunks are walked in ascending id order, so when
+		// the cap stops the loop it is the highest ids that stay
+		// unloaded — confirm this is the intended subset under a memory cap.
+		if maxPackets > 0 && int64(totalLoaded) >= maxPackets {
+			break
+		}
+		// A short chunk means the filtered table has been exhausted.
+		if chunkTxCount < chunkSize {
+			break
+		}
+	}
+
+	// Post-load: pick best observation, build indexes — same shape as
+	// legacy Load().
+	s.mu.Lock()
+	for _, tx := range s.packets {
+		pickBestObservation(tx)
+		s.indexByNode(tx)
+	}
+	// Restore the "s.packets sorted oldest-first by FirstSeen" invariant
+	// that legacy Load() got for free from "ORDER BY t.first_seen ASC".
+	// LoadChunked walks chunks in id-ASC order so the slice ends up
+	// id-ordered, which only equals first_seen-ordered when ids and
+	// timestamps are correlated. After tools/freshen-fixture.sh (or any
+	// real-world out-of-order ingest) they're not, leaving
+	// s.packets[0].FirstSeen pointing at the newest row — which then
+	// poisons oldestLoaded below and routes legitimate in-memory queries
+	// to the SQL fallback. GetTimestamps (store.go) and QueryPackets
+	// both rely on this invariant. See PR #1596 / mobile e2e regression.
+	sort.SliceStable(s.packets, func(i, j int) bool {
+		return s.packets[i].FirstSeen < s.packets[j].FirstSeen
+	})
+	s.buildSubpathIndex()
+	s.buildPathHopIndex()
+	s.buildDistanceIndex()
+	// oldestLoaded: the hot-startup cutoff when one is configured,
+	// otherwise the FirstSeen of the oldest packet actually loaded.
+	if s.hotStartupHours > 0 {
+		s.oldestLoaded = hotCutoffStr
+	} else if len(s.packets) > 0 {
+		s.oldestLoaded = s.packets[0].FirstSeen
+	}
+	s.loaded = true
+	s.mu.Unlock()
+
+	// #1009 / PR #1596: flip the subpath + pathHop ready flags now that
+	// the chunk loader has built both indexes synchronously above.
+	// Without this, WaitIndexesReady (used by
+	// StartRepeaterEnrichmentRecomputer at boot) blocks for up to
+	// repeaterEnrichmentPrewarmWait (60s), delaying HTTP listener bind
+	// past CI's 30s /api/healthz deadline.
+	s.markIndexesReadySync()
+
+	elapsed := time.Since(t0)
+	log.Printf("[store] LoadChunked: %d transmissions (%d observations) across %d chunk(s) in %v (chunkSize=%d, DB total=%d)",
+		totalLoaded, s.totalObs, chunkIdx, elapsed, chunkSize, totalInDB)
+	if coldLoadAmbiguousHopsSkipped > 0 {
+		log.Printf("[store] LoadChunked: skipped %d ambiguous-prefix relay hops (unique_prefix gate, PR #1643 R1)",
+			coldLoadAmbiguousHopsSkipped)
+	}
+	s.loadMultibyteCapFromDB()
+	// Mark complete on the success path only — see the function-level
+	// defer above for why this is NOT in a deferred call. Probes that
+	// read LoadComplete()==true after a failed load would otherwise
+	// see ready=true for a half-loaded store.
+	s.loadComplete.Store(true)
+	return nil
+}
+
+// scanAndMergeChunk drains rows while holding s.mu, merging each row into the store's maps; rows that fail to Scan are logged and skipped.
+// It returns the number of distinct transmission ids scanned, the largest
+// transmission id seen (the caller's cursor for the next chunk), and rows.Err().
+func (s *PacketStore) scanAndMergeChunk(rows *sql.Rows, relayPM *prefixMap, coldLoadAmbiguousHopsSkipped *int) (int, int64, error) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	hopsSeen := make(map[string]bool) // shared across every row of this chunk; handed to indexResolvedPathHops
+	seenTxIDs := make(map[int]bool) // distinct transmission ids in this chunk; its size is the returned count
+	var maxID int64
+
+	for rows.Next() {
+		var txID int
+		var rawHex, hash, firstSeen, decodedJSON sql.NullString
+		var routeType, payloadType, payloadVersion sql.NullInt64
+		var obsID sql.NullInt64
+		var observerID, observerName, observerIATA, direction, pathJSON, obsTimestamp sql.NullString
+		var snr, rssi sql.NullFloat64
+		var score sql.NullInt64
+		var obsRawHex sql.NullString
+		var resolvedPathStr sql.NullString
+
+		scanArgs := []interface{}{&txID, &rawHex, &hash, &firstSeen, &routeType, &payloadType,
+			&payloadVersion, &decodedJSON,
+			&obsID, &observerID, &observerName, &observerIATA, &direction,
+			&snr, &rssi, &score, &pathJSON, &obsTimestamp}
+		if s.db.hasObsRawHex {
+			scanArgs = append(scanArgs, &obsRawHex)
+		}
+		if s.db.hasResolvedPath {
+			scanArgs = append(scanArgs, &resolvedPathStr)
+		}
+		if err := rows.Scan(scanArgs...); err != nil {
+			log.Printf("[store] LoadChunked scan error: %v", err)
+			continue // best-effort: the bad row is dropped and does not count toward seenTxIDs/maxID
+		}
+
+		if int64(txID) > maxID {
+			maxID = int64(txID)
+		}
+		seenTxIDs[txID] = true
+
+		hashStr := nullStrVal(hash)
+		tx := s.byHash[hashStr] // rows are merged by hash, so repeated rows for one transmission share a StoreTx
+		if tx == nil {
+			tx = &StoreTx{
+				ID:          txID,
+				RawHex:      nullStrVal(rawHex),
+				Hash:        hashStr,
+				FirstSeen:   nullStrVal(firstSeen),
+				LatestSeen:  nullStrVal(firstSeen), // starts at first_seen; advanced below by newer observations
+				RouteType:   nullIntPtr(routeType),
+				PayloadType: nullIntPtr(payloadType),
+				DecodedJSON: nullStrVal(decodedJSON),
+				obsKeys:     make(map[string]bool),
+				observerSet: make(map[string]bool),
+			}
+			s.byHash[hashStr] = tx
+			s.packets = append(s.packets, tx)
+			s.byTxID[txID] = tx
+			if txID > s.maxTxID {
+				s.maxTxID = txID
+			}
+			s.indexByNode(tx)
+			if tx.PayloadType != nil {
+				pt := *tx.PayloadType
+				s.byPayloadType[pt] = append(s.byPayloadType[pt], tx)
+			}
+			s.trackAdvertPubkey(tx)
+			s.trackedBytes += estimateStoreTxBytes(tx)
+		}
+
+		if obsID.Valid { // a NULL observation id means the row carries no observation columns to merge
+			oid := int(obsID.Int64)
+			obsIDStr := nullStrVal(observerID)
+			obsPJ := nullStrVal(pathJSON)
+
+			dk := obsIDStr + "|" + obsPJ // dedup key: observer id + path_json
+			if tx.obsKeys[dk] {
+				continue // same observer + path already recorded for this transmission
+			}
+
+			obs := &StoreObs{
+				ID:             oid,
+				TransmissionID: txID,
+				ObserverID:     obsIDStr,
+				ObserverName:   nullStrVal(observerName),
+				ObserverIATA:   nullStrVal(observerIATA),
+				Direction:      nullStrVal(direction),
+				SNR:            nullFloatPtr(snr),
+				RSSI:           nullFloatPtr(rssi),
+				Score:          nullIntPtr(score),
+				PathJSON:       obsPJ,
+				RawHex:         nullStrVal(obsRawHex),
+				Timestamp:      normalizeTimestamp(nullStrVal(obsTimestamp)),
+			}
+
+			rpStr := nullStrVal(resolvedPathStr)
+			if rpStr != "" {
+				rp := unmarshalResolvedPath(rpStr)
+				pks := extractResolvedPubkeys(rp)
+				s.indexResolvedPathHops(tx, pks, hopsSeen)
+			} else if relayPM != nil && obsPJ != "" && obsPJ != "[]" {
+				// resolved_path is NULL on live (since #1287 relay data is
+				// persisted as neighbor_edges, not per-observation). Re-resolve
+				// relay-hop attribution from path_json so relay nodes keep their
+				// analytics history across a restart instead of rebuilding only
+				// from post-restart live traffic. relayPM is passed in from
+				// LoadChunked (fetched before any chunk cursor opened).
+				// byNode ONLY — see the Load() counterpart for why the
+				// resolved_path/path-hop indexes must NOT be populated here.
+				// PR #1643 R1 munger #1: unique_prefix-only gate.
+				rp := resolvePathForObsColdLoad(obsPJ, obsIDStr, tx, relayPM, coldLoadAmbiguousHopsSkipped)
+				for _, pk := range extractResolvedPubkeys(rp) {
+					s.addToByNode(tx, pk)
+				}
+			}
+
+			tx.Observations = append(tx.Observations, obs)
+			tx.obsKeys[dk] = true
+			if obs.ObserverID != "" && !tx.observerSet[obs.ObserverID] {
+				tx.observerSet[obs.ObserverID] = true
+				tx.UniqueObserverCount++
+			}
+			tx.ObservationCount++
+			if obs.Timestamp > tx.LatestSeen {
+				tx.LatestSeen = obs.Timestamp
+			}
+
+			s.byObsID[oid] = obs
+			if oid > s.maxObsID {
+				s.maxObsID = oid
+			}
+			if obsIDStr != "" {
+				s.byObserver[obsIDStr] = append(s.byObserver[obsIDStr], obs)
+			}
+			s.totalObs++
+			s.trackedBytes += estimateStoreObsBytes(obs)
+		}
+	}
+	if err := rows.Err(); err != nil {
+		return len(seenTxIDs), maxID, err
+	}
+	return len(seenTxIDs), maxID, nil
+}
+
+// loadStatusMiddleware sets X-CoreScope-Load-Status on every response.
+// While LoadChunked is in flight the header reports
+// "loading; progress=<rows>"; after completion it reports "ready".
+// The header is set BEFORE calling the next handler so probes can
+// observe it on any response (including streaming bodies).
+func loadStatusMiddleware(s *PacketStore, next http.Handler) http.Handler {
+	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if s != nil && s.LoadComplete() {
+			w.Header().Set("X-CoreScope-Load-Status", "ready")
+		} else if s != nil {
+			w.Header().Set("X-CoreScope-Load-Status",
+				fmt.Sprintf("loading; progress=%d", s.LoadProgress()))
+		} else {
+			w.Header().Set("X-CoreScope-Load-Status", "loading")
+		}
+		next.ServeHTTP(w, r)
+	})
+}
+
+// --- runtime state stitched into PacketStore via store_chunked.go ---
+
+// Blank-identifier references to the sync and atomic packages. They
+// declare no usable state: the PacketStore fields themselves live in
+// store.go. Kept as an audit reminder (presumably also pins both imports).
+var _ = sync.Once{}
+var _ atomic.Bool
@@ -0,0 +1,63 @@
+package main
+
+// Issue #1009 follow-up tests for PR #1596:
+//
+//   (A) LoadChunked must flip subpath + pathHop index ready flags
+//       after building those indexes. Otherwise WaitIndexesReady (used
+//       by StartRepeaterEnrichmentRecomputer at boot) blocks the
+//       caller for up to repeaterEnrichmentPrewarmWait (60s), which is
+//       why CI's "Start Go server" step times out before /api/healthz
+//       can answer within its 30s deadline.
+//
+//   (B) LoadChunked must NOT report LoadComplete()==true when it
+//       returns an error. Before the fix, a defer unconditionally
+//       called s.loadComplete.Store(true), so a failed load appeared
+//       "ready" to probes and the load-status middleware.
+
+import (
+	"errors"
+	"testing"
+)
+
+// (A) LoadChunked must leave both index-ready flags set: they start
+// false on a fresh store and must both read true once the load returns.
+func TestLoadChunked_MarksIndexesReady(t *testing.T) {
+	ps := openChunkedTestStore(t, 100)
+	defer ps.db.conn.Close()
+
+	if ps.SubpathIndexReady() || ps.PathHopIndexReady() {
+		t.Fatal("indexes must start NOT ready")
+	}
+
+	if loadErr := ps.LoadChunked(50); loadErr != nil {
+		t.Fatalf("LoadChunked: %v", loadErr)
+	}
+
+	// Checked in order; the first flag that is still false fails the test.
+	readiness := []struct {
+		ready   func() bool
+		failure string
+	}{
+		{ps.SubpathIndexReady, "SubpathIndexReady() must be true after LoadChunked builds the index"},
+		{ps.PathHopIndexReady, "PathHopIndexReady() must be true after LoadChunked builds the index"},
+	}
+	for _, check := range readiness {
+		if !check.ready() {
+			t.Fatal(check.failure)
+		}
+	}
+}
+
+// (B) A LoadChunked call that fails must leave LoadComplete() false.
+func TestLoadChunked_ErrorDoesNotMarkComplete(t *testing.T) {
+	ps := openChunkedTestStore(t, 100)
+
+	// With the connection closed up front, the load cannot query the DB.
+	if closeErr := ps.db.conn.Close(); closeErr != nil {
+		t.Fatalf("close DB: %v", closeErr)
+	}
+
+	loadErr := ps.LoadChunked(50)
+	switch {
+	case loadErr == nil:
+		t.Fatal("LoadChunked must return an error when the DB query fails")
+	case !errors.Is(loadErr, loadErr): // always false; kept to satisfy linters — the last case is what matters
+		t.Fatalf("unexpected error shape: %v", loadErr)
+	case ps.LoadComplete():
+		t.Fatal("LoadComplete() must remain false after LoadChunked returns an error")
+	}
+}
@@ -0,0 +1,115 @@
+package main
+
+// Regression for PR #1596 / issue #1486 e2e: LoadChunked uses
+// `cursorID = 0` with a `t2.id > cursorID` predicate, which silently
+// excludes any transmission with id=0. The e2e seed for #1486 inserts
+// the grouped-packet row with id=0 (so it sorts LAST in the default
+// packets view), and the page deep-links to /packets?hash=<seed>.
+// With the chunked loader skipping id=0, the in-memory store never
+// learns about the row; QueryGroupedPackets returns 0; the page
+// renders no `tr[data-hash]` and the e2e times out at 12s.
+//
+// Legacy Load() walked all transmissions unconditionally (no id
+// cursor) and therefore included id=0. Restoring that semantic — by
+// using a non-existent sentinel (-1) on the first iteration, or by
+// switching the predicate to `>=` for the initial pass — fixes the
+// regression.
+//
+// This test inserts a transmission with id=0 plus a handful of
+// id>=1 transmissions and asserts that LoadChunked loads the id=0
+// row into s.byHash.
+
+import (
+	"database/sql"
+	"fmt"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+// createTestDBWithIDZero writes a SQLite fixture at dbPath holding one
+// transmission/observation pair with id=0 (hash fae0c9e6d357a814) plus
+// extraTx further pairs with ids 1..extraTx, each one minute older than
+// the previous. Every setup step (open, DDL, prepare, insert) fails the
+// test through tb on error, so a broken fixture cannot be mistaken for a
+// loader regression.
+func createTestDBWithIDZero(tb testing.TB, dbPath string, extraTx int) {
+	tb.Helper()
+	conn, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL")
+	if err != nil {
+		tb.Fatal(err)
+	}
+	defer conn.Close()
+
+	stmts := []string{
+		`CREATE TABLE IF NOT EXISTS transmissions (
+			id INTEGER PRIMARY KEY,
+			raw_hex TEXT, hash TEXT, first_seen TEXT,
+			route_type INTEGER, payload_type INTEGER,
+			payload_version INTEGER, decoded_json TEXT
+		)`,
+		`CREATE TABLE IF NOT EXISTS observations (
+			id INTEGER PRIMARY KEY,
+			transmission_id INTEGER, observer_id TEXT, observer_name TEXT,
+			direction TEXT, snr REAL, rssi REAL, score INTEGER,
+			path_json TEXT, timestamp TEXT, raw_hex TEXT
+		)`,
+		`CREATE TABLE IF NOT EXISTS observers (rowid INTEGER PRIMARY KEY, id TEXT, name TEXT, iata TEXT)`,
+		`CREATE TABLE IF NOT EXISTS nodes (
+			pubkey TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL,
+			last_seen TEXT, first_seen TEXT, frequency REAL
+		)`,
+		`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER)`,
+		`INSERT INTO schema_version (version) VALUES (1)`,
+		`CREATE INDEX IF NOT EXISTS idx_tx_first_seen ON transmissions(first_seen)`,
+	}
+	for _, s := range stmts {
+		if _, err := conn.Exec(s); err != nil {
+			tb.Fatalf("setup exec: %v\nSQL: %s", err, s)
+		}
+	}
+
+	txStmt, err := conn.Prepare("INSERT INTO transmissions (id, raw_hex, hash, first_seen, route_type, payload_type, payload_version, decoded_json) VALUES (?, ?, ?, ?, ?, ?, ?, ?)")
+	if err != nil {
+		tb.Fatalf("prepare transmissions insert: %v", err)
+	}
+	defer txStmt.Close()
+	obsStmt, err := conn.Prepare("INSERT INTO observations (id, transmission_id, observer_id, observer_name, direction, snr, rssi, score, path_json, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+	if err != nil {
+		tb.Fatalf("prepare observations insert: %v", err)
+	}
+	defer obsStmt.Close()
+
+	// mustExec runs one prepared insert and aborts the test if it fails,
+	// instead of silently producing a fixture with missing rows.
+	mustExec := func(stmt *sql.Stmt, args ...interface{}) {
+		tb.Helper()
+		if _, err := stmt.Exec(args...); err != nil {
+			tb.Fatalf("fixture insert: %v", err)
+		}
+	}
+
+	now := time.Now().UTC().Truncate(time.Second)
+	// id=0: the #1486-style seed row, within retention window.
+	seedAt := now.Add(-1 * time.Minute)
+	mustExec(txStmt, 0, "1500", "fae0c9e6d357a814", seedAt.Format(time.RFC3339), 1, 5, 0, `{"type":"CHAN"}`)
+	mustExec(obsStmt, 0, 0, "obs1", "Obs1", "rx", 5.0, -95.0, 0, `["AA"]`, seedAt.Unix())
+
+	for i := 1; i <= extraTx; i++ {
+		seenAt := now.Add(-time.Duration(i+1) * time.Minute)
+		hash := fmt.Sprintf("h%04d", i)
+		mustExec(txStmt, i, "aabb", hash, seenAt.Format(time.RFC3339), 0, 4, 1, fmt.Sprintf(`{"pubKey":"pk%04d"}`, i))
+		mustExec(obsStmt, i, i, "obs1", "Obs1", "rx", -10.0, -80.0, 5, `["aa","bb"]`, seenAt.Unix())
+	}
+}
+
+// TestLoadChunked_IncludesIDZero guards the #1486 e2e seed: a
+// transmission stored with id=0 must be present in byHash after
+// LoadChunked. An id cursor that starts at 0 and advances with a
+// strict `t2.id > cursorID` predicate would skip exactly that row,
+// whereas the legacy Load() walked transmissions unconditionally.
+func TestLoadChunked_IncludesIDZero(t *testing.T) {
+	dbPath := filepath.Join(t.TempDir(), "idzero.db")
+	createTestDBWithIDZero(t, dbPath, 10)
+
+	db, openErr := OpenDB(dbPath)
+	if openErr != nil {
+		t.Fatalf("OpenDB: %v", openErr)
+	}
+	store := NewPacketStore(db, &PacketStoreConfig{})
+	defer store.db.conn.Close()
+
+	if loadErr := store.LoadChunked(5); loadErr != nil {
+		t.Fatalf("LoadChunked: %v", loadErr)
+	}
+
+	if _, found := store.byHash["fae0c9e6d357a814"]; found {
+		return
+	}
+	t.Fatalf("LoadChunked dropped the id=0 transmission: "+
+		"byHash[fae0c9e6d357a814] missing; loaded %d packets total "+
+		"(id-cursor starts at 0 with strict `t2.id > cursorID`, "+
+		"so id=0 is excluded — this is the #1486 e2e regression)",
+		len(store.packets))
+}
@@ -0,0 +1,154 @@
+package main
+
+// Regression for PR #1596 (issue #1009) chunked load: when transmission
+// ids are anti-correlated with first_seen (e.g. id=1 has the NEWEST
+// timestamp), LoadChunked walks id-ASC and the post-load
+// `s.oldestLoaded = s.packets[0].FirstSeen` line set oldestLoaded to
+// the NEWEST first_seen. QueryPackets then mis-routed any
+// `since>=oldestLoaded` query to the SQL fallback, hiding fresh
+// in-memory rows. This shows up in real life on the e2e fixture after
+// tools/freshen-fixture.sh shifts timestamps so id=1 (originally
+// loaded first) carries the most recent first_seen.
+//
+// The mobile e2e test test-observer-iata-1188-e2e.js fails as a
+// result: with the default 15-minute time window, /api/packets returns
+// 0 rows and the mobile DOM has no `tr[data-hash]` to tap.
+//
+// This test asserts the in-memory invariant: after LoadChunked,
+// oldestLoaded must equal the actual oldest FirstSeen across loaded
+// transmissions, not the FirstSeen of the first row in s.packets.
+
+import (
+	"database/sql"
+	"fmt"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+// createTestDBReverseTime writes a SQLite fixture at dbPath with numTx
+// transmissions whose ids run 1..numTx ASC while first_seen runs
+// newest..oldest (id=1 = now, id=numTx = numTx-1 minutes ago), i.e. ids
+// and timestamps are anti-correlated. Each transmission gets one
+// observation from observer "obs1". Every setup step (open, DDL,
+// prepare, insert) fails the test through tb on error, so a broken
+// fixture cannot be mistaken for a loader regression.
+func createTestDBReverseTime(tb testing.TB, dbPath string, numTx int) {
+	tb.Helper()
+	conn, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL")
+	if err != nil {
+		tb.Fatal(err)
+	}
+	defer conn.Close()
+
+	stmts := []string{
+		`CREATE TABLE IF NOT EXISTS transmissions (
+			id INTEGER PRIMARY KEY,
+			raw_hex TEXT, hash TEXT, first_seen TEXT,
+			route_type INTEGER, payload_type INTEGER,
+			payload_version INTEGER, decoded_json TEXT
+		)`,
+		`CREATE TABLE IF NOT EXISTS observations (
+			id INTEGER PRIMARY KEY,
+			transmission_id INTEGER, observer_id TEXT, observer_name TEXT,
+			direction TEXT, snr REAL, rssi REAL, score INTEGER,
+			path_json TEXT, timestamp TEXT, raw_hex TEXT
+		)`,
+		`CREATE TABLE IF NOT EXISTS observers (rowid INTEGER PRIMARY KEY, id TEXT, name TEXT, iata TEXT)`,
+		`CREATE TABLE IF NOT EXISTS nodes (
+			pubkey TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL,
+			last_seen TEXT, first_seen TEXT, frequency REAL
+		)`,
+		`CREATE TABLE IF NOT EXISTS schema_version (version INTEGER)`,
+		`INSERT INTO schema_version (version) VALUES (1)`,
+		`CREATE INDEX IF NOT EXISTS idx_tx_first_seen ON transmissions(first_seen)`,
+	}
+	for _, s := range stmts {
+		if _, err := conn.Exec(s); err != nil {
+			tb.Fatalf("setup exec: %v\nSQL: %s", err, s)
+		}
+	}
+
+	txStmt, err := conn.Prepare("INSERT INTO transmissions (id, raw_hex, hash, first_seen, route_type, payload_type, payload_version, decoded_json) VALUES (?, ?, ?, ?, ?, ?, ?, ?)")
+	if err != nil {
+		tb.Fatalf("prepare transmissions insert: %v", err)
+	}
+	defer txStmt.Close()
+	obsStmt, err := conn.Prepare("INSERT INTO observations (id, transmission_id, observer_id, observer_name, direction, snr, rssi, score, path_json, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+	if err != nil {
+		tb.Fatalf("prepare observations insert: %v", err)
+	}
+	defer obsStmt.Close()
+
+	// mustExec runs one prepared insert and aborts the test if it fails,
+	// instead of silently producing a fixture with missing rows.
+	mustExec := func(stmt *sql.Stmt, args ...interface{}) {
+		tb.Helper()
+		if _, err := stmt.Exec(args...); err != nil {
+			tb.Fatalf("fixture insert: %v", err)
+		}
+	}
+
+	// id=1 is the NEWEST (now); id=numTx is the OLDEST (numTx-1 minutes ago).
+	now := time.Now().UTC().Truncate(time.Second)
+	for i := 1; i <= numTx; i++ {
+		seenAt := now.Add(-time.Duration(i-1) * time.Minute)
+		hash := fmt.Sprintf("h%04d", i)
+		mustExec(txStmt, i, "aabb", hash, seenAt.Format(time.RFC3339), 0, 4, 1, fmt.Sprintf(`{"pubKey":"pk%04d"}`, i))
+		mustExec(obsStmt, i, i, "obs1", "Obs1", "RX", -10.0, -80.0, 5, `["aa","bb"]`, seenAt.Unix())
+	}
+}
+
+// openReverseTimeStore builds the reverse-time fixture with numTx
+// transmissions in a fresh temp dir and returns an unloaded
+// PacketStore over it (default config). The caller closes the DB.
+func openReverseTimeStore(t *testing.T, numTx int) *PacketStore {
+	t.Helper()
+	dbPath := filepath.Join(t.TempDir(), "rev.db")
+	createTestDBReverseTime(t, dbPath, numTx)
+
+	db, openErr := OpenDB(dbPath)
+	if openErr != nil {
+		t.Fatalf("OpenDB: %v", openErr)
+	}
+	return NewPacketStore(db, &PacketStoreConfig{})
+}
+
+// TestLoadChunked_OldestLoadedIsActualOldest loads a fixture whose ids
+// are anti-correlated with first_seen (the PR #1596 regression
+// scenario) and requires oldestLoaded to equal the minimum FirstSeen
+// over all loaded packets, rather than the FirstSeen of whichever row
+// the id-ASC chunk walk happened to visit first. A too-new
+// oldestLoaded would send "since=15min ago" queries to the SQL
+// fallback and hide fresh in-memory rows.
+func TestLoadChunked_OldestLoadedIsActualOldest(t *testing.T) {
+	store := openReverseTimeStore(t, 50)
+	defer store.db.conn.Close()
+
+	if loadErr := store.LoadChunked(20); loadErr != nil {
+		t.Fatalf("LoadChunked: %v", loadErr)
+	}
+
+	loaded := store.packets
+	if len(loaded) == 0 {
+		t.Fatal("no packets loaded")
+	}
+
+	// Linear scan for the true minimum, independent of slice order.
+	minFirstSeen := loaded[0].FirstSeen
+	for _, pkt := range loaded[1:] {
+		if pkt.FirstSeen < minFirstSeen {
+			minFirstSeen = pkt.FirstSeen
+		}
+	}
+
+	if store.oldestLoaded == minFirstSeen {
+		return
+	}
+	t.Fatalf("oldestLoaded=%q must equal actual MIN(FirstSeen)=%q "+
+		"(id-ordered chunk walk with anti-correlated timestamps "+
+		"left oldestLoaded pointing at the newest row, which makes "+
+		"QueryPackets mis-route since-windowed queries to SQL fallback "+
+		"and the mobile e2e test renders 0 rows)",
+		store.oldestLoaded, minFirstSeen)
+}
+
+// TestLoadChunked_PacketsSortedByFirstSeenASC asserts the "s.packets
+// is sorted oldest-first" invariant directly. QueryPackets and
+// GetTimestamps are documented as relying on it, and an id-ASC chunk
+// walk only yields that order when ids and timestamps are correlated,
+// which the reverse-time fixture deliberately violates.
+func TestLoadChunked_PacketsSortedByFirstSeenASC(t *testing.T) {
+	store := openReverseTimeStore(t, 25)
+	defer store.db.conn.Close()
+
+	if loadErr := store.LoadChunked(10); loadErr != nil {
+		t.Fatalf("LoadChunked: %v", loadErr)
+	}
+
+	// Compare every adjacent pair; the first inversion fails the test.
+	for next := 1; next < len(store.packets); next++ {
+		prev := next - 1
+		earlier, later := store.packets[prev].FirstSeen, store.packets[next].FirstSeen
+		if earlier <= later {
+			continue
+		}
+		t.Fatalf("s.packets must be sorted by FirstSeen ASC; "+
+			"packets[%d].FirstSeen=%q > packets[%d].FirstSeen=%q",
+			prev, earlier,
+			next, later)
+	}
+}
@@ -0,0 +1,150 @@
+package main
+
+// Issue #1009: chunked Load with early HTTP readiness.
+//
+// These tests gate three behaviors:
+//   (a) FirstChunkReady() unblocks BEFORE LoadChunked returns, so the
+//       HTTP listener can bind after the first chunk completes while
+//       remaining rows continue loading in the background.
+//   (b) loadStatusMiddleware stamps an X-CoreScope-Load-Status header
+//       with "loading" + progress while a load is in flight, flipping
+//       to "ready" once LoadComplete() reports true.
+//   (c) LoadChunked honors the configured chunkSize: the per-chunk
+//       progress callback fires once per chunk, so a 2500-row DB with
+//       chunkSize=1000 must yield 3 callbacks (1000 + 1000 + 500).
+//
+// Each subtest fails on an assertion (not a build error) when the
+// production code is absent — that is the red-commit contract.
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+// openChunkedTestStore creates a numTx-transmission fixture DB in a
+// temp dir (via createTestDBAt) and returns an unloaded PacketStore
+// over it with default config. The caller closes the DB.
+func openChunkedTestStore(t *testing.T, numTx int) *PacketStore {
+	t.Helper()
+	tmpDir := t.TempDir()
+	dbFile := filepath.Join(tmpDir, "chunked.db")
+	createTestDBAt(t, dbFile, numTx)
+	t.Cleanup(func() { os.RemoveAll(tmpDir) })
+
+	db, openErr := OpenDB(dbFile)
+	if openErr != nil {
+		t.Fatalf("OpenDB: %v", openErr)
+	}
+	return NewPacketStore(db, &PacketStoreConfig{})
+}
+
+// (a) FirstChunkReady fires before LoadChunked returns.
+//
+// LoadChunked runs in a goroutine that sends its result on doneCh
+// exactly once. When both FirstChunkReady and doneCh are ready, select
+// picks a case at random, so the first select may already consume that
+// single value. loadFinished records this so the final wait does not
+// block for 30s on an already-drained channel and fail spuriously.
+func TestLoadChunked_FirstChunkReadyBeforeComplete(t *testing.T) {
+	store := openChunkedTestStore(t, 2500)
+	defer store.db.conn.Close()
+
+	doneCh := make(chan error, 1)
+	go func() { doneCh <- store.LoadChunked(500) }()
+
+	loadFinished := false
+	select {
+	case <-store.FirstChunkReady():
+		// Good: first chunk signaled. Load may or may not have completed
+		// for tiny test DBs, but the gate must have fired without
+		// requiring the full load.
+	case err := <-doneCh:
+		// If load completed before we could observe the signal, the
+		// signal still must be closed.
+		loadFinished = true
+		if err != nil {
+			t.Fatalf("LoadChunked: %v", err)
+		}
+		select {
+		case <-store.FirstChunkReady():
+		default:
+			t.Fatal("FirstChunkReady channel must be closed after LoadChunked completes")
+		}
+	case <-time.After(10 * time.Second):
+		t.Fatal("FirstChunkReady did not fire within 10s — listener would never bind")
+	}
+
+	// Drain background completion, unless the result was already
+	// received above.
+	if !loadFinished {
+		select {
+		case err := <-doneCh:
+			if err != nil {
+				t.Fatalf("LoadChunked returned error: %v", err)
+			}
+		case <-time.After(30 * time.Second):
+			t.Fatal("LoadChunked never returned")
+		}
+	}
+
+	if !store.LoadComplete() {
+		t.Fatal("LoadComplete() must report true after LoadChunked returns")
+	}
+}
+
+// (b) The middleware's X-CoreScope-Load-Status header must read as a
+//     non-empty, non-"ready" value before the load and exactly "ready"
+//     after LoadChunked succeeds.
+func TestLoadStatusMiddleware_HeaderTransition(t *testing.T) {
+	store := openChunkedTestStore(t, 100)
+	defer store.db.conn.Close()
+
+	okHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+	})
+	wrapped := loadStatusMiddleware(store, okHandler)
+
+	// loadStatus sends one request through the middleware and returns
+	// the status header stamped on the response.
+	loadStatus := func() string {
+		rec := httptest.NewRecorder()
+		wrapped.ServeHTTP(rec, httptest.NewRequest("GET", "/api/healthz", nil))
+		return rec.Header().Get("X-CoreScope-Load-Status")
+	}
+
+	// Pre-load: anything but empty or "ready" counts as "loading".
+	if before := loadStatus(); before == "" || before == "ready" {
+		t.Fatalf("expected loading status header before Load, got %q", before)
+	}
+
+	if loadErr := store.LoadChunked(50); loadErr != nil {
+		t.Fatalf("LoadChunked: %v", loadErr)
+	}
+
+	// Post-load: the header must have flipped to "ready".
+	if after := loadStatus(); after != "ready" {
+		t.Fatalf("expected X-CoreScope-Load-Status=ready after load, got %q", after)
+	}
+}
+
+// (c) LoadChunked must respect chunkSize: with 2500 rows and
+//     chunkSize=1000 the per-chunk callback fires three times, with
+//     1000, 1000 and 500 rows.
+func TestLoadChunked_ChunkSizeHonored(t *testing.T) {
+	store := openChunkedTestStore(t, 2500)
+	defer store.db.conn.Close()
+
+	var observed []int
+	store.OnChunkLoaded(func(rowsThisChunk, totalRows int) {
+		observed = append(observed, rowsThisChunk)
+	})
+
+	if loadErr := store.LoadChunked(1000); loadErr != nil {
+		t.Fatalf("LoadChunked: %v", loadErr)
+	}
+
+	expected := []int{1000, 1000, 500}
+	if len(observed) != len(expected) {
+		t.Fatalf("expected 3 chunks for 2500 rows @ chunkSize=1000, got %d (sizes=%v)", len(observed), observed)
+	}
+	for i, want := range expected {
+		if observed[i] != want {
+			t.Fatalf("expected chunk sizes [1000,1000,500], got %v", observed)
+		}
+	}
+}
+
+// (d) Config plumbing: DBLoadChunkSize() reports 10000 on an empty
+//     Config and the configured DB.Load.ChunkSize once one is set.
+func TestConfig_DBLoadChunkSize(t *testing.T) {
+	cfg := &Config{}
+	if def := cfg.DBLoadChunkSize(); def != 10000 {
+		t.Fatalf("DBLoadChunkSize() default = %d, want 10000", def)
+	}
+
+	loadCfg := &dbLoadConfig{ChunkSize: 2500}
+	cfg.DB = &DBConfig{Load: loadCfg}
+	if configured := cfg.DBLoadChunkSize(); configured != 2500 {
+		t.Fatalf("DBLoadChunkSize() configured = %d, want 2500", configured)
+	}
+}
@@ -0,0 +1,35 @@
+package main
+
+import (
+	"net/http"
+	"strconv"
+)
+
+// clampLimit turns a `limit`-shaped string into an int.
+//
+// Anything that strconv.Atoi rejects (empty, non-numeric, surrounding
+// whitespace, float-shaped, out of int range) and any value <= 0 yields
+// def. A value above max yields max. Every other value is returned as
+// parsed.
+//
+// This is the uniform helper for list-endpoint `limit` parameters; use it
+// rather than inline `if limit > N { limit = N }` checks so the absolute
+// caps stay consistent across handlers. See audit-input-vulns-20260603
+// (MEDIUM — unbounded `limit` on list endpoints).
+func clampLimit(raw string, def, max int) int {
+	parsed, err := strconv.Atoi(raw)
+	switch {
+	case err != nil, parsed <= 0:
+		// Covers "" too: Atoi reports a syntax error for the empty string.
+		return def
+	case parsed > max:
+		return max
+	default:
+		return parsed
+	}
+}
+
+// queryLimit pulls the `limit` query parameter out of r and passes it
+// through clampLimit with the given default and cap. It exists so that
+// handlers using queryInt(r, "limit", def) can switch to
+// queryLimit(r, def, max).
+func queryLimit(r *http.Request, def, max int) int {
+	rawLimit := r.URL.Query().Get("limit")
+	return clampLimit(rawLimit, def, max)
+}
@@ -0,0 +1,34 @@
+package main
+
+import "testing"
+
+// TestClampLimit is the table test for clampLimit, the shared
+// list-endpoint limit clamp introduced for audit-input-vulns-20260603
+// (MEDIUM). Every case uses default 50 and cap 500.
+func TestClampLimit(t *testing.T) {
+	const (
+		def = 50
+		max = 500
+	)
+	type limitCase struct {
+		name string
+		raw  string
+		want int
+	}
+	table := []limitCase{
+		{name: "empty returns default", raw: "", want: def},
+		{name: "non-numeric returns default", raw: "abc", want: def},
+		{name: "negative returns default", raw: "-1", want: def},
+		{name: "zero returns default", raw: "0", want: def},
+		{name: "mid-range value preserved", raw: "100", want: 100},
+		{name: "value at cap preserved", raw: "500", want: 500},
+		{name: "over-cap clamped to max", raw: "999999999", want: max},
+		{name: "just over cap clamped", raw: "501", want: max},
+		{name: "whitespace garbage returns default", raw: " 100 ", want: def},
+		{name: "float-shaped returns default", raw: "10.5", want: def},
+	}
+	for i := range table {
+		tc := table[i]
+		t.Run(tc.name, func(t *testing.T) {
+			if got := clampLimit(tc.raw, def, max); got != tc.want {
+				t.Fatalf("clampLimit(%q, %d, %d) = %d, want %d", tc.raw, def, max, got, tc.want)
+			}
+		})
+	}
+}
@@ -133,6 +133,7 @@ type NodeClockSkew struct {
 	Samples         []SkewSample `json:"samples,omitempty"` // time-series for sparklines
 	GoodFraction        float64  `json:"goodFraction"`        // fraction of recent samples with |skew| <= 1h
 	RecentBadSampleCount int     `json:"recentBadSampleCount"` // count of recent samples with |skew| > 1h
+	RecentBadSamples     []BadSample `json:"recentBadSamples,omitempty"` // #1094: per-bad-sample evidence (hash + bad advertTS)
 	RecentSampleCount    int     `json:"recentSampleCount"`    // total recent samples in window
 	RecentHashEvidence  []HashEvidence      `json:"recentHashEvidence,omitempty"`
 	CalibrationSummary  *CalibrationSummary `json:"calibrationSummary,omitempty"`
@@ -146,6 +147,15 @@ type SkewSample struct {
 	SkewSec   float64 `json:"skew"` // corrected skew in seconds
 }

+// BadSample is one recent advert flagged as having a nonsense timestamp
+// (|corrected skew| in the bimodal-bad band — > 1h, <= 24h). #1094: exposed
+// so the UI can link each offender to its packet detail page.
+type BadSample struct {
+	Hash     string  `json:"hash"`     // transmission hash, used for the packet-detail deep-link
+	AdvertTS int64   `json:"advertTS"` // the offending advert timestamp, in Unix seconds
+	SkewSec  float64 `json:"skewSec"`  // corrected skew in seconds vs observer at observation time
+}
+
 // HashEvidenceObserver is one observer's contribution to a per-hash evidence entry.
 type HashEvidenceObserver struct {
 	ObserverID      string  `json:"observerID"`
@@ -512,7 +522,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 			lastSkew = cs.LastSkewSec
 			lastAdvTS = cs.LastAdvertTS
 		}
-		tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec})
+		tsSkews = append(tsSkews, tsSkewPair{ts: cs.LastObservedTS, skew: cs.MedianSkewSec, hash: tx.Hash, advertTS: cs.LastAdvertTS})
 	}

 	if len(allSkews) == 0 {
@@ -536,6 +546,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {

 	recentSkew := lastSkew
 	var recentVals []float64
+	var recentPairs []tsSkewPair
 	if n := len(tsSkews); n > 0 {
 		latestTS := tsSkews[n-1].ts
 		// Index-based window: last K samples.
@@ -559,6 +570,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 			start = startByTime
 		}
 		recentVals = make([]float64, 0, n-start)
+		recentPairs = tsSkews[start:n]
 		for i := start; i < n; i++ {
 			recentVals = append(recentVals, tsSkews[i].skew)
 		}
@@ -583,13 +595,25 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 	// adverts had nonsense timestamps") on otherwise-healthy nodes.
 	var goodSamples []float64
 	var rtcResetCount int
-	for _, v := range recentVals {
+	var recentBadSamples []BadSample // #1094: per-bad-sample evidence (hash + advertTS)
+	for i, v := range recentVals {
 		absV := math.Abs(v)
 		switch {
 		case absV > rtcResetOutlierThresholdSec:
 			rtcResetCount++ // ignored for good/bad classification
 		case absV <= bimodalSkewThresholdSec:
 			goodSamples = append(goodSamples, v)
+		default:
+			// Bimodal-bad: 1h < |skew| <= 24h. Capture hash + advertTS so
+			// the UI can link each offender to its packet detail page
+			// instead of showing a count without evidence (#1094).
+			if i < len(recentPairs) && recentPairs[i].hash != "" {
+				recentBadSamples = append(recentBadSamples, BadSample{
+					Hash:     recentPairs[i].hash,
+					AdvertTS: recentPairs[i].advertTS,
+					SkewSec:  round(v, 1),
+				})
+			}
 		}
 	}
 	recentSampleCount := len(recentVals) - rtcResetCount
@@ -715,6 +739,7 @@ func (s *PacketStore) getNodeClockSkewLocked(pubkey string) *NodeClockSkew {
 		Samples:              samples,
 		GoodFraction:         round(goodFraction, 2),
 		RecentBadSampleCount: recentBadCount,
+		RecentBadSamples:     recentBadSamples,
 		RecentSampleCount:    recentSampleCount,
 		RecentHashEvidence:   recentEvidence,
 		CalibrationSummary:   &calSummary,
@@ -875,10 +900,16 @@ func mean(vals []float64) float64 {
 	return sum / float64(len(vals))
 }

-// tsSkewPair is a (timestamp, skew) pair for drift estimation.
+// tsSkewPair is a (timestamp, skew) pair for drift estimation. Also carries
+// the source hash + advertTS so callers building per-sample evidence (e.g.
+// recentBadSamples for #1094) can identify the offending packet without a
+// second pass. Drift code reads only ts/skew; the extra fields are inert
+// there.
 type tsSkewPair struct {
-	ts   int64
-	skew float64
+	ts       int64
+	skew     float64
+	hash     string
+	advertTS int64
 }

 // computeDrift estimates linear drift in seconds per day from time-ordered
@@ -0,0 +1,109 @@
+package main
+
+// Regression test for #1094: the bimodal-clock warning used to expose only
+// RecentBadSampleCount, leaving the UI to render "⚠️ N of M adverts had
+// nonsense timestamps" without telling the operator WHICH packets were bad.
+//
+// This test pins the additive API contract: alongside the count, the response
+// must expose RecentBadSamples — a slice of (hash, advertTS, skewSec) — so the
+// frontend can render each offending hash as a clickable link with its bad
+// timestamp.
+
+import (
+	"testing"
+	"time"
+)
+
+// Seeds 5 recent adverts: 3 healthy (~-20s skew) and 2 with a "nonsense"
+// bimodal-bad timestamp (|skew| in (1h, 24h]). The recent window is exactly
+// 5 samples, so all five are inside it.
+func seedIssue1094Repro(t *testing.T) (*PacketStore, []string, []int64) {
+	t.Helper()
+	ps := NewPacketStore(nil, nil)
+	pt := 4 // ADVERT
+
+	const pubkey = "BADTS1094"
+	baseObs := int64(1779000000)
+
+	var txs []*StoreTx
+	var badHashes []string
+	var badAdvertTSs []int64
+
+	// 3 healthy adverts (skew = -20s).
+	for i := 0; i < 3; i++ {
+		obsTS := baseObs + int64(i)*60
+		advTS := obsTS - 20
+		txs = append(txs, &StoreTx{
+			Hash:        "healthy-1094-" + formatInt64(int64(i)),
+			PayloadType: &pt,
+			DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
+			Observations: []*StoreObs{
+				{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
+			},
+		})
+	}
+
+	// 2 nonsense-timestamp adverts (skew = -7200s = -2h — bimodal-bad,
+	// below the 24h RTC-reset exclusion so they DO count in recentBadCount).
+	for i := 0; i < 2; i++ {
+		obsTS := baseObs + int64(3+i)*60
+		advTS := obsTS - 7200
+		hash := "bad-1094-" + formatInt64(int64(i))
+		txs = append(txs, &StoreTx{
+			Hash:        hash,
+			PayloadType: &pt,
+			DecodedJSON: `{"payload":{"timestamp":` + formatInt64(advTS) + `}}`,
+			Observations: []*StoreObs{
+				{ObserverID: "obs1", Timestamp: time.Unix(obsTS, 0).UTC().Format(time.RFC3339)},
+			},
+		})
+		badHashes = append(badHashes, hash)
+		badAdvertTSs = append(badAdvertTSs, advTS)
+	}
+
+	ps.mu.Lock()
+	ps.byNode[pubkey] = txs
+	for _, tx := range txs {
+		ps.byPayloadType[4] = append(ps.byPayloadType[4], tx)
+	}
+	ps.clockSkew.computeInterval = 0
+	ps.mu.Unlock()
+	return ps, badHashes, badAdvertTSs
+}
+
+func TestIssue1094_RecentBadSamples_ExposesHashAndTimestamp(t *testing.T) {
+	ps, wantHashes, wantAdvertTSs := seedIssue1094Repro(t)
+	r := ps.GetNodeClockSkew("BADTS1094")
+	if r == nil {
+		t.Fatal("expected clock skew result")
+	}
+
+	// Pre-condition: count must already be 2 (gates the test against the
+	// existing field — if this drops we'd be measuring the wrong thing).
+	if r.RecentBadSampleCount != 2 {
+		t.Fatalf("RecentBadSampleCount = %d, want 2 (seed bug, not the field-under-test)",
+			r.RecentBadSampleCount)
+	}
+
+	if len(r.RecentBadSamples) != 2 {
+		t.Fatalf("RecentBadSamples len = %d, want 2 — operators need to see which "+
+			"adverts had nonsense timestamps, not just the count",
+			len(r.RecentBadSamples))
+	}
+
+	gotByHash := map[string]int64{}
+	for _, bs := range r.RecentBadSamples {
+		gotByHash[bs.Hash] = bs.AdvertTS
+	}
+	for i, h := range wantHashes {
+		ts, ok := gotByHash[h]
+		if !ok {
+			t.Errorf("RecentBadSamples missing hash %q", h)
+			continue
+		}
+		if ts != wantAdvertTSs[i] {
+			t.Errorf("RecentBadSamples[%q].AdvertTS = %d, want %d (the bad advertTS)",
+				h, ts, wantAdvertTSs[i])
+		}
+	}
+}
@@ -8,6 +8,7 @@ import (
 	"path/filepath"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"

 	"github.com/meshcore-analyzer/dbconfig"
@@ -24,11 +25,21 @@ type AreaEntry struct {
 	LonMax  *float64     `json:"lonMax,omitempty"`
 }

+// ListLimitsConfig defines maximum row limits for list endpoints to prevent DoS.
+type ListLimitsConfig struct {
+	PacketsMax         int `json:"packetsMax"`
+	NodesMax           int `json:"nodesMax"`
+	AnalyticsMax       int `json:"analyticsMax"`
+	ChannelMessagesMax int `json:"channelMessagesMax"`
+	BulkHealthMax      int `json:"bulkHealthMax"`
+}
+
 // Config mirrors the Node.js config.json structure (read-only fields).
 type Config struct {
-	Port    int    `json:"port"`
-	APIKey  string `json:"apiKey"`
-	DBPath  string `json:"dbPath"`
+	Port       int               `json:"port"`
+	APIKey     string            `json:"apiKey"`
+	DBPath     string            `json:"dbPath"`
+	ListLimits *ListLimitsConfig `json:"listLimits"`

 	// NodeBlacklist is a list of public keys to exclude from all API responses.
 	// Blacklisted nodes are hidden from node lists, search, detail, map, and stats.
@@ -37,9 +48,40 @@ type Config struct {
 	// operator refuses to fix.
 	NodeBlacklist []string `json:"nodeBlacklist"`

-	// blacklistSetCached is the lazily-built set version of NodeBlacklist.
-	blacklistSetCached map[string]bool
-	blacklistOnce      sync.Once
+	// HiddenNamePrefixes is a list of name prefixes that mark a node as
+	// hidden from API responses (issue #1181). The default `["🚫"]` mirrors
+	// a convention used by other MeshCore map dashboards: operators who
+	// rename their node with the prefix get hidden from the map without
+	// waiting for normal retention to clear stale data. DB rows are
+	// preserved — the filter is applied at the API layer only, so the
+	// underlying observation history remains intact.
+	HiddenNamePrefixes []string `json:"hiddenNamePrefixes"`
+
+	// hiddenPrefixesPtr holds the active prefix slice as an atomic pointer.
+	// Read path (IsNameHidden) is a single atomic load — no mutex, no
+	// sync.Once. Writers always replace the whole slice; readers see either
+	// the old or the new slice as a single value, never a partial state.
+	// Mirrors blacklistSetPtr.
+	hiddenPrefixesPtr atomic.Pointer[[]string]
+
+	// hiddenPrefixesGen is a monotonic counter bumped every time the
+	// hidden-prefix list mutates via SetHiddenNamePrefixes. Cache wiring
+	// is left for follow-up; the counter is the prerequisite primitive
+	// callers will key on (mirrors blacklistGen / #1629).
+	hiddenPrefixesGen atomic.Uint64
+
+	// blacklistSetPtr holds the active lookup set as an atomic pointer.
+	// Read path is a single atomic load — no mutex, no sync.Once. Writers
+	// always replace the whole map; readers see either the old or the new
+	// map as a single value, never a partially-built one.
+	blacklistSetPtr atomic.Pointer[map[string]bool]
+
+	// blacklistGen is a monotonic generation counter bumped every time the
+	// blacklist mutates via SetNodeBlacklist. Callers that cache responses
+	// keyed by pubkey (e.g. /api/nodes/{pubkey}/reach, #1629) include this
+	// generation in their cache key so any blacklist change naturally
+	// invalidates prior entries on the next request.
+	blacklistGen atomic.Uint64

 	Branding   map[string]interface{} `json:"branding"`
 	Theme      map[string]interface{} `json:"theme"`
@@ -48,6 +90,12 @@ type Config struct {
 	TypeColors map[string]interface{} `json:"typeColors"`
 	Home       map[string]interface{} `json:"home"`

+	// #1488 — marker stroke (outline) settings. Operators dial color, width
+	// and opacity to soften the default white outline when hundreds of
+	// nodes feel overwhelming. Frontend reads these as CSS vars; see
+	// public/customize-v2.js applyCSS markerStroke block.
+	MarkerStroke map[string]interface{} `json:"markerStroke,omitempty"`
+
 	MapDefaults struct {
 		Center []float64 `json:"center"`
 		Zoom   int       `json:"zoom"`
@@ -57,7 +105,8 @@ type Config struct {

 	Roles            map[string]interface{} `json:"roles"`
 	HealthThresholds *HealthThresholds      `json:"healthThresholds"`
-	Tiles            map[string]interface{} `json:"tiles"`
+	Map              map[string]interface{} `json:"map"`
+	Tiles            map[string]interface{} `json:"tiles"` // deprecated
 	SnrThresholds    map[string]interface{} `json:"snrThresholds"`
 	DistThresholds   map[string]interface{} `json:"distThresholds"`
 	MaxHopDist       *float64               `json:"maxHopDist"`
@@ -69,6 +118,7 @@ type Config struct {

 	LiveMap struct {
 		PropagationBufferMs int `json:"propagationBufferMs"`
+		MaxNodes            int `json:"maxNodes"`
 	} `json:"liveMap"`

 	CacheTTL map[string]interface{} `json:"cacheTTL"`
@@ -79,6 +129,11 @@ type Config struct {

 	PacketStore *PacketStoreConfig `json:"packetStore,omitempty"`

+	// Runtime holds Go runtime tuning knobs (#1010).
+	// Currently exposes runtime.maxMemoryMB which sets a soft memory limit
+	// (GOMEMLIMIT) via runtime/debug.SetMemoryLimit at startup. The
+	// GOMEMLIMIT environment variable, when set, takes precedence.
+	Runtime *RuntimeConfig `json:"runtime,omitempty"`
 	GeoFilter *GeoFilterConfig `json:"geo_filter,omitempty"`

 	Areas map[string]AreaEntry `json:"areas,omitempty"`
@@ -92,6 +147,10 @@ type Config struct {

 	DebugAffinity bool `json:"debugAffinity,omitempty"`

+	// MapDarkTileProvider selects the default dark-mode basemap provider for
+	// new visitors. Deprecated: use Map.Tiles.DarkDefault instead.
+	MapDarkTileProvider string `json:"mapDarkTileProvider,omitempty"`
+
 	// ObserverBlacklist is a list of observer public keys to exclude from API
 	// responses (defense in depth — ingestor drops at ingest, server filters
 	// any that slipped through from a prior unblocked window).
@@ -105,11 +164,34 @@ type Config struct {
 	ResolvedPath  *ResolvedPathConfig  `json:"resolvedPath,omitempty"`
 	NeighborGraph *NeighborGraphConfig `json:"neighborGraph,omitempty"`

+	// Observers cache settings (#1481 P0-3 / #1483).
+	ObserversCache *ObserversCacheConfig `json:"observersCache,omitempty"`
+
 	// Analytics steady-state background recompute (issue #1240).
 	Analytics *AnalyticsConfig `json:"analytics,omitempty"`

 	// BatteryThresholds: voltage cutoffs for low/critical alerts (#663).
 	BatteryThresholds *BatteryThresholdsConfig `json:"batteryThresholds,omitempty"`
+
+	// Customizer controls operator-side knobs for the in-app customizer modal
+	// (theme/branding/etc.). See CustomizerConfig and issue #1508.
+	Customizer *CustomizerConfig `json:"customizer,omitempty"`
+
+	// Known-channels catalogue integration (issue #1323).
+	// URL of a JSON catalogue file (channels-by-country shape) fetched
+	// periodically and exposed via /api/known-channels. Empty disables.
+	KnownChannelsURL string `json:"knownChannelsUrl,omitempty"`
+	// Refresh interval in milliseconds. 0/missing => default 24h.
+	KnownChannelsRefreshMs int64 `json:"knownChannelsRefreshMs,omitempty"`
+}
+
+// CustomizerConfig holds operator-side knobs for the in-app customizer modal.
+// Today only DisabledTabs is exposed: a list of tab ids the operator wants to
+// hide from end users (e.g. ["branding","geofilter","export"]). The frontend
+// (public/customize-v2.js _renderTabs) reads this from /api/config/client and
+// filters those tabs out before rendering. Issue #1508.
+type CustomizerConfig struct {
+	DisabledTabs []string `json:"disabledTabs"`
 }

 // weakAPIKeys is the blocklist of known default/example API keys that must be rejected.
@@ -182,6 +264,21 @@ type ResolvedPathConfig struct {
 type NeighborGraphConfig struct {
 	MaxAgeDays int     `json:"maxAgeDays"` // edges older than this are pruned (default 5)
 	MaxEdgeKm  float64 `json:"maxEdgeKm"`  // geo-implausibility threshold (km); 0 = default 500; negative disables (#1228)
+
+	// CacheRecomputeIntervalSeconds: cadence for the background
+	// recomputer that rebuilds the default-shape neighbor-graph
+	// response (#1481 P0-1). 0/missing = default 300 (5 min).
+	// Lower = fresher data, more CPU per minute. #1483.
+	CacheRecomputeIntervalSeconds int `json:"cacheRecomputeIntervalSeconds,omitempty"`
+}
+
+// ObserversCacheConfig controls the /api/observers default-shape cache.
+// #1481 P0-3 / #1483.
+type ObserversCacheConfig struct {
+	// TTLSeconds: how long the cached default-shape /api/observers
+	// response is served before a singleflight-collapsed refill.
+	// 0/missing = default 30. Lower = fresher data, more SQL pressure.
+	TTLSeconds int `json:"ttlSeconds,omitempty"`
 }

 // PacketStoreConfig controls in-memory packet store limits.
@@ -195,6 +292,16 @@ type PacketStoreConfig struct {
 // GeoFilterConfig is an alias for the shared geofilter.Config type.
 type GeoFilterConfig = geofilter.Config

+// RuntimeConfig holds Go runtime tuning knobs (#1010).
+type RuntimeConfig struct {
+	// MaxMemoryMB sets the Go soft memory limit (GOMEMLIMIT) in MiB via
+	// runtime/debug.SetMemoryLimit at startup. Takes precedence over the
+	// implicit limit derived from packetStore.maxMemoryMB. The GOMEMLIMIT
+	// environment variable, when set, takes precedence over this value.
+	// 0/unset preserves default behavior.
+	MaxMemoryMB int `json:"maxMemoryMB"`
+}
+
 type RetentionConfig struct {
 	NodeDays      int `json:"nodeDays"`
 	ObserverDays  int `json:"observerDays"`
@@ -294,6 +401,10 @@ type HealthThresholds struct {
 	// repeater to be considered "actively relaying" vs only "alive
 	// (advert-only)". See issue #662. Defaults to 24h.
 	RelayActiveHours float64 `json:"relayActiveHours"`
+	// Issue #1552 — observer health classification thresholds (minutes).
+	// Unset/<=0 falls back to 60/1440 (see GetHealthThresholds); the values previously hardcoded in public/observers.js were 10/60.
+	ObserverOnlineMinutes int `json:"observerOnlineMinutes"`
+	ObserverStaleMinutes  int `json:"observerStaleMinutes"`
 }

 // ThemeFile mirrors theme.json overlay.
@@ -304,6 +415,8 @@ type ThemeFile struct {
 	NodeColors map[string]interface{} `json:"nodeColors"`
 	TypeColors map[string]interface{} `json:"typeColors"`
 	Home       map[string]interface{} `json:"home"`
+	// #1488 — marker stroke overlay from theme.json.
+	MarkerStroke map[string]interface{} `json:"markerStroke,omitempty"`
 }

 func LoadConfig(baseDirs ...string) (*Config, error) {
@@ -326,12 +439,71 @@ func LoadConfig(baseDirs ...string) (*Config, error) {
 			continue
 		}
 		cfg.NormalizeTimestampConfig()
+		cfg.migrateDeprecatedConfig()
+		cfg.applyListLimitsDefaults()
+		applyCORSEnv(cfg)
 		return cfg, nil
 	}
 	cfg.NormalizeTimestampConfig()
+	cfg.migrateDeprecatedConfig()
+	cfg.applyListLimitsDefaults()
+	applyCORSEnv(cfg)
 	return cfg, nil // defaults
 }

+func (c *Config) applyListLimitsDefaults() {
+	if c.ListLimits == nil {
+		c.ListLimits = &ListLimitsConfig{}
+	}
+	if c.ListLimits.PacketsMax <= 0 {
+		c.ListLimits.PacketsMax = 10000
+	}
+	if c.ListLimits.NodesMax <= 0 {
+		c.ListLimits.NodesMax = 2000
+	}
+	if c.ListLimits.AnalyticsMax <= 0 {
+		c.ListLimits.AnalyticsMax = 200
+	}
+	if c.ListLimits.ChannelMessagesMax <= 0 {
+		c.ListLimits.ChannelMessagesMax = 500
+	}
+	if c.ListLimits.BulkHealthMax <= 0 {
+		c.ListLimits.BulkHealthMax = 200
+	}
+}
+
+func (c *Config) migrateDeprecatedConfig() {
+	migrated := false
+	if c.Map == nil {
+		c.Map = make(map[string]interface{})
+	}
+	if c.Map["tiles"] == nil {
+		c.Map["tiles"] = make(map[string]interface{})
+	}
+	tilesMap, ok := c.Map["tiles"].(map[string]interface{})
+	if !ok {
+		return
+	}
+
+	if c.MapDarkTileProvider != "" {
+		if tilesMap["darkDefault"] == nil {
+			tilesMap["darkDefault"] = c.MapDarkTileProvider
+		}
+		migrated = true
+	}
+	if len(c.Tiles) > 0 {
+		for k, v := range c.Tiles {
+			if tilesMap[k] == nil {
+				tilesMap[k] = v
+			}
+		}
+		migrated = true
+	}
+	if migrated {
+		fmt.Fprintf(os.Stderr, "[deprecated] Top-level 'mapDarkTileProvider' and 'tiles' keys in config.json are deprecated and will be ignored in v3.5.0 (see #1165). Please move them into 'map': { 'tiles': { ... } }.\n")
+	}
+}
+
 func LoadTheme(baseDirs ...string) *ThemeFile {
 	if len(baseDirs) == 0 {
 		baseDirs = []string{"."}
@@ -380,6 +552,18 @@ func (c *Config) GetHealthThresholds() HealthThresholds {
 		if c.HealthThresholds.RelayActiveHours > 0 {
 			h.RelayActiveHours = c.HealthThresholds.RelayActiveHours
 		}
+		if c.HealthThresholds.ObserverOnlineMinutes > 0 {
+			h.ObserverOnlineMinutes = c.HealthThresholds.ObserverOnlineMinutes
+		}
+		if c.HealthThresholds.ObserverStaleMinutes > 0 {
+			h.ObserverStaleMinutes = c.HealthThresholds.ObserverStaleMinutes
+		}
+	}
+	if h.ObserverOnlineMinutes <= 0 {
+		h.ObserverOnlineMinutes = 60
+	}
+	if h.ObserverStaleMinutes <= 0 {
+		h.ObserverStaleMinutes = 1440
 	}
 	return h
 }
@@ -396,11 +580,14 @@ func (h HealthThresholds) GetHealthMs(role string) (degradedMs, silentMs int) {
 // ToClientMs returns the thresholds as ms for the frontend.
 func (h HealthThresholds) ToClientMs() map[string]int {
 	const hourMs = 3600000
+	const minMs = 60000
 	return map[string]int{
-		"infraDegradedMs": int(h.InfraDegradedHours * hourMs),
-		"infraSilentMs":   int(h.InfraSilentHours * hourMs),
-		"nodeDegradedMs":  int(h.NodeDegradedHours * hourMs),
-		"nodeSilentMs":    int(h.NodeSilentHours * hourMs),
+		"infraDegradedMs":  int(h.InfraDegradedHours * hourMs),
+		"infraSilentMs":    int(h.InfraSilentHours * hourMs),
+		"nodeDegradedMs":   int(h.NodeDegradedHours * hourMs),
+		"nodeSilentMs":     int(h.NodeSilentHours * hourMs),
+		"observerOnlineMs": h.ObserverOnlineMinutes * minMs,
+		"observerStaleMs":  h.ObserverStaleMinutes * minMs,
 	}
 }

@@ -467,31 +654,166 @@ func (c *Config) PropagationBufferMs() int {
 	return 5000
 }

-// blacklistSet lazily builds and caches the nodeBlacklist as a set for O(1) lookups.
-// Uses sync.Once to eliminate the data race on first concurrent access.
-func (c *Config) blacklistSet() map[string]bool {
-	c.blacklistOnce.Do(func() {
-		if len(c.NodeBlacklist) == 0 {
-			return
+// LiveMapMaxNodes returns the operator-configured cap on how many nodes
+// the live map fetches (and thus renders) in a single page. Default is
+// 2000; values are clamped to [100, 20000] to defang misconfig.
+// Negative/zero falls back to default. See #1574.
+func (c *Config) LiveMapMaxNodes() int {
+	const def = 2000
+	const min = 100
+	const max = 20000
+	if c == nil || c.LiveMap.MaxNodes <= 0 {
+		return def
+	}
+	v := c.LiveMap.MaxNodes
+	if v < min {
+		return min
+	}
+	if v > max {
+		return max
+	}
+	return v
+}
+
+// buildBlacklistSet recomputes the lookup set from pks and returns it.
+// Empty/whitespace-only entries are skipped. Keys are lowercased + trimmed.
+// Returns nil for an empty effective set so callers can `len(m) == 0` short-circuit.
+func buildBlacklistSet(pks []string) map[string]bool {
+	if len(pks) == 0 {
+		return nil
+	}
+	m := make(map[string]bool, len(pks))
+	for _, pk := range pks {
+		trimmed := strings.ToLower(strings.TrimSpace(pk))
+		if trimmed != "" {
+			m[trimmed] = true
 		}
-		m := make(map[string]bool, len(c.NodeBlacklist))
-		for _, pk := range c.NodeBlacklist {
-			trimmed := strings.ToLower(strings.TrimSpace(pk))
-			if trimmed != "" {
-				m[trimmed] = true
-			}
-		}
-		c.blacklistSetCached = m
-	})
-	return c.blacklistSetCached
+	}
+	if len(m) == 0 {
+		return nil
+	}
+	return m
+}
+
+// SetNodeBlacklist replaces NodeBlacklist with a copy of pks, atomically
+// publishes the rebuilt lookup set, and bumps the generation counter so any
+// generation-keyed cache invalidates on the next request (#1629). The set
+// pointer and counter are atomic; the NodeBlacklist field write is not.
+func (c *Config) SetNodeBlacklist(pks []string) {
+	if c == nil {
+		return
+	}
+	// Copy so callers can mutate their slice without affecting us.
+	cp := make([]string, len(pks))
+	copy(cp, pks)
+	c.NodeBlacklist = cp
+	m := buildBlacklistSet(cp)
+	c.blacklistSetPtr.Store(&m)
+	c.blacklistGen.Add(1)
+}
+
+// BlacklistGeneration returns a monotonic counter that increments on every
+// SetNodeBlacklist call. Response caches keyed per-pubkey embed this value
+// in their cache key so any blacklist mutation invalidates prior entries on
+// the next request (#1629).
+func (c *Config) BlacklistGeneration() uint64 {
+	if c == nil {
+		return 0
+	}
+	return c.blacklistGen.Load()
 }

 // IsBlacklisted returns true if the given public key is in the nodeBlacklist.
+// Hot read path: a single atomic pointer load + map lookup. No locks, no
+// sync.Once. The in-memory set is populated either via SetNodeBlacklist or
+// lazily on first read from c.NodeBlacklist (covering the JSON-load path
+// where the setter was never called).
 func (c *Config) IsBlacklisted(pubkey string) bool {
-	if c == nil || len(c.NodeBlacklist) == 0 {
+	if c == nil {
 		return false
 	}
-	return c.blacklistSet()[strings.ToLower(strings.TrimSpace(pubkey))]
+	mp := c.blacklistSetPtr.Load()
+	if mp == nil {
+		// Lazy first-read materialisation from the JSON-loaded slice.
+		// CAS-style: if another goroutine wins the race, drop ours.
+		built := buildBlacklistSet(c.NodeBlacklist)
+		if c.blacklistSetPtr.CompareAndSwap(nil, &built) {
+			mp = &built
+		} else {
+			mp = c.blacklistSetPtr.Load()
+		}
+	}
+	if mp == nil || len(*mp) == 0 {
+		return false
+	}
+	return (*mp)[strings.ToLower(strings.TrimSpace(pubkey))]
+}
+
+// IsNameHidden returns true if the given node name starts with any of the
+// operator-configured HiddenNamePrefixes (issue #1181). Empty/whitespace
+// prefixes are ignored. Used to drop nodes from /api/nodes, /api/nodes/search
+// and /api/nodes/{pubkey} without deleting the underlying DB row, so observer
+// history stays intact even after the operator hides the node.
+//
+// Hot read path: a single atomic pointer load. No locks, no sync.Once.
+// Writers always replace the whole slice; readers see either the old or
+// the new slice as a single value, never a partially-built one. Mirrors
+// IsBlacklisted's CAS-style lazy first-read materialisation for the
+// JSON-load path where SetHiddenNamePrefixes was never called.
+func (c *Config) IsNameHidden(name string) bool {
+	if c == nil {
+		return false
+	}
+	pp := c.hiddenPrefixesPtr.Load()
+	if pp == nil {
+		// Lazy first-read materialisation from the JSON-loaded slice.
+		// CAS-style: if another goroutine wins the race, drop ours.
+		built := make([]string, len(c.HiddenNamePrefixes))
+		copy(built, c.HiddenNamePrefixes)
+		if c.hiddenPrefixesPtr.CompareAndSwap(nil, &built) {
+			pp = &built
+		} else {
+			pp = c.hiddenPrefixesPtr.Load()
+		}
+	}
+	if pp == nil || len(*pp) == 0 {
+		return false
+	}
+	for _, p := range *pp {
+		if p == "" {
+			continue
+		}
+		if strings.HasPrefix(name, p) {
+			return true
+		}
+	}
+	return false
+}
+
+// SetHiddenNamePrefixes replaces HiddenNamePrefixes with a copy of the given
+// slice, atomically publishes that copy for IsNameHidden, and bumps the
+// generation counter. The pointer and counter are atomic; the exported field
+// write is a plain assignment. Mirrors SetNodeBlacklist (#1629).
+func (c *Config) SetHiddenNamePrefixes(prefixes []string) {
+	if c == nil {
+		return
+	}
+	cp := make([]string, len(prefixes))
+	copy(cp, prefixes)
+	c.HiddenNamePrefixes = cp
+	c.hiddenPrefixesPtr.Store(&cp)
+	c.hiddenPrefixesGen.Add(1)
+}
+
+// HiddenNamePrefixesGeneration returns a monotonic counter that increments
+// on every SetHiddenNamePrefixes call. Response caches keyed per-pubkey can
+// embed this value in their cache key so any prefix mutation invalidates
+// prior entries on the next request — same pattern as BlacklistGeneration.
+func (c *Config) HiddenNamePrefixesGeneration() uint64 {
+	if c == nil {
+		return 0
+	}
+	return c.hiddenPrefixesGen.Load()
 }

 // SaveGeoFilter writes the geo_filter section back to config.json on disk.
@@ -387,3 +387,131 @@ func TestObserverDaysOrDefault(t *testing.T) {
 		})
 	}
 }
+
+// Issue #1552 — observer health thresholds configurable.
+
+func TestObserverThresholdsOverride(t *testing.T) {
+	dir := t.TempDir()
+	cfgData := map[string]interface{}{
+		"healthThresholds": map[string]interface{}{
+			"observerOnlineMinutes": 30,
+			"observerStaleMinutes":  120,
+		},
+	}
+	data, _ := json.Marshal(cfgData)
+	os.WriteFile(filepath.Join(dir, "config.json"), data, 0644)
+	cfg, err := LoadConfig(dir)
+	if err != nil {
+		t.Fatal(err)
+	}
+	h := cfg.GetHealthThresholds()
+	if h.ObserverOnlineMinutes != 30 {
+		t.Errorf("ObserverOnlineMinutes = %d, want 30", h.ObserverOnlineMinutes)
+	}
+	if h.ObserverStaleMinutes != 120 {
+		t.Errorf("ObserverStaleMinutes = %d, want 120", h.ObserverStaleMinutes)
+	}
+	m := h.ToClientMs()
+	if m["observerOnlineMs"] != 30*60*1000 {
+		t.Errorf("observerOnlineMs = %d, want %d", m["observerOnlineMs"], 30*60*1000)
+	}
+	if m["observerStaleMs"] != 120*60*1000 {
+		t.Errorf("observerStaleMs = %d, want %d", m["observerStaleMs"], 120*60*1000)
+	}
+}
+
+func TestObserverThresholdsDefaults(t *testing.T) {
+	cfg := &Config{}
+	h := cfg.GetHealthThresholds()
+	if h.ObserverOnlineMinutes != 60 {
+		t.Errorf("default ObserverOnlineMinutes = %d, want 60", h.ObserverOnlineMinutes)
+	}
+	if h.ObserverStaleMinutes != 1440 {
+		t.Errorf("default ObserverStaleMinutes = %d, want 1440", h.ObserverStaleMinutes)
+	}
+	m := h.ToClientMs()
+	if m["observerOnlineMs"] != 3600000 {
+		t.Errorf("default observerOnlineMs = %d, want 3600000", m["observerOnlineMs"])
+	}
+	if m["observerStaleMs"] != 86400000 {
+		t.Errorf("default observerStaleMs = %d, want 86400000", m["observerStaleMs"])
+	}
+}
+
+// Loading a config with no healthThresholds block at all must still produce
+// the new 60 / 1440 defaults (not zero, not the old 10 / 60).
+func TestObserverThresholdsDefaultsFromEmptyConfigFile(t *testing.T) {
+	dir := t.TempDir()
+	os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{"port": 3000}`), 0644)
+	cfg, err := LoadConfig(dir)
+	if err != nil {
+		t.Fatal(err)
+	}
+	h := cfg.GetHealthThresholds()
+	if h.ObserverOnlineMinutes != 60 {
+		t.Errorf("empty-config ObserverOnlineMinutes = %d, want 60 (new default)", h.ObserverOnlineMinutes)
+	}
+	if h.ObserverStaleMinutes != 1440 {
+		t.Errorf("empty-config ObserverStaleMinutes = %d, want 1440 (new default)", h.ObserverStaleMinutes)
+	}
+}
+
+func TestApplyListLimitsDefaults(t *testing.T) {
+	t.Run("defaults when block is absent", func(t *testing.T) {
+		dir := t.TempDir()
+		os.WriteFile(filepath.Join(dir, "config.json"), []byte(`{"port": 3000}`), 0644)
+		cfg, err := LoadConfig(dir)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if cfg.ListLimits.PacketsMax != 10000 {
+			t.Errorf("expected 10000, got %d", cfg.ListLimits.PacketsMax)
+		}
+		if cfg.ListLimits.NodesMax != 2000 {
+			t.Errorf("expected 2000, got %d", cfg.ListLimits.NodesMax)
+		}
+		if cfg.ListLimits.AnalyticsMax != 200 {
+			t.Errorf("expected 200, got %d", cfg.ListLimits.AnalyticsMax)
+		}
+		if cfg.ListLimits.ChannelMessagesMax != 500 {
+			t.Errorf("expected 500, got %d", cfg.ListLimits.ChannelMessagesMax)
+		}
+		if cfg.ListLimits.BulkHealthMax != 200 {
+			t.Errorf("expected 200, got %d", cfg.ListLimits.BulkHealthMax)
+		}
+	})
+
+	t.Run("operator overrides honored", func(t *testing.T) {
+		dir := t.TempDir()
+		cfgData := map[string]interface{}{
+			"listLimits": map[string]interface{}{
+				"packetsMax":         50000,
+				"nodesMax":           5000,
+				"analyticsMax":       500,
+				"channelMessagesMax": 1000,
+				"bulkHealthMax":      300,
+			},
+		}
+		data, _ := json.Marshal(cfgData)
+		os.WriteFile(filepath.Join(dir, "config.json"), data, 0644)
+		cfg, err := LoadConfig(dir)
+		if err != nil {
+			t.Fatal(err)
+		}
+		if cfg.ListLimits.PacketsMax != 50000 {
+			t.Errorf("expected 50000, got %d", cfg.ListLimits.PacketsMax)
+		}
+		if cfg.ListLimits.NodesMax != 5000 {
+			t.Errorf("expected 5000, got %d", cfg.ListLimits.NodesMax)
+		}
+		if cfg.ListLimits.AnalyticsMax != 500 {
+			t.Errorf("expected 500, got %d", cfg.ListLimits.AnalyticsMax)
+		}
+		if cfg.ListLimits.ChannelMessagesMax != 1000 {
+			t.Errorf("expected 1000, got %d", cfg.ListLimits.ChannelMessagesMax)
+		}
+		if cfg.ListLimits.BulkHealthMax != 300 {
+			t.Errorf("expected 300, got %d", cfg.ListLimits.BulkHealthMax)
+		}
+	})
+}
@@ -1,10 +1,47 @@
 package main

-import "net/http"
+import (
+	"net/http"
+	"os"
+	"strings"
+)
+
+// applyCORSEnv overlays cfg.CORSAllowedOrigins from the CORS_ALLOWED_ORIGINS
+// env var when it is set and non-empty. Tokens are comma-separated, trimmed,
+// and empties dropped. The env var is the ops-friendly override; it lets
+// operators add cross-domain embed origins without editing config.json
+// (issue #1369). An unset or empty env var leaves cfg untouched, so
+// per-deployment config.json values still apply.
+func applyCORSEnv(cfg *Config) {
+	raw, ok := os.LookupEnv("CORS_ALLOWED_ORIGINS")
+	if !ok {
+		return
+	}
+	parts := strings.Split(raw, ",")
+	out := make([]string, 0, len(parts))
+	for _, p := range parts {
+		s := strings.TrimSpace(p)
+		if s != "" {
+			out = append(out, s)
+		}
+	}
+	if len(out) == 0 {
+		// Env var present but empty or only commas/whitespace — treat as unset, do not clobber.
+		return
+	}
+	cfg.CORSAllowedOrigins = out
+}

 // corsMiddleware returns a middleware that sets CORS headers based on the
 // configured allowed origins. When CORSAllowedOrigins is empty (default),
 // no Access-Control-* headers are added, preserving browser same-origin policy.
+//
+// Embed contract (issue #1369): the cross-domain surface is read-only. The
+// middleware advertises only GET, HEAD, and OPTIONS in Access-Control-Allow-
+// Methods so iframes / server-side fetchers cannot opt into POST/PUT/DELETE
+// via CORS. Same-origin writes (admin UI, API-key holders on the canonical
+// origin) are unaffected — they never go through the preflight path.
+// Credentialed CORS is intentionally NOT enabled.
 func (s *Server) corsMiddleware(next http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
 		origins := s.cfg.CORSAllowedOrigins
@@ -52,7 +89,8 @@ func (s *Server) corsMiddleware(next http.Handler) http.Handler {
 			w.Header().Set("Access-Control-Allow-Origin", reqOrigin)
 			w.Header().Set("Vary", "Origin")
 		}
-		w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS")
+		// Read-only embed contract — see comment above.
+		w.Header().Set("Access-Control-Allow-Methods", "GET, HEAD, OPTIONS")
 		w.Header().Set("Access-Control-Allow-Headers", "Content-Type, X-API-Key")

 		// Handle preflight
@@ -0,0 +1,93 @@
+package main
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"testing"
+)
+
+// Issue #1369: CORS_ALLOWED_ORIGINS env override + embed support.
+//
+// Red commit: these tests fail until LoadConfig honors the env var and the
+// CORS middleware advertises GET/HEAD/OPTIONS (the embed contract is
+// read-only cross-origin access).
+
+// TestCORS_EnvOverridesConfig_1369 — env var CORS_ALLOWED_ORIGINS replaces config.
+func TestCORS_EnvOverridesConfig_1369(t *testing.T) {
+	t.Setenv("CORS_ALLOWED_ORIGINS", "https://blog.example.com,https://embed.example.com")
+	cfg, err := LoadConfig("/nonexistent")
+	if err != nil {
+		t.Fatalf("LoadConfig: %v", err)
+	}
+	if len(cfg.CORSAllowedOrigins) != 2 {
+		t.Fatalf("expected 2 origins from env, got %v", cfg.CORSAllowedOrigins)
+	}
+	if cfg.CORSAllowedOrigins[0] != "https://blog.example.com" ||
+		cfg.CORSAllowedOrigins[1] != "https://embed.example.com" {
+		t.Fatalf("env parse wrong: %v", cfg.CORSAllowedOrigins)
+	}
+}
+
+// TestCORS_EnvEmptyKeepsConfig_1369 — an unset env var does not clobber the existing config value.
+func TestCORS_EnvEmptyKeepsConfig_1369(t *testing.T) {
+	os.Unsetenv("CORS_ALLOWED_ORIGINS")
+	cfg := &Config{CORSAllowedOrigins: []string{"https://example.com"}}
+	applyCORSEnv(cfg)
+	if len(cfg.CORSAllowedOrigins) != 1 || cfg.CORSAllowedOrigins[0] != "https://example.com" {
+		t.Fatalf("unset env should not clobber config; got %v", cfg.CORSAllowedOrigins)
+	}
+}
+
+// TestCORS_EnvTrimsWhitespace_1369 — comma-separated env tokens are trimmed.
+func TestCORS_EnvTrimsWhitespace_1369(t *testing.T) {
+	t.Setenv("CORS_ALLOWED_ORIGINS", "  https://a.example  , https://b.example ")
+	cfg := &Config{}
+	applyCORSEnv(cfg)
+	if len(cfg.CORSAllowedOrigins) != 2 {
+		t.Fatalf("expected 2, got %v", cfg.CORSAllowedOrigins)
+	}
+	if cfg.CORSAllowedOrigins[0] != "https://a.example" || cfg.CORSAllowedOrigins[1] != "https://b.example" {
+		t.Fatalf("not trimmed: %v", cfg.CORSAllowedOrigins)
+	}
+}
+
+// TestCORS_EmbedContractGETHEAD_1369 — embed contract is read-only; the
+// Access-Control-Allow-Methods header must advertise GET, HEAD, OPTIONS only
+// (no POST/PUT/DELETE) so iframes/server-side fetchers know writes are not
+// CORS-permitted. DJB hardening: minimum surface.
+func TestCORS_EmbedContractGETHEAD_1369(t *testing.T) {
+	srv := newTestServerWithCORS([]string{"https://embed.example.com"})
+	handler := srv.corsMiddleware(dummyHandler)
+
+	req := httptest.NewRequest("GET", "/api/health", nil)
+	req.Header.Set("Origin", "https://embed.example.com")
+	rr := httptest.NewRecorder()
+	handler.ServeHTTP(rr, req)
+
+	methods := rr.Header().Get("Access-Control-Allow-Methods")
+	if methods != "GET, HEAD, OPTIONS" {
+		t.Fatalf("expected read-only methods 'GET, HEAD, OPTIONS', got %q", methods)
+	}
+}
+
+// TestCORS_PreflightPOSTRejected_1369 — preflight asking for POST from an allowed
+// origin must NOT echo POST in Allow-Methods. The middleware advertises only
+// the read-only set; preflight succeeds (browser then blocks the POST).
+func TestCORS_PreflightPOSTRejected_1369(t *testing.T) {
+	srv := newTestServerWithCORS([]string{"https://embed.example.com"})
+	handler := srv.corsMiddleware(dummyHandler)
+
+	req := httptest.NewRequest("OPTIONS", "/api/anything", nil)
+	req.Header.Set("Origin", "https://embed.example.com")
+	req.Header.Set("Access-Control-Request-Method", "POST")
+	rr := httptest.NewRecorder()
+	handler.ServeHTTP(rr, req)
+
+	if rr.Code != http.StatusNoContent {
+		t.Fatalf("preflight expected 204, got %d", rr.Code)
+	}
+	if got := rr.Header().Get("Access-Control-Allow-Methods"); got != "GET, HEAD, OPTIONS" {
+		t.Fatalf("preflight must advertise read-only methods only, got %q", got)
+	}
+}
@@ -51,7 +51,7 @@ func TestCORS_AllowlistMatch(t *testing.T) {
 	if v := rr.Header().Get("Access-Control-Allow-Origin"); v != "https://good.example" {
 		t.Fatalf("expected origin echo, got %q", v)
 	}
-	if v := rr.Header().Get("Access-Control-Allow-Methods"); v != "GET, POST, OPTIONS" {
+	if v := rr.Header().Get("Access-Control-Allow-Methods"); v != "GET, HEAD, OPTIONS" {
 		t.Fatalf("expected methods header, got %q", v)
 	}
 	if v := rr.Header().Get("Access-Control-Allow-Headers"); v != "Content-Type, X-API-Key" {
@@ -2289,6 +2289,10 @@ func TestSubpathPrecomputedIndex(t *testing.T) {
 	defer db.Close()
 	store := NewPacketStore(db, nil)
 	store.Load()
+	// #1008: indexes built in background goroutine; wait before reading.
+	if !store.WaitIndexesReady(5 * time.Second) {
+		t.Fatal("indexes never became ready")
+	}

 	// After Load(), the precomputed index must be populated.
 	if len(store.spIndex) == 0 {
@@ -2343,6 +2347,10 @@ func TestSubpathTxIndexPopulated(t *testing.T) {
 	defer db.Close()
 	store := NewPacketStore(db, nil)
 	store.Load()
+	// #1008: indexes built in background goroutine; wait before reading.
+	if !store.WaitIndexesReady(5 * time.Second) {
+		t.Fatal("indexes never became ready")
+	}

 	// spTxIndex must be populated alongside spIndex
 	if len(store.spTxIndex) == 0 {
@@ -2387,6 +2395,10 @@ func TestSubpathDetailMixedCaseHops(t *testing.T) {
 	defer db.Close()
 	store := NewPacketStore(db, nil)
 	store.Load()
+	// #1008: indexes built in background goroutine; wait before reading.
+	if !store.WaitIndexesReady(5 * time.Second) {
+		t.Fatal("indexes never became ready")
+	}

 	// Query with lowercase hops to establish baseline
 	lower := store.GetSubpathDetail([]string{"eeff", "0011"})
@@ -2701,6 +2713,17 @@ func TestHandleAnalyticsDistanceWithStore(t *testing.T) {
 	router := mux.NewRouter()
 	srv.RegisterRoutes(router)

+	// #1011: lazy distance index — first request returns 202; trigger
+	// the build and wait for it before asserting the 200 shape.
+	store.TriggerDistanceIndexBuild()
+	deadline := time.Now().Add(5 * time.Second)
+	for !store.DistanceIndexBuilt() {
+		if time.Now().After(deadline) {
+			t.Fatal("distance index did not finish building within 5s")
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+
 	req := httptest.NewRequest("GET", "/api/analytics/distance", nil)
 	w := httptest.NewRecorder()
 	router.ServeHTTP(w, req)
@@ -0,0 +1,96 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"reflect"
+	"sort"
+	"testing"
+
+	"github.com/gorilla/mux"
+)
+
+// TestConfigClientExposesCustomizerDisabledTabs checks that GET
+// /api/config/client returns the operator-configured customizer.disabledTabs
+// list, which the customize-v2 frontend uses to filter tabs out of
+// _renderTabs(). Issue #1508.
+func TestConfigClientExposesCustomizerDisabledTabs(t *testing.T) {
+	testDB := setupTestDB(t)
+	seedTestData(t, testDB)
+	conf := &Config{
+		Port: 3000,
+		Customizer: &CustomizerConfig{
+			DisabledTabs: []string{"branding", "geofilter", "export"},
+		},
+	}
+	h := NewHub()
+	server := NewServer(testDB, conf, h)
+	packets := NewPacketStore(testDB, nil)
+	if loadErr := packets.Load(); loadErr != nil {
+		t.Fatalf("store.Load failed: %v", loadErr)
+	}
+	server.store = packets
+	routes := mux.NewRouter()
+	server.RegisterRoutes(routes)
+
+	request := httptest.NewRequest("GET", "/api/config/client", nil)
+	rec := httptest.NewRecorder()
+	routes.ServeHTTP(rec, request)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d (body=%s)", rec.Code, rec.Body.String())
+	}
+	var payload map[string]interface{}
+	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &payload); decodeErr != nil {
+		t.Fatalf("decode: %v", decodeErr)
+	}
+	customizer, isObject := payload["customizer"].(map[string]interface{})
+	if !isObject {
+		t.Fatalf("expected body.customizer object, got %T (body=%s)", payload["customizer"], rec.Body.String())
+	}
+	rawTabs, isArray := customizer["disabledTabs"].([]interface{})
+	if !isArray {
+		t.Fatalf("expected body.customizer.disabledTabs array, got %T", customizer["disabledTabs"])
+	}
+	tabs := make([]string, 0, len(rawTabs))
+	for _, elem := range rawTabs {
+		name, isString := elem.(string)
+		if !isString {
+			t.Fatalf("disabledTabs element not a string: %T", elem)
+		}
+		tabs = append(tabs, name)
+	}
+	sort.Strings(tabs)
+	expected := []string{"branding", "export", "geofilter"}
+	if !reflect.DeepEqual(tabs, expected) {
+		t.Errorf("disabledTabs: got %v, want %v", tabs, expected)
+	}
+}
+
+// TestConfigClientDefaultsCustomizerDisabledTabsEmpty pins the backward-
+// compat default: with no customizer block configured, customizer.disabledTabs
+// is still present as an empty array (so the frontend can blindly call .includes()).
+func TestConfigClientDefaultsCustomizerDisabledTabsEmpty(t *testing.T) {
+	_, routes := setupTestServer(t)
+	request := httptest.NewRequest("GET", "/api/config/client", nil)
+	rec := httptest.NewRecorder()
+	routes.ServeHTTP(rec, request)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d", rec.Code)
+	}
+	var payload map[string]interface{}
+	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &payload); decodeErr != nil {
+		t.Fatalf("decode: %v", decodeErr)
+	}
+	customizer, isObject := payload["customizer"].(map[string]interface{})
+	if !isObject {
+		t.Fatalf("expected body.customizer object, got %T", payload["customizer"])
+	}
+	rawTabs, isArray := customizer["disabledTabs"].([]interface{})
+	if !isArray {
+		t.Fatalf("expected body.customizer.disabledTabs array, got %T", customizer["disabledTabs"])
+	}
+	if len(rawTabs) > 0 {
+		t.Errorf("default disabledTabs should be empty, got %v", rawTabs)
+	}
+}
@@ -12,6 +12,7 @@ import (
 	"sync"
 	"time"

+	"github.com/meshcore-analyzer/dbschema"
 	"github.com/meshcore-analyzer/geofilter"
 	_ "modernc.org/sqlite"
 )
@@ -243,6 +244,21 @@ type Observer struct {
 	UptimeSecs    *int64   `json:"uptime_secs"`
 	NoiseFloor    *float64 `json:"noise_floor"`
 	LastPacketAt  *string  `json:"last_packet_at"`
+	// Issue #1478: per-observer naive-clock skew tracking.
+	// Written by the ingestor in cmd/ingestor/db.go RecordNaiveSkew whenever
+	// resolveRxTime clamps a naive envelope timestamp >15 min off UTC. The
+	// server reads these as-is; the handler derives the bool `clock_naive`
+	// from clock_last_naive_at being within the last 24h.
+	ClockSkewSeconds  *int64  `json:"clock_skew_seconds"`
+	ClockSkewCount24h int     `json:"clock_skew_count_24h"`
+	ClockLastNaiveAt  *string `json:"clock_last_naive_at"`
+	// Issue #1290: firmware 1.16 `repeat: on|off` flag persisted by the
+	// ingestor. true = relay-capable, false = listener-only, nil =
+	// unknown (legacy observer that never sent the field — drives the
+	// tri-state UI badge so legacy rows don't masquerade as confirmed
+	// repeaters). The ingestor sets can_relay_seen=1 only when it has
+	// an explicit value; the read layer returns nil when seen=0.
+	CanRelay *bool `json:"can_relay,omitempty"`
 }

 // Transmission represents a row from the transmissions table.
@@ -471,6 +487,8 @@ type PacketQuery struct {
 type PacketResult struct {
 	Packets []map[string]interface{} `json:"packets"`
 	Total   int                      `json:"total"`
+	Limit   int                      `json:"limit"`
+	Offset  int                      `json:"offset"`
 }

 // QueryPackets returns paginated, filtered packets as transmissions (matching Node.js shape).
@@ -1138,7 +1156,25 @@ func (db *DB) getObservationsForTransmissions(txIDs []int) map[int][]map[string]

 // GetObservers returns active observers (not soft-deleted) sorted by last_seen DESC.
 func (db *DB) GetObservers() ([]Observer, error) {
-	rows, err := db.conn.Query("SELECT id, name, iata, last_seen, first_seen, packet_count, model, firmware, client_version, radio, battery_mv, uptime_secs, noise_floor, last_packet_at FROM observers WHERE inactive IS NULL OR inactive = 0 ORDER BY last_seen DESC")
+	// Issue #1290: can_relay is read via COALESCE(can_relay, 1). The
+	// column is added by internal/dbschema; older test fixtures and
+	// pre-migration DBs may lack it, so we probe and fall back.
+	// PR #1624 MAJOR-2: can_relay_seen is the tri-state sentinel — 1
+	// means the ingestor explicitly wrote a value, 0 means "unknown"
+	// and the server returns CanRelay=nil so the UI shows no badge.
+	canRelayClause := "COALESCE(can_relay, 1)"
+	canRelaySeenClause := "0"
+	if hasCol, _ := dbschema.TableHasColumn(db.conn, "observers", "can_relay"); !hasCol {
+		canRelayClause = "1"
+	}
+	if hasCol, _ := dbschema.TableHasColumn(db.conn, "observers", "can_relay_seen"); hasCol {
+		canRelaySeenClause = "COALESCE(can_relay_seen, 0)"
+	}
+	rows, err := db.conn.Query(`SELECT id, name, iata, last_seen, first_seen, packet_count,
+		model, firmware, client_version, radio, battery_mv, uptime_secs, noise_floor, last_packet_at,
+		clock_skew_seconds, clock_skew_count_24h, clock_last_naive_at,
+		` + canRelayClause + `, ` + canRelaySeenClause + `
+		FROM observers WHERE inactive IS NULL OR inactive = 0 ORDER BY last_seen DESC`)
 	if err != nil {
 		return nil, err
 	}
@@ -1147,11 +1183,19 @@ func (db *DB) GetObservers() ([]Observer, error) {
 	var observers []Observer
 	for rows.Next() {
 		var o Observer
-		var batteryMv, uptimeSecs sql.NullInt64
+		var batteryMv, uptimeSecs, clockSkewSec sql.NullInt64
+		var clockSkewCount sql.NullInt64
 		var noiseFloor sql.NullFloat64
-		if err := rows.Scan(&o.ID, &o.Name, &o.IATA, &o.LastSeen, &o.FirstSeen, &o.PacketCount, &o.Model, &o.Firmware, &o.ClientVersion, &o.Radio, &batteryMv, &uptimeSecs, &noiseFloor, &o.LastPacketAt); err != nil {
+		var canRelay, canRelaySeen int
+		if err := rows.Scan(&o.ID, &o.Name, &o.IATA, &o.LastSeen, &o.FirstSeen, &o.PacketCount,
+			&o.Model, &o.Firmware, &o.ClientVersion, &o.Radio, &batteryMv, &uptimeSecs, &noiseFloor, &o.LastPacketAt,
+			&clockSkewSec, &clockSkewCount, &o.ClockLastNaiveAt, &canRelay, &canRelaySeen); err != nil {
 			continue
 		}
+		if canRelaySeen != 0 {
+			b := canRelay != 0
+			o.CanRelay = &b
+		}
 		if batteryMv.Valid {
 			v := int(batteryMv.Int64)
 			o.BatteryMv = &v
@@ -1162,21 +1206,103 @@ func (db *DB) GetObservers() ([]Observer, error) {
 		if noiseFloor.Valid {
 			o.NoiseFloor = &noiseFloor.Float64
 		}
+		if clockSkewSec.Valid {
+			v := clockSkewSec.Int64
+			o.ClockSkewSeconds = &v
+		}
+		if clockSkewCount.Valid {
+			o.ClockSkewCount24h = int(clockSkewCount.Int64)
+		}
 		observers = append(observers, o)
 	}
 	return observers, nil
 }

+// GetNonRelayObserverPubkeys lists the lowercased observer.id pubkeys of
+// active observers that advertised `repeat:off` (can_relay = 0, #1290). The
+// server's path-hop disambiguator uses the list to drop listener-only nodes
+// from its candidate set. Inactive observers are left out, matching
+// GetObservers; reactivation flips can_relay only on the next status message.
+func (db *DB) GetNonRelayObserverPubkeys() ([]string, error) {
+	// Legacy DBs / older test fixtures have no can_relay column: answer
+	// (nil, nil) quietly instead of producing schema-degradation log spam.
+	if present, _ := dbschema.TableHasColumn(db.conn, "observers", "can_relay"); !present {
+		return nil, nil
+	}
+	rows, err := db.conn.Query(`SELECT LOWER(id) FROM observers
+		WHERE COALESCE(can_relay, 1) = 0
+		  AND (inactive IS NULL OR inactive = 0)`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+	var pubkeys []string
+	for rows.Next() {
+		var id string
+		if scanErr := rows.Scan(&id); scanErr != nil || id == "" {
+			continue
+		}
+		pubkeys = append(pubkeys, id)
+	}
+	return pubkeys, rows.Err()
+}
+
+// GetCanRelaySeenObserverPubkeys lists the lowercased observer.id pubkeys of
+// active observers with can_relay_seen=1, i.e. the ingestor explicitly wrote
+// a repeat-field value. PR #1624 MAJOR-2: the badge surface renders tri-state
+// from this — observers NOT in the set are "unknown" and get no badge.
+func (db *DB) GetCanRelaySeenObserverPubkeys() ([]string, error) {
+	if present, _ := dbschema.TableHasColumn(db.conn, "observers", "can_relay_seen"); !present {
+		return nil, nil
+	}
+	rows, err := db.conn.Query(`SELECT LOWER(id) FROM observers
+		WHERE COALESCE(can_relay_seen, 0) = 1
+		  AND (inactive IS NULL OR inactive = 0)`)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+	var pubkeys []string
+	for rows.Next() {
+		var id string
+		if scanErr := rows.Scan(&id); scanErr != nil || id == "" {
+			continue
+		}
+		pubkeys = append(pubkeys, id)
+	}
+	return pubkeys, rows.Err()
+}
+
 // GetObserverByID returns a single observer.
 func (db *DB) GetObserverByID(id string) (*Observer, error) {
 	var o Observer
-	var batteryMv, uptimeSecs sql.NullInt64
+	var batteryMv, uptimeSecs, clockSkewSec sql.NullInt64
+	var clockSkewCount sql.NullInt64
 	var noiseFloor sql.NullFloat64
-	err := db.conn.QueryRow("SELECT id, name, iata, last_seen, first_seen, packet_count, model, firmware, client_version, radio, battery_mv, uptime_secs, noise_floor, last_packet_at FROM observers WHERE id = ?", id).
-		Scan(&o.ID, &o.Name, &o.IATA, &o.LastSeen, &o.FirstSeen, &o.PacketCount, &o.Model, &o.Firmware, &o.ClientVersion, &o.Radio, &batteryMv, &uptimeSecs, &noiseFloor, &o.LastPacketAt)
+	var canRelay, canRelaySeen int
+	canRelayClause := "COALESCE(can_relay, 1)"
+	canRelaySeenClause := "0"
+	if hasCol, _ := dbschema.TableHasColumn(db.conn, "observers", "can_relay"); !hasCol {
+		canRelayClause = "1"
+	}
+	if hasCol, _ := dbschema.TableHasColumn(db.conn, "observers", "can_relay_seen"); hasCol {
+		canRelaySeenClause = "COALESCE(can_relay_seen, 0)"
+	}
+	err := db.conn.QueryRow(`SELECT id, name, iata, last_seen, first_seen, packet_count,
+		model, firmware, client_version, radio, battery_mv, uptime_secs, noise_floor, last_packet_at,
+		clock_skew_seconds, clock_skew_count_24h, clock_last_naive_at,
+		`+canRelayClause+`, `+canRelaySeenClause+`
+		FROM observers WHERE id = ?`, id).
+		Scan(&o.ID, &o.Name, &o.IATA, &o.LastSeen, &o.FirstSeen, &o.PacketCount,
+			&o.Model, &o.Firmware, &o.ClientVersion, &o.Radio, &batteryMv, &uptimeSecs, &noiseFloor, &o.LastPacketAt,
+			&clockSkewSec, &clockSkewCount, &o.ClockLastNaiveAt, &canRelay, &canRelaySeen)
 	if err != nil {
 		return nil, err
 	}
+	if canRelaySeen != 0 {
+		b := canRelay != 0
+		o.CanRelay = &b
+	}
 	if batteryMv.Valid {
 		v := int(batteryMv.Int64)
 		o.BatteryMv = &v
@@ -1187,6 +1313,13 @@ func (db *DB) GetObserverByID(id string) (*Observer, error) {
 	if noiseFloor.Valid {
 		o.NoiseFloor = &noiseFloor.Float64
 	}
+	if clockSkewSec.Valid {
+		v := clockSkewSec.Int64
+		o.ClockSkewSeconds = &v
+	}
+	if clockSkewCount.Valid {
+		o.ClockSkewCount24h = int(clockSkewCount.Int64)
+	}
 	return &o, nil
 }

@@ -1964,7 +2097,10 @@ func (db *DB) GetNodeLocationsByKeys(keys []string) map[string]map[string]interf
 		placeholders[i] = "?"
 		args[i] = strings.ToLower(k)
 	}
-	query := "SELECT public_key, lat, lon, role FROM nodes WHERE LOWER(public_key) IN (" + strings.Join(placeholders, ",") + ")"
+	// #1481 P0-3: drop LOWER(public_key) — that wrap is non-sargable and
+	// forces a full scan. Nodes are stored lowercase already; we lowercase
+	// args in Go above so a plain IN matches the index on public_key.
+	query := "SELECT public_key, lat, lon, role FROM nodes WHERE public_key IN (" + strings.Join(placeholders, ",") + ")"
 	rows, err := db.conn.Query(query, args...)
 	if err != nil {
 		return result
@@ -51,7 +51,10 @@ func setupTestDB(t *testing.T) *DB {
 			uptime_secs INTEGER,
 			noise_floor REAL,
 			inactive INTEGER DEFAULT 0,
-			last_packet_at TEXT DEFAULT NULL
+			last_packet_at TEXT DEFAULT NULL,
+			clock_skew_seconds INTEGER DEFAULT NULL,
+			clock_skew_count_24h INTEGER DEFAULT 0,
+			clock_last_naive_at TEXT DEFAULT NULL
 		);

 		CREATE TABLE transmissions (
@@ -0,0 +1,114 @@
+package main
+
+import (
+	"net/http/httptest"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/gorilla/mux"
+)
+
+// Issue #1011: distance index must NOT be built eagerly at startup.
+// It is constructed lazily on first /api/analytics/distance request,
+// the first request returns 202 + Retry-After while the build runs,
+// and concurrent requests during the build also get 202 (one build
+// only, not N parallel builds).
+//
+// These three assertions encode the acceptance criteria from the
+// triage Fix path (sync.Once-style first-request trigger, 202+Retry-After).
+
+// TestDistanceIndexNotBuiltOnLoad asserts that Load() leaves distHops and
+// distPaths empty and DistanceIndexBuilt() false — no eager build (#1011).
+func TestDistanceIndexNotBuiltOnLoad(t *testing.T) {
+	richDB := setupRichTestDB(t)
+	defer richDB.Close()
+	ps := NewPacketStore(richDB, nil)
+	if loadErr := ps.Load(); loadErr != nil {
+		t.Fatalf("Load(): %v", loadErr)
+	}
+	// Snapshot both lengths under the store's read lock.
+	ps.mu.RLock()
+	hopCount, pathCount := len(ps.distHops), len(ps.distPaths)
+	ps.mu.RUnlock()
+	if hopCount > 0 || pathCount > 0 {
+		t.Fatalf("expected distance index empty after Load() (lazy build, #1011); got %d hops, %d paths — eager build still firing in Load()", hopCount, pathCount)
+	}
+	if built := ps.DistanceIndexBuilt(); built {
+		t.Fatalf("expected DistanceIndexBuilt() = false directly after Load(); got true")
+	}
+}
+
+// TestDistanceFirstRequestReturns202 asserts that the first GET of
+// /api/analytics/distance after Load() answers 202 with a non-empty
+// Retry-After header in under 500ms, i.e. without waiting on the build.
+func TestDistanceFirstRequestReturns202(t *testing.T) {
+	richDB := setupRichTestDB(t)
+	defer richDB.Close()
+	conf := &Config{Port: 3000}
+	h := NewHub()
+	server := NewServer(richDB, conf, h)
+	ps := NewPacketStore(richDB, nil)
+	if loadErr := ps.Load(); loadErr != nil {
+		t.Fatalf("Load(): %v", loadErr)
+	}
+	server.store = ps
+	routes := mux.NewRouter()
+	server.RegisterRoutes(routes)
+
+	request := httptest.NewRequest("GET", "/api/analytics/distance", nil)
+	rec := httptest.NewRecorder()
+	started := time.Now()
+	routes.ServeHTTP(rec, request)
+	took := time.Since(started)
+
+	if rec.Code != 202 {
+		t.Fatalf("expected 202 Accepted on first request (lazy build, #1011); got %d (body=%s)", rec.Code, rec.Body.String())
+	}
+	if retryAfter := rec.Header().Get("Retry-After"); retryAfter == "" {
+		t.Fatalf("expected non-empty Retry-After header on 202 response; got none")
+	}
+	// The handler has to answer promptly instead of waiting for the build.
+	if took > 500*time.Millisecond {
+		t.Fatalf("first-request handler took %v — must not block on build (#1011)", took)
+	}
+}
+
+// TestDistanceConcurrentRequestsDuringBuildReturn202: 10 concurrent requests
+// fired right after Load() must all receive 202. NOTE(review): only the 202
+// count is asserted; "exactly one build runs" is not checked here.
+func TestDistanceConcurrentRequestsDuringBuildReturn202(t *testing.T) {
+	db := setupRichTestDB(t)
+	defer db.Close()
+	cfg := &Config{Port: 3000}
+	hub := NewHub()
+	srv := NewServer(db, cfg, hub)
+	store := NewPacketStore(db, nil)
+	if err := store.Load(); err != nil {
+		t.Fatalf("Load(): %v", err)
+	}
+	srv.store = store
+	r := mux.NewRouter()
+	srv.RegisterRoutes(r)
+
+	const N = 10
+	var wg sync.WaitGroup
+	var got202 atomic.Int32
+	wg.Add(N)
+	for i := 0; i < N; i++ {
+		go func() {
+			defer wg.Done()
+			req := httptest.NewRequest("GET", "/api/analytics/distance", nil)
+			w := httptest.NewRecorder()
+			r.ServeHTTP(w, req)
+			if w.Code == 202 {
+				got202.Add(1)
+			}
+		}()
+	}
+	wg.Wait()
+	if got202.Load() != N {
+		t.Fatalf("expected all %d concurrent first-window requests to get 202; only %d did", N, got202.Load())
+	}
+}
@@ -0,0 +1,75 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/gorilla/mux"
+)
+
+// TestFirstSeen_1166_HandleNodesSurface guards issue #1166: a node returned
+// by /api/nodes must carry its stored `first_seen` ISO timestamp, which the
+// frontend needs for a sortable "First Seen" column.
+func TestFirstSeen_1166_HandleNodesSurface(t *testing.T) {
+	capDB := setupCapabilityTestDB(t)
+	defer capDB.conn.Close()
+	if _, alterErr := capDB.conn.Exec(`ALTER TABLE nodes ADD COLUMN foreign_advert INTEGER DEFAULT 0`); alterErr != nil {
+		t.Fatal(alterErr)
+	}
+
+	pubkey := "cccc000000000000000000000000000000000000000000000000000000000000"
+	firstSeen := time.Now().Add(-72 * time.Hour).UTC().Format("2006-01-02T15:04:05.000Z")
+	lastSeen := time.Now().UTC().Format("2006-01-02T15:04:05.000Z")
+	if _, insertErr := capDB.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, 'rpt', 'repeater', 37.5, -122.0, ?, ?, 5)`,
+		pubkey, lastSeen, firstSeen); insertErr != nil {
+		t.Fatal(insertErr)
+	}
+
+	ps := NewPacketStore(capDB, nil)
+	conf := &Config{Port: 3000}
+	h := NewHub()
+	server := NewServer(capDB, conf, h)
+	server.store = ps
+
+	routes := mux.NewRouter()
+	server.RegisterRoutes(routes)
+
+	request := httptest.NewRequest("GET", "/api/nodes?limit=10", nil)
+	rec := httptest.NewRecorder()
+	routes.ServeHTTP(rec, request)
+	if rec.Code != 200 {
+		t.Fatalf("/api/nodes status: want 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	var payload struct {
+		Nodes []map[string]interface{} `json:"nodes"`
+	}
+	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &payload); decodeErr != nil {
+		t.Fatalf("decode: %v body=%s", decodeErr, rec.Body.String())
+	}
+	var match map[string]interface{}
+	for _, node := range payload.Nodes {
+		if key, _ := node["public_key"].(string); key == pubkey {
+			match = node
+			break
+		}
+	}
+	if match == nil {
+		t.Fatalf("node missing from /api/nodes response")
+	}
+	rawFirstSeen, present := match["first_seen"]
+	if !present {
+		t.Fatalf("first_seen absent from /api/nodes response (issue #1166)")
+	}
+	gotFirstSeen, _ := rawFirstSeen.(string)
+	if gotFirstSeen == "" {
+		t.Errorf("first_seen empty, want ISO timestamp, got %v", rawFirstSeen)
+	}
+	if gotFirstSeen != firstSeen {
+		t.Errorf("first_seen = %q, want %q", gotFirstSeen, firstSeen)
+	}
+}
@@ -0,0 +1,85 @@
+package main
+
+import (
+	"sync"
+	"testing"
+	"time"
+)
+
+// TestGetStoreStats_CacheHit seeds the stats cache with a fresh timestamp and
+// sentinel counts, then expects GetStoreStats to return those cached counts.
+func TestGetStoreStats_CacheHit(t *testing.T) {
+	server, _ := setupTestServer(t)
+	ps := server.store
+
+	// Prime the cache as if it had just been refreshed.
+	ps.statsCacheMu.Lock()
+	ps.statsLastHour, ps.statsLast24h = 42, 777
+	ps.statsCacheTime = time.Now()
+	ps.statsCacheMu.Unlock()
+	stats, err := ps.GetStoreStats()
+	if err != nil {
+		t.Fatalf("GetStoreStats: %v", err)
+	}
+	if got := stats.PacketsLastHour; got != 42 {
+		t.Errorf("cache hit: PacketsLastHour want 42 got %d", got)
+	}
+	if got := stats.PacketsLast24h; got != 777 {
+		t.Errorf("cache hit: PacketsLast24h want 777 got %d", got)
+	}
+
+}
+
+// TestGetStoreStats_CacheExpiry seeds the cache with a 35s-old timestamp and
+// 9999 sentinels, then expects fresh values and a recently updated cache time.
+func TestGetStoreStats_CacheExpiry(t *testing.T) {
+	server, _ := setupTestServer(t)
+	ps := server.store
+
+	// Plant an expired entry holding sentinel counts.
+	ps.statsCacheMu.Lock()
+	ps.statsLastHour, ps.statsLast24h = 9999, 9999
+	ps.statsCacheTime = time.Now().Add(-35 * time.Second)
+	ps.statsCacheMu.Unlock()
+	stats, err := ps.GetStoreStats()
+	if err != nil {
+		t.Fatalf("GetStoreStats: %v", err)
+	}
+	if stats.PacketsLastHour == 9999 || stats.PacketsLast24h == 9999 {
+		t.Errorf("stale cache not expired: got PacketsLastHour=%d PacketsLast24h=%d — DB values expected, not sentinel",
+			stats.PacketsLastHour, stats.PacketsLast24h)
+	}
+
+	ps.statsCacheMu.Lock()
+	cacheAge := time.Since(ps.statsCacheTime)
+	ps.statsCacheMu.Unlock()
+	if cacheAge > 5*time.Second {
+		t.Errorf("cache not refreshed after expiry: statsCacheTime age=%v", cacheAge)
+	}
+
+}
+
+// TestGetStoreStats_CacheConcurrentReaders calls GetStoreStats from 100
+// goroutines and fails on any returned error. Data races only surface with:
+// go test -race ./... -run TestGetStoreStats_CacheConcurrentReaders
+func TestGetStoreStats_CacheConcurrentReaders(t *testing.T) {
+	server, _ := setupTestServer(t)
+	ps := server.store
+
+	var pending sync.WaitGroup
+	failures := make(chan error, 100)
+	for i := 0; i < 100; i++ {
+		pending.Add(1)
+		go func() {
+			defer pending.Done()
+			if _, err := ps.GetStoreStats(); err != nil {
+				failures <- err
+			}
+		}()
+	}
+	pending.Wait()
+	close(failures)
+	for err := range failures {
+		t.Errorf("concurrent GetStoreStats: %v", err)
+	}
+}
@@ -46,6 +46,9 @@ require github.com/meshcore-analyzer/prunequeue v0.0.0

 replace github.com/meshcore-analyzer/prunequeue => ../../internal/prunequeue

-require github.com/meshcore-analyzer/mbcapqueue v0.0.0
+require (
+	github.com/meshcore-analyzer/mbcapqueue v0.0.0
+	golang.org/x/sync v0.10.0
+)

 replace github.com/meshcore-analyzer/mbcapqueue => ../../internal/mbcapqueue
@@ -16,6 +16,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94
 github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
 golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic=
 golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI=
 golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
@@ -42,7 +42,7 @@ func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) {
 	// processed<total).
 	bfTotal, bfProcessed, bfDone := fromPubkeyBackfillSnapshot()
 	w.WriteHeader(http.StatusOK)
-	json.NewEncoder(w).Encode(map[string]interface{}{
+	resp := map[string]interface{}{
 		"ready":     true,
 		"loadedTx":  loadedTx,
 		"loadedObs": loadedObs,
@@ -51,5 +51,15 @@ func (s *Server) handleHealthz(w http.ResponseWriter, r *http.Request) {
 			"processed": bfProcessed,
 			"done":      bfDone,
 		},
-	})
+	}
+	// PR #1609 M1: surface per-MQTT-source receipt vs write-path
+	// liveness so operators can distinguish "broker alive, write
+	// path stuck" (lastReceiptUnix recent, lastMessageUnix stale)
+	// from "everything stalled" (both stale). Additive — older
+	// ingestor builds simply produce no entry and the field is
+	// omitted. Schema-compatible with prior /healthz consumers.
+	if liveness := readIngestorSourceLiveness(); len(liveness) > 0 {
+		resp["ingest_liveness"] = liveness
+	}
+	json.NewEncoder(w).Encode(resp)
 }
@@ -0,0 +1,193 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// TestHiddenNamePrefix_1181_NodeHealth inserts a node named with the 🚫
+// prefix, hides that prefix, and expects GET /api/nodes/{pk}/health to
+// answer 404 without the node name in the body.
+//
+// NOTE(review): there is no baseline request with hiding disabled, so a 404
+// produced for any other reason would also satisfy this test — confirm.
+func TestHiddenNamePrefix_1181_NodeHealth(t *testing.T) {
+	server, routes := setupTestServer(t)
+
+	pubkey := "deadbeef00001184"
+	if _, insertErr := server.db.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, ?, ?, 0, 0, '2026-06-01T00:00:00Z', '2026-06-01T00:00:00Z', 1)`,
+		pubkey, "🚫 health me", "companion"); insertErr != nil {
+		t.Fatalf("insert: %v", insertErr)
+	}
+
+	fetchHealth := func() *httptest.ResponseRecorder {
+		rec := httptest.NewRecorder()
+		request := httptest.NewRequest("GET", "/api/nodes/"+pubkey+"/health", nil)
+		routes.ServeHTTP(rec, request)
+		return rec
+	}
+
+	server.cfg.SetHiddenNamePrefixes([]string{"🚫"})
+	rec := fetchHealth()
+	if rec.Code != http.StatusNotFound {
+		t.Fatalf("hidden: expected 404 from /api/nodes/%s/health, got %d body=%s", pubkey, rec.Code, rec.Body.String())
+	}
+	if body := rec.Body.String(); strings.Contains(body, "health me") {
+		t.Fatalf("hidden: name leaked in /health 404 body: %s", body)
+	}
+}
+
+// TestHiddenNamePrefix_1181_BulkHealth inserts a 🚫-prefixed node, hides that
+// prefix, and expects the /api/nodes/bulk-health array not to contain its
+// public key — same shape as the existing blacklist filter in handleBulkHealth.
+//
+// Anti-tautology: remove the IsNameHidden branch from handleBulkHealth and
+// the hidden node leaks back into the response array; this assertion fails.
+func TestHiddenNamePrefix_1181_BulkHealth(t *testing.T) {
+	server, routes := setupTestServer(t)
+
+	pubkey := "deadbeef00001185"
+	if _, insertErr := server.db.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, ?, ?, 0, 0, '2026-06-01T00:00:00Z', '2026-06-01T00:00:00Z', 1)`,
+		pubkey, "🚫 bulk me", "companion"); insertErr != nil {
+		t.Fatalf("insert: %v", insertErr)
+	}
+
+	server.cfg.SetHiddenNamePrefixes([]string{"🚫"})
+	server.cfg.NodeBlacklist = []string{"force-filter-branch"} // force the existing blacklist branch on so results-array path is taken
+	server.cfg.SetNodeBlacklist(server.cfg.NodeBlacklist)
+
+	request := httptest.NewRequest("GET", "/api/nodes/bulk-health?limit=2000", nil)
+	rec := httptest.NewRecorder()
+	routes.ServeHTTP(rec, request)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", rec.Code, rec.Body.String())
+	}
+	var entries []map[string]interface{}
+	if decodeErr := json.Unmarshal(rec.Body.Bytes(), &entries); decodeErr != nil {
+		t.Fatalf("unmarshal: %v body=%s", decodeErr, rec.Body.String())
+	}
+	for _, entry := range entries {
+		if key, _ := entry["public_key"].(string); strings.EqualFold(key, pubkey) {
+			t.Fatalf("hidden node %s leaked through /api/nodes/bulk-health", pubkey)
+		}
+	}
+}
+
+// TestHiddenNamePrefix_1181_Paths inserts a 🚫-prefixed node, hides that
+// prefix, and expects GET /api/nodes/{pk}/paths to answer 404.
+func TestHiddenNamePrefix_1181_Paths(t *testing.T) {
+	server, routes := setupTestServer(t)
+
+	pubkey := "deadbeef00001186"
+	if _, insertErr := server.db.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, ?, ?, 0, 0, '2026-06-01T00:00:00Z', '2026-06-01T00:00:00Z', 1)`,
+		pubkey, "🚫 paths me", "companion"); insertErr != nil {
+		t.Fatalf("insert: %v", insertErr)
+	}
+
+	server.cfg.SetHiddenNamePrefixes([]string{"🚫"})
+	request := httptest.NewRequest("GET", "/api/nodes/"+pubkey+"/paths", nil)
+	rec := httptest.NewRecorder()
+	routes.ServeHTTP(rec, request)
+	if rec.Code != http.StatusNotFound {
+		t.Fatalf("hidden: expected 404 from /api/nodes/%s/paths, got %d body=%s", pubkey, rec.Code, rec.Body.String())
+	}
+}
+
+// TestHiddenNamePrefix_1181_Analytics inserts a 🚫-prefixed node, hides that
+// prefix, and expects GET /api/nodes/{pk}/analytics to answer 404.
+func TestHiddenNamePrefix_1181_Analytics(t *testing.T) {
+	server, routes := setupTestServer(t)
+
+	pubkey := "deadbeef00001187"
+	if _, insertErr := server.db.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, ?, ?, 0, 0, '2026-06-01T00:00:00Z', '2026-06-01T00:00:00Z', 1)`,
+		pubkey, "🚫 analytics me", "companion"); insertErr != nil {
+		t.Fatalf("insert: %v", insertErr)
+	}
+
+	server.cfg.SetHiddenNamePrefixes([]string{"🚫"})
+	request := httptest.NewRequest("GET", "/api/nodes/"+pubkey+"/analytics", nil)
+	rec := httptest.NewRecorder()
+	routes.ServeHTTP(rec, request)
+	if rec.Code != http.StatusNotFound {
+		t.Fatalf("hidden: expected 404 from /api/nodes/%s/analytics, got %d body=%s", pubkey, rec.Code, rec.Body.String())
+	}
+}
+
+// TestHiddenNamePrefixesGeneration_Increments asserts that each
+// SetHiddenNamePrefixes call — new value, repeated value, or nil — raises
+// HiddenNamePrefixesGeneration() by exactly one. Cache wiring lives in a
+// follow-up; the counter is the prerequisite primitive.
+func TestHiddenNamePrefixesGeneration_Increments(t *testing.T) {
+	conf := &Config{}
+	initial := conf.HiddenNamePrefixesGeneration()
+	conf.SetHiddenNamePrefixes([]string{"🚫"})
+	afterFirst := conf.HiddenNamePrefixesGeneration()
+	if afterFirst != initial+1 {
+		t.Fatalf("first SetHiddenNamePrefixes: gen %d -> %d (want +1)", initial, afterFirst)
+	}
+	conf.SetHiddenNamePrefixes([]string{"🚫"})
+	afterSecond := conf.HiddenNamePrefixesGeneration()
+	if afterSecond != afterFirst+1 {
+		t.Fatalf("second SetHiddenNamePrefixes: gen %d -> %d (want +1)", afterFirst, afterSecond)
+	}
+	conf.SetHiddenNamePrefixes(nil)
+	afterNil := conf.HiddenNamePrefixesGeneration()
+	if afterNil != afterSecond+1 {
+		t.Fatalf("nil SetHiddenNamePrefixes: gen %d -> %d (want +1)", afterSecond, afterNil)
+	}
+}
+
+// TestHiddenNamePrefixes_ConcurrentAccess runs one goroutine that keeps
+// calling SetHiddenNamePrefixes and four that keep calling IsNameHidden for
+// 250ms. Nothing is asserted beyond "doesn't panic"; the race detector is
+// not in scope for this PR's CI (see PR scope).
+func TestHiddenNamePrefixes_ConcurrentAccess(t *testing.T) {
+	conf := &Config{}
+	conf.SetHiddenNamePrefixes([]string{"🚫"})
+
+	var done atomic.Bool
+	var workers sync.WaitGroup
+
+	// Writer
+	workers.Add(1)
+	go func() {
+		defer workers.Done()
+		for n := 0; !done.Load(); n++ {
+			if n%2 != 0 {
+				conf.SetHiddenNamePrefixes([]string{"🚫"})
+			} else {
+				conf.SetHiddenNamePrefixes([]string{"🚫", "test"})
+			}
+		}
+	}()
+
+	// Readers
+	for reader := 0; reader < 4; reader++ {
+		workers.Add(1)
+		go func() {
+			defer workers.Done()
+			for !done.Load() {
+				_ = conf.IsNameHidden("🚫 something")
+				_ = conf.IsNameHidden("normal name")
+			}
+		}()
+	}
+
+	time.Sleep(250 * time.Millisecond)
+	done.Store(true)
+	workers.Wait()
+}
@@ -0,0 +1,139 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+// TestHiddenNamePrefix_1181_NodesList verifies operator-configurable
+// name-prefix hiding on the /api/nodes list (issue #1181). When
+// HiddenNamePrefixes is configured, nodes whose name begins with any
+// configured prefix are omitted from API responses (list, search, detail).
+// DB rows are preserved — filtering happens at the API layer only.
+func TestHiddenNamePrefix_1181_NodesList(t *testing.T) {
+	srv, router := setupTestServer(t)
+
+	// Insert a node whose name starts with the configured 🚫 prefix.
+	_, err := srv.db.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, ?, ?, 0, 0, '2026-06-01T00:00:00Z', '2026-06-01T00:00:00Z', 1)`,
+		"deadbeef00001181", "🚫 ban me", "companion")
+	if err != nil {
+		t.Fatalf("insert hidden node: %v", err)
+	}
+
+	get := func() []map[string]interface{} { // fetch + decode the current /api/nodes list
+		req := httptest.NewRequest("GET", "/api/nodes?limit=2000", nil)
+		w := httptest.NewRecorder()
+		router.ServeHTTP(w, req)
+		if w.Code != http.StatusOK {
+			t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
+		}
+		var resp struct {
+			Nodes []map[string]interface{} `json:"nodes"`
+		}
+		if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+			t.Fatalf("unmarshal: %v body=%s", err, w.Body.String())
+		}
+		return resp.Nodes
+	}
+
+	hasName := func(nodes []map[string]interface{}, substr string) bool { // any name containing substr?
+		for _, n := range nodes {
+			if name, _ := n["name"].(string); strings.Contains(name, substr) {
+				return true
+			}
+		}
+		return false
+	}
+
+	// Empty prefix list: node MUST be present.
+	srv.cfg.SetHiddenNamePrefixes(nil)
+	if !hasName(get(), "ban me") {
+		t.Fatalf("with empty HiddenNamePrefixes, node should be present in /api/nodes")
+	}
+
+	// Configured 🚫 prefix: node MUST be omitted.
+	srv.cfg.SetHiddenNamePrefixes([]string{"🚫"})
+	if hasName(get(), "ban me") {
+		t.Fatalf("with HiddenNamePrefixes=[\"🚫\"], node 🚫 ban me should be hidden from /api/nodes")
+	}
+}
+
+// TestHiddenNamePrefix_1181_Search ensures a node whose name starts with a
+// configured hidden prefix does not appear in /api/nodes/search results.
+func TestHiddenNamePrefix_1181_Search(t *testing.T) {
+	srv, router := setupTestServer(t)
+
+	if _, err := srv.db.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, ?, ?, 0, 0, '2026-06-01T00:00:00Z', '2026-06-01T00:00:00Z', 1)`,
+		"deadbeef00001182", "🚫 search me", "companion"); err != nil {
+		t.Fatalf("insert: %v", err)
+	}
+
+	srv.cfg.SetHiddenNamePrefixes([]string{"🚫"}) // hide before querying
+
+	req := httptest.NewRequest("GET", "/api/nodes/search?q=search", nil) // q matches the hidden node's name
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
+	}
+	var resp struct {
+		Nodes []map[string]interface{} `json:"nodes"`
+	}
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("unmarshal: %v", err)
+	}
+	for _, n := range resp.Nodes { // no returned node may carry the hidden name
+		if name, _ := n["name"].(string); strings.Contains(name, "search me") {
+			t.Fatalf("hidden node leaked through /api/nodes/search: %v", n)
+		}
+	}
+}
+
+// TestHiddenNamePrefix_1181_Detail ensures /api/nodes/{pubkey} returns 404
+// for a node whose name starts with a hidden prefix — mirroring the
+// blacklist behaviour so callers learn nothing about whether the row exists.
+func TestHiddenNamePrefix_1181_Detail(t *testing.T) {
+	srv, router := setupTestServer(t)
+
+	pk := "deadbeef00001183"
+	if _, err := srv.db.conn.Exec(`INSERT INTO nodes
+		(public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES (?, ?, ?, 0, 0, '2026-06-01T00:00:00Z', '2026-06-01T00:00:00Z', 1)`,
+		pk, "🚫 detail me", "companion"); err != nil {
+		t.Fatalf("insert: %v", err)
+	}
+
+	get := func() *httptest.ResponseRecorder { // issue one detail request and return the raw recorder
+		req := httptest.NewRequest("GET", "/api/nodes/"+pk, nil)
+		w := httptest.NewRecorder()
+		router.ServeHTTP(w, req)
+		return w
+	}
+
+	// Empty prefix list: detail MUST be reachable (200 with the name).
+	srv.cfg.SetHiddenNamePrefixes(nil)
+	w := get()
+	if w.Code != http.StatusOK {
+		t.Fatalf("baseline: expected 200, got %d body=%s", w.Code, w.Body.String())
+	}
+	if !strings.Contains(w.Body.String(), "detail me") {
+		t.Fatalf("baseline: response missing node name; body=%s", w.Body.String())
+	}
+
+	// Configured 🚫 prefix: detail MUST 404 — no name, no fields, nothing.
+	srv.cfg.SetHiddenNamePrefixes([]string{"🚫"})
+	w = get()
+	if w.Code != http.StatusNotFound {
+		t.Fatalf("hidden: expected 404, got %d body=%s", w.Code, w.Body.String())
+	}
+	if strings.Contains(w.Body.String(), "detail me") { // the 404 body itself must not echo the name
+		t.Fatalf("hidden: name leaked in 404 body: %s", w.Body.String())
+	}
+}
@@ -172,6 +172,17 @@ func TestTopHopsRespectsContextAcrossAllCallSites(t *testing.T) {
 		t.Fatalf("Load: %v", err)
 	}

+	// #1011: distance index is now lazy — trigger it explicitly and
+	// wait for build completion before inspecting distHops.
+	store.TriggerDistanceIndexBuild()
+	deadline := time.Now().Add(5 * time.Second)
+	for !store.DistanceIndexBuilt() {
+		if time.Now().After(deadline) {
+			t.Fatal("distance index did not finish building within 5s")
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+
 	// Inspect precomputed distance index.
 	store.mu.RLock()
 	hops := make([]distHopRecord, len(store.distHops))
@@ -0,0 +1,218 @@
+// Issue #1008: background-deferred subpath + pathHop index builds.
+//
+// Pattern mirrors the distance index (#1011) — but where distance is
+// fully lazy (built on first request), these two indexes are kicked off
+// eagerly by Load() in background goroutines (one per index) so HTTP
+// becomes ready immediately while the indexes finish populating.
+//
+// Concurrency model:
+//
+//   - subpathReady / pathHopReady are atomic.Bool flags written exactly
+//     once by the background builder (false → true) and never reset
+//     thereafter. Handlers read them via SubpathIndexReady() /
+//     PathHopIndexReady() before touching s.spIndex / s.spTxIndex /
+//     s.byPathHop. While a flag is false, the handler responds 503 +
+//     Retry-After: 5.
+//
+//   - The builder itself acquires s.mu.Lock() and calls the existing
+//     buildSubpathIndex() / buildPathHopIndex() methods. Those methods
+//     replace s.spIndex / s.spTxIndex / s.byPathHop with freshly-
+//     allocated maps under the write lock. Visibility of the populated
+//     maps to handlers that see Ready()==true is guaranteed by Go's
+//     sync/atomic acquire-release semantics (formalized in Go 1.19):
+//     the atomic.Store(true) happens-after the s.mu.Unlock() that
+//     completes the build, and the handler's atomic.Load()==true
+//     synchronizes-with that store. The handler's subsequent s.mu.RLock
+//     is not what establishes visibility — it only serializes against
+//     concurrent ingest writers — so dropping the RLock would still be
+//     safe for the build's "populated map" snapshot (we keep it for
+//     ingest serialization).
+//
+//   - Ingest-side incremental updates in StoreNewTransmissions /
+//     pruning / hash-collision paths continue to write s.spIndex /
+//     s.spTxIndex / s.byPathHop directly under s.mu.Lock(). Because
+//     the builder also runs under s.mu.Lock() and the builder
+//     overwrites whatever is there, the brief window between Load()
+//     returning and the goroutine acquiring s.mu means any
+//     concurrent ingest writes will be overwritten by the build —
+//     this matches the prior behavior where ingest could not start
+//     until Load() released s.mu, so in practice ingest does not
+//     run during the build window. Documenting this rather than
+//     adding a separate gate: the existing main.go boot sequence
+//     does not start ingest goroutines until after store.Load()
+//     and graph init complete.
+//
+// Handler scope of the ready gate (issue #1008 review M2):
+//
+//   - HARD-GATED with 503 + Retry-After: 5 — analytics endpoints whose
+//     entire response is the index aggregate. Empty data would be
+//     visibly broken (charts, top-N tables). See routes.go:
+//     /api/analytics/subpaths, /api/analytics/subpaths-bulk,
+//     /api/analytics/subpath-detail, /api/nodes/{pubkey}/paths.
+//
+//   - BEST-EFFORT (not gated) — endpoints where the index drives
+//     enrichment fields that callers already treat as optional. During
+//     the not-ready window these report zero counts / nil scores
+//     rather than 503-ing the whole list. Acceptable because:
+//
+//       * /api/nodes and /api/nodes/{pubkey} have many other fields
+//         (last-seen, position, advert metadata) that callers depend
+//         on at startup. 503-ing the SPA bootstrap to wait for an
+//         index that exclusively affects "relay activity" badges
+//         would be a worse UX than a 30–60s window of "—" badges.
+//
+//       * GetRepeaterRelayInfoMap / GetRepeaterUsefulnessScoreMap /
+//         GetBridgeScore / repeater_liveness / repeater_usefulness
+//         all walk s.byPathHop. During the build window they return
+//         empty maps or zero scores; the steady-state recomputer
+//         (#1262) refreshes them every 5min once indexes flip ready
+//         (prewarm guarded by WaitIndexesReady — see review M1).
+//
+//     This is documented rather than gated so operators do not see
+//     /api/nodes 503 during routine restarts on Cascadia-scale data.
+package main
+
+import (
+	"log"
+	"net/http"
+	"time"
+)
+
+// writeIndexLoading503 emits the standard 503 response used by handlers
+// that depend on a not-yet-built index (#1008). Body shape matches the
+// triage spec: {"error":"index loading","retryAfter":5}. The Retry-After
+// header is also set so well-behaved clients back off automatically.
+func writeIndexLoading503(w http.ResponseWriter) {
+	w.Header().Set("Retry-After", "5") // seconds; same value as the body's retryAfter
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusServiceUnavailable) // headers above must be set before this call
+	_, _ = w.Write([]byte(`{"error":"index loading","retryAfter":5}`)) // write error deliberately ignored
+}
+
+// SubpathIndexReady reports whether the subpath index build kicked off
+// by Load() has completed (#1008). Until this returns true, callers
+// must NOT read s.spIndex / s.spTxIndex.
+func (s *PacketStore) SubpathIndexReady() bool {
+	return s.subpathReady.Load() // atomic flag read
+}
+
+// PathHopIndexReady reports whether the path-hop index build kicked
+// off by Load() has completed (#1008). Until this returns true,
+// callers must NOT read s.byPathHop.
+func (s *PacketStore) PathHopIndexReady() bool {
+	return s.pathHopReady.Load() // atomic flag read
+}
+
+// indexReadyCh returns the channel that is closed when BOTH indexes
+// have flipped ready. Lazily created on first access. Safe to call
+// concurrently (guarded by indexReadyChMu). Used by WaitIndexesReady and
+// any future waiters that want event-driven semantics instead of polling.
+func (s *PacketStore) indexReadyCh() <-chan struct{} {
+	s.indexReadyChMu.Lock()
+	defer s.indexReadyChMu.Unlock()
+	if s.indexReadyChan == nil { // first caller allocates the channel
+		s.indexReadyChan = make(chan struct{})
+		// If both are already ready (e.g. background chunk loader
+		// flipped them synchronously before any waiter showed up),
+		// close immediately so the channel is usable as a one-shot.
+		if s.subpathReady.Load() && s.pathHopReady.Load() {
+			close(s.indexReadyChan)
+		}
+	}
+	return s.indexReadyChan
+}
+
+// maybeCloseIndexReadyCh closes the ready channel iff both flags are
+// set. Idempotent (closed-state check under indexReadyChMu, so no double
+// close) and safe to call from either builder goroutine on the green-path
+// transitions, as well as from markIndexesReadySync.
+func (s *PacketStore) maybeCloseIndexReadyCh() {
+	if !(s.subpathReady.Load() && s.pathHopReady.Load()) {
+		return // at least one index is still building; nothing to signal yet
+	}
+	s.indexReadyChMu.Lock()
+	defer s.indexReadyChMu.Unlock()
+	if s.indexReadyChan == nil {
+		// Lazily allocate AND close it in one step so any future
+		// indexReadyCh() caller gets a pre-closed channel.
+		s.indexReadyChan = make(chan struct{})
+		close(s.indexReadyChan)
+		return
+	}
+	select { // non-blocking receive: succeeds only if the channel is already closed
+	case <-s.indexReadyChan:
+		// Already closed.
+	default:
+		close(s.indexReadyChan)
+	}
+}
+
+// startBackgroundIndexBuilds is called from Load() after s.loaded=true
+// to populate the subpath + path-hop indexes off the critical path
+// (#1008). It returns immediately; the work runs in two background
+// goroutines (one per index — see review m7) that each acquire
+// s.mu.Lock() independently, install their map, then set the
+// corresponding atomic ready flag.
+//
+// At Cascadia scale (~5M observations) this previously blocked HTTP
+// readiness ~60s inside Load() under s.mu. NOTE(review): both builders
+// take the same exclusive s.mu, so the builds serialize; the gain is
+// that each flag flips as soon as its own build ends — confirm intent.
+func (s *PacketStore) startBackgroundIndexBuilds() {
+	go func() {
+		t0 := time.Now()
+		s.mu.Lock()
+		s.buildSubpathIndex()
+		s.mu.Unlock()
+		// Atomic.Store happens-after s.mu.Unlock; handlers that
+		// observe Ready()==true synchronize-with this store.
+		s.subpathReady.Store(true)
+		s.maybeCloseIndexReadyCh() // closes the channel only if pathHop is ready too
+		log.Printf("[startup] index build complete: subpath (%s)",
+			time.Since(t0).Round(time.Millisecond))
+	}()
+	go func() {
+		t1 := time.Now()
+		s.mu.Lock()
+		s.buildPathHopIndex()
+		s.mu.Unlock()
+		s.pathHopReady.Store(true) // same ordering as above: flag flips after Unlock
+		s.maybeCloseIndexReadyCh()
+		log.Printf("[startup] index build complete: pathHop (%s)",
+			time.Since(t1).Round(time.Millisecond))
+	}()
+}
+
+// markIndexesReadySync is the synchronous-build entry point used by
+// the background chunk loader in store.go (and by tests). The chunk
+// loader rebuilds both indexes under s.mu.Lock(); after the Unlock it
+// calls this to flip the ready flags and close the broadcast channel
+// in one shot, preserving symmetry with the goroutine path above.
+func (s *PacketStore) markIndexesReadySync() {
+	s.subpathReady.Store(true)
+	s.pathHopReady.Store(true)
+	s.maybeCloseIndexReadyCh() // both flags are set, so this closes (or pre-closes) the channel
+}
+
+// WaitIndexesReady blocks until both background indexes built by
+// startBackgroundIndexBuilds() report ready, or the deadline expires.
+// Returns true if both flipped in time. Intended for tests that read
+// s.spIndex / s.spTxIndex / s.byPathHop directly after Load(); production
+// code paths gate via SubpathIndexReady() / PathHopIndexReady() and
+// respond 503 + Retry-After to clients instead of blocking.
+//
+// Uses the indexReadyCh broadcast channel rather than polling
+// (see review m6) so wake-up is immediate with no poll-interval jitter.
+func (s *PacketStore) WaitIndexesReady(timeout time.Duration) bool {
+	if s.SubpathIndexReady() && s.PathHopIndexReady() {
+		return true // fast path: no channel or timer needed
+	}
+	ch := s.indexReadyCh()
+	select {
+	case <-ch:
+		return true
+	case <-time.After(timeout):
+		return s.SubpathIndexReady() && s.PathHopIndexReady() // re-check: flags may have flipped right at the deadline
+	}
+}
+
@@ -0,0 +1,144 @@
+// Issue #1008: subpath + pathHop index builds must move off the
+// synchronous Load() critical path into a background goroutine.
+//
+// Contract:
+//   1. Immediately after Load() returns, SubpathIndexReady() and
+//      PathHopIndexReady() report false (the goroutine has not finished).
+//   2. Analytics handlers that depend on those indices respond 503 with
+//      Retry-After: 5 until the corresponding ready flag flips true.
+//   3. After the background build completes (waitable via a helper),
+//      both flags flip true and handlers respond 200.
+package main
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+// TestIssue1008_SubpathIndexReadyFalseImmediatelyAfterLoad asserts the
+// subpath ready flag is false the instant Load() returns (red commit: a
+// stub returning true fails). NOTE(review): the background builder may
+// already be running here — confirm a fast build cannot flake this.
+func TestIssue1008_SubpathIndexReadyFalseImmediatelyAfterLoad(t *testing.T) {
+	db := setupRichTestDB(t)
+	defer db.Close()
+	store := NewPacketStore(db, nil)
+	if err := store.Load(); err != nil {
+		t.Fatalf("Load() error: %v", err)
+	}
+	if store.SubpathIndexReady() { // checked immediately, with no wait
+		t.Fatal("expected SubpathIndexReady()==false immediately after Load(); want background-deferred build (#1008)")
+	}
+}
+
+// TestIssue1008_PathHopIndexReadyFalseImmediatelyAfterLoad: same contract
+// for the path-hop index (flag must still be false right after Load()).
+func TestIssue1008_PathHopIndexReadyFalseImmediatelyAfterLoad(t *testing.T) {
+	db := setupRichTestDB(t)
+	defer db.Close()
+	store := NewPacketStore(db, nil)
+	if err := store.Load(); err != nil {
+		t.Fatalf("Load() error: %v", err)
+	}
+	if store.PathHopIndexReady() { // checked immediately, with no wait
+		t.Fatal("expected PathHopIndexReady()==false immediately after Load(); want background-deferred build (#1008)")
+	}
+}
+
+// TestIssue1008_HandlerReturns503WhileSubpathIndexLoading asserts the
+// analytics/subpaths handler returns 503 + Retry-After: 5 + a JSON body
+// whose "error" field is "index loading" while the index is still building.
+func TestIssue1008_HandlerReturns503WhileSubpathIndexLoading(t *testing.T) {
+	db := setupRichTestDB(t)
+	defer db.Close()
+	store := NewPacketStore(db, nil)
+	if err := store.Load(); err != nil {
+		t.Fatalf("Load() error: %v", err)
+	}
+	// Don't wait for the background build — we want to observe the
+	// not-ready window.
+	cfg := &Config{}
+	cfg.applyListLimitsDefaults()
+	srv := &Server{store: store, cfg: cfg}
+
+	req := httptest.NewRequest("GET", "/api/analytics/subpaths?minLen=2&maxLen=4&limit=10", nil)
+	rec := httptest.NewRecorder()
+	srv.handleAnalyticsSubpaths(rec, req) // handler invoked directly, not through a router
+
+	if rec.Code != http.StatusServiceUnavailable {
+		t.Fatalf("status = %d, want 503 (subpath index loading, #1008)", rec.Code)
+	}
+	if got := rec.Header().Get("Retry-After"); got != "5" {
+		t.Errorf("Retry-After header = %q, want %q", got, "5")
+	}
+	var body map[string]interface{}
+	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
+		t.Fatalf("body not valid JSON: %v (body=%s)", err, rec.Body.String())
+	}
+	if body["error"] != "index loading" {
+		t.Errorf(`body["error"] = %v, want "index loading"`, body["error"])
+	}
+}
+
+// TestIssue1008_HandlerRecoversAfterIndexReady asserts that, once both
+// background builds complete, the analytics/subpaths handler returns 200.
+func TestIssue1008_HandlerRecoversAfterIndexReady(t *testing.T) {
+	db := setupRichTestDB(t)
+	defer db.Close()
+	store := NewPacketStore(db, nil)
+	if err := store.Load(); err != nil {
+		t.Fatalf("Load() error: %v", err)
+	}
+
+	// Poll up to 5s for both background builds (rich test DB has ~3
+	// packets). NOTE(review): store.WaitIndexesReady could replace this.
+	deadline := time.Now().Add(5 * time.Second)
+	for time.Now().Before(deadline) {
+		if store.SubpathIndexReady() && store.PathHopIndexReady() {
+			break
+		}
+		time.Sleep(10 * time.Millisecond)
+	}
+	if !store.SubpathIndexReady() { // report which flag failed to flip
+		t.Fatal("SubpathIndexReady() never flipped true within 5s")
+	}
+	if !store.PathHopIndexReady() {
+		t.Fatal("PathHopIndexReady() never flipped true within 5s")
+	}
+
+	cfg := &Config{}
+	cfg.applyListLimitsDefaults()
+	srv := &Server{store: store, cfg: cfg}
+	req := httptest.NewRequest("GET", "/api/analytics/subpaths?minLen=2&maxLen=4&limit=10", nil)
+	rec := httptest.NewRecorder()
+	srv.handleAnalyticsSubpaths(rec, req)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status after ready = %d, want 200 (body=%s)", rec.Code, rec.Body.String())
+	}
+}
+
+// TestIssue1008_m7_BothFlagsSetAfterParallelStart verifies that the
+// two-goroutine version of startBackgroundIndexBuilds (review m7) sets
+// BOTH ready flags within a bounded wait, regardless of which goroutine
+// wins the race to s.mu.Lock(). Sanity check that breaking the two
+// builds apart didn't drop the pathHop flag flip.
+func TestIssue1008_m7_BothFlagsSetAfterParallelStart(t *testing.T) {
+	db := setupRichTestDB(t)
+	defer db.Close()
+	store := NewPacketStore(db, nil)
+	if err := store.Load(); err != nil {
+		t.Fatalf("Load: %v", err)
+	}
+	if !store.WaitIndexesReady(5 * time.Second) { // event-driven wait, no polling
+		t.Fatal("indexes never ready after parallel start (#1008 m7)")
+	}
+	if !store.SubpathIndexReady() {
+		t.Error("subpath flag not set after WaitIndexesReady returned true")
+	}
+	if !store.PathHopIndexReady() {
+		t.Error("pathHop flag not set after WaitIndexesReady returned true")
+	}
+}
@@ -0,0 +1,224 @@
+package main
+
+// Known-channels catalogue cache (issue #1323).
+//
+// Fetches a community-maintained catalogue of hashtag channels (default:
+// https://raw.githubusercontent.com/marcelverdult/meshcore-channels/main/channels-by-country.json)
+// every N hours into an in-memory snapshot. Never blocks startup; never
+// blocks UI on the fetch; fail-soft to last-known. No DB, no disk cache.
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"strings"
+	"sync/atomic"
+	"time"
+)
+
+// DefaultKnownChannelsURL is the suggested upstream catalogue, pinned to a
+// specific commit SHA so a hostile or compromised future commit on the
+// community repo cannot be silently fetched by deployments that opt in.
+// Operators should periodically bump this pin (see config.example.json).
+// NOTE: this constant is only used by tests and as documentation — the
+// feature is OPT-IN: an empty cfg.KnownChannelsURL leaves the cache
+// disabled (no background fetch, /api/known-channels serves empty).
+const DefaultKnownChannelsURL = "https://raw.githubusercontent.com/marcelverdult/meshcore-channels/072bc25b6fc983aa2aa7e9d399a97a5f4899ea71/channels-by-country.json"
+
+// DefaultKnownChannelsRefresh is the fallback refresh interval (24h) used when refresh <= 0.
+const DefaultKnownChannelsRefresh = 24 * time.Hour
+
+// maxKnownChannelsBytes caps how much of the upstream response we read
+// and parse (the catalogue is ~80 KB today; 4 MB ceiling is plenty of headroom
+// and bounds memory if upstream ever ships a malicious oversize payload).
+const maxKnownChannelsBytes = 4 * 1024 * 1024
+
+// KnownChannelEntry is one catalogue entry, stamped with its region code.
+type KnownChannelEntry struct {
+	Channel     string `json:"channel"`               // e.g. "#antwerpen" (# prefix preserved)
+	Description string `json:"description,omitempty"` // copied verbatim from upstream
+	Key         string `json:"key,omitempty"` // optional PSK (base64) — present for some entries
+	Region      string `json:"region"`        // ISO 3166-1 alpha-2 lowercase
+	RegionName  string `json:"regionName,omitempty"` // upstream countryNames value for the code, if any
+}
+
+// KnownChannelsSnapshot is the immutable parsed catalogue surfaced over /api.
+type KnownChannelsSnapshot struct {
+	GeneratedAt string              `json:"generatedAt,omitempty"` // upstream generation timestamp
+	License     string              `json:"license,omitempty"` // upstream license string, passed through
+	FetchedAt   time.Time           `json:"fetchedAt"` // when this snapshot was parsed
+	Source      string              `json:"source"` // where the payload came from (the fetch URL)
+	Entries     []KnownChannelEntry `json:"entries"` // flattened across all countries
+}
+
+// upstreamPayload mirrors the channels-by-country.json shape (decode target only).
+type upstreamPayload struct {
+	GeneratedAt  string                              `json:"generated_at"`
+	License      string                              `json:"license"`
+	Countries    map[string][]upstreamCountryChannel `json:"countries"` // keyed by country code
+	CountryNames map[string]string                   `json:"countryNames,omitempty"` // optional extension
+}
+
+type upstreamCountryChannel struct { // one channel row inside upstreamPayload.Countries
+	Channel     string `json:"channel"`
+	Description string `json:"description"`
+	Key         string `json:"key,omitempty"`
+}
+
+// parseKnownChannelsJSON parses the upstream JSON into a snapshot.
+// Tolerant: empty countries are skipped and blank channels dropped. Entry
+// order across countries follows Go map iteration, so it is not stable.
+func parseKnownChannelsJSON(raw []byte, source string, now time.Time) (*KnownChannelsSnapshot, error) {
+	if len(raw) == 0 {
+		return nil, errors.New("empty payload")
+	}
+	var p upstreamPayload
+	if err := json.Unmarshal(raw, &p); err != nil {
+		return nil, fmt.Errorf("decode catalogue: %w", err)
+	}
+	out := &KnownChannelsSnapshot{
+		GeneratedAt: p.GeneratedAt,
+		License:     p.License,
+		FetchedAt:   now,
+		Source:      source,
+		Entries:     make([]KnownChannelEntry, 0, 256), // non-nil so JSON marshals as [] even when empty
+	}
+	for code, list := range p.Countries {
+		if len(list) == 0 {
+			continue
+		}
+		region := strings.ToLower(strings.TrimSpace(code)) // normalized code stamped on every entry
+		name := p.CountryNames[code] // looked up by the raw key; "" when absent
+		for _, c := range list {
+			ch := strings.TrimSpace(c.Channel)
+			if ch == "" {
+				continue
+			}
+			out.Entries = append(out.Entries, KnownChannelEntry{
+				Channel:     ch,
+				Description: c.Description,
+				Key:         c.Key,
+				Region:      region,
+				RegionName:  name,
+			})
+		}
+	}
+	return out, nil
+}
+
+// filterSnapshotByRegion returns a copy filtered to the given region
+// (case-insensitive). Empty/whitespace region returns the original snapshot
+// (entry slice shared — callers must not mutate). Unknown region returns
+// a snapshot with an empty (but non-nil) Entries slice so JSON marshals as `[]`.
+func filterSnapshotByRegion(snap *KnownChannelsSnapshot, region string) *KnownChannelsSnapshot {
+	if snap == nil {
+		return nil // nil in, nil out
+	}
+	region = strings.ToLower(strings.TrimSpace(region))
+	if region == "" {
+		return snap
+	}
+	out := &KnownChannelsSnapshot{
+		GeneratedAt: snap.GeneratedAt,
+		License:     snap.License,
+		FetchedAt:   snap.FetchedAt,
+		Source:      snap.Source,
+		Entries:     []KnownChannelEntry{},
+	}
+	for _, e := range snap.Entries {
+		if e.Region == region { // exact match against the lowercased query
+			out.Entries = append(out.Entries, e)
+		}
+	}
+	return out
+}
+
+// knownChannelsCache holds the atomic snapshot pointer + fetch config + counters.
+type knownChannelsCache struct {
+	ptr     atomic.Pointer[KnownChannelsSnapshot] // current snapshot; nil until the first successful fetch
+	url     string // upstream catalogue URL; empty disables fetching
+	refresh time.Duration // interval between background fetches
+	client  *http.Client // client used for every upstream request
+
+	fetchCount atomic.Int64 // # successful upstream fetches
+	failCount  atomic.Int64 // # failed fetches (fail-soft)
+}
+
+func newKnownChannelsCache(url string, refresh time.Duration) *knownChannelsCache {
+	if refresh <= 0 { // non-positive interval: fall back to the 24h default
+		refresh = DefaultKnownChannelsRefresh
+	}
+	return &knownChannelsCache{
+		url:     url,
+		refresh: refresh,
+		client:  &http.Client{Timeout: 30 * time.Second}, // bounds each upstream request
+	}
+}
+
+// load returns the current snapshot, or nil if no fetch has succeeded yet.
+func (c *knownChannelsCache) load() *KnownChannelsSnapshot {
+	return c.ptr.Load()
+}
+
+// fetchOnce performs one upstream fetch: stores the parsed snapshot on success,
+// keeps the last-known one on any failure (fail-soft), oversize bodies included.
+func (c *knownChannelsCache) fetchOnce(ctx context.Context) error {
+	if c.url == "" {
+		return errors.New("known channels url not configured")
+	}
+	fail := func(err error) error { c.failCount.Add(1); return err } // count each failed attempt once
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.url, nil)
+	if err != nil {
+		return fail(err)
+	}
+	req.Header.Set("User-Agent", "CoreScope-KnownChannels/1.0 (+https://github.com/Kpa-clawbot/CoreScope)")
+	resp, err := c.client.Do(req)
+	if err != nil {
+		return fail(err)
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return fail(fmt.Errorf("upstream status %s", resp.Status))
+	}
+	// Read one byte past the cap so an oversize body is rejected, not truncated.
+	body, err := io.ReadAll(io.LimitReader(resp.Body, maxKnownChannelsBytes+1))
+	if err != nil {
+		return fail(err)
+	}
+	if len(body) > maxKnownChannelsBytes {
+		return fail(fmt.Errorf("upstream payload exceeds %d bytes", maxKnownChannelsBytes))
+	}
+	snap, err := parseKnownChannelsJSON(body, c.url, time.Now())
+	if err != nil {
+		return fail(err)
+	}
+	c.ptr.Store(snap)
+	c.fetchCount.Add(1)
+	return nil
+}
+
+// run kicks off the background fetch loop in a new goroutine. Does an
+// initial fetch (fail-soft) and then ticks every refresh interval until
+// ctx is cancelled. Never blocks the caller — startup proceeds immediately
+// even if the upstream is slow or unreachable.
+func (c *knownChannelsCache) run(ctx context.Context) {
+	if c.url == "" {
+		return // no URL configured: no goroutine is started
+	}
+	go func() {
+		_ = c.fetchOnce(ctx) // initial fetch, fail-soft
+		t := time.NewTicker(c.refresh)
+		defer t.Stop()
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-t.C:
+				_ = c.fetchOnce(ctx) // error ignored: the previous snapshot stays in place
+			}
+		}
+	}()
+}
@@ -0,0 +1,236 @@
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/gorilla/mux"
+)
+
+// Canned fixture mirroring the upstream channels-by-country.json shape
+// (https://raw.githubusercontent.com/marcelverdult/meshcore-channels/main/channels-by-country.json
+// pinned 2026-05-24). Three countries: two with entries (be, us) and one
+// empty (ad) to exercise the "skip empty countries" branch.
+const knownChannelsFixture = `{
+  "generated_at": "2026-05-24T22:29:02Z",
+  "license": "CC0-1.0",
+  "countries": {
+    "be": [
+      {"channel": "#antwerpen", "description": "antwerpen"},
+      {"channel": "#bemesh",    "description": "bemesh"}
+    ],
+    "us": [
+      {"channel": "#bayarea", "description": "Bay Area"}
+    ],
+    "ad": []
+  }
+}`
+
+// (a) The parser turns the canned JSON fixture into a populated snapshot.
+func TestKnownChannelsParseFixture(t *testing.T) {
+	snap, err := parseKnownChannelsJSON([]byte(knownChannelsFixture), "fixture://test", time.Unix(1700000000, 0))
+	if err != nil {
+		t.Fatalf("parseKnownChannelsJSON: %v", err)
+	}
+	if snap == nil {
+		t.Fatal("snapshot is nil")
+	}
+	if snap.GeneratedAt != "2026-05-24T22:29:02Z" {
+		t.Errorf("GeneratedAt = %q, want 2026-05-24T22:29:02Z", snap.GeneratedAt)
+	}
+	if snap.License != "CC0-1.0" {
+		t.Errorf("License = %q, want CC0-1.0", snap.License)
+	}
+	if snap.Source != "fixture://test" {
+		t.Errorf("Source = %q, want fixture://test", snap.Source)
+	}
+	if got, want := len(snap.Entries), 3; got != want { // 2 (be) + 1 (us) + 0 (ad)
+		t.Fatalf("len(Entries) = %d, want %d (empty country ad must be skipped)", got, want)
+	}
+	// Spot-check one entry's region stamping (searched, since order is not fixed).
+	var foundAntwerpen bool
+	for _, e := range snap.Entries {
+		if e.Channel == "#antwerpen" {
+			foundAntwerpen = true
+			if e.Region != "be" {
+				t.Errorf("antwerpen Region = %q, want be", e.Region)
+			}
+		}
+	}
+	if !foundAntwerpen {
+		t.Fatal("antwerpen entry missing from snapshot")
+	}
+}
+
+// (b) GET /api/known-channels?region=be returns 200 + only the "be" entries.
+func TestKnownChannelsRouteRegionFilter(t *testing.T) {
+	snap, err := parseKnownChannelsJSON([]byte(knownChannelsFixture), "fixture://test", time.Now())
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	srv := &Server{
+		knownChannels: &knownChannelsCache{}, // zero-value cache: snapshot is injected below, no fetch
+	}
+	srv.knownChannels.ptr.Store(snap)
+
+	r := mux.NewRouter()
+	r.HandleFunc("/api/known-channels", srv.handleKnownChannels).Methods("GET")
+
+	req := httptest.NewRequest(http.MethodGet, "/api/known-channels?region=be", nil)
+	w := httptest.NewRecorder()
+	r.ServeHTTP(w, req)
+	if w.Code != http.StatusOK {
+		t.Fatalf("status = %d, want 200; body=%s", w.Code, w.Body.String())
+	}
+	var resp KnownChannelsSnapshot
+	if err := json.Unmarshal(w.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("unmarshal: %v; body=%s", err, w.Body.String())
+	}
+	if got := len(resp.Entries); got != 2 {
+		t.Fatalf("filtered entries = %d, want 2 (be has 2); got body=%s", got, w.Body.String())
+	}
+	for _, e := range resp.Entries { // every surviving entry must be "be" and keep its # prefix
+		if e.Region != "be" {
+			t.Errorf("entry %q has region %q, want be", e.Channel, e.Region)
+		}
+		if !strings.HasPrefix(e.Channel, "#") {
+			t.Errorf("entry channel %q missing # prefix", e.Channel)
+		}
+	}
+}
+
+// (c) Cache survives upstream 500 (fail-soft): a prior good snapshot must
+// remain available after a failed refresh, and failCount must increase.
+func TestKnownChannelsFailSoftOn500(t *testing.T) {
+	// First server: returns the fixture (success).
+	good := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(knownChannelsFixture))
+	}))
+	defer good.Close()
+
+	c := newKnownChannelsCache(good.URL, time.Hour)
+	if err := c.fetchOnce(context.Background()); err != nil {
+		t.Fatalf("initial fetchOnce: %v", err)
+	}
+	first := c.load()
+	if first == nil || len(first.Entries) == 0 {
+		t.Fatal("first snapshot must be populated")
+	}
+
+	// Second server: always 500.
+	bad := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		http.Error(w, "boom", http.StatusInternalServerError)
+	}))
+	defer bad.Close()
+
+	// Re-point the cache to the failing upstream and fetch.
+	c.url = bad.URL
+	err := c.fetchOnce(context.Background())
+	if err == nil {
+		t.Fatal("expected fetchOnce to return error on 500")
+	}
+	after := c.load() // must still be the snapshot from the good server
+	if after == nil {
+		t.Fatal("snapshot wiped after failed fetch — must be fail-soft")
+	}
+	if len(after.Entries) != len(first.Entries) {
+		t.Errorf("snapshot entry count changed after failed fetch: was %d, now %d", len(first.Entries), len(after.Entries))
+	}
+	if c.failCount.Load() < 1 {
+		t.Errorf("failCount = %d, want >=1", c.failCount.Load())
+	}
+}
+
+// (d) Malformed JSON returns an error AND increments failCount via
+// fetchOnce (the parse path lives inside fetchOnce so the metric is
+// the cache-level signal operators see, not just the parser's return).
+func TestKnownChannelsParseError(t *testing.T) {
+	// parser-level: garbage in, error out.
+	if _, err := parseKnownChannelsJSON([]byte("{not json"), "fixture://bad", time.Now()); err == nil {
+		t.Fatal("parseKnownChannelsJSON: expected error on malformed JSON")
+	}
+	// cache-level: a 200 with malformed body must bump failCount and
+	// leave any prior snapshot in place.
+	bad := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte("{not json"))
+	}))
+	defer bad.Close()
+	c := newKnownChannelsCache(bad.URL, time.Hour)
+	before := c.failCount.Load()
+	if err := c.fetchOnce(context.Background()); err == nil {
+		t.Fatal("fetchOnce: expected parse error to surface")
+	}
+	if c.failCount.Load() <= before {
+		t.Errorf("failCount did not increment: before=%d after=%d", before, c.failCount.Load())
+	}
+	if c.fetchCount.Load() != 0 {
+		t.Errorf("fetchCount = %d, want 0 (parse failed)", c.fetchCount.Load())
+	}
+}
+
+// (e) Nil-cache fail-soft: with Server.knownChannels left nil (the state
+// the original comment attributes to the startup window and to the
+// opt-in feature being disabled), the handler must still answer 200 with
+// a well-formed snapshot whose Entries is an empty JSON array, and must
+// set a Cache-Control header.
+func TestKnownChannelsHandlerNilCache(t *testing.T) {
+	// Zero-value Server: knownChannels is deliberately never assigned.
+	srv := &Server{}
+	router := mux.NewRouter()
+	router.HandleFunc("/api/known-channels", srv.handleKnownChannels).Methods("GET")
+
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/known-channels", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, want 200 (nil cache must fail-soft); body=%s", rec.Code, rec.Body.String())
+	}
+
+	var empty KnownChannelsSnapshot
+	if err := json.Unmarshal(rec.Body.Bytes(), &empty); err != nil {
+		t.Fatalf("unmarshal: %v; body=%s", err, rec.Body.String())
+	}
+	// A JSON null would decode to a nil slice; [] decodes to a non-nil
+	// empty slice, which is what the UI contract asks for.
+	switch {
+	case empty.Entries == nil:
+		t.Fatal("Entries is nil, want non-nil empty slice (JSON [] not null)")
+	case len(empty.Entries) != 0:
+		t.Errorf("Entries len = %d, want 0", len(empty.Entries))
+	}
+	if rec.Header().Get("Cache-Control") == "" {
+		t.Errorf("Cache-Control header missing on nil-cache response")
+	}
+}
+
+// (f) "?region=" (present but empty) must behave exactly like an absent
+// filter: the handler returns the full snapshot rather than an empty
+// list, and still sets Cache-Control.
+func TestKnownChannelsRegionEmptyPassthrough(t *testing.T) {
+	full, parseErr := parseKnownChannelsJSON([]byte(knownChannelsFixture), "fixture://test", time.Now())
+	if parseErr != nil {
+		t.Fatalf("parse: %v", parseErr)
+	}
+	cache := &knownChannelsCache{}
+	cache.ptr.Store(full)
+	srv := &Server{knownChannels: cache}
+
+	router := mux.NewRouter()
+	router.HandleFunc("/api/known-channels", srv.handleKnownChannels).Methods("GET")
+
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/known-channels?region=", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status = %d, want 200; body=%s", rec.Code, rec.Body.String())
+	}
+
+	var unfiltered KnownChannelsSnapshot
+	if err := json.Unmarshal(rec.Body.Bytes(), &unfiltered); err != nil {
+		t.Fatalf("unmarshal: %v; body=%s", err, rec.Body.String())
+	}
+	// Entry count must match the seeded snapshot one-for-one.
+	if want, got := len(full.Entries), len(unfiltered.Entries); got != want {
+		t.Fatalf("empty region must return unfiltered snapshot: got %d entries, want %d", got, want)
+	}
+	if rec.Header().Get("Cache-Control") == "" {
+		t.Errorf("Cache-Control header missing on populated response")
+	}
+}
@@ -0,0 +1,38 @@
+package main
+
+import (
+	"net/http"
+	"time"
+)
+
+// handleKnownChannels — GET /api/known-channels?region=XX
+//
+// Serves the cached community catalogue of hashtag channels (issue #1323).
+// The optional "region" query value is handed unchanged to
+// filterSnapshotByRegion, which owns the matching rules (described for
+// this endpoint as ISO 3166-1 alpha-2, case-insensitive).
+//
+// The handler never waits on the upstream fetch: it only reads whatever
+// snapshot the cache currently holds. When the cache is nil or has no
+// snapshot yet, it still answers with a well-formed snapshot whose
+// Entries is an empty (non-nil) slice so the UI degrades gracefully.
+func (s *Server) handleKnownChannels(w http.ResponseWriter, r *http.Request) {
+	var current *KnownChannelsSnapshot
+	if cache := s.knownChannels; cache != nil {
+		current = cache.load()
+	}
+
+	if current != nil {
+		// The catalogue is refreshed upstream roughly daily; a 5 min
+		// browser cache stays far below that while sparing the endpoint
+		// from repeated sidebar re-renders.
+		w.Header().Set("Cache-Control", "public, max-age=300")
+		writeJSON(w, filterSnapshotByRegion(current, r.URL.Query().Get("region")))
+		return
+	}
+
+	// Nothing cached (slow first fetch or feature disabled): keep the
+	// max-age short so clients re-check soon instead of pinning an empty
+	// list for the whole page lifetime.
+	w.Header().Set("Cache-Control", "public, max-age=30")
+	writeJSON(w, &KnownChannelsSnapshot{
+		FetchedAt: time.Time{},
+		Source:    "",
+		Entries:   []KnownChannelEntry{},
+	})
+}
@@ -0,0 +1,67 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http/httptest"
+	"testing"
+)
+
+// Behavior test (#1574): /api/config/client has to publish
+// `liveMapMaxNodes` so the frontend can follow the operator-configured
+// live-map node cap rather than the 2000 hardcoded in public/live.js.
+// Operators set it through `liveMap.maxNodes` in config.json; the server
+// clamps it to [100, 20000]. With the default test server the value is
+// expected to be 2000.
+func TestConfigClientExposesLiveMapMaxNodes(t *testing.T) {
+	_, router := setupTestServer(t)
+
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest("GET", "/api/config/client", nil))
+	if rec.Code != 200 {
+		t.Fatalf("expected 200, got %d", rec.Code)
+	}
+
+	var payload map[string]interface{}
+	if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil {
+		t.Fatalf("decode body: %v", err)
+	}
+	raw, found := payload["liveMapMaxNodes"]
+	if !found {
+		t.Fatal("expected liveMapMaxNodes in /api/config/client response")
+	}
+	// encoding/json decodes JSON numbers into float64 when the target is
+	// an interface{} value.
+	num, isNumber := raw.(float64)
+	if !isNumber {
+		t.Fatalf("expected liveMapMaxNodes to be a number, got %T", raw)
+	}
+	if got := int(num); got != 2000 {
+		t.Errorf("expected default liveMapMaxNodes=2000, got %d", got)
+	}
+}
+
+// Server-side clamp: whatever an operator puts in LiveMap.MaxNodes,
+// Config.LiveMapMaxNodes() must come back inside [100, 20000]. Zero
+// (unset) and negative values are expected to yield the 2000 default.
+func TestLiveMapMaxNodesClamp(t *testing.T) {
+	for _, tc := range []struct {
+		name       string
+		configured int
+		expected   int
+	}{
+		{"default-when-unset", 0, 2000},
+		{"negative-clamps-to-default", -42, 2000},
+		{"below-min-clamps-up", 50, 100},
+		{"in-range-passthrough", 4300, 4300},
+		{"above-max-clamps-down", 99999, 20000},
+		{"exact-min", 100, 100},
+		{"exact-max", 20000, 20000},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			cfg := &Config{}
+			cfg.LiveMap.MaxNodes = tc.configured
+			if got := cfg.LiveMapMaxNodes(); got != tc.expected {
+				t.Errorf("LiveMapMaxNodes() with set=%d: want %d, got %d",
+					tc.configured, tc.expected, got)
+			}
+		})
+	}
+}
@@ -0,0 +1,90 @@
+package main
+
+import (
+	"database/sql"
+	"path/filepath"
+	"testing"
+	"time"
+
+	_ "modernc.org/sqlite"
+)
+
+// TestLoad_PanicsWhenGraphNotLoadedAndEdgesExist pins the startup-ordering
+// invariant (munger R1 #2). Graph-load-before-packet-load is the entire
+// premise of PR #1643's fix: without an in-memory neighbor graph, the
+// path_json relay-hop fallback cannot resolve hops, so relay-node analytics
+// history collapses. main.go currently does the right thing — but nothing
+// asserts the ordering, so a future refactor could silently regress.
+//
+// Load() must panic when neighbor_edges has rows but s.graph.Load() returns
+// nil. Fast-fail at startup beats silently-wrong attribution.
+//
+// The test builds a throwaway SQLite file with one neighbor_edges row,
+// opens it through OpenDB, creates a PacketStore WITHOUT storing a graph,
+// and then requires store.Load() to panic. Note that any panic satisfies
+// the assertion; the panic value itself is not inspected.
+func TestLoad_PanicsWhenGraphNotLoadedAndEdgesExist(t *testing.T) {
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "test.db")
+
+	// NOTE(review): modernc.org/sqlite documents DSN pragmas in the form
+	// _pragma=journal_mode(WAL); confirm that _journal_mode actually has
+	// an effect with this driver.
+	rw, err := sql.Open("sqlite", "file:"+dbPath+"?_journal_mode=WAL")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer rw.Close()
+
+	// exec runs one setup statement and aborts the test on any SQL error.
+	exec := func(s string, args ...interface{}) {
+		if _, err := rw.Exec(s, args...); err != nil {
+			t.Fatalf("setup exec failed: %v\nSQL: %s", err, s)
+		}
+	}
+
+	// Minimal CoreScope schema. PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE transmissions (
+		id INTEGER PRIMARY KEY,
+		raw_hex TEXT, hash TEXT, first_seen TEXT,
+		route_type INTEGER, payload_type INTEGER, payload_version INTEGER,
+		decoded_json TEXT
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE observations (
+		id INTEGER PRIMARY KEY, transmission_id INTEGER,
+		observer_id TEXT, observer_name TEXT,
+		direction TEXT, snr REAL, rssi REAL, score INTEGER,
+		path_json TEXT, timestamp TEXT, raw_hex TEXT, resolved_path TEXT
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE observers (rowid INTEGER PRIMARY KEY, id TEXT, name TEXT, iata TEXT)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE nodes (
+		public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL,
+		last_seen TEXT, first_seen TEXT, advert_count INTEGER DEFAULT 0
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE schema_version (version INTEGER)`)
+	exec(`INSERT INTO schema_version (version) VALUES (1)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE neighbor_edges (
+		node_a TEXT NOT NULL,
+		node_b TEXT NOT NULL,
+		count INTEGER DEFAULT 1,
+		last_seen TEXT,
+		PRIMARY KEY (node_a, node_b)
+	)`)
+	// A single edge row is enough to create the "edges exist" half of the
+	// bug condition.
+	now := time.Now().UTC().Format(time.RFC3339)
+	exec(`INSERT INTO neighbor_edges (node_a, node_b, count, last_seen) VALUES (?, ?, ?, ?)`,
+		"aaa", "bbb", 5, now)
+
+	d, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatalf("OpenDB: %v", err)
+	}
+	defer d.conn.Close()
+
+	// Deliberately DO NOT call store.graph.Store(...). s.graph.Load() returns
+	// nil → the bug condition the invariant guard must catch.
+	store := NewPacketStore(d, &PacketStoreConfig{RetentionHours: 72})
+
+	// recover() must observe a panic raised by store.Load() below; a clean
+	// return from Load() is the failure case.
+	defer func() {
+		r := recover()
+		if r == nil {
+			t.Fatalf("Load() must panic when neighbor_edges has rows but graph is nil; got no panic")
+		}
+	}()
+	_ = store.Load()
+}
@@ -0,0 +1,172 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"path/filepath"
+	"testing"
+	"time"
+
+	_ "modernc.org/sqlite"
+)
+
+// createTestDBAmbiguousPrefix builds a fixture where TWO repeaters share the
+// same 2-char hop prefix. An observation's path_json carries ONLY the
+// ambiguous prefix (no longer prefix that would disambiguate). With no
+// neighbor_edges seeded, the cold-load fallback in scanAndMergeChunk has
+// nothing to anchor on — yet the pre-fix code resolved the prefix anyway
+// (via observation_count_fallback or candidate[0]) and over-attributed the
+// hop to ONE of the two repeaters. That is the time-travel bug munger
+// flagged: the historical packet's actual relay is unknown, but the loader
+// picks today's tier-4 winner against ~7-day-old observations.
+//
+// Parameters:
+//   - relayA, relayB: public keys inserted as "repeater" nodes (advert_count
+//     50 and 10 respectively).
+//   - hop: the path hop written, as a one-element JSON array, into the single
+//     observation's path_json.
+//   - firstSeen: timestamp string used for both the transmission's first_seen
+//     and the observation's timestamp.
+//
+// Returns the path of the SQLite file, created under t.TempDir(). The setup
+// connection is closed before returning. Any SQL error aborts the test.
+func createTestDBAmbiguousPrefix(t *testing.T, relayA, relayB, hop, firstSeen string) string {
+	t.Helper()
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "test.db")
+
+	// NOTE(review): modernc.org/sqlite documents DSN pragmas in the form
+	// _pragma=journal_mode(WAL); confirm that _journal_mode actually has
+	// an effect with this driver.
+	conn, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer conn.Close()
+
+	// exec runs one setup statement and aborts the test on any SQL error.
+	exec := func(s string, args ...interface{}) {
+		if _, err := conn.Exec(s, args...); err != nil {
+			t.Fatalf("setup exec failed: %v\nSQL: %s", err, s)
+		}
+	}
+
+	// PREFLIGHT: async=true reason="test fixture: in-memory t.TempDir SQLite, never touches a real DB."
+	exec(`CREATE TABLE transmissions (
+		id INTEGER PRIMARY KEY,
+		raw_hex TEXT, hash TEXT, first_seen TEXT,
+		route_type INTEGER, payload_type INTEGER, payload_version INTEGER,
+		decoded_json TEXT
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE observations (
+		id INTEGER PRIMARY KEY,
+		transmission_id INTEGER,
+		observer_id TEXT, observer_name TEXT,
+		direction TEXT, snr REAL, rssi REAL, score INTEGER,
+		path_json TEXT, timestamp TEXT,
+		raw_hex TEXT,
+		resolved_path TEXT
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE observers (rowid INTEGER PRIMARY KEY, id TEXT, name TEXT, iata TEXT)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE nodes (
+		public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL,
+		last_seen TEXT, first_seen TEXT, advert_count INTEGER DEFAULT 0
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE schema_version (version INTEGER)`)
+	exec(`INSERT INTO schema_version (version) VALUES (1)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE INDEX idx_tx_first_seen ON transmissions(first_seen)`)
+
+	// Two repeaters sharing the same 2-char prefix `hop`.
+	// Different advert_counts so tier-4 tiebreak deterministically picks one
+	// (proving the bug: it over-attributes to the higher-count node).
+	exec(`INSERT INTO nodes (public_key, name, role, advert_count) VALUES (?,?,?,?)`,
+		relayA, "Relay A", "repeater", 50)
+	exec(`INSERT INTO nodes (public_key, name, role, advert_count) VALUES (?,?,?,?)`,
+		relayB, "Relay B", "repeater", 10)
+
+	// firstSeen decides which loader sees the row: the loadChunk test passes
+	// a 48h-old timestamp, the hot-path test passes the current time.
+	// decoded_json is `{}` and resolved_path is NULL, so path_json is the
+	// only relay information stored for this transmission.
+	exec("INSERT INTO transmissions VALUES (?,?,?,?,0,4,1,?)",
+		1, "aa", "hashamb_1", firstSeen, `{}`)
+	exec("INSERT INTO observations (id, transmission_id, observer_id, observer_name, direction, snr, rssi, score, path_json, timestamp, raw_hex, resolved_path) VALUES (?,?,?,?,?,?,?,?,?,?,?,NULL)",
+		1, 1, "obs1", "Obs1", "RX", -10.0, -80.0, 5, fmt.Sprintf(`[%q]`, hop), firstSeen, "")
+
+	return dbPath
+}
+
+// TestLoadChunk_AmbiguousPrefix_SkipsAttribution pins the fix for the
+// time-travel attribution gate (munger R1 #1). When path_json carries an
+// ambiguous prefix that matches multiple repeaters, the cold-load path
+// MUST NOT pick a winner via affinity/observation-count tiebreak — today's
+// affinity winner is not necessarily the historical hop. Safer to
+// under-attribute (skip byNode for that hop) than to mis-attribute.
+func TestLoadChunk_AmbiguousPrefix_SkipsAttribution(t *testing.T) {
+	relayA := "aabbccddeeff00112233445566778899aabbccddeeff00112233445566778899"
+	relayB := "aa1122334455667788990011223344556677889900112233445566778899aabb"
+	hop := "aa" // 2-char prefix shared by both relayA and relayB
+
+	aged := time.Now().UTC().Add(-48 * time.Hour).Format(time.RFC3339)
+	dbPath := createTestDBAmbiguousPrefix(t, relayA, relayB, hop, aged)
+
+	db, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer db.conn.Close()
+
+	store := NewPacketStore(db, &PacketStoreConfig{
+		RetentionHours:  72,
+		HotStartupHours: 1, // hot load skips the 48h-old row → goes to loadChunk
+	})
+	// Empty graph: no neighbor-affinity tiebreak signal. Mirrors a freshly
+	// restarted server whose only relay info is the prefix map.
+	store.graph.Store(NewNeighborGraph())
+
+	if err := store.LoadChunked(0); err != nil {
+		t.Fatalf("LoadChunked: %v", err)
+	}
+	if got := len(store.byNode[relayA]) + len(store.byNode[relayB]); got != 0 {
+		t.Fatalf("setup: hot load unexpectedly picked up 48h-old row "+
+			"(byNode total=%d, want 0) — test would not exercise loadChunk", got)
+	}
+
+	chunkStart := time.Now().UTC().Add(-72 * time.Hour)
+	chunkEnd := time.Now().UTC().Add(-1 * time.Hour)
+	if err := store.loadChunk(chunkStart, chunkEnd); err != nil {
+		t.Fatalf("loadChunk: %v", err)
+	}
+
+	// Neither repeater may be over-attributed. The hop is ambiguous → the
+	// cold-load loader MUST NOT pick one as the byNode owner.
+	if got := len(store.byNode[relayA]); got != 0 {
+		t.Errorf("byNode[%s]: got %d transmissions, want 0 — ambiguous-prefix hop "+
+			"was over-attributed to relayA (time-travel attribution bug)", relayA, got)
+	}
+	if got := len(store.byNode[relayB]); got != 0 {
+		t.Errorf("byNode[%s]: got %d transmissions, want 0 — ambiguous-prefix hop "+
+			"was over-attributed to relayB (time-travel attribution bug)", relayB, got)
+	}
+}
+
+// TestLoad_AmbiguousPrefix_SkipsAttribution covers the hot-window Load()
+// path. Same setup as the loadChunk test but the row falls inside the hot
+// window so it is loaded by Load() / scanAndMergeChunk.
+func TestLoad_AmbiguousPrefix_SkipsAttribution(t *testing.T) {
+	relayA := "bbccddeeff00112233445566778899aabbccddeeff00112233445566778899aa"
+	relayB := "bb112233445566778899001122334455667788990011223344556677889900aa"
+	hop := "bb"
+
+	ts := time.Now().UTC().Format(time.RFC3339)
+	dbPath := createTestDBAmbiguousPrefix(t, relayA, relayB, hop, ts)
+
+	db, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer db.conn.Close()
+
+	store := NewPacketStore(db, &PacketStoreConfig{RetentionHours: 72})
+	store.graph.Store(NewNeighborGraph())
+
+	if err := store.LoadChunked(0); err != nil {
+		t.Fatalf("LoadChunked: %v", err)
+	}
+
+	if got := len(store.byNode[relayA]); got != 0 {
+		t.Errorf("byNode[%s]: got %d transmissions, want 0 — ambiguous-prefix hop "+
+			"was over-attributed (hot Load path)", relayA, got)
+	}
+	if got := len(store.byNode[relayB]); got != 0 {
+		t.Errorf("byNode[%s]: got %d transmissions, want 0 — ambiguous-prefix hop "+
+			"was over-attributed (hot Load path)", relayB, got)
+	}
+}
@@ -0,0 +1,180 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"path/filepath"
+	"testing"
+	"time"
+
+	_ "modernc.org/sqlite"
+)
+
+// createTestDBPathJSONNoResolvedPath builds a fixture that mirrors the LIVE
+// deployment state after #1287: observations carry a path_json hop list but
+// observations.resolved_path is NULL (the ingestor no longer writes it; relay
+// data is persisted as aggregate neighbor_edges instead). A single repeater
+// node whose public_key starts with hopPrefix lets the in-memory prefix map
+// resolve that hop unambiguously to relayPubkey.
+//
+// The transmission's decoded_json is empty ({}), so relayPubkey is NOT an
+// endpoint (pubKey/destPubKey/srcPubKey). The ONLY way it can enter
+// s.byNode is via path_json → resolvePathForObs relay-hop resolution.
+//
+// Parameters:
+//   - relayPubkey: public key inserted as the only node (role "repeater").
+//   - hopPrefix: written, as a one-element JSON array, into path_json. The
+//     function does not check that it is a prefix of relayPubkey; callers
+//     are responsible for that.
+//   - firstSeen: timestamp string used for both the transmission's
+//     first_seen and the observation's timestamp.
+//
+// Returns the path of the SQLite file, created under t.TempDir(). The setup
+// connection is closed before returning. Any SQL error aborts the test.
+func createTestDBPathJSONNoResolvedPath(t *testing.T, relayPubkey, hopPrefix, firstSeen string) string {
+	t.Helper()
+	dir := t.TempDir()
+	dbPath := filepath.Join(dir, "test.db")
+
+	// NOTE(review): modernc.org/sqlite documents DSN pragmas in the form
+	// _pragma=journal_mode(WAL); confirm that _journal_mode actually has
+	// an effect with this driver.
+	conn, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer conn.Close()
+
+	// exec runs one setup statement and aborts the test on any SQL error.
+	exec := func(s string, args ...interface{}) {
+		if _, err := conn.Exec(s, args...); err != nil {
+			t.Fatalf("setup exec failed: %v\nSQL: %s", err, s)
+		}
+	}
+
+	// PREFLIGHT: async=true reason="test fixture: in-memory t.TempDir SQLite, never touches a real DB. Tables are CREATE-from-empty in a one-shot OpenDB call, not a schema migration over existing data."
+	exec(`CREATE TABLE transmissions (
+		id INTEGER PRIMARY KEY,
+		raw_hex TEXT, hash TEXT, first_seen TEXT,
+		route_type INTEGER, payload_type INTEGER, payload_version INTEGER,
+		decoded_json TEXT
+	)`)
+	// resolved_path column present (matches live schema) but left NULL.
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE observations (
+		id INTEGER PRIMARY KEY,
+		transmission_id INTEGER,
+		observer_id TEXT, observer_name TEXT,
+		direction TEXT, snr REAL, rssi REAL, score INTEGER,
+		path_json TEXT, timestamp TEXT,
+		raw_hex TEXT,
+		resolved_path TEXT
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE observers (rowid INTEGER PRIMARY KEY, id TEXT, name TEXT, iata TEXT)`)
+	// Production nodes schema uses public_key (not pubkey) — getAllNodes /
+	// buildPrefixMap reads public_key, role, advert_count, first_seen.
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE nodes (
+		public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL,
+		last_seen TEXT, first_seen TEXT, advert_count INTEGER DEFAULT 0
+	)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE TABLE schema_version (version INTEGER)`)
+	exec(`INSERT INTO schema_version (version) VALUES (1)`)
+	// PREFLIGHT: async=true reason="test fixture, in-memory tmpdir DB"
+	exec(`CREATE INDEX idx_tx_first_seen ON transmissions(first_seen)`)
+
+	// Repeater node so canAppearInPath() admits it to the prefix map.
+	exec(`INSERT INTO nodes (public_key, name, role, advert_count) VALUES (?,?,?,?)`,
+		relayPubkey, "Relay One", "repeater", 10)
+
+	// One transmission with an empty decoded_json, so no endpoint pubkeys
+	// are available from the payload itself.
+	exec("INSERT INTO transmissions VALUES (?,?,?,?,0,4,1,?)",
+		1, "aa", "hashpjf_1", firstSeen, `{}`)
+	// resolved_path explicitly NULL; path_json carries the relay hop prefix.
+	exec("INSERT INTO observations (id, transmission_id, observer_id, observer_name, direction, snr, rssi, score, path_json, timestamp, raw_hex, resolved_path) VALUES (?,?,?,?,?,?,?,?,?,?,?,NULL)",
+		1, 1, "obs1", "Obs1", "RX", -10.0, -80.0, 5, fmt.Sprintf(`[%q]`, hopPrefix), firstSeen, "")
+
+	return dbPath
+}
+
+// TestLoadChunked_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty pins the
+// fix for the "relay-node analytics empty after every restart" bug.
+//
+// On live, observations.resolved_path is 100% NULL (since #1287 the ingestor
+// persists relay data as neighbor_edges, not per-observation resolved_path).
+// The cold-load paths (Load / scanAndMergeChunk) indexed relay hops ONLY from
+// resolved_path, so a relay node's path-hop attribution was never rebuilt on
+// startup — it only re-accumulated from live traffic, collapsing the activity
+// timeline to "just the hour the server restarted".
+//
+// The fix: when resolved_path is empty, fall back to resolving the hops from
+// the persisted path_json using the in-memory prefix map + neighbor graph
+// (exactly what the live ingest path already does), then index the relay hops.
+func TestLoadChunked_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty(t *testing.T) {
+	relayPK := "aabbccddeeff00112233445566778899aabbccddeeff00112233445566778899"
+	hop := "aa" // 2-hex-char path hop; unique 2-char prefix of relayPK
+
+	ts := time.Now().UTC().Format(time.RFC3339)
+	dbPath := createTestDBPathJSONNoResolvedPath(t, relayPK, hop, ts)
+
+	db, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer db.conn.Close()
+
+	if !db.hasResolvedPath {
+		t.Fatalf("setup: fixture should expose resolved_path column; hasResolvedPath=false")
+	}
+
+	store := NewPacketStore(db, &PacketStoreConfig{RetentionHours: 72})
+	// Empty graph is sufficient: a single prefix candidate resolves without
+	// neighbor-affinity disambiguation. Mirrors a freshly restarted server
+	// that has loaded its neighbor_edges snapshot before the packet load.
+	store.graph.Store(NewNeighborGraph())
+
+	if err := store.LoadChunked(0); err != nil {
+		t.Fatalf("LoadChunked: %v", err)
+	}
+
+	// The relay pubkey only reachable through path_json resolution must be
+	// indexed in byNode for the transmission.
+	if got := len(store.byNode[relayPK]); got != 1 {
+		t.Errorf("byNode[%s]: got %d transmissions, want 1 — cold load did not "+
+			"resolve relay hops from path_json when resolved_path was NULL "+
+			"(relay history lost on restart)", relayPK, got)
+	}
+}
+
+// TestLoadChunk_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty covers the
+// background-window loader (loadBackgroundChunks → loadChunk), which on live
+// loads everything older than hotStartupHours (24h) up to retentionHours
+// (168h). Without the path_json fallback here, a relay node's analytics for
+// the older 6 days would still vanish on every restart even with the hot
+// window fixed.
+func TestLoadChunk_ResolvesRelayHopsFromPathJSON_WhenResolvedPathEmpty(t *testing.T) {
+	relayPK := "ccddeeff00112233445566778899aabbccddeeff00112233445566778899aabb"
+	hop := "cc"
+
+	// Aged 48h so it falls in the background window, not the hot window.
+	aged := time.Now().UTC().Add(-48 * time.Hour).Format(time.RFC3339)
+	dbPath := createTestDBPathJSONNoResolvedPath(t, relayPK, hop, aged)
+
+	db, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer db.conn.Close()
+
+	store := NewPacketStore(db, &PacketStoreConfig{
+		RetentionHours:  72,
+		HotStartupHours: 1, // hot load must NOT pick up the 48h-old row
+	})
+	store.graph.Store(NewNeighborGraph())
+
+	if err := store.LoadChunked(0); err != nil {
+		t.Fatalf("LoadChunked: %v", err)
+	}
+	if got := len(store.byNode[relayPK]); got != 0 {
+		t.Fatalf("setup: hot load unexpectedly picked up 48h-old row; "+
+			"byNode[relayPK]=%d (want 0) — test would not exercise loadChunk", got)
+	}
+
+	chunkStart := time.Now().UTC().Add(-72 * time.Hour)
+	chunkEnd := time.Now().UTC().Add(-1 * time.Hour)
+	if err := store.loadChunk(chunkStart, chunkEnd); err != nil {
+		t.Fatalf("loadChunk: %v", err)
+	}
+
+	if got := len(store.byNode[relayPK]); got != 1 {
+		t.Errorf("byNode[%s]: got %d transmissions, want 1 — background loadChunk "+
+			"did not resolve relay hops from path_json when resolved_path was NULL "+
+			"(relay history lost on restart for the older retention window)", relayPK, got)
+	}
+}
@@ -0,0 +1,160 @@
+package main
+
+import (
+	"database/sql"
+	"fmt"
+	"path/filepath"
+	"testing"
+	"time"
+
+	_ "modernc.org/sqlite"
+)
+
+// createTestDBWithResolvedPath writes a fixture SQLite file under
+// t.TempDir() holding numTx transmissions, each with one observation.
+// Rows are stamped 48h in the past (one second apart), and every
+// observation stores relayPubkeys as a JSON array of strings in its
+// resolved_path column while path_json is an empty array. The shape
+// follows createTestDBWithAgedPackets plus the resolved_path column, so
+// loadChunk's hasResolvedPath branch gets exercised.
+//
+// Returns the database path; any SQL error aborts the test.
+//
+// NOTE(review): the nodes table here keys on `pubkey`, whereas sibling
+// fixtures use `public_key` — confirm this difference is intentional.
+func createTestDBWithResolvedPath(t *testing.T, numTx int, relayPubkeys []string) string {
+	t.Helper()
+	dbPath := filepath.Join(t.TempDir(), "test.db")
+
+	setupDB, err := sql.Open("sqlite", dbPath+"?_journal_mode=WAL")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer setupDB.Close()
+
+	// mustExec runs one setup statement and fails the test on error.
+	mustExec := func(stmt string, params ...interface{}) {
+		if _, execErr := setupDB.Exec(stmt, params...); execErr != nil {
+			t.Fatalf("setup exec failed: %v\nSQL: %s", execErr, stmt)
+		}
+	}
+
+	mustExec(`CREATE TABLE transmissions (
+		id INTEGER PRIMARY KEY,
+		raw_hex TEXT, hash TEXT, first_seen TEXT,
+		route_type INTEGER, payload_type INTEGER, payload_version INTEGER,
+		decoded_json TEXT
+	)`)
+	mustExec(`CREATE TABLE observations (
+		id INTEGER PRIMARY KEY,
+		transmission_id INTEGER,
+		observer_id TEXT, observer_name TEXT,
+		direction TEXT, snr REAL, rssi REAL, score INTEGER,
+		path_json TEXT, timestamp TEXT,
+		raw_hex TEXT,
+		resolved_path TEXT
+	)`)
+	mustExec(`CREATE TABLE observers (rowid INTEGER PRIMARY KEY, id TEXT, name TEXT, iata TEXT)`)
+	mustExec(`CREATE TABLE nodes (pubkey TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, first_seen TEXT, frequency REAL)`)
+	mustExec(`CREATE TABLE schema_version (version INTEGER)`)
+	mustExec(`INSERT INTO schema_version (version) VALUES (1)`)
+	mustExec(`CREATE INDEX idx_tx_first_seen ON transmissions(first_seen)`)
+
+	// Render the pubkeys as ["pk1","pk2",...]; an empty input yields "[]".
+	resolvedPath := "["
+	separator := ""
+	for _, pubkey := range relayPubkeys {
+		resolvedPath += separator + fmt.Sprintf("%q", pubkey)
+		separator = ","
+	}
+	resolvedPath += "]"
+
+	// Row ids start at 1; timestamps advance one second per row from the
+	// 48h-ago base, and hashes are numbered from 0.
+	base := time.Now().UTC().Add(-48 * time.Hour)
+	for id := 1; id <= numTx; id++ {
+		offset := id - 1
+		stamp := base.Add(time.Duration(offset) * time.Second).Format(time.RFC3339)
+		mustExec("INSERT INTO transmissions VALUES (?,?,?,?,0,4,1,?)",
+			id, "aa", fmt.Sprintf("hash1558_%d", offset), stamp, `{}`)
+		mustExec("INSERT INTO observations (id, transmission_id, observer_id, observer_name, direction, snr, rssi, score, path_json, timestamp, raw_hex, resolved_path) VALUES (?,?,?,?,?,?,?,?,?,?,?,?)",
+			id, id, "obs1", "Obs1", "RX", -10.0, -80.0, 5, `[]`, stamp, "", resolvedPath)
+	}
+	return dbPath
+}
+
+// TestLoadChunk_IndexesResolvedPathPubkeys_Issue1558 verifies the
+// contract-violation fix from #1558:
+//
+//	`Load` (cmd/server/store.go:783-799) unmarshals each observation's
+//	resolved_path column and feeds every relay-hop pubkey through
+//	addToByNode / addResolvedPubkeysToPathHopIndex /
+//	addToResolvedPubkeyIndex. `loadChunk` (cmd/server/store.go:937-1023)
+//	scans the same column into resolvedPathStr but never feeds it
+//	anywhere — so background-backfilled transmissions never appear under
+//	their relay pubkeys in s.byNode, even though the same exact rows do
+//	when they happen to fall inside the hot startup window.
+//
+// Symptom in production: Home page per-node `packetsToday` /
+// `totalTransmissions` / observer counts collapse after a container
+// restart for any node that primarily appears as a relay (rather than
+// as the endpoint pubKey/destPubKey/srcPubKey of a packet), because the
+// background backfill path silently drops the relay-hop indexing
+// branch. See issue #1558 for the full trace + diagnosis.
+//
+// This test loads a fixture DB exclusively via loadChunk (skipping
+// Load) and asserts that for each relay pubkey present in
+// `resolved_path` of every observation, s.byNode contains the
+// transmission.
+func TestLoadChunk_IndexesResolvedPathPubkeys_Issue1558(t *testing.T) {
+	// Two distinct relay pubkeys appear in every observation's resolved_path.
+	// Neither is an endpoint pubkey in decoded_json — so the ONLY path
+	// they can enter byNode through is the resolved_path branch.
+	relayPK1 := "1111111111111111111111111111111111111111111111111111111111111111"
+	relayPK2 := "2222222222222222222222222222222222222222222222222222222222222222"
+
+	dbPath := createTestDBWithResolvedPath(t, 3, []string{relayPK1, relayPK2})
+
+	db, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer db.conn.Close()
+
+	if !db.hasResolvedPath {
+		t.Fatalf("setup: fixture should expose resolved_path column; hasResolvedPath=false")
+	}
+
+	store := NewPacketStore(db, &PacketStoreConfig{
+		RetentionHours:  72,
+		HotStartupHours: 1, // initial Load should NOT pick up 48h-old fixture rows
+	})
+	if err := store.Load(); err != nil {
+		t.Fatal(err)
+	}
+	// Confirm the fixture rows are outside the hot window — Load() must
+	// not have already populated byNode for the relay pubkeys; otherwise
+	// the test would not actually be exercising loadChunk.
+	if len(store.byNode[relayPK1]) != 0 {
+		t.Fatalf("setup: Load() unexpectedly picked up 48h-old rows; "+
+			"byNode[relayPK1]=%d entries (expected 0)", len(store.byNode[relayPK1]))
+	}
+
+	// Trigger background backfill of the 48h-old window via loadChunk —
+	// this is the code path under test.
+	chunkStart := time.Now().UTC().Add(-72 * time.Hour)
+	chunkEnd := time.Now().UTC().Add(-1 * time.Hour)
+	if err := store.loadChunk(chunkStart, chunkEnd); err != nil {
+		t.Fatalf("loadChunk failed: %v", err)
+	}
+
+	// Sanity: loadChunk did merge the transmissions into the slice.
+	if len(store.packets) != 3 {
+		t.Fatalf("loadChunk should have merged 3 transmissions; got %d", len(store.packets))
+	}
+
+	// THE ASSERTION: every relay pubkey listed in resolved_path must be
+	// indexed in byNode for every transmission, because loadChunk's
+	// per-row scan should mirror Load()'s 783-799 block.
+	for _, relayPK := range []string{relayPK1, relayPK2} {
+		got := len(store.byNode[relayPK])
+		if got != 3 {
+			t.Errorf("byNode[%s]: got %d transmissions, want 3 — "+
+				"loadChunk dropped the resolved_path indexing branch "+
+				"(issue #1558)",
+				relayPK, got)
+		}
+	}
+}
@@ -109,23 +109,39 @@ func main() {
 		log.Printf("[security] WARNING: API key is weak or a known default — write endpoints are vulnerable")
 	}

-	// Apply Go runtime soft memory limit (#836).
-	// Honors GOMEMLIMIT if set; otherwise derives from packetStore.maxMemoryMB.
+	// Apply Go runtime soft memory limit (#836, #1010).
+	// Precedence: GOMEMLIMIT env > runtime.maxMemoryMB > derived from packetStore.maxMemoryMB.
 	{
 		_, envSet := os.LookupEnv("GOMEMLIMIT")
+		runtimeMaxMB := 0
+		if cfg.Runtime != nil {
+			runtimeMaxMB = cfg.Runtime.MaxMemoryMB
+		}
 		maxMB := 0
 		if cfg.PacketStore != nil {
 			maxMB = cfg.PacketStore.MaxMemoryMB
 		}
-		limit, source := applyMemoryLimit(maxMB, envSet)
+		// runtime.maxMemoryMB (explicit) wins over packetStore-derived (implicit).
+		effectiveMB := maxMB
+		usedRuntimeCfg := false
+		if !envSet && runtimeMaxMB > 0 {
+			effectiveMB = runtimeMaxMB
+			usedRuntimeCfg = true
+		}
+		limit, source := applyMemoryLimit(effectiveMB, envSet)
 		switch source {
 		case "env":
 			log.Printf("[memlimit] using GOMEMLIMIT from environment (%s)", os.Getenv("GOMEMLIMIT"))
 		case "derived":
-			log.Printf("[memlimit] derived from packetStore.maxMemoryMB=%d → %d MiB (1.5x headroom)", maxMB, limit/(1024*1024))
+			if usedRuntimeCfg {
+				log.Printf("[memlimit] runtime.maxMemoryMB=%d → %d MiB (1.5x headroom)", runtimeMaxMB, limit/(1024*1024))
+			} else {
+				log.Printf("[memlimit] derived from packetStore.maxMemoryMB=%d → %d MiB (1.5x headroom)", maxMB, limit/(1024*1024))
+			}
 		default:
-			log.Printf("[memlimit] no soft memory limit set (GOMEMLIMIT unset, packetStore.maxMemoryMB=0); recommend setting one to avoid container OOM-kill")
+			log.Printf("[memlimit] unset → default (no soft memory limit; recommend setting GOMEMLIMIT or runtime.maxMemoryMB to ≥1.5× working set to avoid OOM-kill)")
 		}
+		warnIfMemlimitUnderprovisioned(limit)
 	}

 	// Resolve DB path
@@ -182,18 +198,56 @@ func main() {
 	// In-memory packet store
 	store := NewPacketStore(database, cfg.PacketStore, cfg.CacheTTL)
 	store.config = cfg
-	if err := store.Load(); err != nil {
-		log.Fatalf("[store] failed to load: %v", err)
+
+	// Load the persisted neighbor graph BEFORE the packet load so the
+	// chunked loader can resolve relay-hop pubkeys from path_json. Since
+	// #1287 the ingestor persists relay data only as aggregate
+	// neighbor_edges — observations.resolved_path is never written — so
+	// without an available graph at load time a relay node's analytics
+	// history would rebuild only from post-restart live traffic (the
+	// "timeline empty after every restart" bug). neighbor_edges is small,
+	// so this adds negligible latency before the HTTP listener binds. The
+	// fresh-DB branch (no snapshot) still builds in-memory AFTER the load
+	// below, because BuildFromStore needs the loaded packets.
+	neighborEdgesPersisted := neighborEdgesTableExists(database.conn)
+	if neighborEdgesPersisted {
+		store.graph.Store(loadNeighborEdgesFromDB(database.conn))
+		log.Printf("[neighbor] loaded persisted neighbor graph")
 	}
+
+	// #1009: chunked Load with early HTTP readiness. LoadChunked runs
+	// asynchronously and signals FirstChunkReady after the first chunk
+	// is merged so the HTTP listener can bind without waiting for the
+	// full multi-minute scan to finish. loadStatusMiddleware (wired
+	// below) advertises loading|ready via X-CoreScope-Load-Status.
+	chunkSize := cfg.DBLoadChunkSize()
+	loadErrCh := make(chan error, 1)
+	go func() {
+		loadErrCh <- store.LoadChunked(chunkSize)
+	}()
+	select {
+	case <-store.FirstChunkReady():
+		log.Printf("[store] first chunk ready (chunkSize=%d) — HTTP listener may bind", chunkSize)
+	case err := <-loadErrCh:
+		if err != nil {
+			log.Fatalf("[store] LoadChunked failed before first chunk: %v", err)
+		}
+		log.Printf("[store] LoadChunked completed before first-chunk signal (empty DB?)")
+	}
+	go func() {
+		if err := <-loadErrCh; err != nil {
+			log.Printf("[store] LoadChunked background error: %v", err)
+		}
+	}()
 	if store.hotStartupHours > 0 {
 		log.Printf("[store] starting background load: filling retentionHours=%gh from hotStartupHours=%gh",
 			store.retentionHours, store.hotStartupHours)
 		go store.loadBackgroundChunks()
 	}

-	// Initialize persisted neighbor graph.
-	// Per #1287, schema migrations all live in the ingestor (see
-	// dbschema.Apply). The server merely loads the snapshot here and
+	// Neighbor graph: the persisted snapshot (if present) was already
+	// loaded above, before the packet load. Per #1287 schema migrations
+	// all live in the ingestor; the server only reads the snapshot and
 	// then refreshes it via the recompNeighborGraph slot every 60s.
 	dbPath = database.path
 	database.hasResolvedPath = true // dbschema.AssertReady above already verified observations.resolved_path exists
@@ -201,11 +255,7 @@ func main() {
 	// WaitGroup for background init steps that gate /api/healthz readiness.
 	var initWg sync.WaitGroup

-	// Load or build neighbor graph
-	if neighborEdgesTableExists(database.conn) {
-		store.graph.Store(loadNeighborEdgesFromDB(database.conn))
-		log.Printf("[neighbor] loaded persisted neighbor graph")
-	} else {
+	if !neighborEdgesPersisted {
 		// No persisted snapshot yet (e.g. fresh DB before the ingestor
 		// has run its first edge-build cycle). Build an in-memory graph
 		// from the packets we already have so reads aren't empty. We
@@ -317,6 +367,39 @@ func main() {
 	defer stopAnalyticsRecomp()
 	log.Printf("[analytics-recompute] background recompute enabled (default=%s)", cfg.AnalyticsDefaultRecomputeInterval())

+	// #1481 P0-1: background recomputer for the default-shape
+	// /api/analytics/neighbor-graph response (5 min cadence). Reads
+	// hit an atomic pointer; the rebuild path no longer runs on the
+	// request goroutine for the common filter shape.
+	stopNeighborGraphCache := make(chan struct{})
+	ngInterval := neighborGraphCacheInterval
+	if cfg.NeighborGraph != nil && cfg.NeighborGraph.CacheRecomputeIntervalSeconds > 0 {
+		ngInterval = time.Duration(cfg.NeighborGraph.CacheRecomputeIntervalSeconds) * time.Second
+	}
+	srv.startNeighborGraphRecomputer(ngInterval, stopNeighborGraphCache)
+	defer close(stopNeighborGraphCache)
+	log.Printf("[neighbor-graph-cache] background recompute enabled (interval=%s)", ngInterval)
+
+	// Known-channels catalogue cache (issue #1323). OPT-IN: an empty
+	// cfg.KnownChannelsURL leaves srv.knownChannels nil and starts no
+	// background fetch. The /api/known-channels endpoint then serves an
+	// empty snapshot. Operators who want the community catalogue must
+	// set knownChannelsUrl explicitly in config.json (see
+	// config.example.json for the pinned-SHA recommendation).
+	if cfg.KnownChannelsURL != "" {
+		kcRefresh := DefaultKnownChannelsRefresh
+		if cfg.KnownChannelsRefreshMs > 0 {
+			kcRefresh = time.Duration(cfg.KnownChannelsRefreshMs) * time.Millisecond
+		}
+		srv.knownChannels = newKnownChannelsCache(cfg.KnownChannelsURL, kcRefresh)
+		kcCtx, stopKnownChannels := context.WithCancel(context.Background())
+		srv.knownChannels.run(kcCtx)
+		defer stopKnownChannels()
+		log.Printf("[known-channels] background fetch enabled (url=%s, refresh=%s)", cfg.KnownChannelsURL, kcRefresh)
+	} else {
+		log.Printf("[known-channels] disabled (knownChannelsUrl unset in config)")
+	}
+
 	// Steady-state repeater-enrichment recomputer (issue #1262).
 	// Prewarms the bulk caches feeding handleNodes so the very first
 	// /api/nodes?limit=2000 from live.js's SPA bootstrap hits a
@@ -366,6 +449,10 @@ func main() {
 		handler = gzipMiddlewareWithConfig(cfg.Compression, router)
 		log.Printf("[server] HTTP gzip compression enabled")
 	}
+	// #1009: stamp X-CoreScope-Load-Status on every response so probes
+	// and dashboards can see when the chunked Load is still in flight.
+	// Outermost wrap so the header is set regardless of gzip/etc.
+	handler = loadStatusMiddleware(store, handler)
 	if cfg.WSCompressionEnabled() {
 		log.Printf("[server] WebSocket permessage-deflate compression enabled")
 	}
@@ -444,6 +531,16 @@ func spaHandler(root string, fs http.Handler) http.Handler {
 	log.Printf("[static] cache-bust value: %s", bustValue)

 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Defense-in-depth: explicitly reject path-traversal attempts before
+		// we touch the filesystem. gorilla/mux + http.FileServer already clean
+		// most of these, but we don't want a future SkipClean(true) (or a
+		// different router) to silently expose the FS. See
+		// audit-input-vulns-20260603 (LOW — SPA static handler depends on
+		// default mux path-cleaning).
+		if !isSafeStaticPath(r.URL.Path, r.URL.RawPath) {
+			http.Error(w, "bad request", http.StatusBadRequest)
+			return
+		}
 		// Serve pre-processed index.html for root and /index.html
 		if r.URL.Path == "/" || r.URL.Path == "/index.html" {
 			w.Header().Set("Content-Type", "text/html; charset=utf-8")
@@ -467,3 +564,29 @@ func spaHandler(root string, fs http.Handler) http.Handler {
 		fs.ServeHTTP(w, r)
 	})
 }
+
+// isSafeStaticPath reports whether a request path is free of traversal
+// sequences and backslashes. It is defense-in-depth for the SPA static
+// handler, so that a future router with SkipClean(true) cannot expose the
+// filesystem. Empty input counts as safe (root is handled earlier).
+//
+// urlPath is the decoded path (r.URL.Path); rawPath is the raw, possibly
+// percent-encoded path (r.URL.RawPath), inspected for encoded `..` / `\`.
+func isSafeStaticPath(urlPath, rawPath string) bool {
+	for _, candidate := range [2]string{urlPath, rawPath} {
+		if candidate == "" {
+			continue
+		}
+		// Literal ".." and backslash are matched against the path as given.
+		if strings.Contains(candidate, "..") || strings.Contains(candidate, "\\") {
+			return false
+		}
+		// Percent-encoded forms ("%2e%2e", "%5c") are matched against a
+		// lowercased copy so upper- and lowercase hex digits are equivalent.
+		lowered := strings.ToLower(candidate)
+		if strings.Contains(lowered, "%2e%2e") || strings.Contains(lowered, "%5c") {
+			return false
+		}
+	}
+	return true
+}
@@ -1,9 +1,19 @@
 package main

 import (
+	"log"
+	"os"
 	"runtime/debug"
+	"strconv"
+	"strings"
 )

+// cgroupUnlimitedThreshold is the cutoff at or above which a memory value (in
+// bytes) is treated as "no limit". cgroup v1 encodes unlimited as a value near
+// math.MaxInt64 (page-aligned, just under 1<<63); 1<<62 sits far above any real
+// limit while staying well below that sentinel.
+const cgroupUnlimitedThreshold = int64(1 << 62)
+
 // applyMemoryLimit configures Go's soft memory limit (GOMEMLIMIT).
 //
 // Behavior:
@@ -30,3 +40,74 @@ func applyMemoryLimit(maxMemoryMB int, envSet bool) (int64, string) {
 	debug.SetMemoryLimit(limit)
 	return limit, "derived"
 }
+
+// readCgroupMemoryMBFn is the cgroup-limit reader that
+// warnIfMemlimitUnderprovisioned calls; it defaults to readCgroupMemoryMB.
+// Tests swap it out to inject deterministic values without cgroup mounts.
+var readCgroupMemoryMBFn = readCgroupMemoryMB
+
+// readCgroupMemoryMB returns the container memory limit in whole MiB, trying
+// cgroup v2 first, then v1. Returns 0 when no finite limit could be read.
+func readCgroupMemoryMB() int64 {
+	// cgroup v2: single file holding a byte count or the literal "max" (no limit)
+	if b, err := os.ReadFile("/sys/fs/cgroup/memory.max"); err == nil {
+		s := strings.TrimSpace(string(b))
+		if s != "max" {
+			if v, err := strconv.ParseInt(s, 10, 64); err == nil && v > 0 {
+				return v / (1024 * 1024)
+			}
+		}
+	}
+	// cgroup v1: values at or above cgroupUnlimitedThreshold mean "unlimited"
+	if b, err := os.ReadFile("/sys/fs/cgroup/memory/memory.limit_in_bytes"); err == nil {
+		if v, err := strconv.ParseInt(strings.TrimSpace(string(b)), 10, 64); err == nil {
+			if v > 0 && v < cgroupUnlimitedThreshold {
+				return v / (1024 * 1024)
+			}
+		}
+	}
+	return 0
+}
+
+// memlimitUnderprovisioned reports whether effectiveMB is less than half of cgroupMB (false if either is <= 0).
+// Overflow-safe: compares against ceil(cgroupMB/2) instead of doubling effectiveMB, which could wrap negative.
+func memlimitUnderprovisioned(effectiveMB, cgroupMB int64) bool {
+	return effectiveMB > 0 && cgroupMB > 0 && effectiveMB < cgroupMB/2+cgroupMB%2
+}
+
+// warnIfMemlimitUnderprovisioned logs a warning when GOMEMLIMIT is below 50%
+// of the container cgroup memory limit, which causes the Go GC to thrash.
+// In one reported incident (#1264) 82% of CPU was GC with a 1536 MiB limit
+// on a 7.7 GB container — all endpoints 3-100x slower until maxMemoryMB was
+// bumped and the process restarted.
+//
+// limitBytes is the value returned by applyMemoryLimit:
+//   - source="derived": the limit we set ourselves (> 0)
+//   - source="env":  0 — we did not touch the runtime; read it back below
+//   - source="none": 0 — no limit set at all; runtime default is math.MaxInt64,
+//     which the >= cgroupUnlimitedThreshold guard below catches and skips
+func warnIfMemlimitUnderprovisioned(limitBytes int64) {
+	cgroupMB := readCgroupMemoryMBFn()
+	if cgroupMB <= 0 {
+		return
+	}
+	effective := limitBytes
+	if effective <= 0 {
+		// limitBytes is 0 for both source="env" and source="none", so ask the
+		// runtime: SetMemoryLimit(-1) returns the current limit unchanged —
+		// the operator's GOMEMLIMIT (env) or math.MaxInt64 (none), the latter
+		// being rejected by the guard below.
+		effective = debug.SetMemoryLimit(-1)
+	}
+	if effective <= 0 || effective >= cgroupUnlimitedThreshold {
+		return
+	}
+	effectiveMB := effective / (1024 * 1024)
+	if memlimitUnderprovisioned(effectiveMB, cgroupMB) {
+		// NOTE(review): the hint names packetStore.maxMemoryMB even when the limit came from GOMEMLIMIT or runtime.maxMemoryMB.
+		log.Printf("[memlimit] WARN: GOMEMLIMIT=%d MiB is <50%% of container limit %d MiB — "+
+			"GC may thrash under load; consider bumping packetStore.maxMemoryMB "+
+			"(suggested: ~%d MiB, roughly 2/3 of container limit)",
+			effectiveMB, cgroupMB, cgroupMB*2/3)
+	}
+}
@@ -1,7 +1,10 @@
 package main

 import (
+	"bytes"
+	"log"
 	"runtime/debug"
+	"strings"
 	"testing"
 )

@@ -52,3 +55,109 @@ func TestApplyMemoryLimit_None(t *testing.T) {
 		t.Fatalf("expected limit=0, got %d", limit)
 	}
 }
+
+func TestMemlimitUnderprovisioned(t *testing.T) {
+	cases := []struct {
+		effective, cgroup int64
+		want              bool
+	}{
+		{512, 1536, true},   // 512*2=1024 < 1536 → underprovisioned
+		{768, 1536, false},  // 768*2=1536 == 1536 → not under (boundary)
+		{1024, 1536, false}, // 1024*2=2048 > 1536 → not under
+		{0, 1536, false},    // no effective limit → skip
+		{512, 0, false},     // no cgroup info → skip
+	}
+	for _, c := range cases {
+		got := memlimitUnderprovisioned(c.effective, c.cgroup)
+		if got != c.want {
+			t.Errorf("memlimitUnderprovisioned(%d, %d) = %v, want %v", c.effective, c.cgroup, got, c.want)
+		}
+	}
+}
+
+// captureLog runs f while the default logger writes into an in-memory
+// buffer and returns everything that was logged. The writer that was active
+// on entry is reinstated when captureLog returns.
+func captureLog(f func()) string {
+	captured := new(bytes.Buffer)
+	defer log.SetOutput(log.Writer()) // argument is evaluated now, i.e. the previous writer
+	log.SetOutput(captured)
+	f()
+	return captured.String()
+}
+
+// TestWarnIfMemlimitUnderprovisioned_EmitsWarning verifies the warning IS
+// logged when the injected cgroup reader reports a container limit more than
+// 2x larger than the effective GOMEMLIMIT.
+func TestWarnIfMemlimitUnderprovisioned_EmitsWarning(t *testing.T) {
+	// Effective: 512 MiB; container: 2048 MiB → 512*2=1024 < 2048 → warn
+	prevLimit := debug.SetMemoryLimit(int64(512) * 1024 * 1024)
+	defer debug.SetMemoryLimit(prevLimit) // restore; SetMemoryLimit(-1) would only read the limit
+
+	orig := readCgroupMemoryMBFn
+	readCgroupMemoryMBFn = func() int64 { return 2048 }
+	defer func() { readCgroupMemoryMBFn = orig }()
+
+	out := captureLog(func() {
+		warnIfMemlimitUnderprovisioned(int64(512) * 1024 * 1024)
+	})
+	if !strings.Contains(out, "[memlimit] WARN") {
+		t.Errorf("expected warning log, got: %q", out)
+	}
+}
+
+// TestWarnIfMemlimitUnderprovisioned_NoWarnWhenAdequate verifies no warning
+// when GOMEMLIMIT is >= 50% of the container limit.
+func TestWarnIfMemlimitUnderprovisioned_NoWarnWhenAdequate(t *testing.T) {
+	// Effective: 1024 MiB; container: 1536 MiB → 1024*2=2048 >= 1536 → no warn
+	prevLimit := debug.SetMemoryLimit(int64(1024) * 1024 * 1024)
+	defer debug.SetMemoryLimit(prevLimit) // restore; SetMemoryLimit(-1) would only read the limit
+
+	orig := readCgroupMemoryMBFn
+	readCgroupMemoryMBFn = func() int64 { return 1536 }
+	defer func() { readCgroupMemoryMBFn = orig }()
+
+	out := captureLog(func() {
+		warnIfMemlimitUnderprovisioned(int64(1024) * 1024 * 1024)
+	})
+	if strings.Contains(out, "[memlimit] WARN") {
+		t.Errorf("unexpected warning when limit is adequate: %q", out)
+	}
+}
+
+// TestWarnIfMemlimitUnderprovisioned_NoCgroupNoLog verifies early exit when
+// no cgroup info is available (non-Linux / non-container).
+func TestWarnIfMemlimitUnderprovisioned_NoCgroupNoLog(t *testing.T) {
+	prevLimit := debug.SetMemoryLimit(int64(512) * 1024 * 1024)
+	defer debug.SetMemoryLimit(prevLimit) // restore; SetMemoryLimit(-1) would only read the limit
+
+	orig := readCgroupMemoryMBFn
+	readCgroupMemoryMBFn = func() int64 { return 0 }
+	defer func() { readCgroupMemoryMBFn = orig }()
+
+	out := captureLog(func() {
+		warnIfMemlimitUnderprovisioned(int64(512) * 1024 * 1024)
+	})
+	if strings.Contains(out, "[memlimit] WARN") {
+		t.Errorf("unexpected warning when cgroup unavailable: %q", out)
+	}
+}
+
+// TestWarnIfMemlimitUnderprovisioned_NoneSource verifies that when no limit
+// was configured (source="none", limitBytes=0), the function reads back
+// math.MaxInt64 from the runtime and skips the warning.
+func TestWarnIfMemlimitUnderprovisioned_NoneSource(t *testing.T) {
+	prevLimit := debug.SetMemoryLimit(int64(1<<63 - 1)) // math.MaxInt64 = "no limit"
+	defer debug.SetMemoryLimit(prevLimit)               // restore; SetMemoryLimit(-1) would only read the limit
+
+	orig := readCgroupMemoryMBFn
+	readCgroupMemoryMBFn = func() int64 { return 2048 }
+	defer func() { readCgroupMemoryMBFn = orig }()
+
+	out := captureLog(func() {
+		warnIfMemlimitUnderprovisioned(0) // source="none" passes limit=0
+	})
+	if strings.Contains(out, "[memlimit] WARN") {
+		t.Errorf("unexpected warning when no limit configured: %q", out)
+	}
+}
@@ -104,6 +104,10 @@ func (s *Server) handleNodeNeighbors(w http.ResponseWriter, r *http.Request) {
 		writeError(w, 404, "Not found")
 		return
 	}
+	if s.isPubkeyHidden(pubkey) {
+		writeError(w, 404, "Not found")
+		return
+	}

 	minCount := 1
 	if v := r.URL.Query().Get("min_count"); v != "" {
@@ -236,6 +240,54 @@ func (s *Server) handleNeighborGraph(w http.ResponseWriter, r *http.Request) {
 	region := r.URL.Query().Get("region")
 	roleFilter := strings.ToLower(r.URL.Query().Get("role"))

+	// #1481 P0-1: serve the default-shape request from the atomic-pointer
+	// snapshot maintained by the background recomputer (5 min cadence).
+	// Default shape: minCount=5, minScore=0.1, no region, no role.
+	if minCount == 5 && minScore == 0.1 && region == "" && roleFilter == "" {
+		if raw, age, ok := s.loadNeighborGraphCacheBytes(); ok {
+			w.Header().Set("Content-Type", "application/json")
+			w.Header().Set("X-Cache-Age-Seconds", cacheAgeSecondsHeader(age))
+			w.Write(raw)
+			return
+		}
+	}
+	// #1483: also serve the (minCount=1, minScore=0) shape from cache —
+	// that's what the analytics UI tab fetches so it can client-side
+	// slider over the full edge set. Without this branch the user-
+	// visible analytics tab still hit the cold compute path.
+	if minCount == 1 && minScore == 0 && region == "" && roleFilter == "" {
+		if raw, age, ok := s.loadNeighborGraphCacheBytesUnfiltered(); ok {
+			w.Header().Set("Content-Type", "application/json")
+			w.Header().Set("X-Cache-Age-Seconds", cacheAgeSecondsHeader(age))
+			w.Write(raw)
+			return
+		}
+	}
+
+	resp := s.computeNeighborGraphResponseDispatch(minCount, minScore, region, roleFilter)
+	w.Header().Set("Content-Type", "application/json")
+	json.NewEncoder(w).Encode(resp)
+}
+
+// computeNeighborGraphResponseDispatch prefers the test-injected function
+// when one is set and otherwise runs the real pipeline. #1483 follow-up.
+func (s *Server) computeNeighborGraphResponseDispatch(minCount int, minScore float64, region, roleFilter string) NeighborGraphResponse {
+	if injected := s.computeNeighborGraphResponseFn; injected != nil {
+		return injected(minCount, minScore, region, roleFilter)
+	}
+	return s.computeNeighborGraphResponse(minCount, minScore, region, roleFilter)
+}
+
+// buildDefaultNeighborGraphResponse produces the default-shape response for
+// the #1481 P0-1 recomputer, via the dispatch so test hooks can inject failures.
+func (s *Server) buildDefaultNeighborGraphResponse() NeighborGraphResponse {
+	const minCount, minScore = 5, 0.1 // the default filter shape, no region/role
+	return s.computeNeighborGraphResponseDispatch(minCount, minScore, "", "")
+}
+
+// computeNeighborGraphResponse does the full graph build + filter + score
+// pipeline previously inlined in handleNeighborGraph.
+func (s *Server) computeNeighborGraphResponse(minCount int, minScore float64, region, roleFilter string) NeighborGraphResponse {
 	graph := s.getNeighborGraph()
 	allEdges := graph.AllEdges()
 	now := time.Now()
@@ -286,6 +338,10 @@ func (s *Server) handleNeighborGraph(w http.ResponseWriter, r *http.Request) {
 		if s.cfg != nil && (s.cfg.IsBlacklisted(e.NodeA) || s.cfg.IsBlacklisted(e.NodeB)) {
 			continue
 		}
+		// #1181: also drop edges touching a hidden-prefix node.
+		if s.isPubkeyHidden(e.NodeA) || s.isPubkeyHidden(e.NodeB) {
+			continue
+		}

 		ge := GraphEdge{
 			Source:        e.NodeA,
@@ -349,7 +405,7 @@ func (s *Server) handleNeighborGraph(w http.ResponseWriter, r *http.Request) {
 		avgCluster = float64(len(filteredEdges)*2) / float64(len(nodes))
 	}

-	resp := NeighborGraphResponse{
+	return NeighborGraphResponse{
 		Nodes: nodes,
 		Edges: filteredEdges,
 		Stats: GraphStats{
@@ -360,9 +416,6 @@ func (s *Server) handleNeighborGraph(w http.ResponseWriter, r *http.Request) {
 			RejectedEdgesGeoFar: atomic.LoadUint64(&graph.RejectedEdgesGeoFar),
 		},
 	}
-
-	w.Header().Set("Content-Type", "application/json")
-	json.NewEncoder(w).Encode(resp)
 }

 // ─── Helpers ───────────────────────────────────────────────────────────────────
@@ -384,6 +437,9 @@ func (s *Server) buildNodeInfoMap() map[string]nodeInfo {
 	if s.store == nil {
 		return nil
 	}
+	// FirstSeen is folded into getAllNodes (and therefore into the 30s
+	// node cache) so callers like /api/nodes/{pk}/reach get the field
+	// without a per-request SELECT — fixes #1627 r3 regression.
 	nodes, _ := s.store.getCachedNodesAndPM()
 	m := make(map[string]nodeInfo, len(nodes))
 	for _, n := range nodes {
@@ -525,3 +525,123 @@ func TestBuildNodeInfoMap_ObserverEnrichment(t *testing.T) {
 		}
 	}
 }
+
+// TestBuildNodeInfoMap_FirstSeenIsCached asserts the regression introduced by
+// #1627 r3 stays fixed: the per-pubkey first_seen field MUST come from the
+// already-30s-cached getCachedNodesAndPM path, not from a fresh uncached
+// `SELECT … FROM nodes` scan on every call.
+//
+// Method (no DB-driver wrapper needed): between two consecutive calls to
+// buildNodeInfoMap(), mutate first_seen in the SQLite file via a separate rw
+// connection. A fresh read on every call (the regression) makes the second
+// call see the new value; with first_seen folded into the 30s node cache,
+// both calls return the original value, like every other nodeInfo field.
+func TestBuildNodeInfoMap_FirstSeenIsCached(t *testing.T) {
+	tmpDir := t.TempDir()
+	dbPath := tmpDir + "/test.db"
+
+	// Seed via rw connection.
+	rw, err := sql.Open("sqlite", dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer rw.Close()
+	for _, stmt := range []string{
+		"CREATE TABLE nodes (public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, first_seen TEXT, advert_count INTEGER)",
+		"CREATE TABLE observers (id TEXT, name TEXT, iata TEXT)",
+		"INSERT INTO nodes VALUES ('AAAA1111', 'Repeater-1', 'repeater', 0, 0, '', '2024-01-01T00:00:00Z', 0)",
+	} {
+		if _, err := rw.Exec(stmt); err != nil {
+			t.Fatalf("seed exec %q: %v", stmt, err)
+		}
+	}
+
+	db, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer db.conn.Close()
+
+	store := NewPacketStore(db, nil)
+	if err := store.Load(); err != nil {
+		t.Fatalf("store.Load: %v", err)
+	}
+
+	srv := &Server{
+		db:        db,
+		store:     store,
+		perfStats: NewPerfStats(),
+	}
+
+	// Call 1: warm cache and record observed first_seen.
+	m1 := srv.buildNodeInfoMap()
+	first1 := m1["aaaa1111"].FirstSeen
+	if first1 != "2024-01-01T00:00:00Z" {
+		t.Fatalf("setup: expected first_seen=2024-01-01T00:00:00Z, got %q", first1)
+	}
+
+	// Mutate first_seen out-of-band via the rw connection. A path that
+	// re-reads it from disk (uncached) sees the new value; one that uses the
+	// 30s node cache does not, because the cache is well under 30s old.
+	if _, err := rw.Exec("UPDATE nodes SET first_seen='2099-12-31T23:59:59Z' WHERE public_key='AAAA1111'"); err != nil {
+		t.Fatalf("mutate: %v", err)
+	}
+
+	// Call 2: should match call 1 if first_seen is cached.
+	m2 := srv.buildNodeInfoMap()
+	first2 := m2["aaaa1111"].FirstSeen
+	if first2 != first1 {
+		t.Errorf("buildNodeInfoMap re-scanned nodes.first_seen uncached (#1627 r3 regression): "+
+			"call 1 saw %q, call 2 saw %q after out-of-band UPDATE; expected both calls to return "+
+			"the cached value because getCachedNodesAndPM has a 30s TTL",
+			first1, first2)
+	}
+}
+
+// TestGetAllNodes_FirstSeenSchemaFallback exercises the schema-probe rung that
+// fires when nodes.first_seen is missing. The richest SELECT errors out, the
+// loop falls through to the next-richest query, and the resulting nodeInfo
+// values must have empty FirstSeen with no panic. Regression coverage for the
+// existing fallback branch (#1632 review loop 1).
+func TestGetAllNodes_FirstSeenSchemaFallback(t *testing.T) {
+	tmpDir := t.TempDir()
+	dbPath := tmpDir + "/test.db"
+
+	// Seed a nodes table WITHOUT a first_seen column (advert_count + last_seen present).
+	rw, err := sql.Open("sqlite", dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer rw.Close()
+	for _, stmt := range []string{
+		"CREATE TABLE nodes (public_key TEXT PRIMARY KEY, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, advert_count INTEGER)",
+		"CREATE TABLE observers (id TEXT, name TEXT, iata TEXT)",
+		"INSERT INTO nodes VALUES ('BBBB2222', 'Repeater-2', 'repeater', 0, 0, '2024-02-02T00:00:00Z', 3)",
+	} {
+		if _, err := rw.Exec(stmt); err != nil {
+			t.Fatalf("seed exec %q: %v", stmt, err)
+		}
+	}
+
+	db, err := OpenDB(dbPath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer db.conn.Close()
+
+	store := NewPacketStore(db, nil)
+	nodes := store.getAllNodes() // queried directly; this test never calls store.Load()
+	if len(nodes) != 1 {
+		t.Fatalf("expected 1 row from fallback rung, got %d", len(nodes))
+	}
+	n := nodes[0]
+	if n.PublicKey != "BBBB2222" {
+		t.Errorf("PublicKey mismatch: got %q", n.PublicKey)
+	}
+	if n.FirstSeen != "" {
+		t.Errorf("FirstSeen should be empty when nodes.first_seen column is missing, got %q", n.FirstSeen)
+	}
+	if n.ObservationCount != 3 {
+		t.Errorf("ObservationCount should still populate from advert_count fallback, got %d", n.ObservationCount)
+	}
+}
@@ -0,0 +1,155 @@
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"log"
+	"runtime/debug"
+	"strconv"
+	"sync/atomic"
+	"time"
+)
+
+// #1481 P0-1: cached default-filter neighbor-graph response.
+//
+// The /api/analytics/neighbor-graph handler does graph build + per-edge
+// score + filter + ~900KB JSON marshal on every request. The default
+// (no-region, no-role, minCount=5, minScore=0.1) shape covers the
+// overwhelming majority of organic traffic; cache the fully-built AND
+// pre-marshaled response so warm reads are a single Write. Recomputed
+// every 5 minutes in the background — never on the hot path.
+
+const neighborGraphCacheInterval = 5 * time.Minute // default cadence; startNeighborGraphRecomputer falls back to it when interval <= 0
+
+// neighborGraphCacheEntry holds both the response struct (kept for
+// tests / structured access) and the pre-marshaled bytes that the
+// handler writes verbatim.
+type neighborGraphCacheEntry struct {
+	resp NeighborGraphResponse // structured form of the cached response
+	json []byte                // resp encoded as JSON, written as-is by the handler
+	at   time.Time             // when the entry was stored; used to report cache age
+}
+
+// neighborGraphCacheField groups the atomically-swapped snapshots, one per cached request shape.
+type neighborGraphCacheField struct {
+	ptr atomic.Pointer[neighborGraphCacheEntry] // default shape (minCount=5, minScore=0.1)
+	// unfiltered = the (minCount=1, minScore=0, no region/role) shape the
+	// analytics tab actually hits. Cached separately so the UI tab also gets
+	// the warm path; client-side sliders then filter from full data. #1483.
+	unfilteredPtr atomic.Pointer[neighborGraphCacheEntry]
+}
+
+// startNeighborGraphRecomputer launches a background goroutine that rebuilds
+// the cache once immediately and then every interval (default when <= 0).
+// It returns at once; the goroutine exits when the stop channel is closed.
+func (s *Server) startNeighborGraphRecomputer(interval time.Duration, stop <-chan struct{}) {
+	if interval <= 0 {
+		interval = neighborGraphCacheInterval
+	}
+	go func() {
+		s.recomputeNeighborGraphCache()
+		t := time.NewTicker(interval)
+		defer t.Stop()
+		for {
+			select {
+			case <-t.C:
+				s.recomputeNeighborGraphCache()
+			case <-stop:
+				return
+			}
+		}
+	}()
+}
+
+// recomputeNeighborGraphCache builds, pre-marshals and atomically swaps in
+// the default-shape response, then does the same for the unfiltered
+// (minCount=1, minScore=0) shape. A panic is recovered, logged and counted
+// so one bad rebuild doesn't kill the background goroutine (#1483 follow-up).
+func (s *Server) recomputeNeighborGraphCache() {
+	defer func() {
+		if r := recover(); r != nil {
+			log.Printf("[neighbor-graph-cache] rebuild panic: %v\n%s", r, debug.Stack())
+			atomic.AddUint64(&s.neighborGraphCacheRebuildFailures, 1)
+		}
+	}()
+	start := time.Now()
+	resp := s.buildDefaultNeighborGraphResponse()
+	var buf bytes.Buffer
+	if err := json.NewEncoder(&buf).Encode(resp); err != nil {
+		log.Printf("[neighbor-graph-cache] marshal error: %v", err)
+		atomic.AddUint64(&s.neighborGraphCacheRebuildFailures, 1)
+		return
+	}
+	s.neighborGraphCache.ptr.Store(&neighborGraphCacheEntry{
+		resp: resp,
+		json: buf.Bytes(),
+		at:   time.Now(),
+	})
+	log.Printf("[neighbor-graph-cache] rebuild ok in %v, nodes=%d", time.Since(start), len(resp.Nodes))
+
+	// Build + cache the analytics-tab shape (minCount=1, minScore=0), which
+	// the UI fetches so it can slider client-side. Rebuilt in the same pass as
+	// the default entry; a failure here leaves the entry stored above in place.
+	uStart := time.Now()
+	uResp := s.computeNeighborGraphResponseDispatch(1, 0, "", "")
+	var uBuf bytes.Buffer
+	if err := json.NewEncoder(&uBuf).Encode(uResp); err != nil {
+		log.Printf("[neighbor-graph-cache] unfiltered marshal error: %v", err)
+		atomic.AddUint64(&s.neighborGraphCacheRebuildFailures, 1)
+		return
+	}
+	s.neighborGraphCache.unfilteredPtr.Store(&neighborGraphCacheEntry{
+		resp: uResp,
+		json: uBuf.Bytes(),
+		at:   time.Now(),
+	})
+	log.Printf("[neighbor-graph-cache] unfiltered rebuild ok in %v, nodes=%d", time.Since(uStart), len(uResp.Nodes))
+}
+
+// loadNeighborGraphCache returns the cached default-shape response struct
+// and true, or a zero NeighborGraphResponse and false when nothing is cached.
+func (s *Server) loadNeighborGraphCache() (NeighborGraphResponse, bool) {
+	if entry := s.neighborGraphCache.ptr.Load(); entry != nil {
+		return entry.resp, true
+	}
+	return NeighborGraphResponse{}, false
+}
+
+// loadNeighborGraphCacheBytes returns the pre-marshaled JSON for the
+// cached default response if present, along with the age of the
+// snapshot (zero when no entry is present).
+func (s *Server) loadNeighborGraphCacheBytes() ([]byte, time.Duration, bool) {
+	return neighborGraphCacheEntryBytes(s.neighborGraphCache.ptr.Load())
+}
+
+// loadNeighborGraphCacheBytesUnfiltered returns the pre-marshaled JSON
+// for the (minCount=1, minScore=0) cache shape used by the analytics
+// tab, with the same age/ok contract as the default shape. #1483 follow-up.
+func (s *Server) loadNeighborGraphCacheBytesUnfiltered() ([]byte, time.Duration, bool) {
+	return neighborGraphCacheEntryBytes(s.neighborGraphCache.unfilteredPtr.Load())
+}
+
+// neighborGraphCacheEntryBytes is the shared accessor behind both cache
+// shapes, so the two loaders cannot drift apart. It returns the entry's
+// pre-marshaled JSON, the time elapsed since the entry was stored, and
+// true. A nil entry or one with no JSON yields (nil, 0, false). The age
+// stays zero when the entry carries no timestamp.
+func neighborGraphCacheEntryBytes(e *neighborGraphCacheEntry) ([]byte, time.Duration, bool) {
+	if e == nil || len(e.json) == 0 {
+		return nil, 0, false
+	}
+	var age time.Duration
+	if !e.at.IsZero() {
+		age = time.Since(e.at)
+	}
+	return e.json, age, true
+}
+
+// cacheAgeSecondsHeader renders d as whole seconds, clamped at zero, for
+// the X-Cache-Age-Seconds response header.
+func cacheAgeSecondsHeader(d time.Duration) string {
+	if secs := int64(d / time.Second); secs > 0 {
+		return strconv.FormatInt(secs, 10)
+	}
+	return "0"
+}
@@ -0,0 +1,48 @@
+package main
+
+import (
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// #1483 follow-up: assert the recomputer ticks at the interval it is
+// given rather than at a hard-coded one. With a 10ms interval the cache
+// entry must be swapped several times within a short wall-clock budget;
+// a recomputer that ignored the argument and kept a multi-minute period
+// would produce far fewer swaps and fail the assertion.
+//
+// NOTE(review): only a lower bound on the rebuild count is asserted, so
+// a recomputer that ticks FASTER than requested would still pass.
+func TestNeighborGraphCacheRecomputerHonorsInterval(t *testing.T) {
+	s := &Server{
+		computeNeighborGraphResponseFn: func(minCount int, minScore float64, region, role string) NeighborGraphResponse {
+			return NeighborGraphResponse{}
+		},
+	}
+	// Rebuilds are counted by watching the `at` timestamp of the swapped
+	// cache entry change.
+	var rebuilds atomic.Int32
+	stop := make(chan struct{})
+	// Watcher goroutine: polls the default-shape cache pointer every 2ms
+	// and bumps the counter each time it observes an entry with a new
+	// timestamp. It exits when stop is closed. Rebuilds that land between
+	// two polls with the same timestamp would be missed, which can only
+	// under-count.
+	go func() {
+		var lastAt time.Time
+		for {
+			select {
+			case <-stop:
+				return
+			default:
+				if e := s.neighborGraphCache.ptr.Load(); e != nil && !e.at.Equal(lastAt) {
+					rebuilds.Add(1)
+					lastAt = e.at
+				}
+				time.Sleep(2 * time.Millisecond)
+			}
+		}
+	}()
+	// 10ms interval, run for ~120ms → expect ~12 rebuilds. Assert ≥ 3
+	// to keep the test robust against scheduling jitter.
+	s.startNeighborGraphRecomputer(10*time.Millisecond, stop)
+	time.Sleep(120 * time.Millisecond)
+	// Closing stop ends the watcher above; the same channel is handed to
+	// the recomputer.
+	close(stop)
+	got := rebuilds.Load()
+	if got < 3 {
+		t.Fatalf("expected ≥3 rebuilds with 10ms interval over 120ms, got %d", got)
+	}
+}
@@ -0,0 +1,23 @@
+package main
+
+import (
+	"sync/atomic"
+	"testing"
+)
+
+// #1483 follow-up: when the compute function panics during
+// recomputeNeighborGraphCache, the call must return normally (no panic
+// escapes into the test) and the rebuild-failure counter must go up by
+// exactly one, so operators see the failure on /api/stats.
+func TestNeighborGraphCacheRebuildPanicIncrementsCounter(t *testing.T) {
+	s := &Server{
+		computeNeighborGraphResponseFn: func(int, float64, string, string) NeighborGraphResponse {
+			panic("intentional test panic")
+		},
+	}
+	want := atomic.LoadUint64(&s.neighborGraphCacheRebuildFailures) + 1
+	s.recomputeNeighborGraphCache()
+	if got := atomic.LoadUint64(&s.neighborGraphCacheRebuildFailures); got != want {
+		t.Fatalf("expected rebuild-failure counter to increment by 1, before=%d after=%d", want-1, got)
+	}
+}
@@ -0,0 +1,127 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http/httptest"
+	"strings"
+	"sync/atomic"
+	"testing"
+)
+
+// #1481 P0-1: handler must serve from pre-marshaled cache when set.
+//
+// The Server has no compute function configured, so a 200 response that
+// contains the seeded node name can only have come from the cache entry
+// stored on the default-shape atomic pointer.
+func TestNeighborGraphCacheServesFromAtomicPointer(t *testing.T) {
+	s := &Server{}
+	resp := NeighborGraphResponse{
+		Nodes: []GraphNode{{Pubkey: "deadbeef", Name: "cached-node"}},
+		Edges: []GraphEdge{},
+		Stats: GraphStats{TotalNodes: 1},
+	}
+	// Fail loudly if the fixture cannot be marshaled: an empty json slice
+	// would make the cache look unset and hide the real problem.
+	raw, err := json.Marshal(resp)
+	if err != nil {
+		t.Fatalf("marshal fixture: %v", err)
+	}
+	s.neighborGraphCache.ptr.Store(&neighborGraphCacheEntry{resp: resp, json: raw})
+
+	req := httptest.NewRequest("GET", "/api/analytics/neighbor-graph", nil)
+	w := httptest.NewRecorder()
+	s.handleNeighborGraph(w, req)
+
+	if w.Code != 200 {
+		t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
+	}
+	if !strings.Contains(w.Body.String(), "cached-node") {
+		t.Fatalf("expected cached node in response, got: %s", w.Body.String())
+	}
+}
+
+// #1481 P0-1: positive cache hit — default params with sentinel cache MUST
+// return the sentinel verbatim (proves cache is wired and consulted).
+func TestNeighborGraphCacheServesSentinelOnDefaultParams(t *testing.T) {
+	s := &Server{}
+	resp := NeighborGraphResponse{
+		Nodes: []GraphNode{{Pubkey: "deadbeef", Name: "CACHED-SENTINEL"}},
+	}
+	// A marshal failure would leave the cache entry without JSON; surface
+	// it here instead of as a confusing handler failure below.
+	raw, err := json.Marshal(resp)
+	if err != nil {
+		t.Fatalf("marshal fixture: %v", err)
+	}
+	s.neighborGraphCache.ptr.Store(&neighborGraphCacheEntry{resp: resp, json: raw})
+
+	// No query string: the request uses the handler's default parameters.
+	req := httptest.NewRequest("GET", "/api/analytics/neighbor-graph", nil)
+	w := httptest.NewRecorder()
+	s.handleNeighborGraph(w, req)
+
+	if w.Code != 200 {
+		t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
+	}
+	if !strings.Contains(w.Body.String(), "CACHED-SENTINEL") {
+		t.Fatalf("expected CACHED-SENTINEL in default-params body, got: %s", w.Body.String())
+	}
+}
+
+// #1483 follow-up: the analytics UI fetches with min_count=1&min_score=0;
+// that shape must ALSO be cache-served (from a separate atomic-pointer).
+// Only the unfiltered pointer is seeded here, and the cached response
+// must carry an X-Cache-Age-Seconds header.
+func TestNeighborGraphCacheServesUnfilteredShape(t *testing.T) {
+	s := &Server{}
+	resp := NeighborGraphResponse{
+		Nodes: []GraphNode{{Pubkey: "abcd", Name: "UNFILTERED-SENTINEL"}},
+	}
+	// Do not ignore the marshal error: an empty payload would read as
+	// "cache not populated" rather than as a broken fixture.
+	raw, err := json.Marshal(resp)
+	if err != nil {
+		t.Fatalf("marshal fixture: %v", err)
+	}
+	s.neighborGraphCache.unfilteredPtr.Store(&neighborGraphCacheEntry{resp: resp, json: raw})
+
+	req := httptest.NewRequest("GET", "/api/analytics/neighbor-graph?min_count=1&min_score=0", nil)
+	w := httptest.NewRecorder()
+	s.handleNeighborGraph(w, req)
+
+	if w.Code != 200 {
+		t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
+	}
+	if !strings.Contains(w.Body.String(), "UNFILTERED-SENTINEL") {
+		t.Fatalf("expected UNFILTERED-SENTINEL in analytics-shape body, got: %s", w.Body.String())
+	}
+	if h := w.Header().Get("X-Cache-Age-Seconds"); h == "" {
+		t.Error("expected X-Cache-Age-Seconds header on cached response")
+	}
+}
+
+// #1481 P0-1: non-default query (e.g. ?region=X) must bypass the cache
+// and call the compute path (verified by injected counter). The bypass
+// branch must NOT serve the sentinel — body must NOT contain it.
+func TestNeighborGraphCacheBypassOnRegionFilter(t *testing.T) {
+	var computeCalls atomic.Int32
+	bypassResp := NeighborGraphResponse{
+		Nodes: []GraphNode{{Pubkey: "abcd", Name: "BYPASS-COMPUTED"}},
+		Stats: GraphStats{TotalNodes: 1},
+	}
+	s := &Server{
+		computeNeighborGraphResponseFn: func(minCount int, minScore float64, region, role string) NeighborGraphResponse {
+			computeCalls.Add(1)
+			return bypassResp
+		},
+	}
+	sentinel := NeighborGraphResponse{
+		Nodes: []GraphNode{{Pubkey: "deadbeef", Name: "CACHED-SENTINEL"}},
+	}
+	// The sentinel must really be in the cache for the "not served"
+	// assertion below to mean anything, so a marshal failure is fatal.
+	rawSent, err := json.Marshal(sentinel)
+	if err != nil {
+		t.Fatalf("marshal sentinel fixture: %v", err)
+	}
+	s.neighborGraphCache.ptr.Store(&neighborGraphCacheEntry{resp: sentinel, json: rawSent})
+
+	req := httptest.NewRequest("GET", "/api/analytics/neighbor-graph?region=USA", nil)
+	w := httptest.NewRecorder()
+	s.handleNeighborGraph(w, req)
+
+	if w.Code != 200 {
+		t.Fatalf("status=%d body=%s", w.Code, w.Body.String())
+	}
+	body := w.Body.String()
+	if strings.Contains(body, "CACHED-SENTINEL") {
+		t.Fatalf("region=USA must bypass cache, but CACHED-SENTINEL was served: %s", body)
+	}
+	if !strings.Contains(body, "BYPASS-COMPUTED") {
+		t.Fatalf("expected BYPASS-COMPUTED from compute fn, got: %s", body)
+	}
+	if got := computeCalls.Load(); got != 1 {
+		t.Fatalf("expected compute fn called exactly once, got %d", got)
+	}
+	// Body must parse as non-empty JSON object with a nodes array.
+	var parsed NeighborGraphResponse
+	if err := json.Unmarshal(w.Body.Bytes(), &parsed); err != nil {
+		t.Fatalf("body is not valid JSON: %v body=%s", err, body)
+	}
+	if len(parsed.Nodes) == 0 {
+		t.Fatalf("expected non-empty Nodes in response, got: %s", body)
+	}
+}
@@ -131,6 +131,63 @@ func resolvePathForObs(pathJSON, observerID string, tx *StoreTx, pm *prefixMap,
 	return resolved
 }

+// resolvePathForObsColdLoad is the cold-load (Load / loadChunk / scanAndMergeChunk)
+// variant of resolvePathForObs that gates hop resolution on `unique_prefix`
+// only. Live ingest uses the affinity/observation-count tiebreak via
+// resolvePathForObs because it has roughly-current state. Cold load runs
+// against observations up to retentionHours (168h) old, where today's
+// affinity winner ≠ historical affinity winner for that prefix — silently
+// mis-attributing the relay (PR #1643 R1 munger #1, "time-travel attribution
+// gate").
+//
+// Behavior: hops whose prefix maps to exactly one repeater resolve as
+// usual; hops whose prefix maps to multiple candidates return nil and
+// increment skipped (caller-owned counter for observability — a single
+// summary log line at the end of Load surfaces the total).
+//
+// Under-attribute > mis-attribute (reviewer consensus on PR #1643).
+func resolvePathForObsColdLoad(pathJSON, observerID string, tx *StoreTx, pm *prefixMap, skipped *int) []*string {
+	hops := parsePathJSON(pathJSON)
+	if len(hops) == 0 {
+		return nil
+	}
+	resolved := make([]*string, len(hops))
+	for i, hop := range hops {
+		// unique_prefix iff the prefix maps to exactly one candidate
+		// after the observer-known nonRelay filter. Mirrors the
+		// `len(candidates) == 1 → "unique_prefix"` arm of
+		// resolveWithContext (store.go ~6380). Calling resolveWithContext
+		// with a nil graph and empty context skips the affinity/
+		// observation-count tiers entirely — but tier-4
+		// observation_count_fallback would still pick a winner for
+		// ambiguous prefixes, which is exactly what we must NOT do.
+		// Hence the explicit candidate-count check here.
+		h := strings.ToLower(hop)
+		candidates := pm.m[h]
+		if len(pm.nonRelay) > 0 && len(candidates) > 0 {
+			filtered := candidates[:0:0]
+			for j := range candidates {
+				if _, isListener := pm.nonRelay[strings.ToLower(candidates[j].PublicKey)]; isListener {
+					continue
+				}
+				filtered = append(filtered, candidates[j])
+			}
+			candidates = filtered
+		}
+		if len(candidates) == 1 {
+			pk := strings.ToLower(candidates[0].PublicKey)
+			resolved[i] = &pk
+			continue
+		}
+		// Ambiguous (len > 1) or no_match (len == 0). Under-attribute.
+		if len(candidates) > 1 && skipped != nil {
+			*skipped++
+		}
+		// resolved[i] stays nil; extractResolvedPubkeys filters it out.
+	}
+	return resolved
+}
+
 // marshalResolvedPath converts []*string to JSON for in-memory caching.
 func marshalResolvedPath(rp []*string) string {
 	if len(rp) == 0 {
@@ -0,0 +1,93 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+// Issue #1290 (MAJOR-1, adversarial review of PR #1624) — regression guard.
+// GetNonRelayObserverPubkeys() returns LOWER(id) and the disambiguator
+// (pm.nonRelay) is lowercase too, whereas GetNodeHealth used to upper-case
+// both the insert and the lookup — correct only by symmetry, and fragile
+// under any refactor of how pkt.ObserverID is normalized. This test pins
+// lowercase as the convention: the observer row is stored with a
+// lowercase id, the in-memory packet carries the same id in mixed case,
+// and the health endpoint must still report can_relay=false for it.
+func TestNodeHealth_CanRelayCaseInsensitive_Issue1290(t *testing.T) {
+	srv, router := setupTestServer(t)
+
+	const (
+		obsIDLower = "deadbeefcafe1290" // DB row: canonical lowercase id, can_relay=0
+		obsIDMixed = "DeadBeefCafe1290" // packet observer-id w/ mixed case
+		nodePubkey = "aabbccdd11223344" // seeded by seedTestData
+	)
+	now := time.Now().UTC().Format(time.RFC3339)
+	conn := srv.store.db.conn
+
+	// The test fixture's observers table predates the can_relay migration;
+	// add both columns (matches dbschema migrations).
+	migrations := [...]string{
+		`ALTER TABLE observers ADD COLUMN can_relay INTEGER DEFAULT 1`,
+		`ALTER TABLE observers ADD COLUMN can_relay_seen INTEGER DEFAULT 0`,
+	}
+	for _, ddl := range migrations {
+		if _, err := conn.Exec(ddl); err != nil {
+			t.Fatalf("alter: %v", err)
+		}
+	}
+	if _, err := conn.Exec(
+		`INSERT INTO observers (id, name, iata, last_seen, first_seen, packet_count, can_relay, can_relay_seen)
+		 VALUES (?, 'ListenerOnly', 'SJC', ?, '2026-01-01T00:00:00Z', 1, 0, 1)`,
+		obsIDLower, now); err != nil {
+		t.Fatalf("seed observer: %v", err)
+	}
+
+	// In-memory packet with the MIXED-case observer id so the badge resolver
+	// must lower-case both sides to match against the lower-cased pubkey set.
+	snr := 7.0
+	pkt := &StoreTx{
+		Hash:             "1290casebadge00",
+		FirstSeen:        now,
+		SNR:              &snr,
+		ObservationCount: 1,
+		ObserverID:       obsIDMixed,
+		ObserverName:     "ListenerOnly",
+	}
+	srv.store.mu.Lock()
+	if srv.store.byNode == nil {
+		srv.store.byNode = make(map[string][]*StoreTx)
+	}
+	srv.store.byNode[nodePubkey] = append(srv.store.byNode[nodePubkey], pkt)
+	srv.store.mu.Unlock()
+
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/nodes/"+nodePubkey+"/health", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("expected 200, got %d (body: %s)", rec.Code, rec.Body.String())
+	}
+
+	var body map[string]interface{}
+	if err := json.Unmarshal(rec.Body.Bytes(), &body); err != nil {
+		t.Fatalf("json: %v", err)
+	}
+	obs, isList := body["observers"].([]interface{})
+	if !isList {
+		t.Fatalf("expected observers array, got %T", body["observers"])
+	}
+
+	// Every row for the mixed-case observer must carry can_relay=false,
+	// and at least one such row must exist.
+	matched := false
+	for _, item := range obs {
+		row, isMap := item.(map[string]interface{})
+		if !isMap || row["observer_id"] != obsIDMixed {
+			continue
+		}
+		matched = true
+		if row["can_relay"] != false {
+			t.Errorf("listener observer with can_relay=0 + mixed-case ObserverID: expected can_relay=false, got %v", row["can_relay"])
+		}
+	}
+	if !matched {
+		t.Fatalf("did not find observer %q in HeardBy rows; got %v", obsIDMixed, obs)
+	}
+}
@@ -0,0 +1,738 @@
+package main
+
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"log"
+	"net/http"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+
+	"github.com/gorilla/mux"
+	"golang.org/x/sync/singleflight"
+)
+
+// reachScanRowLimit is the maximum number of rows the windowed observation
+// scan is allowed to return. It hard-caps the scan so a hot relay node
+// with weeks of traffic can't pull an unbounded result set into memory. A
+// node with >200k matching observations in the window is far past
+// dashboard scale; beyond the cap the counts are a (still representative)
+// truncation.
+//
+// The LIKE filter is unavoidably a text scan of path_json over the
+// timestamp-narrowed window — an indexed path-token column would need an
+// ingestor-side schema migration (the server is read-only by invariant),
+// so it's a follow-up.
+//
+// Declared as a var (not const) so tests can lower the cap to exercise
+// the truncation path without inserting 200k rows.
+var reachScanRowLimit = 200000
+
+// pathRow is one observation fed to attributeDirections. path tokens are
+// uppercase hex hop prefixes (as stored in observations.path_json). SNR is a
+// value + validity flag (not *float64) to avoid a heap escape per row.
+type pathRow struct {
+	observerPK  string // lowercase pubkey of the observer (may be "")
+	fromPubkey  string // lowercase originator pubkey (may be "")
+	payloadType int    // compared against PayloadADVERT by attributeDirections
+	path        []string // hop tokens in path order; empty paths are skipped
+	snr         float64  // only meaningful when snrValid is true
+	snrValid    bool     // whether snr should be folded into the observer average
+}
+
+// obsAgg accumulates per-observer evidence in attributeDirections: how
+// many times the observer was credited, plus the running SNR sum and the
+// number of SNR samples in it (so callers can derive an average).
+type obsAgg struct {
+	count  int     // times this observer was credited
+	snrSum float64 // sum of the valid SNR samples
+	snrN   int     // number of valid SNR samples in snrSum
+}
+
+// dirCounts is the result of attributeDirections: directional evidence
+// counters for one target node, keyed by the other party's pubkey.
+type dirCounts struct {
+	we    map[string]int    // pubkey → times the target heard that node
+	they  map[string]int    // pubkey → times that node (or observer) heard the target
+	obs   map[string]obsAgg // value map — no per-observer heap alloc
+	relay int               // rows in which a target token appeared at least once
+}
+
+// attributeDirections scans every row's path for hops belonging to the
+// target node (any token present in ourTokens) and tallies directional
+// evidence around each such hop:
+//
+//   - the hop before ours is a node the target heard ("we"); when ours is
+//     the first hop of an ADVERT, the originator is credited instead;
+//   - the hop after ours is a node that heard the target ("they"); when
+//     ours is the last hop, the observer is credited and its count/SNR
+//     are folded into obs.
+//
+// resolve maps a hop token to a unique relay pubkey and returns "" when
+// the token is ambiguous or unknown, in which case the hop is skipped.
+// ourPK is the target's own lowercase pubkey so self-edges are ignored.
+// relay counts rows in which at least one target token was found.
+func attributeDirections(rows []pathRow, ourTokens map[string]bool, ourPK string, resolve func(string) string) dirCounts {
+	// A small fixed size hint covers typical neighbour fan-out (dozens)
+	// without over-allocating buckets on a very large scan (Independent
+	// r2 #4: `len(rows)/8+1` was far too large for modest fan-out).
+	const sizeHint = 64
+	counts := dirCounts{
+		we:   make(map[string]int, sizeHint),
+		they: make(map[string]int, sizeHint),
+		obs:  make(map[string]obsAgg, sizeHint),
+	}
+	for _, row := range rows {
+		last := len(row.path) - 1
+		if last < 0 {
+			continue
+		}
+		matched := false
+		for idx, tok := range row.path {
+			if !ourTokens[tok] {
+				continue
+			}
+			matched = true
+
+			// Inbound side: who did the target hear this packet from?
+			switch {
+			case idx > 0:
+				if prev := resolve(row.path[idx-1]); prev != "" && prev != ourPK {
+					counts.we[prev]++
+				}
+			case row.payloadType == PayloadADVERT && row.fromPubkey != "" && row.fromPubkey != ourPK:
+				counts.we[row.fromPubkey]++
+			}
+
+			// Outbound side: who heard the target relay this packet?
+			switch {
+			case idx < last:
+				if next := resolve(row.path[idx+1]); next != "" && next != ourPK {
+					counts.they[next]++
+				}
+			case row.observerPK != "" && row.observerPK != ourPK:
+				counts.they[row.observerPK]++
+				agg := counts.obs[row.observerPK] // value copy; written back below
+				agg.count++
+				if row.snrValid {
+					agg.snrSum += row.snr
+					agg.snrN++
+				}
+				counts.obs[row.observerPK] = agg
+			}
+		}
+		if matched {
+			counts.relay++
+		}
+	}
+	return counts
+}
+
+// reliableTokens returns, as a set of uppercase hex strings, those 1-, 2-
+// and 3-byte prefixes of pubkey that map to exactly one entry in pm AND
+// whose single entry is pubkey itself (case-insensitive). 1-byte prefixes
+// almost always collide, so in practice the uniqueness requirement
+// filters them out. The self-check matters for non-relay targets
+// (companion/sensor): pm only holds path-capable roles, so a companion's
+// prefix could otherwise be "unique" while pointing at an unrelated relay
+// — which would then credit that relay's traffic to the companion.
+//
+// The result is always a non-nil map; it is empty when pm is nil.
+func reliableTokens(pubkey string, pm *prefixMap) map[string]bool {
+	tokens := map[string]bool{}
+	if pm == nil {
+		return tokens
+	}
+	lower := strings.ToLower(pubkey)
+	for _, hexLen := range [...]int{2, 4, 6} { // hex chars = 1,2,3 bytes
+		if hexLen > len(lower) {
+			// Lengths are ascending, so every later one is too long as well.
+			break
+		}
+		prefix := lower[:hexLen]
+		cands := pm.m[prefix]
+		if len(cands) != 1 || !strings.EqualFold(cands[0].PublicKey, pubkey) {
+			continue
+		}
+		tokens[strings.ToUpper(prefix)] = true
+	}
+	return tokens
+}
+
+// uniqueResolve looks token up (case-insensitively) in pm and returns the
+// lowercase pubkey of its candidate only when there is exactly one. It is
+// deliberately conservative: a nil pm, no candidates, or several
+// candidates all yield "". Callers should memoize across a request (see
+// newResolver) so the per-hop ToLower + map lookup runs once per distinct
+// token, not once per row.
+func uniqueResolve(pm *prefixMap, token string) string {
+	if pm == nil {
+		return ""
+	}
+	if matches := pm.m[strings.ToLower(token)]; len(matches) == 1 {
+		return strings.ToLower(matches[0].PublicKey)
+	}
+	return ""
+}
+
+// parsePathTokens extracts the quoted hex hop tokens from a path_json array
+// (e.g. `["AA","01FA","BB"]`) in a single pass, uppercased. Avoids the
+// json.Unmarshal reflection + per-row interface allocations on the hot scan
+// path. Tokens slice into pj (no copy) except where ToUpper must rewrite a
+// lowercase hop; path_json holds only hex strings, so there are no escapes to
+// worry about. Returns nil for an empty/degenerate array.
+func parsePathTokens(pj string) []string {
+	out := make([]string, 0, 8) // paths are short (a handful of hops)
+	i := 0
+	for {
+		q1 := strings.IndexByte(pj[i:], '"')
+		if q1 < 0 {
+			break
+		}
+		q1 += i
+		rel := strings.IndexByte(pj[q1+1:], '"')
+		if rel < 0 {
+			break
+		}
+		q2 := q1 + 1 + rel
+		out = append(out, strings.ToUpper(pj[q1+1:q2]))
+		i = q2 + 1
+	}
+	return out
+}
+
+// newResolver wraps uniqueResolve for pm in a per-instance memo table.
+// Paths reuse the same hop tokens across thousands of rows, so each
+// distinct token is resolved once and every later call is a single map
+// hit. Unresolvable tokens ("") are cached as well. The returned closure
+// writes to its memo map without locking.
+func newResolver(pm *prefixMap) func(string) string {
+	memo := map[string]string{}
+	return func(tok string) string {
+		pk, hit := memo[tok]
+		if !hit {
+			pk = uniqueResolve(pm, tok)
+			memo[tok] = pk
+		}
+		return pk
+	}
+}
+
+// NodeReachInfo identifies the target node of a reach response. Lat/Lon
+// are nil when the node has no GPS fix; FirstSeen is empty when unknown.
+type NodeReachInfo struct {
+	Pubkey    string   `json:"pubkey"`
+	Name      string   `json:"name"`
+	Role      string   `json:"role"`
+	Lat       *float64 `json:"lat"`
+	Lon       *float64 `json:"lon"`
+	FirstSeen string   `json:"first_seen"`
+}
+// NodeReachWindow describes the lookback window a reach response covers:
+// its length in days and its start as an RFC 3339 timestamp.
+type NodeReachWindow struct {
+	Days  int    `json:"days"`
+	Since string `json:"since"`
+}
+// NodeReachImportance summarises how central the target node is.
+// NeighborDegree, DegreeRank and NodesWithEdges come from the
+// neighbor-degree snapshot (DegreeRank is 0 when the node has no edges);
+// the remaining counters are derived from the windowed path scan.
+type NodeReachImportance struct {
+	NeighborDegree     int `json:"neighbor_degree"`
+	DegreeRank         int `json:"degree_rank"`
+	NodesWithEdges     int `json:"nodes_with_edges"`
+	RelayObservations  int `json:"relay_observations"`
+	BidirectionalLinks int `json:"bidirectional_links"`
+	DirectObservers    int `json:"direct_observers"`
+}
+// NodeReachObserver is one observer that was credited with hearing the
+// target node as the last hop of a path. AvgSNR is nil when no valid SNR
+// sample was recorded; Lat/Lon are nil without GPS; DistanceKm is nil
+// unless both the target and the observer have GPS.
+type NodeReachObserver struct {
+	Pubkey     string   `json:"pubkey"`
+	Name       string   `json:"name"`
+	Count      int      `json:"count"`
+	AvgSNR     *float64 `json:"avg_snr"`
+	Lat        *float64 `json:"lat"`
+	Lon        *float64 `json:"lon"`
+	DistanceKm *float64 `json:"distance_km"`
+}
+// NodeReachLink is the directional evidence between the target node and
+// one peer. WeHear counts evidence that the target heard the peer,
+// TheyHear that the peer heard the target; Bottleneck is the smaller of
+// the two and Bidir is true when both are non-zero. Lat/Lon are nil
+// without GPS; DistanceKm is nil unless both ends have GPS.
+type NodeReachLink struct {
+	Pubkey     string   `json:"pubkey"`
+	Name       string   `json:"name"`
+	Role       string   `json:"role"`
+	Lat        *float64 `json:"lat"`
+	Lon        *float64 `json:"lon"`
+	WeHear     int      `json:"we_hear"`
+	TheyHear   int      `json:"they_hear"`
+	Bottleneck int      `json:"bottleneck"`
+	Bidir      bool     `json:"bidir"`
+	DistanceKm *float64 `json:"distance_km"`
+}
+// NodeReachResponse is the JSON body produced by computeNodeReach and
+// served by handleNodeReach. ReliableTokens lists, sorted, the hop
+// prefixes that were used to recognise the target node in paths.
+type NodeReachResponse struct {
+	Node            NodeReachInfo       `json:"node"`
+	Window          NodeReachWindow     `json:"window"`
+	ReliableTokens  []string            `json:"reliable_tokens"`
+	Importance      NodeReachImportance `json:"importance"`
+	DirectObservers []NodeReachObserver `json:"direct_observers"`
+	Links           []NodeReachLink     `json:"links"`
+}
+
+// fptr returns a pointer to a fresh copy of v.
+func fptr(v float64) *float64 {
+	p := new(float64)
+	*p = v
+	return p
+}
+
+// gpsPtrs converts a node's coordinates into optional values: it yields
+// pointers to copies of (Lat, Lon) when info.HasGPS is set and (nil, nil)
+// otherwise.
+func gpsPtrs(info nodeInfo) (*float64, *float64) {
+	if info.HasGPS {
+		return fptr(info.Lat), fptr(info.Lon)
+	}
+	return nil, nil
+}
+
+// clampDays forces the lookback window into the inclusive range [1,30]:
+// anything below 1 becomes 1, anything above 30 becomes 30. Default
+// callers pass 7.
+func clampDays(d int) int {
+	return max(1, min(d, 30))
+}
+
+// --- bounded TTL cache. perf is gated by the time window; this just avoids
+// recompute under dashboard polling. Keyed "pubkey|days|g<blacklist
+// generation>" (see handleNodeReach). ---
+//
+// reachCacheMax bounds entry count; at ~2KB of marshalled JSON per entry the
+// worst case is well under 1MB, so an entry cap (rather than a byte budget)
+// keeps the bookkeeping trivial while staying memory-safe.
+const (
+	// reachCacheTTL is how long a cached reach response stays servable.
+	reachCacheTTL = 5 * time.Minute
+	// reachCacheMax is the entry count at which inserting a new key
+	// triggers eviction.
+	reachCacheMax = 256
+)
+
+// reachCacheEntry is one cached reach response: the marshalled JSON body
+// and the time it was stored (used for TTL expiry and oldest-first
+// eviction).
+type reachCacheEntry struct {
+	at  time.Time // when the entry was stored
+	raw []byte    // marshalled response; shared with readers, never mutated
+}
+
+// reachState bundles per-server reach caches. Was a set of package-level
+// globals — moved onto *Server so two Server instances (tests, future
+// per-listener) don't share observable state (Independent r2 #2).
+type reachState struct {
+	// cacheMu guards cache. cache is created lazily on first put and is
+	// reset to nil when the blacklist generation changes.
+	cacheMu sync.RWMutex
+	cache   map[string]reachCacheEntry
+	// sf dedups concurrent cold-cache requests for the same key so N
+	// simultaneous callers run the scan + attribution once, not N times.
+	sf singleflight.Group
+
+	// lastSeenBlacklistGen is the BlacklistGeneration() value that the cache
+	// was last reconciled with. When the live generation differs from this
+	// value, the cache is purged wholesale on the next request to prevent
+	// prior-gen entries from accumulating until their TTL expires (#1629
+	// round-2, adversarial #5).
+	lastSeenBlacklistGen atomic.Uint64
+
+	// degreeMu is held while reading degreeSnap, the shared
+	// neighbor-degree snapshot used for degree/rank importance.
+	degreeMu   sync.Mutex
+	degreeSnap *degreeSnapshot
+}
+
+// reachCacheGet looks key up in the reach response cache and returns its
+// marshalled JSON when the entry exists and is no older than
+// reachCacheTTL. The slice handed back is the cached one, not a copy: it
+// is treated as immutable — only ever passed to w.Write — so callers MUST
+// NOT mutate it.
+func (s *Server) reachCacheGet(key string) ([]byte, bool) {
+	s.reach.cacheMu.RLock()
+	entry, found := s.reach.cache[key]
+	s.reach.cacheMu.RUnlock()
+
+	if found && time.Since(entry.at) <= reachCacheTTL {
+		return entry.raw, true
+	}
+	return nil, false
+}
+
+// reachCacheLen reports how many entries the reach response cache holds
+// right now (expired-but-not-yet-evicted entries included). Test helper —
+// exposes the size without leaking the internal mutex/map.
+func (s *Server) reachCacheLen() int {
+	s.reach.cacheMu.RLock()
+	n := len(s.reach.cache)
+	s.reach.cacheMu.RUnlock()
+	return n
+}
+
+// reachPurgeIfBlacklistGenChanged empties the reach cache when gen, the
+// live blacklist generation, differs from the generation the cache was
+// last reconciled with. The recorded generation is updated with a
+// compare-and-swap, and only the goroutine whose swap succeeds performs
+// the purge, so concurrent callers do the work once per generation bump
+// (#1629 round-2, adversarial #5).
+func (s *Server) reachPurgeIfBlacklistGenChanged(gen uint64) {
+	prev := s.reach.lastSeenBlacklistGen.Load()
+	// Nothing to do when the generation is unchanged, or when another
+	// goroutine already advanced it (and is responsible for the purge).
+	if prev == gen || !s.reach.lastSeenBlacklistGen.CompareAndSwap(prev, gen) {
+		return
+	}
+	s.reach.cacheMu.Lock()
+	defer s.reach.cacheMu.Unlock()
+	s.reach.cache = nil
+}
+
+// isHexPubkey reports whether s is exactly 64 characters long and made up
+// solely of the characters 0-9 and a-f. Uppercase hex is rejected on
+// purpose: the handler lowercases its input before calling this.
+func isHexPubkey(s string) bool {
+	if len(s) != 64 {
+		return false
+	}
+	for _, c := range []byte(s) {
+		switch {
+		case '0' <= c && c <= '9', 'a' <= c && c <= 'f':
+			// valid lowercase hex digit
+		default:
+			return false
+		}
+	}
+	return true
+}
+
+// reachCachePut stores raw under key, stamped with the current time. The
+// cache map is created on first use. When key is new and the cache
+// already holds reachCacheMax entries, evictReachLocked is run first to
+// make room; overwriting an existing key never triggers eviction.
+func (s *Server) reachCachePut(key string, raw []byte) {
+	s.reach.cacheMu.Lock()
+	defer s.reach.cacheMu.Unlock()
+
+	if s.reach.cache == nil {
+		s.reach.cache = make(map[string]reachCacheEntry)
+	}
+	_, replacing := s.reach.cache[key]
+	if !replacing && len(s.reach.cache) >= reachCacheMax {
+		s.evictReachLocked()
+	}
+	s.reach.cache[key] = reachCacheEntry{raw: raw, at: time.Now()}
+}
+
+// evictReachLocked makes room in the reach cache in two passes: first
+// every entry older than reachCacheTTL is deleted; if the cache is still
+// at or above reachCacheMax, the single oldest remaining entry is removed
+// as well. This avoids the full-map wipe that thrashed every cached key
+// once the cap was reached. Caller holds s.reach.cacheMu (write).
+func (s *Server) evictReachLocked() {
+	cache := s.reach.cache
+	now := time.Now()
+	for key, entry := range cache {
+		if now.Sub(entry.at) > reachCacheTTL {
+			delete(cache, key)
+		}
+	}
+	if len(cache) < reachCacheMax {
+		return
+	}
+
+	var (
+		victim   string
+		victimAt time.Time
+		have     bool
+	)
+	for key, entry := range cache {
+		if !have || entry.at.Before(victimAt) {
+			victim, victimAt, have = key, entry.at, true
+		}
+	}
+	if have {
+		delete(cache, victim)
+	}
+}
+
+// handleNodeReach serves the reach report for the node named by the
+// "pubkey" route variable.
+//
+// Responses: 400 when the pubkey is not 64 hex chars; 404 when the node is
+// blacklisted, hidden, or has no reach data; 500 when the computation or
+// marshalling fails; otherwise 200 with the JSON body. The optional
+// "days" query parameter selects the lookback window (default 7, clamped
+// by clampDays; unparsable values fall back to the default). Successful
+// bodies are cached per pubkey/days/blacklist-generation.
+func (s *Server) handleNodeReach(w http.ResponseWriter, r *http.Request) {
+	pubkey := strings.ToLower(mux.Vars(r)["pubkey"])
+	// Reject malformed pubkeys up front (cheap defense against cache-key
+	// pollution + wasted work on bogus IDs).
+	if !isHexPubkey(pubkey) {
+		writeError(w, http.StatusBadRequest, "invalid pubkey: expected 64 hex chars")
+		return
+	}
+	// Blacklisted and hidden nodes get the same answer as unknown ones.
+	if (s.cfg != nil && s.cfg.IsBlacklisted(pubkey)) || s.isPubkeyHidden(pubkey) {
+		writeError(w, http.StatusNotFound, "Not found")
+		return
+	}
+
+	days := 7
+	if rawDays := r.URL.Query().Get("days"); rawDays != "" {
+		if parsed, convErr := strconv.Atoi(rawDays); convErr == nil {
+			days = parsed
+		}
+	}
+	days = clampDays(days)
+
+	// The cache key carries the blacklist generation so any mutation via
+	// SetNodeBlacklist invalidates all prior reach cache entries on the
+	// next request (#1629). Without the generation suffix a node added
+	// to the blacklist post-warm would keep being served the cached
+	// non-blacklisted response until the TTL expires.
+	var gen uint64
+	if s.cfg != nil {
+		gen = s.cfg.BlacklistGeneration()
+	}
+	// Purge prior-gen entries wholesale when the generation moves so a
+	// steady stream of operator blacklist edits cannot leak cache entries
+	// up to the TTL. Cheap: one map reset under the cache mutex, only when
+	// the gen actually moved (#1629 round-2, adversarial #5).
+	s.reachPurgeIfBlacklistGenChanged(gen)
+	cacheKey := pubkey + "|" + strconv.Itoa(days) + "|g" + strconv.FormatUint(gen, 10)
+
+	sendJSON := func(body []byte) {
+		w.Header().Set("Content-Type", "application/json")
+		w.Write(body)
+	}
+	if cached, hit := s.reachCacheGet(cacheKey); hit {
+		sendJSON(cached)
+		return
+	}
+
+	// singleflight: collapse a thundering herd on a cold key to one scan. The
+	// shared computation uses the triggering request's context; a disconnect
+	// there can cancel the in-flight scan for all waiters (acceptable — the
+	// next request recomputes).
+	result, err, _ := s.reach.sf.Do(cacheKey, func() (interface{}, error) {
+		// Re-check: an earlier flight may have filled the cache meanwhile.
+		if cached, hit := s.reachCacheGet(cacheKey); hit {
+			return cached, nil
+		}
+		resp, present, computeErr := s.computeNodeReach(r.Context(), pubkey, days)
+		switch {
+		case computeErr != nil:
+			// Real backend failure (e.g. DB scan exploded) — propagate so the
+			// caller renders 500 instead of the misleading empty-reach
+			// response. Do NOT cache. (#1631)
+			return nil, computeErr
+		case !present:
+			// Not found: signalled to the caller as an empty body, not cached.
+			return []byte(nil), nil
+		}
+		encoded, marshalErr := json.Marshal(resp)
+		if marshalErr != nil {
+			log.Printf("[reach] marshal failed for %s: %v", cacheKey, marshalErr)
+			return nil, marshalErr
+		}
+		s.reachCachePut(cacheKey, encoded)
+		return encoded, nil
+	})
+	if err != nil {
+		writeError(w, http.StatusInternalServerError, "reach computation failed")
+		return
+	}
+	body, _ := result.([]byte)
+	if len(body) == 0 {
+		writeError(w, http.StatusNotFound, "Not found")
+		return
+	}
+	sendJSON(body)
+}
+
+// computeNodeReach does the read-only scan + assembly for one node.
+//
+// Parameters: ctx bounds the DB work; pubkey is the lowercase target
+// pubkey; days is the (already clamped) lookback window.
+//
+// Returns: ok=false → 404 (target node not present / inputs unavailable).
+// A non-nil error signals a real backend failure (e.g. DB scan exploded) —
+// caller should render 500, not 404 (issue #1631); the response is the
+// zero value in that case.
+//
+// Links are ordered bidirectional-first, then by bottleneck, then by total
+// evidence; direct observers by count. Both orderings fall back to the
+// pubkey so that the output is deterministic: the slices are filled from
+// map iteration and sort.Slice is not stable, so without a final
+// tiebreak equal-ranked rows would change order between recomputes.
+func (s *Server) computeNodeReach(ctx context.Context, pubkey string, days int) (NodeReachResponse, bool, error) {
+	if s.store == nil || s.db == nil || s.db.conn == nil {
+		return NodeReachResponse{}, false, nil
+	}
+	nodeMap := s.buildNodeInfoMap()
+	self, found := nodeMap[pubkey]
+	if !found {
+		return NodeReachResponse{}, false, nil
+	}
+	_, pm := s.store.getCachedNodesAndPM()
+	tokens := reliableTokens(pubkey, pm)
+
+	since := time.Now().UTC().Add(-time.Duration(days) * 24 * time.Hour)
+	sinceEpoch := since.Unix()
+
+	// Without any reliable token the node cannot be recognised in paths,
+	// so the scan is skipped and all directional counters stay empty.
+	var d dirCounts
+	if len(tokens) > 0 {
+		rows, err := s.scanReachRows(ctx, tokens, sinceEpoch)
+		if err != nil {
+			return NodeReachResponse{}, false, err
+		}
+		d = attributeDirections(rows, tokens, pubkey, newResolver(pm))
+	} else {
+		d = dirCounts{we: map[string]int{}, they: map[string]int{}, obs: map[string]obsAgg{}}
+	}
+
+	// importance: neighbor_edges degree + rank (all-time). Served from a
+	// coarse-TTL snapshot so the full UNION+GROUP-BY aggregate runs at most
+	// once per reachDegreeTTL, not on every cache miss.
+	degree, rank, nodesWithEdges := s.reachDegreeRank(ctx, pubkey)
+
+	// node first_seen comes from nodeInfo (buildNodeInfoMap folds it in via a
+	// single bulk SELECT). Missing → empty string (the node may be
+	// observer-only or pre-first_seen-schema).
+	firstSeen := self.FirstSeen
+
+	// assemble links: one per peer that appears in either direction.
+	links := make([]NodeReachLink, 0, len(d.we)+len(d.they))
+	bidir := 0
+	seen := make(map[string]bool, len(d.we)+len(d.they))
+	for pk := range d.we {
+		seen[pk] = true
+	}
+	for pk := range d.they {
+		seen[pk] = true
+	}
+	for pk := range seen {
+		we, they := d.we[pk], d.they[pk]
+		info := nodeMap[pk]
+		lat, lon := gpsPtrs(info)
+		var dist *float64
+		if self.HasGPS && info.HasGPS {
+			dist = fptr(haversineKm(self.Lat, self.Lon, info.Lat, info.Lon))
+		}
+		b := we > 0 && they > 0
+		if b {
+			bidir++
+		}
+		links = append(links, NodeReachLink{
+			Pubkey: pk, Name: info.Name, Role: info.Role, Lat: lat, Lon: lon,
+			WeHear: we, TheyHear: they, Bottleneck: min(we, they), Bidir: b, DistanceKm: dist,
+		})
+	}
+	sort.Slice(links, func(i, j int) bool {
+		a, b := &links[i], &links[j]
+		if a.Bidir != b.Bidir {
+			return a.Bidir
+		}
+		if a.Bottleneck != b.Bottleneck {
+			return a.Bottleneck > b.Bottleneck
+		}
+		if ta, tb := a.WeHear+a.TheyHear, b.WeHear+b.TheyHear; ta != tb {
+			return ta > tb
+		}
+		// Pubkeys are unique per link, so this makes the order total.
+		return a.Pubkey < b.Pubkey
+	})
+
+	// direct observers
+	directObs := make([]NodeReachObserver, 0, len(d.obs))
+	for pk, a := range d.obs {
+		info := nodeMap[pk]
+		lat, lon := gpsPtrs(info)
+		var avg, dist *float64
+		if a.snrN > 0 {
+			avg = fptr(a.snrSum / float64(a.snrN))
+		}
+		if self.HasGPS && info.HasGPS {
+			dist = fptr(haversineKm(self.Lat, self.Lon, info.Lat, info.Lon))
+		}
+		directObs = append(directObs, NodeReachObserver{
+			Pubkey: pk, Name: info.Name, Count: a.count, AvgSNR: avg, Lat: lat, Lon: lon, DistanceKm: dist,
+		})
+	}
+	sort.Slice(directObs, func(i, j int) bool {
+		if directObs[i].Count != directObs[j].Count {
+			return directObs[i].Count > directObs[j].Count
+		}
+		// Deterministic order among observers with equal counts.
+		return directObs[i].Pubkey < directObs[j].Pubkey
+	})
+
+	toks := make([]string, 0, len(tokens))
+	for t := range tokens {
+		toks = append(toks, t)
+	}
+	sort.Strings(toks)
+
+	selfLat, selfLon := gpsPtrs(self)
+	return NodeReachResponse{
+		Node: NodeReachInfo{Pubkey: pubkey, Name: self.Name, Role: self.Role,
+			Lat: selfLat, Lon: selfLon, FirstSeen: firstSeen},
+		Window:         NodeReachWindow{Days: days, Since: since.Format(time.RFC3339)},
+		ReliableTokens: toks,
+		Importance: NodeReachImportance{
+			NeighborDegree: degree, DegreeRank: rank, NodesWithEdges: nodesWithEdges,
+			RelayObservations: d.relay, BidirectionalLinks: bidir, DirectObservers: len(directObs),
+		},
+		DirectObservers: directObs,
+		Links:           links,
+	}, true, nil
+}
+
+// --- neighbor-degree snapshot ---------------------------------------------
+// The degree/rank importance is identical across all reach requests except the
+// pubkey match, so the full neighbor_edges aggregate is computed once and shared
+// behind a coarse TTL. Rank is a binary search over the descending degree list.
+//
+// reachDegreeTTL is the maximum age at which a cached degree snapshot is still
+// served as-is; an older snapshot triggers a rebuild on the next request.
+const reachDegreeTTL = 60 * time.Second
+
+// degreeSnapshot is one point-in-time aggregate of neighbor_edges: the
+// per-node neighbour counts plus the same counts sorted for rank lookups.
+type degreeSnapshot struct {
+	at         time.Time      // when the snapshot was built; compared against reachDegreeTTL
+	total      int            // nodes that have any edge
+	deg        map[string]int // lowercase pubkey → neighbour count
+	sortedDesc []int          // degrees sorted descending, for rank
+}
+
+// reachDegreeRank looks pubkey up in the shared degree snapshot and returns
+// its neighbour count, its 1-based rank by degree, and the number of nodes
+// that have any edge.
+//
+// All three results are zero when no snapshot is available. A node without
+// edges is reported with degree 0 and rank 0 (the documented "off-the-list"
+// value) alongside the real total. Nodes with equal degree share a rank.
+// NOTE(review): snapshot keys are lowercased and pubkey is used as given —
+// confirm that callers pass a lowercase pubkey.
+func (s *Server) reachDegreeRank(ctx context.Context, pubkey string) (degree, rank, total int) {
+	snap := s.getDegreeSnapshot(ctx)
+	if snap == nil {
+		return 0, 0, 0
+	}
+	total = snap.total
+	degree = snap.deg[pubkey]
+	if degree == 0 {
+		// Unranked: skipping the search avoids reporting "#N+1 / N".
+		return 0, 0, total
+	}
+	// sortedDesc is descending, so the first index holding a value <= degree
+	// equals the number of nodes with a strictly higher degree.
+	higher := sort.Search(len(snap.sortedDesc), func(i int) bool {
+		return snap.sortedDesc[i] <= degree
+	})
+	return degree, higher + 1, total
+}
+
+// getDegreeSnapshot returns the shared neighbour-degree snapshot, rebuilding
+// it from neighbor_edges once it is older than reachDegreeTTL.
+//
+// The rebuild runs without degreeMu held, so concurrent callers that all find
+// the snapshot expired may each run the aggregate query; the last writer wins.
+// If the query or the row iteration fails, the previous snapshot is returned
+// (nil when none was ever built) and nothing is stored, so a result set that
+// was cut short is never cached as if it were complete.
+func (s *Server) getDegreeSnapshot(ctx context.Context) *degreeSnapshot {
+	// Fast path: serve a fresh snapshot under a short lock.
+	s.reach.degreeMu.Lock()
+	if s.reach.degreeSnap != nil && time.Since(s.reach.degreeSnap.at) < reachDegreeTTL {
+		snap := s.reach.degreeSnap
+		s.reach.degreeMu.Unlock()
+		return snap
+	}
+	stale := s.reach.degreeSnap
+	s.reach.degreeMu.Unlock()
+
+	// Rebuild WITHOUT holding the lock so concurrent reach requests aren't
+	// serialized behind the aggregate query. A brief cold-start herd may run a
+	// few redundant queries; the last writer wins.
+	rows, err := s.db.conn.QueryContext(ctx, `
+		SELECT pk, COUNT(*) neigh FROM (
+			SELECT node_a pk FROM neighbor_edges
+			UNION ALL SELECT node_b FROM neighbor_edges
+		) GROUP BY pk`)
+	if err != nil {
+		log.Printf("[reach] degree snapshot query failed: %v (serving stale)", err)
+		return stale // serve stale on error rather than zeroing
+	}
+	defer rows.Close()
+	deg := make(map[string]int)
+	var sortedDesc []int
+	var skipped int // rows that could not be decoded; reported below instead of vanishing
+	for rows.Next() {
+		var pk string
+		var neigh int
+		if rows.Scan(&pk, &neigh) != nil {
+			skipped++
+			continue
+		}
+		deg[strings.ToLower(pk)] = neigh
+		sortedDesc = append(sortedDesc, neigh)
+	}
+	// rows.Next also returns false when iteration aborts (cancelled context,
+	// driver error). Publishing what was read so far would understate every
+	// degree and rank for a full TTL, so keep the previous snapshot instead.
+	if err := rows.Err(); err != nil {
+		log.Printf("[reach] degree snapshot rows iteration failed: %v (serving stale)", err)
+		return stale
+	}
+	if skipped > 0 {
+		log.Printf("[reach] degree snapshot discarded %d undecodable rows (kept %d)", skipped, len(sortedDesc))
+	}
+	sort.Sort(sort.Reverse(sort.IntSlice(sortedDesc)))
+	snap := &degreeSnapshot{at: time.Now(), total: len(deg), deg: deg, sortedDesc: sortedDesc}
+	s.reach.degreeMu.Lock()
+	s.reach.degreeSnap = snap
+	s.reach.degreeMu.Unlock()
+	return snap
+}
+
+// scanReachRows loads the observations recorded at or after sinceEpoch whose
+// path_json contains at least one of the given tokens, along with the
+// observer id, originator pubkey, payload type and SNR needed for
+// attribution. Observer id and originator pubkey are lowercased in SQL (not
+// per row); each path is decoded by parsePathTokens. At most
+// reachScanRowLimit rows are requested from the database.
+//
+// An empty token set yields (nil, nil) without touching the database. Rows
+// that fail to scan or decode to an empty path are dropped and counted in a
+// single log line.
+//
+// Returns a non-nil error if the underlying QueryContext or rows.Err() fails;
+// callers MUST treat that as a 500 (issue #1631 — previously the error was
+// swallowed, surfacing a transient DB failure as a misleading 404 / empty
+// reach to operators).
+func (s *Server) scanReachRows(ctx context.Context, tokens map[string]bool, sinceEpoch int64) ([]pathRow, error) {
+	if len(tokens) == 0 {
+		return nil, nil // defensive: an empty LIKE chain would render `AND ()` (SQL error)
+	}
+	// Emit the tokens in sorted order so the generated SQL text is
+	// byte-stable across requests with the same token set — preserves the
+	// driver's prepared-statement cache and keeps query plans reproducible
+	// (Independent r2 #3).
+	ordered := make([]string, 0, len(tokens))
+	for tok := range tokens {
+		ordered = append(ordered, tok)
+	}
+	sort.Strings(ordered)
+
+	// Bind order follows placeholder order: since, one pattern per token, limit.
+	clauses := make([]string, len(ordered))
+	params := make([]interface{}, 0, len(ordered)+2)
+	params = append(params, sinceEpoch)
+	for i, tok := range ordered {
+		clauses[i] = "o.path_json LIKE ?"
+		params = append(params, "%\""+tok+"\"%")
+	}
+	params = append(params, reachScanRowLimit)
+
+	query := `SELECT LOWER(COALESCE(obs.id,'')), LOWER(COALESCE(t.from_pubkey,'')), COALESCE(t.payload_type,0), o.path_json, o.snr
+	      FROM observations o
+	      JOIN transmissions t ON t.id = o.transmission_id
+	      LEFT JOIN observers obs ON obs.rowid = o.observer_idx
+	      WHERE o.timestamp >= ? AND (` + strings.Join(clauses, " OR ") + `)
+	      LIMIT ?`
+	rows, err := s.db.conn.QueryContext(ctx, query, params...)
+	if err != nil {
+		log.Printf("[reach] scan query failed: %v", err)
+		return nil, err
+	}
+	defer rows.Close()
+
+	// Modest preallocation: most nodes return far fewer than the cap, so seed a
+	// reasonable capacity rather than reserving reachScanRowLimit up front.
+	result := make([]pathRow, 0, 2048)
+	dropped := 0 // malformed/empty rows discarded — surfaced below so ingest bugs aren't silent
+	for rows.Next() {
+		var observerID, originPK, rawPath string
+		var payloadType int
+		var snr sql.NullFloat64
+		if scanErr := rows.Scan(&observerID, &originPK, &payloadType, &rawPath, &snr); scanErr != nil {
+			dropped++
+			continue
+		}
+		hops := parsePathTokens(rawPath)
+		if len(hops) == 0 {
+			dropped++
+			continue
+		}
+		row := pathRow{observerPK: observerID, fromPubkey: originPK, payloadType: payloadType, path: hops}
+		if snr.Valid {
+			row.snr, row.snrValid = snr.Float64, true
+		}
+		result = append(result, row)
+	}
+	if dropped > 0 {
+		log.Printf("[reach] scan discarded %d malformed/empty rows (kept %d)", dropped, len(result))
+	}
+	if iterErr := rows.Err(); iterErr != nil {
+		log.Printf("[reach] scan rows iteration failed: %v", iterErr)
+		return nil, iterErr
+	}
+	return result, nil
+}
@@ -0,0 +1,175 @@
+package main
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"testing"
+
+	_ "modernc.org/sqlite"
+)
+
+// benchReachDB builds an in-memory DB with nObs observations. matchEvery
+// controls payload mix: 1 = every row contains the "01FA" token (worst case),
+// 2 = every other row matches (the rest carry an unrelated path), etc. This
+// lets benches measure the scan over a realistic mix, not just all-matching.
+// Values of matchEvery below 1 are treated as 1. With lowerHops set, the
+// stored paths use lowercase hex.
+//
+// The connection is closed through b.Cleanup, so callers must not close it
+// themselves. Any setup failure aborts the benchmark via b.Fatal.
+func benchReachDB(b *testing.B, nObs, matchEvery int, lowerHops bool) *DB {
+	b.Helper()
+	if matchEvery < 1 {
+		matchEvery = 1
+	}
+	matchPath, fillerPath := `["AA","01FA","BB"]`, `["AA","CC","BB"]`
+	if lowerHops {
+		// Lowercase hops force parsePathTokens' ToUpper to allocate (production
+		// path_json is uppercase; this measures the worst case Carmack flagged).
+		matchPath, fillerPath = `["aa","01fa","bb"]`, `["aa","cc","bb"]`
+	}
+	conn, err := sql.Open("sqlite", ":memory:")
+	if err != nil {
+		b.Fatal(err)
+	}
+	// The benchmark function (and each b.Run closure) is invoked again for
+	// every b.N calibration step; without this, each invocation would leave
+	// another populated in-memory database open until the process exits.
+	b.Cleanup(func() { _ = conn.Close() })
+	schema := []string{
+		`CREATE TABLE transmissions (id INTEGER PRIMARY KEY, hash TEXT, first_seen TEXT, payload_type INTEGER, from_pubkey TEXT)`,
+		`CREATE TABLE observers (id TEXT PRIMARY KEY, name TEXT)`,
+		`CREATE TABLE observations (id INTEGER PRIMARY KEY, transmission_id INTEGER, observer_idx INTEGER, snr REAL, path_json TEXT, timestamp INTEGER)`,
+		`CREATE INDEX idx_obs_ts ON observations(timestamp)`,
+	}
+	for _, s := range schema {
+		if _, err := conn.Exec(s); err != nil {
+			b.Fatal(err)
+		}
+	}
+	// One transaction for all inserts keeps fixture setup fast at 100k rows.
+	tx, err := conn.Begin()
+	if err != nil {
+		b.Fatal(err)
+	}
+	if _, err := tx.Exec(`INSERT INTO observers (id, name) VALUES ('OBS', 'o')`); err != nil {
+		b.Fatal(err)
+	}
+	for i := 0; i < nObs; i++ {
+		if _, err := tx.Exec(`INSERT INTO transmissions (id, hash, first_seen, payload_type, from_pubkey) VALUES (?,?,?,5,'')`,
+			i, fmt.Sprintf("h%d", i), "2026-06-07T00:00:00Z"); err != nil {
+			b.Fatal(err)
+		}
+		path := fillerPath // non-matching filler
+		if i%matchEvery == 0 {
+			path = matchPath
+		}
+		if _, err := tx.Exec(`INSERT INTO observations (id, transmission_id, observer_idx, snr, path_json, timestamp) VALUES (?,?,1,-7.0,?,?)`,
+			i, i, path, 1000); err != nil {
+			b.Fatal(err)
+		}
+	}
+	if err := tx.Commit(); err != nil {
+		b.Fatal(err)
+	}
+	return &DB{conn: conn}
+}
+
+// BenchmarkNodeReachScan measures the windowed scan + path-decode at increasing
+// scale, all-matching (worst case for memory/allocs). A scan error aborts the
+// sub-benchmark with the error text rather than a bare "expected rows".
+func BenchmarkNodeReachScan(b *testing.B) {
+	tokens := map[string]bool{"01FA": true}
+	for _, n := range []int{1000, 10000, 100000} {
+		b.Run(fmt.Sprintf("rows=%d", n), func(b *testing.B) {
+			db := benchReachDB(b, n, 1, false)
+			srv := &Server{db: db}
+			b.ReportAllocs()
+			b.ResetTimer()
+			for i := 0; i < b.N; i++ {
+				rows, err := srv.scanReachRows(context.Background(), tokens, 0)
+				if err != nil {
+					b.Fatalf("scanReachRows: %v", err)
+				}
+				if len(rows) == 0 {
+					b.Fatal("expected rows")
+				}
+			}
+		})
+	}
+}
+
+// BenchmarkNodeReachScanMixed measures the scan when only half the windowed
+// rows actually contain the token — closer to production path mixes. A scan
+// error aborts the benchmark with the error text rather than a bare
+// "expected rows".
+func BenchmarkNodeReachScanMixed(b *testing.B) {
+	tokens := map[string]bool{"01FA": true}
+	db := benchReachDB(b, 100000, 2, false)
+	srv := &Server{db: db}
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		rows, err := srv.scanReachRows(context.Background(), tokens, 0)
+		if err != nil {
+			b.Fatalf("scanReachRows: %v", err)
+		}
+		if len(rows) == 0 {
+			b.Fatal("expected rows")
+		}
+	}
+}
+
+// BenchmarkNodeReachScanLowerCase measures the worst case for path decoding:
+// lowercase hops force parsePathTokens' ToUpper to allocate a new string per
+// hop (production path_json is uppercase, where ToUpper is a no-op). Publishing
+// this alongside the all-uppercase numbers keeps the perf claims honest. A
+// scan error aborts the benchmark with the error text rather than a bare
+// "expected rows".
+func BenchmarkNodeReachScanLowerCase(b *testing.B) {
+	tokens := map[string]bool{"01FA": true}
+	db := benchReachDB(b, 100000, 1, true)
+	srv := &Server{db: db}
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		rows, err := srv.scanReachRows(context.Background(), tokens, 0)
+		if err != nil {
+			b.Fatalf("scanReachRows: %v", err)
+		}
+		if len(rows) == 0 {
+			b.Fatal("expected rows")
+		}
+	}
+}
+
+// BenchmarkNodeReachAttribute measures the directional attribution pass over an
+// already-scanned row set (the in-memory hot loop + map building), isolated
+// from DB I/O. The rows are scanned once during setup, before the timer is
+// reset; a scan error there aborts the benchmark with the error text.
+func BenchmarkNodeReachAttribute(b *testing.B) {
+	tokens := map[string]bool{"01FA": true}
+	db := benchReachDB(b, 100000, 1, false)
+	srv := &Server{db: db}
+	rows, err := srv.scanReachRows(context.Background(), tokens, 0)
+	if err != nil {
+		b.Fatalf("scanReachRows: %v", err)
+	}
+	if len(rows) == 0 {
+		b.Fatal("expected rows")
+	}
+	// Only the two neighbour tokens used by the fixture paths resolve; every
+	// other token maps to "".
+	resolve := func(tok string) string {
+		switch tok {
+		case "AA":
+			return "aa00000000000000"
+		case "BB":
+			return "bb00000000000000"
+		}
+		return ""
+	}
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		d := attributeDirections(rows, tokens, "01fa326b", resolve)
+		if d.relay == 0 {
+			b.Fatal("expected relay hits")
+		}
+	}
+}
+
+// TestScanReachRows_ErrorReturn anchors the new ([]pathRow, error) signature
+// at the unit-level (issue #1631). Passing a Server whose db.conn is closed
+// must surface an error, not a swallowed nil. Lives in this file because
+// the bench callers in the same file rely on the same signature.
+func TestScanReachRows_ErrorReturn(t *testing.T) {
+	conn, err := sql.Open("sqlite", ":memory:")
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	// PREFLIGHT: async=true reason="test-only in-memory scratch schema, immediately closed"
+	if _, err := conn.Exec(`CREATE TABLE observations (id INTEGER); CREATE TABLE transmissions (id INTEGER); CREATE TABLE observers (rowid INTEGER, id TEXT)`); err != nil {
+		t.Fatalf("schema: %v", err)
+	}
+	// Force QueryContext to fail. The whole test rests on the handle really
+	// being closed, so a failed Close is reported instead of ignored.
+	if err := conn.Close(); err != nil {
+		t.Fatalf("close: %v", err)
+	}
+	srv := &Server{db: &DB{conn: conn}}
+	rows, err := srv.scanReachRows(context.Background(), map[string]bool{"01FA": true}, 0)
+	if err == nil {
+		t.Fatalf("expected error from closed DB, got nil (rows=%d)", len(rows))
+	}
+	if rows != nil {
+		t.Fatalf("expected nil rows on error, got %d", len(rows))
+	}
+}
@@ -0,0 +1,124 @@
+package main
+
+import (
+	"net/http"
+	"testing"
+)
+
+// TestNodeReach_BlacklistMutationBustsCache reproduces #1629.
+//
+// Scenario:
+//  1. Warm the reach response cache with a non-blacklisted pubkey (200 OK).
+//  2. Operator blacklists that pubkey via SetNodeBlacklist (the legitimate
+//     mutation entry point — config reload, admin call, etc.).
+//  3. The very next /reach request for that pubkey MUST return 404 (the
+//     blacklist response), not the cached 200 payload.
+//
+// Pre-fix the blacklist set is locked in by sync.Once at first read, so
+// IsBlacklisted keeps returning false after the mutation; the cache then
+// re-serves the prior reach body and the assertion fails.
+func TestNodeReach_BlacklistMutationBustsCache(t *testing.T) {
+	// Called without servers, resetReachState clears nothing; isolation comes
+	// from the freshly built Server below.
+	resetReachState(t)
+	db, n := newReachIntegrationDB(t, `["AABB","01FA","CCDD"]`)
+	defer db.conn.Close()
+
+	// Start with a non-empty blacklist (some unrelated decoy pubkey) so the
+	// blacklist set is materialised on the first IsBlacklisted call below.
+	// This is the realistic state: a deployment running with a populated
+	// blacklist where the operator later ADDS a new entry.
+	decoy := pk64("dec0")
+	cfg := &Config{NodeBlacklist: []string{decoy}}
+	srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
+
+	// 1. Warm cache (must 200 and populate cache).
+	rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
+	if rr.Code != http.StatusOK {
+		t.Fatalf("warm-up: status=%d want 200 (body=%s)", rr.Code, rr.Body.String())
+	}
+	if srv.reachCacheLen() == 0 {
+		t.Fatalf("warm-up did not populate reach cache")
+	}
+
+	// 2. Operator adds the target node to the blacklist via the public setter.
+	cfg.SetNodeBlacklist([]string{decoy, n})
+
+	// 3. Next request MUST return 404. With the bug, the sync.Once-cached
+	// blacklist set is stale (it was built before the mutation and holds only
+	// the decoy), so IsBlacklisted returns false, the response cache hits,
+	// and the prior 200 body is re-served.
+	rr2 := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
+	if rr2.Code != http.StatusNotFound {
+		t.Fatalf("post-blacklist mutation: status=%d want 404 (cached payload was served — #1629)", rr2.Code)
+	}
+}
+
+// TestConfig_BlacklistGenerationIncrements walks a sequence of
+// SetNodeBlacklist calls and requires the generation counter to advance by
+// exactly 1 after each one: new content, identical content, and nil alike.
+// The /reach cache key embeds this generation, so "any call invalidates"
+// (rather than "content-diff invalidates") is part of the package's public
+// contract (adversarial #4 from round-1 polish).
+func TestConfig_BlacklistGenerationIncrements(t *testing.T) {
+	cfg := &Config{}
+	steps := []struct {
+		label string
+		list  []string
+	}{
+		{"first SetNodeBlacklist", []string{"aa"}},
+		// Identical content — generation MUST still bump.
+		{"second SetNodeBlacklist (same content)", []string{"aa"}},
+		// Empty mutation also bumps.
+		{"nil SetNodeBlacklist", nil},
+	}
+	prev := cfg.BlacklistGeneration()
+	for _, step := range steps {
+		cfg.SetNodeBlacklist(step.list)
+		cur := cfg.BlacklistGeneration()
+		if cur != prev+1 {
+			t.Fatalf("%s: gen %d -> %d (want +1)", step.label, prev, cur)
+		}
+		prev = cur
+	}
+}
+
+// TestNodeReach_BlacklistMutationPurgesCache asserts that a blacklist
+// mutation evicts ALL prior reach cache entries (not just the affected
+// pubkey) on the next /reach request. Per adversarial #5, the previous
+// gen-suffix-only design left every prior cached entry stranded until TTL,
+// growing the cache by N entries per operator edit. The current design
+// purges on generation bump (detected on the next handler invocation) so a
+// steady stream of edits cannot leak entries unboundedly.
+func TestNodeReach_BlacklistMutationPurgesCache(t *testing.T) {
+	// Called without servers, resetReachState clears nothing; isolation comes
+	// from the freshly built Server below.
+	resetReachState(t)
+	db, n := newReachIntegrationDB(t, `["AABB","01FA","CCDD"]`)
+	defer db.conn.Close()
+
+	cfg := &Config{}
+	srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
+
+	// Warm cache with two distinct keys (different days param).
+	for _, days := range []string{"30", "7"} {
+		rr := serveReach(srv, "/api/nodes/"+n+"/reach?days="+days)
+		if rr.Code != http.StatusOK {
+			t.Fatalf("warm-up days=%s: status=%d want 200", days, rr.Code)
+		}
+	}
+	before := srv.reachCacheLen()
+	if before < 2 {
+		t.Fatalf("warm-up populated %d entries, want >=2", before)
+	}
+
+	// Unrelated blacklist mutation. The cached pubkey is not in the
+	// blacklist, but prior entries are now keyed under a stale generation
+	// and would otherwise sit until TTL.
+	cfg.SetNodeBlacklist([]string{pk64("dead")})
+
+	// Next /reach request triggers the purge inside the reach path.
+	rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
+	if rr.Code != http.StatusOK {
+		t.Fatalf("post-mutation request: status=%d want 200", rr.Code)
+	}
+	// After the purge + this single re-populate we expect exactly 1 entry,
+	// not the 2 stale + 1 new = 3 that the leaky design would leave behind.
+	if got := srv.reachCacheLen(); got != 1 {
+		t.Fatalf("post-mutation cache len = %d, want 1 (prior entries leaked — adv #5)", got)
+	}
+}
@@ -0,0 +1,312 @@
+package main
+
+import (
+	"database/sql"
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/gorilla/mux"
+	_ "modernc.org/sqlite"
+)
+
+// serveReach sends a GET for path through a throwaway router that has only
+// the reach route registered, and returns the recorded response. Building a
+// new router per call keeps every request independent of routes set up
+// elsewhere.
+func serveReach(srv *Server, path string) *httptest.ResponseRecorder {
+	rec := httptest.NewRecorder()
+	r := mux.NewRouter()
+	r.HandleFunc("/api/nodes/{pubkey}/reach", srv.handleNodeReach).Methods("GET")
+	r.ServeHTTP(rec, httptest.NewRequest("GET", path, nil))
+	return rec
+}
+
+// pk64 right-pads stem with '0' characters to a total length of 64, the
+// length of a full hex pubkey. The stem is used as given (its case is not
+// changed); a stem longer than 64 characters makes strings.Repeat panic.
+func pk64(stem string) string {
+	padding := strings.Repeat("0", 64-len(stem))
+	return stem + padding
+}
+
+// resetReachState empties the reach response cache and drops the degree
+// snapshot of every non-nil server passed in, once immediately and once more
+// from t.Cleanup, so handler tests cannot observe each other's state.
+// Now operates on *Server (was package globals — Independent r2 #2). The
+// server list is variadic so older call sites that pass none still compile;
+// for those the call resets nothing (used by tests that build the Server
+// fresh and don't need state cleared).
+func resetReachState(t *testing.T, servers ...*Server) {
+	t.Helper()
+	wipe := func() {
+		for i := range servers {
+			srv := servers[i]
+			if srv != nil {
+				srv.reach.cacheMu.Lock()
+				srv.reach.cache = map[string]reachCacheEntry{}
+				srv.reach.cacheMu.Unlock()
+
+				srv.reach.degreeMu.Lock()
+				srv.reach.degreeSnap = nil
+				srv.reach.degreeMu.Unlock()
+			}
+		}
+	}
+	wipe()
+	t.Cleanup(wipe)
+}
+
+// newReachIntegrationDB builds a complete observer_idx-schema DB with a target
+// node N, two neighbours A/B, and one observation on obsPath so the HTTP handler
+// exercises real directional attribution. Pass a path that omits N's token to
+// build the zero-reach case (identifiable node, no matching observations).
+func newReachIntegrationDB(t *testing.T, obsPath string) (*DB, string) {
+	t.Helper()
+	conn, err := sql.Open("sqlite", ":memory:")
+	if err != nil {
+		t.Fatal(err)
+	}
+	n := pk64("01fa") // target — unique 2-byte token "01fa"
+	a := pk64("aabb") // predecessor → we hear A
+	b := pk64("ccdd") // successor → B hears us
+	now := time.Now().Unix()
+	stmts := []string{
+		`CREATE TABLE nodes (public_key TEXT, name TEXT, role TEXT, lat REAL, lon REAL, last_seen TEXT, first_seen TEXT, advert_count INTEGER)`,
+		`CREATE TABLE transmissions (id INTEGER PRIMARY KEY, from_pubkey TEXT, payload_type INTEGER)`,
+		`CREATE TABLE observers (id TEXT)`,
+		`CREATE TABLE observations (id INTEGER PRIMARY KEY, transmission_id INTEGER, observer_idx INTEGER, snr REAL, path_json TEXT, timestamp INTEGER)`,
+		`CREATE TABLE neighbor_edges (node_a TEXT, node_b TEXT, count INTEGER)`,
+	}
+	for _, s := range stmts {
+		if _, err := conn.Exec(s); err != nil {
+			t.Fatal(err)
+		}
+	}
+	ins := []struct {
+		q    string
+		args []interface{}
+	}{
+		{`INSERT INTO nodes VALUES (?, 'N', 'repeater', 50.9, 5.4, ?, '2026-06-01T00:00:00Z', 3)`, []interface{}{n, "2026-06-07T00:00:00Z"}},
+		{`INSERT INTO nodes VALUES (?, 'A', 'repeater', 51.0, 5.5, ?, '2026-06-01T00:00:00Z', 1)`, []interface{}{a, "2026-06-07T00:00:00Z"}},
+		{`INSERT INTO nodes VALUES (?, 'B', 'repeater', 51.1, 5.6, ?, '2026-06-01T00:00:00Z', 1)`, []interface{}{b, "2026-06-07T00:00:00Z"}},
+		{`INSERT INTO observers (id) VALUES ('OBS1')`, nil},
+		{`INSERT INTO transmissions (id, from_pubkey, payload_type) VALUES (1, '', 5)`, nil},
+		{`INSERT INTO observations (id, transmission_id, observer_idx, snr, path_json, timestamp) VALUES (1,1,1,-7.0,?,?)`, []interface{}{obsPath, now}},
+	}
+	for _, in := range ins {
+		if _, err := conn.Exec(in.q, in.args...); err != nil {
+			t.Fatal(err)
+		}
+	}
+	return &DB{conn: conn, isV3: true}, n
+}
+
+// TestClampDays pins clampDays' expected outputs: inputs below 1 come back as
+// 1, inputs above 30 come back as 30, and values in between are unchanged.
+func TestClampDays(t *testing.T) {
+	table := []struct {
+		days     int
+		expected int
+	}{
+		{0, 1}, {-5, 1}, {1, 1}, {7, 7}, {30, 30}, {31, 30}, {999, 30},
+	}
+	for _, row := range table {
+		got := clampDays(row.days)
+		if got == row.expected {
+			continue
+		}
+		t.Errorf("clampDays(%d)=%d want %d", row.days, got, row.expected)
+	}
+}
+
+// TestNodeReach_UnknownNode requests reach for a well-formed pubkey on a
+// server built by makeTestServer (no store/db wired) and expects 404.
+func TestNodeReach_UnknownNode(t *testing.T) {
+	srv := makeTestServer(makeTestGraph()) // no store/db wired → 404
+	target := "/api/nodes/" + pk64("deadbeef") + "/reach"
+	if rr := serveReach(srv, target); rr.Code != http.StatusNotFound {
+		t.Fatalf("status=%d want 404", rr.Code)
+	}
+}
+
+// TestNodeReach_InvalidPubkey sends several malformed pubkey path values and
+// expects the handler to answer 400 for each. Every failure is reported; the
+// loop does not stop at the first one.
+func TestNodeReach_InvalidPubkey(t *testing.T) {
+	srv := makeTestServer(makeTestGraph())
+	malformed := []string{"deadbeef", "xyz", pk64("01") + "zz"}
+	for i := range malformed {
+		rr := serveReach(srv, "/api/nodes/"+malformed[i]+"/reach")
+		if rr.Code == http.StatusBadRequest {
+			continue
+		}
+		t.Errorf("pubkey %q: status=%d want 400", malformed[i], rr.Code)
+	}
+}
+
+// TestNodeReach_ValidPubkeyNotInNodes wires a real store and DB, then asks
+// for a syntactically valid pubkey that was never inserted; the handler must
+// answer 404.
+func TestNodeReach_ValidPubkeyNotInNodes(t *testing.T) {
+	resetReachState(t)
+	db := setupTestDBv2(t)
+	cfg := &Config{}
+	srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
+
+	// Syntactically valid pubkey that was never inserted → real 404 path.
+	absent := pk64("beef")
+	rr := serveReach(srv, "/api/nodes/"+absent+"/reach")
+	if got := rr.Code; got != http.StatusNotFound {
+		t.Fatalf("status=%d want 404 (body=%s)", got, rr.Body.String())
+	}
+}
+
+// TestNodeReach_BlacklistedReturns404 configures a server whose blacklist
+// contains the requested pubkey and expects 404. The server has no store or
+// db, so the test only passes if the handler answers before touching either.
+func TestNodeReach_BlacklistedReturns404(t *testing.T) {
+	blocked := pk64("01fa")
+	srv := &Server{cfg: &Config{NodeBlacklist: []string{blocked}}}
+	rr := serveReach(srv, "/api/nodes/"+blocked+"/reach")
+	if got := rr.Code; got != http.StatusNotFound {
+		t.Fatalf("blacklisted pubkey: status=%d want 404", got)
+	}
+}
+
+// TestNodeReach_AttributionAndCacheHit runs the handler against a DB whose
+// single observation has the path A → N → B. It expects at least one relay
+// observation, a we_hear link to A and a they_hear link to B, then checks
+// that the response was stored under the generation-suffixed cache key and
+// that a second identical request returns a byte-identical body.
+func TestNodeReach_AttributionAndCacheHit(t *testing.T) {
+	resetReachState(t)
+	db, n := newReachIntegrationDB(t, `["AABB","01FA","CCDD"]`)
+	defer db.conn.Close()
+	cfg := &Config{}
+	srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
+
+	rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
+	if rr.Code != http.StatusOK {
+		t.Fatalf("status=%d want 200 (body=%s)", rr.Code, rr.Body.String())
+	}
+	var resp NodeReachResponse
+	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("bad json: %v", err)
+	}
+	if resp.Importance.RelayObservations < 1 {
+		t.Fatalf("expected ≥1 relay observation, got %d", resp.Importance.RelayObservations)
+	}
+	// Links are matched by node name, which the fixture sets to "A" and "B".
+	var weHearA, theyHearB bool
+	for _, l := range resp.Links {
+		if l.Name == "A" && l.WeHear >= 1 {
+			weHearA = true
+		}
+		if l.Name == "B" && l.TheyHear >= 1 {
+			theyHearB = true
+		}
+	}
+	if !weHearA {
+		t.Errorf("expected we_hear≥1 for neighbour A, links=%+v", resp.Links)
+	}
+	if !theyHearB {
+		t.Errorf("expected they_hear≥1 for neighbour B, links=%+v", resp.Links)
+	}
+
+	// Cache hit: the key (now generation-suffixed, #1629) must be populated
+	// and a second request must 200.
+	wantKey := n + "|30|g" + strconv.FormatUint(srv.cfg.BlacklistGeneration(), 10)
+	if _, ok := srv.reachCacheGet(wantKey); !ok {
+		t.Fatalf("expected reach response to be cached under %q", wantKey)
+	}
+	rr2 := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
+	if rr2.Code != http.StatusOK || rr2.Body.String() != rr.Body.String() {
+		t.Fatalf("cache-hit response differs: code=%d", rr2.Code)
+	}
+}
+
+// TestNodeReach_ZeroReach covers the zero-reach happy path: a node that IS
+// identifiable (has reliable tokens) but whose observations contain none of
+// its tokens must return 200 with empty arrays — NOT 404. A wrong
+// implementation that 404s here passes every other test.
+// (docs/api-spec.md contract.)
+func TestNodeReach_ZeroReach(t *testing.T) {
+	resetReachState(t)
+	db, n := newReachIntegrationDB(t, `["AABB","CCDD"]`) // path omits N's "01FA" token
+	defer db.conn.Close()
+	cfg := &Config{}
+	srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
+
+	rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
+	if rr.Code != http.StatusOK {
+		t.Fatalf("zero-reach must be 200 not 404, got %d (body=%s)", rr.Code, rr.Body.String())
+	}
+	var resp NodeReachResponse
+	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("bad json: %v", err)
+	}
+	// The node must still resolve to tokens even though nothing matched them.
+	if len(resp.ReliableTokens) == 0 {
+		t.Fatalf("node should still be identifiable (reliable tokens present)")
+	}
+	if len(resp.Links) != 0 || len(resp.DirectObservers) != 0 || resp.Importance.RelayObservations != 0 {
+		t.Fatalf("expected empty reach, got links=%d obs=%d relay=%d",
+			len(resp.Links), len(resp.DirectObservers), resp.Importance.RelayObservations)
+	}
+}
+
+// TestNodeReach_ShapeAndClamp checks the response shape for a node with no
+// observations: an out-of-range days=999 must be reported back as 30, the
+// three array fields must decode as non-nil, the reliable tokens must include
+// "01FA", and first_seen must match the value stored in the nodes table.
+func TestNodeReach_ShapeAndClamp(t *testing.T) {
+	resetReachState(t)
+	db := setupTestDBv2(t)
+	const pk = "01fa326b475800a31105abcb9e4cac000b3e5d9e2b5ba0739981ce8d5f3a6754"
+	mustExecDB(t, db, `INSERT INTO nodes (public_key, name, role, lat, lon, last_seen, first_seen, advert_count)
+		VALUES ('`+pk+`', 'BE-Test', 'repeater', 50.9, 5.4, '2026-06-07T00:00:00Z', '2026-06-01T00:00:00Z', 3)`)
+	// scanReachRows joins observations on observer_idx; the v2 schema's
+	// observations table lacks that column. Previously the scan error was
+	// swallowed (issue #1631) and the test still saw empty arrays. With the
+	// fix that returns 500, we rebuild observations to the observer_idx
+	// shape (empty — no rows needed for shape/clamp assertions).
+	mustExecDB(t, db, `DROP TABLE observations`)
+	// PREFLIGHT: async=true reason="test-only in-memory schema rebuild; not a production migration"
+	mustExecDB(t, db, `CREATE TABLE observations (
+		id INTEGER PRIMARY KEY AUTOINCREMENT,
+		transmission_id INTEGER,
+		observer_idx INTEGER,
+		snr REAL,
+		path_json TEXT,
+		timestamp INTEGER
+	)`)
+
+	cfg := &Config{}
+	srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
+
+	rr := serveReach(srv, "/api/nodes/"+pk+"/reach?days=999")
+	if rr.Code != http.StatusOK {
+		t.Fatalf("status=%d want 200 (body=%s)", rr.Code, rr.Body.String())
+	}
+	var resp NodeReachResponse
+	if err := json.Unmarshal(rr.Body.Bytes(), &resp); err != nil {
+		t.Fatalf("bad json: %v", err)
+	}
+	if resp.Window.Days != 30 {
+		t.Fatalf("days not clamped to 30: %d", resp.Window.Days)
+	}
+	// A JSON null would decode to a nil slice, so these checks fail if the
+	// handler emitted null instead of [].
+	if resp.Links == nil || resp.DirectObservers == nil || resp.ReliableTokens == nil {
+		t.Fatalf("array fields must be non-nil (never null)")
+	}
+	if !contains(resp.ReliableTokens, "01FA") {
+		t.Fatalf("expected 01FA reliable token, got %v", resp.ReliableTokens)
+	}
+	if resp.Node.FirstSeen != "2026-06-01T00:00:00Z" {
+		t.Fatalf("first_seen not sourced from nodes table: %q", resp.Node.FirstSeen)
+	}
+}
+
+// TestNodeReach_ScanDBErrorReturns500 guards issue #1631: a DB failure inside
+// scanReachRows must surface as 500, not as a misleading "no reach" 200 or
+// 404. We warm the integration DB, drop the observations table so the next
+// reach scan query fails inside QueryContext, then assert the handler returns
+// 500. Before the fix, scanReachRows swallowed the error and returned nil, so
+// the handler answered 200 with empty arrays.
+func TestNodeReach_ScanDBErrorReturns500(t *testing.T) {
+	resetReachState(t)
+	db, n := newReachIntegrationDB(t, `["AABB","01FA","CCDD"]`)
+	defer db.conn.Close()
+	cfg := &Config{}
+	srv := &Server{store: newTestStoreWithDB(t, db, cfg), db: db, cfg: cfg, perfStats: NewPerfStats()}
+
+	// Warm the store's node cache (so buildNodeInfoMap on the failing call
+	// still finds the target node). One healthy call also primes the
+	// reach response cache — clear it below so the next call recomputes.
+	if rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30"); rr.Code != http.StatusOK {
+		t.Fatalf("warm-up call: status=%d want 200 (body=%s)", rr.Code, rr.Body.String())
+	}
+	srv.reach.cacheMu.Lock()
+	srv.reach.cache = map[string]reachCacheEntry{}
+	srv.reach.cacheMu.Unlock()
+
+	// Break the table that scanReachRows reads from. nodes / observers /
+	// neighbor_edges remain intact so the failure is isolated to the
+	// scanReachRows QueryContext path.
+	if _, err := db.conn.Exec("DROP TABLE observations"); err != nil {
+		t.Fatalf("drop observations: %v", err)
+	}
+
+	rr := serveReach(srv, "/api/nodes/"+n+"/reach?days=30")
+	if rr.Code != http.StatusInternalServerError {
+		t.Fatalf("expected 500 on DB error inside scanReachRows, got %d (body=%s)", rr.Code, rr.Body.String())
+	}
+}
+
+// contains reports whether v is an element of s. A nil or empty s yields
+// false.
+func contains(s []string, v string) bool {
+	for i := 0; i < len(s); i++ {
+		if s[i] == v {
+			return true
+		}
+	}
+	return false
+}
@@ -0,0 +1,291 @@
+package main
+
+import (
+	"context"
+	"database/sql"
+	"strconv"
+	"testing"
+
+	_ "modernc.org/sqlite"
+)
+
+// newReachScanTestDB opens an in-memory SQLite database and seeds a minimal
+// observer_idx-schema fixture for scanReachRows coverage: one observer, three
+// transmissions, and three observations — two whose path_json contains "01FA"
+// and one that does not.
+//
+// The returned DB wraps the open connection; closing it is left to the
+// caller. If a seed statement fails, the connection is closed before the test
+// is aborted and the failing statement is included in the failure message.
+func newReachScanTestDB(t *testing.T) *DB {
+	t.Helper()
+	conn, err := sql.Open("sqlite", ":memory:")
+	if err != nil {
+		t.Fatalf("open in-memory sqlite: %v", err)
+	}
+	stmts := []string{
+		`CREATE TABLE transmissions (id INTEGER PRIMARY KEY, from_pubkey TEXT, payload_type INTEGER)`,
+		`CREATE TABLE observers (id TEXT)`,
+		`CREATE TABLE observations (id INTEGER PRIMARY KEY, transmission_id INTEGER, observer_idx INTEGER, snr REAL, path_json TEXT, timestamp INTEGER)`,
+		`INSERT INTO observers (id) VALUES ('OBS1')`, // rowid 1
+		`INSERT INTO transmissions (id, from_pubkey, payload_type) VALUES (1,'FF00',4),(2,'',5),(3,'',5)`,
+		`INSERT INTO observations (id, transmission_id, observer_idx, snr, path_json, timestamp) VALUES
+			(1,1,1,-7.0,'["AA","01FA","BB"]',1000),
+			(2,2,1,NULL,'["01FA","CC"]',1000),
+			(3,3,1,-5.0,'["AA","CC"]',1000)`, // no 01FA → excluded
+	}
+	for _, s := range stmts {
+		if _, err := conn.Exec(s); err != nil {
+			// The caller never receives the handle on this path, so release
+			// it here instead of leaking it for the rest of the test run.
+			conn.Close()
+			t.Fatalf("seed reach scan test DB: %v (statement: %s)", err, s)
+		}
+	}
+	return &DB{conn: conn}
+}
+
+// testResolver builds a resolver backed by the given token → pubkey map.
+// Tokens present in the map resolve to their mapped value; any other token
+// resolves to "" (ambiguous / unknown → skip).
+func testResolver(unique map[string]string) func(string) string {
+	return func(tok string) string {
+		// Indexing a map (nil included) with a missing key yields the zero
+		// value, which is exactly the "" sentinel for unresolved tokens.
+		return unique[tok]
+	}
+}
+
+func TestParsePathTokens(t *testing.T) {
+	cases := []struct {
+		in   string
+		want []string
+	}{
+		{`["AA","01FA","BB"]`, []string{"AA", "01FA", "BB"}},
+		{`["aa","01fa"]`, []string{"AA", "01FA"}}, // uppercased
+		{`["EFEF"]`, []string{"EFEF"}},
+		{`[]`, nil},
+		{``, nil},
+		{`null`, nil},
+		{`["49A985"]`, []string{"49A985"}}, // 3-byte hop preserved
+	}
+	for _, c := range cases {
+		got := parsePathTokens(c.in)
+		if len(got) != len(c.want) {
+			t.Fatalf("parsePathTokens(%q) = %v, want %v", c.in, got, c.want)
+		}
+		for i := range got {
+			if got[i] != c.want[i] {
+				t.Errorf("parsePathTokens(%q)[%d] = %q, want %q", c.in, i, got[i], c.want[i])
+			}
+		}
+	}
+}
+
+// TestAttributeDirections_PredecessorAndSuccessor checks the mid-path case:
+// for path A(aa) -> N(01fa) -> B(bb) we hear A, B hears us, and the row is
+// counted as one relay.
+func TestAttributeDirections_PredecessorAndSuccessor(t *testing.T) {
+	resolve := testResolver(map[string]string{"AA": "aa00", "BB": "bb00"})
+	ours := map[string]bool{"01FA": true}
+	row := pathRow{
+		observerPK:  "obs1",
+		payloadType: 5,
+		path:        []string{"AA", "01FA", "BB"},
+	}
+	d := attributeDirections([]pathRow{row}, ours, "01fa326b", resolve)
+
+	if got := d.we["aa00"]; got != 1 {
+		t.Fatalf("we_hear[aa00]=%d want 1", got)
+	}
+	if got := d.they["bb00"]; got != 1 {
+		t.Fatalf("they_hear[bb00]=%d want 1", got)
+	}
+	if got := d.relay; got != 1 {
+		t.Fatalf("relay=%d want 1", got)
+	}
+}
+
+// TestAttributeDirections_LastHopObserverAndAdvertFirstHop feeds two rows:
+// one where our token is the last hop (the observer heard us directly, with
+// SNR) and one ADVERT (type 4) where our token is the first hop (we heard the
+// originator, and the successor hears us).
+func TestAttributeDirections_LastHopObserverAndAdvertFirstHop(t *testing.T) {
+	// N is last hop → observer heard us directly (+snr).
+	lastHop := pathRow{observerPK: "obsx", payloadType: 5, path: []string{"AA", "01FA"}, snr: 4.0, snrValid: true}
+	// N is first hop of an ADVERT (type 4) → we heard the originator.
+	advert := pathRow{observerPK: "obsy", payloadType: 4, fromPubkey: "origin1", path: []string{"01FA", "CC"}}
+
+	resolve := testResolver(map[string]string{"CC": "cc00"})
+	d := attributeDirections([]pathRow{lastHop, advert}, map[string]bool{"01FA": true}, "01fa326b", resolve)
+
+	agg, seen := d.obs["obsx"]
+	if !seen || agg.count != 1 {
+		t.Fatalf("observer obsx not counted")
+	}
+	if agg.snrN != 1 || agg.snrSum != 4.0 {
+		t.Fatalf("observer snr not aggregated")
+	}
+	if got := d.they["obsx"]; got != 1 {
+		t.Fatalf("they_hear[obsx]=%d want 1", got)
+	}
+	if got := d.we["origin1"]; got != 1 {
+		t.Fatalf("we_hear[origin1]=%d want 1 (advert first-hop)", got)
+	}
+	if got := d.they["cc00"]; got != 1 {
+		t.Fatalf("they_hear[cc00]=%d want 1 (successor)", got)
+	}
+}
+
+func TestAttributeDirections_AmbiguousSkippedAndSelfIgnored(t *testing.T) {
+	// No observer, so the last-hop observer branch can't fire — this isolates
+	// the resolve logic. ZZ is unresolved (ambiguous → skipped); the trailing
+	// 01FA resolves to self (ourPK) and must be ignored as a successor.
+	rows := []pathRow{{observerPK: "", payloadType: 5, path: []string{"ZZ", "01FA", "01FA"}}}
+	d := attributeDirections(rows, map[string]bool{"01FA": true}, "01fa326b",
+		testResolver(map[string]string{"01FA": "01fa326b"}))
+	if len(d.we) != 0 || len(d.they) != 0 {
+		t.Fatalf("ambiguous/self should yield no edges, got we=%v they=%v", d.we, d.they)
+	}
+}
+
+// TestAttributeDirections_LastHopWithObserverCountsObserver guards the case
+// that the ambiguous/self test deliberately excludes: when our token is the
+// last hop AND an observer is present, that observer heard us directly.
+func TestAttributeDirections_LastHopWithObserverCountsObserver(t *testing.T) {
+	row := pathRow{observerPK: "obs1", payloadType: 5, path: []string{"ZZ", "01FA"}}
+	resolve := testResolver(map[string]string{})
+	d := attributeDirections([]pathRow{row}, map[string]bool{"01FA": true}, "01fa326b", resolve)
+
+	agg, seen := d.obs["obs1"]
+	// Same short-circuit order as a single combined condition: the they_hear
+	// count first, then presence, then the aggregate's own count.
+	if d.they["obs1"] != 1 || !seen || agg.count != 1 {
+		t.Fatalf("last-hop observer should be counted, got they=%v", d.they)
+	}
+}
+
+func TestReliableTokens(t *testing.T) {
+	// pm where "01fa" is unique but "01" is shared (collision).
+	nodes := []nodeInfo{
+		{PublicKey: "01fa326b0000", Role: "repeater"},
+		{PublicKey: "0188aaaa0000", Role: "repeater"},
+	}
+	pm := buildPrefixMap(nodes)
+	toks := reliableTokens("01fa326b0000", pm)
+	if !toks["01FA"] {
+		t.Fatalf("expected 01FA reliable, got %v", toks)
+	}
+	if toks["01"] {
+		t.Fatalf("1-byte 01 must be excluded (collision), got %v", toks)
+	}
+}
+
+func TestReliableTokens_CompanionNotMisattributed(t *testing.T) {
+	// pm holds only path-capable relays. A companion target (not in pm) whose
+	// prefix uniquely matches an UNRELATED relay must yield NO reliable tokens —
+	// otherwise that relay's traffic would be credited to the companion.
+	relay := nodeInfo{PublicKey: "aa11000000000000", Role: "repeater"}
+	pm := buildPrefixMap([]nodeInfo{relay})
+	companion := "aa11ffff00000000" // shares 2-byte "aa11" with the relay, differs at byte 3
+	toks := reliableTokens(companion, pm)
+	if len(toks) != 0 {
+		t.Fatalf("companion must get no reliable tokens (prefix points at a relay), got %v", toks)
+	}
+	// Sanity: the relay itself still resolves to its own prefix.
+	if !reliableTokens(relay.PublicKey, pm)["AA11"] {
+		t.Fatalf("relay should keep its own AA11 token")
+	}
+}
+
+// TestScanReachRows_CapTruncates lowers reachScanRowLimit to 1 (restoring it
+// on exit) and checks that scanReachRows returns a single row even though
+// the fixture from newReachScanTestDB holds two matching rows.
+func TestScanReachRows_CapTruncates(t *testing.T) {
+	saved := reachScanRowLimit
+	defer func() { reachScanRowLimit = saved }()
+	reachScanRowLimit = 1 // newReachScanTestDB has 2 matching rows
+
+	db := newReachScanTestDB(t)
+	defer db.conn.Close()
+
+	srv := &Server{db: db}
+	tokens := map[string]bool{"01FA": true}
+	rows, _ := srv.scanReachRows(context.Background(), tokens, 0)
+	if got := len(rows); got != 1 {
+		t.Fatalf("scan must hard-cap at reachScanRowLimit (1), got %d rows", got)
+	}
+}
+
+// TestReachCacheEviction_BoundedNotWiped inserts reachCacheMax+50 distinct
+// keys through reachCachePut and checks the cache ends up holding exactly
+// reachCacheMax entries: bounded at the cap and NOT fully wiped (the old
+// crude reset would leave 1).
+func TestReachCacheEviction_BoundedNotWiped(t *testing.T) {
+	srv := &Server{}
+	resetReachState(t, srv)
+
+	total := reachCacheMax + 50
+	for i := 0; i < total; i++ {
+		key := "k" + strconv.Itoa(i)
+		srv.reachCachePut(key, []byte("x"))
+	}
+
+	// Read the size under the read lock, the same way the cache is guarded
+	// elsewhere in this test file.
+	srv.reach.cacheMu.RLock()
+	size := len(srv.reach.cache)
+	srv.reach.cacheMu.RUnlock()
+
+	if size != reachCacheMax {
+		t.Fatalf("cache size after overflow = %d, want %d (bounded, evict-oldest not full-wipe)", size, reachCacheMax)
+	}
+}
+
+func TestReliableTokens_ThreeByteBranch(t *testing.T) {
+	// Two nodes share the 2-byte prefix "01fa" but diverge at byte 3, so the
+	// 3-byte (6-hex) prefix is the shortest unique token. Exercises the l=6
+	// branch that the 1-/2-byte test does not.
+	nodes := []nodeInfo{
+		{PublicKey: "01fa32000000", Role: "repeater"},
+		{PublicKey: "01fa99000000", Role: "repeater"},
+	}
+	pm := buildPrefixMap(nodes)
+	toks := reliableTokens("01fa32000000", pm)
+	if toks["01FA"] {
+		t.Fatalf("2-byte 01FA collides here and must be excluded, got %v", toks)
+	}
+	if !toks["01FA32"] {
+		t.Fatalf("expected 3-byte 01FA32 reliable token, got %v", toks)
+	}
+}
+
+// TestAttributeDirections_NonAdvertFirstHopNotCredited: our token is the
+// FIRST hop but payloadType is NOT an advert, so fromPubkey must NOT be
+// credited as we_hear (only adverts carry a trustworthy originator →
+// first-hop relationship). Guards the `payloadType == PayloadADVERT`
+// condition on the first-hop branch.
+func TestAttributeDirections_NonAdvertFirstHopNotCredited(t *testing.T) {
+	row := pathRow{
+		observerPK:  "obs1",
+		payloadType: 5,
+		fromPubkey:  "origin1",
+		path:        []string{"01FA", "BB"},
+	}
+	resolve := testResolver(map[string]string{"BB": "bb00"})
+	d := attributeDirections([]pathRow{row}, map[string]bool{"01FA": true}, "01fa326b", resolve)
+
+	if got := d.we["origin1"]; got != 0 {
+		t.Fatalf("non-advert first hop must not credit we_hear[origin1], got %d", got)
+	}
+	if len(d.we) != 0 {
+		t.Fatalf("expected no we_hear edges, got %v", d.we)
+	}
+	// The successor still counts.
+	if got := d.they["bb00"]; got != 1 {
+		t.Fatalf("they_hear[bb00]=%d want 1", got)
+	}
+}
+
+// TestAttributeDirections_ObserverAggregatesAcrossRows puts the same
+// observer on the last hop of two rows; its count and SNR must accumulate
+// across rows rather than be overwritten.
+func TestAttributeDirections_ObserverAggregatesAcrossRows(t *testing.T) {
+	first := pathRow{observerPK: "obs1", payloadType: 5, path: []string{"AA", "01FA"}, snr: 2.0, snrValid: true}
+	second := pathRow{observerPK: "obs1", payloadType: 5, path: []string{"BB", "01FA"}, snr: 6.0, snrValid: true}
+
+	d := attributeDirections([]pathRow{first, second}, map[string]bool{"01FA": true}, "01fa326b", testResolver(nil))
+
+	agg, seen := d.obs["obs1"]
+	if !seen || agg.count != 2 {
+		t.Fatalf("observer count should aggregate to 2, got %+v", agg)
+	}
+	if agg.snrN != 2 || agg.snrSum != 8.0 {
+		t.Fatalf("snr should aggregate (n=2,sum=8), got n=%d sum=%v", agg.snrN, agg.snrSum)
+	}
+	if got := d.they["obs1"]; got != 2 {
+		t.Fatalf("they_hear[obs1]=%d want 2", got)
+	}
+}
+
+// TestScanReachRows_DecodesRows runs scanReachRows against the fixture from
+// newReachScanTestDB and checks that the non-matching path is excluded and
+// that the advert row's fields come back decoded and normalized.
+func TestScanReachRows_DecodesRows(t *testing.T) {
+	db := newReachScanTestDB(t)
+	defer db.conn.Close()
+
+	srv := &Server{db: db}
+	tokens := map[string]bool{"01FA": true}
+	rows, _ := srv.scanReachRows(context.Background(), tokens, 0)
+	if n := len(rows); n != 2 {
+		t.Fatalf("expected 2 matching rows (non-matching path excluded), got %d", n)
+	}
+
+	// Locate the advert row by payload type (order is not guaranteed without
+	// ORDER BY). The scan deliberately does not stop at the first hit.
+	var advert *pathRow
+	for idx := 0; idx < len(rows); idx++ {
+		if rows[idx].payloadType != 4 {
+			continue
+		}
+		advert = &rows[idx]
+	}
+	if advert == nil {
+		t.Fatalf("advert row not returned: %+v", rows)
+	}
+
+	// Fields are decoded + normalized: lowercase observer/from, uppercase path.
+	if advert.observerPK != "obs1" || advert.fromPubkey != "ff00" {
+		t.Fatalf("decoded fields wrong: %+v", *advert)
+	}
+	if len(advert.path) != 3 || advert.path[1] != "01FA" {
+		t.Fatalf("path not parsed/uppercased: %v", advert.path)
+	}
+	if !advert.snrValid || advert.snr != -7.0 {
+		t.Fatalf("snr not decoded: valid=%v val=%v", advert.snrValid, advert.snr)
+	}
+}
@@ -0,0 +1,37 @@
+package main
+
+import "time"
+
+// observerNaiveClockWindow is the rolling window after which a recorded
+// naive-clock skew event "decays" and the observer is no longer flagged in
+// the UI. Read-time decay (no background sweep) keeps it cheap.
+//
+// applyObserverNaiveClock compares the event's age against this value with a
+// strict ">", so an event exactly one window old still counts as recent.
+const observerNaiveClockWindow = 24 * time.Hour
+
+// applyObserverNaiveClock copies the naive-clock state of an Observer row
+// onto the four clock_* fields of resp, applying read-time decay relative to
+// now.
+//
+// resp is left untouched when the row has no last-naive timestamp (nil or
+// empty), when that timestamp does not parse as RFC 3339, or when the event
+// is older than observerNaiveClockWindow. Leaving it untouched is what makes
+// the chip and banner clear automatically without a background sweep.
+// Otherwise ClockNaive is set to true and the skew seconds (when present),
+// the 24h skew count and the last-naive timestamp are copied across.
+//
+// Issue #1478.
+func applyObserverNaiveClock(resp *ObserverResp, o *Observer, now time.Time) {
+	stamp := o.ClockLastNaiveAt
+	if stamp == nil || *stamp == "" {
+		return
+	}
+	at, err := time.Parse(time.RFC3339, *stamp)
+	if err != nil {
+		return
+	}
+	if age := now.Sub(at); age > observerNaiveClockWindow {
+		// Decayed — clock_naive stays false and the counters stay at zero.
+		// The underlying row is intentionally NOT cleared here (server is
+		// read-only; the next ingestor write or a future #1478 followup
+		// vacuum can rewrite). The response just shows zero/null.
+		return
+	}
+
+	resp.ClockLastNaiveAt = *stamp
+	resp.ClockSkewCount24h = o.ClockSkewCount24h
+	if skew := o.ClockSkewSeconds; skew != nil {
+		resp.ClockSkewSeconds = *skew
+	}
+	resp.ClockNaive = true
+}
@@ -0,0 +1,171 @@
+package main
+
+// Issue #1478 — surface observers whose envelope timestamps were clamped
+// because they were emitted with a naive (zone-less) local-time string.
+// /api/observers and /api/observers/{id} must expose four new fields so the
+// UI can render a ⚠️ chip + a banner explaining "this observer's clock is
+// off and per-packet timing is being clamped to ingest time".
+//
+// Tests are behavioral: they seed the same DB columns the ingestor will write
+// to and assert the JSON response carries the field values plus the derived
+// `clock_naive` boolean. They will FAIL on master (columns don't exist; JSON
+// has no clock_* keys) → red commit.
+
+import (
+	"encoding/json"
+	"net/http/httptest"
+	"testing"
+	"time"
+)
+
+// TestHandleObservers_Issue1478_SurfacesRecentNaiveSkew inserts an observer
+// row with a recent (2h old) clock-naive skew event already recorded, then
+// asserts that GET /api/observers reports that observer with
+// clock_naive=true together with the seeded skew seconds, 24h skew count and
+// a populated clock_last_naive_at.
+func TestHandleObservers_Issue1478_SurfacesRecentNaiveSkew(t *testing.T) {
+	srv, router := setupTestServer(t)
+
+	now := time.Now().UTC()
+	recent := now.Add(-2 * time.Hour).Format(time.RFC3339)
+	// Seed an observer whose ingestor has recorded a -8h naive clamp 2h ago.
+	_, err := srv.db.conn.Exec(`INSERT INTO observers
+		(id, name, iata, last_seen, first_seen, packet_count,
+		 clock_skew_seconds, clock_skew_count_24h, clock_last_naive_at)
+		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+		"naive-obs-1", "California Pi", "SFO",
+		now.Format(time.RFC3339), now.Add(-7*24*time.Hour).Format(time.RFC3339),
+		42, -28800, 17, recent)
+	if err != nil {
+		t.Fatalf("seed observer: %v", err)
+	}
+
+	req := httptest.NewRequest("GET", "/api/observers", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+	if w.Code != 200 {
+		t.Fatalf("expected 200, got %d body=%s", w.Code, w.Body.String())
+	}
+
+	// Decode into generic maps so the test checks the JSON keys on the wire
+	// rather than the Go struct fields.
+	var body struct {
+		Observers []map[string]interface{} `json:"observers"`
+	}
+	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode: %v body=%s", err, w.Body.String())
+	}
+
+	var got map[string]interface{}
+	for _, o := range body.Observers {
+		if o["id"] == "naive-obs-1" {
+			got = o
+			break
+		}
+	}
+	if got == nil {
+		t.Fatalf("expected observer naive-obs-1 in response, got %d entries", len(body.Observers))
+	}
+
+	// A missing clock_naive key is fatal: without it none of the remaining
+	// clock_* assertions are meaningful.
+	if v, ok := got["clock_naive"]; !ok {
+		t.Fatalf("expected clock_naive field in observer JSON, missing. keys=%v", mapKeys1478(got))
+	} else if b, ok := v.(bool); !ok || !b {
+		t.Errorf("expected clock_naive=true, got %v (%T)", v, v)
+	}
+	// JSON numbers decode to float64 when the target is interface{}.
+	if v, ok := got["clock_skew_seconds"]; !ok {
+		t.Errorf("expected clock_skew_seconds field, missing")
+	} else if n, ok := v.(float64); !ok || int64(n) != -28800 {
+		t.Errorf("expected clock_skew_seconds=-28800, got %v", v)
+	}
+	if v, ok := got["clock_skew_count_24h"]; !ok {
+		t.Errorf("expected clock_skew_count_24h field, missing")
+	} else if n, ok := v.(float64); !ok || int(n) != 17 {
+		t.Errorf("expected clock_skew_count_24h=17, got %v", v)
+	}
+	if v, ok := got["clock_last_naive_at"]; !ok || v == nil {
+		t.Errorf("expected clock_last_naive_at populated, got %v", v)
+	}
+}
+
+// TestHandleObservers_Issue1478_DecaysAfter24h seeds an observer whose last
+// naive-clock event is 30h old (outside the 24h window) and asserts that
+// GET /api/observers reports it with clock_naive=false and with the skew
+// count and skew seconds zeroed. A counter that is absent from the JSON is
+// accepted as zeroed.
+func TestHandleObservers_Issue1478_DecaysAfter24h(t *testing.T) {
+	srv, router := setupTestServer(t)
+	now := time.Now().UTC()
+	// 30h ago — past the 24h window. clock_naive must be false.
+	stale := now.Add(-30 * time.Hour).Format(time.RFC3339)
+	_, err := srv.db.conn.Exec(`INSERT INTO observers
+		(id, name, iata, last_seen, first_seen, packet_count,
+		 clock_skew_seconds, clock_skew_count_24h, clock_last_naive_at)
+		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+		"naive-obs-old", "Fixed Pi", "LAX",
+		now.Format(time.RFC3339), now.Add(-30*24*time.Hour).Format(time.RFC3339),
+		99, -28800, 5, stale)
+	if err != nil {
+		t.Fatalf("seed observer: %v", err)
+	}
+
+	req := httptest.NewRequest("GET", "/api/observers", nil)
+	w := httptest.NewRecorder()
+	router.ServeHTTP(w, req)
+	// Surface a failing handler directly instead of letting it show up
+	// later as a confusing "observer not in response" failure.
+	if w.Code != 200 {
+		t.Fatalf("expected 200, got %d body=%s", w.Code, w.Body.String())
+	}
+
+	var body struct {
+		Observers []map[string]interface{} `json:"observers"`
+	}
+	if err := json.Unmarshal(w.Body.Bytes(), &body); err != nil {
+		t.Fatalf("decode: %v body=%s", err, w.Body.String())
+	}
+
+	var got map[string]interface{}
+	for _, o := range body.Observers {
+		if o["id"] == "naive-obs-old" {
+			got = o
+			break
+		}
+	}
+	if got == nil {
+		t.Fatalf("expected naive-obs-old in response")
+	}
+	// A missing key reads as nil, which also fails the comparison below.
+	if v := got["clock_naive"]; v != false {
+		t.Errorf("after 24h decay clock_naive must be false, got %v", v)
+	}
+	// Count and skew should also be zeroed for the response (decay). Only a
+	// present, numeric, non-zero value is a failure.
+	for _, key := range []string{"clock_skew_count_24h", "clock_skew_seconds"} {
+		v := got[key]
+		if n, ok := v.(float64); ok && int64(n) != 0 {
+			t.Errorf("expected %s=0 after decay, got %v", key, v)
+		}
+	}
+}
+
+// TestHandleObserverDetail_Issue1478_IncludesClockNaiveFields seeds an
+// observer with a 5-minute-old naive-clock event and checks that
+// GET /api/observers/{id} returns clock_naive=true and a non-null
+// clock_skew_seconds.
+func TestHandleObserverDetail_Issue1478_IncludesClockNaiveFields(t *testing.T) {
+	srv, router := setupTestServer(t)
+	now := time.Now().UTC()
+	var (
+		lastSeen  = now.Format(time.RFC3339)
+		firstSeen = now.Add(-2 * 24 * time.Hour).Format(time.RFC3339)
+		recent    = now.Add(-5 * time.Minute).Format(time.RFC3339)
+	)
+	if _, err := srv.db.conn.Exec(`INSERT INTO observers
+		(id, name, iata, last_seen, first_seen, packet_count,
+		 clock_skew_seconds, clock_skew_count_24h, clock_last_naive_at)
+		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+		"naive-obs-detail", "Detail Pi", "SJC",
+		lastSeen, firstSeen,
+		7, 25200, 3, recent); err != nil {
+		t.Fatalf("seed observer: %v", err)
+	}
+
+	rec := httptest.NewRecorder()
+	router.ServeHTTP(rec, httptest.NewRequest("GET", "/api/observers/naive-obs-detail", nil))
+	if rec.Code != 200 {
+		t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String())
+	}
+
+	var got map[string]interface{}
+	if err := json.Unmarshal(rec.Body.Bytes(), &got); err != nil {
+		t.Fatalf("decode: %v body=%s", err, rec.Body.String())
+	}
+	// Missing keys read as nil, so both checks also fail on absent fields.
+	if naive := got["clock_naive"]; naive != true {
+		t.Errorf("expected clock_naive=true, got %v", naive)
+	}
+	if skew := got["clock_skew_seconds"]; skew == nil {
+		t.Errorf("expected clock_skew_seconds set")
+	}
+}
+
+// mapKeys1478 returns the keys of m in map-iteration (unspecified) order. It
+// is used to list the available JSON keys in a failure message.
+func mapKeys1478(m map[string]interface{}) []string {
+	keys := make([]string, len(m))
+	next := 0
+	for key := range m {
+		keys[next] = key
+		next++
+	}
+	return keys
+}
@@ -0,0 +1,74 @@
+package main
+
+// observers cache for /api/observers default (no-filter) response.
+// Issue #1481 P0-3 + #1483 follow-up.
+//
+// Design:
+//   - Atomic pointer holds the immutable cached response.
+//   - Wall-clock TTL replaced with monotonic time.Time (#1483: NTP
+//     step-backward must not extend the cache).
+//   - singleflight collapses TTL-boundary thundering herd into one
+//     SQL fill, regardless of incoming concurrency.
+
+import (
+	"sync/atomic"
+	"time"
+
+	"golang.org/x/sync/singleflight"
+)
+
+// observersCacheTTL is the default freshness window for the cached
+// default (no-filter) /api/observers response when no per-server
+// override is configured. Configurable via ObserversCache.TTLSeconds
+// (#1483); effectiveObserversCacheTTL returns this value whenever that
+// override is missing or non-positive.
+const observersCacheTTL = 30 * time.Second
+
+// effectiveObserversCacheTTL resolves the TTL to use for the observers
+// cache: the configured ObserversCache.TTLSeconds (as a duration in seconds)
+// when it is positive, otherwise observersCacheTTL. A nil cfg or a nil
+// ObserversCache section also yields the default.
+func (s *Server) effectiveObserversCacheTTL() time.Duration {
+	if s.cfg == nil || s.cfg.ObserversCache == nil {
+		return observersCacheTTL
+	}
+	if secs := s.cfg.ObserversCache.TTLSeconds; secs > 0 {
+		return time.Duration(secs) * time.Second
+	}
+	return observersCacheTTL
+}
+
+// observersCacheFlightKey is the singleflight key for the default-shape
+// cache fill.
+const observersCacheFlightKey = "observers:default"
+
+// observersCacheEntry pairs the response with the monotonic timestamp
+// of when it was built. atomic.Pointer guarantees the read is a single
+// load; the entry is immutable once stored.
+type observersCacheEntry struct {
+	// resp is the cached /api/observers response.
+	resp ObserverListResponse
+	// at is when resp was built; observersCacheExpired measures age from it.
+	at   time.Time
+}
+
+// observersCacheField bundles the atomic pointer with the singleflight
+// group that gates concurrent refills.
+type observersCacheField struct {
+	// ptr holds the current cache entry; a nil load means nothing is cached.
+	ptr atomic.Pointer[observersCacheEntry]
+	// sf collapses concurrent refills into a single fill.
+	sf  singleflight.Group
+
+	// fillCount increments once per actual SQL fill (i.e., per
+	// singleflight winner). Tests use this to assert the herd was
+	// collapsed; production code never reads it.
+	fillCount atomic.Int64
+}
+
+// observersCacheExpired reports whether a cache entry built at t must be
+// refilled: either t is the zero time (nothing cached) or at least
+// effectiveObserversCacheTTL has elapsed since t.
+func (s *Server) observersCacheExpired(t time.Time) bool {
+	return t.IsZero() || time.Since(t) >= s.effectiveObserversCacheTTL()
+}
+
+// loadObserversCache atomically loads the current cache entry. It returns
+// (entry, true) when an entry is stored and (nil, false) when the cache has
+// never been filled. Freshness is not evaluated here.
+func (s *Server) loadObserversCache() (*observersCacheEntry, bool) {
+	entry := s.observersCacheV2.ptr.Load()
+	return entry, entry != nil
+}
--- a/Show More
+++ b/Show More