Merge branch 'master' into fix/cache-hit-rate

fix: use per-PR concurrency group to prevent cross-PR cancellation
The flat 'deploy' concurrency group caused ALL PRs to share one queue, so pushing to any PR would cancel CI runs on other PRs. Changed to deploy-${{ github.event.pull_request.number || github.ref }} so each PR gets its own concurrency group while re-pushes to the same PR still cancel the previous run.
2026-07-02 01:31:38 +00:00 · 2026-03-29 07:15:20 -07:00 · 2026-03-29 14:14:57 +00:00 · 2026-03-29 07:10:32 -07:00 · 2026-03-29 07:09:05 -07:00 · 2026-03-28 18:58:31 -07:00
6 changed files with 86 additions and 7 deletions
@@ -17,7 +17,7 @@ on:
      - 'docs/**'

 concurrency:
-  group: deploy
+  group: deploy-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

 env:
@@ -33,6 +33,11 @@ type Server struct {
 	memStatsMu   sync.Mutex
 	memStatsCache runtime.MemStats
 	memStatsCachedAt time.Time
+
+	// Cached /api/stats response — reused while younger than 10s (concurrent requests that miss together may each recompute)
+	statsMu      sync.Mutex
+	statsCache   *StatsResponse
+	statsCachedAt time.Time
 }

 // PerfStats tracks request performance.
@@ -380,6 +385,17 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
 }

 func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) {
+	const statsTTL = 10 * time.Second
+
+	s.statsMu.Lock()
+	if s.statsCache != nil && time.Since(s.statsCachedAt) < statsTTL {
+		cached := s.statsCache
+		s.statsMu.Unlock()
+		writeJSON(w, cached)
+		return
+	}
+	s.statsMu.Unlock()
+
 	var stats *Stats
 	var err error
 	if s.store != nil {
@@ -392,7 +408,7 @@ func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 	counts := s.db.GetRoleCounts()
-	writeJSON(w, StatsResponse{
+	resp := &StatsResponse{
 		TotalPackets:       stats.TotalPackets,
 		TotalTransmissions: &stats.TotalTransmissions,
 		TotalObservations:  stats.TotalObservations,
@@ -411,7 +427,14 @@ func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) {
 			Companions: counts["companions"],
 			Sensors:    counts["sensors"],
 		},
-	})
+	}
+
+	s.statsMu.Lock()
+	s.statsCache = resp
+	s.statsCachedAt = time.Now()
+	s.statsMu.Unlock()
+
+	writeJSON(w, resp)
 }

 func (s *Server) handlePerf(w http.ResponseWriter, r *http.Request) {
@@ -98,6 +98,11 @@ type PacketStore struct {
 	// computed during Load() and incrementally updated on ingest.
 	distHops  []distHopRecord
 	distPaths []distPathRecord
+
+	// Cached GetNodeHashSizeInfo result — reused while younger than 15s (concurrent callers that miss together may each recompute)
+	hashSizeInfoMu    sync.Mutex
+	hashSizeInfoCache map[string]*hashSizeNodeInfo
+	hashSizeInfoAt    time.Time
 }

 // Precomputed distance records for fast analytics aggregation.
@@ -3722,8 +3727,26 @@ type hashSizeNodeInfo struct {
 	Inconsistent bool
 }

-// GetNodeHashSizeInfo scans advert packets to compute per-node hash size data.
+// GetNodeHashSizeInfo returns per-node hash size data, serving the cached map while it is younger than 15s; the returned map is shared with the cache, so callers must not modify it.
 func (s *PacketStore) GetNodeHashSizeInfo() map[string]*hashSizeNodeInfo {
+	const ttl = 15 * time.Second
+	s.hashSizeInfoMu.Lock()
+	if s.hashSizeInfoCache != nil && time.Since(s.hashSizeInfoAt) < ttl {
+		cached := s.hashSizeInfoCache
+		s.hashSizeInfoMu.Unlock()
+		return cached
+	}
+	s.hashSizeInfoMu.Unlock()
+	result := s.computeNodeHashSizeInfo()
+	s.hashSizeInfoMu.Lock()
+	s.hashSizeInfoCache = result
+	s.hashSizeInfoAt = time.Now()
+	s.hashSizeInfoMu.Unlock()
+	return result
+}
+
+// computeNodeHashSizeInfo scans advert packets to compute per-node hash size data.
+func (s *PacketStore) computeNodeHashSizeInfo() map[string]*hashSizeNodeInfo {
 	s.mu.RLock()
 	defer s.mu.RUnlock()

@@ -5,9 +5,12 @@

 services:
  prod:
+    build: .
    image: corescope:latest
    container_name: corescope-prod
    restart: unless-stopped
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
    ports:
      - "${PROD_HTTP_PORT:-80}:${PROD_HTTP_PORT:-80}"
      - "${PROD_HTTPS_PORT:-443}:${PROD_HTTPS_PORT:-443}"
@@ -26,9 +29,12 @@ services:
      retries: 3

  staging:
+    build: .
    image: corescope:latest
    container_name: corescope-staging
    restart: unless-stopped
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
    ports:
      - "${STAGING_HTTP_PORT:-81}:${STAGING_HTTP_PORT:-81}"
      - "${STAGING_MQTT_PORT:-1884}:1883"
@@ -57,6 +63,8 @@ services:
    image: corescope-go:latest
    container_name: corescope-staging-go
    restart: unless-stopped
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
    ports:
      - "${STAGING_GO_HTTP_PORT:-82}:80"
      - "${STAGING_GO_MQTT_PORT:-1885}:1883"
@@ -207,6 +207,13 @@ class TTLCache {
      if (key.startsWith(prefix)) this.store.delete(key);
    }
  }
+  debouncedInvalidateBulkHealth() {
+    if (this._bulkHealthTimer) return;
+    this._bulkHealthTimer = setTimeout(() => {
+      this._bulkHealthTimer = null;
+      this.invalidate('bulk-health');
+    }, 30000);
+  }
  debouncedInvalidateAll() {
    if (this._debounceTimer) return;
    this._debounceTimer = setTimeout(() => {
@@ -410,7 +417,7 @@ app.get('/api/perf', (req, res) => {
    avgMs: perfStats.requests ? Math.round(perfStats.totalMs / perfStats.requests * 10) / 10 : 0,
    endpoints: Object.fromEntries(sorted),
    slowQueries: perfStats.slowQueries.slice(-20),
-    cache: { size: cache.size, hits: cache.hits, misses: cache.misses, staleHits: cache.staleHits, recomputes: cache.recomputes, hitRate: cache.hits + cache.misses > 0 ? Math.round(cache.hits / (cache.hits + cache.misses) * 1000) / 10 : 0 },
+    cache: { size: cache.size, hits: cache.hits, misses: cache.misses, staleHits: cache.staleHits, recomputes: cache.recomputes, hitRate: cache.hits + cache.staleHits + cache.misses > 0 ? Math.round((cache.hits + cache.staleHits) / (cache.hits + cache.staleHits + cache.misses) * 1000) / 10 : 0 },
    packetStore: pktStore.getStats(),
    sqlite: (() => {
      try {
@@ -519,7 +526,7 @@ app.get('/api/health', (req, res) => {
      misses: cache.misses,
      staleHits: cache.staleHits,
      recomputes: cache.recomputes,
-      hitRate: cache.hits + cache.misses > 0 ? Math.round(cache.hits / (cache.hits + cache.misses) * 1000) / 10 : 0,
+      hitRate: cache.hits + cache.staleHits + cache.misses > 0 ? Math.round((cache.hits + cache.staleHits) / (cache.hits + cache.staleHits + cache.misses) * 1000) / 10 : 0,
    },
    websocket: {
      clients: wsClients,
@@ -723,7 +730,7 @@ for (const source of mqttSources) {
            // Invalidate this node's caches on advert
            cache.invalidate('node:' + p.pubKey);
            cache.invalidate('health:' + p.pubKey);
-            cache.invalidate('bulk-health');
+            cache.debouncedInvalidateBulkHealth();

            // Cross-reference: if this node's pubkey matches an existing observer, backfill observer name
            if (p.name && p.pubKey) {
@@ -1254,6 +1254,24 @@ seedTestData();
    lastPathSeenMap.delete(liveNode);
  });

+  // ── Cache hit rate includes stale hits ──
+  await t('Cache hitRate includes staleHits in formula', async () => {
+    cache.clear();
+    cache.hits = 0;
+    cache.misses = 0;
+    cache.staleHits = 0;
+    // Simulate: 3 hits, 2 stale hits, 5 misses => rate = (3+2)/(3+2+5) = 50%
+    cache.hits = 3;
+    cache.staleHits = 2;
+    cache.misses = 5;
+    const r = await request(app).get('/api/health').expect(200);
+    assert(r.body.cache.hitRate === 50, 'hitRate should be (hits+staleHits)/(hits+staleHits+misses) = 50%, got ' + r.body.cache.hitRate);
+    // Reset
+    cache.hits = 0;
+    cache.misses = 0;
+    cache.staleHits = 0;
+  });
+
  // ── Summary ──
  console.log(`\n═══ Server Route Tests: ${passed} passed, ${failed} failed ═══`);
  if (failed > 0) process.exit(1);
Author	SHA1	Message	Date
Kpa-clawbot	499a6db2cc	Merge branch 'master' into fix/cache-hit-rate	2026-03-29 07:15:20 -07:00
you	206d9bd64a	fix: use per-PR concurrency group to prevent cross-PR cancellation The flat 'deploy' concurrency group caused ALL PRs to share one queue, so pushing to any PR would cancel CI runs on other PRs. Changed to deploy-${{ github.event.pull_request.number \|\| github.ref }} so each PR gets its own concurrency group while re-pushes to the same PR still cancel the previous run.	2026-03-29 14:14:57 +00:00
you	ec35b291ee	fix: cache hit rate excludes stale hits + debounce bulk-health invalidation Two cache bugs fixed: 1. Hit rate formula excluded stale hits — reported rate was artificially low because stale-while-revalidate responses (which ARE cache hits from the caller's perspective) were not counted. Changed formula from hits/(hits+misses) to (hits+staleHits)/(hits+staleHits+misses). 2. Bulk-health cache invalidated on every advert packet — in a mesh with dozens of nodes advertising every few seconds, this caused the expensive bulk-health query to be recomputed on nearly every request, defeating the cache entirely. Switched to 30s debounced invalidation via debouncedInvalidateBulkHealth(). Added regression test for hit rate formula in test-server-routes.js.	2026-03-29 07:10:32 -07:00
efiten	3f54632b07	fix: cache /stats and GetNodeHashSizeInfo to eliminate slow API calls - /api/stats: 10s server-side cache — was running 5 SQLite COUNT queries on every call, taking ~1500ms with 28 concurrent WS clients polling every 15s - GetNodeHashSizeInfo: 15s cache — was doing a full O(n) scan + JSON unmarshal of all advert packets in memory on every /nodes request, taking ~1200ms Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-29 07:09:05 -07:00
Kpa-clawbot	609b12541e	fix: add extra_hosts host.docker.internal to all services — fixes #238 Linux Docker doesn't resolve host.docker.internal by default. Required when MQTT sources in config.json point to the host machine. Harmless on Docker Desktop where it already works. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>	2026-03-28 18:58:31 -07:00
Kpa-clawbot	4369e58a3c	Merge pull request #235 from Kpa-clawbot/fix/compose-build-directive fix: docker-compose prod/staging need build: directive — fixes pull access denied	2026-03-28 18:36:21 -07:00
Kpa-clawbot	8ef321bf70	fix: add build context to prod and staging services in docker-compose.yml Without build: directive, docker compose tries to pull corescope:latest from Docker Hub instead of building locally. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>	2026-03-28 18:35:35 -07:00