Compare commits

..

7 Commits

Author SHA1 Message Date
Kpa-clawbot
499a6db2cc Merge branch 'master' into fix/cache-hit-rate 2026-03-29 07:15:20 -07:00
you
206d9bd64a fix: use per-PR concurrency group to prevent cross-PR cancellation
The flat 'deploy' concurrency group caused ALL PRs to share one queue,
so pushing to any PR would cancel CI runs on other PRs.

Changed to deploy-${{ github.event.pull_request.number || github.ref }}
so each PR gets its own concurrency group while re-pushes to the same
PR still cancel the previous run.
2026-03-29 14:14:57 +00:00
you
ec35b291ee fix: cache hit rate excludes stale hits + debounce bulk-health invalidation
Two cache bugs fixed:

1. Hit rate formula excluded stale hits — reported rate was artificially low
   because stale-while-revalidate responses (which ARE cache hits from the
   caller's perspective) were not counted. Changed formula from
   hits/(hits+misses) to (hits+staleHits)/(hits+staleHits+misses).

2. Bulk-health cache invalidated on every advert packet — in a mesh with
   dozens of nodes advertising every few seconds, this caused the expensive
   bulk-health query to be recomputed on nearly every request, defeating
   the cache entirely. Switched to 30s debounced invalidation via
   debouncedInvalidateBulkHealth().

Added regression test for hit rate formula in test-server-routes.js.
2026-03-29 07:10:32 -07:00
efiten
3f54632b07 fix: cache /stats and GetNodeHashSizeInfo to eliminate slow API calls
- /api/stats: 10s server-side cache — was running 5 SQLite COUNT queries
  on every call, taking ~1500ms with 28 concurrent WS clients polling every 15s
- GetNodeHashSizeInfo: 15s cache — was doing a full O(n) scan + JSON unmarshal
  of all advert packets in memory on every /nodes request, taking ~1200ms

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-29 07:09:05 -07:00
Kpa-clawbot
609b12541e fix: add extra_hosts host.docker.internal to all services — fixes #238
Linux Docker doesn't resolve host.docker.internal by default.
Required when MQTT sources in config.json point to the host machine.
Harmless on Docker Desktop where it already works.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-03-28 18:58:31 -07:00
Kpa-clawbot
4369e58a3c Merge pull request #235 from Kpa-clawbot/fix/compose-build-directive
fix: docker-compose prod/staging need build: directive — fixes pull access denied
2026-03-28 18:36:21 -07:00
Kpa-clawbot
8ef321bf70 fix: add build context to prod and staging services in docker-compose.yml
Without build: directive, docker compose tries to pull corescope:latest
from Docker Hub instead of building locally.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-03-28 18:35:35 -07:00
6 changed files with 86 additions and 7 deletions

View File

@@ -17,7 +17,7 @@ on:
- 'docs/**'
concurrency:
group: deploy
group: deploy-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true
env:

View File

@@ -33,6 +33,11 @@ type Server struct {
memStatsMu sync.Mutex
memStatsCache runtime.MemStats
memStatsCachedAt time.Time
// Cached /api/stats response — recomputed at most once every 10s
statsMu sync.Mutex
statsCache *StatsResponse
statsCachedAt time.Time
}
// PerfStats tracks request performance.
@@ -380,6 +385,17 @@ func (s *Server) handleHealth(w http.ResponseWriter, r *http.Request) {
}
func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) {
const statsTTL = 10 * time.Second
s.statsMu.Lock()
if s.statsCache != nil && time.Since(s.statsCachedAt) < statsTTL {
cached := s.statsCache
s.statsMu.Unlock()
writeJSON(w, cached)
return
}
s.statsMu.Unlock()
var stats *Stats
var err error
if s.store != nil {
@@ -392,7 +408,7 @@ func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) {
return
}
counts := s.db.GetRoleCounts()
writeJSON(w, StatsResponse{
resp := &StatsResponse{
TotalPackets: stats.TotalPackets,
TotalTransmissions: &stats.TotalTransmissions,
TotalObservations: stats.TotalObservations,
@@ -411,7 +427,14 @@ func (s *Server) handleStats(w http.ResponseWriter, r *http.Request) {
Companions: counts["companions"],
Sensors: counts["sensors"],
},
})
}
s.statsMu.Lock()
s.statsCache = resp
s.statsCachedAt = time.Now()
s.statsMu.Unlock()
writeJSON(w, resp)
}
func (s *Server) handlePerf(w http.ResponseWriter, r *http.Request) {

View File

@@ -98,6 +98,11 @@ type PacketStore struct {
// computed during Load() and incrementally updated on ingest.
distHops []distHopRecord
distPaths []distPathRecord
// Cached GetNodeHashSizeInfo result — recomputed at most once every 15s
hashSizeInfoMu sync.Mutex
hashSizeInfoCache map[string]*hashSizeNodeInfo
hashSizeInfoAt time.Time
}
// Precomputed distance records for fast analytics aggregation.
@@ -3722,8 +3727,26 @@ type hashSizeNodeInfo struct {
Inconsistent bool
}
// GetNodeHashSizeInfo scans advert packets to compute per-node hash size data.
// GetNodeHashSizeInfo returns cached per-node hash size data, recomputing at most every 15s.
func (s *PacketStore) GetNodeHashSizeInfo() map[string]*hashSizeNodeInfo {
const ttl = 15 * time.Second
s.hashSizeInfoMu.Lock()
if s.hashSizeInfoCache != nil && time.Since(s.hashSizeInfoAt) < ttl {
cached := s.hashSizeInfoCache
s.hashSizeInfoMu.Unlock()
return cached
}
s.hashSizeInfoMu.Unlock()
result := s.computeNodeHashSizeInfo()
s.hashSizeInfoMu.Lock()
s.hashSizeInfoCache = result
s.hashSizeInfoAt = time.Now()
s.hashSizeInfoMu.Unlock()
return result
}
// computeNodeHashSizeInfo scans advert packets to compute per-node hash size data.
func (s *PacketStore) computeNodeHashSizeInfo() map[string]*hashSizeNodeInfo {
s.mu.RLock()
defer s.mu.RUnlock()

View File

@@ -5,9 +5,12 @@
services:
prod:
build: .
image: corescope:latest
container_name: corescope-prod
restart: unless-stopped
extra_hosts:
- "host.docker.internal:host-gateway"
ports:
- "${PROD_HTTP_PORT:-80}:${PROD_HTTP_PORT:-80}"
- "${PROD_HTTPS_PORT:-443}:${PROD_HTTPS_PORT:-443}"
@@ -26,9 +29,12 @@ services:
retries: 3
staging:
build: .
image: corescope:latest
container_name: corescope-staging
restart: unless-stopped
extra_hosts:
- "host.docker.internal:host-gateway"
ports:
- "${STAGING_HTTP_PORT:-81}:${STAGING_HTTP_PORT:-81}"
- "${STAGING_MQTT_PORT:-1884}:1883"
@@ -57,6 +63,8 @@ services:
image: corescope-go:latest
container_name: corescope-staging-go
restart: unless-stopped
extra_hosts:
- "host.docker.internal:host-gateway"
ports:
- "${STAGING_GO_HTTP_PORT:-82}:80"
- "${STAGING_GO_MQTT_PORT:-1885}:1883"

View File

@@ -207,6 +207,13 @@ class TTLCache {
if (key.startsWith(prefix)) this.store.delete(key);
}
}
debouncedInvalidateBulkHealth() {
if (this._bulkHealthTimer) return;
this._bulkHealthTimer = setTimeout(() => {
this._bulkHealthTimer = null;
this.invalidate('bulk-health');
}, 30000);
}
debouncedInvalidateAll() {
if (this._debounceTimer) return;
this._debounceTimer = setTimeout(() => {
@@ -410,7 +417,7 @@ app.get('/api/perf', (req, res) => {
avgMs: perfStats.requests ? Math.round(perfStats.totalMs / perfStats.requests * 10) / 10 : 0,
endpoints: Object.fromEntries(sorted),
slowQueries: perfStats.slowQueries.slice(-20),
cache: { size: cache.size, hits: cache.hits, misses: cache.misses, staleHits: cache.staleHits, recomputes: cache.recomputes, hitRate: cache.hits + cache.misses > 0 ? Math.round(cache.hits / (cache.hits + cache.misses) * 1000) / 10 : 0 },
cache: { size: cache.size, hits: cache.hits, misses: cache.misses, staleHits: cache.staleHits, recomputes: cache.recomputes, hitRate: cache.hits + cache.staleHits + cache.misses > 0 ? Math.round((cache.hits + cache.staleHits) / (cache.hits + cache.staleHits + cache.misses) * 1000) / 10 : 0 },
packetStore: pktStore.getStats(),
sqlite: (() => {
try {
@@ -519,7 +526,7 @@ app.get('/api/health', (req, res) => {
misses: cache.misses,
staleHits: cache.staleHits,
recomputes: cache.recomputes,
hitRate: cache.hits + cache.misses > 0 ? Math.round(cache.hits / (cache.hits + cache.misses) * 1000) / 10 : 0,
hitRate: cache.hits + cache.staleHits + cache.misses > 0 ? Math.round((cache.hits + cache.staleHits) / (cache.hits + cache.staleHits + cache.misses) * 1000) / 10 : 0,
},
websocket: {
clients: wsClients,
@@ -723,7 +730,7 @@ for (const source of mqttSources) {
// Invalidate this node's caches on advert
cache.invalidate('node:' + p.pubKey);
cache.invalidate('health:' + p.pubKey);
cache.invalidate('bulk-health');
cache.debouncedInvalidateBulkHealth();
// Cross-reference: if this node's pubkey matches an existing observer, backfill observer name
if (p.name && p.pubKey) {

View File

@@ -1254,6 +1254,24 @@ seedTestData();
lastPathSeenMap.delete(liveNode);
});
// ── Cache hit rate includes stale hits ──
await t('Cache hitRate includes staleHits in formula', async () => {
cache.clear();
cache.hits = 0;
cache.misses = 0;
cache.staleHits = 0;
// Simulate: 3 hits, 2 stale hits, 5 misses => rate = (3+2)/(3+2+5) = 50%
cache.hits = 3;
cache.staleHits = 2;
cache.misses = 5;
const r = await request(app).get('/api/health').expect(200);
assert(r.body.cache.hitRate === 50, 'hitRate should be (hits+staleHits)/(hits+staleHits+misses) = 50%, got ' + r.body.cache.hitRate);
// Reset
cache.hits = 0;
cache.misses = 0;
cache.staleHits = 0;
});
// ── Summary ──
console.log(`\n═══ Server Route Tests: ${passed} passed, ${failed} failed ═══`);
if (failed > 0) process.exit(1);