mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-06-01 19:14:08 +00:00
9383201c07
Red commit:
https://github.com/Kpa-clawbot/CoreScope/commit/eae179b99b5fd34924547632aa8f8025c405aa53
(CI: pending — opens with this PR)
Finishes #1283. RED test `TestServerSourceHasNoCachedRWCalls` goes from
failing (13 writer call-sites) to GREEN (zero). Per #1287 Option 4
(https://github.com/Kpa-clawbot/CoreScope/issues/1287#issuecomment-4485099992):
ingestor owns the neighbor graph build + persist; server reads the
snapshot.
**Category A — Schema migrations** → new `internal/dbschema` package.
`dbschema.Apply(rw)` runs in `cmd/ingestor` startup (in `OpenStore`).
`dbschema.AssertReady(ro)` runs in `cmd/server/main.go`; it logs a fatal
error and exits if any expected column/index/table is missing — the
operator must restart the ingestor first. Covers indexes,
`neighbor_edges`, `observations.resolved_path`,
`observers.{inactive,last_packet_at,iata}`,
`(inactive_)nodes.foreign_advert`, `transmissions.from_pubkey`.
**Category B — Backfill** → ingestor.
`BackfillFromPubkey` and observer-blacklist soft-delete moved to
`cmd/ingestor/maintenance.go`. Server keeps an inert
`fromPubkeyBackfillSnapshot` stub for `/api/healthz` API compatibility.
**Category C — Neighbor-graph persistence (Option 4)** → ingestor
writes, server reads.
- Ingestor (`cmd/ingestor/neighbor_builder.go`): every 60s scans
`observations + transmissions`, extracts edges (originator↔first-hop for
ADVERTs; observer↔last-hop for all), resolves hop prefixes via a
node-table prefix index, upserts into `neighbor_edges`.
- Server (`cmd/server/neighbor_recomputer.go`): every 60s re-reads
`neighbor_edges` and atomic-swaps the resulting `NeighborGraph` into
`s.graph`. Initial load is synchronous on startup. All server-side
incremental edge writers (the two `asyncPersistResolvedPathsAndEdges`
paths in `cmd/server/store.go`) are gone.
- Neighbor-edge daily prune (`PruneNeighborEdges`) moved to ingestor.
**Why Option 4**: clean read/write separation, no startup CPU spike
(server loads existing snapshot instead of rebuilding from history), no
IPC/delta-protocol churn. Staleness budget ~60s — same model as the
analytics recomputers in #1240 / #1248 / #672 axis 2.
**Recomputer interval default for neighbor graph**: 60s
(`NeighborGraphRecomputerDefaultInterval`,
`NeighborEdgesBuilderInterval`).
**Invariants added**:
- `TestServerSourceHasNoCachedRWCalls` (RED commit eae179b9): a grep-based
check that enforces zero occurrences of `cachedRW(`, `mode=rw`, or
`sql.Open(_journal_mode=WAL…)` in non-test `cmd/server/` sources.
- `TestServerStartupRequiresMigratedSchema`: server refuses to start
against an unmigrated DB.
- `TestNeighborGraphRecomputerLoadsSnapshot`: post-write snapshot is
picked up on the next refresh.
- `TestNeighborEdgesBuilderUpsertsFromObservations`: end-to-end pipeline
writes the expected edge.
`grep cachedRW cmd/server/*.go | grep -v _test.go` → 0 matches.
Fixes #1287.
---------
Co-authored-by: MeshCore Bot <bot@meshcore.local>
Co-authored-by: Kpa-clawbot <Kpa-clawbot@users.noreply.github.com>
Co-authored-by: corescope-bot <bot@corescope.local>
98 lines
2.8 KiB
Go
98 lines
2.8 KiB
Go
// Package main: neighbor-graph snapshot recomputer (issue #1287).
|
|
//
|
|
// Per #1287 Option 4: the ingestor owns the neighbor_edges table —
|
|
// it computes the graph from observations it ingests and persists
|
|
// snapshots there. The server READS the snapshot and atomic-swaps
|
|
// it into s.graph; that swap is exactly what this recomputer does.
|
|
//
|
|
// Cadence: 60s default. Staleness budget matches the existing
|
|
// analytics recomputer (#1240) — operators already accept that
|
|
// derived analytics lag the wire by tens of seconds.
|
|
package main
|
|
|
|
import (
	"log"
	"sync"
	"time"
)
|
|
|
|
// NeighborGraphRecomputerDefaultInterval is the fallback period between
// two re-reads of the neighbor_edges snapshot by the server. One minute
// is the standard staleness budget for derived analytics
// (#1240 / #1262 / #672 axis 2).
const NeighborGraphRecomputerDefaultInterval = time.Minute
|
|
|
|
// neighborRecompStartedMu guards neighborRecompStarted.
var neighborRecompStartedMu sync.Mutex

// neighborRecompStarted is set once the neighbor-graph recomputer
// goroutine has been launched. It is package-level, so the guard is
// shared by every PacketStore in the process.
var neighborRecompStarted bool
|
|
|
|
// StartNeighborGraphRecomputer launches the background goroutine that
|
|
// re-reads neighbor_edges every `interval` and atomic-swaps the
|
|
// resulting NeighborGraph into s.graph. Idempotent — subsequent calls
|
|
// are no-ops and return a no-op stop closure.
|
|
//
|
|
// Server NEVER writes to neighbor_edges; the ingestor owns those
|
|
// writes per #1287. This recomputer is the ONLY thing that updates
|
|
// s.graph at steady state (the initial startup load in main.go is the
|
|
// other writer to s.graph, only at boot).
|
|
func (s *PacketStore) StartNeighborGraphRecomputer(interval time.Duration) func() {
|
|
if interval <= 0 {
|
|
interval = NeighborGraphRecomputerDefaultInterval
|
|
}
|
|
|
|
neighborRecompStartedMu.Lock()
|
|
if neighborRecompStarted {
|
|
neighborRecompStartedMu.Unlock()
|
|
return func() {}
|
|
}
|
|
neighborRecompStarted = true
|
|
stop := make(chan struct{})
|
|
done := make(chan struct{})
|
|
neighborRecompStartedMu.Unlock()
|
|
|
|
var stopOnce sync.Once
|
|
go func() {
|
|
defer close(done)
|
|
t := time.NewTicker(interval)
|
|
defer t.Stop()
|
|
for {
|
|
select {
|
|
case <-t.C:
|
|
s.refreshNeighborGraphFromSnapshot()
|
|
case <-stop:
|
|
return
|
|
}
|
|
}
|
|
}()
|
|
|
|
return func() {
|
|
stopOnce.Do(func() { close(stop) })
|
|
select {
|
|
case <-done:
|
|
case <-time.After(5 * time.Second):
|
|
}
|
|
}
|
|
}
|
|
|
|
// refreshNeighborGraphFromSnapshot re-reads neighbor_edges through
|
|
// the read-only DB handle and atomic-swaps a freshly built graph.
|
|
// Panics are swallowed defensively — the previous snapshot remains
|
|
// valid if a read fails.
|
|
func (s *PacketStore) refreshNeighborGraphFromSnapshot() {
|
|
defer func() { _ = recover() }()
|
|
if s.db == nil || s.db.conn == nil {
|
|
return
|
|
}
|
|
g := loadNeighborEdgesFromDB(s.db.conn)
|
|
if g != nil {
|
|
s.graph.Store(g)
|
|
}
|
|
}
|
|
|
|
// resetNeighborRecomputerForTest is a test helper — production code
|
|
// MUST NOT call this.
|
|
func resetNeighborRecomputerForTest() {
|
|
neighborRecompStartedMu.Lock()
|
|
neighborRecompStarted = false
|
|
neighborRecompStartedMu.Unlock()
|
|
}
|