mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-05-20 10:55:54 +00:00
9383201c07
Red commit:
https://github.com/Kpa-clawbot/CoreScope/commit/eae179b99b5fd34924547632aa8f8025c405aa53
(CI: pending — opens with this PR)
Finishes #1283. RED test `TestServerSourceHasNoCachedRWCalls` goes from
failing (13 writer call-sites) to GREEN (zero). Per #1287 Option 4
(https://github.com/Kpa-clawbot/CoreScope/issues/1287#issuecomment-4485099992):
ingestor owns the neighbor graph build + persist; server reads the
snapshot.
**Category A — Schema migrations** → new `internal/dbschema` package.
`dbschema.Apply(rw)` runs in `cmd/ingestor` startup (in `OpenStore`).
`dbschema.AssertReady(ro)` runs in `cmd/server/main.go`; it logs a fatal
error and exits if any expected column/index/table is missing — the
operator must restart the ingestor first. Covers indexes,
`neighbor_edges`, `observations.resolved_path`,
`observers.{inactive,last_packet_at,iata}`,
`(inactive_)nodes.foreign_advert`, `transmissions.from_pubkey`.
**Category B — Backfill** → ingestor.
`BackfillFromPubkey` and observer-blacklist soft-delete moved to
`cmd/ingestor/maintenance.go`. Server keeps an inert
`fromPubkeyBackfillSnapshot` stub for `/api/healthz` API compatibility.
**Category C — Neighbor-graph persistence (Option 4)** → ingestor
writes, server reads.
- Ingestor (`cmd/ingestor/neighbor_builder.go`): every 60s scans
`observations + transmissions`, extracts edges (originator↔first-hop for
ADVERTs; observer↔last-hop for all), resolves hop prefixes via a
node-table prefix index, upserts into `neighbor_edges`.
- Server (`cmd/server/neighbor_recomputer.go`): every 60s re-reads
`neighbor_edges` and atomic-swaps the resulting `NeighborGraph` into
`s.graph`. Initial load is synchronous on startup. All server-side
incremental edge writers (the two `asyncPersistResolvedPathsAndEdges`
paths in `cmd/server/store.go`) are gone.
- Neighbor-edge daily prune (`PruneNeighborEdges`) moved to ingestor.
**Why Option 4**: clean read/write separation, no startup CPU spike
(server loads existing snapshot instead of rebuilding from history), no
IPC/delta-protocol churn. Staleness budget ~60s — same model as the
analytics recomputers in #1240 / #1248 / #672 axis 2.
**Recomputer interval default for neighbor graph**: 60s
(`NeighborGraphRecomputerDefaultInterval`,
`NeighborEdgesBuilderInterval`).
**Invariants added**:
- `TestServerSourceHasNoCachedRWCalls` (RED commit eae179b9): grep
enforces zero `cachedRW(`, `mode=rw`, or `sql.Open(_journal_mode=WAL…)`
in non-test `cmd/server/` sources.
- `TestServerStartupRequiresMigratedSchema`: server refuses to start
against an unmigrated DB.
- `TestNeighborGraphRecomputerLoadsSnapshot`: post-write snapshot is
picked up on the next refresh.
- `TestNeighborEdgesBuilderUpsertsFromObservations`: end-to-end pipeline
writes the expected edge.
`grep cachedRW cmd/server/*.go | grep -v _test.go` → 0 matches.
Fixes #1287.
---------
Co-authored-by: MeshCore Bot <bot@meshcore.local>
Co-authored-by: Kpa-clawbot <Kpa-clawbot@users.noreply.github.com>
Co-authored-by: corescope-bot <bot@corescope.local>
129 lines
4.0 KiB
Go
129 lines
4.0 KiB
Go
package main
|
|
|
|
import (
|
|
"database/sql"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"reflect"
|
|
"regexp"
|
|
"strings"
|
|
"testing"
|
|
|
|
_ "modernc.org/sqlite"
|
|
)
|
|
|
|
// TestServerSourceHasNoCachedRWCalls enforces issue #1287: after the
// follow-up to #1283, cmd/server/ must contain ZERO writer call sites.
//
// It scans every non-test .go file in the current working directory and
// fails if any of them contains `cachedRW(`, `mode=rw`, or a `sql.Open(...)`
// call whose DSN query string carries `_journal_mode=WAL`. The scan is
// purely textual, so occurrences inside comments or string literals count
// as well. All schema migrations, backfills, and neighbor-edge persistence
// must live in cmd/ingestor or a shared package — the server is the read
// path.
//
// Every match is reported as "file:line: pattern" (1-based line numbers),
// so a single run lists all offending call sites rather than only the
// first hit per pattern in each file.
func TestServerSourceHasNoCachedRWCalls(t *testing.T) {
	entries, err := os.ReadDir(".")
	if err != nil {
		t.Fatalf("read cmd/server dir: %v", err)
	}

	// Patterns that indicate write-side DB usage on the server.
	patterns := []*regexp.Regexp{
		regexp.MustCompile(`\bcachedRW\s*\(`),
		regexp.MustCompile(`mode=rw`),
		regexp.MustCompile(`sql\.Open\([^)]*\?[^)]*_journal_mode=WAL[^)]*\)`),
	}

	var violations []string
	for _, e := range entries {
		name := e.Name()
		// Only production Go sources are subject to the invariant; test
		// files (including this one) legitimately mention the patterns.
		if e.IsDir() || !strings.HasSuffix(name, ".go") || strings.HasSuffix(name, "_test.go") {
			continue
		}

		b, err := os.ReadFile(filepath.Join(".", name))
		if err != nil {
			t.Fatalf("read %s: %v", name, err)
		}
		src := string(b)

		for _, p := range patterns {
			// Collect every occurrence so the failure output is a complete
			// list of call sites to fix, not just the first one.
			for _, loc := range p.FindAllStringIndex(src, -1) {
				line := 1 + strings.Count(src[:loc[0]], "\n")
				violations = append(violations, fmt.Sprintf("%s:%d: %s", name, line, p.String()))
			}
		}
	}

	if len(violations) > 0 {
		t.Errorf("cmd/server/ contains forbidden writer call sites (#1287):\n  %s",
			strings.Join(violations, "\n  "))
	}
}
|
|
|
|
// TestServerDBHasNoWriteMethods enforces the architectural invariant from
|
|
// issue #1283: cmd/server is the read path. All write/maintenance methods
|
|
// (PruneOldPackets, PruneOldMetrics, RemoveStaleObservers) MUST live on
|
|
// the ingestor's *Store, not on the server's *DB.
|
|
//
|
|
// Before the fix, these methods existed on cmd/server/*DB and used
|
|
// cachedRW(db.path) to acquire a write lock, racing with the ingestor's
|
|
// concurrent INSERTs and producing SQLITE_BUSY (the bug in #1283).
|
|
// After the fix, this test passes because the methods are gone.
|
|
func TestServerDBHasNoWriteMethods(t *testing.T) {
|
|
forbidden := []string{
|
|
"PruneOldPackets",
|
|
"PruneOldMetrics",
|
|
"RemoveStaleObservers",
|
|
}
|
|
typ := reflect.TypeOf((*DB)(nil))
|
|
for _, name := range forbidden {
|
|
if _, ok := typ.MethodByName(name); ok {
|
|
t.Errorf("server *DB exposes forbidden write method %q — must be relocated to ingestor (#1283)", name)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestServerDBConnIsReadOnly asserts that the *sql.DB the server opens
|
|
// cannot acquire a write lock. The server has always opened mode=ro, but
|
|
// before #1283 it routed around that by calling cachedRW(path) to get a
|
|
// second RW handle. After the fix, server-side writes are impossible
|
|
// because there is no helper to open a writable connection.
|
|
func TestServerDBConnIsReadOnly(t *testing.T) {
|
|
dir := t.TempDir()
|
|
path := dir + "/ro_invariant.db"
|
|
|
|
// Bootstrap a minimal DB with the ingestor-style WAL opener so the
|
|
// server can attach in read-only mode.
|
|
if err := bootstrapMinimalDB(path); err != nil {
|
|
t.Fatalf("bootstrap: %v", err)
|
|
}
|
|
|
|
d, err := OpenDB(path)
|
|
if err != nil {
|
|
t.Fatalf("OpenDB: %v", err)
|
|
}
|
|
defer d.conn.Close()
|
|
|
|
_, err = d.conn.Exec(`INSERT INTO nodes (public_key, name) VALUES ('x','y')`)
|
|
if err == nil {
|
|
t.Fatalf("expected INSERT via server *DB to fail (read-only invariant)")
|
|
}
|
|
}
|
|
|
|
// bootstrapMinimalDB creates (or reuses) a small database at path that
// contains only the `nodes` table with the columns these tests need, so the
// read-only opener in OpenDB has something to attach to. The writable
// handle is closed before returning.
//
// It returns the error from opening the database or from creating the
// table, or nil on success.
//
// Kept in *_test.go so it does NOT add any write capability to the
// production server binary.
//
// NOTE(review): the DSN uses `_journal_mode` / `_busy_timeout` query
// parameters; confirm the imported modernc.org/sqlite driver honors these
// names (it documents a `_pragma=` form) — TODO verify WAL is really on.
func bootstrapMinimalDB(path string) error {
	db, openErr := sql.Open("sqlite", fmt.Sprintf("file:%s?_journal_mode=WAL&_busy_timeout=5000", path))
	if openErr != nil {
		return openErr
	}
	defer db.Close()

	const createNodes = `CREATE TABLE IF NOT EXISTS nodes (public_key TEXT PRIMARY KEY, name TEXT)`
	_, execErr := db.Exec(createNodes)
	return execErr
}
|