mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-05-12 19:24:43 +00:00
9e90548637
## Summary Remove `ResolvedPath []*string` field from `StoreTx` and `StoreObs` structs, replacing it with a compact membership index + on-demand SQL decode. This eliminates the dominant heap cost identified in profiling (#791, #799). **Spec:** #800 (consolidated from two rounds of expert + implementer review on #799) Closes #800 Closes #791 ## Design ### Removed - `StoreTx.ResolvedPath []*string` - `StoreObs.ResolvedPath []*string` - `TransmissionResp.ResolvedPath`, `ObservationResp.ResolvedPath` struct fields ### Added | Structure | Purpose | Est. cost at 1M obs | |---|---|---:| | `resolvedPubkeyIndex map[uint64][]int` | FNV-1a(pubkey) → []txID forward index | 50–120 MB | | `resolvedPubkeyReverse map[int][]uint64` | txID → []hashes for clean removal | ~40 MB | | `apiResolvedPathLRU` (10K entries) | FIFO cache for on-demand API decode | ~2 MB | ### Decode-window discipline `resolved_path` JSON decoded once per packet. Consumers fed in order, temp slice dropped — never stored on struct: 1. `addToByNode` — relay node indexing 2. `touchRelayLastSeen` — relay liveness DB updates 3. `byPathHop` resolved-key entries 4. `resolvedPubkeyIndex` + reverse insert 5. WebSocket broadcast map (raw JSON bytes) 6. Persist batch (raw JSON bytes for SQL UPDATE) ### Collision safety When the forward index returns candidates, a batched SQL query confirms exact pubkey presence using `LIKE '%"pubkey"%'` on the `resolved_path` column. ### Feature flag `useResolvedPathIndex` (default `true`). Off-path is conservative: all candidates kept, index not consulted. For one-release rollback safety. ## Files changed | File | Changes | |---|---| | `resolved_index.go` | **New** — index structures, LRU cache, on-demand SQL helpers, collision safety | | `store.go` | Remove RP fields, decode-window discipline in Load/Ingest, on-demand txToMap/obsToMap/enrichObs, eviction cleanup via SQL, memory accounting update | | `types.go` | Remove RP fields from TransmissionResp/ObservationResp | | `routes.go` | Replace `nodeInResolvedPath` with `nodeInResolvedPathViaIndex`, remove RP from mapSlice helpers | | `neighbor_persist.go` | Refactor backfill: reverse-map removal → forward+reverse insert → LRU invalidation | ## Tests added (27 new) **Unit:** - `TestStoreTx_ResolvedPathFieldAbsent` — reflection guard - `TestResolvedPubkeyIndex_BuildFromLoad` — forward+reverse consistency - `TestResolvedPubkeyIndex_HashCollision` — SQL collision safety - `TestResolvedPubkeyIndex_IngestUpdate` — maps reflect new ingests - `TestResolvedPubkeyIndex_RemoveOnEvict` — clean removal via reverse map - `TestResolvedPubkeyIndex_PerObsCoverage` — non-best obs pubkeys indexed - `TestAddToByNode_WithoutResolvedPathField` - `TestTouchRelayLastSeen_WithoutResolvedPathField` - `TestWebSocketBroadcast_IncludesResolvedPath` - `TestBackfill_InvalidatesLRU` - `TestEviction_ByNodeCleanup_OnDemandSQL` - `TestExtractResolvedPubkeys`, `TestMergeResolvedPubkeys` - `TestResolvedPubkeyHash_Deterministic` - `TestLRU_EvictionOnFull` **Endpoint:** - `TestPathsThroughNode_NilResolvedPathFallback` - `TestPacketsAPI_OnDemandResolvedPath` - `TestPacketsAPI_OnDemandResolvedPath_LRUHit` - `TestPacketsAPI_OnDemandResolvedPath_Empty` **Feature flag:** - `TestFeatureFlag_OffPath_PreservesOldBehavior` - `TestFeatureFlag_Toggle_NoStateLeak` **Concurrency:** - `TestReverseMap_NoLeakOnPartialFailure` - `TestDecodeWindow_LockHoldTimeBounded` - `TestLivePolling_LRUUnderConcurrentIngest` **Regression:** - `TestRepeaterLiveness_StillAccurate` **Benchmarks:** - `BenchmarkLoad_BeforeAfter` - `BenchmarkResolvedPubkeyIndex_Memory` - `BenchmarkPathsThroughNode_Latency` - `BenchmarkLivePolling_UnderIngest` ## Benchmark results ``` BenchmarkResolvedPubkeyIndex_Memory/pubkeys=50K 429ms 103MB 777K allocs BenchmarkResolvedPubkeyIndex_Memory/pubkeys=500K 4205ms 896MB 7.67M allocs BenchmarkLoad_BeforeAfter 65ms 20MB 202K allocs BenchmarkPathsThroughNode_Latency 3.9µs 0B 0 allocs BenchmarkLivePolling_UnderIngest 5.4µs 545B 7 allocs ``` Key: per-obs `[]*string` overhead completely eliminated. At 1M obs with 3 hops average, this saves ~72 bytes/obs × 1M = ~68 MB just from the slice headers + pointers, plus the JSON-decoded string data (~900 MB at scale per profiling). ## Design choices - **FNV-1a instead of xxhash**: stdlib availability, no external dependency. Performance is equivalent for this use case (pubkey strings are short). - **FIFO LRU instead of true LRU**: simpler implementation, adequate for the access pattern (mostly sequential obs IDs from live polling). - **Grouped packets view omits resolved_path**: cold path, not worth SQL round-trip per page render. - **Backfill pending check uses reverse-map presence** instead of per-obs field: if a tx has any indexed pubkeys, its observations are considered resolved. Closes #807 --------- Co-authored-by: you <you@example.com>
127 lines
3.6 KiB
Go
127 lines
3.6 KiB
Go
package main
|
|
|
|
import (
|
|
"database/sql"
|
|
"testing"
|
|
"time"
|
|
|
|
_ "modernc.org/sqlite"
|
|
)
|
|
|
|
func TestTouchNodeLastSeen_UpdatesDB(t *testing.T) {
|
|
db := setupTestDB(t)
|
|
defer db.Close()
|
|
|
|
// Insert a node with no last_seen
|
|
db.conn.Exec("INSERT INTO nodes (public_key, name, role) VALUES (?, ?, ?)", "abc123", "relay1", "REPEATER")
|
|
|
|
err := db.TouchNodeLastSeen("abc123", "2026-04-12T04:00:00Z")
|
|
if err != nil {
|
|
t.Fatalf("TouchNodeLastSeen returned error: %v", err)
|
|
}
|
|
|
|
var lastSeen sql.NullString
|
|
db.conn.QueryRow("SELECT last_seen FROM nodes WHERE public_key = ?", "abc123").Scan(&lastSeen)
|
|
if !lastSeen.Valid || lastSeen.String != "2026-04-12T04:00:00Z" {
|
|
t.Fatalf("expected last_seen=2026-04-12T04:00:00Z, got %v", lastSeen)
|
|
}
|
|
}
|
|
|
|
func TestTouchNodeLastSeen_DoesNotGoBackwards(t *testing.T) {
|
|
db := setupTestDB(t)
|
|
defer db.Close()
|
|
|
|
db.conn.Exec("INSERT INTO nodes (public_key, name, role, last_seen) VALUES (?, ?, ?, ?)",
|
|
"abc123", "relay1", "REPEATER", "2026-04-12T05:00:00Z")
|
|
|
|
// Try to set an older timestamp
|
|
err := db.TouchNodeLastSeen("abc123", "2026-04-12T04:00:00Z")
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
|
|
var lastSeen string
|
|
db.conn.QueryRow("SELECT last_seen FROM nodes WHERE public_key = ?", "abc123").Scan(&lastSeen)
|
|
if lastSeen != "2026-04-12T05:00:00Z" {
|
|
t.Fatalf("last_seen went backwards: got %s", lastSeen)
|
|
}
|
|
}
|
|
|
|
func TestTouchNodeLastSeen_NonExistentNode(t *testing.T) {
|
|
db := setupTestDB(t)
|
|
defer db.Close()
|
|
|
|
// Should not error for non-existent node
|
|
err := db.TouchNodeLastSeen("nonexistent", "2026-04-12T04:00:00Z")
|
|
if err != nil {
|
|
t.Fatalf("unexpected error for non-existent node: %v", err)
|
|
}
|
|
}
|
|
|
|
func TestTouchRelayLastSeen_Debouncing(t *testing.T) {
|
|
db := setupTestDB(t)
|
|
defer db.Close()
|
|
|
|
db.conn.Exec("INSERT INTO nodes (public_key, name, role) VALUES (?, ?, ?)", "relay1", "R1", "REPEATER")
|
|
|
|
s := &PacketStore{
|
|
db: db,
|
|
lastSeenTouched: make(map[string]time.Time),
|
|
}
|
|
|
|
// After #800, touchRelayLastSeen takes a []string of pubkeys (from decode-window)
|
|
pks := []string{"relay1"}
|
|
|
|
now := time.Now()
|
|
s.touchRelayLastSeen(pks, now)
|
|
|
|
// Verify it was written
|
|
var lastSeen sql.NullString
|
|
db.conn.QueryRow("SELECT last_seen FROM nodes WHERE public_key = ?", "relay1").Scan(&lastSeen)
|
|
if !lastSeen.Valid {
|
|
t.Fatal("expected last_seen to be set after first touch")
|
|
}
|
|
|
|
// Reset last_seen to check debounce prevents second write
|
|
db.conn.Exec("UPDATE nodes SET last_seen = NULL WHERE public_key = ?", "relay1")
|
|
|
|
// Call again within 5 minutes — should be debounced (no write)
|
|
s.touchRelayLastSeen(pks, now.Add(2*time.Minute))
|
|
|
|
db.conn.QueryRow("SELECT last_seen FROM nodes WHERE public_key = ?", "relay1").Scan(&lastSeen)
|
|
if lastSeen.Valid {
|
|
t.Fatal("expected debounce to prevent second write within 5 minutes")
|
|
}
|
|
|
|
// Call after 5 minutes — should write again
|
|
s.touchRelayLastSeen(pks, now.Add(6*time.Minute))
|
|
db.conn.QueryRow("SELECT last_seen FROM nodes WHERE public_key = ?", "relay1").Scan(&lastSeen)
|
|
if !lastSeen.Valid {
|
|
t.Fatal("expected write after debounce interval expired")
|
|
}
|
|
}
|
|
|
|
func TestTouchRelayLastSeen_SkipsEmptyPubkeys(t *testing.T) {
|
|
db := setupTestDB(t)
|
|
defer db.Close()
|
|
|
|
s := &PacketStore{
|
|
db: db,
|
|
lastSeenTouched: make(map[string]time.Time),
|
|
}
|
|
|
|
// Empty pubkeys — should not panic or error
|
|
s.touchRelayLastSeen([]string{}, time.Now())
|
|
s.touchRelayLastSeen(nil, time.Now())
|
|
}
|
|
|
|
func TestTouchRelayLastSeen_NilDB(t *testing.T) {
|
|
s := &PacketStore{
|
|
db: nil,
|
|
lastSeenTouched: make(map[string]time.Time),
|
|
}
|
|
|
|
// Should not panic with nil db
|
|
s.touchRelayLastSeen([]string{"abc"}, time.Now())
|
|
}
|