From 5fa3b56ccbcdbcb7545352e916e93ed8a531bbe8 Mon Sep 17 00:00:00 2001 From: Kpa-clawbot Date: Tue, 5 May 2026 02:33:27 -0700 Subject: [PATCH] fix(#662): GetRepeaterRelayInfo also looks up byPathHop by 1-byte prefix (#1086) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Partial fix for #662. `GetRepeaterRelayInfo` was reporting "never observed as relay hop" / `RelayCount24h=0` for nodes that clearly DO have packets passing through them — visible on the same node detail page in the "Paths seen through node" view. ## Root cause The `byPathHop` index is keyed by **both**: - full resolved pubkey (populated when neighbor-affinity resolution succeeds), and - raw 1-byte hop prefix from the wire (e.g. `"a3"`) `GetRepeaterRelayInfo` only looked up the full-pubkey key. Many ingested non-advert packets only carry the raw 1-byte hop — so any repeater whose path appearances are all raw-hop entries returned 0, even though the path-listing endpoint (which prefix-matches) renders them. Example node: an `a3…` repeater on staging has ~dozens of paths through it in the UI but the relay-info function returns 0. ## Fix Look up under both keys (full pubkey + 1-byte prefix) and de-dup by tx ID before counting. ## Trade-off The 1-byte prefix CAN over-count when multiple nodes share a first byte. This trades a possible over-count for clearly false zeros. The richer disambiguation done by the path-listing endpoint (resolved-path SQL post-filter via `confirmResolvedPathContains`) is out of scope for this partial fix — adding it here would mean disk I/O inside what is currently a pure in-memory lookup. Worth a follow-up if over-counting shows up in practice. ## TDD - Red commit (`test: failing test for relay-info prefix-hop mismatch`): adds `TestRepeaterRelayActivity_PrefixHop` that builds a non-advert packet with `PathJSON: ["a3"]`, indexes it via `addTxToPathHopIndex`, then asserts `RelayCount24h>=1` for the full pubkey starting with `a3…`. Fails on the assertion (got 0), not a build error. - Green commit (`fix: GetRepeaterRelayInfo also looks up byPathHop by 1-byte prefix`): the lookup change. All five `TestRepeaterRelayActivity_*` tests pass. ## Scope This is a **partial** fix — addresses the read-side prefix mismatch only. Issue #662 is a 4-axis epic (also covers ingest indexing consistency, UI surfacing, and schema). Leaving #662 open. --------- Co-authored-by: corescope-bot Co-authored-by: clawbot --- cmd/server/repeater_liveness.go | 64 ++++++++++++++--- cmd/server/repeater_liveness_test.go | 101 +++++++++++++++++++++++++++ 2 files changed, 157 insertions(+), 8 deletions(-) diff --git a/cmd/server/repeater_liveness.go b/cmd/server/repeater_liveness.go index 47442ad3..b49ae646 100644 --- a/cmd/server/repeater_liveness.go +++ b/cmd/server/repeater_liveness.go @@ -82,24 +82,72 @@ func (s *PacketStore) GetRepeaterRelayInfo(pubkey string, windowHours float64) R key := strings.ToLower(pubkey) s.mu.RLock() + // byPathHop is keyed by both full resolved pubkey AND raw 1-byte hop + // prefix (e.g. "a3"). Many ingested non-advert packets only carry the + // raw hop on the wire — resolution to the full pubkey happens later + // via neighbor affinity. To match what the "Paths seen through node" + // view shows, we look up under both keys and de-dupe by tx ID. + // + // The 1-byte prefix lookup CAN over-count when multiple nodes share + // the same first byte. This trades a possible over-count for clearly + // false zeros (issue #662). The richer disambiguation done by the + // path-listing endpoint (resolved-path SQL post-filter) is out of + // scope for this partial fix. txList := s.byPathHop[key] + var prefixList []*StoreTx + if len(key) >= 2 { + // key[:2] is the first 2 hex characters of the lowercase pubkey, + // i.e. exactly 1 byte of raw hop data — the same shape used by + // addTxToPathHopIndex when only a wire-level 1-byte path hop is + // available (no resolved full pubkey yet). + prefix := key[:2] + if prefix != key { + prefixList = s.byPathHop[prefix] + } + } // Copy only the timestamps + payload types we need so we can release // the read lock before doing parsing/compare work below. + // + // scratch is sized to the actual unique tx count across both lists + // rather than `len(txList)+len(prefixList)`. On busy nodes the same + // tx is frequently indexed under BOTH the full pubkey AND the raw + // 1-byte prefix, so the naive sum can over-allocate by ~2x. We do a + // quick ID-set pass to get the exact size before allocating. type entry struct { ts string pt int } - scratch := make([]entry, 0, len(txList)) + uniq := make(map[int]struct{}, len(txList)+len(prefixList)) for _, tx := range txList { - if tx == nil { - continue + if tx != nil { + uniq[tx.ID] = struct{}{} } - pt := -1 - if tx.PayloadType != nil { - pt = *tx.PayloadType - } - scratch = append(scratch, entry{ts: tx.FirstSeen, pt: pt}) } + for _, tx := range prefixList { + if tx != nil { + uniq[tx.ID] = struct{}{} + } + } + scratch := make([]entry, 0, len(uniq)) + seen := make(map[int]bool, len(uniq)) + collect := func(list []*StoreTx) { + for _, tx := range list { + if tx == nil { + continue + } + if seen[tx.ID] { + continue + } + seen[tx.ID] = true + pt := -1 + if tx.PayloadType != nil { + pt = *tx.PayloadType + } + scratch = append(scratch, entry{ts: tx.FirstSeen, pt: pt}) + } + } + collect(txList) + collect(prefixList) s.mu.RUnlock() now := time.Now().UTC() diff --git a/cmd/server/repeater_liveness_test.go b/cmd/server/repeater_liveness_test.go index 14615540..542b8d4f 100644 --- a/cmd/server/repeater_liveness_test.go +++ b/cmd/server/repeater_liveness_test.go @@ -160,3 +160,104 @@ func TestRepeaterRelayActivity_IgnoresAdverts(t *testing.T) { t.Errorf("expected zero relay counts (adverts ignored), got 1h=%d 24h=%d", info.RelayCount1h, info.RelayCount24h) } } + +// TestRepeaterRelayActivity_PrefixHop verifies that GetRepeaterRelayInfo +// counts a non-advert packet whose path contains only the 1-byte raw hop +// prefix matching the target node (not the full resolved pubkey). +// +// Reality on prod/staging: many ingested packets only carry raw 1-byte +// path hops (e.g. ["a3"] from the wire) — resolution to a full pubkey +// happens later via neighbor affinity for the "Paths seen through node" +// view. The byPathHop index is populated under BOTH keys (raw hop AND +// resolved pubkey), but GetRepeaterRelayInfo only looks up the full +// pubkey, missing all raw-hop-only entries. This is the cause of the +// "never observed as relay hop" claim on nodes that clearly have paths +// shown through them. See https://analyzer-stg.00id.net/#/nodes/. +func TestRepeaterRelayActivity_PrefixHop(t *testing.T) { + db := setupCapabilityTestDB(t) + defer db.conn.Close() + + pubkey := "a36a21290d9c25a158130fe7c489541210d5f09f25fab997db5e942fb7680510" + db.conn.Exec("INSERT INTO nodes (public_key, name, role, last_seen) VALUES (?, ?, ?, ?)", + pubkey, "RepPrefix", "repeater", recentTS(1)) + + store := NewPacketStore(db, nil) + + // Non-advert packet with a single raw 1-byte hop matching the target + // pubkey's first byte ("a3"). Index it the way addTxToPathHopIndex + // does — under the raw hop key only, not the full pubkey. + pt := 1 + tx := &StoreTx{ + RawHex: "0100", + PayloadType: &pt, + PathJSON: `["a3"]`, + FirstSeen: recentTS(2), + } + store.mu.Lock() + tx.ID = len(store.packets) + 1 + tx.Hash = "test-relay-prefix-1" + store.packets = append(store.packets, tx) + store.byHash[tx.Hash] = tx + store.byTxID[tx.ID] = tx + addTxToPathHopIndex(store.byPathHop, tx) + store.mu.Unlock() + + info := store.GetRepeaterRelayInfo(pubkey, 24) + if info.RelayCount24h < 1 { + t.Fatalf("expected RelayCount24h>=1 for node with prefix-matched hop in path, got %d (LastRelayed=%q)", + info.RelayCount24h, info.LastRelayed) + } + if info.LastRelayed == "" { + t.Errorf("expected non-empty LastRelayed when prefix hop matched, got empty") + } + if !info.RelayActive { + t.Errorf("expected RelayActive=true within 24h window, got false (LastRelayed=%s)", info.LastRelayed) + } +} + +// TestRepeaterRelayActivity_DedupAcrossPrefixAndFullKey verifies that when +// the SAME packet is indexed in byPathHop under BOTH the full pubkey AND +// the raw 1-byte prefix, GetRepeaterRelayInfo counts it exactly once. This +// gates the `seen[tx.ID]` dedup map: without it, hop counts would double +// for any tx that resolved-path indexing recorded under both keys. +func TestRepeaterRelayActivity_DedupAcrossPrefixAndFullKey(t *testing.T) { + db := setupCapabilityTestDB(t) + defer db.conn.Close() + + pubkey := "a36a21290d9c25a158130fe7c489541210d5f09f25fab997db5e942fb7680510" + db.conn.Exec("INSERT INTO nodes (public_key, name, role, last_seen) VALUES (?, ?, ?, ?)", + pubkey, "RepDedup", "repeater", recentTS(1)) + + store := NewPacketStore(db, nil) + + pt := 1 + tx := &StoreTx{ + RawHex: "0100", + PayloadType: &pt, + PathJSON: `["a3"]`, + FirstSeen: recentTS(2), + } + store.mu.Lock() + tx.ID = len(store.packets) + 1 + tx.Hash = "test-relay-dedup-1" + store.packets = append(store.packets, tx) + store.byHash[tx.Hash] = tx + store.byTxID[tx.ID] = tx + // Index under BOTH the full pubkey AND the raw 1-byte prefix — this + // is the exact double-index case that occurs when wire ingest records + // the raw hop and a later resolution pass also records the full key. + store.byPathHop[pubkey] = append(store.byPathHop[pubkey], tx) + store.byPathHop[pubkey[:2]] = append(store.byPathHop[pubkey[:2]], tx) + store.mu.Unlock() + + info := store.GetRepeaterRelayInfo(pubkey, 24) + if info.RelayCount24h != 1 { + t.Fatalf("expected RelayCount24h=1 (dedup across full+prefix indexing), got %d", info.RelayCount24h) + } + if info.RelayCount1h != 0 { + t.Errorf("expected RelayCount1h=0 (relay was 2h ago, outside 1h window), got %d", info.RelayCount1h) + } + if !info.RelayActive { + t.Errorf("expected RelayActive=true, got false (LastRelayed=%s)", info.LastRelayed) + } +}