test(#1229): RED — tier-1 must prefer multi-observer edges

Adds two tests that fail on master:

1. TestResolveWithContext_Tier1_ConfidencePrefersMultiObserverEdge — two 8a candidates from the same anchor's neighborhood. candX has 25 contributions from 1 observer (single-source, suspect); candY has 30 contributions from 6 distinct observers (corroborated). The resolver currently picks candX via tier-2 geo_proximity because the raw counts are within 3x of each other (1.2x). The test asserts candY via neighbor_affinity — i.e. confidence weighting must demote single-source edges enough that the corroborated edge clears the affinityConfidenceRatio gate.

2. TestNeighborEdge_ObserverSetIsDistinct — repeated contributions from the same observer must not inflate the Observers set; new observers must increment it. Pins the source-diversity counter behavior end-to-end via the public upsert path. Also asserts that Confidence() returns a saturated 1.0 for >=3 observers and a value in (0,1) for a single observer.

Confidence() ships as a 1.0-returning stub so the RED test fails on an assertion (wrong candidate, wrong method) — not on a missing method — keeping the AGENTS.md "red commit must compile + fail on assertion" discipline.

Refs #1229
2026-05-19 01:25:20 +00:00 · 2026-05-16 19:43:28 +00:00
parent b21badbcbd
commit 235b65b4e6
2 changed files with 130 additions and 0 deletions
@@ -0,0 +1,113 @@
+package main
+
+import (
+	"testing"
+	"time"
+)
+
+// Issue #1229 (Option C): edge source-diversity confidence weighting.
+//
+// The tier-1 affinity scorer must demote edges contributed by a single
+// observer relative to edges corroborated by multiple distinct observers.
+// Without this guard, one observer with a chatty link can dominate the
+// global graph and force resolution to the "wrong" candidate in a region
+// it doesn't actually cover.
+//
+// Fixture (two "8a" candidates from the same anchor's neighborhood):
+//   candX: 25 contributions from 1 observer (single-source, suspect)
+//   candY: 30 contributions from 6 distinct observers (corroborated)
+//
+// Raw count score:
+//   candX score ≈ 0.25, candY score ≈ 0.30 — ratio ≈ 1.2× (below 3×, falls
+//   through to tier 2). Without confidence weighting tier 2 would pick
+//   candX because we placed it geo-near the anchor — exactly the
+//   cross-region pollution failure mode described in the issue.
+//
+// Confidence-weighted score (multiplier = min(1, |observers|/3)):
+//   candX = 0.25 × (1/3)  ≈ 0.083
+//   candY = 0.30 × 1.0    = 0.30
+//   ratio ≈ 3.6× — clears affinityConfidenceRatio, tier-1 returns candY
+//   with method "neighbor_affinity".
+
+// seedAffinityFromObservers upserts perObserver contributions to the
+// anchor–candPK edge for every entry in observers, passing prefix and a nil
+// fifth argument through to upsertEdge. The first contribution is stamped
+// with the time the helper is called; each later one is stamped one minute
+// earlier than the one before it, continuing across observers.
+func seedAffinityFromObservers(g *NeighborGraph, anchor, candPK, prefix string, observers []string, perObserver int) {
+	base := time.Now()
+	for oi, observer := range observers {
+		for k := 0; k < perObserver; k++ {
+			// Running index over all contributions so far, expressed in minutes.
+			age := time.Duration(oi*perObserver+k) * time.Minute
+			g.upsertEdge(anchor, candPK, prefix, observer, nil, base.Add(-age))
+		}
+	}
+}
+
+func TestResolveWithContext_Tier1_ConfidencePrefersMultiObserverEdge(t *testing.T) {
+	nodes := []nodeInfo{
+		// candX: placed near the anchor so tier-2 (geo) would pick it.
+		{PublicKey: "8aaaaaaaaaaa", Role: "repeater", Name: "candX", HasGPS: true, Lat: 34.06, Lon: -118.26},
+		// candY: far from anchor; only source-diversity confidence rescues it.
+		{PublicKey: "8abbbbbbbbbb", Role: "repeater", Name: "candY", HasGPS: true, Lat: 47.6, Lon: -122.3},
+		{PublicKey: "ffeeeeeeeeee", Role: "repeater", Name: "anchor", HasGPS: true, Lat: 34.05, Lon: -118.25},
+	}
+	anchor := "ffeeeeeeeeee"
+
+	g := NewNeighborGraph()
+	// candX: 1 observer × 25 obs → single-source, demoted to 1/3 weight.
+	seedAffinityFromObservers(g, anchor, "8aaaaaaaaaaa", "8a",
+		[]string{"obs1"}, 25)
+	// candY: 6 distinct observers × 5 obs each = 30 obs → full weight.
+	seedAffinityFromObservers(g, anchor, "8abbbbbbbbbb", "8a",
+		[]string{"obs1", "obs2", "obs3", "obs4", "obs5", "obs6"}, 5)
+
+	pm := buildPrefixMap(nodes)
+	r, method, score := pm.resolveWithContext("8a", []string{anchor}, g)
+	if r == nil {
+		t.Fatal("expected non-nil candidate")
+	}
+	if r.Name != "candY" {
+		t.Fatalf("want candY (corroborated by 6 observers); got %s via %s score=%v",
+			r.Name, method, score)
+	}
+	if method != "neighbor_affinity" {
+		t.Fatalf("want method=neighbor_affinity (confidence-weighted tier 1); got %s", method)
+	}
+}
+
+// TestNeighborEdge_ObserverSetIsDistinct is a sanity gate on the
+// source-diversity counter itself: repeated contributions from the same
+// observer must NOT inflate the observer-set count, but contributions from
+// new observers must increment it. It also checks that Confidence() is 1.0
+// for an edge with four observers and strictly inside (0,1) for an edge with
+// a single observer.
+func TestNeighborEdge_ObserverSetIsDistinct(t *testing.T) {
+	g := NewNeighborGraph()
+	now := time.Now()
+	// 10 contributions from obs1 — set size must stay 1.
+	for i := 0; i < 10; i++ {
+		g.upsertEdge("aa11", "bb22", "bb", "obs1", nil, now)
+	}
+	// 1 contribution each from obs2..obs4 — set size grows to 4.
+	for _, obs := range []string{"obs2", "obs3", "obs4"} {
+		g.upsertEdge("aa11", "bb22", "bb", obs, nil, now)
+	}
+
+	edges := g.Neighbors("aa11")
+	if len(edges) != 1 {
+		t.Fatalf("expected 1 edge; got %d", len(edges))
+	}
+	e := edges[0]
+	if len(e.Observers) != 4 {
+		t.Fatalf("expected 4 distinct observers; got %d (%v)", len(e.Observers), e.Observers)
+	}
+	if e.Count != 13 {
+		t.Fatalf("expected count=13 (10+3); got %d", e.Count)
+	}
+	if got := e.Confidence(); got != 1.0 {
+		t.Fatalf("Confidence() with 4 observers: want 1.0 (saturated); got %v", got)
+	}
+	// Single-observer edge must report degraded confidence.
+	g.upsertEdge("aa11", "cc33", "cc", "obs1", nil, now)
+	// Track whether the cc33 edge was actually visited: without this, a
+	// Neighbors() result that omits the edge would skip the assertion and let
+	// the test pass without checking anything.
+	sawSingleObserverEdge := false
+	for _, ee := range g.Neighbors("aa11") {
+		if ee.NodeA != "cc33" && ee.NodeB != "cc33" {
+			continue
+		}
+		sawSingleObserverEdge = true
+		if got := ee.Confidence(); got >= 1.0 || got <= 0 {
+			t.Fatalf("Confidence() single-observer: want in (0,1); got %v", got)
+		}
+	}
+	if !sawSingleObserverEdge {
+		t.Fatal("expected an edge touching cc33 among aa11's neighbors; single-observer Confidence() was never checked")
+	}
+}
@@ -71,6 +71,23 @@ func (e *NeighborEdge) Score(now time.Time) float64 {
 	return countFactor * decay
 }

+// Confidence returns a source-diversity multiplier in (0, 1] derived from the
+// number of distinct observers that have contributed to this edge. Issue #1229
+// (Option C): edges corroborated by multiple independent observers should
+// outrank edges seen by a single observer at the same raw score.
+//
+// Formula: min(1.0, max(1, |Observers|) / affinityObserverSaturation).
+// With saturation=3, a single observer yields 1/3, two observers 2/3, and
+// three-or-more observers saturate at 1.0 — full historical weight.
+//
+// STUB: real implementation lands in the GREEN commit. Returning 1.0 here
+// keeps the resolver behavior identical so the RED test fails on the
+// behavioral assertion (resolver picks the wrong candidate), not on a
+// missing method.
+func (e *NeighborEdge) Confidence() float64 {
+	return 1.0
+}
+
 // AvgSNR returns the average SNR, or 0 if no samples.
 func (e *NeighborEdge) AvgSNR() float64 {
 	if e.SNRCount == 0 {