test(#1724): RED — assert tx_last_seen backfill chunks UPDATE

Seeds 12k transmissions with last_seen=0 and runs
runTxLastSeenBackfillChunked with batchSize=1000. Asserts (a) the
progress callback fires more than once, and (b) every per-batch delta
is bounded by batchSize. Both fail today: the stub still executes the
original PR #1691 full-table UPDATE that pinned the SQLite writer
10-15 min on prod-sized DBs (#1724).

The GREEN commit will replace the stub body with a chunked LIMIT-N
loop + per-batch yield.
This commit is contained in:
corescope-bot
2026-06-14 17:33:37 +00:00
parent 92e001c093
commit 716730f7f7
2 changed files with 159 additions and 0 deletions
+60
View File
@@ -0,0 +1,60 @@
// tx_last_seen_backfill — chunked backfill of transmissions.last_seen (#1724).
//
// Stub for the chunked backfill. PR #1691 originally ran the populate as a
// single correlated UPDATE; on a prod-shaped DB (71K tx / 1.5M obs) that
// pinned the SQLite writer for 10-15 min, starving every reader. The fix
// (forthcoming, gated by tx_last_seen_backfill_test.go) replaces this body
// with a batched loop that yields the writer between batches.
package main
import (
"context"
"database/sql"
"time"
)
// TxLastSeenBackfillProgress is the snapshot reported to the optional
// progress callback after each batch.
type TxLastSeenBackfillProgress struct {
RowsProcessed int64
RowsTotal int64
BatchNum int
ElapsedMs int64
}
// TxLastSeenBackfillOpts tunes the chunked backfill. Zero values fall back
// to production defaults (see GREEN commit for tuning rationale).
type TxLastSeenBackfillOpts struct {
BatchSize int
YieldDelay time.Duration
Progress func(TxLastSeenBackfillProgress)
}
// runTxLastSeenBackfillChunked is the body of the
// tx_last_seen_backfill_v1 async migration. RED stub: still runs the
// single-shot UPDATE that caused #1724. Replaced in the GREEN commit
// with a chunked implementation.
func runTxLastSeenBackfillChunked(ctx context.Context, db *sql.DB, opts TxLastSeenBackfillOpts) (int64, error) {
start := time.Now()
res, err := db.ExecContext(ctx, `
UPDATE transmissions
SET last_seen = COALESCE((
SELECT MAX(timestamp) FROM observations WHERE transmission_id = transmissions.id
), last_seen)
WHERE last_seen = 0
`)
if err != nil {
return 0, err
}
n, _ := res.RowsAffected()
if opts.Progress != nil {
opts.Progress(TxLastSeenBackfillProgress{
RowsProcessed: n,
RowsTotal: n,
BatchNum: 1,
ElapsedMs: time.Since(start).Milliseconds(),
})
}
return n, nil
}
@@ -0,0 +1,99 @@
// Test for issue #1724 — the tx_last_seen backfill MUST chunk its
// UPDATE so SQLite readers can make forward progress while the
// backfill runs. The original PR #1691 implementation ran a single
// correlated UPDATE that pinned the writer 10-15 min on a prod-sized
// DB; this test asserts the chunked behavior (≤ batchSize rows per
// batch + multiple progress callbacks).
package main
import (
"context"
"testing"
"time"
)
// TestIssue1724_TxLastSeenBackfillIsChunked seeds 12k transmissions
// with last_seen=0 and runs runTxLastSeenBackfillChunked with a
// batchSize of 1000. It asserts:
//
// 1. The progress callback fires more than once (proving the loop
// batches, not single-shots).
// 2. Every per-batch RowsProcessed delta is ≤ batchSize+epsilon
// (proving each UPDATE is bounded, not full-table).
//
// Pre-fix (single full-table UPDATE) the callback fires exactly once
// with RowsProcessed=12000, failing both assertions on an assertion
// (not a build/import error).
func TestIssue1724_TxLastSeenBackfillIsChunked(t *testing.T) {
s := newTestStore(t)
ctx := context.Background()
const seedN = 12000
const batchSize = 1000
// Seed transmissions with last_seen=0 and one matching observation
// each so the correlated MAX(timestamp) subquery returns a non-zero
// value (forces RowsAffected to be non-zero).
tx, err := s.db.Begin()
if err != nil {
t.Fatalf("begin: %v", err)
}
insTx, err := tx.Prepare(`INSERT INTO transmissions(raw_hex, hash, first_seen, last_seen) VALUES('00','h'||?, '2024-01-01T00:00:00Z', 0)`)
if err != nil {
t.Fatalf("prep tx: %v", err)
}
insObs, err := tx.Prepare(`INSERT INTO observations(transmission_id, observer_idx, timestamp) VALUES(?, 1, ?)`)
if err != nil {
t.Fatalf("prep obs: %v", err)
}
for i := 0; i < seedN; i++ {
res, err := insTx.Exec(i)
if err != nil {
t.Fatalf("seed tx %d: %v", i, err)
}
id, _ := res.LastInsertId()
if _, err := insObs.Exec(id, time.Now().Unix()+int64(i)); err != nil {
t.Fatalf("seed obs %d: %v", i, err)
}
}
insTx.Close()
insObs.Close()
if err := tx.Commit(); err != nil {
t.Fatalf("commit: %v", err)
}
var snapshots []TxLastSeenBackfillProgress
progress := func(p TxLastSeenBackfillProgress) {
snapshots = append(snapshots, p)
}
total, err := runTxLastSeenBackfillChunked(ctx, s.db, TxLastSeenBackfillOpts{
BatchSize: batchSize,
YieldDelay: time.Millisecond,
Progress: progress,
})
if err != nil {
t.Fatalf("backfill: %v", err)
}
if total != seedN {
t.Fatalf("total rows updated = %d, want %d", total, seedN)
}
// Invariant 1: the loop must batch.
if len(snapshots) < 2 {
t.Fatalf("progress callback fired %d times; want ≥ 2 (chunked loop should emit one per batch; pre-fix #1724 emits exactly 1 for the full-table UPDATE)",
len(snapshots))
}
// Invariant 2: per-batch delta must be bounded by batchSize.
var prev int64
for i, snap := range snapshots {
delta := snap.RowsProcessed - prev
if delta > int64(batchSize) {
t.Fatalf("snapshot[%d] delta=%d exceeds batchSize=%d; backfill is not chunking (pre-fix #1724 ran one full-table UPDATE)",
i, delta, batchSize)
}
prev = snap.RowsProcessed
}
}