mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-04-25 13:52:08 +00:00
fix: set PRAGMA busy_timeout on all RW SQLite connections (#707)
## Problem `SQLITE_BUSY` contention between the ingestor and server's async persistence goroutine drops `resolved_path` and `neighbor_edges` updates. The DSN parameter `_busy_timeout=10000` may not be honored by the modernc/sqlite driver. ## Fix - **`openRW()` now sets `PRAGMA busy_timeout = 5000`** after opening the connection, guaranteeing SQLite retries for up to 5 seconds before returning `SQLITE_BUSY` - **Refactored `PruneOldPackets` and `PruneOldMetrics`** to use `openRW()` instead of duplicating connection setup — all RW connections now get consistent busy_timeout handling - Added test verifying the pragma is set correctly ## Changes | File | Change | |------|--------| | `cmd/server/neighbor_persist.go` | `openRW()` sets `PRAGMA busy_timeout = 5000` after open | | `cmd/server/db.go` | `PruneOldPackets` and `PruneOldMetrics` use `openRW()` instead of inline `sql.Open` | | `cmd/server/neighbor_persist_test.go` | `TestOpenRW_BusyTimeout` verifies pragma is set | ## Performance No performance impact — `PRAGMA busy_timeout` is a connection-level setting with zero overhead on uncontended writes. Under contention, it converts immediate `SQLITE_BUSY` failures into brief retries (up to 5s), which is strictly better than dropping data. Fixes #705 --------- Co-authored-by: you <you@example.com>
This commit is contained in:
@@ -1704,12 +1704,10 @@ func nullInt(ni sql.NullInt64) interface{} {
|
||||
// Returns the number of transmissions deleted.
|
||||
// Opens a separate read-write connection since the main connection is read-only.
|
||||
func (db *DB) PruneOldPackets(days int) (int64, error) {
|
||||
dsn := fmt.Sprintf("file:%s?_journal_mode=WAL&_busy_timeout=10000", db.path)
|
||||
rw, err := sql.Open("sqlite", dsn)
|
||||
rw, err := openRW(db.path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
rw.SetMaxOpenConns(1)
|
||||
defer rw.Close()
|
||||
|
||||
cutoff := time.Now().UTC().AddDate(0, 0, -days).Format(time.RFC3339)
|
||||
@@ -2053,12 +2051,10 @@ func (db *DB) GetMetricsSummary(since string) ([]MetricsSummaryRow, error) {
|
||||
|
||||
// PruneOldMetrics deletes observer_metrics rows older than retentionDays.
|
||||
func (db *DB) PruneOldMetrics(retentionDays int) (int64, error) {
|
||||
dsn := fmt.Sprintf("file:%s?_journal_mode=WAL&_busy_timeout=10000", db.path)
|
||||
rw, err := sql.Open("sqlite", dsn)
|
||||
rw, err := openRW(db.path)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
rw.SetMaxOpenConns(1)
|
||||
defer rw.Close()
|
||||
|
||||
cutoff := time.Now().UTC().AddDate(0, 0, -retentionDays).Format(time.RFC3339)
|
||||
|
||||
@@ -584,12 +584,18 @@ func extractEdgesFromObs(obs *StoreObs, tx *StoreTx, pm *prefixMap) []edgeCandid
|
||||
|
||||
// openRW opens a read-write SQLite connection (same pattern as PruneOldPackets).
|
||||
func openRW(dbPath string) (*sql.DB, error) {
|
||||
dsn := fmt.Sprintf("file:%s?_journal_mode=WAL&_busy_timeout=10000", dbPath)
|
||||
dsn := fmt.Sprintf("file:%s?_journal_mode=WAL", dbPath)
|
||||
rw, err := sql.Open("sqlite", dsn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rw.SetMaxOpenConns(1)
|
||||
// DSN _busy_timeout may not be honored by all drivers; set via PRAGMA
|
||||
// to guarantee SQLite retries for up to 5s before returning SQLITE_BUSY.
|
||||
if _, err := rw.Exec("PRAGMA busy_timeout = 5000"); err != nil {
|
||||
rw.Close()
|
||||
return nil, fmt.Errorf("set busy_timeout: %w", err)
|
||||
}
|
||||
return rw, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -532,3 +532,31 @@ func TestPersistSemaphoreTryAcquireSkipsBatch(t *testing.T) {
|
||||
|
||||
<-persistSem // release
|
||||
}
|
||||
|
||||
func TestOpenRW_BusyTimeout(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
dbPath := filepath.Join(dir, "test.db")
|
||||
|
||||
// Create the DB file first
|
||||
db, err := sql.Open("sqlite", "file:"+dbPath+"?_journal_mode=WAL")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
db.Exec("CREATE TABLE dummy (id INTEGER)")
|
||||
db.Close()
|
||||
|
||||
// Open via openRW and verify busy_timeout is set
|
||||
rw, err := openRW(dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("openRW failed: %v", err)
|
||||
}
|
||||
defer rw.Close()
|
||||
|
||||
var timeout int
|
||||
if err := rw.QueryRow("PRAGMA busy_timeout").Scan(&timeout); err != nil {
|
||||
t.Fatalf("query busy_timeout: %v", err)
|
||||
}
|
||||
if timeout != 5000 {
|
||||
t.Errorf("expected busy_timeout=5000, got %d", timeout)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user