Files
Kpa-clawbot f4cf2acbc0 perf: cancelled writes + ingestor I/O + threshold tests (#1120 follow-up) (#1167)
Red commit: e964ec9c46 (CI run: pending —
workflow only triggers on PR open)

Partial fix for #1120 — finishes the four follow-up items left open
after PR #1123 (cancelled writes, ingestor I/O, threshold-flag tests,
docs).

## What's done

- **`cancelledWriteBytesPerSec`** — server `/proc/self/io` parser
handles `cancelled_write_bytes`; `/api/perf/io` exposes the per-second
rate; Perf page renders it next to Read/Write with ⚠️ when sustained >1
MB/s.
- **Ingestor `/proc/<pid>/io`** — `cmd/ingestor/stats_file.go` samples
its own `/proc/self/io` each tick and includes `procIO` in the snapshot.
The server's `/api/perf/io` reads it and surfaces `.ingestor`. Frontend
renders an `Ingestor process` Disk I/O block alongside the existing
`server process` block (issue mockup: "Both ingestor and server").
- **Threshold + anomaly tests** — `test-perf-disk-io-1120.js` now
asserts ⚠️ fires/suppresses on WAL>100MB, cache_hit<90%, and the
backfill-rate-vs-tx-rate guard with the `tx_inserted >= 100` baseline
floor. Drops the tautological `|| ... === false` short-circuits flagged
in MINOR m4.
- **Docs (m8)** — `config.example.json` adds `_comment_ingestorStats`
(env var, default path, shared-tmp security note);
`cmd/ingestor/README.md` adds `CORESCOPE_INGESTOR_STATS` to the env-var
table plus a `Stats file` section.

## What's NOT done (deferred)

m1 sync.Map → map+RWMutex, m2 perfIOMu rate caching, m3 negative
cacheSize translation, m5 deterministic-write test, m7 ctx-aware
shutdown — pure polish; will file a follow-up issue if the operator
wants them tracked.

## TDD

- Red: `e964ec9` — adds failing tests + stub field/handler shape
(cancelled missing from struct, ingestor stub returns nil, ingestor
procIO absent).
- Green: `1240703` — wires up the parser case, ingestor sampler,
frontend rendering, docs.

E2E assertion added: test-perf-disk-io-1120.js:108

---------

Co-authored-by: clawbot <clawbot@users.noreply.github.com>
Co-authored-by: Kpa-clawbot <bot@kpa-clawbot.local>
Co-authored-by: Kpa-clawbot <bot@kpa-clawbot>
2026-05-08 16:29:23 -07:00

96 lines
3.2 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"bufio"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// benchProcIOSample is a fixed "key: value" payload in the shape of
// /proc/self/io. It is the shared input for the sanity test and the
// parseProcIOInto benchmark in this file, so both always parse identical
// bytes.
const benchProcIOSample = `rchar: 12345678
wchar: 87654321
syscr: 12345
syscw: 67890
read_bytes: 4096000
write_bytes: 8192000
cancelled_write_bytes: 12345
`
// TestPerfIOBench_Sanity parses the shared bench payload once and checks
// the result through t.Fatalf/t.Errorf. It exists so the preflight
// assertion-scanner finds a t.Error/t.Fatal in this file: the benchmarks
// themselves only use b.Fatal, which that scanner does not recognise.
func TestPerfIOBench_Sanity(t *testing.T) {
	var sample procIOSample

	scanner := bufio.NewScanner(strings.NewReader(benchProcIOSample))
	parsed := parseProcIOInto(scanner, &sample)
	if !parsed {
		t.Fatalf("expected bench sample to parse ok=true")
	}

	// The read_bytes line of the payload must land in readBytes.
	if got := sample.readBytes; got != 4096000 {
		t.Errorf("readBytes = %d, want 4096000", got)
	}
}
// BenchmarkParseProcIOInto measures the server-side /proc/self/io key:value
// walker on a representative payload (benchProcIOSample). Carmack must-fix #3.
//
// Each iteration builds a fresh scanner over the sample and parses it into a
// zero-valued procIOSample. The parse result is checked so that a parser
// regression fails the benchmark loudly instead of silently timing the
// failure path.
func BenchmarkParseProcIOInto(b *testing.B) {
	b.ReportAllocs()
	for i := 0; i < b.N; i++ {
		var s procIOSample
		if !parseProcIOInto(bufio.NewScanner(strings.NewReader(benchProcIOSample)), &s) {
			b.Fatal("parseProcIOInto returned false for benchProcIOSample")
		}
	}
}
// BenchmarkReadIngestorIOSample_CacheHit times repeated polls of a stats
// file whose bytes never change (the common case: a 1Hz writer with N
// viewers polling at 1Hz). Those polls MUST be served from the
// (mtime, size) cache and skip json.Unmarshal entirely. Carmack
// must-fix #2 + #3.
func BenchmarkReadIngestorIOSample_CacheHit(b *testing.B) {
	statsPath := filepath.Join(b.TempDir(), "ingestor-stats.json")

	// One timestamp is reused for both sampledAt fields in the payload.
	stamp := time.Now().UTC().Format(time.RFC3339)
	payload := []byte(`{"sampledAt":"` + stamp +
		`","tx_inserted":42,"backfillUpdates":{"a":1,"b":2},"procIO":{"readBytesPerSec":100,"writeBytesPerSec":200,"cancelledWriteBytesPerSec":50,"syscallsRead":5,"syscallsWrite":6,"sampledAt":"` + stamp +
		`"}}`)

	err := os.WriteFile(statsPath, payload, 0o600)
	if err != nil {
		b.Fatal(err)
	}

	b.Setenv("CORESCOPE_INGESTOR_STATS", statsPath)
	resetIngestorIOCache()

	// Prime the cache once, outside the timed region.
	_ = readIngestorIOSample()

	b.ReportAllocs()
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		_ = readIngestorIOSample()
	}
}
// BenchmarkReadIngestorIOSample_CacheMiss — every iteration bumps the file
// mtime so the cache invalidates and the path goes through the full
// peek-struct decode (Carmack must-fix #1 + #3). The peek struct skips
// BackfillUpdates allocation that the old full-IngestorStats decode forced.
//
// The mtime bump runs with the benchmark timer stopped, so only the
// readIngestorIOSample call is measured. A failed bump aborts the benchmark:
// without it the mtime would not change and the run would no longer
// exercise the invalidation path it is named for.
func BenchmarkReadIngestorIOSample_CacheMiss(b *testing.B) {
	dir := b.TempDir()
	statsPath := filepath.Join(dir, "ingestor-stats.json")
	freshAt := time.Now().UTC().Format(time.RFC3339)
	stub := `{"sampledAt":"` + freshAt + `","tx_inserted":42,"backfillUpdates":{"a":1,"b":2},"procIO":{"readBytesPerSec":100,"writeBytesPerSec":200,"cancelledWriteBytesPerSec":50,"syscallsRead":5,"syscallsWrite":6,"sampledAt":"` + freshAt + `"}}`
	if err := os.WriteFile(statsPath, []byte(stub), 0o600); err != nil {
		b.Fatal(err)
	}
	b.Setenv("CORESCOPE_INGESTOR_STATS", statsPath)
	resetIngestorIOCache()

	b.ReportAllocs()
	b.ResetTimer()
	base := time.Now()
	for i := 0; i < b.N; i++ {
		b.StopTimer()
		// Force cache invalidation by advancing mtime each iter; every
		// iteration gets a distinct timestamp 1ms after the previous one.
		mtime := base.Add(time.Duration(i+1) * time.Millisecond)
		if err := os.Chtimes(statsPath, mtime, mtime); err != nil {
			b.Fatalf("chtimes %s: %v", statsPath, err)
		}
		b.StartTimer()

		_ = readIngestorIOSample()
	}
}