mirror of
https://github.com/Kpa-clawbot/meshcore-analyzer.git
synced 2026-06-04 15:31:19 +00:00
7c40e24a35
## Summary - Adds `readCgroupMemoryMB()` to detect container memory ceiling from cgroup v2 (`/sys/fs/cgroup/memory.max`) and v1 (`/sys/fs/cgroup/memory.limit_in_bytes`) - Adds `warnIfMemlimitUnderprovisioned()` called once from `main()` after the existing memlimit block — logs a `[memlimit] WARN` at startup if the effective GOMEMLIMIT is below 50% of the container limit - Works whether the limit was set via `GOMEMLIMIT` env var or derived from `packetStore.maxMemoryMB` - Adds `readCgroupMemoryMBFn` package-level hook for test injection (same pattern as `readProcSelfIOFn` in the ingestor) Fixes #1264. In the reported incident, GOMEMLIMIT was 1536 MiB on a 7.7 GB container; GC consumed 82% of CPU and all endpoints were 3–100× slower. This warning fires at startup so operators catch the misconfiguration before it causes an incident. ## Test plan - [ ] `TestWarnIfMemlimitUnderprovisioned_EmitsWarning` — warning fires when effective < 50% of cgroup - [ ] `TestWarnIfMemlimitUnderprovisioned_NoWarnWhenAdequate` — no warning at boundary (effective = 1024 MiB, cgroup = 1536 MiB) - [ ] `TestWarnIfMemlimitUnderprovisioned_NoCgroupNoLog` — silent on non-container hosts - [ ] `TestWarnIfMemlimitUnderprovisioned_NoneSource` — no warning when `source="none"` (no limit configured, runtime returns math.MaxInt64) - [ ] `TestMemlimitUnderprovisioned` — boundary table for the comparison helper - [ ] All existing `TestApplyMemoryLimit_*` still pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
114 lines
4.5 KiB
Go
114 lines
4.5 KiB
Go
package main
|
|
|
|
import (
|
|
"log"
|
|
"os"
|
|
"runtime/debug"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// cgroupUnlimitedThreshold separates real memory limits from "no limit"
// sentinels. cgroup v1 reports an unlimited container as a page-aligned value
// just below math.MaxInt64 (close to 1<<63), so any byte count at or above
// 1<<62 is treated as "unlimited": no real limit comes anywhere near it, and
// it still sits well below the sentinel itself.
const cgroupUnlimitedThreshold int64 = 1 << 62
|
|
|
|
// applyMemoryLimit installs Go's soft memory limit (the programmatic
// equivalent of GOMEMLIMIT) when the operator has not already done so.
//
// Parameters:
//   - maxMemoryMB: the packet store's steady-state budget in MiB; values <= 0
//     mean "not configured".
//   - envSet: true when the GOMEMLIMIT environment variable is present.
//
// Outcomes, in priority order:
//   - envSet: nothing is changed here, because the runtime has already parsed
//     the variable. Returns (0, "env").
//   - maxMemoryMB <= 0: nothing is changed. Returns (0, "none").
//   - otherwise: the limit is set to 1.5 x maxMemoryMB (expressed in bytes)
//     through debug.SetMemoryLimit, so the GC works harder under cgroup
//     pressure and the process throttles itself before it is SIGKILLed
//     (see #836). Returns (limit, "derived").
//
// The first result is the byte limit this call installed, or 0 when it
// installed none; the second is a short source tag intended for logging.
func applyMemoryLimit(maxMemoryMB int, envSet bool) (int64, string) {
	const bytesPerMiB = 1024 * 1024

	switch {
	case envSet:
		return 0, "env"
	case maxMemoryMB <= 0:
		return 0, "none"
	}

	// The 3/2 factor leaves headroom above the steady-state packet store
	// budget for transient peaks: the cold-load row-scan / decode pipeline,
	// and Go's NextGC trigger at roughly twice the live heap. See the heap
	// profile in issue #836.
	derived := int64(maxMemoryMB) * bytesPerMiB * 3 / 2
	debug.SetMemoryLimit(derived)
	return derived, "derived"
}
|
|
|
|
// readCgroupMemoryMBFn is the package-level hook used by
|
|
// warnIfMemlimitUnderprovisioned. Tests override it to inject deterministic
|
|
// cgroup values without needing a Linux kernel with cgroup mounts.
|
|
var readCgroupMemoryMBFn = readCgroupMemoryMB
|
|
|
|
// readCgroupMemoryMB returns the container's memory limit from cgroup, in MiB.
|
|
// Returns 0 when unavailable (non-Linux, unlimited, or read error).
|
|
func readCgroupMemoryMB() int64 {
|
|
// cgroup v2: single file, value in bytes or literal "max"
|
|
if b, err := os.ReadFile("/sys/fs/cgroup/memory.max"); err == nil {
|
|
s := strings.TrimSpace(string(b))
|
|
if s != "max" {
|
|
if v, err := strconv.ParseInt(s, 10, 64); err == nil && v > 0 {
|
|
return v / (1024 * 1024)
|
|
}
|
|
}
|
|
}
|
|
// cgroup v1: values near math.MaxInt64 represent "unlimited"
|
|
if b, err := os.ReadFile("/sys/fs/cgroup/memory/memory.limit_in_bytes"); err == nil {
|
|
if v, err := strconv.ParseInt(strings.TrimSpace(string(b)), 10, 64); err == nil {
|
|
if v > 0 && v < cgroupUnlimitedThreshold {
|
|
return v / (1024 * 1024)
|
|
}
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// memlimitUnderprovisioned reports whether effectiveMB is strictly less than
// half of cgroupMB. It is a separate function so the comparison boundary can
// be unit tested.
//
// Both arguments are sizes in MiB. A non-positive value on either side means
// "unknown / not set" and always yields false. Exactly half (for example 512
// against 1024) is not considered under-provisioned.
func memlimitUnderprovisioned(effectiveMB, cgroupMB int64) bool {
	if effectiveMB <= 0 || cgroupMB <= 0 {
		return false
	}
	// Equivalent to effectiveMB*2 < cgroupMB, but phrased as a subtraction:
	// with both operands positive the difference cannot overflow, whereas the
	// doubling wraps negative for effectiveMB > MaxInt64/2 and would wrongly
	// report a huge limit as under-provisioned.
	return effectiveMB < cgroupMB-effectiveMB
}
|
|
|
|
// warnIfMemlimitUnderprovisioned logs a warning when GOMEMLIMIT is below 50%
|
|
// of the container cgroup memory limit, which causes the Go GC to thrash.
|
|
// In one reported incident (#1264) 82% of CPU was GC with a 1536 MiB limit
|
|
// on a 7.7 GB container — all endpoints 3-100x slower until maxMemoryMB was
|
|
// bumped and the process restarted.
|
|
//
|
|
// limitBytes is the value returned by applyMemoryLimit:
|
|
// - source="derived": the limit we set ourselves (> 0)
|
|
// - source="env": 0 — we did not touch the runtime; read it back below
|
|
// - source="none": 0 — no limit set at all; runtime default is math.MaxInt64,
|
|
// which the >= cgroupUnlimitedThreshold guard below catches and skips
|
|
func warnIfMemlimitUnderprovisioned(limitBytes int64) {
|
|
cgroupMB := readCgroupMemoryMBFn()
|
|
if cgroupMB <= 0 {
|
|
return
|
|
}
|
|
effective := limitBytes
|
|
if effective <= 0 {
|
|
// Either GOMEMLIMIT was set via env (source="env") or no limit was
|
|
// configured (source="none"). Read the runtime's current value:
|
|
// - env case: returns whatever the operator set
|
|
// - none case: returns math.MaxInt64, caught by the guard below
|
|
// debug.SetMemoryLimit(-1) leaves the limit unchanged and returns it.
|
|
effective = debug.SetMemoryLimit(-1)
|
|
}
|
|
if effective <= 0 || effective >= cgroupUnlimitedThreshold {
|
|
return
|
|
}
|
|
effectiveMB := effective / (1024 * 1024)
|
|
if memlimitUnderprovisioned(effectiveMB, cgroupMB) {
|
|
log.Printf("[memlimit] WARN: GOMEMLIMIT=%d MiB is <50%% of container limit %d MiB — "+
|
|
"GC may thrash under load; consider bumping packetStore.maxMemoryMB "+
|
|
"(suggested: ~%d MiB, roughly 2/3 of container limit)",
|
|
effectiveMB, cgroupMB, cgroupMB*2/3)
|
|
}
|
|
}
|