diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index 0c648fc9..21ee8b4f 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -83,6 +83,9 @@ jobs:
       - name: Verify Dockerfile COPY invariants (issue #1316)
         run: bash scripts/check-dockerfile-internal-pkgs.sh
 
+      - name: Staging disk-monitor unit tests (issue #1684)
+        run: bash scripts/staging/test-disk-monitor.sh
+
       - name: Lint CSS variables (issue #1128)
         run: |
           set -e
diff --git a/DEPLOY.md b/DEPLOY.md
index 450c3e1f..cc3a28a1 100644
--- a/DEPLOY.md
+++ b/DEPLOY.md
@@ -129,3 +129,98 @@ docker compose pull && docker compose up -d
 | `./manage.sh setup` | Copy `docker-compose.example.yml`, edit env vars |
 
 `manage.sh` remains available for advanced use cases (building from source, custom patches, development). Pre-built images are recommended for most production deployments.
+
+## Staging VM — disk-usage monitor & cleanup (#1684)
+
+The staging VM ran out of disk during a hot-patch (#1684). To prevent
+repeats, two scripts live in `scripts/staging/`:
+
+- `disk-monitor.sh <mount>` — reads `df -P`, classifies usage against
+  `<80 ok / >=80 warn / >=90 error / >=95 alert`, emits to stderr +
+  journald (via `logger`). Returns non-zero on `error|alert` so
+  systemd surfaces the unit as failed.
+- `disk-cleanup.sh` — removes `/tmp` entries named `staging-snap.*`,
+  `cs-*`, or `node-compile-cache` that are older than 7 days, then runs
+  `docker builder prune` (`until=72h`) and `docker image prune`
+  (`until=72h`, `label!=keep`). A bare `*.db` is deliberately not matched.
+  Set `CORESCOPE_CLEANUP_DRY_RUN=1` to log without deleting.
+
+### Install on the staging host
+
+SSH to `<STAGING_HOST>` as the staging operator user and:
+
+```bash
+sudo install -m 0755 scripts/staging/disk-monitor.sh  /usr/local/bin/corescope-disk-monitor
+sudo install -m 0755 scripts/staging/disk-cleanup.sh  /usr/local/bin/corescope-disk-cleanup
+
+# 15-minute monitor
+sudo tee /etc/systemd/system/corescope-disk-monitor.service >/dev/null <<'UNIT'
+[Unit]
+Description=CoreScope staging disk-usage monitor (issue #1684)
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/corescope-disk-monitor /
+UNIT
+
+sudo tee /etc/systemd/system/corescope-disk-monitor.timer >/dev/null <<'UNIT'
+[Unit]
+Description=Run CoreScope disk-usage monitor every 15 minutes
+[Timer]
+OnBootSec=5min
+OnUnitActiveSec=15min
+Unit=corescope-disk-monitor.service
+[Install]
+WantedBy=timers.target
+UNIT
+
+# Daily cleanup at 03:30 local
+sudo tee /etc/systemd/system/corescope-disk-cleanup.service >/dev/null <<'UNIT'
+[Unit]
+Description=CoreScope staging disk cleanup (issue #1684)
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/corescope-disk-cleanup
+UNIT
+
+sudo tee /etc/systemd/system/corescope-disk-cleanup.timer >/dev/null <<'UNIT'
+[Unit]
+Description=Run CoreScope disk cleanup daily at off-peak
+[Timer]
+OnCalendar=*-*-* 03:30:00
+Persistent=true
+Unit=corescope-disk-cleanup.service
+[Install]
+WantedBy=timers.target
+UNIT
+
+sudo systemctl daemon-reload
+sudo systemctl enable --now corescope-disk-monitor.timer corescope-disk-cleanup.timer
+```
+
+`<STAGING_HOST>` is the staging VM hostname/IP — operator supplies it,
+not committed to the repo.
+
+### Inspecting alerts
+
+```bash
+journalctl -t corescope-disk-monitor   --since '-1d'
+journalctl -t corescope-disk-cleanup   --since '-7d'
+systemctl list-timers | grep corescope-disk
+```
+
+`logger` priorities map: `ok→info`, `warn→warning`, `error→err`,
+`alert→alert` (syslog severity 1, second only to `emerg`). Wire
+`journalctl -p alert ...` to whatever ops channel the operator
+prefers; use `-p err` to also catch the `error` tier.
+
+### Notes on `staging-snap.db` root cause (#1684 phase 3)
+
+`grep -rn staging-snap.db cmd/ public/ scripts/` returns **zero**
+hits in the repo. The 4.4 GB orphan was a manual debugging artifact,
+not produced by any committed code. The `disk-cleanup.sh` retention
+rule (anything matching `staging-snap.*` in `/tmp` older than 7 days)
+prevents recurrence without needing source-side TTL changes.
+
+If a future feature legitimately needs persistent snapshot DBs, put
+them under `/var/lib/corescope/snapshots/` with explicit rotation —
+not in `/tmp`, which is ephemeral by definition.
diff --git a/scripts/staging/disk-cleanup.sh b/scripts/staging/disk-cleanup.sh
new file mode 100755
index 00000000..7ee9cea8
--- /dev/null
+++ b/scripts/staging/disk-cleanup.sh
@@ -0,0 +1,73 @@
+#!/usr/bin/env bash
+# disk-cleanup.sh — daily staging VM cleanup (issue #1684).
+#
+# Removes orphaned /tmp snapshots older than 7 days and prunes Docker
+# build cache + unused images older than 72h (respecting label=keep).
+#
+# Designed to run from a daily systemd timer at off-peak. Idempotent.
+# Set CORESCOPE_CLEANUP_DRY_RUN=1 to log without deleting.
+
+set -euo pipefail
+
+DRY_RUN="${CORESCOPE_CLEANUP_DRY_RUN:-0}"
+LOG_TAG="corescope-disk-cleanup"
+
+log() {
+    echo "$LOG_TAG: $*" >&2
+    if command -v logger >/dev/null 2>&1; then
+        logger -t "$LOG_TAG" -- "$*"
+    fi
+}
+
+run_or_dry() {
+    if [ "$DRY_RUN" = "1" ]; then
+        log "DRY_RUN: $*"
+    else
+        log "exec: $*"
+        "$@"
+    fi
+}
+
+# ----- /tmp snapshot retention ----------------------------------------------
+# Delete /tmp entries (depth 1-2) that carry a known snapshot/cache name and
+# an mtime older than 7 days. In dry-run mode the matches are only logged.
+cleanup_tmp() {
+    log "scanning /tmp for snapshots older than 7d"
+    local find_args=(
+        /tmp -mindepth 1 -maxdepth 2 -mtime +7
+        \(
+          -name 'staging-snap.*' -o
+          -name 'cs-*' -o
+          -name 'node-compile-cache'
+        \) -prune  # a match is removed whole, so never walk into it
+    )
+    if [ "$DRY_RUN" = "1" ]; then
+        find "${find_args[@]}" -print | while IFS= read -r f; do
+            log "DRY_RUN: would rm -rf $f"
+        done
+    else
+        # -print before -exec so we have an audit trail in journald.
+        find "${find_args[@]}" -print -exec rm -rf {} +
+    fi
+}
+
+# ----- Docker prune ---------------------------------------------------------
+cleanup_docker() {
+    if ! command -v docker >/dev/null 2>&1; then
+        log "docker not installed; skipping docker prune"
+        return 0
+    fi
+    run_or_dry docker builder prune -af --filter "until=72h"
+    run_or_dry docker image prune -af --filter "until=72h" --filter "label!=keep"
+}
+
+main() {
+    log "starting (dry_run=$DRY_RUN)"
+    cleanup_tmp
+    cleanup_docker
+    log "done"
+}
+
+if [ "${BASH_SOURCE[0]}" = "${0}" ]; then
+    main "$@"
+fi
diff --git a/scripts/staging/disk-monitor.sh b/scripts/staging/disk-monitor.sh
new file mode 100755
index 00000000..fb5c98c6
--- /dev/null
+++ b/scripts/staging/disk-monitor.sh
@@ -0,0 +1,101 @@
+#!/usr/bin/env bash
+# disk-monitor.sh — staging VM disk-usage monitor (issue #1684).
+#
+# Reads `df` for a mount point, classifies usage against thresholds, and
+# emits a single line to stderr (and journald via systemd) at the matching
+# severity. Designed to be invoked by a 15-minute systemd timer; output
+# goes to journald which the operator can wire to alerts as needed.
+#
+# Pure-bash helpers (parse_df_percent, classify_threshold) are sourced by
+# scripts/staging/test-disk-monitor.sh — keep them side-effect free.
+
+set -euo pipefail
+
+# ----- pure helpers (testable) -----------------------------------------------
+
+# parse_df_percent <df-output>
+# Echoes the Use% value (field 5 of the last non-blank `df -P` line) without
+# its trailing '%'. Returns non-zero unless that field is digits then '%'.
+parse_df_percent() {
+    local input="$1"
+    # Keep field 5 of every non-blank line; the last one wins, so a trailing
+    # blank line cannot hide the data row.
+    local field
+    field="$(printf '%s\n' "$input" | awk 'NF { f = $5 } END { print f }')"
+    # A bare "%" or "abc%" is not a percentage: fail instead of echoing it
+    # with a zero exit status.
+    case "$field" in
+        %|*[!0-9]*%) return 1 ;;
+        *%) ;;
+        *) return 1 ;;
+    esac
+    printf '%s\n' "${field%\%}"
+}
+
+# classify_threshold <percent>
+# Echoes one of: ok | warn | error | alert based on the issue #1684 spec:
+#   <80 ok ; >=80 warn ; >=90 error ; >=95 alert
+# Returns non-zero, echoing nothing, if input is not an integer 0-100.
+classify_threshold() {
+    local pct="$1"
+    case "$pct" in
+        ''|*[!0-9]*) return 1 ;;
+    esac
+    # Drop leading zeros and cap the length before any numeric test: `[`
+    # rejects oversized integers, and each failed test would fall to "ok".
+    pct="${pct#"${pct%%[!0]*}"}"
+    pct="${pct:-0}"
+    if [ "${#pct}" -gt 3 ] || [ "$pct" -gt 100 ]; then
+        return 1
+    fi
+    if   [ "$pct" -ge 95 ]; then echo alert
+    elif [ "$pct" -ge 90 ]; then echo error
+    elif [ "$pct" -ge 80 ]; then echo warn
+    else                         echo ok
+    fi
+}
+
+# severity_priority <severity>
+# Echoes the `logger -p` priority for a severity label: ok→user.info,
+# warn→user.warning, error→user.err, alert→user.alert (syslog severity 1, not
+# `crit`, so `journalctl -p alert` matches). Unknown labels return non-zero.
+severity_priority() {
+    local level
+    case "$1" in
+        ok)         level=info ;;
+        warn)       level=warning ;;
+        error)      level=err ;;
+        alert)      level=alert ;;
+        *)          return 1 ;;
+    esac
+    echo "user.$level"
+}
+
+# ----- main -----------------------------------------------------------------
+
+main() {
+    local mount="${1:-/}"
+    local df_out
+    df_out="$(df -P "$mount")"
+    local pct severity prio
+    pct="$(parse_df_percent "$df_out")"
+    severity="$(classify_threshold "$pct")"
+    prio="$(severity_priority "$severity")"
+    local msg="disk-monitor mount=$mount used=${pct}% severity=$severity"
+    # journald via systemd captures stderr; also emit through logger so
+    # syslog-based collectors see the priority.
+    echo "$msg" >&2
+    if command -v logger >/dev/null 2>&1; then
+        logger -t corescope-disk-monitor -p "$prio" -- "$msg"
+    fi
+    # Exit codes: 0 ok|warn, 1 error|alert (so timers can surface failures).
+    case "$severity" in
+        ok|warn) return 0 ;;
+        *)       return 1 ;;
+    esac
+}
+
+# Only run main when executed directly (not when sourced by tests).
+if [ "${BASH_SOURCE[0]}" = "${0}" ]; then
+    main "$@"
+fi
diff --git a/scripts/staging/test-disk-monitor.sh b/scripts/staging/test-disk-monitor.sh
new file mode 100755
index 00000000..e327cef8
--- /dev/null
+++ b/scripts/staging/test-disk-monitor.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+# test-disk-monitor.sh — unit tests for scripts/staging/disk-monitor.sh
+# (issue #1684). Pure bash, no external deps. Sources the script and
+# exercises its pure helpers against table-driven cases.
+#
+# Run: bash scripts/staging/test-disk-monitor.sh
+# Exits non-zero if any case fails.
+
+set -u
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+# shellcheck source=disk-monitor.sh
+. "$SCRIPT_DIR/disk-monitor.sh"
+
+PASS=0
+FAIL=0
+
+assert_eq() {
+    local label="$1" expected="$2" actual="$3"
+    if [ "$expected" = "$actual" ]; then
+        PASS=$((PASS + 1))
+        # echo "PASS: $label"
+    else
+        FAIL=$((FAIL + 1))
+        echo "FAIL: $label — expected '$expected' got '$actual'" >&2
+    fi
+}
+
+# ----- classify_threshold ---------------------------------------------------
+# Spec from issue #1684: <80 ok ; >=80 warn ; >=90 error ; >=95 alert
+assert_eq "classify 0"   "ok"    "$(classify_threshold 0)"
+assert_eq "classify 50"  "ok"    "$(classify_threshold 50)"
+assert_eq "classify 79"  "ok"    "$(classify_threshold 79)"
+assert_eq "classify 80"  "warn"  "$(classify_threshold 80)"
+assert_eq "classify 85"  "warn"  "$(classify_threshold 85)"
+assert_eq "classify 89"  "warn"  "$(classify_threshold 89)"
+assert_eq "classify 90"  "error" "$(classify_threshold 90)"
+assert_eq "classify 94"  "error" "$(classify_threshold 94)"
+assert_eq "classify 95"  "alert" "$(classify_threshold 95)"
+assert_eq "classify 100" "alert" "$(classify_threshold 100)"
+
+# Invalid inputs return non-zero (no echo expected).
+if classify_threshold "abc" >/dev/null 2>&1; then
+    FAIL=$((FAIL + 1))
+    echo "FAIL: classify 'abc' — expected non-zero exit" >&2
+else
+    PASS=$((PASS + 1))
+fi
+if classify_threshold 150 >/dev/null 2>&1; then
+    FAIL=$((FAIL + 1))
+    echo "FAIL: classify 150 — expected non-zero exit" >&2
+else
+    PASS=$((PASS + 1))
+fi
+
+# ----- parse_df_percent -----------------------------------------------------
+# Simulates `df -P /` output. Use% column 5.
+DF_OK='Filesystem     1024-blocks      Used Available Capacity Mounted on
+/dev/root         30401152  17040640  13360512      57% /'
+DF_HIGH='Filesystem     1024-blocks      Used Available Capacity Mounted on
+/dev/root         30401152  29401152   1000000      97% /'
+DF_FULL='Filesystem     1024-blocks      Used Available Capacity Mounted on
+/dev/root         30401152  30401152         0     100% /'
+
+assert_eq "parse_df 57%"  "57"  "$(parse_df_percent "$DF_OK")"
+assert_eq "parse_df 97%"  "97"  "$(parse_df_percent "$DF_HIGH")"
+assert_eq "parse_df 100%" "100" "$(parse_df_percent "$DF_FULL")"
+
+# Pipeline: parse_df_percent | classify_threshold (the real call path).
+assert_eq "pipe 57->ok"     "ok"    "$(classify_threshold "$(parse_df_percent "$DF_OK")")"
+assert_eq "pipe 97->alert"  "alert" "$(classify_threshold "$(parse_df_percent "$DF_HIGH")")"
+assert_eq "pipe 100->alert" "alert" "$(classify_threshold "$(parse_df_percent "$DF_FULL")")"
+
+# ----- severity_priority ----------------------------------------------------
+assert_eq "prio ok"    "user.info"    "$(severity_priority ok)"
+assert_eq "prio warn"  "user.warning" "$(severity_priority warn)"
+assert_eq "prio error" "user.err"     "$(severity_priority error)"
+# alert maps to syslog `alert` (severity 1), NOT `crit` (severity 2).
+# Regression guard for PR #1686 r1 adv #1: previously mapped to user.crit,
+# which silently downgraded the highest-severity tier.
+assert_eq "prio alert" "user.alert"   "$(severity_priority alert)"
+
+# ----- disk-cleanup.sh /tmp pattern safety ----------------------------------
+# Regression guard for PR #1686 r1 adv #2: cleanup must NOT match a bare
+# `*.db` pattern in /tmp — that would nuke unrelated SQLite session files,
+# sqlite-pkg test outputs, and any debugging artifacts. Only named prefixes
+# (`staging-snap.*`, `cs-*`, `node-compile-cache`) are allowed.
+CLEANUP_SH="$SCRIPT_DIR/disk-cleanup.sh"
+if [ -f "$CLEANUP_SH" ]; then
+    if grep -Eq "^[[:space:]]*-name[[:space:]]+'\*\.db'" "$CLEANUP_SH"; then
+        FAIL=$((FAIL + 1))
+        echo "FAIL: disk-cleanup.sh contains bare -name '*.db' (data-loss footgun)" >&2
+    else
+        PASS=$((PASS + 1))
+    fi
+    # Sanity: the named-prefix patterns we DO want must still be present.
+    for pat in "staging-snap.\*" "cs-\*" "node-compile-cache"; do
+        if grep -Eq "\-name[[:space:]]+'${pat}'" "$CLEANUP_SH"; then
+            PASS=$((PASS + 1))
+        else
+            FAIL=$((FAIL + 1))
+            echo "FAIL: disk-cleanup.sh missing expected -name '${pat}' pattern" >&2
+        fi
+    done
+fi
+
+echo "----"
+echo "PASS=$PASS FAIL=$FAIL"
+[ "$FAIL" -eq 0 ]