feat(benchmarks): update benchmark workflow with JSON output and caching for results comparison

This commit is contained in:
Ivan
2026-04-30 12:15:08 -05:00
parent c8a1ce6240
commit 70cf79d768
2 changed files with 55 additions and 6 deletions
+29 -2
View File
@@ -1,10 +1,15 @@
# Benchmarks and integrity checks (push to main branches and workflow_dispatch).
# Results are stored in the GitHub Actions cache (actions/cache) and compared on
# every run; the job fails when any metric exceeds 150% of the stored baseline
# (i.e. is more than 1.5x slower), and a commit comment is posted with the
# offending numbers.
#
# Pinned actions, first- and third-party (bump tag and SHA together when upgrading):
# actions/checkout@v6.0.1 8e8c483db84b4bee98b60c0593521ed34d9990e8
# actions/setup-python@v6.2.0 a309ff8b426b58ec0e2a45f0f869d46889d02405
# actions/setup-node@v6.1.0 395ad3262231945c25e8478fd5baf05154b1d79f
# actions/cache@v4.2.0 1bd1e32a3bdc45362d1e726936510720a7c30a57
# benchmark-action/github-action-benchmark@v1.22.0
# a60cea5bc7b49e15c1f58f411161f99e0df48372
name: Benchmarks
@@ -20,7 +25,7 @@ concurrency:
cancel-in-progress: true
permissions:
contents: read
contents: write
env:
FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: true
@@ -71,12 +76,34 @@ jobs:
- name: Setup Task
run: sh scripts/ci/setup-task.sh
# Cache entries are immutable: on an exact key hit actions/cache skips the save,
# so a fixed per-branch key would freeze the baseline at the first run. Use a
# unique key per run and restore the most recent entry through the prefix
# restore-keys (same branch first, then any branch as a fallback).
- name: Restore benchmark baseline cache
uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57
with:
path: ./cache
key: ${{ runner.os }}-bench-baseline-${{ github.ref_name }}-${{ github.run_id }}
restore-keys: |
${{ runner.os }}-bench-baseline-${{ github.ref_name }}-
${{ runner.os }}-bench-baseline-
- name: Run benchmarks
run: |
set -euo pipefail
task bench 2>&1 | tee bench_results.txt
poetry run python tests/backend/run_comprehensive_benchmarks.py \
--json-output bench_results.json 2>&1 | tee bench_results.txt
- name: Run integrity tests
run: |
set -euo pipefail
task test:integrity 2>&1 | tee -a bench_results.txt
# Compare this run's results against the history kept in the cached ./cache directory.
- name: Store and compare benchmark results
uses: benchmark-action/github-action-benchmark@a60cea5bc7b49e15c1f58f411161f99e0df48372
with:
name: MeshChatX Backend Benchmarks
# Entries are durations in ms, so smaller values are better.
tool: customSmallerIsBetter
# Written by the "Run benchmarks" step via --json-output.
output-file-path: bench_results.json
# Lives under ./cache, the path handled by the baseline cache step.
external-data-json-path: ./cache/benchmark-data.json
github-token: ${{ secrets.GITHUB_TOKEN }}
# Fail the job and post a commit comment when a metric passes 150% of the baseline.
alert-threshold: "150%"
fail-on-alert: true
comment-on-alert: true
# NOTE(review): presumably also writes the results to the job summary on every run - confirm.
summary-always: true
+26 -4
View File
@@ -35,7 +35,7 @@ class BackendBenchmarker:
self.db.close()
shutil.rmtree(self.temp_dir)
def run_all(self, extreme=False):
def run_all(self, extreme=False, json_output_path=None):
print(f"\n{'=' * 20} BACKEND BENCHMARKING START {'=' * 20}")
print(f"Mode: {'EXTREME (Breaking Space)' if extreme else 'Standard'}")
print(f"Base Memory: {get_memory_usage_mb():.2f} MB")
@@ -53,7 +53,7 @@ class BackendBenchmarker:
self.bench_telephony_operations()
self.print_summary()
self.print_summary(json_output_path=json_output_path)
def bench_extreme_message_flood(self):
"""Insert 100,000 messages with large randomized content."""
@@ -305,7 +305,7 @@ class BackendBenchmarker:
_, res = log_call()
self.results.append(res)
def print_summary(self):
def print_summary(self, json_output_path=None):
print(f"\n{'=' * 20} BENCHMARK SUMMARY {'=' * 20}")
print(f"{'Benchmark Name':40} | {'Avg Time':10} | {'Mem Delta':10}")
print(f"{'-' * 40}-|-{'-' * 10}-|-{'-' * 10}")
@@ -316,6 +316,22 @@ class BackendBenchmarker:
print(f"{'=' * 59}")
print(f"Final Memory Usage: {get_memory_usage_mb():.2f} MB")
if json_output_path:
import json as _json
entries = [
{
"name": r.name,
"unit": "ms",
"value": round(r.duration_ms, 3),
"extra": f"Memory delta: {r.memory_delta_mb:.2f} MB",
}
for r in self.results
]
with open(json_output_path, "w") as f:
_json.dump(entries, f, indent=2)
print(f"Benchmark JSON written to {json_output_path}")
if __name__ == "__main__":
import argparse
@@ -326,10 +342,16 @@ if __name__ == "__main__":
action="store_true",
help="Run extreme stress tests",
)
parser.add_argument(
"--json-output",
metavar="PATH",
default=None,
help="Write benchmark results as github-action-benchmark customSmallerIsBetter JSON to PATH",
)
args = parser.parse_args()
bench = BackendBenchmarker()
try:
bench.run_all(extreme=args.extreme)
bench.run_all(extreme=args.extreme, json_output_path=args.json_output)
finally:
bench.cleanup()