Files
MeshChatX/.github/workflows/bench.yml
T

116 lines
4.4 KiB
YAML

# Benchmarks and integrity checks (push to master/dev and workflow_dispatch).
# Results are stored in the GitHub Actions cache and compared on every run. A
# commit comment is posted when any metric exceeds 200% of the stored baseline
# (alert-threshold), and the job fails beyond 300% (fail-threshold).
#
# Pinned actions (bump tag and SHA together when upgrading):
# actions/checkout@v6.0.1 8e8c483db84b4bee98b60c0593521ed34d9990e8
# actions/setup-python@v6.2.0 a309ff8b426b58ec0e2a45f0f869d46889d02405
# actions/setup-node@v6.1.0 395ad3262231945c25e8478fd5baf05154b1d79f
# actions/cache@v4.2.0 1bd1e32a3bdc45362d1e726936510720a7c30a57
# benchmark-action/github-action-benchmark@v1.22.0
# a60cea5bc7b49e15c1f58f411161f99e0df48372
name: Benchmarks

# Triggers: manual dispatch from the Actions UI, or a push to master or dev.
on:
  push:
    branches: [master, dev]
  workflow_dispatch:
# Runs are grouped by workflow name and ref; starting a new run in a group
# cancels the run that is still in progress for that same group.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}
# Scopes granted to the GITHUB_TOKEN for this workflow.
permissions:
  # NOTE(review): presumably required for the commit comment posted by the
  # benchmark comparison step -- confirm.
  contents: write
  # NOTE(review): this workflow has no pull_request trigger; confirm whether
  # this scope is still needed or can be dropped for least privilege.
  pull-requests: write
# Workflow-wide environment, visible to every step of every job.
env:
  # Runner opt-in flag; per its name it forces JavaScript actions onto Node 24.
  # Quoted so generic YAML tooling keeps it a string; the runner sees "true".
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"

  # Toolchain pins referenced by the setup steps. Keep them quoted so that
  # values such as "3.14" are never parsed as floats.
  PYTHON_VERSION: "3.14"
  NODE_VERSION: "24"
  POETRY_VERSION: "2.3.4"
  PNPM_VERSION: "10.33.0"
jobs:
  # Single job: sets up the Python and Node toolchains, runs the backend
  # benchmarks and the integrity tests, then compares the benchmark JSON with
  # the baseline restored from the Actions cache.
  bench:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8

      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install Poetry (PyPI pin)
        env:
          # Passed explicitly to the install script's environment.
          POETRY_VERSION: ${{ env.POETRY_VERSION }}
        run: bash scripts/ci/github-install-poetry.sh

      - name: Cache Poetry downloads
        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57
        with:
          path: ~/.cache/pypoetry
          # Exact match on the lock file hash; otherwise fall back to the most
          # recent Poetry cache for this OS.
          key: ${{ runner.os }}-pypoetry-${{ hashFiles('poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-pypoetry-

      # pnpm is activated before setup-node runs.
      # NOTE(review): presumably so that `cache: pnpm` below can locate the
      # pnpm binary -- confirm before reordering these two steps.
      - name: Enable pnpm (corepack)
        run: corepack enable && corepack prepare "pnpm@${PNPM_VERSION}" --activate

      - name: Set up Node
        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: pnpm
          cache-dependency-path: pnpm-lock.yaml

      - name: Install dependencies
        run: bash scripts/ci/github-install-deps.sh

      - name: Setup Task
        run: sh scripts/ci/setup-task.sh

      # actions/cache entries are immutable: on an exact primary-key hit the
      # post-job save is skipped, so a key that is identical on every run
      # would freeze the baseline at the first result ever stored. The primary
      # key is therefore unique per run (a new baseline is saved after each
      # successful job) and restore-keys select the most recent earlier entry:
      # this branch first, then any other baseline for this OS. The second
      # prefix also matches entries written under the previous key format.
      - name: Restore benchmark baseline cache
        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57
        with:
          path: ./cache
          key: ${{ runner.os }}-bench-baseline-${{ github.ref_name }}-${{ github.run_id }}
          restore-keys: |
            ${{ runner.os }}-bench-baseline-${{ github.ref_name }}-
            ${{ runner.os }}-bench-baseline-

      - name: Run benchmarks
        # pipefail keeps a failing benchmark run from being masked by the
        # exit status of tee.
        run: |
          set -euo pipefail
          poetry run python tests/backend/run_comprehensive_benchmarks.py \
            --json-output bench_results.json 2>&1 | tee bench_results.txt

      - name: Run integrity tests
        # Output is appended (tee -a) to the log started by the benchmarks.
        run: |
          set -euo pipefail
          task test:integrity 2>&1 | tee -a bench_results.txt

      - name: Store and compare benchmark results
        uses: benchmark-action/github-action-benchmark@a60cea5bc7b49e15c1f58f411161f99e0df48372
        with:
          name: MeshChatX Backend Benchmarks
          tool: customSmallerIsBetter
          output-file-path: bench_results.json
          # History file kept inside the cached ./cache directory.
          external-data-json-path: ./cache/benchmark-data.json
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # GitHub shared runners have 20-40% variance even with identical
          # code. alert-threshold posts a comment; fail-threshold fails
          # the job. Sub-ms operations are especially noisy so we keep
          # the comment bar at 2x and the hard-fail bar at 3x.
          alert-threshold: "200%"
          fail-threshold: "300%"
          fail-on-alert: true
          comment-on-alert: true
          summary-always: true