# Benchmarks and integrity checks (push to main branches and workflow_dispatch).
# Results are stored in a runner cache and compared on every run. A commit
# comment is posted when any metric regresses beyond 200% of the stored
# baseline, and the job fails when a metric regresses beyond 300%.
#
# Pinned actions (bump tag and SHA together when upgrading):
#   actions/checkout@v6.0.1       8e8c483db84b4bee98b60c0593521ed34d9990e8
#   actions/setup-python@v6.2.0   a309ff8b426b58ec0e2a45f0f869d46889d02405
#   actions/setup-node@v6.1.0     395ad3262231945c25e8478fd5baf05154b1d79f
#   actions/cache@v4.2.0          1bd1e32a3bdc45362d1e726936510720a7c30a57
#   benchmark-action/github-action-benchmark@v1.22.0
#                                 a60cea5bc7b49e15c1f58f411161f99e0df48372

name: Benchmarks

on:
  workflow_dispatch:
  push:
    branches:
      - master
      - dev

# One run per workflow/ref; a newer push cancels the run still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write
  # NOTE(review): this workflow only triggers on push and workflow_dispatch;
  # pull-requests: write looks unused here - confirm and drop if so.
  pull-requests: write

env:
  # Quoted: environment values are strings, not YAML booleans.
  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
  PYTHON_VERSION: "3.14"
  NODE_VERSION: "24"
  POETRY_VERSION: "2.3.4"
  PNPM_VERSION: "10.33.0"

jobs:
  bench:
    runs-on: ubuntu-latest
    timeout-minutes: 60
    steps:
      - name: Checkout
        uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8  # v6.0.1

      - name: Set up Python
        uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install Poetry (PyPI pin)
        env:
          POETRY_VERSION: ${{ env.POETRY_VERSION }}
        run: bash scripts/ci/github-install-poetry.sh

      # Poetry download cache, invalidated whenever poetry.lock changes.
      - name: Cache Poetry downloads
        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57  # v4.2.0
        with:
          path: ~/.cache/pypoetry
          key: ${{ runner.os }}-pypoetry-${{ hashFiles('poetry.lock') }}
          restore-keys: |
            ${{ runner.os }}-pypoetry-

      # Runs before setup-node, whose `cache: pnpm` option needs pnpm on PATH.
      - name: Enable pnpm (corepack)
        run: corepack enable && corepack prepare "pnpm@${PNPM_VERSION}" --activate

      - name: Set up Node
        uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f  # v6.1.0
        with:
          node-version: ${{ env.NODE_VERSION }}
          cache: pnpm
          cache-dependency-path: pnpm-lock.yaml

      - name: Install dependencies
        run: bash scripts/ci/github-install-deps.sh

      - name: Setup Task
        run: sh scripts/ci/setup-task.sh

      - name: Restore benchmark baseline cache
        uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57  # v4.2.0
        with:
          path: ./cache
          # Cache entries are immutable: an exact hit on the primary key skips
          # the post-job save, so a static key would freeze the baseline at
          # the first run. The per-run key below always misses, the newest
          # matching entry is restored through restore-keys, and the updated
          # data file is saved under the new key when the job succeeds.
          key: ${{ runner.os }}-bench-baseline-${{ github.ref_name }}-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            ${{ runner.os }}-bench-baseline-${{ github.ref_name }}-
            ${{ runner.os }}-bench-baseline-

      # pipefail makes the step fail when the benchmark script fails, even
      # though its output is piped through tee.
      - name: Run benchmarks
        run: |
          set -euo pipefail
          poetry run python tests/backend/run_comprehensive_benchmarks.py \
            --json-output bench_results.json 2>&1 | tee bench_results.txt

      # Appends to the same log file written by the benchmark step.
      - name: Run integrity tests
        run: |
          set -euo pipefail
          task test:integrity 2>&1 | tee -a bench_results.txt

      - name: Store and compare benchmark results
        uses: benchmark-action/github-action-benchmark@a60cea5bc7b49e15c1f58f411161f99e0df48372  # v1.22.0
        with:
          name: MeshChatX Backend Benchmarks
          tool: customSmallerIsBetter
          output-file-path: bench_results.json
          external-data-json-path: ./cache/benchmark-data.json
          github-token: ${{ secrets.GITHUB_TOKEN }}
          # GitHub shared runners have 20-40% variance even with identical
          # code. alert-threshold posts a comment; fail-threshold fails
          # the job. Sub-ms operations are especially noisy so we keep
          # the comment bar at 2x and the hard-fail bar at 3x.
          alert-threshold: "200%"
          fail-threshold: "300%"
          fail-on-alert: true
          comment-on-alert: true
          summary-always: true