Skip to content

Commit 1e8692b

Browse files
simonrosenberg, claude, and xingyaoww
authored
Align eval labels with benchmarks build tiers (1, 50, 200) (#1254)
Co-authored-by: Claude <noreply@anthropic.com>
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
1 parent 824538d commit 1e8692b

File tree

1 file changed

+9
-10
lines changed

1 file changed

+9
-10
lines changed

.github/workflows/run-eval.yml

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,9 @@ on:
1919
type: choice
2020
options:
2121
- '1'
22-
- '2'
23-
- '10'
2422
- '50'
25-
- '100'
23+
- '200'
24+
- '500'
2625
model_stubs:
2726
description: Comma-separated model stubs to evaluate (must be allowlisted)
2827
required: false
@@ -45,7 +44,7 @@ env:
4544
EVAL_AGENT_IMAGE: ghcr.io/openhands/eval-agent-server
4645
EVAL_AGENT_TARGET: source-minimal
4746
# Polling configuration for workflow status checks
48-
MAX_POLL_ATTEMPTS: '80' # 80 attempts × 60s = 80 minutes max wait
47+
MAX_POLL_ATTEMPTS: '600' # 600 attempts × 60s = 10 hours max wait
4948
POLL_INTERVAL_SECONDS: '60'
5049

5150
jobs:
@@ -55,10 +54,10 @@ jobs:
5554
github.event_name == 'workflow_dispatch' ||
5655
(github.event_name == 'pull_request_target' &&
5756
(github.event.label.name == 'run-eval-1' ||
58-
github.event.label.name == 'run-eval-2' ||
5957
github.event.label.name == 'run-eval-50' ||
60-
github.event.label.name == 'run-eval-100'))
61-
runs-on: blacksmith-32vcpu-ubuntu-2204
58+
github.event.label.name == 'run-eval-200' ||
59+
github.event.label.name == 'run-eval-500'))
60+
runs-on: ubuntu-latest
6261
permissions:
6362
contents: read
6463
packages: write
@@ -71,7 +70,7 @@ jobs:
7170
uses: actions/checkout@v4
7271
with:
7372
ref: ${{ github.event_name == 'pull_request_target' && github.event.pull_request.base.sha || (github.event_name ==
74-
'workflow_dispatch' && github.event.inputs.branch) || github.ref }}
73+
'workflow_dispatch' && github.event.inputs.sdk_ref) || github.ref }}
7574
fetch-depth: 0
7675

7776
- name: Load allowlists
@@ -117,9 +116,9 @@ jobs:
117116
LABEL="${{ github.event.label.name }}"
118117
case "$LABEL" in
119118
run-eval-1) EVAL_LIMIT=1 ;;
120-
run-eval-2) EVAL_LIMIT=2 ;;
121119
run-eval-50) EVAL_LIMIT=50 ;;
122-
run-eval-100) EVAL_LIMIT=100 ;;
120+
run-eval-200) EVAL_LIMIT=200 ;;
121+
run-eval-500) EVAL_LIMIT=500 ;;
123122
*) echo "Unsupported label $LABEL" >&2; exit 1 ;;
124123
esac
125124
SDK_REF="${{ github.event.pull_request.head.ref }}"

0 commit comments

Comments
 (0)