Skip to content

Commit 7d8ec59

Browse files
Add GAIA image build workflow
This workflow builds a universal agent server image for GAIA benchmark evaluation. Unlike SWE-bench, which requires per-instance images with specific repository environments, GAIA uses a single universal image for all instances, since they share the same Python+Node.js environment (nikolaik/python-nodejs:python3.12-nodejs22).

Workflow features:
- Minimal configuration: only requires sdk-commit and an optional target parameter
- Builds one universal image tagged as: ghcr.io/openhands/eval-agent-server:{SDK_SHA}-gaia-binary-minimal
- Can be triggered via workflow_dispatch or by adding the 'build-gaia' label to PRs
- Posts build status to issue #81 for tracking

Note: The workflow filename is singular (build-gaia-image.yml) to reflect that it builds a single universal image, unlike SWE-bench, which uses the plural (build-swe-bench-images.yml) for its many per-instance images.

This is a prerequisite for enabling GAIA benchmark evaluation on the Kubernetes evaluation infrastructure.
1 parent baf988e commit 7d8ec59

File tree

1 file changed

+209
-0
lines changed

1 file changed

+209
-0
lines changed
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
# Builds and pushes the GAIA agent-server image.
name: Build GAIA Images

on:
  # Fires whenever a label is added to a pull request; the job-level `if`
  # narrows this to the 'build-gaia' label.
  pull_request_target:
    types:
      - labeled

  # Manual runs against an explicit SDK commit/ref.
  workflow_dispatch:
    inputs:
      sdk-commit:
        type: string
        description: 'Software Agent SDK commit/ref to use'
        required: true
      target:
        type: choice
        description: 'Build target (default: binary-minimal)'
        required: false
        default: 'binary-minimal'
        options:
          - binary-minimal
          - source-minimal
20+
21+
# Serialize builds per pull request (or per ref for manual runs).
# Under pull_request_target, github.ref is the PR's *base* branch, so keying on
# it alone would place every labeled PR in one group, where a newly queued run
# cancels whichever run is already pending. In-progress builds are never
# cancelled.
concurrency:
  group: build-gaia-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: false
24+
25+
jobs:
  build-and-push:
    # Run for every manual dispatch, and for pull requests only when the label
    # that was just added is 'build-gaia'.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      (github.event_name == 'pull_request_target' && github.event.label.name == 'build-gaia')

    runs-on:
      labels: ubuntu-latest

    permissions:
      contents: read   # check out the repository
      packages: write  # push the image to ghcr.io
      issues: write    # post the build comment on the tracker issue

    steps:
41+
# Resolve the commit to check out: the PR head SHA when the event carries one,
# otherwise an empty ref so the checkout action falls back to its default.
- name: Determine checkout ref
  id: checkout-ref
  env:
    PR_HEAD_SHA: ${{ github.event.pull_request.head.sha }}
  run: |
    if [ -z "$PR_HEAD_SHA" ]; then
      echo "ref=" >> "$GITHUB_OUTPUT"
      echo "Using default ref (the commit that triggered this workflow)"
    else
      echo "ref=$PR_HEAD_SHA" >> "$GITHUB_OUTPUT"
      echo "Using PR head SHA: $PR_HEAD_SHA"
    fi
51+
52+
# Check out the resolved ref together with all submodules.
# NOTE(review): for pull_request_target runs this checks out PR-authored code,
# which later steps build while the job holds packages:write and issues:write.
# The 'build-gaia' label is the only gate - confirm that only trusted
# maintainers can apply it.
- uses: actions/checkout@v4
  with:
    submodules: recursive
    ref: ${{ steps.checkout-ref.outputs.ref }}
56+
57+
# Manual runs only: move the vendored SDK submodule to the requested commit/ref
# and stage the new submodule pointer in the superproject.
- name: Update SDK submodule
  if: ${{ github.event_name == 'workflow_dispatch' && inputs.sdk-commit != '' }}
  env:
    # Passed through the environment instead of being interpolated into the
    # script, so the free-form input is never parsed as shell syntax.
    SDK_COMMIT: ${{ inputs.sdk-commit }}
  run: |
    set -euo pipefail

    # Refuse values that git would interpret as a command-line option.
    case "$SDK_COMMIT" in
      -*)
        echo "::error::sdk-commit must not start with '-'"
        exit 1
        ;;
    esac

    git -C vendor/software-agent-sdk fetch origin "$SDK_COMMIT"
    git -C vendor/software-agent-sdk checkout FETCH_HEAD
    SDK_SHA=$(git -C vendor/software-agent-sdk rev-parse HEAD)

    git add vendor/software-agent-sdk
    echo "Updated SDK submodule to $SDK_SHA (from $SDK_COMMIT)"
67+
68+
# Set up the Docker builder before the image build.
# NOTE(review): this is Blacksmith's builder action while the job runs on
# ubuntu-latest - confirm that combination is intended.
- name: Set up Docker Buildx with Blacksmith
  uses: useblacksmith/setup-docker-builder@v1
70+
71+
# Authenticate against ghcr.io as the triggering actor, using the job's
# GITHUB_TOKEN as the password.
- name: Log in to GitHub Container Registry
  uses: docker/login-action@v3
  with:
    registry: ghcr.io
    password: ${{ secrets.GITHUB_TOKEN }}
    username: ${{ github.actor }}
77+
78+
# Install uv (with the action's cache enabled); the build step invokes
# `uv run`.
- name: Install uv
  uses: astral-sh/setup-uv@v7
  with:
    enable-cache: true
82+
83+
# Run the repository's `make build` target.
- name: Install dependencies
  run: make build
86+
87+
# Build the GAIA image for the selected target and push it to ghcr.io.
- name: Build and push GAIA image
  env:
    DOCKER_BUILDKIT: "1"
    BUILDKIT_PROGRESS: plain
    # Label-triggered runs have no inputs, so fall back to the default target.
    # Provided via the environment so the value is never spliced into the
    # script text.
    TARGET: ${{ inputs.target || 'binary-minimal' }}
  run: |
    set -euo pipefail

    # An argv array keeps every argument intact without a string-built
    # command and `eval`.
    build_cmd=(
      uv run benchmarks/gaia/build_images.py
      --image ghcr.io/openhands/eval-agent-server
      --target "$TARGET"
      --push
    )

    echo "Running: ${build_cmd[*]}"
    "${build_cmd[@]}"
103+
104+
# Always runs: pack the builds/ directory into a tarball when it exists.
- name: Archive build logs
  if: always()
  run: |
    if [ ! -d builds ]; then
      echo "No builds directory found"
      exit 0
    fi

    tar -czf build-logs.tar.gz builds/
    echo "Build logs archived successfully"
113+
114+
# Always runs: publish the log tarball as a run artifact. A missing tarball
# only produces a warning.
- name: Upload build logs
  if: always()
  uses: actions/upload-artifact@v4
  with:
    name: build-logs-${{ github.run_id }}
    path: build-logs.tar.gz
    if-no-files-found: warn
    retention-days: 7
122+
123+
# Always runs: write a success/failure section to the job summary based on the
# build manifest. GAIA builds a single universal image, so only the first
# manifest found is reported.
- name: Display build summary
  if: always()
  env:
    # Make Python emit UTF-8 regardless of the runner's locale (the summary
    # contains emoji).
    PYTHONUTF8: "1"
  run: |
    MANIFEST_FILE=$(find builds -name "manifest.jsonl" -type f 2>/dev/null | head -1 || true)

    if [ -z "$MANIFEST_FILE" ]; then
      echo "## Build Summary" >> "$GITHUB_STEP_SUMMARY"
      echo "❌ Build failed - no manifest found" >> "$GITHUB_STEP_SUMMARY"
      exit 0
    fi

    # Parse the manifest once. An unreadable or malformed manifest is reported
    # as a failed build instead of crashing this step.
    python3 - "$MANIFEST_FILE" >> "$GITHUB_STEP_SUMMARY" <<'PY'
    import json
    import sys

    try:
        with open(sys.argv[1], encoding="utf-8") as manifest:
            record = json.loads(manifest.read())
    except (OSError, ValueError) as exc:
        record = {"error": f"Could not read manifest: {exc}"}
    if not isinstance(record, dict):
        record = {"error": "Manifest is not a JSON object"}

    # `tags` / `error` may be absent or null; normalise both cases.
    tags = record.get("tags") or []
    error = record.get("error")

    print("## GAIA Image Build Summary")
    print()
    if error is None and tags:
        print("✅ **Build Successful**")
        print()
        print("**Built Image:**")
        for tag in tags:
            print(f"- `{tag}`")
    else:
        print("❌ **Build Failed**")
        print()
        # A null/empty error with no tags still counts as a failure.
        print(f"**Error:** {error or 'Unknown error'}")
    PY
154+
155+
# On success, post a comment on the tracker issue with the SDK commit, the
# pushed image tag, a link to this run and what triggered it.
- name: Comment on tracker issue
  if: success()
  env:
    GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
    # Issue that collects build notifications.
    TRACKER_ISSUE: "81"
    # Empty for manual runs.
    PR_NUMBER: ${{ github.event.pull_request.number }}
    PR_URL: ${{ github.event.pull_request.html_url }}
  run: |
    # SDK commit currently checked out in the submodule (status prefix stripped).
    SDK_SHA=$(git submodule status vendor/software-agent-sdk | awk '{print $1}' | sed 's/^[+-]//')

    MANIFEST_FILE=$(find builds -name "manifest.jsonl" -type f 2>/dev/null | head -1 || true)
    if [ -z "$MANIFEST_FILE" ]; then
      echo "No manifest file found"
      exit 0
    fi

    # First tag recorded in the manifest, or 'unknown' when there is none.
    IMAGE_TAG=$(python3 -c "
    import sys, json
    data = json.loads(sys.stdin.read())
    tags = data.get('tags') or []
    print(tags[0] if tags else 'unknown')
    " < "$MANIFEST_FILE")

    if [ "$IMAGE_TAG" = "unknown" ]; then
      echo "No valid image tag found in manifest"
      exit 0
    fi

    # This workflow is triggered by pull_request_target (not pull_request), so
    # that event must be matched for the PR link to be produced.
    case "$GITHUB_EVENT_NAME" in
      workflow_dispatch)
        TRIGGER="Manual trigger (workflow_dispatch)"
        ;;
      pull_request | pull_request_target)
        TRIGGER="Pull request [#${PR_NUMBER}](${PR_URL})"
        ;;
      *)
        TRIGGER="$GITHUB_EVENT_NAME"
        ;;
    esac

    COMMENT_BODY=$(cat <<EOF
    ## GAIA Image Build Complete ✅

    **SDK Version:** [\`${SDK_SHA:0:7}\`](https://github.com/OpenHands/software-agent-sdk/commit/${SDK_SHA})
    **Image Tag:** \`${IMAGE_TAG}\`
    **Workflow Run:** [#${GITHUB_RUN_ID}](${GITHUB_SERVER_URL}/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID})
    **Triggered by:** ${TRIGGER}
    EOF
    )

    # The token is read from the environment rather than templated into the
    # script. An API error is surfaced as a warning instead of passing
    # silently; it does not fail an otherwise successful build.
    if ! curl -L -sS --fail-with-body -X POST \
      -H "Accept: application/vnd.github+json" \
      -H "Authorization: Bearer ${GITHUB_TOKEN}" \
      -H "X-GitHub-Api-Version: 2022-11-28" \
      "${GITHUB_API_URL}/repos/${GITHUB_REPOSITORY}/issues/${TRACKER_ISSUE}/comments" \
      -d "$(jq -n --arg body "$COMMENT_BODY" '{body: $body}')"; then
      echo "::warning::Failed to post build comment on issue #${TRACKER_ISSUE}"
    fi

0 commit comments

Comments
 (0)