Skip to content

Commit 6614af7

Browse files
Fix GAIA evaluation: Use binary target instead of binary-minimal to include Chromium
- Change the default build target from binary-minimal to binary in build-gaia-eval-image.yml
- Update run_infer.py to look for the gaia-with-mcp image instead of gaia-binary-minimal-with-mcp (the binary target adds no target suffix to the image tag)
- This ensures Chromium and other browser dependencies are available for GAIA tasks
- Resolves 500 Internal Server Error: 'Chromium is required for browser operations'
1 parent c5cc86c commit 6614af7

File tree

2 files changed

+24
-12
lines changed

2 files changed

+24
-12
lines changed

.github/workflows/build-gaia-eval-image.yml

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@ on:
1212
required: true
1313
type: string
1414
target:
15-
description: 'Build target (default: binary-minimal)'
15+
description: 'Build target (default: binary)'
1616
required: false
17-
default: 'binary-minimal'
17+
default: 'binary'
1818
type: choice
1919
options:
20+
- binary
2021
- binary-minimal
2122
- source-minimal
2223

@@ -90,7 +91,7 @@ jobs:
9091
run: |
9192
set -euo pipefail
9293
93-
TARGET="${{ inputs.target || 'binary-minimal' }}"
94+
TARGET="${{ inputs.target || 'binary' }}"
9495
9596
CMD="uv run benchmarks/gaia/build_images.py \
9697
--image ghcr.io/openhands/eval-agent-server \
@@ -109,9 +110,16 @@ jobs:
109110
110111
# Get the SDK commit SHA for tagging
111112
SDK_SHA=$(git submodule status vendor/software-agent-sdk | awk '{print $1}' | sed 's/^[+-]//' | cut -c1-7)
112-
TARGET="${{ inputs.target || 'binary-minimal' }}"
113-
BASE_IMAGE="ghcr.io/openhands/eval-agent-server:${SDK_SHA}-gaia-${TARGET}"
114-
MCP_IMAGE="ghcr.io/openhands/eval-agent-server:${SDK_SHA}-gaia-${TARGET}-with-mcp"
113+
TARGET="${{ inputs.target || 'binary' }}"
114+
115+
# Compute base image tag: target suffix is only added for non-binary targets
116+
if [ "$TARGET" = "binary" ]; then
117+
BASE_IMAGE="ghcr.io/openhands/eval-agent-server:${SDK_SHA}-gaia"
118+
MCP_IMAGE="ghcr.io/openhands/eval-agent-server:${SDK_SHA}-gaia-with-mcp"
119+
else
120+
BASE_IMAGE="ghcr.io/openhands/eval-agent-server:${SDK_SHA}-gaia-${TARGET}"
121+
MCP_IMAGE="ghcr.io/openhands/eval-agent-server:${SDK_SHA}-gaia-${TARGET}-with-mcp"
122+
fi
115123
116124
echo "Building MCP-enhanced image..."
117125
echo " Base image: ${BASE_IMAGE}"
@@ -206,9 +214,13 @@ jobs:
206214
exit 0
207215
fi
208216
209-
# Construct MCP image tag
210-
TARGET="${{ inputs.target || 'binary-minimal' }}"
211-
MCP_IMAGE_TAG="ghcr.io/openhands/eval-agent-server:${SDK_SHA_SHORT}-gaia-${TARGET}-with-mcp"
217+
# Construct MCP image tag (target suffix only added for non-binary targets)
218+
TARGET="${{ inputs.target || 'binary' }}"
219+
if [ "$TARGET" = "binary" ]; then
220+
MCP_IMAGE_TAG="ghcr.io/openhands/eval-agent-server:${SDK_SHA_SHORT}-gaia-with-mcp"
221+
else
222+
MCP_IMAGE_TAG="ghcr.io/openhands/eval-agent-server:${SDK_SHA_SHORT}-gaia-${TARGET}-with-mcp"
223+
fi
212224
213225
# Determine trigger source
214226
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then

benchmarks/gaia/run_infer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,9 @@ def prepare_workspace(self, instance: EvalInstance) -> RemoteWorkspace:
136136
# GAIA uses a universal agent server image (one image for all instances)
137137
# Built from nikolaik/python-nodejs:python3.12-nodejs22 base
138138
# Using MCP-enabled image to avoid 1-18 minute startup delays
139-
agent_server_image = (
140-
f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-gaia-binary-minimal-with-mcp"
141-
)
139+
# Using binary target (not binary-minimal) to include Chromium for browser operations
140+
# Note: binary target doesn't add target suffix to tag, so it's just gaia-with-mcp
141+
agent_server_image = f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-gaia-with-mcp"
142142

143143
if not image_exists(agent_server_image):
144144
raise RuntimeError(

0 commit comments

Comments
 (0)