|
37 | 37 | ) |
38 | 38 | from openhands.sdk.workspace import RemoteWorkspace |
39 | 39 | from openhands.tools.preset.default import get_default_tools |
40 | | -from openhands.workspace import APIRemoteWorkspace |
| 40 | +from openhands.workspace import APIRemoteWorkspace, DockerWorkspace |
41 | 41 |
|
42 | 42 |
|
43 | 43 | logger = get_logger(__name__) |
@@ -119,44 +119,73 @@ def prepare_workspace(self, instance: EvalInstance) -> RemoteWorkspace: |
119 | 119 | """Create workspace and copy necessary files.""" |
120 | 120 | logger.info(f"Preparing workspace for instance {instance.id}") |
121 | 121 |
|
122 | | - # GAIA uses remote workspace with a universal agent server image |
123 | | - # (unlike SWE-bench which needs per-instance images) |
124 | | - if self.metadata.workspace_type != "remote": |
125 | | - raise ValueError( |
126 | | - f"GAIA only supports workspace_type='remote', got '{self.metadata.workspace_type}'" |
127 | | - ) |
128 | | - |
129 | | - runtime_api_key = os.getenv("RUNTIME_API_KEY") |
130 | | - sdk_short_sha = os.getenv("SDK_SHORT_SHA", SDK_SHORT_SHA) |
131 | | - if not runtime_api_key: |
132 | | - raise ValueError( |
133 | | - "RUNTIME_API_KEY environment variable is not set for remote workspace" |
134 | | - ) |
135 | | - |
136 | 122 | # GAIA uses a universal agent server image (one image for all instances) |
137 | 123 | # Built from nikolaik/python-nodejs:python3.12-nodejs22 base |
138 | 124 | # Using MCP-enabled image to avoid 1-18 minute startup delays |
139 | 125 | # Using binary target (not binary-minimal) to include Chromium for browser operations |
140 | 126 | # Note: binary target doesn't add target suffix to tag, so it's just gaia-with-mcp |
| 127 | + sdk_short_sha = os.getenv("SDK_SHORT_SHA", SDK_SHORT_SHA) |
141 | 128 | agent_server_image = f"{EVAL_AGENT_SERVER_IMAGE}:{sdk_short_sha}-gaia-with-mcp" |
142 | 129 |
|
143 | | - if not image_exists(agent_server_image): |
144 | | - raise RuntimeError( |
145 | | - f"Agent server image {agent_server_image} does not exist in container registry. " |
146 | | - f"Run 'benchmarks/gaia/build_images.py --push' to build and push it first." |
| 130 | + if self.metadata.workspace_type == "docker": |
| 131 | + # For local testing, use DockerWorkspace with pre-built GAIA image |
| 132 | + SKIP_BUILD = os.getenv("SKIP_BUILD", "1").lower() in ("1", "true", "yes") |
| 133 | + logger.info(f"SKIP_BUILD={SKIP_BUILD}") |
| 134 | + if not SKIP_BUILD: |
| 135 | + from benchmarks.utils.build_utils import build_image |
| 136 | + |
| 137 | + logger.info( |
| 138 | + "Building GAIA workspace image. This may take a while...\n" |
| 139 | + "You can run benchmarks/gaia/build_images.py and set " |
| 140 | + "SKIP_BUILD=1 to skip building and use pre-built image." |
| 141 | + ) |
| 142 | + output = build_image( |
| 143 | + base_image="nikolaik/python-nodejs:python3.12-nodejs22", |
| 144 | + target_image=EVAL_AGENT_SERVER_IMAGE, |
| 145 | + custom_tag="gaia-with-mcp", |
| 146 | + target="binary", |
| 147 | + push=False, |
| 148 | + ) |
| 149 | + logger.info(f"Image build output: {output}") |
| 150 | + assert output.error is None, f"Image build failed: {output.error}" |
| 151 | + if agent_server_image not in output.tags: |
| 152 | + raise RuntimeError( |
| 153 | + f"Built image tags {output.tags} do not include expected tag " |
| 154 | + f"{agent_server_image}" |
| 155 | + ) |
| 156 | + |
| 157 | + workspace = DockerWorkspace( |
| 158 | + server_image=agent_server_image, |
| 159 | + working_dir="/workspace", |
147 | 160 | ) |
| 161 | + elif self.metadata.workspace_type == "remote": |
| 162 | + runtime_api_key = os.getenv("RUNTIME_API_KEY") |
| 163 | + if not runtime_api_key: |
| 164 | + raise ValueError( |
| 165 | + "RUNTIME_API_KEY environment variable is not set for remote workspace" |
| 166 | + ) |
148 | 167 |
|
149 | | - logger.info( |
150 | | - f"Using remote workspace with GAIA image {agent_server_image} (sdk sha: {sdk_short_sha})" |
151 | | - ) |
152 | | - workspace = APIRemoteWorkspace( |
153 | | - runtime_api_url=os.getenv( |
154 | | - "RUNTIME_API_URL", "https://runtime.eval.all-hands.dev" |
155 | | - ), |
156 | | - runtime_api_key=runtime_api_key, |
157 | | - server_image=agent_server_image, |
158 | | - target_type="binary", # GAIA images use binary target |
159 | | - ) |
| 168 | + if not image_exists(agent_server_image): |
| 169 | + raise RuntimeError( |
| 170 | + f"Agent server image {agent_server_image} does not exist in container registry. " |
| 171 | + f"Run 'benchmarks/gaia/build_images.py --push' to build and push it first." |
| 172 | + ) |
| 173 | + |
| 174 | + logger.info( |
| 175 | + f"Using remote workspace with GAIA image {agent_server_image} (sdk sha: {sdk_short_sha})" |
| 176 | + ) |
| 177 | + workspace = APIRemoteWorkspace( |
| 178 | + runtime_api_url=os.getenv( |
| 179 | + "RUNTIME_API_URL", "https://runtime.eval.all-hands.dev" |
| 180 | + ), |
| 181 | + runtime_api_key=runtime_api_key, |
| 182 | + server_image=agent_server_image, |
| 183 | + target_type="binary", # GAIA images use binary target |
| 184 | + ) |
| 185 | + else: |
| 186 | + raise ValueError( |
| 187 | + f"Unsupported workspace_type: {self.metadata.workspace_type}" |
| 188 | + ) |
160 | 189 |
|
161 | 190 | # Create workspace directory |
162 | 191 | workspace.execute_command("mkdir -p /workspace") |
|
0 commit comments