diff --git a/src/eval/general/eval.sh b/src/eval/general/eval.sh
new file mode 100755
index 0000000..a1164bd
--- /dev/null
+++ b/src/eval/general/eval.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Wrapper around evaluate.py for data-engineering agent runs.
+#
+# Why this exists: the bind-mounted Python env at /opt/env contains the
+# `vllm` CLI binary at /opt/env/local/bin/vllm, and run_task.sh injects
+# that directory into PATH via `apptainer exec --env PATH=...`. However,
+# the codex CLI runs every shell command through `bash -lc "..."` (login
+# shell), which sources /etc/profile and the user's login profile (which
+# typically pulls in ~/.bashrc) and *overwrites* PATH with the container's
+# defaults — stripping out /opt/env/local/bin. As a result the agent sees
+# `vllm: command not found` and inspect_ai cannot spawn its local vLLM server.
+#
+# Re-assert the bind-mounted env at the front of PATH (the old PATH is appended
+# only when non-empty, so no trailing ':' puts the cwd on the search path), then
+# exec evaluate.py with all args. Agents: run self-evals via `bash eval.sh ...`, not `python3 evaluate.py ...`.
+export PATH="/opt/env/local/bin:/opt/env/bin${PATH:+:${PATH}}"
+exec python3 /home/ben/task/evaluate.py "$@"
diff --git a/src/run_task.sh b/src/run_task.sh
index 7fd8108..d48f28d 100644
--- a/src/run_task.sh
+++ b/src/run_task.sh
@@ -73,6 +73,13 @@ if [ "$POST_TRAIN_BENCH_PROMPT" = "data_eng_prompt" ]; then
     cp src/eval/general/train_sft.py "${JOB_DIR}/task/"
     cp src/eval/general/dataset_audit.py "${JOB_DIR}/task/"
     cp src/eval/general/publish_experiment.py "${JOB_DIR}/task/"
+    # eval.sh wrapper: codex runs commands via `bash -lc`, which overwrites
+    # PATH and strips /opt/env/local/bin, so calling `python3 evaluate.py`
+    # directly fails to find the bind-mounted `vllm` CLI. The wrapper
+    # re-asserts PATH before exec'ing evaluate.py; agents should run
+    # `bash eval.sh ...` for self-evals. chmod marks the copy executable.
+    cp src/eval/general/eval.sh "${JOB_DIR}/task/"
+    chmod +x "${JOB_DIR}/task/eval.sh"
     mkdir -p "${JOB_DIR}/task/experiments"
 fi