# Patch summary (commentary only; it sits ahead of the first `diff --git`
# header and belongs to no hunk):
#   * src/eval/general/eval.sh (new file, mode 100755): wrapper that
#     prepends /opt/env/local/bin and /opt/env/bin to PATH, then exec's
#     `python3 /home/ben/task/evaluate.py` with every argument forwarded.
#   * src/run_task.sh: next to the existing task-file copies, also copies
#     eval.sh into "${JOB_DIR}/task/" and marks it executable.
# NOTE(review): the line structure below was restored from a
# whitespace-collapsed copy of this patch. The 4-space indentation inside
# the run_task.sh hunk and its final blank context line (needed to reach
# the six context lines that hunk's header declares) are assumptions --
# confirm against the real src/run_task.sh before applying.
diff --git a/src/eval/general/eval.sh b/src/eval/general/eval.sh
new file mode 100755
index 0000000..a1164bd
--- /dev/null
+++ b/src/eval/general/eval.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Wrapper around evaluate.py for data-engineering agent runs.
+#
+# Why this exists: the bind-mounted Python env at /opt/env contains the
+# `vllm` CLI binary at /opt/env/local/bin/vllm, and run_task.sh injects
+# that directory into PATH via `apptainer exec --env PATH=...`. However,
+# the codex CLI runs every shell command through `bash -lc "..."` (login
+# shell), which sources /etc/profile + ~/.bashrc and *overwrites* PATH
+# with the container's defaults — stripping out /opt/env/local/bin. As a
+# result the agent sees `vllm: command not found` and inspect_ai cannot
+# spawn its local vLLM server.
+#
+# This wrapper re-asserts the bind-mounted env on PATH and forwards all
+# arguments to evaluate.py. Agents should call `bash eval.sh ...` instead
+# of `python3 evaluate.py ...` for self-evals.
+export PATH="/opt/env/local/bin:/opt/env/bin:${PATH}"
+exec python3 /home/ben/task/evaluate.py "$@"
diff --git a/src/run_task.sh b/src/run_task.sh
index 7fd8108..d48f28d 100644
--- a/src/run_task.sh
+++ b/src/run_task.sh
@@ -73,6 +73,13 @@ if [ "$POST_TRAIN_BENCH_PROMPT" = "data_eng_prompt" ]; then
     cp src/eval/general/train_sft.py "${JOB_DIR}/task/"
     cp src/eval/general/dataset_audit.py "${JOB_DIR}/task/"
     cp src/eval/general/publish_experiment.py "${JOB_DIR}/task/"
+    # eval.sh wrapper: codex's `bash -lc` overwrites PATH and strips
+    # /opt/env/local/bin, so calling `python3 evaluate.py` directly fails
+    # to find the bind-mounted `vllm` CLI. This wrapper re-asserts PATH
+    # before exec'ing evaluate.py. Agents should `bash eval.sh ...` for
+    # self-evals.
+    cp src/eval/general/eval.sh "${JOB_DIR}/task/"
+    chmod +x "${JOB_DIR}/task/eval.sh"
     mkdir -p "${JOB_DIR}/task/experiments"
 fi
 