From 970d08e066321690c3df6b0fce872bdd3178e11f Mon Sep 17 00:00:00 2001 From: shauryr Date: Fri, 29 May 2026 21:44:02 +0000 Subject: [PATCH] Add eval.sh wrapper to preserve /opt/env PATH for agent self-evals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The codex CLI runs every shell command via `bash -lc "..."` (login shell), which sources /etc/profile + ~/.bashrc and overwrites PATH — stripping the apptainer-injected /opt/env/local/bin entry where the bind-mounted `vllm` CLI lives. As a result agents see `vllm: command not found` and inspect_ai can't spawn a local server. Observed in both V1 (6h) and V2 (12h clean-slate) data-eng pilots: agents rediscover the issue and manually prefix commands with `PATH=/opt/env/local/bin:$PATH ...`, burning ~5 min of exploration each time. This adds a small `eval.sh` wrapper that prepends /opt/env/local/bin and /opt/env/bin to PATH and execs `python3 /home/ben/task/evaluate.py "$@"`. The wrapper is copied into the task workspace only when POST_TRAIN_BENCH_PROMPT=data_eng_prompt, so default-prompt runs are unchanged. The data_eng_prompt.txt update to actually USE the wrapper lives in feature/v2-discipline (separate PR). --- src/eval/general/eval.sh | 17 +++++++++++++++++ src/run_task.sh | 7 +++++++ 2 files changed, 24 insertions(+) create mode 100755 src/eval/general/eval.sh diff --git a/src/eval/general/eval.sh b/src/eval/general/eval.sh new file mode 100755 index 0000000..a1164bd --- /dev/null +++ b/src/eval/general/eval.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Wrapper around evaluate.py for data-engineering agent runs. +# +# Why this exists: the bind-mounted Python env at /opt/env contains the +# `vllm` CLI binary at /opt/env/local/bin/vllm, and run_task.sh injects +# that directory into PATH via `apptainer exec --env PATH=...`. However, +# the codex CLI runs every shell command through `bash -lc "..."` (login +# shell), which sources /etc/profile + ~/.bashrc and *overwrites* PATH +# with the container's defaults — stripping out /opt/env/local/bin. 
As a +# result the agent sees `vllm: command not found` and inspect_ai cannot +# spawn its local vLLM server. +# +# This wrapper prepends the bind-mounted env to PATH (leaving no empty, i.e. +# current-directory, entry if PATH was unset) and forwards all arguments to +# evaluate.py. For self-evals call `bash eval.sh ...`, not `python3 evaluate.py ...`. +export PATH="/opt/env/local/bin:/opt/env/bin${PATH:+:${PATH}}" +exec python3 /home/ben/task/evaluate.py "$@" diff --git a/src/run_task.sh b/src/run_task.sh index 7fd8108..d48f28d 100644 --- a/src/run_task.sh +++ b/src/run_task.sh @@ -73,6 +73,13 @@ if [ "$POST_TRAIN_BENCH_PROMPT" = "data_eng_prompt" ]; then cp src/eval/general/train_sft.py "${JOB_DIR}/task/" cp src/eval/general/dataset_audit.py "${JOB_DIR}/task/" cp src/eval/general/publish_experiment.py "${JOB_DIR}/task/" + # eval.sh wrapper: codex's `bash -lc` overwrites PATH and strips + # /opt/env/local/bin, so calling `python3 evaluate.py` directly fails + # to find the bind-mounted `vllm` CLI. This wrapper re-asserts PATH + # before exec'ing evaluate.py. Agents should `bash eval.sh ...` for + # self-evals. + cp src/eval/general/eval.sh "${JOB_DIR}/task/" + chmod +x "${JOB_DIR}/task/eval.sh" mkdir -p "${JOB_DIR}/task/experiments" fi