From a8547178698fc1563d67888b3ab30b3a916fb068 Mon Sep 17 00:00:00 2001 From: Trevor McKay Date: Mon, 13 Apr 2026 16:53:22 -0400 Subject: [PATCH 1/2] Add a setup script to configure cuopt in a sandbox on nemoclaw --- cuopt_on_nemoclaw/SETUP.md | 184 +++++ cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh | 745 ++++++++++++++++++ .../openclaw-skills/cuopt/SKILL.md | 370 +++++++++ cuopt_on_nemoclaw/probe_grpc.py | 23 + 4 files changed, 1322 insertions(+) create mode 100644 cuopt_on_nemoclaw/SETUP.md create mode 100755 cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh create mode 100644 cuopt_on_nemoclaw/openclaw-skills/cuopt/SKILL.md create mode 100644 cuopt_on_nemoclaw/probe_grpc.py diff --git a/cuopt_on_nemoclaw/SETUP.md b/cuopt_on_nemoclaw/SETUP.md new file mode 100644 index 0000000..118a265 --- /dev/null +++ b/cuopt_on_nemoclaw/SETUP.md @@ -0,0 +1,184 @@ +# cuOpt + NemoClaw Setup Guide + +The cuOpt server must be running on the host before the sandbox can connect to it. +If you don't have it running yet, see [Starting the cuOpt server](#starting-the-cuopt-server). + +Install NemoClaw and then add cuOpt configuration. + +### 1. Install NemoClaw if it's not already installed + +For an interactive install of NemoClaw, do the following +and specify 'cuopt' as the sandbox name when prompted + +```bash +curl -fsSL https://nvidia.com/nemoclaw.sh | bash +``` + +For a non-interactive install of NemoClaw you can set +the configuration with environment variables. See +the [NemoClaw documentation](https://docs.nvidia.com/nemoclaw/latest/inference/use-local-inference.html) for more details. For example: + +```bash +export NVIDIA_API_KEY="nvapi-..." +export NEMOCLAW_PROVIDER=build +export NEMOCLAW_MODEL=nvidia/nemotron-3-super-120b-a12b +export NEMOCLAW_SANDBOX_NAME=cuopt + +curl -fsSL https://nvidia.com/nemoclaw.sh | bash -s -- \ + --non-interactive --yes-i-accept-third-party-software +``` + +### 2. 
Add the cuOpt configuration to a sandbox + +The 'add' command takes a sandbox name as an argument. Here we use 'cuopt' but +it can be any existing sandbox. + +```bash +./cuopt_claw/nemoclaw_cuopt_setup.sh add cuopt +``` + +> **Watch for the firewall warning banner.** If UFW is active and ports 5000/5001 +> are not open to Docker interfaces, the script will print a prominent warning +> with `sudo ufw allow` commands to fix it. Sandbox connections will +> hang (timeout) until the firewall is configured. + +## What the setup script does + +- **add** — Add cuOpt to an existing sandbox: apply-policy → install → install-skill → test +- **apply-policy** — Merges cuOpt network rules into a running sandbox's policy +- **install** — Creates a Python venv (`/sandbox/cuopt`), installs `cuopt_sh_client`, `cuopt-cu12`, and `grpcio`, and configures `.bashrc` with the server alias +- **install-skill** — Uploads skill files from `cuopt_claw/openclaw-skills/` into the sandbox +- **test** — Smoke tests PyPI access and cuOpt server connectivity from inside the sandbox + +## Getting cuOpt data into the sandbox + +Upload files from the host: + +```bash +openshell sandbox upload cuopt /path/to/local/file.mps /sandbox/workspace/ +``` + +Or clone a git repository inside the sandbox to get sample datasets, for example: + +```bash +# From inside the sandbox (nemoclaw cuopt connect) +git clone https://github.com/NVIDIA/cuopt repo +``` + +### Quick test with a sample dataset + +After cloning, verify end-to-end with a small LP: + +If you are running the Python service, use cuopt_sh + +```bash +cuopt_sh -t LP /sandbox/repo/datasets/linear_programming/afiro_original.mps +``` + +If you are running the gRPC server, use cuopt_cli + +```bash +cuopt_cli /sandbox/repo/datasets/linear_programming/afiro_original.mps +``` + +## Talking to the agent + +```bash +openclaw agent --agent main -m "your prompt here" +``` + +Or use the interactive TUI: + +```bash +openshell term +``` + +## Adding cuopt to an 
existing venv in a sandbox + +To install cuopt into an existing venv instead of creating a new one (e.g. `/sandbox/.venv`): + +```bash +CUOPT_VENV=.venv ./cuopt_claw/nemoclaw_cuopt_setup.sh add my-sandbox +``` + +## Updating skills + +To modify agent skills, edit or add files under `cuopt_claw/openclaw-skills/`. +Each subdirectory containing a `SKILL.md` will be uploaded. Then re-run: + +```bash +./cuopt_claw/nemoclaw_cuopt_setup.sh install-skill cuopt +``` + +## File locations + +| What | Path | +|------|------| +| Setup script | `cuopt_claw/nemoclaw_cuopt_setup.sh` | +| gRPC probe | `cuopt_claw/probe_grpc.py` (uploaded to `/sandbox/probe_grpc.py`) | +| Skill source files | `cuopt_claw/openclaw-skills/cuopt/SKILL.md` | +| cuOpt venv in sandbox | `/sandbox/cuopt/` | + +## Starting the cuOpt server + +The cuOpt release includes two server interfaces. You can run either or both: + +| Interface | Port | Protocol | How to start | +|-----------|------|----------|-------------| +| REST (Python) | 5000 | HTTP | `python3 -m cuopt_server.cuopt_service` | +| gRPC (native) | 5001 | HTTP/2 | `cuopt_grpc_server` (included in `libcuopt`) | + +Install the server package (replace `cu12` with your CUDA version): + +```bash +pip install cuopt-server-cu12 --extra-index-url=https://pypi.nvidia.com +``` + +To start the Python REST server: + +```bash +python3 -m cuopt_server.cuopt_service +``` + +To start the gRPC server: + +```bash +cuopt_grpc_server +``` + +Or run a server in the cuOpt container (see NVIDIA cuOpt documentation for container +instructions). + +Verify what's running: + +```bash +# REST server +curl http://localhost:5000/cuopt/health + +# gRPC server +python3 probe_grpc.py +``` + +Leave the server(s) running — the sandbox connects through +`host.openshell.internal` on port 5000 (REST) and/or 5001 (gRPC). 
+ +## Troubleshooting + +### Agent gets 403 Forbidden or connection timeout + +- Verify the cuOpt server is running: + - REST: `curl http://localhost:5000/cuopt/health` + - gRPC: `python3 probe_grpc.py` (or from inside the sandbox: `python3 /sandbox/probe_grpc.py`) +- Check the firewall: `sudo ufw status` — ports 5000 and 5001 must be open on Docker bridges +- Re-run `./cuopt_claw/nemoclaw_cuopt_setup.sh apply-policy cuopt` to repair the network policy + +## Advanced troubleshooting + +> **Warning:** The steps below modify sandbox internals and can break your setup. +> Use at your own risk. + +### Agent outputs raw XML tool calls instead of executing them + +If you see raw `` XML in agent output, the inference API may not +support the `openai-responses` format. Switch to `openai-completions` in +the sandbox's `openclaw.json` configuration. diff --git a/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh new file mode 100755 index 0000000..99aec24 --- /dev/null +++ b/cuopt_on_nemoclaw/nemoclaw_cuopt_setup.sh @@ -0,0 +1,745 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +# NemoClaw cuOpt sandbox setup +# +# Subcommands: +# add [NAME] Add cuOpt to a sandbox: policy + install + skill + test. 
+# apply-policy [NAME] Add cuOpt network policy to a running sandbox. +# install [NAME] Install cuOpt packages in /sandbox/cuopt venv. +# install-skill [NAME] Upload the cuOpt skill into the sandbox. +# test [NAME] Smoke-test PyPI + cuOpt server reachability. +# +# Flags: +# -y, --yes Skip confirmation prompts (for CI/CD). +# --activate Add venv auto-activation to .bashrc (install only; +# always on for 'add'). +# +# Environment: +# CUOPT_SANDBOX Sandbox name (default: cuopt) +# CUOPT_VENV Venv directory name under /sandbox/ (default: cuopt) +# CUOPT_HOST cuOpt server hostname (default: "" = localhost only) +# Set to a hostname, IP, or k8s service to allow remote cuOpt. +# Localhost entries (host.openshell.internal / host.docker.internal) +# are always included. CUOPT_HOST adds an additional endpoint. +# CUOPT_PORT cuOpt REST server port (default: 5000) +# CUOPT_GRPC_PORT cuOpt gRPC server port (default: 5001) +# CUOPT_PYTHON_BIN Exact path to Python binary in sandbox image +# (default: auto-detected from running sandbox, or +# /usr/bin/python3.11). Must be exact — no globs. +# CUOPT_HOST_IP IP that host.openshell.internal resolves to +# (default: auto-detected from running sandbox, or +# 172.17.0.1). Needed for OpenShell allowed_ips. 
+# +# Examples: +# ./cuopt_claw/nemoclaw_cuopt_setup.sh add cuopt # Add cuOpt to sandbox "cuopt" +# ./cuopt_claw/nemoclaw_cuopt_setup.sh add my-assistant # Add cuOpt to any sandbox +# ./cuopt_claw/nemoclaw_cuopt_setup.sh apply-policy bob # Just fix network policy +# ./cuopt_claw/nemoclaw_cuopt_setup.sh test cuopt # Re-run smoke test +# ============================================================================= +set -euo pipefail + +CUOPT_SANDBOX="${CUOPT_SANDBOX:-cuopt}" +CUOPT_VENV="${CUOPT_VENV:-cuopt}" +CUOPT_HOST="${CUOPT_HOST:-}" +CUOPT_PORT="${CUOPT_PORT:-5000}" +CUOPT_GRPC_PORT="${CUOPT_GRPC_PORT:-5001}" +CUOPT_PYTHON_BIN="${CUOPT_PYTHON_BIN:-}" +CUOPT_HOST_IP="${CUOPT_HOST_IP:-}" +FORCE=false +ACTIVATE=false + +# ── Locate NemoClaw package root ───────────────────────────────── +find_nemoclaw_root() { + local bin + bin="$(command -v nemoclaw 2>/dev/null || true)" + if [[ -z "$bin" ]]; then + echo "error: nemoclaw not on PATH" >&2 + return 1 + fi + local resolved + resolved="$(readlink -f "$bin")" + local candidate + candidate="$(cd "$(dirname "$resolved")/.." && pwd)" + if [[ -f "$candidate/nemoclaw-blueprint/policies/openclaw-sandbox.yaml" ]]; then + echo "$candidate"; return 0 + fi + local npm_root + npm_root="$(npm root -g 2>/dev/null || true)" + if [[ -n "$npm_root" && -f "$npm_root/nemoclaw/nemoclaw-blueprint/policies/openclaw-sandbox.yaml" ]]; then + echo "$npm_root/nemoclaw"; return 0 + fi + echo "error: could not locate nemoclaw-blueprint/policies/openclaw-sandbox.yaml" >&2 + return 1 +} + + + +# ── Detect the exact Python binary path inside the sandbox image ── +# OpenShell requires exact binary paths (no globs). 
+detect_python_bin() { + if [[ -n "$CUOPT_PYTHON_BIN" ]]; then + echo "$CUOPT_PYTHON_BIN" + return + fi + + # Try detecting from a running sandbox + local sandbox="${1:-}" + if [[ -n "$sandbox" ]]; then + local resolved + resolved="$(echo 'readlink -f /usr/bin/python3 && exit' \ + | openshell sandbox connect "$sandbox" 2>/dev/null \ + | grep '^/usr/bin/python3' | head -1)" + if [[ -n "$resolved" ]]; then + echo "$resolved" + return + fi + fi + + echo >&2 " (no running sandbox to detect from — using default /usr/bin/python3.11;" + echo >&2 " set CUOPT_PYTHON_BIN to override)" + echo "/usr/bin/python3.11" +} + +# ── Detect the Docker host IP (for allowed_ips in policy) ───────── +# OpenShell requires allowed_ips on hostname-based endpoints so the proxy +# can match outbound connections (to resolved IPs) back to hostname rules. +detect_host_ip() { + if [[ -n "$CUOPT_HOST_IP" ]]; then + echo "$CUOPT_HOST_IP" + return + fi + + local sandbox="${1:-}" + if [[ -n "$sandbox" ]]; then + local ip + ip="$(echo 'getent hosts host.openshell.internal | awk "{print \$1}" && exit' \ + | openshell sandbox connect "$sandbox" 2>/dev/null \ + | grep -oE '^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -1)" + if [[ -n "$ip" ]]; then + echo "$ip" + return + fi + fi + + echo >&2 " (no running sandbox to detect from — using default 172.17.0.1;" + echo >&2 " set CUOPT_HOST_IP to override)" + echo "172.17.0.1" +} + +# ── Firewall check ──────────────────────────────────────────────── +# Docker containers need to reach the host on CUOPT_PORT and/or +# CUOPT_GRPC_PORT. If UFW drops that traffic, sandbox connections hang. +# Also detects stale rules for bridges that no longer exist (e.g. after +# nemoclaw destroy / onboard recreates the Docker network). +# Usage: check_firewall [port ...] +# If ports are given, only check those. Otherwise check both. +check_firewall() { + if ! 
command -v ufw &>/dev/null; then return 0; fi + local status + status="$(sudo -n ufw status 2>/dev/null || ufw status 2>/dev/null || true)" + if ! echo "$status" | grep -q "^Status: active"; then return 0; fi + + # Ports to check for missing rules (only services that are running) + local ports=("$@") + if [[ ${#ports[@]} -eq 0 ]]; then + ports=("${CUOPT_PORT}" "${CUOPT_GRPC_PORT}") + fi + # All cuOpt ports — used for stale rule cleanup regardless of what's running + local all_ports=("${CUOPT_PORT}" "${CUOPT_GRPC_PORT}") + + # Current Docker bridge interfaces on this host + local -a current_bridges=() + while IFS= read -r iface; do + [[ -n "$iface" ]] && current_bridges+=("$iface") + done < <(ip -o link show type bridge 2>/dev/null \ + | awk -F': ' '{print $2}' \ + | grep -E '^(docker|br-)' || true) + if [[ ${#current_bridges[@]} -eq 0 ]]; then return 0; fi + + # Bridge interfaces referenced in UFW rules + local -a rule_bridges=() + while IFS= read -r rb; do + [[ -n "$rb" ]] && rule_bridges+=("$rb") + done < <(echo "$status" | grep -oE "on (docker0|br-[a-f0-9]+)" \ + | awk '{print $2}' | sort -u) + + # Stale bridges: in UFW rules but not actually present on the host + local -a stale_bridges=() + for rb in "${rule_bridges[@]}"; do + local is_current=false + for cb in "${current_bridges[@]}"; do + if [[ "$rb" == "$cb" ]]; then is_current=true; break; fi + done + if [[ "$is_current" == false ]]; then + stale_bridges+=("$rb") + fi + done + + # Missing rules: current bridges that lack a rule for one of our ports. + # UFW format: "5001 on docker0 ALLOW Anywhere" (interface before ALLOW). + # A true blanket allow (not scoped to any interface, e.g. "5001 ALLOW Anywhere") + # covers all bridges. Interface-scoped rules only apply to that bridge. + local -a missing_rules=() + for port in "${ports[@]}"; do + if echo "$status" | grep -E "^${port} " | grep -v " on " \ + | grep -qE "ALLOW"; then + continue + fi + for cb in "${current_bridges[@]}"; do + if ! 
echo "$status" | grep -qE "^${port}.*on ${cb}.*ALLOW"; then + missing_rules+=("${cb}:${port}") + fi + done + done + + # Count actual stale rules (check all cuOpt ports, not just listening ones) + local stale_rule_count=0 + for sb in "${stale_bridges[@]}"; do + for port in "${all_ports[@]}"; do + if echo "$status" | grep -qE "^${port}.*on ${sb}"; then + ((stale_rule_count++)) || true + fi + done + done + + # Nothing to report + if [[ $stale_rule_count -eq 0 && ${#missing_rules[@]} -eq 0 ]]; then + return 0 + fi + + echo "" + echo "╔══════════════════════════════════════════════════════════════════╗" + echo "║ ⚠ FIREWALL WARNING ║" + echo "╚══════════════════════════════════════════════════════════════════╝" + + if [[ ${#stale_bridges[@]} -gt 0 ]]; then + local -a stale_cmds=() + for sb in "${stale_bridges[@]}"; do + for port in "${all_ports[@]}"; do + if echo "$status" | grep -qE "^${port}.*on ${sb}"; then + stale_cmds+=("sudo ufw delete allow in on ${sb} to any port ${port}") + fi + done + done + if [[ ${#stale_cmds[@]} -gt 0 ]]; then + echo "" + echo " Stale UFW rules found for Docker bridges that no longer" + echo " exist (likely from a previous sandbox). Delete them:" + echo "" + for cmd in "${stale_cmds[@]}"; do + echo " $cmd" + done + fi + fi + + if [[ ${#missing_rules[@]} -gt 0 ]]; then + echo "" + echo " Missing rules — sandbox connections to cuOpt will HANG:" + echo "" + for entry in "${missing_rules[@]}"; do + local iface="${entry%%:*}" + local port="${entry##*:}" + echo " sudo ufw allow in on ${iface} to any port ${port}" + done + fi + + echo "" + echo " Then retry: $0 test" + echo "" + echo "══════════════════════════════════════════════════════════════════════" + echo "" +} + +# ── Policy entry generation (shared by patch + apply-policy) ────── +# OpenShell binary paths must be exact — globs (*, **) are silently ignored. +# Hostname endpoints require allowed_ips so the proxy can match resolved IPs. 
+generate_policy_entries() { + local sandbox="${1:-}" + local python_bin + python_bin="$(detect_python_bin "$sandbox")" + echo " Using Python binary: $python_bin" >&2 + + local host_ip + host_ip="$(detect_host_ip "$sandbox")" + echo " Docker host IP: $host_ip" >&2 + + local remote_endpoint="" + if [[ -n "$CUOPT_HOST" ]]; then + remote_endpoint=" + - host: ${CUOPT_HOST} + port: ${CUOPT_PORT} + - host: ${CUOPT_HOST} + port: ${CUOPT_GRPC_PORT}" + fi + + cat </dev/null || true)" + if [[ -z "$current" ]]; then + echo "error: could not read policy for sandbox '$sandbox'." >&2 + echo " Is the sandbox running? Check with: openshell sandbox list" >&2 + exit 1 + fi + + # openshell policy get --full may include metadata fields (e.g. "Version") + # that openshell policy set rejects. Strip any top-level keys that aren't + # in the accepted schema: version, filesystem_policy, landlock, process, + # network_policies. + current="$(python3 -c " +import sys, re +allowed = {'version', 'filesystem_policy', 'landlock', 'process', 'network_policies'} +lines = sys.stdin.read().split('\n') +result = [] +skip = False +for line in lines: + m = re.match(r'^([A-Za-z_][A-Za-z0-9_]*):', line) + if m: + key = m.group(1) + if key not in allowed: + skip = True + continue + else: + skip = False + if skip and line and line[0].isspace(): + continue + skip = False + result.append(line) +print('\n'.join(result)) +" <<< "$current")" + + local entries + entries="$(generate_policy_entries "$sandbox")" + if [[ -n "$CUOPT_HOST" ]]; then + echo "Remote cuOpt endpoint: ${CUOPT_HOST}:${CUOPT_PORT}" + fi + + # Merge entries into the network_policies section of the current policy. + # openshell policy set replaces the full policy, so we must read-merge-write. + # If our entries already exist, strip them first so they get re-added with + # freshly detected values (Python binary, host IP). 
+ local merged + merged="$(python3 -c " +import sys + +current = sys.stdin.read() +entries = '''${entries}''' + +our_keys = {'pypi_public:', 'nvidia_pypi:', 'cuopt_host:'} + +lines = current.split('\n') +result = [] +in_np = False +inserted = False +skip_block = False + +for line in lines: + stripped = line.strip() + is_top_level = line and not line[0].isspace() and ':' in line + is_np_entry = (not is_top_level and stripped and + not stripped.startswith('#') and + stripped.endswith(':') and + line.startswith(' ') and not line.startswith(' ')) + + if stripped == 'network_policies:' or stripped.startswith('network_policies:'): + in_np = True + result.append(line) + continue + + if in_np and is_np_entry and stripped in our_keys: + skip_block = True + continue + + if skip_block: + if is_np_entry or (is_top_level and ':' in line): + skip_block = False + else: + continue + + if in_np and is_top_level and not inserted: + result.append(entries) + inserted = True + in_np = False + + result.append(line) + +if in_np and not inserted: + result.append(entries) + +if not any('network_policies' in l for l in lines): + result.append('') + result.append('network_policies:') + result.append(entries) + +print('\n'.join(result)) +" <<< "$current")" + + local tmpfile + tmpfile="$(mktemp /tmp/cuopt-policy-XXXXXX.yaml)" + echo "$merged" > "$tmpfile" + + openshell policy set --policy "$tmpfile" --wait "$sandbox" + rm -f "$tmpfile" + echo "Policy applied to sandbox '$sandbox'." +} + + +# ── install ─────────────────────────────────────────────────────── +cmd_install() { + local sandbox="${1:-$CUOPT_SANDBOX}" + local venv="/sandbox/${CUOPT_VENV}" + echo "Installing cuopt_sh_client in ${venv} venv (sandbox: $sandbox) ..." + + # Detect the sandbox's Python and check it against the policy. 
+ local actual_python + actual_python="$(detect_python_bin "$sandbox")" + echo "Sandbox Python binary: $actual_python" + + local root policy_file + root="$(find_nemoclaw_root 2>/dev/null || true)" + if [[ -n "$root" ]]; then + policy_file="$root/nemoclaw-blueprint/policies/openclaw-sandbox.yaml" + if [[ -f "$policy_file" ]] && grep -q 'cuopt_host:' "$policy_file"; then + local policy_python + policy_python="$(grep -A 20 'cuopt_host:' "$policy_file" \ + | grep '{ path: /usr/bin/python' \ + | head -1 \ + | sed 's/.*{ path: \([^ }]*\).*/\1/')" + if [[ -n "$policy_python" && "$policy_python" != "$actual_python" ]]; then + echo "" + echo "WARNING: Python version mismatch!" + echo " Sandbox has: $actual_python" + echo " Policy expects: $policy_python" + echo "" + echo " Network requests from Python will be blocked (403 Forbidden)." + echo " Fix: re-run 'patch' to update the policy, then 'onboard' or 'apply-policy':" + echo " $0 patch" + echo " $0 apply-policy $sandbox" + echo "" + fi + fi + fi + + local commands=( + "python3 -m venv ${venv}" + "source ${venv}/bin/activate" + "pip install cuopt-sh-client cuopt-cu12==26.04 grpcio --extra-index-url=https://pypi.nvidia.com" + "python3 -c \"import cuopt_sh_client; print('cuopt_sh_client', cuopt_sh_client.__version__)\"" + ) + + local cuopt_ip="host.openshell.internal" + [[ -n "$CUOPT_HOST" ]] && cuopt_ip="$CUOPT_HOST" + + if [[ "$ACTIVATE" == true ]]; then + commands+=( + "" + "if ! 
grep -q '${venv}/bin/activate' /sandbox/.bashrc 2>/dev/null; then" + " echo '' >> /sandbox/.bashrc" + " echo '# cuOpt environment (added by nemoclaw_cuopt_setup.sh)' >> /sandbox/.bashrc" + " echo 'if [ -f ${venv}/bin/activate ]; then source ${venv}/bin/activate; fi' >> /sandbox/.bashrc" + " echo 'export CUOPT_SERVER=${cuopt_ip}:${CUOPT_PORT}' >> /sandbox/.bashrc" + " echo 'alias cuopt_sh=\"cuopt_sh -i ${cuopt_ip} -p ${CUOPT_PORT}\"' >> /sandbox/.bashrc" + " echo 'Added venv auto-activation + cuopt_sh alias to /sandbox/.bashrc'" + "fi" + ) + fi + + commands+=("exit") + printf '%s\n' "${commands[@]}" | openshell sandbox connect "$sandbox" + echo "Install complete." +} + +# ── test ────────────────────────────────────────────────────────── +cmd_test() { + local sandbox="${1:-$CUOPT_SANDBOX}" + local venv="/sandbox/${CUOPT_VENV}" + local grpc_host="host.openshell.internal" + local cuopt_url="http://host.openshell.internal:${CUOPT_PORT}" + if [[ -n "$CUOPT_HOST" ]]; then + grpc_host="${CUOPT_HOST}" + local scheme="http" + [[ "$CUOPT_PORT" == "443" ]] && scheme="https" + cuopt_url="${scheme}://${CUOPT_HOST}:${CUOPT_PORT}" + fi + # Check what's actually listening on the host before bothering the sandbox + local has_grpc=false has_rest=false + if ss -tlnH "sport = :${CUOPT_GRPC_PORT}" 2>/dev/null | grep -q .; then + has_grpc=true + fi + if ss -tlnH "sport = :${CUOPT_PORT}" 2>/dev/null | grep -q .; then + has_rest=true + fi + + if [[ "$has_grpc" == false && "$has_rest" == false ]]; then + echo "" + echo "No cuOpt server detected on the host." + echo " - Nothing listening on port ${CUOPT_PORT} (REST)" + echo " - Nothing listening on port ${CUOPT_GRPC_PORT} (gRPC)" + echo " Start a cuOpt server first, then re-run: $0 test ${sandbox}" + echo "" + return 1 + fi + + echo "Host services: REST=$(if $has_rest; then echo UP; else echo DOWN; fi) gRPC=$(if $has_grpc; then echo UP; else echo DOWN; fi)" + echo "Smoke-testing sandbox: $sandbox (venv: $venv) ..." 
+ + local sandbox_cmds=" +source ${venv}/bin/activate +echo '--- pip check ---' +python3 -c \"import cuopt_sh_client; print('cuopt_sh_client', cuopt_sh_client.__version__)\" +" + + if [[ "$has_grpc" == true ]]; then + sandbox_cmds+=" +echo '' +echo '--- gRPC server (${grpc_host}:${CUOPT_GRPC_PORT}) ---' +CUOPT_REMOTE_HOST=${grpc_host} CUOPT_REMOTE_PORT=${CUOPT_GRPC_PORT} python3 /sandbox/probe_grpc.py || true +" + fi + + if [[ "$has_rest" == true ]]; then + sandbox_cmds+=" +echo '' +echo '--- REST server (${cuopt_url}) ---' +python3 -c \" +import requests +try: + r = requests.get('${cuopt_url}/cuopt/health', timeout=5) + print(f'REST: status {r.status_code}') + print(f'REST: {r.text[:300]}') +except Exception as e: + print(f'REST: NOT reachable ({e})') +\" +" + fi + + sandbox_cmds+=" +echo '' +exit +" + echo "$sandbox_cmds" | openshell sandbox connect "$sandbox" + echo "Test complete." + + # Only warn about firewall for ports that are actually listening + local check_ports=() + [[ "$has_rest" == true ]] && check_ports+=("${CUOPT_PORT}") + [[ "$has_grpc" == true ]] && check_ports+=("${CUOPT_GRPC_PORT}") + check_firewall "${check_ports[@]}" +} + +# ── install-skill ───────────────────────────────────────────────── +cmd_install_skill() { + local sandbox="${1:-$CUOPT_SANDBOX}" + local script_dir + script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + local skills_dir="$script_dir/openclaw-skills" + + if [[ ! -d "$skills_dir" ]]; then + echo "error: skills directory not found at $skills_dir" >&2 + exit 1 + fi + + echo "Installing skills into sandbox '$sandbox' ..." + for skill in "$skills_dir"/*/; do + local name + name="$(basename "$skill")" + if [[ -f "$skill/SKILL.md" ]]; then + echo " Uploading skill: $name" + if ! openshell sandbox upload "$sandbox" "$skill" "/sandbox/.openclaw-data/skills/$name" 2>&1; then + echo " warning: upload failed for skill '$name'" >&2 + fi + fi + done + + # The OpenClaw agent compacts paths under $HOME to ~/… in the system prompt. 
+ # When the agent later tries to read that path, ~ may expand to a different + # directory (e.g. /root/) depending on the tool executor context. + # + # Workaround: install a small "cuopt-setup" guardrail skill into the OpenClaw + # bundled skills directory. That path is absolute (outside $HOME), so it never + # gets ~-compacted. The guardrail tells the agent where to find the real skill + # if the ~-based path fails. + # + # Best-effort — if docker exec is unavailable the managed skill still works + # whenever ~ resolves correctly. + local gw="${GATEWAY_CONTAINER:-openshell-cluster-nemoclaw}" + local ns="${K8S_NAMESPACE:-openshell}" + local bundled_dir="/usr/local/lib/node_modules/openclaw/skills/cuopt-setup" + + local guardrail_content + guardrail_content="$(cat <<'GUARDRAIL' +--- +name: cuopt-setup +description: "Read this FIRST when working with cuOpt. If you cannot read the cuopt skill at ~/.openclaw/skills/cuopt/SKILL.md, this tells you where the real file lives." +--- + +# cuOpt skill — path resolution + +The full cuOpt skill is installed at an absolute path that always works: + + /sandbox/.openclaw-data/skills/cuopt/SKILL.md + +Read that file now for complete cuOpt usage instructions. + +## Why this guardrail exists + +OpenClaw compacts skill paths to `~/…` in the system prompt. When you try +to read `~/.openclaw/skills/cuopt/SKILL.md`, the `~` may expand to `/root/` +or another directory that is not readable. If that happens: + +1. Do NOT give up on using cuOpt. +2. Read the skill from the absolute path above. +3. Also check `/sandbox/.openclaw/skills/cuopt/SKILL.md` (symlink to same file). +GUARDRAIL +)" + + local b64 + b64="$(printf '%s' "$guardrail_content" | base64 -w 0)" + + echo " Installing cuopt-setup guardrail into bundled skills dir ..." 
+ docker exec "$gw" \ + kubectl exec -n "$ns" "$sandbox" -- \ + sh -c "mkdir -p '${bundled_dir}' && echo '${b64}' | base64 -d > '${bundled_dir}/SKILL.md'" \ + 2>/dev/null \ + || echo " warning: could not install cuopt-setup guardrail (non-fatal)" >&2 + + echo "Skills installed." + + # Upload gRPC probe script + local probe="$script_dir/probe_grpc.py" + if [[ -f "$probe" ]]; then + echo " Uploading probe_grpc.py" + if ! openshell sandbox upload "$sandbox" "$probe" "/sandbox/probe_grpc.py" 2>&1; then + echo " Upload failed — falling back to inline copy via sandbox connect" + local probe_content + probe_content="$(cat "$probe")" + printf '%s\n' \ + "cat > /sandbox/probe_grpc.py << 'PROBE_EOF'" \ + "$probe_content" \ + "PROBE_EOF" \ + "exit" \ + | openshell sandbox connect "$sandbox" >/dev/null 2>&1 + if openshell sandbox connect "$sandbox" -- test -f /sandbox/probe_grpc.py 2>/dev/null; then + echo " probe_grpc.py written via fallback" + else + echo " warning: failed to write probe_grpc.py into sandbox" >&2 + fi + fi + else + echo " warning: probe_grpc.py not found at $probe — skipping" >&2 + fi +} + + +# ── add (existing sandbox shortcut) ─────────────────────────────── +cmd_add() { + local sandbox="${1:-$CUOPT_SANDBOX}" + ACTIVATE=true + cmd_apply_policy "$sandbox" + cmd_install "$sandbox" + cmd_install_skill "$sandbox" + cmd_test "$sandbox" +} + + +# ── dispatch ────────────────────────────────────────────────────── +usage() { + sed -n '2,37p' "$0" +} + +main() { + # Pull out global flags before subcommand dispatch + local args=() + for arg in "$@"; do + case "$arg" in + -y|--yes) FORCE=true ;; + --activate) ACTIVATE=true ;; + *) args+=("$arg") ;; + esac + done + set -- "${args[@]+"${args[@]}"}" + + local sub="${1:-}" + shift || true + case "${sub}" in + apply-policy) cmd_apply_policy "${1:-}" ;; + install) cmd_install "${1:-}" ;; + install-skill) cmd_install_skill "${1:-}" ;; + test) cmd_test "${1:-}" ;; + add) cmd_add "${1:-}" ;; + help|-h|--help) usage ;; + *) + 
echo "unknown command: ${sub:-}" >&2 + usage >&2 + exit 1 + ;; + esac +} + +main "$@" diff --git a/cuopt_on_nemoclaw/openclaw-skills/cuopt/SKILL.md b/cuopt_on_nemoclaw/openclaw-skills/cuopt/SKILL.md new file mode 100644 index 0000000..67af76c --- /dev/null +++ b/cuopt_on_nemoclaw/openclaw-skills/cuopt/SKILL.md @@ -0,0 +1,370 @@ +--- +name: cuopt +description: Use NVIDIA cuOpt to solve vehicle routing (VRP/CVRPTW) and linear programming (LP/MIP) optimization problems. Use when the user asks to optimize routes, solve a routing problem, minimize cost, plan deliveries, solve an LP, or use cuOpt. +--- + +# NVIDIA cuOpt (Sandbox) + +Solve optimization problems using NVIDIA cuOpt from inside the sandbox. +This skill covers **sandbox-specific** setup — networking, venv, and connectivity. + +> **In this sandbox, LP/MILP must be solved through the remote cuOpt service +> on the host, not the local CUDA runtime.** There is no GPU inside the +> sandbox. If you see `cudaErrorInsufficientDriver` or similar CUDA errors, +> you accidentally invoked a local solve — set `CUOPT_REMOTE_HOST` and +> `CUOPT_REMOTE_PORT` to use the remote service instead. + +For **how to use cuOpt** (formulation, Python API, CLI, MPS format, routing, etc.), +read the upstream skills at: + + +Key upstream skills: +- `cuopt-lp-milp-api-python` — LP/MILP with the Python SDK (Problem class, examples, status checking) +- `cuopt-lp-milp-api-cli` — LP/MILP via `cuopt_cli` with MPS files +- `cuopt-routing-api-python` — Vehicle routing (VRP, TSP, PDP) with Python +- `lp-milp-formulation` — How to go from problem text to formulation +- `cuopt-user-rules` — Behavior rules: clarify before coding, verify results + +## Environment + +The cuOpt client and SDK are installed in a Python virtual environment at `/sandbox/cuopt`. 
+Activate it before any cuOpt work: + +```bash +source /sandbox/cuopt/bin/activate +``` + +If the venv doesn't exist, create it: + +```bash +python3 -m venv /sandbox/cuopt +source /sandbox/cuopt/bin/activate +pip install cuopt-sh-client cuopt-cu12==26.04 grpcio --extra-index-url=https://pypi.nvidia.com +``` + +## Networking — CRITICAL + +> **Always use `host.openshell.internal` as the server address.** +> Do NOT use `localhost`, `127.0.0.1`, or `0.0.0.0` — these resolve inside +> the sandbox container and will be **blocked** (403 Forbidden or timeout). + +Two server interfaces are available on the host: + +| Interface | Port | Protocol | Use for | +|-----------|------|----------|---------| +| REST | 5000 | HTTP | `cuopt_sh` CLI, `cuopt_sh_client` Python client, health checks | +| gRPC | 5001 | HTTP/2 | `cuopt_cli` remote execution, Python SDK remote solves | + +The `CUOPT_SERVER` environment variable (if set in `.bashrc`) contains the +REST `host:port` value. + +## Connectivity Checks — Do This First + +**Always verify connectivity before solving.** The host may be running one or +both cuOpt services. Either service alone is sufficient for LP/MILP — use +whichever is available. If both are up, either path works. + +Follow this checklist: + +1. **Activate the venv**: `source /sandbox/cuopt/bin/activate` +2. **Probe gRPC (port 5001)**: + ```bash + python3 /sandbox/probe_grpc.py + ``` + Expected: `server is reachable (host.openshell.internal:5001)`. + If reachable, you can use the **Python SDK** or **`cuopt_cli`** (set + `CUOPT_REMOTE_HOST` / `CUOPT_REMOTE_PORT`). +3. **Probe REST (port 5000)**: + ```bash + curl -sf http://host.openshell.internal:5000/cuopt/health + ``` + Expected: JSON like `{"status":"RUNNING",...}`. + If reachable, you can use **`cuopt_sh`** CLI or **`cuopt_sh_client`** Python client. +4. **If neither is reachable** — do not proceed. The cuOpt server is not + running on the host. Ask the operator to start it. 
+ +**Valid configurations:** +- gRPC only (5001) — use Python SDK or `cuopt_cli` +- REST only (5000) — use `cuopt_sh -t LP file.mps` or `client.get_LP_solve("file.mps")` +- Both — use any tool; gRPC tools and REST tools both work for LP/MILP + +When checking gRPC, look for `Using remote GPU backend` in solve output to +confirm the solve actually ran on the host. + +## Using cuopt_cli (LP/MILP from MPS files) + +`cuopt_cli` is a native binary that solves LP/MILP from MPS files. For remote +execution from the sandbox, set these environment variables: + +```bash +export CUOPT_REMOTE_HOST=host.openshell.internal +export CUOPT_REMOTE_PORT=5001 +cuopt_cli problem.mps +``` + +For MPS format, options, and examples, see the upstream skill `cuopt-lp-milp-api-cli`. + +## Using the Python SDK (LP/MILP) — requires gRPC + +The Python SDK solves remotely via the gRPC server (port 5001). If gRPC is +not available, use the REST path instead (`cuopt_sh` or `get_LP_solve()`). +Set the environment variables before running: + +```bash +export CUOPT_REMOTE_HOST=host.openshell.internal +export CUOPT_REMOTE_PORT=5001 +``` + +Quick working example (expected: Optimal, objective = 10, x = 2, y = 2): + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +p = Problem("QuickLP") +x = p.addVariable(lb=0, vtype=CONTINUOUS, name="x") +y = p.addVariable(lb=0, vtype=CONTINUOUS, name="y") +p.addConstraint(x + y <= 4, name="total") +p.addConstraint(x <= 2, name="cap_x") +p.addConstraint(y <= 3, name="cap_y") +p.setObjective(3*x + 2*y, sense=MAXIMIZE) +p.solve(SolverSettings()) +print(p.Status.name, p.ObjValue, x.getValue(), y.getValue()) +``` + +If configured correctly you will see `Using remote GPU backend` in the output. + +For full API usage, modeling patterns, and examples, see the upstream skill +`cuopt-lp-milp-api-python`. 
+ +## Using the REST interface (cuopt_sh / cuopt_sh_client) + +The REST interface on port 5000 supports LP/MILP and routing. Use it when +gRPC is unavailable, or when you prefer the REST path. + +### LP/MILP via REST — CLI + +```bash +cuopt_sh -t LP /path/to/problem.mps -i host.openshell.internal -p 5000 +``` + +### LP/MILP via REST — Python + +`get_LP_solve()` accepts these inputs: +- **MPS file path** (string ending in `.mps`) — the client parses it and sends JSON +- **`DataModel`** from `cuopt_mps_parser` — already parsed, sent as JSON +- **dict** — raw JSON problem data + +Do **not** pass a `Problem` object from `cuopt.linear_programming.problem` — +that is the Python SDK class (gRPC path), not the REST client's `DataModel`. + +```python +from cuopt_sh_client import CuOptServiceSelfHostClient + +client = CuOptServiceSelfHostClient( + ip="host.openshell.internal", port="5000" +) + +# Simplest: pass an MPS file path directly +result = client.get_LP_solve("problem.mps") +print(result) +``` + +### Routing via REST — Python + +```python +from cuopt_sh_client import CuOptServiceSelfHostClient + +client = CuOptServiceSelfHostClient( + ip="host.openshell.internal", port="5000" +) +solution = client.get_optimized_routes(data) +``` + +## Troubleshooting + +| Symptom | Cause | Fix | +|---------|-------|-----| +| `cudaErrorInsufficientDriver` or CUDA errors | Accidentally invoked local solve instead of remote service | Set `CUOPT_REMOTE_HOST=host.openshell.internal` and `CUOPT_REMOTE_PORT=5001` before solving | +| `403 Forbidden` | Wrong address or sandbox policy missing port | Use `host.openshell.internal`, not `localhost`. If address is correct, ask operator to run `nemoclaw_cuopt_setup.sh apply-policy` | +| `Connection refused` on `:5000` | REST service not running or host firewall blocking the port | Check if REST is needed; gRPC alone (5001) is sufficient for LP/MILP. 
If REST is needed, ask operator to start it | +| `server is not reachable` from `probe_grpc.py` | gRPC service not running, port 5001 not in sandbox policy, or host firewall | Verify gRPC server is running on host; ask operator to check policy and firewall | +| Connection timeout / hang | Server not running or host firewall blocking Docker | Ask operator to verify from host: `ss -tlnp \| grep 500` | +| Timeout through `10.200.0.1:3128` | Sandbox proxy cannot reach the destination | Ask operator to verify sandbox network policy includes the cuOpt ports | +| `ModuleNotFoundError` | Venv not activated | Run `source /sandbox/cuopt/bin/activate` | +| No `Using remote GPU backend` in output | Remote env vars not set or not picked up | Ensure `CUOPT_REMOTE_HOST` and `CUOPT_REMOTE_PORT` are exported before the Python process starts | + +--- + + + +## cuOpt Python SDK Quick Reference (LP/MILP) + +> **This section is a temporary local copy of SDK patterns that belong in the +> upstream skills. It will be removed once the upstream skills are updated.** + +### Imports + +```python +from cuopt.linear_programming.problem import ( + Problem, CONTINUOUS, INTEGER, MINIMIZE, MAXIMIZE, LinearExpression, +) +from cuopt.linear_programming.solver_settings import SolverSettings +``` + +### Expression Style + +cuOpt uses **operator overloading** for building constraints and objectives. +Do NOT pass coefficient dictionaries — `Variable` objects are not hashable. 
+ +```python +# ✅ CORRECT — operator overloading +problem.addConstraint(2*x + 3*y <= 120, name="resource") +problem.setObjective(40*x + 30*y, sense=MAXIMIZE) + +# ❌ WRONG — dict-style coefficients (will fail) +problem.setObjective({x: 40, y: 30}, sense=MAXIMIZE) +``` + +For large numbers of terms, use `LinearExpression` to avoid recursion limits: + +```python +expr = LinearExpression(vars_list, coeffs_list, constant=0.0) +problem.addConstraint(expr <= 100) +``` + +### Reading Results + +After `problem.solve()`, results live on the **Problem object**, not a separate +solution object: + +```python +problem.solve(settings) + +# Status (PascalCase, not ALL_CAPS) +print(problem.Status.name) # e.g. "Optimal", "FeasibleFound" + +# Objective value +print(problem.ObjValue) + +# Variable values +print(x.getValue()) +print(y.getValue()) +``` + +**LP status values:** `Optimal`, `NoTermination`, `NumericalError`, +`PrimalInfeasible`, `DualInfeasible`, `IterationLimit`, `TimeLimit`, +`PrimalFeasible` + +**MILP status values:** `Optimal`, `FeasibleFound`, `Infeasible`, +`Unbounded`, `TimeLimit`, `NoTermination` + +### Complete Working Example (Smoke Test) + +This LP is a known-good test for the sandbox environment. Expected result: +Optimal, objective = 10, x = 2, y = 2. 
+ +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, MAXIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +problem = Problem("SmokeTest") + +x = problem.addVariable(lb=0, vtype=CONTINUOUS, name="x") +y = problem.addVariable(lb=0, vtype=CONTINUOUS, name="y") + +problem.addConstraint(x + y <= 4, name="total") +problem.addConstraint(x <= 2, name="cap_x") +problem.addConstraint(y <= 3, name="cap_y") + +problem.setObjective(3*x + 2*y, sense=MAXIMIZE) + +settings = SolverSettings() +problem.solve(settings) + +print(f"Status: {problem.Status.name}") # Optimal +print(f"Objective: {problem.ObjValue}") # 10.0 +print(f"x = {x.getValue()}") # 2.0 +print(f"y = {y.getValue()}") # 2.0 +``` + +If running remotely, you should see `Using remote GPU backend` in the solver +log output — that confirms the solve ran on the host, not locally. + +### MILP Example (Integer Variables) + +```python +from cuopt.linear_programming.problem import Problem, CONTINUOUS, INTEGER, MINIMIZE +from cuopt.linear_programming.solver_settings import SolverSettings + +problem = Problem("FacilityLocation") + +# Binary variable: lb=0, ub=1, vtype=INTEGER +open_fac = problem.addVariable(lb=0, ub=1, vtype=INTEGER, name="open") +production = problem.addVariable(lb=0, vtype=CONTINUOUS, name="prod") + +problem.addConstraint(production <= 1000 * open_fac, name="link") +problem.setObjective(500*open_fac + 2*production, sense=MINIMIZE) + +settings = SolverSettings() +settings.set_parameter("time_limit", 120) +settings.set_parameter("mip_relative_gap", 0.01) + +problem.solve(settings) + +if problem.Status.name in ["Optimal", "FeasibleFound"]: + print(f"Open: {open_fac.getValue() > 0.5}") + print(f"Production: {production.getValue()}") + print(f"Cost: {problem.ObjValue}") +``` + +### Common Mistakes + +| Mistake | What happens | Fix | +|---------|-------------|-----| +| Dict-style coefficients `{x: 3}` | `TypeError: unhashable type` | Use operator overloading: `3*x` 
| +| `problem.Status.name == "OPTIMAL"` | Never matches (silent failure) | Use PascalCase: `"Optimal"` | +| Calling `getObjectiveValue()` | `AttributeError` | Use `problem.ObjValue` | +| Calling `solution.get_primal_solution()` | Wrong API layer | Use `x.getValue()` on each variable | +| Chained `+` with many vars | `RecursionError` | Use `LinearExpression(vars, coeffs)` | + +### cuopt_cli with MPS Files + +```bash +# Basic solve +cuopt_cli problem.mps + +# With options +cuopt_cli problem.mps --time-limit 120 --mip-relative-tolerance 0.01 + +# Remote execution (from sandbox) +CUOPT_REMOTE_HOST=host.openshell.internal CUOPT_REMOTE_PORT=5001 cuopt_cli problem.mps +``` + +### MPS Format Quick Reference + +``` +NAME +ROWS + N ← objective row (N = no constraint) + L ← ≤ constraint + G ← ≥ constraint + E ← = constraint +COLUMNS + ← coefficient for variable in row +RHS + ← right-hand side constants +BOUNDS ← optional (defaults: 0 ≤ x < ∞) + LO ← lower bound + UP ← upper bound + FX ← fixed value + FR ← free variable (−∞ to +∞) +ENDATA +``` + +MPS **minimizes** by default. To maximize, negate objective coefficients and +negate the final objective value. diff --git a/cuopt_on_nemoclaw/probe_grpc.py b/cuopt_on_nemoclaw/probe_grpc.py new file mode 100644 index 0000000..c2f2ecd --- /dev/null +++ b/cuopt_on_nemoclaw/probe_grpc.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
"""Probe the cuOpt gRPC endpoint from inside the sandbox.

Prints a single status line.  SKILL.md documents the expected output as
``server is reachable (host.openshell.internal:5001)``, so the target
address is included in the message (the previous version omitted it,
contradicting the skill documentation).
"""

import os

import grpc

# Default matches the sandbox networking documented in SKILL.md; an env
# var override supports non-default deployments without breaking callers.
TARGET = os.environ.get("CUOPT_GRPC_TARGET", "host.openshell.internal:5001")

channel = grpc.insecure_channel(TARGET)
try:
    # Block until the channel is READY or the 1s timeout expires.
    grpc.channel_ready_future(channel).result(timeout=1)
    print(f"server is reachable ({TARGET})")
except grpc.FutureTimeoutError:
    print(f"server is not reachable ({TARGET})")
finally:
    # Release the channel's sockets/threads instead of leaking them.
    channel.close()
Strip any top-level keys that aren't - # in the accepted schema: version, filesystem_policy, landlock, process, - # network_policies. - current="$(python3 -c " -import sys, re -allowed = {'version', 'filesystem_policy', 'landlock', 'process', 'network_policies'} -lines = sys.stdin.read().split('\n') -result = [] -skip = False -for line in lines: - m = re.match(r'^([A-Za-z_][A-Za-z0-9_]*):', line) - if m: - key = m.group(1) - if key not in allowed: - skip = True - continue - else: - skip = False - if skip and line and line[0].isspace(): - continue - skip = False - result.append(line) -print('\n'.join(result)) -" <<< "$current")" + # in the accepted schema. + current="$(python3 "$SCRIPT_DIR/utils/strip_policy_metadata.py" <<< "$current")" local entries entries="$(generate_policy_entries "$sandbox")" @@ -385,60 +366,7 @@ print('\n'.join(result)) # If our entries already exist, strip them first so they get re-added with # freshly detected values (Python binary, host IP). local merged - merged="$(python3 -c " -import sys - -current = sys.stdin.read() -entries = '''${entries}''' - -our_keys = {'pypi_public:', 'nvidia_pypi:', 'cuopt_host:'} - -lines = current.split('\n') -result = [] -in_np = False -inserted = False -skip_block = False - -for line in lines: - stripped = line.strip() - is_top_level = line and not line[0].isspace() and ':' in line - is_np_entry = (not is_top_level and stripped and - not stripped.startswith('#') and - stripped.endswith(':') and - line.startswith(' ') and not line.startswith(' ')) - - if stripped == 'network_policies:' or stripped.startswith('network_policies:'): - in_np = True - result.append(line) - continue - - if in_np and is_np_entry and stripped in our_keys: - skip_block = True - continue - - if skip_block: - if is_np_entry or (is_top_level and ':' in line): - skip_block = False - else: - continue - - if in_np and is_top_level and not inserted: - result.append(entries) - inserted = True - in_np = False - - result.append(line) - -if 
in_np and not inserted: - result.append(entries) - -if not any('network_policies' in l for l in lines): - result.append('') - result.append('network_policies:') - result.append(entries) - -print('\n'.join(result)) -" <<< "$current")" + merged="$(python3 "$SCRIPT_DIR/utils/merge_policy_entries.py" --entries "$entries" <<< "$current")" local tmpfile tmpfile="$(mktemp /tmp/cuopt-policy-XXXXXX.yaml)" @@ -478,8 +406,7 @@ cmd_install() { echo " Policy expects: $policy_python" echo "" echo " Network requests from Python will be blocked (403 Forbidden)." - echo " Fix: re-run 'patch' to update the policy, then 'onboard' or 'apply-policy':" - echo " $0 patch" + echo " Fix: re-run apply-policy to update the policy:" echo " $0 apply-policy $sandbox" echo "" fi @@ -596,9 +523,7 @@ exit # ── install-skill ───────────────────────────────────────────────── cmd_install_skill() { local sandbox="${1:-$CUOPT_SANDBOX}" - local script_dir - script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - local skills_dir="$script_dir/openclaw-skills" + local skills_dir="$SCRIPT_DIR/openclaw-skills" if [[ ! -d "$skills_dir" ]]; then echo "error: skills directory not found at $skills_dir" >&2 @@ -672,7 +597,7 @@ GUARDRAIL echo "Skills installed." # Upload gRPC probe script - local probe="$script_dir/probe_grpc.py" + local probe="$SCRIPT_DIR/probe_grpc.py" if [[ -f "$probe" ]]; then echo " Uploading probe_grpc.py" if ! openshell sandbox upload "$sandbox" "$probe" "/sandbox/probe_grpc.py" 2>&1; then diff --git a/cuopt_on_nemoclaw/utils/merge_policy_entries.py b/cuopt_on_nemoclaw/utils/merge_policy_entries.py new file mode 100644 index 0000000..9f66745 --- /dev/null +++ b/cuopt_on_nemoclaw/utils/merge_policy_entries.py @@ -0,0 +1,102 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
"""Merge cuOpt network policy entries into an existing sandbox policy.

Reads the current policy YAML from stdin and merges the new entries into
the ``network_policies`` section.  If cuOpt entries already exist they are
replaced with the new values.

Usage:
    python3 merge_policy_entries.py --entries "<yaml>" < current_policy.yaml
"""

import argparse
import os
import sys

# Entry keys owned by this script.  Existing blocks with these keys are
# stripped so they can be re-added with freshly detected values.
OUR_KEYS = {"pypi_public:", "nvidia_pypi:", "cuopt_host:"}


def merge_entries(current: str, entries: str) -> str:
    """Return *current* with *entries* merged into ``network_policies``.

    The merge is purely line-based (no YAML parser is assumed to be
    available): entry keys are expected at a 2-space indent under the
    ``network_policies:`` section.  Old cuOpt-owned blocks (OUR_KEYS) are
    dropped; *entries* is inserted at the end of the section, or the
    section itself is created when absent.
    """
    lines = current.split("\n")
    result = []
    in_np = False       # currently inside the network_policies section
    inserted = False    # entries already emitted mid-document
    skip_block = False  # currently dropping a stale cuOpt-owned block

    for line in lines:
        stripped = line.strip()
        is_top_level = bool(line) and not line[0].isspace() and ":" in line
        # A policy-entry key: exactly one indent level (2 spaces) under
        # network_policies, e.g. "  cuopt_host:".
        is_np_entry = (
            not is_top_level
            and stripped
            and not stripped.startswith("#")
            and stripped.endswith(":")
            and line.startswith("  ")
            and not line.startswith("    ")
        )

        if stripped.startswith("network_policies:"):
            in_np = True
            result.append(line)
            continue

        # Drop stale cuOpt-owned entries; they are re-added below.
        if in_np and is_np_entry and stripped in OUR_KEYS:
            skip_block = True
            continue

        if skip_block:
            if is_np_entry or is_top_level:
                skip_block = False  # the stale block has ended
            else:
                continue  # still inside the stale block

        # Section ended without our entries: insert them just before the
        # next top-level key.
        if in_np and is_top_level and not inserted:
            result.append(entries)
            inserted = True
            in_np = False

        result.append(line)

    # network_policies was the last section in the document.
    if in_np and not inserted:
        result.append(entries)
        inserted = True

    # No network_policies section anywhere: create one.  Checking the
    # loop flags (instead of searching for the substring, as before)
    # avoids a false negative when e.g. a comment merely mentions
    # "network_policies" — previously the entries were silently dropped.
    if not inserted and not in_np:
        result.append("")
        result.append("network_policies:")
        result.append(entries)

    return "\n".join(result)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Merge cuOpt entries into sandbox network policy"
    )
    parser.add_argument(
        "--entries",
        default=os.environ.get("CUOPT_ENTRIES", ""),
        help="YAML entries to merge (or set CUOPT_ENTRIES env var)",
    )
    args = parser.parse_args()

    if not args.entries:
        print("error: --entries or CUOPT_ENTRIES required", file=sys.stderr)
        sys.exit(1)

    print(merge_entries(sys.stdin.read(), args.entries))
"""Strip unrecognized top-level YAML keys from openshell policy output.

openshell policy get --full may include metadata fields (e.g. "Version")
that openshell policy set rejects. This script keeps only the keys in the
accepted schema and drops everything else.

Usage:
    openshell policy get --full | python3 strip_policy_metadata.py
"""

import re
import sys

# Top-level keys accepted by `openshell policy set`; any other top-level
# section is dropped together with its indented block.
ALLOWED_KEYS = {
    "version",
    "filesystem_policy",
    "landlock",
    "process",
    "network_policies",
}

# A top-level YAML mapping key, e.g. "network_policies:".
_TOP_LEVEL_KEY = re.compile(r"^([A-Za-z_][A-Za-z0-9_]*):")


def strip_metadata(text: str) -> str:
    """Return *text* with disallowed top-level sections removed.

    Blank lines and indented lines are treated as part of the current
    block, so a blank line inside a disallowed section no longer lets
    the remainder of that section leak through (the previous scanner
    unconditionally reset its skip flag on blank lines).
    """
    result = []
    skip = False
    for line in text.split("\n"):
        m = _TOP_LEVEL_KEY.match(line)
        if m:
            # A new top-level key always ends the previous block.
            skip = m.group(1) not in ALLOWED_KEYS
            if skip:
                continue
        elif skip:
            # Blank or indented lines continue the skipped block.
            if not line or line[0].isspace():
                continue
            # Unexpected non-key content at column 0 ends the block and
            # is kept, matching the original scanner's behavior.
            skip = False
        result.append(line)
    return "\n".join(result)


if __name__ == "__main__":
    print(strip_metadata(sys.stdin.read()))