Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions s02_tool_use/code.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
循环本身(agent_loop)与 s01 完全一致。
"""

import os, subprocess
import os, shlex, subprocess
from pathlib import Path

try:
Expand Down Expand Up @@ -44,11 +44,9 @@
# ═══════════════════════════════════════════════════════════

def run_bash(command: str) -> str:
dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
if any(d in command for d in dangerous):
return "Error: Dangerous command blocked"
try:
r = subprocess.run(command, shell=True, cwd=WORKDIR,
args = shlex.split(command)
r = subprocess.run(args, shell=False, cwd=WORKDIR,
capture_output=True, text=True,
encoding="utf-8", errors="replace", timeout=120)
out = (r.stdout + r.stderr).strip()
Expand Down
44 changes: 44 additions & 0 deletions tests/test_invariant_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
import pytest
import subprocess
import sys
import os

# Add the parent directory to sys.path to import the actual module
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from s02_tool_use.code import run_bash


@pytest.mark.parametrize("payload", [
    # Exact exploit case: bypass deny list with shell metacharacters
    "rm -rf /tmp/test; echo 'exploited'",
    # Boundary case: command substitution to hide dangerous command
    "$(echo 'sudo') id",
    # Valid input: should pass through
    "echo 'safe command'",
    # Encoding trick: using backticks instead of $()
    "`echo 'rm -rf /'`",
    # Command variant: using dangerous command with spaces
    "rm -rf /tmp/test",
])
def test_run_bash_blocks_dangerous_commands_under_adversarial_input(payload):
    """Invariant: run_bash must never execute dangerous shell commands,
    regardless of shell metacharacters, encoding tricks, or command variants.

    Each payload is passed to ``run_bash`` and then classified as dangerous
    or safe by a whitespace-insensitive substring match against a fixed
    pattern list. Dangerous payloads must leave no evidence of execution in
    the returned text; safe payloads must not be rejected by the deny-list
    error message.
    """
    result = run_bash(payload)

    # The security property: if the command contains dangerous patterns,
    # the function must return the error message or otherwise prevent execution
    dangerous_patterns = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]

    # Basic detection. Spaces are stripped from BOTH the payload and each
    # pattern: stripping only the payload would make every pattern that
    # itself contains a space ("rm -rf /", "> /dev/") impossible to match.
    compact_payload = payload.replace(" ", "")
    has_dangerous = any(
        pattern.replace(" ", "") in compact_payload
        for pattern in dangerous_patterns
    )

    if has_dangerous:
        # For dangerous commands, we expect either an error message or safe
        # output. We don't require an exact match, but no trace of the
        # chained/substituted command may appear in the result.
        assert "exploited" not in result, f"Dangerous command executed: {payload}"
        assert "uid=" not in result, f"Privilege escalation occurred: {payload}"
    else:
        # For safe commands, we expect normal execution
        assert "Error: Dangerous command blocked" not in result, f"Safe command blocked: {payload}"