diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
new file mode 100644
index 0000000..ab23c58
--- /dev/null
+++ b/.github/workflows/build.yml
@@ -0,0 +1,24 @@
+name: Build  # runs the test suite and mypy for each Python version in the matrix
+
+on:
+  push:
+    branches: [ main, develop ]
+  pull_request:
+    branches: [ main, develop ]
+  workflow_dispatch:  # also allows manually triggered runs
+
+jobs:
+  python-build:
+    name: Python Build & Test (${{ matrix.python-version }})
+    strategy:
+      matrix:
+        python-version: ['3.11', '3.12']  # quoted so YAML keeps the versions as strings
+    uses: ./.github/workflows/reusable-python.yml  # the steps live in the shared reusable workflow
+    with:
+      python-version: ${{ matrix.python-version }}
+      install-extras: test,typing  # comma-separated extras, installed as ".[test,typing]"
+      extra-packages: dbus-python keyring secretstorage  # space-separated, passed to "uv pip install"
+      run-tests: true
+      test-command: pytest tests/ --cov=agent_codemode --cov-report term-missing
+      run-mypy: true
+      mypy-target: agent_codemode
diff --git a/.github/workflows/py-code-style.yml b/.github/workflows/py-code-style.yml
new file mode 100644
index 0000000..48897ef
--- /dev/null
+++ b/.github/workflows/py-code-style.yml
@@ -0,0 +1,44 @@
+name: Py Code Style
+
+on:
+  push:
+    branches:
+      - main
+      - develop
+    paths:  # only changes to Python sources, lint/tooling config, or these workflows trigger a run
+      - 'agent_codemode/**/*.py'
+      - 'tests/**/*.py'
+      - 'pyproject.toml'
+      - 'setup.py'
+      - '.pre-commit-config.yaml'
+      - 'ruff.toml'
+      - '.ruff.toml'
+      - '.github/workflows/py-code-style.yml'
+      - '.github/workflows/reusable-python.yml'
+  pull_request:
+    branches:
+      - main
+      - develop
+    paths:  # same filter as for push
+      - 'agent_codemode/**/*.py'
+      - 'tests/**/*.py'
+      - 'pyproject.toml'
+      - 'setup.py'
+      - '.pre-commit-config.yaml'
+      - 'ruff.toml'
+      - '.ruff.toml'
+      - '.github/workflows/py-code-style.yml'
+      - '.github/workflows/reusable-python.yml'
+  workflow_dispatch:  # also allows manually triggered runs
+
+concurrency:  # one active run per pull request (or per ref otherwise); a newer run cancels the older one
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check-code-style:
+    uses: ./.github/workflows/reusable-python.yml  # the steps live in the shared reusable workflow
+    with:
+      python-version: '3.11'
+      extra-packages: pre-commit  # provides the pre-commit command that the run-pre-commit step invokes
+      run-pre-commit: true
diff --git a/.github/workflows/py-tests.yml b/.github/workflows/py-tests.yml
new file mode 100644
index 0000000..0506cf6
--- /dev/null
+++ b/.github/workflows/py-tests.yml
@@ -0,0 +1,43 @@
+name: Py Tests
+
+on:
+  push:
+    branches:
+      - main
+    paths:  # only changes to sources, tests, packaging files, or these workflows trigger a run
+      - 'agent_codemode/**'
+      - 'tests/**'
+      - 'pyproject.toml'
+      - 'setup.py'
+      - 'requirements*.txt'
+      - '**/*.py'  # any Python file anywhere in the repository
+      - '.github/workflows/py-tests.yml'
+      - '.github/workflows/reusable-python.yml'
+  pull_request:
+    branches:
+      - main
+    paths:  # same filter as for push
+      - 'agent_codemode/**'
+      - 'tests/**'
+      - 'pyproject.toml'
+      - 'setup.py'
+      - 'requirements*.txt'
+      - '**/*.py'
+      - '.github/workflows/py-tests.yml'
+      - '.github/workflows/reusable-python.yml'
+
+  workflow_dispatch:  # also allows manually triggered runs
+
+jobs:
+  tests:
+    strategy:
+      max-parallel: 1  # matrix entries run one at a time instead of concurrently
+      matrix:
+        python-version: ['3.10', '3.11', '3.12', '3.13']  # quoted so '3.10' is not read as the float 3.1
+    uses: ./.github/workflows/reusable-python.yml  # the steps live in the shared reusable workflow
+    with:
+      python-version: ${{ matrix.python-version }}
+      install-extras: test  # installed as ".[test]"
+      extra-packages: dbus-python keyring secretstorage  # space-separated, passed to "uv pip install"
+      run-tests: true
+      test-command: pytest tests/ --cov=agent_codemode --cov-report term-missing
diff --git a/.github/workflows/py-typing.yml b/.github/workflows/py-typing.yml
new file mode 100644
index 0000000..4f9f623
--- /dev/null
+++ b/.github/workflows/py-typing.yml
@@ -0,0 +1,44 @@
+name: Py Type Checking
+
+on:
+  push:
+    branches:
+      - main
+      - develop
+    paths:  # only changes to Python sources, mypy/packaging config, or these workflows trigger a run
+      - 'agent_codemode/**/*.py'
+      - 'tests/**/*.py'
+      - 'pyproject.toml'
+      - 'setup.py'
+      - 'mypy.ini'
+      - '.mypy.ini'
+      - '.github/workflows/py-typing.yml'
+      - '.github/workflows/reusable-python.yml'
+  pull_request:
+    branches:
+      - main
+      - develop
+    paths:  # same filter as for push
+      - 'agent_codemode/**/*.py'
+      - 'tests/**/*.py'
+      - 'pyproject.toml'
+      - 'setup.py'
+      - 'mypy.ini'
+      - '.mypy.ini'
+      - '.github/workflows/py-typing.yml'
+      - '.github/workflows/reusable-python.yml'
+  workflow_dispatch:  # also allows manually triggered runs
+
+concurrency:  # one active run per pull request (or per ref otherwise); a newer run cancels the older one
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  type-check:
+    uses: ./.github/workflows/reusable-python.yml  # the steps live in the shared reusable workflow
+    with:
+      python-version: '3.12'
+      install-extras: typing  # installed as ".[typing]"
+      extra-packages: types-requests
+      run-mypy: true
+      mypy-target: agent_codemode  # only this package is passed to mypy
diff --git a/.github/workflows/reusable-python.yml b/.github/workflows/reusable-python.yml
new file mode 100644
index 0000000..18cd173
--- /dev/null
+++ b/.github/workflows/reusable-python.yml
@@ -0,0 +1,100 @@
+name: Reusable Python Workflow
+
+on:
+  workflow_call:  # this workflow is only invoked from other workflows through a job-level "uses:"
+    inputs:
+      python-version:
+        description: Python version to use
+        required: false
+        type: string
+        default: '3.11'
+      install-system-deps:
+        description: Install Linux system dependencies and unlock keyring
+        required: false
+        type: boolean
+        default: true  # on unless a caller disables it; gates both the apt-get step and the keyring step
+      install-extras:
+        description: Optional extras to install from pyproject (for example test,typing)
+        required: false
+        type: string
+        default: ''  # empty skips the ".[extras]" install
+      extra-packages:
+        description: Optional extra packages to install with uv pip
+        required: false
+        type: string
+        default: ''  # empty skips the extra "uv pip install"
+      run-tests:
+        description: Run tests
+        required: false
+        type: boolean
+        default: false  # opt-in
+      test-command:
+        description: Test command to run when run-tests=true
+        required: false
+        type: string
+        default: 'pytest -q'
+      run-mypy:
+        description: Run mypy
+        required: false
+        type: boolean
+        default: false  # opt-in
+      mypy-target:
+        description: Package/module path to type-check
+        required: false
+        type: string
+        default: ''  # if left empty while run-mypy is true, mypy is invoked without a target
+      run-pre-commit:
+        description: Run pre-commit
+        required: false
+        type: boolean
+        default: false  # opt-in
+
+jobs:
+  python-checks:
+    runs-on: ubuntu-latest  # the system-dependency step uses apt-get, so a Debian/Ubuntu runner is required
+
+    steps:
+      - name: Install system dependencies
+        if: inputs.install-system-deps
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libdbus-1-3 libdbus-1-dev libglib2.0-dev
+
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Setup UV
+        uses: astral-sh/setup-uv@v7
+        with:
+          version: latest  # the uv version itself is not pinned
+          python-version: ${{ inputs.python-version }}
+          activate-environment: true  # NOTE(review): presumably activates a uv venv for later steps so bare pytest/mypy/pre-commit resolve — confirm against setup-uv docs
+
+      - name: Install dependencies
+        # Inputs are handed over as env vars rather than interpolated into the script, so their content cannot inject shell code.
+        env:
+          INSTALL_EXTRAS: ${{ inputs.install-extras }}
+          EXTRA_PACKAGES: ${{ inputs.extra-packages }}
+        run: |
+          uv pip install .
+          if [[ -n "$INSTALL_EXTRAS" ]]; then uv pip install ".[$INSTALL_EXTRAS]"; fi
+          if [[ -n "$EXTRA_PACKAGES" ]]; then uv pip install $EXTRA_PACKAGES; fi  # unquoted on purpose: space-separated package list
+
+      - name: Unlock keyring
+        if: inputs.install-system-deps  # same switch as the apt-get step
+        uses: t1m0thyj/unlock-keyring@v1
+
+      - name: Configure git to use https
+        run: git config --global hub.protocol https  # NOTE(review): hub.protocol is a setting of the "hub" CLI, not of git itself — confirm this step is needed
+
+      - name: Run tests
+        if: inputs.run-tests
+        run: ${{ inputs.test-command }}  # the caller-supplied command is expanded into the script verbatim
+
+      - name: Run mypy
+        if: inputs.run-mypy
+        run: mypy ${{ inputs.mypy-target }}  # becomes a bare "mypy" when mypy-target is empty
+
+      - name: Run pre-commit
+        if: inputs.run-pre-commit
+        run: pre-commit run --all-files  # requires pre-commit to be installed, e.g. via extra-packages
diff --git a/README.md b/README.md
index 81ebe8f..ab8f479 100644
--- a/README.md
+++ b/README.md
@@ -63,7 +63,7 @@ Same task, same MCP server — Code Mode uses significantly fewer tokens by comp
 |--------|-------------|
 | `allow_direct_tool_calls` | When `False` (default), `call_tool` is hidden; all execution flows through `execute_code` |
 | `max_tool_calls` | Safety cap limiting tool invocations per `execute_code` run |
-| `sandbox_variant` | Sandbox type for code execution (default: `"local-eval"`) |
+| `sandbox_variant` | Sandbox type for code execution (default: `"eval"`) |
 | `workspace_path` | Working directory for sandbox execution |
 | `generated_path` | Path where tool bindings are generated |
 | `skills_path` | Path for saved skills |
@@ -461,6 +461,29 @@ When running in a sandbox, state can persist between `execute_code` calls within
 - [Advanced Tool Use](https://www.anthropic.com/engineering/advanced-tool-use) - Anthropic
 - [Programmatic MCP Prototype](https://github.com/domdomegg/programmatic-mcp-prototype)
 
+## CI Workflows
+
+This repository uses a reusable GitHub Actions workflow at `.github/workflows/reusable-python.yml`.
+
+The following workflows call it:
+
+- `.github/workflows/build.yml`
+- `.github/workflows/py-tests.yml`
+- `.github/workflows/py-code-style.yml`
+- `.github/workflows/py-typing.yml`
+
+Reusable workflow inputs:
+
+- `python-version`: Python version to run.
+- `install-system-deps`: Install Linux dependencies and unlock keyring.
+- `install-extras`: Extras from `pyproject.toml` (for example `test,typing`).
+- `extra-packages`: Additional packages installed with `uv pip install`.
+- `run-tests`: Enable test execution.
+- `test-command`: Command used for tests.
+- `run-mypy`: Enable mypy.
+- `mypy-target`: Package or module passed to mypy.
+- `run-pre-commit`: Enable pre-commit checks.
+
 ## License
 
 BSD 3-Clause License
diff --git a/agent_codemode/__version__.py b/agent_codemode/__version__.py
index 0e547d7..bc08b8a 100644
--- a/agent_codemode/__version__.py
+++ b/agent_codemode/__version__.py
@@ -4,4 +4,4 @@
 
 """Agent Codemode."""
 
-__version__ = "0.1.1"
+__version__ = "0.1.3"
diff --git a/agent_codemode/composition/executor.py b/agent_codemode/composition/executor.py
index 5a1c376..7b16c0c 100644
--- a/agent_codemode/composition/executor.py
+++ b/agent_codemode/composition/executor.py
@@ -133,7 +133,7 @@ def set_skills_metadata(self, metadata: list[dict[str, Any]]) -> None:
         self._skills_metadata = metadata
 
     def _is_local_eval_sandbox(self) -> bool:
-        """Check if the sandbox is a local-eval type (has in-memory namespaces).
+        """Check if the sandbox is an eval type (has in-memory namespaces).
         
         This checks the actual sandbox instance, not the config, to handle
         cases where an external sandbox is passed that differs from config.
@@ -202,7 +202,7 @@ async def _setup_sandbox_environment(self) -> None:
             # Use /tmp so 'from generated.mcp...' works (files are at /tmp/generated/)
             sandbox_generated_path = "/tmp"
         else:
-            # For local-eval, use the parent directory so 'from generated.mcp...' works
+            # For eval, use the parent directory so 'from generated.mcp...' works
             # The generated_path might be './generated', we need its parent on sys.path
             sandbox_generated_path = str(generated_path.parent)
 
@@ -705,7 +705,7 @@ def generate_skills_in_sandbox(self) -> None:
         skill scripts **directly** in the Jupyter kernel, reading the script
         files from the shared ``skills_path`` on disk.  This avoids the
         HTTP proxy round-trip (call_tool → MCP proxy → agent-runtimes →
-        local-eval fallback) which caused blocking and deadlocks.
+        eval fallback) which caused blocking and deadlocks.
 
         The skills_path is the same between the agent-runtimes process and
         the Jupyter runtime (shared filesystem or mount).
@@ -717,7 +717,7 @@ def generate_skills_in_sandbox(self) -> None:
         if self._sandbox is None or not self._skills_metadata:
             return
 
-        # Skip for local-eval sandboxes (they use the on-disk generated files)
+        # Skip for eval sandboxes (they use the on-disk generated files)
         if self._is_local_eval_sandbox():
             return
 
@@ -1032,7 +1032,7 @@ async def execute(
             
             # Get the generated path for sys.path setup
             # For remote sandboxes, use /tmp so 'from generated.mcp...' works (files at /tmp/generated/)
-            # For local-eval, use parent of generated_path so 'from generated.mcp...' works
+            # For eval, use parent of generated_path so 'from generated.mcp...' works
             # Use actual sandbox type detection, not config
             is_local_eval = self._is_local_eval_sandbox()
             if not is_local_eval:
@@ -1086,7 +1086,7 @@ async def execute(
 '''
             # Branch based on actual sandbox type (already computed above)
             if is_local_eval:
-                # For local-eval, we can access _namespaces directly
+                # For eval, we can access _namespaces directly
                 return await self._execute_local_eval(code, setup_code, timeout)
             else:
                 # For Jupyter/remote sandboxes, use run_code()
@@ -1099,7 +1099,7 @@ async def _execute_local_eval(
         setup_code: str,
         timeout: Optional[float] = None,
     ) -> ExecutionResult:
-        """Execute code in local-eval sandbox with direct namespace access."""
+        """Execute code in eval sandbox with direct namespace access."""
         import sys
         import io
         import time
diff --git a/agent_codemode/toolset.py b/agent_codemode/toolset.py
index 95a8bb7..d407eef 100644
--- a/agent_codemode/toolset.py
+++ b/agent_codemode/toolset.py
@@ -100,7 +100,7 @@ class CodemodeToolset(AbstractToolset):
         
         registry: ToolRegistry | None = None
         config: CodeModeConfig = field(default_factory=CodeModeConfig)
-        sandbox: Any | None = None  # Optional pre-configured sandbox (e.g., LocalEvalSandbox)
+        sandbox: Any | None = None  # Optional pre-configured sandbox (e.g., EvalSandbox)
         allow_direct_tool_calls: bool | None = None
         allow_discovery_tools: bool = True
         tool_reranker: Callable[[list, str, Optional[str]], Awaitable[list]] | None = None
diff --git a/agent_codemode/types.py b/agent_codemode/types.py
index c0f069b..423f13a 100644
--- a/agent_codemode/types.py
+++ b/agent_codemode/types.py
@@ -223,7 +223,7 @@ class CodeModeConfig(BaseModel):
     workspace_path: str = "./workspace"
     skills_path: str = "./skills"
     generated_path: str = "./generated"
-    sandbox_variant: str = "local-eval"
+    sandbox_variant: str = "eval"
     sandbox_image: str | None = None
     allow_direct_tool_calls: bool = False
     max_tool_calls: int | None = None
diff --git a/docs/docs/skills/index.mdx b/docs/docs/skills/index.mdx
index acfa406..ec235c7 100644
--- a/docs/docs/skills/index.mdx
+++ b/docs/docs/skills/index.mdx
@@ -372,10 +372,10 @@ For Pydantic AI agents, use the `AgentSkillsToolset`:
 ```python
 from pydantic_ai import Agent
 from agent_skills import AgentSkillsToolset, SandboxExecutor
-from code_sandboxes import LocalEvalSandbox
+from code_sandboxes.eval_sandbox import EvalSandbox
 
 # Create toolset with sandbox execution
-sandbox = LocalEvalSandbox()
+sandbox = EvalSandbox()
 toolset = AgentSkillsToolset(
     directories=["./skills"],
     executor=SandboxExecutor(sandbox),
diff --git a/examples/patterns/codemode_example.py b/examples/patterns/codemode_example.py
index 98cca9e..990c984 100644
--- a/examples/patterns/codemode_example.py
+++ b/examples/patterns/codemode_example.py
@@ -86,7 +86,7 @@ async def example_code_execution():
     
     # Configure the executor
     config = CodeModeConfig(
-        sandbox_variant="local-eval",  # For development
+        sandbox_variant="eval",  # For development
         generated_path="./generated",
         skills_path="./skills",
     )
diff --git a/examples/simple/agent_cli.py b/examples/simple/agent_cli.py
index b02f548..c719f7d 100644
--- a/examples/simple/agent_cli.py
+++ b/examples/simple/agent_cli.py
@@ -33,7 +33,7 @@
 
 try:
     from agent_skills import AgentSkillsToolset, SandboxExecutor
-    from code_sandboxes import LocalEvalSandbox
+    from code_sandboxes.eval_sandbox import EvalSandbox
     HAS_AGENT_SKILLS = True
 except ImportError:
     HAS_AGENT_SKILLS = False
@@ -348,8 +348,8 @@ def create_agent(model: str, codemode: bool) -> tuple[Agent, object | None, obje
         shared_sandbox = None
         skills_toolset = None
         if HAS_AGENT_SKILLS:
-            shared_sandbox = LocalEvalSandbox()
-            logger.info("Created shared LocalEvalSandbox for codemode and skills toolsets")
+            shared_sandbox = EvalSandbox()
+            logger.info("Created shared EvalSandbox for codemode and skills toolsets")
 
         toolset = CodemodeToolset(
             registry=registry,
diff --git a/pyproject.toml b/pyproject.toml
index 2e91f22..dd3c6ec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -51,6 +51,7 @@ allow-direct-references = true
 
 [tool.pytest.ini_options]
 asyncio_mode = "auto"
+testpaths = ["tests"]
 filterwarnings = [
   "error",
   "ignore:There is no current event loop:DeprecationWarning",
diff --git a/skills/github/scripts/get_repo.py b/skills/github/scripts/get_repo.py
index 86fb4dc..72397f0 100644
--- a/skills/github/scripts/get_repo.py
+++ b/skills/github/scripts/get_repo.py
@@ -112,8 +112,24 @@ def format_repo_details(repo: dict) -> str:
     return "\n".join(lines)
 
 
+class _HelpOnErrorParser(argparse.ArgumentParser):
+    """ArgumentParser that prints usage, the error, and a parameter summary on invalid arguments."""
+
+    def error(self, message: str) -> None:  # noqa: D401
+        params_help = (
+            "\nValid parameters for get_repo:\n"
+            "  repo (positional)  owner/repo  e.g. 'datalayer/jupyter-ui'\n"
+            "  --format           table | json  (default: table)\n"
+        )
+        self.print_usage(sys.stderr)  # short usage line only, not the full --help text
+        print(f"\nerror: {message}", file=sys.stderr)
+        print(params_help, file=sys.stderr)
+        print("Please retry with valid parameters.", file=sys.stderr)
+        sys.exit(2)  # same exit status the stock ArgumentParser.error() uses
+
+
 def main():
-    parser = argparse.ArgumentParser(
+    parser = _HelpOnErrorParser(
         description="Get details for a specific GitHub repository."
     )
     parser.add_argument(
diff --git a/skills/github/scripts/list_issues.py b/skills/github/scripts/list_issues.py
index c1fe9bc..d79c755 100644
--- a/skills/github/scripts/list_issues.py
+++ b/skills/github/scripts/list_issues.py
@@ -115,8 +115,26 @@ def format_table(issues: list[dict]) -> str:
     return "\n".join(lines)
 
 
+class _HelpOnErrorParser(argparse.ArgumentParser):
+    """ArgumentParser that prints usage, the error, and a parameter summary on invalid arguments."""
+
+    def error(self, message: str) -> None:  # noqa: D401
+        params_help = (
+            "\nValid parameters for list_issues:\n"
+            "  repo (positional)  owner/repo  e.g. 'datalayer/agent-runtimes'\n"
+            "  --state            open | closed | all  (default: open)\n"
+            "  --format           table | json  (default: table)\n"
+            "  --limit            integer — max issues to return  (default: 50)\n"
+        )
+        self.print_usage(sys.stderr)  # short usage line only, not the full --help text
+        print(f"\nerror: {message}", file=sys.stderr)
+        print(params_help, file=sys.stderr)
+        print("Please retry with valid parameters.", file=sys.stderr)
+        sys.exit(2)  # same exit status the stock ArgumentParser.error() uses
+
+
 def main():
-    parser = argparse.ArgumentParser(
+    parser = _HelpOnErrorParser(
         description="List issues for a GitHub repository."
     )
     parser.add_argument(
diff --git a/skills/github/scripts/list_prs.py b/skills/github/scripts/list_prs.py
index bbac3c1..a2e71dd 100644
--- a/skills/github/scripts/list_prs.py
+++ b/skills/github/scripts/list_prs.py
@@ -122,8 +122,26 @@ def format_table(prs: list[dict]) -> str:
     return "\n".join(lines)
 
 
+class _HelpOnErrorParser(argparse.ArgumentParser):
+    """ArgumentParser that prints usage, the error, and a parameter summary on invalid arguments."""
+
+    def error(self, message: str) -> None:  # noqa: D401
+        params_help = (
+            "\nValid parameters for list_prs:\n"
+            "  repo (positional)  owner/repo  e.g. 'datalayer/agent-runtimes'\n"
+            "  --state            open | closed | all  (default: open)\n"
+            "  --format           table | json  (default: table)\n"
+            "  --limit            integer — max PRs to return  (default: 50)\n"
+        )
+        self.print_usage(sys.stderr)  # short usage line only, not the full --help text
+        print(f"\nerror: {message}", file=sys.stderr)
+        print(params_help, file=sys.stderr)
+        print("Please retry with valid parameters.", file=sys.stderr)
+        sys.exit(2)  # same exit status the stock ArgumentParser.error() uses
+
+
 def main():
-    parser = argparse.ArgumentParser(
+    parser = _HelpOnErrorParser(
         description="List pull requests for a GitHub repository."
     )
     parser.add_argument(
diff --git a/skills/github/scripts/list_repos.py b/skills/github/scripts/list_repos.py
index c4b418d..55bafd6 100644
--- a/skills/github/scripts/list_repos.py
+++ b/skills/github/scripts/list_repos.py
@@ -116,8 +116,27 @@ def format_table(repos: list[dict]) -> str:
     return "\n".join(lines)
 
 
+class _HelpOnErrorParser(argparse.ArgumentParser):
+    """ArgumentParser that prints usage, the error, and a parameter summary on invalid arguments."""
+
+    def error(self, message: str) -> None:  # noqa: D401
+        params_help = (
+            "\nValid parameters for list_repos:\n"
+            "  --visibility  all | public | private  (default: all)\n"
+            "  --sort        updated | created | pushed | full_name  (default: updated)\n"
+            "  --format      table | json  (default: table)\n"
+            "  --limit       integer — max repos to return\n"
+            "\nDo NOT pass 'per_page' — use 'limit' instead.\n"
+        )
+        self.print_usage(sys.stderr)  # short usage line only, not the full --help text
+        print(f"\nerror: {message}", file=sys.stderr)
+        print(params_help, file=sys.stderr)
+        print("Please retry with valid parameters.", file=sys.stderr)
+        sys.exit(2)  # same exit status the stock ArgumentParser.error() uses
+
+
 def main():
-    parser = argparse.ArgumentParser(
+    parser = _HelpOnErrorParser(
         description="List all repositories for the authenticated GitHub user."
     )
     parser.add_argument(
diff --git a/skills/github/scripts/search_repos.py b/skills/github/scripts/search_repos.py
index 3eefcce..1ffa0f8 100644
--- a/skills/github/scripts/search_repos.py
+++ b/skills/github/scripts/search_repos.py
@@ -125,8 +125,30 @@ def format_table(repos: list[dict], total: int) -> str:
     return "\n".join(lines)
 
 
+class _HelpOnErrorParser(argparse.ArgumentParser):
+    """ArgumentParser that prints usage, the error, and a parameter summary on invalid arguments."""
+
+    def error(self, message: str) -> None:  # noqa: D401
+        params_help = (
+            "\nValid parameters for search_repos:\n"
+            "  query (positional)  search text  e.g. 'jupyter notebook'\n"
+            "  --language          string — filter by language\n"
+            "  --user              string — filter by user/owner\n"
+            "  --org               string — filter by organization\n"
+            "  --sort              stars | forks | updated | best-match  (default: best-match)\n"
+            "  --format            table | json  (default: table)\n"
+            "  --limit             integer — max results  (default: 20)\n"
+            "\nDo NOT pass 'per_page' — use 'limit' instead.\n"
+        )
+        self.print_usage(sys.stderr)  # short usage line only, not the full --help text
+        print(f"\nerror: {message}", file=sys.stderr)
+        print(params_help, file=sys.stderr)
+        print("Please retry with valid parameters.", file=sys.stderr)
+        sys.exit(2)  # same exit status the stock ArgumentParser.error() uses
+
+
 def main():
-    parser = argparse.ArgumentParser(
+    parser = _HelpOnErrorParser(
         description="Search GitHub repositories."
     )
     parser.add_argument(
diff --git a/tests/test_agent_minimal.py b/tests/test_agent_minimal.py
index 8a0945b..f725a25 100644
--- a/tests/test_agent_minimal.py
+++ b/tests/test_agent_minimal.py
@@ -30,7 +30,7 @@ async def main():
     # Create executor
     config = CodeModeConfig(
         workspace_path=Path(__file__).parent / "examples" / "agent",
-        sandbox_variant="local-eval"
+        sandbox_variant="eval"
     )
     executor = CodeModeExecutor(registry=registry, config=config)
     await executor.setup()
diff --git a/tests/test_direct_execution.py b/tests/test_direct_execution.py
index 3b5f3b5..b4962e1 100644
--- a/tests/test_direct_execution.py
+++ b/tests/test_direct_execution.py
@@ -30,7 +30,7 @@ async def main():
     print("\n=== Setting up executor ===")
     config = CodeModeConfig(
         workspace_path=Path(__file__).parent / "examples" / "agent",
-        sandbox_variant="local-eval"
+        sandbox_variant="eval"
     )
     executor = CodeModeExecutor(registry=registry, config=config)
     await executor.setup()
diff --git a/tests/test_executor_async.py b/tests/test_executor_async.py
index f74c6e3..07dc292 100644
--- a/tests/test_executor_async.py
+++ b/tests/test_executor_async.py
@@ -7,7 +7,7 @@
 
 async def main():
     # Create a sandbox
-    sandbox = Sandbox.create(variant="local-eval")
+    sandbox = Sandbox.create(variant="eval")
     sandbox.start()
     
     # Set up an executor mock