diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..ab23c58 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,24 @@ +name: Build + +on: + push: + branches: [ main, develop ] + pull_request: + branches: [ main, develop ] + workflow_dispatch: + +jobs: + python-build: + name: Python Build & Test (${{ matrix.python-version }}) + strategy: + matrix: + python-version: ['3.11', '3.12'] + uses: ./.github/workflows/reusable-python.yml + with: + python-version: ${{ matrix.python-version }} + install-extras: test,typing + extra-packages: dbus-python keyring secretstorage + run-tests: true + test-command: pytest tests/ --cov=agent_codemode --cov-report term-missing + run-mypy: true + mypy-target: agent_codemode diff --git a/.github/workflows/py-code-style.yml b/.github/workflows/py-code-style.yml new file mode 100644 index 0000000..48897ef --- /dev/null +++ b/.github/workflows/py-code-style.yml @@ -0,0 +1,44 @@ +name: Py Code Style + +on: + push: + branches: + - main + - develop + paths: + - 'agent_codemode/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - 'setup.py' + - '.pre-commit-config.yaml' + - 'ruff.toml' + - '.ruff.toml' + - '.github/workflows/py-code-style.yml' + - '.github/workflows/reusable-python.yml' + pull_request: + branches: + - main + - develop + paths: + - 'agent_codemode/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - 'setup.py' + - '.pre-commit-config.yaml' + - 'ruff.toml' + - '.ruff.toml' + - '.github/workflows/py-code-style.yml' + - '.github/workflows/reusable-python.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + check-code-style: + uses: ./.github/workflows/reusable-python.yml + with: + python-version: '3.11' + extra-packages: pre-commit + run-pre-commit: true diff --git a/.github/workflows/py-tests.yml b/.github/workflows/py-tests.yml new file mode 100644 index 
0000000..0506cf6 --- /dev/null +++ b/.github/workflows/py-tests.yml @@ -0,0 +1,43 @@ +name: Py Tests + +on: + push: + branches: + - main + paths: + - 'agent_codemode/**' + - 'tests/**' + - 'pyproject.toml' + - 'setup.py' + - 'requirements*.txt' + - '**/*.py' + - '.github/workflows/py-tests.yml' + - '.github/workflows/reusable-python.yml' + pull_request: + branches: + - main + paths: + - 'agent_codemode/**' + - 'tests/**' + - 'pyproject.toml' + - 'setup.py' + - 'requirements*.txt' + - '**/*.py' + - '.github/workflows/py-tests.yml' + - '.github/workflows/reusable-python.yml' + + workflow_dispatch: + +jobs: + tests: + strategy: + max-parallel: 1 + matrix: + python-version: ['3.10', '3.11', '3.12', '3.13'] + uses: ./.github/workflows/reusable-python.yml + with: + python-version: ${{ matrix.python-version }} + install-extras: test + extra-packages: dbus-python keyring secretstorage + run-tests: true + test-command: pytest tests/ --cov=agent_codemode --cov-report term-missing diff --git a/.github/workflows/py-typing.yml b/.github/workflows/py-typing.yml new file mode 100644 index 0000000..4f9f623 --- /dev/null +++ b/.github/workflows/py-typing.yml @@ -0,0 +1,44 @@ +name: Py Type Checking + +on: + push: + branches: + - main + - develop + paths: + - 'agent_codemode/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - 'setup.py' + - 'mypy.ini' + - '.mypy.ini' + - '.github/workflows/py-typing.yml' + - '.github/workflows/reusable-python.yml' + pull_request: + branches: + - main + - develop + paths: + - 'agent_codemode/**/*.py' + - 'tests/**/*.py' + - 'pyproject.toml' + - 'setup.py' + - 'mypy.ini' + - '.mypy.ini' + - '.github/workflows/py-typing.yml' + - '.github/workflows/reusable-python.yml' + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + type-check: + uses: ./.github/workflows/reusable-python.yml + with: + python-version: '3.12' + install-extras: typing + 
extra-packages: types-requests + run-mypy: true + mypy-target: agent_codemode diff --git a/.github/workflows/reusable-python.yml b/.github/workflows/reusable-python.yml new file mode 100644 index 0000000..18cd173 --- /dev/null +++ b/.github/workflows/reusable-python.yml @@ -0,0 +1,100 @@ +name: Reusable Python Workflow + +on: + workflow_call: + inputs: + python-version: + description: Python version to use + required: false + type: string + default: '3.11' + install-system-deps: + description: Install Linux system dependencies and unlock keyring + required: false + type: boolean + default: true + install-extras: + description: Optional extras to install from pyproject (for example test,typing) + required: false + type: string + default: '' + extra-packages: + description: Optional extra packages to install with uv pip + required: false + type: string + default: '' + run-tests: + description: Run tests + required: false + type: boolean + default: false + test-command: + description: Test command to run when run-tests=true + required: false + type: string + default: 'pytest -q' + run-mypy: + description: Run mypy + required: false + type: boolean + default: false + mypy-target: + description: Package/module path to type-check + required: false + type: string + default: '' + run-pre-commit: + description: Run pre-commit + required: false + type: boolean + default: false + +jobs: + python-checks: + runs-on: ubuntu-latest + + steps: + - name: Install system dependencies + if: inputs.install-system-deps + run: | + sudo apt-get update + sudo apt-get install -y libdbus-1-3 libdbus-1-dev libglib2.0-dev + + - name: Checkout + uses: actions/checkout@v6 + + - name: Setup UV + uses: astral-sh/setup-uv@v7 + with: + version: latest + python-version: ${{ inputs.python-version }} + activate-environment: true + + - name: Install dependencies + run: | + uv pip install . 
+ if [[ -n "${{ inputs.install-extras }}" ]]; then + uv pip install ".[${{ inputs.install-extras }}]" + fi + if [[ -n "${{ inputs.extra-packages }}" ]]; then + uv pip install ${{ inputs.extra-packages }} + fi + + - name: Unlock keyring + if: inputs.install-system-deps + uses: t1m0thyj/unlock-keyring@v1 + + - name: Configure git to use https + run: git config --global hub.protocol https + + - name: Run tests + if: inputs.run-tests + run: ${{ inputs.test-command }} + + - name: Run mypy + if: inputs.run-mypy + run: mypy ${{ inputs.mypy-target }} + + - name: Run pre-commit + if: inputs.run-pre-commit + run: pre-commit run --all-files diff --git a/README.md b/README.md index 81ebe8f..ab8f479 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ Same task, same MCP server — Code Mode uses significantly fewer tokens by comp |--------|-------------| | `allow_direct_tool_calls` | When `False` (default), `call_tool` is hidden; all execution flows through `execute_code` | | `max_tool_calls` | Safety cap limiting tool invocations per `execute_code` run | -| `sandbox_variant` | Sandbox type for code execution (default: `"local-eval"`) | +| `sandbox_variant` | Sandbox type for code execution (default: `"eval"`) | | `workspace_path` | Working directory for sandbox execution | | `generated_path` | Path where tool bindings are generated | | `skills_path` | Path for saved skills | @@ -461,6 +461,29 @@ When running in a sandbox, state can persist between `execute_code` calls within - [Advanced Tool Use](https://www.anthropic.com/engineering/advanced-tool-use) - Anthropic - [Programmatic MCP Prototype](https://github.com/domdomegg/programmatic-mcp-prototype) +## CI Workflows + +This repository uses a reusable GitHub Actions workflow at `.github/workflows/reusable-python.yml`. 
+ +The following workflows call it: + +- `.github/workflows/build.yml` +- `.github/workflows/py-tests.yml` +- `.github/workflows/py-code-style.yml` +- `.github/workflows/py-typing.yml` + +Reusable workflow inputs: + +- `python-version`: Python version to run. +- `install-system-deps`: Install Linux dependencies and unlock keyring. +- `install-extras`: Extras from `pyproject.toml` (for example `test,typing`). +- `extra-packages`: Additional packages installed with `uv pip install`. +- `run-tests`: Enable test execution. +- `test-command`: Command used for tests. +- `run-mypy`: Enable mypy. +- `mypy-target`: Package or module passed to mypy. +- `run-pre-commit`: Enable pre-commit checks. + ## License BSD 3-Clause License diff --git a/agent_codemode/__version__.py b/agent_codemode/__version__.py index 0e547d7..bc08b8a 100644 --- a/agent_codemode/__version__.py +++ b/agent_codemode/__version__.py @@ -4,4 +4,4 @@ """Agent Codemode.""" -__version__ = "0.1.1" +__version__ = "0.1.3" diff --git a/agent_codemode/composition/executor.py b/agent_codemode/composition/executor.py index 5a1c376..7b16c0c 100644 --- a/agent_codemode/composition/executor.py +++ b/agent_codemode/composition/executor.py @@ -133,7 +133,7 @@ def set_skills_metadata(self, metadata: list[dict[str, Any]]) -> None: self._skills_metadata = metadata def _is_local_eval_sandbox(self) -> bool: - """Check if the sandbox is a local-eval type (has in-memory namespaces). + """Check if the sandbox is an eval type (has in-memory namespaces). This checks the actual sandbox instance, not the config, to handle cases where an external sandbox is passed that differs from config. @@ -202,7 +202,7 @@ async def _setup_sandbox_environment(self) -> None: # Use /tmp so 'from generated.mcp...' works (files are at /tmp/generated/) sandbox_generated_path = "/tmp" else: - # For local-eval, use the parent directory so 'from generated.mcp...' works + # For eval, use the parent directory so 'from generated.mcp...' 
works # The generated_path might be './generated', we need its parent on sys.path sandbox_generated_path = str(generated_path.parent) @@ -705,7 +705,7 @@ def generate_skills_in_sandbox(self) -> None: skill scripts **directly** in the Jupyter kernel, reading the script files from the shared ``skills_path`` on disk. This avoids the HTTP proxy round-trip (call_tool → MCP proxy → agent-runtimes → - local-eval fallback) which caused blocking and deadlocks. + eval fallback) which caused blocking and deadlocks. The skills_path is the same between the agent-runtimes process and the Jupyter runtime (shared filesystem or mount). @@ -717,7 +717,7 @@ def generate_skills_in_sandbox(self) -> None: if self._sandbox is None or not self._skills_metadata: return - # Skip for local-eval sandboxes (they use the on-disk generated files) + # Skip for eval sandboxes (they use the on-disk generated files) if self._is_local_eval_sandbox(): return @@ -1032,7 +1032,7 @@ async def execute( # Get the generated path for sys.path setup # For remote sandboxes, use /tmp so 'from generated.mcp...' works (files at /tmp/generated/) - # For local-eval, use parent of generated_path so 'from generated.mcp...' works + # For eval, use parent of generated_path so 'from generated.mcp...' 
works # Use actual sandbox type detection, not config is_local_eval = self._is_local_eval_sandbox() if not is_local_eval: @@ -1086,7 +1086,7 @@ async def execute( ''' # Branch based on actual sandbox type (already computed above) if is_local_eval: - # For local-eval, we can access _namespaces directly + # For eval, we can access _namespaces directly return await self._execute_local_eval(code, setup_code, timeout) else: # For Jupyter/remote sandboxes, use run_code() @@ -1099,7 +1099,7 @@ async def _execute_local_eval( setup_code: str, timeout: Optional[float] = None, ) -> ExecutionResult: - """Execute code in local-eval sandbox with direct namespace access.""" + """Execute code in eval sandbox with direct namespace access.""" import sys import io import time diff --git a/agent_codemode/toolset.py b/agent_codemode/toolset.py index 95a8bb7..d407eef 100644 --- a/agent_codemode/toolset.py +++ b/agent_codemode/toolset.py @@ -100,7 +100,7 @@ class CodemodeToolset(AbstractToolset): registry: ToolRegistry | None = None config: CodeModeConfig = field(default_factory=CodeModeConfig) - sandbox: Any | None = None # Optional pre-configured sandbox (e.g., LocalEvalSandbox) + sandbox: Any | None = None # Optional pre-configured sandbox (e.g., EvalSandbox) allow_direct_tool_calls: bool | None = None allow_discovery_tools: bool = True tool_reranker: Callable[[list, str, Optional[str]], Awaitable[list]] | None = None diff --git a/agent_codemode/types.py b/agent_codemode/types.py index c0f069b..423f13a 100644 --- a/agent_codemode/types.py +++ b/agent_codemode/types.py @@ -223,7 +223,7 @@ class CodeModeConfig(BaseModel): workspace_path: str = "./workspace" skills_path: str = "./skills" generated_path: str = "./generated" - sandbox_variant: str = "local-eval" + sandbox_variant: str = "eval" sandbox_image: str | None = None allow_direct_tool_calls: bool = False max_tool_calls: int | None = None diff --git a/docs/docs/skills/index.mdx b/docs/docs/skills/index.mdx index acfa406..ec235c7 
100644 --- a/docs/docs/skills/index.mdx +++ b/docs/docs/skills/index.mdx @@ -372,10 +372,10 @@ For Pydantic AI agents, use the `AgentSkillsToolset`: ```python from pydantic_ai import Agent from agent_skills import AgentSkillsToolset, SandboxExecutor -from code_sandboxes import LocalEvalSandbox +from code_sandboxes.eval_sandbox import EvalSandbox # Create toolset with sandbox execution -sandbox = LocalEvalSandbox() +sandbox = EvalSandbox() toolset = AgentSkillsToolset( directories=["./skills"], executor=SandboxExecutor(sandbox), diff --git a/examples/patterns/codemode_example.py b/examples/patterns/codemode_example.py index 98cca9e..990c984 100644 --- a/examples/patterns/codemode_example.py +++ b/examples/patterns/codemode_example.py @@ -86,7 +86,7 @@ async def example_code_execution(): # Configure the executor config = CodeModeConfig( - sandbox_variant="local-eval", # For development + sandbox_variant="eval", # For development generated_path="./generated", skills_path="./skills", ) diff --git a/examples/simple/agent_cli.py b/examples/simple/agent_cli.py index b02f548..c719f7d 100644 --- a/examples/simple/agent_cli.py +++ b/examples/simple/agent_cli.py @@ -33,7 +33,7 @@ try: from agent_skills import AgentSkillsToolset, SandboxExecutor - from code_sandboxes import LocalEvalSandbox + from code_sandboxes.eval_sandbox import EvalSandbox HAS_AGENT_SKILLS = True except ImportError: HAS_AGENT_SKILLS = False @@ -348,8 +348,8 @@ def create_agent(model: str, codemode: bool) -> tuple[Agent, object | None, obje shared_sandbox = None skills_toolset = None if HAS_AGENT_SKILLS: - shared_sandbox = LocalEvalSandbox() - logger.info("Created shared LocalEvalSandbox for codemode and skills toolsets") + shared_sandbox = EvalSandbox() + logger.info("Created shared EvalSandbox for codemode and skills toolsets") toolset = CodemodeToolset( registry=registry, diff --git a/pyproject.toml b/pyproject.toml index 2e91f22..dd3c6ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ 
allow-direct-references = true [tool.pytest.ini_options] asyncio_mode = "auto" +testpaths = ["tests"] filterwarnings = [ "error", "ignore:There is no current event loop:DeprecationWarning", diff --git a/skills/github/scripts/get_repo.py b/skills/github/scripts/get_repo.py index 86fb4dc..72397f0 100644 --- a/skills/github/scripts/get_repo.py +++ b/skills/github/scripts/get_repo.py @@ -112,8 +112,24 @@ def format_repo_details(repo: dict) -> str: return "\n".join(lines) +class _HelpOnErrorParser(argparse.ArgumentParser): + """ArgumentParser that prints full help on invalid arguments.""" + + def error(self, message: str) -> None: # noqa: D401 + params_help = ( + "\nValid parameters for get_repo:\n" + " repo (positional) owner/repo e.g. 'datalayer/jupyter-ui'\n" + " --format table | json (default: table)\n" + ) + self.print_usage(sys.stderr) + print(f"\nerror: {message}", file=sys.stderr) + print(params_help, file=sys.stderr) + print("Please retry with valid parameters.", file=sys.stderr) + sys.exit(2) + + def main(): - parser = argparse.ArgumentParser( + parser = _HelpOnErrorParser( description="Get details for a specific GitHub repository." ) parser.add_argument( diff --git a/skills/github/scripts/list_issues.py b/skills/github/scripts/list_issues.py index c1fe9bc..d79c755 100644 --- a/skills/github/scripts/list_issues.py +++ b/skills/github/scripts/list_issues.py @@ -115,8 +115,26 @@ def format_table(issues: list[dict]) -> str: return "\n".join(lines) +class _HelpOnErrorParser(argparse.ArgumentParser): + """ArgumentParser that prints full help on invalid arguments.""" + + def error(self, message: str) -> None: # noqa: D401 + params_help = ( + "\nValid parameters for list_issues:\n" + " repo (positional) owner/repo e.g. 
'datalayer/agent-runtimes'\n" + " --state open | closed | all (default: open)\n" + " --format table | json (default: table)\n" + " --limit integer — max issues to return (default: 50)\n" + ) + self.print_usage(sys.stderr) + print(f"\nerror: {message}", file=sys.stderr) + print(params_help, file=sys.stderr) + print("Please retry with valid parameters.", file=sys.stderr) + sys.exit(2) + + def main(): - parser = argparse.ArgumentParser( + parser = _HelpOnErrorParser( description="List issues for a GitHub repository." ) parser.add_argument( diff --git a/skills/github/scripts/list_prs.py b/skills/github/scripts/list_prs.py index bbac3c1..a2e71dd 100644 --- a/skills/github/scripts/list_prs.py +++ b/skills/github/scripts/list_prs.py @@ -122,8 +122,26 @@ def format_table(prs: list[dict]) -> str: return "\n".join(lines) +class _HelpOnErrorParser(argparse.ArgumentParser): + """ArgumentParser that prints full help on invalid arguments.""" + + def error(self, message: str) -> None: # noqa: D401 + params_help = ( + "\nValid parameters for list_prs:\n" + " repo (positional) owner/repo e.g. 'datalayer/agent-runtimes'\n" + " --state open | closed | all (default: open)\n" + " --format table | json (default: table)\n" + " --limit integer — max PRs to return (default: 50)\n" + ) + self.print_usage(sys.stderr) + print(f"\nerror: {message}", file=sys.stderr) + print(params_help, file=sys.stderr) + print("Please retry with valid parameters.", file=sys.stderr) + sys.exit(2) + + def main(): - parser = argparse.ArgumentParser( + parser = _HelpOnErrorParser( description="List pull requests for a GitHub repository." 
) parser.add_argument( diff --git a/skills/github/scripts/list_repos.py b/skills/github/scripts/list_repos.py index c4b418d..55bafd6 100644 --- a/skills/github/scripts/list_repos.py +++ b/skills/github/scripts/list_repos.py @@ -116,8 +116,27 @@ def format_table(repos: list[dict]) -> str: return "\n".join(lines) +class _HelpOnErrorParser(argparse.ArgumentParser): + """ArgumentParser that prints full help on invalid arguments.""" + + def error(self, message: str) -> None: # noqa: D401 + params_help = ( + "\nValid parameters for list_repos:\n" + " --visibility all | public | private (default: all)\n" + " --sort updated | created | pushed | full_name (default: updated)\n" + " --format table | json (default: table)\n" + " --limit integer — max repos to return\n" + "\nDo NOT pass 'per_page' — use 'limit' instead.\n" + ) + self.print_usage(sys.stderr) + print(f"\nerror: {message}", file=sys.stderr) + print(params_help, file=sys.stderr) + print("Please retry with valid parameters.", file=sys.stderr) + sys.exit(2) + + def main(): - parser = argparse.ArgumentParser( + parser = _HelpOnErrorParser( description="List all repositories for the authenticated GitHub user." ) parser.add_argument( diff --git a/skills/github/scripts/search_repos.py b/skills/github/scripts/search_repos.py index 3eefcce..1ffa0f8 100644 --- a/skills/github/scripts/search_repos.py +++ b/skills/github/scripts/search_repos.py @@ -125,8 +125,30 @@ def format_table(repos: list[dict], total: int) -> str: return "\n".join(lines) +class _HelpOnErrorParser(argparse.ArgumentParser): + """ArgumentParser that prints full help on invalid arguments.""" + + def error(self, message: str) -> None: # noqa: D401 + params_help = ( + "\nValid parameters for search_repos:\n" + " query (positional) search text e.g. 
'jupyter notebook'\n" + " --language string — filter by language\n" + " --user string — filter by user/owner\n" + " --org string — filter by organization\n" + " --sort stars | forks | updated | best-match (default: best-match)\n" + " --format table | json (default: table)\n" + " --limit integer — max results (default: 20)\n" + "\nDo NOT pass 'per_page' — use 'limit' instead.\n" + ) + self.print_usage(sys.stderr) + print(f"\nerror: {message}", file=sys.stderr) + print(params_help, file=sys.stderr) + print("Please retry with valid parameters.", file=sys.stderr) + sys.exit(2) + + def main(): - parser = argparse.ArgumentParser( + parser = _HelpOnErrorParser( description="Search GitHub repositories." ) parser.add_argument( diff --git a/tests/test_agent_minimal.py b/tests/test_agent_minimal.py index 8a0945b..f725a25 100644 --- a/tests/test_agent_minimal.py +++ b/tests/test_agent_minimal.py @@ -30,7 +30,7 @@ async def main(): # Create executor config = CodeModeConfig( workspace_path=Path(__file__).parent / "examples" / "agent", - sandbox_variant="local-eval" + sandbox_variant="eval" ) executor = CodeModeExecutor(registry=registry, config=config) await executor.setup() diff --git a/tests/test_direct_execution.py b/tests/test_direct_execution.py index 3b5f3b5..b4962e1 100644 --- a/tests/test_direct_execution.py +++ b/tests/test_direct_execution.py @@ -30,7 +30,7 @@ async def main(): print("\n=== Setting up executor ===") config = CodeModeConfig( workspace_path=Path(__file__).parent / "examples" / "agent", - sandbox_variant="local-eval" + sandbox_variant="eval" ) executor = CodeModeExecutor(registry=registry, config=config) await executor.setup() diff --git a/tests/test_executor_async.py b/tests/test_executor_async.py index f74c6e3..07dc292 100644 --- a/tests/test_executor_async.py +++ b/tests/test_executor_async.py @@ -7,7 +7,7 @@ async def main(): # Create a sandbox - sandbox = Sandbox.create(variant="local-eval") + sandbox = Sandbox.create(variant="eval") sandbox.start() 
# Set up an executor mock