✨ Feat: Add pytest and agent tests

PythonicNinja · PythonicNinja · commit 2ccf787bf10a · 2025-11-09T21:17:29.000+01:00
- Introduces a pytest suite covering the SimpleAgent orchestration logic and the Python tool's import filter.
- Adds new test files in the tests/ directory.
- Includes mock backends and tools for isolated testing.
- Implements tests for direct responses, tool execution, and edge cases.
- Updates Makefile to include a 'test' target.
- Adds pytest and pytest-cov to dev dependencies.
- Modifies pyproject.toml and requirements.txt to include testing dependencies.
diff --git a/Makefile b/Makefile
@@ -9,8 +9,9 @@ DEFAULT_PYTHON := $(if $(wildcard $(VENV_BIN)/python),$(VENV_BIN)/python,)
 
 PYTHON ?= $(if $(DEFAULT_PYTHON),$(DEFAULT_PYTHON),$(UVX) python)
 RUFF ?= $(UVX) ruff
+PYTEST ?= pytest -vv --cov=simple_agent --cov-report=term-missing
 
-.PHONY: venv run lint tools install clean
+.PHONY: venv run lint tools install clean test
 
 venv:
 	$(VENV_PYTHON) -m venv $(VENV)
@@ -25,6 +26,9 @@ run:
 tools:
 	$(PYTHON) main.py --list-tools
 
+test:
+	$(PYTEST)
+
 lint:
 	$(RUFF) check simple_agent main.py
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -15,6 +15,8 @@ dependencies = [
 [project.optional-dependencies]
 dev = [
     "ruff>=0.6",
+    "pytest>=8.3",
+    "pytest-cov>=5.0",
 ]
 
 [build-system]
diff --git a/requirements.txt b/requirements.txt
@@ -2,3 +2,5 @@ python-dotenv>=1.0
 requests>=2.32
 psutil>=5.9
 beautifulsoup4>=4.12
+pytest>=8.3
+pytest-cov>=5.0
diff --git a/simple_agent/config.py b/simple_agent/config.py
@@ -6,12 +6,10 @@
 from dataclasses import dataclass
 from functools import lru_cache
 from typing import Literal
-
 from dotenv import load_dotenv
 
 load_dotenv()
 
-
 BackendName = Literal["chatgpt", "gemini"]
 
 
@@ -46,7 +44,7 @@ def from_env(cls) -> "Settings":
                 "AGENT_SYSTEM_PROMPT",
                 "You are a concise assistant. Use tools only when strictly necessary.",
             )
-            or "You are a concise assistant. Use tools only when strictly necessary.",
+                          or "You are a concise assistant. Use tools only when strictly necessary.",
             openai_api_key=cls._get_env("OPENAI_API_KEY"),
             openai_model=cls._get_env("OPENAI_MODEL", "gpt-4o-mini"),
             gemini_api_key=cls._get_env("GEMINI_API_KEY"),
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -0,0 +1,10 @@
+"""Ensure the project package is importable when running tests."""
+
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).resolve().parents[1]
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
diff --git a/tests/test_agent.py b/tests/test_agent.py
@@ -0,0 +1,92 @@
+"""Tests for the SimpleAgent orchestration logic."""
+
+from __future__ import annotations
+
+from typing import Iterable, List
+
+import pytest
+
+from simple_agent.agent import SimpleAgent, _truncate
+from simple_agent.backends.base import LLMBackend, Message
+from simple_agent.tools.base import SimpleTool, Tool
+
+
+class DummyBackend(LLMBackend):
+    """Backend that returns predefined responses for each call."""
+
+    def __init__(self, responses: Iterable[str]) -> None:
+        self._responses = list(responses)
+        self.calls: List[List[Message]] = []
+
+    def generate(self, messages: List[Message]) -> str:
+        if not self._responses:
+            raise AssertionError("DummyBackend has no more responses queued.")
+        # Snapshot the conversation (each message shallow-copied) so tests can inspect it.
+        self.calls.append([msg.copy() for msg in messages])
+        return self._responses.pop(0)
+
+
+class RecordingTool(SimpleTool):
+    """Tool that records the inputs it receives."""
+
+    def __init__(self) -> None:
+        super().__init__(name="echo", description="Echo the provided input.")
+        self.invocations: list[str] = []
+
+    def run(self, query: str) -> str:
+        self.invocations.append(query)
+        return f"tool ran with: {query}"
+
+
+def _make_agent(responses: Iterable[str], tools: Iterable[Tool] = ()) -> tuple[SimpleAgent, DummyBackend]:
+    backend = DummyBackend(responses)
+    agent = SimpleAgent(backend=backend, tools=list(tools), system_prompt="Be helpful.")
+    return agent, backend
+
+
+def test_agent_returns_direct_response_without_tool_use() -> None:
+    agent, backend = _make_agent(["  final answer  "])
+
+    result = agent.run("Question?")
+
+    assert result == "final answer"
+    assert len(backend.calls) == 1
+    assert backend.calls[0][0]["role"] == "system"
+
+
+def test_agent_executes_requested_tool_and_returns_model_reply() -> None:
+    tool = RecordingTool()
+    agent, backend = _make_agent(
+        responses=[
+            '{"tool":"echo","input":"calculate pi"}',
+            "Result is 3.14",
+        ],
+        tools=[tool],
+    )
+
+    result = agent.run("What is pi?", max_turns=2)
+
+    assert result == "Result is 3.14"
+    assert tool.invocations == ["calculate pi"]
+    assert len(backend.calls) == 2
+    # Ensure the second backend call contains the tool output in the history.
+    assert backend.calls[1][-1]["content"].startswith("[Tool:echo] tool ran with: calculate pi")
+
+
+@pytest.mark.parametrize(
+    "text,expected",
+    [
+        ('{"tool":"echo","input":"test"}', {"tool": "echo", "input": "test"}),
+        ("```json\n{\"tool\": \"echo\"}\n```", {"tool": "echo"}),
+        ("Some text", None),
+    ],
+)
+def test_maybe_extract_tool_request_variants(text: str, expected: dict | None) -> None:
+    assert SimpleAgent._maybe_extract_tool_request(text) == expected  # type: ignore[arg-type]
+
+
+def test_truncate_adds_ellipsis_when_text_is_long() -> None:
+    text = "abc" * 200
+    truncated = _truncate(text, limit=10)
+    assert truncated.endswith("…")
+    assert len(truncated) == 11
diff --git a/tests/test_python_tool.py b/tests/test_python_tool.py
@@ -0,0 +1,20 @@
+"""Tests for the Python sandbox helper functions."""
+
+from __future__ import annotations
+
+from simple_agent.tools.python_tool import _find_disallowed_imports
+
+
+def test_find_disallowed_imports_blocks_unknown_modules() -> None:
+    code = "import math\nimport secrets\nfrom collections import Counter"
+    blocked = _find_disallowed_imports(code, {"math", "collections"})
+
+    assert blocked == {"secrets"}
+
+
+def test_find_disallowed_imports_marks_relative_imports() -> None:
+    code = "from . import helpers"
+
+    blocked = _find_disallowed_imports(code, {"json"})
+
+    assert "<relative>" in blocked

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -15,6 +15,8 @@ dependencies = [` |
| `15` | `15` | `[project.optional-dependencies]` |
| `16` | `16` | `dev = [` |
| `17` | `17` | `"ruff>=0.6",` |
|  | `18` | `+ "pytest>=8.3",` |
|  | `19` | `+ "pytest-cov>=5.0",` |
| `18` | `20` | `]` |
| `19` | `21` |  |
| `20` | `22` | `[build-system]` |