From ff0da310006b6a88189c4ce6cc639d493a2a2576 Mon Sep 17 00:00:00 2001
From: "Tobias.Mikula" <tobias.mikula@absa.africa>
Date: Mon, 18 May 2026 16:34:41 +0200
Subject: [PATCH 1/2] Python standard skill

---
 skills/python-standards/SKILL.md              | 336 ++++++++++++++++++
 skills/python-standards/evals/evals.json      |  70 ++++
 .../python-standards/evals/trigger-eval.json  | 135 +++++++
 3 files changed, 541 insertions(+)
 create mode 100644 skills/python-standards/SKILL.md
 create mode 100644 skills/python-standards/evals/evals.json
 create mode 100644 skills/python-standards/evals/trigger-eval.json

diff --git a/skills/python-standards/SKILL.md b/skills/python-standards/SKILL.md
new file mode 100644
index 0000000..ad72d8a
--- /dev/null
+++ b/skills/python-standards/SKILL.md
@@ -0,0 +1,336 @@
+---
+name: python-standards
+description: >-
+  Python coding standards for CPS projects. Covers repository-first conventions, type annotations, error handling, testing, logging, documentation, async code, and dependency hygiene. Activate this skill whenever the user is editing or creating .py files, working in a Python project, or asks for help writing functions, classes, tests, modules, or fixing code - even if they do not say "Python" explicitly. Also triggers on questions about coding conventions, type hints, pytest, imports, logging setup, or dependency management in a Python context.
+---
+
+# Python Coding Standards
+
+These standards define how Python code is written across CPS projects. They focus on code quality, safety, and maintainability. Tooling choices vary by project and are not prescribed here.
+
+## Repository-first workflow
+
+Before applying these standards in an existing repository:
+
+- Inspect the nearby Python files and project configuration first.
+- Follow the repository's established formatter, import sorter, linter, type checker, test framework, packaging flow, and docstring style unless the user asks you to change them.
+- If conventions differ across the repo, prefer the pattern already used in the same package or service you are editing.
+- Only introduce new tools or broad convention changes when the user explicitly asks for them.
+
+## Type Annotations
+
+Python is dynamically typed, which means entire categories of bugs (type mismatches, contract violations, null references) can reach production undetected. Type annotations are the primary defence against this.
+
+### Rules
+
+- Annotate all public function signatures: both parameters and return types.
+- Annotate class attributes and instance variables.
+- Use built-in generics (`list[str]`, `dict[str, int]`, `tuple[int, ...]`) instead of importing from `typing` (Python 3.9+ supports this natively).
+- Use `X | None` instead of `Optional[X]` (Python 3.10+ union syntax).
+- Create type aliases for complex types to improve readability.
+
+```python
+# Type alias for a complex structure
+UserRecord = dict[str, str | int | None]
+
+def fetch_users(org_id: str, *, active_only: bool = True) -> list[UserRecord]:
+    ...
+```
+
+### `# type: ignore` is a last resort
+
+The correct response to a type error is to fix the code or refactor the types so the checker is satisfied. `# type: ignore` should only be used when the code is genuinely correct but the type system cannot express it. This is rare.
+
+When suppression is truly unavoidable:
+
+1. Always include the specific error code: `# type: ignore[assignment]`
+2. Always add a justification explaining why the suppression is necessary: `# type: ignore[override]  # covariant return is safe here`
+3. A bare `# type: ignore` without an error code is never acceptable!
+
+If you find yourself reaching for `# type: ignore`, first try these alternatives:
+- Add a `cast()` to make the type explicit.
+- Introduce a `Protocol` or `TypeVar` to express the constraint properly.
+- Refactor the code so the type checker can follow the logic.
+- Use `typing.overload` to express different return types for different inputs.
+
+## Code Structure
+
+### Imports
+
+- Prefer imports at the top of the file. Local imports are acceptable when they avoid optional dependencies at import time, break circular imports, or defer expensive startup work.
+- Group imports in three blocks separated by a blank line: standard library, third-party, local.
+- Prefer absolute imports. Relative imports are acceptable within a package when they improve clarity.
+- Never use wildcard imports (`from module import *`).
+
+```python
+import json
+import logging
+from pathlib import Path
+
+import httpx
+from pydantic import BaseModel
+
+from core.config import Settings
+from core.errors import AppError
+```
+
+### Module organisation
+
+- One primary concern per module. If a module has grown beyond ~300 lines, consider splitting it.
+- Keep `__init__.py` files minimal. They should re-export public API, not contain logic.
+- Separate business logic from infrastructure (I/O, environment variables, network calls). Business logic should be testable without mocking external systems.
+
+### Naming
+
+Follow PEP 8 conventions:
+- `snake_case` for functions, methods, variables, and module names.
+- `PascalCase` for classes.
+- `UPPER_SNAKE_CASE` for module-level constants.
+- Prefix private attributes and methods with a single underscore (`_internal_method`).
+- Avoid abbreviations unless they are universally understood (`id`, `url`, `http`).
+
+## Error Handling
+
+### Principles
+
+- Catch specific exceptions, never bare `except:` or `except Exception:`.
+- Use `raise ... from err` to preserve the exception chain. This makes debugging significantly easier because the full traceback is visible.
+- Create custom exception classes for domain-specific errors rather than reusing generic built-ins.
+- Fail fast and fail loudly: validate inputs early and raise immediately on invalid state.
+
+```python
+class VendorTimeoutError(AppError):
+    """Raised when a vendor API call exceeds the configured timeout."""
+
+    def __init__(self, vendor: str, timeout_seconds: float) -> None:
+        super().__init__(f"Vendor '{vendor}' did not respond within {timeout_seconds}s")
+        self.vendor = vendor
+        self.timeout_seconds = timeout_seconds
+
+
+def call_vendor(vendor: str, payload: dict[str, str]) -> VendorResponse:
+    try:
+        response = client.post(url, json=payload, timeout=timeout)
+    except httpx.TimeoutException as err:
+        raise VendorTimeoutError(vendor, timeout) from err
+```
+
+### Resource management
+
+- Always use context managers (`with`) for resources that need cleanup: files, connections, locks.
+- Prefer `contextlib.contextmanager` for lightweight resource wrappers.
+
+```python
+from collections.abc import Generator
+from contextlib import contextmanager
+
+@contextmanager
+def db_transaction(conn: Connection) -> Generator[Cursor, None, None]:
+    cursor = conn.cursor()
+    try:
+        yield cursor
+        conn.commit()
+    except Exception:
+        conn.rollback()
+        raise
+```
+
+## Runtime Validation
+
+Static type checking catches many bugs, but it cannot verify data that crosses system boundaries (user input, API responses, configuration files, environment variables). Validate this data at the boundary where it enters the system.
+
+### Principles
+
+- Validate external data at the point of ingestion, not deep inside business logic.
+- Prefer strict validation: reject unknown fields and unexpected types rather than silently coercing.
+- Keep validated data immutable after creation when practical. This prevents accidental mutation as data flows through the system.
+- For vendor API responses, be lenient about unknown fields (vendors may add them) but strict about expected fields.
+- Define data contracts as explicit models or data classes, not raw dicts. Typed structures make the expected shape discoverable and enforceable.
+
+## Testing
+
+### Structure
+
+- Follow the repository's existing test layout. In repos that mirror the source tree, `src/handlers/` maps to `tests/unit/handlers/`.
+- Name test files `test_<module>.py` and test functions `test_<behaviour>` unless the repository already uses a different convention.
+- Separate unit tests (no I/O, no network) from integration tests (real services, containers).
+
+### Writing tests
+
+- Each test should verify one behaviour. If a test name contains "and", it is probably testing two things.
+- Use `assert expected == actual` (not the reverse). Put the known-good value on the left.
+- Use fixtures for shared setup. Avoid deep fixture chains that obscure what the test actually needs.
+- Mock external dependencies (databases, APIs, file systems) in unit tests. Never make real network calls.
+- Always use `pytest-mock`'s `mocker` fixture for mocking. Never import `unittest.mock` directly - it bypasses pytest's fixture lifecycle and teardown guarantees.
+
+```python
+import pytest
+from pytest_mock import MockerFixture
+
+@pytest.fixture
+def sample_config() -> AppConfig:
+    return AppConfig(region="eu-west-1", timeout=30)
+
+
+def test_timeout_raises_after_configured_duration(
+    sample_config: AppConfig,
+    mocker: MockerFixture,
+) -> None:
+    mocker.patch.object(HttpClient, "post", side_effect=httpx.TimeoutException("timeout"))
+    with pytest.raises(VendorTimeoutError) as exc_info:
+        call_vendor("test-vendor", {}, config=sample_config)
+    assert exc_info.value.timeout_seconds == 30
+```
+
+### Test hygiene
+
+- Tests must not depend on execution order.
+- Tests must not share mutable state. Each test starts from a clean slate.
+- Keep module-level setup lightweight so each test's inputs and expectations stay obvious.
+- Avoid testing implementation details; test observable behaviour. If a refactor doesn't change behaviour, tests shouldn't break.
+
+## Logging
+
+### Principles
+
+- Use `logging.getLogger(__name__)`, never `print()` in production code. Print statements are not structured, cannot be filtered by level, and disappear in containerised environments.
+- Use lazy formatting in log calls - pass values as arguments, never use f-strings or `%` formatting inline. This avoids the cost of string formatting when the log level is disabled.
+- Match the format specifier to the argument type: `%s` for strings, `%d` for integers, `%f` for floats. Typed specifiers are self-documenting and make type mismatches visible at the call site.
+
+```python
+logger.info("Processing %d items out of %d", processed, total)
+logger.warning("Retrying request to %s (attempt %d of %d)", url, attempt, max_attempts)
+```
+
+- If the repository uses structured logging, keep field names stable and pass important values as structured fields rather than hiding them in free-form strings.
+- F-strings are acceptable in exception messages where they are always evaluated: `raise ValueError(f"Invalid ID: {user_id}")`.
+
+### Log levels
+
+Use standard levels consistently:
+
+| Level | Use for |
+|-------|---------|
+| `DEBUG` | Development detail: variable values, control flow tracing |
+| `INFO` | Operational events: request handled, job started, config loaded |
+| `WARNING` | Recoverable issues: retry succeeded, deprecated feature used |
+| `ERROR` | Failures that need attention: unhandled exception, vendor error |
+| `CRITICAL` | Non-recoverable: service cannot start, data corruption detected |
+
+### Security
+
+- **Never log credentials, tokens, API keys, or PII.** The blast radius of a log leak is the same as a credential leak.
+- Sanitise or redact sensitive fields before logging.
+- Be cautious with `repr()` or `str()` on objects that may contain secrets.
+
+## Documentation
+
+### Docstrings
+
+- Write docstrings for all public modules, classes, and functions.
+- Start with a single summary line. This is what tools and humans scan first.
+- Follow the repository's existing docstring style (Google, NumPy, reST, or concise single-line forms). If the repository has no clear convention, choose one style and apply it consistently.
+- Use single backticks for inline code references in docstrings.
+
+```python
+def retry_with_backoff(
+    func: Callable[[], T],
+    max_attempts: int = 3,
+    base_delay: float = 1.0,
+) -> T:
+    """Execute `func` with exponential backoff on failure.
+
+    Args:
+        func: Zero-argument callable to retry.
+        max_attempts: Maximum number of attempts before giving up.
+        base_delay: Initial delay in seconds, doubled after each failure.
+
+    Returns:
+        The return value of `func` on success.
+
+    Raises:
+        RetryExhaustedError: If all attempts fail.
+    """
+```
+
+### Comments
+
+- Code should be self-explanatory through good naming. Comment the *why*, not the *what*.
+- Do not use separator comments (`# -----------`) to divide sections. Use functions or classes instead.
+- Remove commented-out code. Version control preserves history.
+
+## Idiomatic Python
+
+### Prefer modern syntax
+
+- Use f-strings for string formatting (except in log calls; see Logging above).
+- Use `pathlib.Path` instead of `os.path` for file system operations.
+- Use dataclasses or typed models instead of plain dicts for structured data.
+- Use `enum.Enum` for fixed sets of choices rather than string constants.
+- Use list/dict/set comprehensions instead of `map()`/`filter()` with lambdas.
+- Prefer `tuple` over `list` for sequences that should not change after creation.
+- Use `frozenset` for fixed sets that need to be hashable or used as dict keys.
+- Use `@dataclass(frozen=True)` for data objects that must not be mutated after construction - frozen dataclasses are hashable and raise `FrozenInstanceError` on accidental assignment.
+
+### Defensive patterns
+
+- Use `if x is None` rather than `if not x` when checking for `None`. Empty strings, zero, and empty collections are falsy but not `None`.
+- Prefer `dict.get(key, default)` over catching `KeyError` for optional lookups.
+- Use `functools.lru_cache` or `functools.cache` for expensive pure computations.
+- Avoid mutable default arguments (`def f(items: list[str] = [])`). Use `None` and create inside the function body.
+
+```python
+def process_items(items: list[str] | None = None) -> list[str]:
+    if items is None:
+        items = []
+    ...
+```
+
+### Async patterns
+
+When writing async code:
+
+- Use `async with` for async context managers (HTTP sessions, DB connections).
+- Use `asyncio.gather()` for concurrent independent tasks rather than sequential `await`.
+- Never mix sync and async I/O. A blocking call in an async function starves the event loop.
+- Use `asyncio.to_thread()` for blocking sync I/O or lightweight legacy sync helpers. For truly CPU-bound work, prefer a process pool, worker, or dedicated service.
+
+## Dependencies
+
+- Follow the repository's existing dependency-management approach. For new projects, or repos that already use modern packaging, prefer `pyproject.toml` with PEP 621 metadata.
+- If the repository uses `requirements.txt`, constraints files, Poetry, uv, or another established workflow, stay consistent unless the user explicitly asks for a migration.
+- Use version constraints that match the project type and release strategy: libraries often use compatible ranges, while applications often lock more tightly for reproducible deployments.
+- Separate runtime dependencies from dev/test dependencies when the existing tooling supports it.
+- When adding or changing a dependency, update the related lockfile or constraints file in the same change if the repository tracks one.
+
+## Database
+
+CPS projects prefer to access databases directly using `aiosql` to keep SQL out of Python string literals.
+
+- Define all queries in `.sql` files. `aiosql` loads them and exposes each named query as a typed callable. SQL stays reviewable, syntax-highlighted, and separated from application logic.
+- Name query files by domain: `users.sql`, `payments.sql`. One file per logical area keeps queries discoverable.
+- Never inline SQL as string literals in Python. String-embedded SQL cannot be reviewed independently, is not syntax-highlighted, and is a SQL-injection foothold when variables are interpolated carelessly.
+
+```python
+import aiosql
+import psycopg2
+
+# queries/users.sql contains:
+# -- name: get_active^
+# SELECT id, email FROM users WHERE active = true;
+
+queries = aiosql.from_path("queries/", "psycopg2")
+
+def get_active_users(conn: psycopg2.extensions.connection) -> list[UserRecord]:
+    return queries.users.get_active(conn)
+```
+
+Parameterised queries in `.sql` files are safe from injection by construction - `aiosql` passes values through the driver's parameter binding, never string interpolation.
+
+## Security
+
+- Never commit credentials, API keys, or tokens to source code. Load secrets from environment variables or a secrets manager at runtime.
+- Use the `secrets` module for generating tokens and random values, not `random` (which is not cryptographically secure).
+- Validate and sanitise all user-provided input before using it in queries, commands, or file paths.
+- Be explicit about timeouts on all network calls. Hanging connections are a denial-of-service vector.
+- Lock or pin dependencies according to the repository's release process, and audit them regularly. Supply-chain attacks through compromised packages are a real and growing threat.
diff --git a/skills/python-standards/evals/evals.json b/skills/python-standards/evals/evals.json
new file mode 100644
index 0000000..5096780
--- /dev/null
+++ b/skills/python-standards/evals/evals.json
@@ -0,0 +1,70 @@
+{
+  "skill_name": "python-standards",
+  "evals": [
+    {
+      "id": 1,
+      "category": "happy-path",
+      "prompt": "Write a function that fetches user profiles from an external API, with proper error handling and type annotations.",
+      "context": "User is editing src/services/user_service.py",
+      "expected_output": "Function has full type annotations (params + return), uses httpx or similar with an explicit timeout, catches specific exceptions, uses raise...from for chaining, and follows the coding standards (no bare except, proper naming).",
+      "files": [],
+      "expectations": [
+        "Function signature has type annotations for all parameters and return type",
+        "Network call has an explicit timeout set",
+        "Exception handling catches specific exceptions, not bare except or broad Exception",
+        "Uses raise...from to preserve exception chain",
+        "Naming follows snake_case for the function and variables",
+        "Uses modern Python syntax (union types or built-in generics where applicable)"
+      ]
+    },
+    {
+      "id": 2,
+      "category": "happy-path",
+      "prompt": "Create a validation module that accepts user registration input (name, email, age) and validates it at the boundary. Use dataclasses or pydantic.",
+      "context": "User is creating src/validators/registration.py",
+      "expected_output": "Module defines a typed model or dataclass for the input, validates at the ingestion boundary, rejects invalid data strictly, and keeps validated data immutable. Docstrings are present on public classes/functions.",
+      "files": [],
+      "expectations": [
+        "Uses a dataclass, pydantic model, or typed structure - not raw dicts",
+        "Validation happens at the boundary (constructor or parse method), not deep in logic",
+        "Invalid input is rejected with a clear error, not silently coerced",
+        "Public class/function has a docstring with a summary line",
+        "Type annotations are present on all public signatures",
+        "Imports are grouped correctly: stdlib, third-party, local"
+      ]
+    },
+    {
+      "id": 3,
+      "category": "happy-path",
+      "prompt": "Write tests for the retry utility. It retries a function with exponential backoff and raises RetryExhaustedError after max attempts.",
+      "context": "User is editing tests/unit/test_retry.py",
+      "expected_output": "Tests are well-structured: one behaviour per test, external calls are mocked, fixtures are used for shared setup. Assertions use expected == actual order. No real network calls.",
+      "files": [],
+      "expectations": [
+        "Each test function tests one behaviour (names do not contain 'and')",
+        "External dependencies are mocked, no real I/O or network calls",
+        "Uses assert expected == actual ordering",
+        "Tests verify observable behaviour: success after retry, failure after exhaustion, correct delay timing",
+        "Test function names follow test_<behaviour> convention",
+        "Return type annotation -> None is present on test functions"
+      ]
+    },
+    {
+      "id": 4,
+      "category": "happy-path",
+      "prompt": "Refactor this function to follow best practices:\n\ndef get_data(url, items=[]):\n    import requests\n    try:\n        r = requests.get(url)\n        data = r.json()\n    except:\n        print('error')\n        return None\n    for i in data:\n        items.append(i)\n    return items",
+      "context": "User is editing src/utils/fetcher.py",
+      "expected_output": "Refactored code fixes the mutable default argument, moves imports to top, catches specific exceptions, replaces print with logging, adds type annotations, and adds a timeout to the request.",
+      "files": [],
+      "expectations": [
+        "Mutable default argument is replaced with None + conditional creation inside the body",
+        "Import is at the top of the file (or justified if local)",
+        "Bare except is replaced with specific exception types",
+        "print() is replaced with logging or the error is raised/re-raised",
+        "Type annotations are added to the function signature",
+        "Network call includes an explicit timeout",
+        "Uses if x is None pattern rather than if not x for the None check"
+      ]
+    }
+  ]
+}
diff --git a/skills/python-standards/evals/trigger-eval.json b/skills/python-standards/evals/trigger-eval.json
new file mode 100644
index 0000000..bc4d4f1
--- /dev/null
+++ b/skills/python-standards/evals/trigger-eval.json
@@ -0,0 +1,135 @@
+[
+  {
+    "id": "t01-write-function-in-py-file",
+    "query": "Write a function that processes CSV files and returns aggregated results.",
+    "context": "User is editing src/etl/processor.py",
+    "should_trigger": true,
+    "reason": "User is in a .py file and asks to write a function - context-based trigger."
+  },
+  {
+    "id": "t02-review-module-quality",
+    "query": "Review this module for code quality and best practices.",
+    "context": "User is viewing src/handlers/auth.py",
+    "should_trigger": true,
+    "reason": "User is in a .py file and asks for quality review - context triggers the skill."
+  },
+  {
+    "id": "t03-refactor-tests",
+    "query": "Refactor these tests to follow better patterns and reduce duplication.",
+    "context": "User is editing tests/unit/test_auth.py",
+    "should_trigger": true,
+    "reason": "Editing a .py test file and asking to refactor - context-based trigger."
+  },
+  {
+    "id": "t04-conventions-question",
+    "query": "What are our coding conventions for type annotations and error handling?",
+    "context": "User is working in a Python project (pyproject.toml present)",
+    "should_trigger": true,
+    "reason": "Convention question inside a Python project - triggers on context + topic."
+  },
+  {
+    "id": "t05-new-service-class",
+    "query": "Create a service class that connects to Redis and handles caching with proper error handling.",
+    "context": "User is creating src/services/cache_service.py",
+    "should_trigger": true,
+    "reason": "Creating a new .py file - context is sufficient to trigger."
+  },
+  {
+    "id": "t06-fix-typing",
+    "query": "Add type annotations to this module and fix the mypy errors.",
+    "context": "User is editing src/utils/helpers.py",
+    "should_trigger": true,
+    "reason": "Working in .py file on type annotations - directly covered."
+  },
+  {
+    "id": "t07-logging-question",
+    "query": "How should I set up logging in this service? Is print() okay?",
+    "context": "User is editing src/services/notifier.py",
+    "should_trigger": true,
+    "reason": "Logging question while in a .py file - triggers on context + topic."
+  },
+  {
+    "id": "t08-async-function",
+    "query": "Write an async function that fetches data from multiple APIs concurrently.",
+    "context": "User is editing src/clients/multi_fetch.py",
+    "should_trigger": true,
+    "reason": "Writing async code in a .py file - skill covers async patterns."
+  },
+  {
+    "id": "t09-fix-bare-excepts",
+    "query": "Can you fix this file? It has bare excepts and mutable defaults everywhere.",
+    "context": "User is editing src/legacy/importer.py",
+    "should_trigger": true,
+    "reason": "Fixing code smells in a .py file - directly covered by the skill."
+  },
+  {
+    "id": "t10-add-dependency",
+    "query": "I'm adding httpx as a dependency. What's the right way to declare it?",
+    "context": "User is in a Python project with pyproject.toml",
+    "should_trigger": true,
+    "reason": "Dependency management in a Python project - covered by skill."
+  },
+  {
+    "id": "t11-explicit-python-mention",
+    "query": "Write a Python script that parses JSON logs and groups errors by type.",
+    "context": "No specific file open",
+    "should_trigger": true,
+    "reason": "Explicit 'Python' keyword in prompt - still triggers even without file context."
+  },
+  {
+    "id": "n01-javascript-function",
+    "query": "Write a TypeScript function that validates email addresses.",
+    "context": "User is editing src/validators/email.ts",
+    "should_trigger": false,
+    "reason": "TypeScript file context - not Python."
+  },
+  {
+    "id": "n02-review-pr",
+    "query": "Review this PR diff for breaking changes before we merge.",
+    "context": "User is viewing a GitHub PR",
+    "should_trigger": false,
+    "reason": "PR review request - routes to pr-review skill."
+  },
+  {
+    "id": "n03-create-issue",
+    "query": "Create a GitHub issue for the login timeout bug.",
+    "context": "No specific file open",
+    "should_trigger": false,
+    "reason": "Issue creation - routes to create-issue skill."
+  },
+  {
+    "id": "n04-kudos",
+    "query": "Give kudos to Martin for fixing the deployment pipeline.",
+    "context": "No specific file open",
+    "should_trigger": false,
+    "reason": "Kudos workflow - routes to kudos skill."
+  },
+  {
+    "id": "n05-sql-query",
+    "query": "Write a PostgreSQL query that joins users and orders tables.",
+    "context": "User is editing migrations/001_create_tables.sql",
+    "should_trigger": false,
+    "reason": "SQL file context - not Python."
+  },
+  {
+    "id": "n06-docker-setup",
+    "query": "Create a Dockerfile for this application.",
+    "context": "User is editing Dockerfile",
+    "should_trigger": false,
+    "reason": "Docker config task, not Python code."
+  },
+  {
+    "id": "n07-git-help",
+    "query": "How do I rebase my branch onto main and resolve conflicts?",
+    "context": "No specific file open",
+    "should_trigger": false,
+    "reason": "Git workflow question, not Python coding."
+  },
+  {
+    "id": "n08-crossall-update",
+    "query": "Generate the crossall update report for the last two weeks.",
+    "context": "No specific file open",
+    "should_trigger": false,
+    "reason": "Progress reporting - routes to crossall-changes skill."
+  }
+]

From c1291f35dca8580a47b1166ae3032144a2991e50 Mon Sep 17 00:00:00 2001
From: "Tobias.Mikula" <tobias.mikula@absa.africa>
Date: Thu, 11 Jun 2026 12:17:18 +0200
Subject: [PATCH 2/2] New version of the skill based on the review comments.

---
 skills/python-standards/SKILL.md              | 350 ++++--------------
 .../python-standards/references/examples.md   | 106 ++++++
 skills/python-standards/references/testing.md |  52 +++
 3 files changed, 222 insertions(+), 286 deletions(-)
 create mode 100644 skills/python-standards/references/examples.md
 create mode 100644 skills/python-standards/references/testing.md

diff --git a/skills/python-standards/SKILL.md b/skills/python-standards/SKILL.md
index ad72d8a..f04a9e1 100644
--- a/skills/python-standards/SKILL.md
+++ b/skills/python-standards/SKILL.md
@@ -1,336 +1,114 @@
 ---
 name: python-standards
 description: >-
-  Python coding standards for CPS projects. Covers repository-first conventions, type annotations, error handling, testing, logging, documentation, async code, and dependency hygiene. Activate this skill whenever the user is editing or creating .py files, working in a Python project, or asks for help writing functions, classes, tests, modules, or fixing code - even if they do not say "Python" explicitly. Also triggers on questions about coding conventions, type hints, pytest, imports, logging setup, or dependency management in a Python context.
+  Python coding standards, that covers repository-first conventions, type annotations, error handling, testing, logging, documentation, async code, and dependency hygiene. Activate this skill whenever the user is editing or creating .py files, working in a Python project, or asks for help writing functions, classes, tests, modules, or fixing code - even if they do not say "Python" explicitly. Also triggers on questions about coding conventions, type hints, pytest, imports, logging setup, or dependency management in a Python context.
 ---
 
 # Python Coding Standards
 
-These standards define how Python code is written across CPS projects. They focus on code quality, safety, and maintainability. Tooling choices vary by project and are not prescribed here.
+## Initial phase - Context probe
 
-## Repository-first workflow
+**Repo conventions first.** Check for `pyproject.toml`, `setup.cfg`, `.pre-commit-config.yaml`, `CONTRIBUTING.md`. If found: read and follow them. Skip any rule below that conflicts. Don't introduce new tools unless the user explicitly asks.
 
-Before applying these standards in an existing repository:
+**Infer regime and extras from file path:**
 
-- Inspect the nearby Python files and project configuration first.
-- Follow the repository's established formatter, import sorter, linter, type checker, test framework, packaging flow, and docstring style unless the user asks you to change them.
-- If conventions differ across the repo, prefer the pattern already used in the same package or service you are editing.
-- Only introduce new tools or broad convention changes when the user explicitly asks for them.
+| Signal | Regime | Load extras |
+|--------|--------|-------------|
+| Path contains `tests/`, or filename matches `test_*.py` / `*_test.py` | strict | load `references/testing.md` |
+| Path in `src/`, `app/`, or a named service package | strict | - |
+| Path in `scripts/`, `tools/`, `notebooks/`, or a one-off utility | relaxed | - |
+| Ambiguous - none of the above match | **ask the user**: "strict (production) or relaxed (script)?" | based on answer |
 
-## Type Annotations
+**Relaxed regime:** annotate public API only; `print()` allowed; docstrings recommended but not required.
 
-Python is dynamically typed, which means entire categories of bugs (type mismatches, contract violations, null references) can reach production undetected. Type annotations are the primary defence against this.
+---
 
-### Rules
+## Type Annotations
 
-- Annotate all public function signatures: both parameters and return types.
+- Annotate all public function signatures (params + return). Relaxed: public API only.
 - Annotate class attributes and instance variables.
-- Use built-in generics (`list[str]`, `dict[str, int]`, `tuple[int, ...]`) instead of importing from `typing` (Python 3.9+ supports this natively).
-- Use `X | None` instead of `Optional[X]` (Python 3.10+ union syntax).
+- Use built-in generics: `list[str]`, `dict[str, int]`, `tuple[int, ...]` (Python 3.9+).
+- Use `X | None` not `Optional[X]` (Python 3.10+).
 - Create type aliases for complex types to improve readability.
-
-```python
-# Type alias for a complex structure
-UserRecord = dict[str, str | int | None]
-
-def fetch_users(org_id: str, *, active_only: bool = True) -> list[UserRecord]:
-    ...
-```
-
-### `# type: ignore` is a last resort
-
-The correct response to a type error is to fix the code or refactor the types so the checker is satisfied. `# type: ignore` should only be used when the code is genuinely correct but the type system cannot express it. This is rare.
-
-When suppression is truly unavoidable:
-
-1. Always include the specific error code: `# type: ignore[assignment]`
-2. Always add a justification explaining why the suppression is necessary: `# type: ignore[override]  # covariant return is safe here`
-3. A bare `# type: ignore` without an error code is never acceptable!
-
-If you find yourself reaching for `# type: ignore`, first try these alternatives:
-- Add a `cast()` to make the type explicit.
-- Introduce a `Protocol` or `TypeVar` to express the constraint properly.
-- Refactor the code so the type checker can follow the logic.
-- Use `typing.overload` to express different return types for different inputs.
+- `# type: ignore` only when the type system genuinely can't express correct code. Always include the error code and a justification: `# type: ignore[assignment]  # covariant return is safe here`. A bare `# type: ignore` is never acceptable.
 
 ## Code Structure
 
-### Imports
-
-- Prefer imports at the top of the file. Local imports are acceptable when they avoid optional dependencies at import time, break circular imports, or defer expensive startup work.
-- Group imports in three blocks separated by a blank line: standard library, third-party, local.
-- Prefer absolute imports. Relative imports are acceptable within a package when they improve clarity.
-- Never use wildcard imports (`from module import *`).
-
-```python
-import json
-import logging
-from pathlib import Path
-
-import httpx
-from pydantic import BaseModel
+**Imports:** top of file; three blocks (stdlib / third-party / local) separated by blank lines; never `from x import *`. Local imports acceptable to break circular deps or defer startup cost.
 
-from core.config import Settings
-from core.errors import AppError
-```
+**Module:** one primary concern per module; `__init__.py` re-exports only, no logic; separate business logic from I/O so it's testable without mocking infrastructure.
 
-### Module organisation
-
-- One primary concern per module. If a module has grown beyond ~300 lines, consider splitting it.
-- Keep `__init__.py` files minimal. They should re-export public API, not contain logic.
-- Separate business logic from infrastructure (I/O, environment variables, network calls). Business logic should be testable without mocking external systems.
-
-### Naming
-
-Follow PEP 8 conventions:
-- `snake_case` for functions, methods, variables, and module names.
-- `PascalCase` for classes.
-- `UPPER_SNAKE_CASE` for module-level constants.
-- Prefix private attributes and methods with a single underscore (`_internal_method`).
-- Avoid abbreviations unless they are universally understood (`id`, `url`, `http`).
+**Naming (PEP 8):** `snake_case` functions/variables, `PascalCase` classes, `UPPER_SNAKE_CASE` constants, `_prefix` for private. No abbreviations unless universally understood (`id`, `url`, `http`).
 
 ## Error Handling
 
-### Principles
-
-- Catch specific exceptions, never bare `except:` or `except Exception:`.
-- Use `raise ... from err` to preserve the exception chain. This makes debugging significantly easier because the full traceback is visible.
-- Create custom exception classes for domain-specific errors rather than reusing generic built-ins.
-- Fail fast and fail loudly: validate inputs early and raise immediately on invalid state.
-
-```python
-class VendorTimeoutError(AppError):
-    """Raised when a vendor API call exceeds the configured timeout."""
-
-    def __init__(self, vendor: str, timeout_seconds: float) -> None:
-        super().__init__(f"Vendor '{vendor}' did not respond within {timeout_seconds}s")
-        self.vendor = vendor
-        self.timeout_seconds = timeout_seconds
-
-
-def call_vendor(vendor: str, payload: dict[str, str]) -> VendorResponse:
-    try:
-        response = client.post(url, json=payload, timeout=timeout)
-    except httpx.TimeoutException as err:
-        raise VendorTimeoutError(vendor, timeout) from err
-```
-
-### Resource management
-
-- Always use context managers (`with`) for resources that need cleanup: files, connections, locks.
-- Prefer `contextlib.contextmanager` for lightweight resource wrappers.
-
-```python
-from collections.abc import Generator
-from contextlib import contextmanager
-
-@contextmanager
-def db_transaction(conn: Connection) -> Generator[Cursor, None, None]:
-    cursor = conn.cursor()
-    try:
-        yield cursor
-        conn.commit()
-    except Exception:
-        conn.rollback()
-        raise
-```
+- Catch specific exceptions. Broad `except Exception:` is acceptable only at top-level boundaries (e.g., request handlers, background workers) to log and re-raise or convert to a safe response - never silently swallow.
+- Use `raise ... from err` to preserve the chain.
+- Create domain-specific exception classes instead of reusing generic built-ins.
+- Fail fast: validate inputs early, raise immediately on invalid state.
+- Use `with` for all resources needing cleanup (files, connections, locks).
 
 ## Runtime Validation
 
-Static type checking catches many bugs, but it cannot verify data that crosses system boundaries (user input, API responses, configuration files, environment variables). Validate this data at the boundary where it enters the system.
-
-### Principles
-
-- Validate external data at the point of ingestion, not deep inside business logic.
-- Prefer strict validation: reject unknown fields and unexpected types rather than silently coercing.
-- Keep validated data immutable after creation when practical. This prevents accidental mutation as data flows through the system.
-- For vendor API responses, be lenient about unknown fields (vendors may add them) but strict about expected fields.
-- Define data contracts as explicit models or data classes, not raw dicts. Typed structures make the expected shape discoverable and enforceable.
-
-## Testing
-
-### Structure
-
-- Follow the repository's existing test layout. In repos that mirror the source tree, `src/handlers/` maps to `tests/unit/handlers/`.
-- Name test files `test_<module>.py` and test functions `test_<behaviour>` unless the repository already uses a different convention.
-- Separate unit tests (no I/O, no network) from integration tests (real services, containers).
-
-### Writing tests
-
-- Each test should verify one behaviour. If a test name contains "and", it is probably testing two things.
-- Use `assert expected == actual` (not the reverse). Put the known-good value on the left.
-- Use fixtures for shared setup. Avoid deep fixture chains that obscure what the test actually needs.
-- Mock external dependencies (databases, APIs, file systems) in unit tests. Never make real network calls.
-- Always use `pytest-mock`'s `mocker` fixture for mocking. Never import `unittest.mock` directly - it bypasses pytest's fixture lifecycle and teardown guarantees.
-
-```python
-import pytest
-from pytest_mock import MockerFixture
-
-@pytest.fixture
-def sample_config() -> AppConfig:
-    return AppConfig(region="eu-west-1", timeout=30)
-
-
-def test_timeout_raises_after_configured_duration(
-    sample_config: AppConfig,
-    mocker: MockerFixture,
-) -> None:
-    mocker.patch.object(HttpClient, "post", side_effect=httpx.TimeoutException("timeout"))
-    with pytest.raises(VendorTimeoutError) as exc_info:
-        call_vendor("test-vendor", {}, config=sample_config)
-    assert exc_info.value.timeout_seconds == 30
-```
-
-### Test hygiene
-
-- Tests must not depend on execution order.
-- Tests must not share mutable state. Each test starts from a clean slate.
-- Keep module-level setup lightweight so each test's inputs and expectations stay obvious.
-- Avoid testing implementation details; test observable behaviour. If a refactor doesn't change behaviour, tests shouldn't break.
+- Validate external data (user input, API responses, env vars) at the ingestion boundary, not deep in logic.
+- Strict by default: reject unknown fields and unexpected types; don't silently coerce.
+- Keep validated data immutable after creation when practical.
+- For vendor responses: lenient on unknown fields, strict on expected fields.
 
 ## Logging
 
-### Principles
-
-- Use `logging.getLogger(__name__)`, never `print()` in production code. Print statements are not structured, cannot be filtered by level, and disappear in containerised environments.
-- Use lazy formatting in log calls - pass values as arguments, never use f-strings or `%` formatting inline. This avoids the cost of string formatting when the log level is disabled.
-- Match the format specifier to the argument type: `%s` for strings, `%d` for integers, `%f` for floats. Typed specifiers are self-documenting and make type mismatches visible at the call site.
-
-```python
-logger.info("Processing %d items out of %d", processed, total)
-logger.warning("Retrying request to %s (attempt %d of %d)", url, attempt, max_attempts)
-```
-
-- If the repository uses structured logging, keep field names stable and pass important values as structured fields rather than hiding them in free-form strings.
-- F-strings are acceptable in exception messages where they are always evaluated: `raise ValueError(f"Invalid ID: {user_id}")`.
-
-### Log levels
-
-Use standard levels consistently:
+- Use `logging.getLogger(__name__)` - never `print()` in production code.
+- Lazy format: pass values as args - `logger.info("Processing %d items", count)` - not f-strings in log calls.
+- f-strings are fine in exception messages (always evaluated): `raise ValueError(f"Invalid ID: {user_id}")`.
+- Never log credentials, tokens, API keys, or PII. Sanitise or redact sensitive fields before logging.
 
 | Level | Use for |
 |-------|---------|
-| `DEBUG` | Development detail: variable values, control flow tracing |
-| `INFO` | Operational events: request handled, job started, config loaded |
+| `DEBUG` | Dev detail: variable values, control flow |
+| `INFO` | Operational events: request handled, job started |
 | `WARNING` | Recoverable issues: retry succeeded, deprecated feature used |
-| `ERROR` | Failures that need attention: unhandled exception, vendor error |
-| `CRITICAL` | Non-recoverable: service cannot start, data corruption detected |
-
-### Security
-
-- **Never log credentials, tokens, API keys, or PII.** The blast radius of a log leak is the same as a credential leak.
-- Sanitise or redact sensitive fields before logging.
-- Be cautious with `repr()` or `str()` on objects that may contain secrets.
+| `ERROR` | Failures needing attention |
+| `CRITICAL` | Non-recoverable: service can't start, data corruption |
 
 ## Documentation
 
-### Docstrings
-
-- Write docstrings for all public modules, classes, and functions.
-- Start with a single summary line. This is what tools and humans scan first.
-- Follow the repository's existing docstring style (Google, NumPy, reST, or concise single-line forms). If the repository has no clear convention, choose one style and apply it consistently.
-- Use single backticks for inline code references in docstrings.
-
-```python
-def retry_with_backoff(
-    func: Callable[[], T],
-    max_attempts: int = 3,
-    base_delay: float = 1.0,
-) -> T:
-    """Execute `func` with exponential backoff on failure.
-
-    Args:
-        func: Zero-argument callable to retry.
-        max_attempts: Maximum number of attempts before giving up.
-        base_delay: Initial delay in seconds, doubled after each failure.
-
-    Returns:
-        The return value of `func` on success.
-
-    Raises:
-        RetryExhaustedError: If all attempts fail.
-    """
-```
-
-### Comments
-
-- Code should be self-explanatory through good naming. Comment the *why*, not the *what*.
-- Do not use separator comments (`# -----------`) to divide sections. Use functions or classes instead.
-- Remove commented-out code. Version control preserves history.
+- Docstring on every public module, class, and function. First line: single summary sentence.
+- Follow the repo's existing docstring style (Google, NumPy, reST). No convention → pick one and be consistent.
+- Comment the *why*, not the *what*. Remove commented-out code. No separator comments (`# ---`).
 
 ## Idiomatic Python
 
-### Prefer modern syntax
-
-- Use f-strings for string formatting (except in log calls; see Logging above).
-- Use `pathlib.Path` instead of `os.path` for file system operations.
-- Use dataclasses or typed models instead of plain dicts for structured data.
-- Use `enum.Enum` for fixed sets of choices rather than string constants.
-- Use list/dict/set comprehensions instead of `map()`/`filter()` with lambdas.
-- Prefer `tuple` over `list` for sequences that should not change after creation.
-- Use `frozenset` for fixed sets that need to be hashable or used as dict keys.
-- Use `@dataclass(frozen=True)` for data objects that must not be mutated after construction - frozen dataclasses are hashable and raise `FrozenInstanceError` on accidental assignment.
-
-### Defensive patterns
-
-- Use `if x is None` rather than `if not x` when checking for `None`. Empty strings, zero, and empty collections are falsy but not `None`.
-- Prefer `dict.get(key, default)` over catching `KeyError` for optional lookups.
-- Use `functools.lru_cache` or `functools.cache` for expensive pure computations.
-- Avoid mutable default arguments (`def f(items: list[str] = [])`). Use `None` and create inside the function body.
-
-```python
-def process_items(items: list[str] | None = None) -> list[str]:
-    if items is None:
-        items = []
-    ...
-```
-
-### Async patterns
-
-When writing async code:
-
-- Use `async with` for async context managers (HTTP sessions, DB connections).
-- Use `asyncio.gather()` for concurrent independent tasks rather than sequential `await`.
-- Never mix sync and async I/O. A blocking call in an async function starves the event loop.
-- Use `asyncio.to_thread()` for blocking sync I/O or lightweight legacy sync helpers. For truly CPU-bound work, prefer a process pool, worker, or dedicated service.
+- f-strings for formatting (except log calls - see above).
+- `pathlib.Path` over `os.path`.
+- Dataclasses or typed models over plain dicts. `enum.Enum` for fixed sets.
+- `@dataclass(frozen=True)` for immutable value objects.
+- List/dict/set comprehensions over `map()`/`filter()` with lambdas.
+- `if x is None` not `if not x` for None checks.
+- `dict.get(key, default)` for optional lookups. Use assignment expressions (`:=`) to combine fetch and check: `if value := d.get("key"):` avoids a redundant double-lookup.
+- No mutable default arguments - use `None` + conditional creation in body.
+- `asyncio.gather()` for concurrent independent tasks. Never block the event loop with sync I/O.
+- `asyncio.to_thread()` for blocking sync helpers; process pool for CPU-bound work.
 
 ## Dependencies
 
-- Follow the repository's existing dependency-management approach. For new projects, or repos that already use modern packaging, prefer `pyproject.toml` with PEP 621 metadata.
-- If the repository uses `requirements.txt`, constraints files, Poetry, uv, or another established workflow, stay consistent unless the user explicitly asks for a migration.
-- Use version constraints that match the project type and release strategy: libraries often use compatible ranges, while applications often lock more tightly for reproducible deployments.
-- Separate runtime dependencies from dev/test dependencies when the existing tooling supports it.
-- When adding or changing a dependency, update the related lockfile or constraints file in the same change if the repository tracks one.
+- Follow the repo's existing dependency-management approach.
+- For new projects or repos already using modern packaging, prefer `pyproject.toml` with PEP 621.
+- Match version constraints to project type: libraries → compatible ranges; applications → tighter lockfiles.
+- Separate runtime from dev/test dependencies when tooling supports it.
+- When adding or changing a dependency, update the related lockfile in the same change.
 
 ## Database
 
-CPS projects prefer to access databases directly using `aiosql` to keep SQL out of Python string literals.
-
-- Define all queries in `.sql` files. `aiosql` loads them and exposes each named query as a typed callable. SQL stays reviewable, syntax-highlighted, and separated from application logic.
-- Name query files by domain: `users.sql`, `payments.sql`. One file per logical area keeps queries discoverable.
-- Never inline SQL as string literals in Python. String-embedded SQL cannot be reviewed independently, is not syntax-highlighted, and is a SQL-injection foothold when variables are interpolated carelessly.
-
-```python
-import aiosql
-import psycopg2
-
-# queries/users.sql contains:
-# -- name: get_active^
-# SELECT id, email FROM users WHERE active = true;
-
-queries = aiosql.from_path("queries/", "psycopg2")
-
-def get_active_users(conn: psycopg2.extensions.connection) -> list[UserRecord]:
-    return queries.users.get_active(conn)
-```
-
-Parameterised queries in `.sql` files are safe from injection by construction - `aiosql` passes values through the driver's parameter binding, never string interpolation.
+- Never inline SQL as string literals in Python.
+- Define queries in `.sql` files. Consider `aiosql` to load them as typed callables. Not mandated; follow what the repo already uses.
+- Always pass query parameters through the driver's binding - never string interpolation.
 
 ## Security
 
-- Never commit credentials, API keys, or tokens to source code. Load secrets from environment variables or a secrets manager at runtime.
-- Use the `secrets` module for generating tokens and random values, not `random` (which is not cryptographically secure).
+- Never commit credentials, API keys, or tokens. Load secrets from env vars or a secrets manager at runtime.
+- Use the `secrets` module for tokens and random values - `random` is not cryptographically secure.
 - Validate and sanitise all user-provided input before using it in queries, commands, or file paths.
-- Be explicit about timeouts on all network calls. Hanging connections are a denial-of-service vector.
-- Lock or pin dependencies according to the repository's release process, and audit them regularly. Supply-chain attacks through compromised packages are a real and growing threat.
+- Set explicit timeouts on all network calls - hanging connections are a denial-of-service vector.
+- Keep dependencies locked per the repo's process and audit them regularly.
+
+For code patterns and examples, see `references/examples.md`.
diff --git a/skills/python-standards/references/examples.md b/skills/python-standards/references/examples.md
new file mode 100644
index 0000000..879cbee
--- /dev/null
+++ b/skills/python-standards/references/examples.md
@@ -0,0 +1,106 @@
+# Code Examples
+
+On-demand reference patterns. Load when you need a concrete illustration of a rule.
+
+## Type annotations
+
+```python
+# Type alias for a complex structure
+UserRecord = dict[str, str | int | None]
+
+def fetch_users(org_id: str, *, active_only: bool = True) -> list[UserRecord]:
+    ...
+```
+
+## Imports
+
+```python
+import json
+import logging
+from pathlib import Path
+
+import httpx
+from pydantic import BaseModel
+
+from core.config import Settings
+from core.errors import AppError
+```
+
+## Error handling - custom exception + chaining
+
+```python
+class VendorTimeoutError(AppError):
+    """Raised when a vendor API call exceeds the configured timeout."""
+
+    def __init__(self, vendor: str, timeout_seconds: float) -> None:
+        super().__init__(f"Vendor '{vendor}' did not respond within {timeout_seconds}s")
+        self.vendor = vendor
+        self.timeout_seconds = timeout_seconds
+
+
+def call_vendor(vendor: str, payload: dict[str, str]) -> VendorResponse:
+    try:
+        response = client.post(url, json=payload, timeout=timeout)
+    except httpx.TimeoutException as err:
+        raise VendorTimeoutError(vendor, timeout) from err
+```
+
+## Resource management - context manager
+
+```python
+from collections.abc import Generator
+from contextlib import contextmanager
+
+@contextmanager
+def db_transaction(conn: Connection) -> Generator[Cursor, None, None]:
+    cursor = conn.cursor()
+    try:
+        yield cursor
+        conn.commit()
+    except Exception:
+        conn.rollback()
+        raise
+```
+
+## Logging
+
+```python
+logger.info("Processing %d items out of %d", processed, total)
+logger.warning("Retrying request to %s (attempt %d of %d)", url, attempt, max_attempts)
+```
+
+## Mutable default argument
+
+```python
+# Wrong
+def process_items(items: list[str] = []) -> list[str]: ...
+
+# Right
+def process_items(items: list[str] | None = None) -> list[str]:
+    if items is None:
+        items = []
+    ...
+```
+
+## Docstring
+
+```python
+def retry_with_backoff(
+    func: Callable[[], T],
+    max_attempts: int = 3,
+    base_delay: float = 1.0,
+) -> T:
+    """Execute `func` with exponential backoff on failure.
+
+    Args:
+        func: Zero-argument callable to retry.
+        max_attempts: Maximum number of attempts before giving up.
+        base_delay: Initial delay in seconds, doubled after each failure.
+
+    Returns:
+        The return value of `func` on success.
+
+    Raises:
+        RetryExhaustedError: If all attempts fail.
+    """
+```
diff --git a/skills/python-standards/references/testing.md b/skills/python-standards/references/testing.md
new file mode 100644
index 0000000..17d064b
--- /dev/null
+++ b/skills/python-standards/references/testing.md
@@ -0,0 +1,52 @@
+# Testing Reference
+Loaded automatically when working in test files (`tests/`, `test_*.py`, `*_test.py`).
+
+## Structure
+- Follow the repository's existing test layout first.
+- Name test files `test_<module>.py` and functions `test_<behaviour>` unless the repo already uses a different convention.
+- Separate tests with no I/O from tests that use real services or containers.
+- `unit/` subdirectory is one common pattern but not required - match what the repo already has.
+- Group unit tests under a class decorated with `@pytest.mark.unit`.
+
+```python
+@pytest.mark.unit
+class TestJobState:
+    def test_terminal_states(self) -> None:
+        assert JobState.COMPLETED.value == "completed"
+        assert JobState.FAILED.value == "failed"
+
+    def test_non_terminal_states(self) -> None:
+        assert JobState.ACCEPTED.value == "accepted"
+        assert JobState.PROCESSING.value == "processing"
+```
+
+## Writing tests
+- One behaviour per test. If the name contains "and", it is probably two tests.
+- `assert expected == actual` - known-good value on the left.
+- Fixtures for shared setup. Avoid deep fixture chains that obscure what the test actually needs.
+- Importing inside a test function is acceptable when testing a fresh import (e.g. singleton initialisation).
+- Mock external dependencies (databases, APIs, file systems) in unit tests. No real network calls.
+- Use `pytest-mock`'s `mocker` fixture - never import `unittest.mock` directly.
+
+```python
+import pytest
+from pytest_mock import MockerFixture
+
+@pytest.fixture
+def sample_config() -> AppConfig:
+    return AppConfig(region="eu-west-1", timeout=30)
+
+
+@pytest.mark.unit
+class TestVendorClient:
+    def test_timeout_raises_after_configured_duration(self, sample_config: AppConfig, mocker: MockerFixture) -> None:
+        mocker.patch.object(HttpClient, "post", side_effect=httpx.TimeoutException("timeout"))
+        with pytest.raises(VendorTimeoutError) as exc_info:
+            call_vendor("test-vendor", {}, config=sample_config)
+        assert exc_info.value.timeout_seconds == 30
+```
+
+## Test hygiene
+- Tests must not depend on execution order.
+- Tests must not share mutable state - each test starts from a clean slate.
+- Test observable behaviour, not implementation details. A refactor that doesn't change behaviour shouldn't break tests.