diff --git a/.githooks/pre-push b/.githooks/pre-push index d96954b5..55a0b5a9 100755 --- a/.githooks/pre-push +++ b/.githooks/pre-push @@ -16,9 +16,9 @@ echo "pre-push: running make typecheck" make typecheck # Check extras if they exist and have changes -if [ -d "evaluators/contrib/galileo" ]; then - echo "pre-push: checking evaluators/contrib/galileo" - cd evaluators/contrib/galileo +if [ -d "rules/contrib/galileo" ]; then + echo "pre-push: checking rules/contrib/galileo" + cd rules/contrib/galileo uv run --extra dev ruff check --config ../../../pyproject.toml src/ uv run --extra dev mypy --config-file ../../../pyproject.toml src/ cd "$REPO_ROOT" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71cb3cbf..b3eb3749 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -74,7 +74,7 @@ jobs: "coverage-server.xml", "coverage-sdk.xml", ] - contrib_reports = sorted(path.name for path in Path(".").glob("coverage-evaluators-*.xml")) + contrib_reports = sorted(path.name for path in Path(".").glob("coverage-rules-*.xml")) print(f"files={','.join([*base_reports, *contrib_reports])}") PY diff --git a/.github/workflows/pr-title.yaml b/.github/workflows/pr-title.yaml index 4770a84f..eedfd139 100644 --- a/.github/workflows/pr-title.yaml +++ b/.github/workflows/pr-title.yaml @@ -33,7 +33,7 @@ jobs: server models engine - evaluators + rules ci docs infra diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 39e23bc1..17e9e5f0 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -53,23 +53,23 @@ jobs: run: | rm -rf release-dists mkdir -p release-dists/models - mkdir -p release-dists/evaluators/builtin - mkdir -p release-dists/evaluators/contrib - mkdir -p release-dists/pypi/evaluators + mkdir -p release-dists/rules/builtin + mkdir -p release-dists/rules/contrib + mkdir -p release-dists/pypi/rules mkdir -p release-dists/sdks/python mkdir -p release-dists/server cp -R models/dist 
release-dists/models/ - cp -R evaluators/builtin/dist release-dists/evaluators/builtin/ - cp evaluators/builtin/dist/* release-dists/pypi/evaluators/ + cp -R rules/builtin/dist release-dists/rules/builtin/ + cp rules/builtin/dist/* release-dists/pypi/rules/ cp -R sdks/python/dist release-dists/sdks/python/ cp -R server/dist release-dists/server/ - for contrib_dir in evaluators/contrib/*/dist; do + for contrib_dir in rules/contrib/*/dist; do contrib_name="$(basename "$(dirname "$contrib_dir")")" - mkdir -p "release-dists/evaluators/contrib/$contrib_name" - cp -R "$contrib_dir" "release-dists/evaluators/contrib/$contrib_name/" - cp "$contrib_dir"/* release-dists/pypi/evaluators/ + mkdir -p "release-dists/rules/contrib/$contrib_name" + cp -R "$contrib_dir" "release-dists/rules/contrib/$contrib_name/" + cp "$contrib_dir"/* release-dists/pypi/rules/ done - name: Upload built distributions @@ -101,7 +101,7 @@ jobs: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} - publish-evaluators: + publish-rules: runs-on: ubuntu-latest needs: [release, publish-models] if: needs.release.outputs.released == 'true' @@ -115,20 +115,20 @@ jobs: name: release-dists path: release-dists - - name: Publish evaluator distributions to PyPI + - name: Publish rule distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - packages-dir: release-dists/pypi/evaluators/ + packages-dir: release-dists/pypi/rules/ user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} publish-server: runs-on: ubuntu-latest - needs: [release, publish-evaluators] + needs: [release, publish-rules] if: >- always() && needs.release.outputs.released == 'true' && - needs.publish-evaluators.result == 'success' + needs.publish-rules.result == 'success' permissions: id-token: write @@ -148,11 +148,11 @@ jobs: publish-sdk: runs-on: ubuntu-latest - needs: [release, publish-evaluators] + needs: [release, publish-rules] if: >- always() && needs.release.outputs.released == 'true' && - 
needs.publish-evaluators.result == 'success' + needs.publish-rules.result == 'success' permissions: id-token: write @@ -172,7 +172,7 @@ jobs: upload-release-assets: runs-on: ubuntu-latest - needs: [release, publish-models, publish-evaluators, publish-server, publish-sdk] + needs: [release, publish-models, publish-rules, publish-server, publish-sdk] if: >- always() && needs.release.outputs.released == 'true' @@ -194,10 +194,10 @@ jobs: shopt -s nullglob assets=( release-dists/models/dist/* - release-dists/evaluators/builtin/dist/* + release-dists/rules/builtin/dist/* release-dists/sdks/python/dist/* release-dists/server/dist/* - release-dists/evaluators/contrib/*/dist/* + release-dists/rules/contrib/*/dist/* ) if [ ${#assets[@]} -eq 0 ]; then diff --git a/AGENTS.md b/AGENTS.md index a409e886..1b73b85e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,12 +22,12 @@ Forwarded targets: ## Repo layout (uv workspace members) -- `models/`: shared Pydantic v2 models and evaluator base classes (`models/src/agent_control_models/`) -- `engine/`: **control evaluation engine and evaluator system** — all evaluation logic, evaluator discovery, and evaluator orchestration lives here (`engine/src/agent_control_engine/`) +- `models/`: shared Pydantic v2 models and rule base classes (`models/src/agent_control_models/`) +- `engine/`: **control evaluation engine and rule system** — all evaluation logic, rule discovery, and rule orchestration lives here (`engine/src/agent_control_engine/`) - `server/`: FastAPI server (`server/src/agent_control_server/`) - `sdks/python/`: Python SDK — uses engine for evaluation (`sdks/python/src/agent_control/`) -- `evaluators/builtin/`: builtin evaluator implementations (`evaluators/builtin/src/agent_control_evaluators/`) -- `evaluators/contrib/`: optional evaluator packages (e.g., `evaluators/contrib/galileo/`) +- `rules/builtin/`: builtin rule implementations (`rules/builtin/src/agent_control_rules/`) +- `rules/contrib/`: optional rule packages (e.g., 
`rules/contrib/galileo/`) - `ui/`: Nextjs based web app to manage agent controls - `examples/`: runnable examples (ruff has relaxed import rules here) @@ -67,16 +67,16 @@ All testing guidance (including "behavior changes require tests") lives in `TEST 4) add SDK wrapper in `sdks/python/src/agent_control/` 5) add tests (server + SDK) and update docs/examples if user-facing -- Add a new builtin evaluator: - 1) implement evaluator class extending `Evaluator` in `evaluators/builtin/src/agent_control_evaluators/` - 2) use `@register_evaluator` decorator (from `agent_control_evaluators`) - 3) add entry point in `evaluators/builtin/pyproject.toml` for auto-discovery - 4) add tests in the evaluators/builtin package - 5) evaluator is automatically available to server and SDK via `discover_evaluators()` +- Add a new builtin rule: + 1) implement rule class extending `Rule` in `rules/builtin/src/agent_control_rules/` + 2) use `@register_rule` decorator (from `agent_control_rules`) + 3) add entry point in `rules/builtin/pyproject.toml` for auto-discovery + 4) add tests in the rules/builtin package + 5) rule is automatically available to server and SDK via `discover_rules()` -- Add an external evaluator package: - 1) copy `evaluators/contrib/template/` as a starting point - 2) implement evaluator class extending `Evaluator` from `agent_control_evaluators` +- Add an external rule package: + 1) copy `rules/contrib/template/` as a starting point + 2) implement rule class extending `Rule` from `agent_control_rules` 3) add entry point using `org.name` format (e.g., `galileo.luna`) 4) package is discovered automatically when installed alongside agent-control diff --git a/CHANGELOG.md b/CHANGELOG.md index 3697da46..3a6e979a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ ### Bug Fixes -- **evaluators**: Use SQLGlot native extra without direct sqlglotc pin +- **rules**: Use SQLGlot native extra without direct sqlglotc pin 
([#243](https://github.com/agentcontrol/agent-control/pull/243), [`6884ce9`](https://github.com/agentcontrol/agent-control/commit/6884ce9c16f85763f029ffba99867993548fc2ae)) @@ -28,7 +28,7 @@ ### Bug Fixes -- **evaluators**: Configure Luna scorer API URL +- **rules**: Configure Luna scorer API URL ([#237](https://github.com/agentcontrol/agent-control/pull/237), [`fc516f0`](https://github.com/agentcontrol/agent-control/commit/fc516f05dc8fbb71a35db2831e617e0a222e5f87)) @@ -56,7 +56,7 @@ ### Bug Fixes -- **evaluators**: Remove Luna2 evaluator +- **rules**: Remove Luna2 rule ([#234](https://github.com/agentcontrol/agent-control/pull/234), [`ccc4fc6`](https://github.com/agentcontrol/agent-control/commit/ccc4fc6d4dc3228169d9db0178d14596349739f1)) @@ -84,7 +84,7 @@ ### Features -- **evaluators**: Add new lluna client +- **rules**: Add new lluna client ([#213](https://github.com/agentcontrol/agent-control/pull/213), [`f65beb9`](https://github.com/agentcontrol/agent-control/commit/f65beb9d14dca8248c35a620d47af3298f1fe5e7)) @@ -249,7 +249,7 @@ ### Features -- **evaluators**: Add built-in budget evaluator for per-agent cost tracking +- **rules**: Add built-in budget rule for per-agent cost tracking ([#144](https://github.com/agentcontrol/agent-control/pull/144), [`d4ce113`](https://github.com/agentcontrol/agent-control/commit/d4ce113488c42e47cbba9364f47a2905e4cc5b35)) @@ -343,13 +343,13 @@ ([#150](https://github.com/agentcontrol/agent-control/pull/150), [`90265ba`](https://github.com/agentcontrol/agent-control/commit/90265ba79132d8a8669f92948cf73568eb38fd5a)) -- **server**: Remove unused evaluator config store +- **server**: Remove unused rule config store ([#152](https://github.com/agentcontrol/agent-control/pull/152), [`dea2873`](https://github.com/agentcontrol/agent-control/commit/dea2873241004c9398909609d4b665808270290e)) ### Features -- **evaluators**: Add starts_with/ends_with mode to list evaluator +- **rules**: Add starts_with/ends_with mode to list rule 
([#154](https://github.com/agentcontrol/agent-control/pull/154), [`bf1f7d7`](https://github.com/agentcontrol/agent-control/commit/bf1f7d7cf271a231a165699d44f726467b49bd50)) @@ -427,7 +427,7 @@ ### Bug Fixes -- **server**: Reject empty string list evaluator values +- **server**: Reject empty string list rule values ([#121](https://github.com/agentcontrol/agent-control/pull/121), [`2c55d27`](https://github.com/agentcontrol/agent-control/commit/2c55d27fc4e1b2b3235a2b672226e77f3365c1bf)) @@ -448,7 +448,7 @@ ### Chores -- **evaluators**: Pin sqlglotc ([#131](https://github.com/agentcontrol/agent-control/pull/131), +- **rules**: Pin sqlglotc ([#131](https://github.com/agentcontrol/agent-control/pull/131), [`1d3ebcc`](https://github.com/agentcontrol/agent-control/commit/1d3ebcceff832dc7595329c2a36861260b2e9dda)) ### Refactoring @@ -470,7 +470,7 @@ ([#99](https://github.com/agentcontrol/agent-control/pull/99), [`f038aa4`](https://github.com/agentcontrol/agent-control/commit/f038aa4cc048e331f59fa4bcc6df0c266f14953e)) -- **evaluators**: Address Cisco AI Defense review issues +- **rules**: Address Cisco AI Defense review issues ([#103](https://github.com/agentcontrol/agent-control/pull/103), [`81da81b`](https://github.com/agentcontrol/agent-control/commit/81da81bdab5b1a8d3ca4904b8cd9e4ed4843a5ba)) @@ -512,7 +512,7 @@ ### Bug Fixes -- **server**: Escape ILIKE pattern in evaluator-configs list endpoint +- **server**: Escape ILIKE pattern in rule-configs list endpoint ([#108](https://github.com/agentcontrol/agent-control/pull/108), [`cb6e89f`](https://github.com/agentcontrol/agent-control/commit/cb6e89f134bd8cea944bbb99419b3d4a26f98d50)) @@ -558,7 +558,7 @@ ([#105](https://github.com/agentcontrol/agent-control/pull/105), [`84dfa57`](https://github.com/agentcontrol/agent-control/commit/84dfa57edd0020da147942e06bff065384bdcead)) -- **evaluators**: Add Cisco AI Defense evaluator and examples +- **rules**: Add Cisco AI Defense rule and examples 
([#60](https://github.com/agentcontrol/agent-control/pull/60), [`52e2416`](https://github.com/agentcontrol/agent-control/commit/52e241657103f90ba4a6a54fefd68836db36fd16)) @@ -571,7 +571,7 @@ ### Bug Fixes -- **sdk): Revert "fix(sdk**: Bundle evaluators in sdk wheel" +- **sdk): Revert "fix(sdk**: Bundle rules in sdk wheel" ([#90](https://github.com/agentcontrol/agent-control/pull/90), [`b516ea6`](https://github.com/agentcontrol/agent-control/commit/b516ea6375257b2116dc68f5974ffd833fd0f783)) @@ -580,7 +580,7 @@ ### Bug Fixes -- **sdk**: Bundle evaluators in sdk wheel +- **sdk**: Bundle rules in sdk wheel ([#89](https://github.com/agentcontrol/agent-control/pull/89), [`ea5889a`](https://github.com/agentcontrol/agent-control/commit/ea5889a1de5cd79b7b4fd59b2a914ffb7294c158)) @@ -589,7 +589,7 @@ ### Bug Fixes -- **evaluators**: Migrate sqlglot rs extra to sqlglot c +- **rules**: Migrate sqlglot rs extra to sqlglot c ([#86](https://github.com/agentcontrol/agent-control/pull/86), [`5e3e48c`](https://github.com/agentcontrol/agent-control/commit/5e3e48cb67b80e6035c074fba6e42cc652194813)) @@ -830,7 +830,7 @@ ### Features -- **evaluators**: Add required_column_values for multi-tenant SQL validation +- **rules**: Add required_column_values for multi-tenant SQL validation ([#30](https://github.com/agentcontrol/agent-control/pull/30), [`532386c`](https://github.com/agentcontrol/agent-control/commit/532386cb933e5d3f07c0939f41701500ef4f4007)) @@ -843,13 +843,13 @@ ### Documentation -- **examples**: Update examples and docs for evaluators reorganization +- **examples**: Update examples and docs for rules reorganization ([#19](https://github.com/agentcontrol/agent-control/pull/19), [`9cb1851`](https://github.com/agentcontrol/agent-control/commit/9cb1851c442536aeb344bdbd889629167984e7da)) ### Features -- **sdk**: Export ControlScope, ControlMatch, and EvaluatorResult models +- **sdk**: Export ControlScope, ControlMatch, and RuleResult models 
([#18](https://github.com/agentcontrol/agent-control/pull/18), [`0d49cad`](https://github.com/agentcontrol/agent-control/commit/0d49cad9da5c9e76c32d652fadbc69cec698a611)) @@ -858,14 +858,14 @@ ### Bug Fixes -- **docs**: Add Example for Evaluator Extension +- **docs**: Add Example for Rule Extension ([#3](https://github.com/agentcontrol/agent-control/pull/3), [`c2a70b3`](https://github.com/agentcontrol/agent-control/commit/c2a70b335fb55481ad63b367ca87ba46de085343)) - **docs**: Update README ([#2](https://github.com/agentcontrol/agent-control/pull/2), [`379bb15`](https://github.com/agentcontrol/agent-control/commit/379bb158700b93aa549ef00eda57ccc2f88cb71f)) -- **infra**: Install engine/evaluators in server image +- **infra**: Install engine/rules in server image ([#14](https://github.com/agentcontrol/agent-control/pull/14), [`d5ae157`](https://github.com/agentcontrol/agent-control/commit/d5ae1571071afd34a00b376e650d9e1ce02f0b2d)) @@ -885,13 +885,13 @@ ### Refactoring -- **evaluators**: Split into builtin + extra packages for PyPI +- **rules**: Split into builtin + extra packages for PyPI ([#5](https://github.com/agentcontrol/agent-control/pull/5), [`0e0a78a`](https://github.com/agentcontrol/agent-control/commit/0e0a78a9fa9c39a5709299c2e3d77c0609110280)) ### Breaking Changes -- **evaluators**: Evaluator reorganization with new package structure +- **rules**: Rule reorganization with new package structure ## v4.0.0 (2026-02-03) @@ -957,7 +957,7 @@ ### Features -- Basic setup for evaluator store flow ([#4](https://github.com/agentcontrol/agent-control/pull/4), +- Basic setup for rule store flow ([#4](https://github.com/agentcontrol/agent-control/pull/4), [`dda9f70`](https://github.com/agentcontrol/agent-control/commit/dda9f70eb7b6e2bfa991ed71b270251ab51d3c9d)) - Inital set of ui fixes ([#7](https://github.com/agentcontrol/agent-control/pull/7), @@ -1002,14 +1002,14 @@ - Tighten evaluation error handling and preserve control data 
([`52a1ef8`](https://github.com/agentcontrol/agent-control/commit/52a1ef8127aca382e373ee6b6433a2d527e6e5e2)) -- **examples**: Update crew ai example to use evaluator +- **examples**: Update crew ai example to use rule ([#93](https://github.com/agentcontrol/agent-control/pull/93), [`1c65084`](https://github.com/agentcontrol/agent-control/commit/1c6508434860ed5bb56c622a721197c5a8f7ad4e)) - **sdk**: Fix logging ([#77](https://github.com/agentcontrol/agent-control/pull/77), [`b1f078c`](https://github.com/agentcontrol/agent-control/commit/b1f078c52c29ac048a9bcbea09252786e842acbd)) -- **sdk**: Plugin to evaluator.. agent_protect to agent_control +- **sdk**: Plugin to rule.. agent_protect to agent_control ([#88](https://github.com/agentcontrol/agent-control/pull/88), [`fc9b088`](https://github.com/agentcontrol/agent-control/commit/fc9b088fcd091132a1e38deae372b73fc2834beb)) @@ -1033,7 +1033,7 @@ ### Refactoring -- **evaluators**: Rename plugin to evaluator throughout +- **rules**: Rename plugin to rule throughout ([#81](https://github.com/agentcontrol/agent-control/pull/81), [`0134682`](https://github.com/agentcontrol/agent-control/commit/0134682c1d0f167528d7267507dbcf3a1e7b3192)) @@ -1052,7 +1052,7 @@ ### Features -- **server**: Add evaluator config store +- **server**: Add rule config store ([#78](https://github.com/agentcontrol/agent-control/pull/78), [`cc14aa6`](https://github.com/agentcontrol/agent-control/commit/cc14aa68391fd7fd4a187364a0a9a9fe712129fe)) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 321aaacc..56f003eb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,9 +25,9 @@ We welcome examples showing how Agent Control works with different agent framewo See existing examples in [examples/](examples/) for the expected structure. -## 2. Contribute New Evaluators +## 2. Contribute New Rules -See [evaluators/contrib/template/README.md](evaluators/contrib/template/README.md) for instructions on how to create a new evaluator package. 
+See [rules/contrib/template/README.md](rules/contrib/template/README.md) for instructions on how to create a new rule package. ## 3. Improve Code and Documentation diff --git a/Makefile b/Makefile index d6b4786a..560eacbc 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all contrib-verify scripts-test models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush evaluators-test evaluators-lint evaluators-lint-fix evaluators-typecheck evaluators-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish +.PHONY: help sync openapi-spec openapi-spec-check test test-extras test-all contrib-verify scripts-test models-test test-models test-sdk lint lint-fix typecheck check build build-models build-server build-sdk publish publish-models publish-server publish-sdk hooks-install hooks-uninstall prepush rules-test rules-lint rules-lint-fix rules-typecheck rules-build contrib-test contrib-lint contrib-lint-fix contrib-typecheck contrib-build sdk-ts-generate sdk-ts-overlay-test sdk-ts-name-check sdk-ts-generate-check sdk-ts-build sdk-ts-test sdk-ts-lint sdk-ts-typecheck sdk-ts-release-check sdk-ts-publish-dry-run sdk-ts-publish telemetry-test telemetry-lint telemetry-lint-fix telemetry-typecheck telemetry-build telemetry-publish # Workspace package names PACK_MODELS := agent-control-models @@ -6,7 +6,7 @@ PACK_SERVER := agent-control-server PACK_SDK := agent-control PACK_ENGINE := agent-control-engine PACK_TELEMETRY := agent-control-telemetry -PACK_EVALUATORS := 
agent-control-evaluators +PACK_RULES := agent-control-rules OPENAPI_SPEC_PATH := server/.generated/openapi.json # Directories @@ -16,8 +16,8 @@ SDK_DIR := sdks/python TS_SDK_DIR := sdks/typescript ENGINE_DIR := engine TELEMETRY_DIR := telemetry -EVALUATORS_DIR := evaluators/builtin -CONTRIB_DIR := evaluators/contrib +RULES_DIR := rules/builtin +CONTRIB_DIR := rules/contrib UI_DIR := ui define run-contrib-target @@ -41,11 +41,11 @@ help: @echo " make openapi-spec-check - verify OpenAPI generation succeeds" @echo "" @echo "Test:" - @echo " make test - run tests for core packages and all discovered contrib evaluators" + @echo " make test - run tests for core packages and all discovered contrib rules" @echo " make contrib-verify - verify root contrib packaging contract wiring" @echo " make scripts-test - run root contrib packaging contract tests" @echo " make models-test - run shared model tests with coverage" - @echo " make test-extras - run tests for all discovered contrib evaluators" + @echo " make test-extras - run tests for all discovered contrib rules" @echo " make test-all - alias for make test" @echo " make sdk-ts-test - run TypeScript SDK tests" @echo "" @@ -94,7 +94,7 @@ openapi-spec-check: openapi-spec # Test # --------------------------- -test: contrib-verify scripts-test models-test telemetry-test server-test engine-test sdk-test evaluators-test contrib-test +test: contrib-verify scripts-test models-test telemetry-test server-test engine-test sdk-test rules-test contrib-test contrib-verify: uv run python scripts/contrib_packages.py verify @@ -110,7 +110,7 @@ test-models: models-test telemetry-test: $(MAKE) -C $(TELEMETRY_DIR) test -# Run tests for discovered contrib evaluators +# Run tests for discovered contrib rules test-extras: contrib-test # Run all tests (alias for test) @@ -123,17 +123,17 @@ check: test lint typecheck # Quality # --------------------------- -lint: engine-lint telemetry-lint evaluators-lint contrib-lint +lint: engine-lint 
telemetry-lint rules-lint contrib-lint uv run --package $(PACK_MODELS) ruff check --config pyproject.toml models/src uv run --package $(PACK_SERVER) ruff check --config pyproject.toml server/src uv run --package $(PACK_SDK) ruff check --config pyproject.toml sdks/python/src -lint-fix: engine-lint-fix telemetry-lint-fix evaluators-lint-fix contrib-lint-fix +lint-fix: engine-lint-fix telemetry-lint-fix rules-lint-fix contrib-lint-fix uv run --package $(PACK_MODELS) ruff check --config pyproject.toml --fix models/src uv run --package $(PACK_SERVER) ruff check --config pyproject.toml --fix server/src uv run --package $(PACK_SDK) ruff check --config pyproject.toml --fix sdks/python/src -typecheck: engine-typecheck telemetry-typecheck evaluators-typecheck contrib-typecheck +typecheck: engine-typecheck telemetry-typecheck rules-typecheck contrib-typecheck uv run --package $(PACK_MODELS) mypy --config-file pyproject.toml models/src uv run --package $(PACK_SERVER) mypy --config-file pyproject.toml server/src uv run --package $(PACK_SDK) mypy --config-file pyproject.toml sdks/python/src @@ -151,7 +151,7 @@ telemetry-typecheck: # Build / Publish # --------------------------- -build: build-models build-server build-sdk engine-build telemetry-build evaluators-build contrib-build +build: build-models build-server build-sdk engine-build telemetry-build rules-build contrib-build build-models: cd $(MODELS_DIR) && uv build @@ -239,20 +239,20 @@ engine-%: sdk-%: $(MAKE) -C $(SDK_DIR) $(patsubst sdk-%,%,$@) -evaluators-test: - $(MAKE) -C $(EVALUATORS_DIR) test +rules-test: + $(MAKE) -C $(RULES_DIR) test -evaluators-lint: - $(MAKE) -C $(EVALUATORS_DIR) lint +rules-lint: + $(MAKE) -C $(RULES_DIR) lint -evaluators-lint-fix: - $(MAKE) -C $(EVALUATORS_DIR) lint-fix +rules-lint-fix: + $(MAKE) -C $(RULES_DIR) lint-fix -evaluators-typecheck: - $(MAKE) -C $(EVALUATORS_DIR) typecheck +rules-typecheck: + $(MAKE) -C $(RULES_DIR) typecheck -evaluators-build: - $(MAKE) -C $(EVALUATORS_DIR) build 
+rules-build: + $(MAKE) -C $(RULES_DIR) build .PHONY: server-% server-%: diff --git a/README.md b/README.md index 8acf30cf..0d9eedf5 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ Enforce runtime guardrails through a centralized control layer—configure once - **Centralized safety** - define controls once, apply across agents, update without redeploying - **Runtime configuration** - manage controls via API or UI, no code changes needed -- **Pluggable evaluators** - built-in (regex, list, JSON, SQL) or bring your own +- **Pluggable rules** - built-in (regex, list, JSON, SQL) or bring your own - **Framework support** - works with LangChain, CrewAI, Google ADK, AWS Strands, and more ## Quick Start @@ -242,7 +242,7 @@ async def setup(): "scope": {"stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\b\d{3}-\d{2}-\d{4}\b"}, }, @@ -264,7 +264,7 @@ async def setup(): asyncio.run(setup()) ``` -Controls now store leaf `selector` and `evaluator` definitions under `condition`, which also enables composite `and`, `or`, and `not` trees. +Controls now store leaf `selector` and `rule` definitions under `condition`, which also enables composite `and`, `or`, and `not` trees. **Tip**: If you prefer a visual flow, use the UI instead - see the [UI Quickstart](https://docs.agentcontrol.dev/core/ui-quickstart). @@ -298,7 +298,7 @@ Explore working examples for popular frameworks. Agent Control evaluates agent inputs and outputs against controls you configure at runtime. That keeps guardrail logic out of prompt code and tool code, while still letting teams update protections centrally. 
-Read more about [Controls](https://docs.agentcontrol.dev/concepts/controls) and Learn how controls, selectors, and evaluators work +Read more about [Controls](https://docs.agentcontrol.dev/concepts/controls) to learn how controls, selectors, and rules work. ## Performance diff --git a/TESTING.md b/TESTING.md index 7cb86231..841994d8 100644 --- a/TESTING.md +++ b/TESTING.md @@ -117,19 +117,19 @@ Specific guidance: - **SDK**: use symbols exported from `sdks/python/src/agent_control/__init__.py`. - **Database seeding**: direct row insertion is acceptable for migration tests, otherwise prefer public setup flows. -## Evaluator-specific expectations +## Rule-specific expectations -When adding or changing evaluators, tests should cover at least these three cases: +When adding or changing rules, tests should cover at least these three cases: 1. Null or empty input: returns `matched=False` and no error. 2. Normal evaluation: returns the correct `matched` result for the configured threshold or predicate. -3. Infrastructure failure: returns `matched=False` with `error` set, unless the evaluator intentionally uses a different documented error policy. +3. Infrastructure failure: returns `matched=False` with `error` set, unless the rule intentionally uses a different documented error policy. -Additional evaluator rules worth testing when relevant: +Additional rule behavior worth testing when relevant: - `error` is for infrastructure failures, not normal evaluation outcomes. -- Evaluators are reused across concurrent requests, so avoid request-scoped state on `self`. -- Pre-compiled patterns, timeout handling, and async boundaries should be covered when they are part of the evaluator behavior. +- Rules are reused across concurrent requests, so avoid request-scoped state on `self`. +- Pre-compiled patterns, timeout handling, and async boundaries should be covered when they are part of the rule behavior.
## Running tests diff --git a/codecov.yml b/codecov.yml index cd3c83db..fee0a59f 100644 --- a/codecov.yml +++ b/codecov.yml @@ -2,6 +2,6 @@ fixes: - "agent_control_server/::server/src/agent_control_server/" - "agent_control_engine/::engine/src/agent_control_engine/" - "agent_control_models/::models/src/agent_control_models/" - - "agent_control_evaluators/::evaluators/builtin/src/agent_control_evaluators/" + - "agent_control_rules/::rules/builtin/src/agent_control_rules/" - "agent_control_telemetry/::telemetry/src/agent_control_telemetry/" - "agent_control/::sdks/python/src/agent_control/" diff --git a/docs/README.md b/docs/README.md index 58d8df8a..2cecd13f 100644 --- a/docs/README.md +++ b/docs/README.md @@ -6,7 +6,7 @@ This repository keeps documentation concise. The full documentation lives on the - [Overview](https://docs.agentcontrol.dev/core/overview) — Overview of Agent Control concepts and how the system fits together - [Quickstart](https://docs.agentcontrol.dev/core/quickstart) — Install, start the server, and protect your first agent -- [Concepts](https://docs.agentcontrol.dev/concepts/overview) — Controls, scopes, selectors, evaluators, and actions +- [Concepts](https://docs.agentcontrol.dev/concepts/overview) — Controls, scopes, selectors, rules, and actions - [Controls](https://docs.agentcontrol.dev/concepts/controls) — Define and configure control rules - [Reference](https://docs.agentcontrol.dev/core/reference) — SDK and server API reference - [Configuration](https://docs.agentcontrol.dev/core/configuration) — Environment variables, auth, and database settings @@ -19,13 +19,13 @@ This repository keeps documentation concise. 
The full documentation lives on the - [Agent Control Demo](https://docs.agentcontrol.dev/examples/agent-control-demo) — End-to-end demo with server-side controls - [LangChain SQL](https://docs.agentcontrol.dev/examples/langchain-sql) — SQL injection protection for LangChain agents - [CrewAI](https://docs.agentcontrol.dev/examples/crewai) — Guardrails combined with CrewAI orchestration -- [DeepEval](https://docs.agentcontrol.dev/examples/deepeval) — Custom evaluator built on GEval metrics +- [DeepEval](https://docs.agentcontrol.dev/examples/deepeval) — Custom rule built on GEval metrics ## Component Docs - [Python SDK](https://docs.agentcontrol.dev/sdk/python-sdk) — SDK usage, decorators, and client APIs - [TypeScript SDK](https://docs.agentcontrol.dev/sdk/typescript-sdk) — Generated client and usage patterns - [Server](https://docs.agentcontrol.dev/components/server) — FastAPI server setup and APIs -- [Engine](https://docs.agentcontrol.dev/components/engine) — Evaluation engine behavior and evaluator discovery +- [Engine](https://docs.agentcontrol.dev/components/engine) — Evaluation engine behavior and rule discovery - [Models](https://docs.agentcontrol.dev/components/models) — Shared Pydantic models and schemas -- [Evaluators](https://docs.agentcontrol.dev/components/evaluators) — Built-in and external evaluator integrations +- [Rules](https://docs.agentcontrol.dev/components/rules) — Built-in and external rule integrations diff --git a/engine/README.md b/engine/README.md index 1170c38e..065baa74 100644 --- a/engine/README.md +++ b/engine/README.md @@ -1,21 +1,21 @@ # Agent Control Engine -The engine is the runtime that evaluates controls, resolves selectors, and runs evaluators. It is used by the server and SDK to apply control logic consistently. +The engine is the runtime that evaluates controls, resolves selectors, and runs rules. It is used by the server and SDK to apply control logic consistently. 
## What this package provides -- Evaluator discovery via Python entry points +- Rule discovery via Python entry points - Selector evaluation and payload extraction -- Evaluator execution and result aggregation -- Cached evaluator instances for performance +- Rule execution and result aggregation +- Cached rule instances for performance -## Evaluator discovery +## Rule discovery ```python -from agent_control_engine import discover_evaluators, list_evaluators +from agent_control_engine import discover_rules, list_rules -discover_evaluators() -print(list_evaluators()) +discover_rules() +print(list_rules()) ``` Full guide: https://docs.agentcontrol.dev/components/engine diff --git a/engine/pyproject.toml b/engine/pyproject.toml index 27ea4ae0..1d9c0bbf 100644 --- a/engine/pyproject.toml +++ b/engine/pyproject.toml @@ -5,7 +5,7 @@ description = "Control execution engine for Agent Control" requires-python = ">=3.12" dependencies = [ "agent-control-models>=3.0.0", - "agent-control-evaluators>=3.0.0", + "agent-control-rules>=3.0.0", "google-re2>=1.1", ] authors = [ @@ -32,4 +32,4 @@ packages = ["src/agent_control_engine"] [tool.uv.sources] agent-control-models = { workspace = true } -agent-control-evaluators = { workspace = true } +agent-control-rules = { workspace = true } diff --git a/engine/src/agent_control_engine/__init__.py b/engine/src/agent_control_engine/__init__.py index 8c8966c7..f2e064d1 100644 --- a/engine/src/agent_control_engine/__init__.py +++ b/engine/src/agent_control_engine/__init__.py @@ -1,4 +1,4 @@ -"""Agent Control Engine - Rule execution logic and evaluator system.""" +"""Agent Control Engine - Control evaluation logic and rule system.""" from importlib.metadata import PackageNotFoundError, version @@ -7,20 +7,20 @@ except PackageNotFoundError: __version__ = "0.0.0.dev" -from agent_control_evaluators import ( - clear_evaluator_cache, - discover_evaluators, - ensure_evaluators_discovered, - get_evaluator_instance, - list_evaluators, -
reset_evaluator_discovery, +from agent_control_rules import ( + clear_rule_cache, + discover_rules, + ensure_rules_discovered, + get_rule_instance, + list_rules, + reset_rule_discovery, ) __all__ = [ - "clear_evaluator_cache", - "discover_evaluators", - "ensure_evaluators_discovered", - "get_evaluator_instance", - "list_evaluators", - "reset_evaluator_discovery", + "clear_rule_cache", + "discover_rules", + "ensure_rules_discovered", + "get_rule_instance", + "list_rules", + "reset_rule_discovery", ] diff --git a/engine/src/agent_control_engine/core.py b/engine/src/agent_control_engine/core.py index b2cd81b3..bdf9490d 100644 --- a/engine/src/agent_control_engine/core.py +++ b/engine/src/agent_control_engine/core.py @@ -12,7 +12,6 @@ from typing import Any, Literal, Protocol import re2 -from agent_control_evaluators import get_evaluator_instance from agent_control_models import ( ConditionNode, ControlAction, @@ -20,8 +19,9 @@ ControlScope, EvaluationRequest, EvaluationResponse, - EvaluatorResult, + RuleResult, ) +from agent_control_rules import get_rule_instance from .selectors import select_data @@ -44,8 +44,8 @@ def _env_positive_int(*names: str, default: int) -> int: return default -# Default timeout for evaluator execution (seconds) -DEFAULT_EVALUATOR_TIMEOUT = float(os.environ.get("EVALUATOR_TIMEOUT_SECONDS", "30")) +# Default timeout for rule execution (seconds) +DEFAULT_RULE_TIMEOUT = float(os.environ.get("RULE_TIMEOUT_SECONDS", "30")) # Max concurrent evaluations (limits task spawning overhead for large policies). # Prefer the namespaced env var; MAX_CONCURRENT_EVALUATIONS is kept for compatibility. 
@@ -149,7 +149,7 @@ def _selected_data_preview_value( def _selected_data_preview(value: Any) -> dict[str, Any]: - """Return UI-safe selector output details for evaluator-level inspection.""" + """Return UI-safe selector output details for rule-level inspection.""" preview, truncated = _selected_data_preview_value(value) return { "type": type(value).__name__, @@ -199,14 +199,14 @@ class _EvalTask: item: ControlWithIdentity task: asyncio.Task[None] | None = None - result: EvaluatorResult | None = None + result: RuleResult | None = None @dataclass class _ConditionEvaluation: """Internal result for recursive condition evaluation.""" - result: EvaluatorResult + result: RuleResult trace: dict[str, Any] @@ -239,7 +239,7 @@ def __init__( @staticmethod def _truncated_message(message: str | None) -> str | None: - """Truncate long evaluator messages in condition traces.""" + """Truncate long rule messages in condition traces.""" if not message: return None if len(message) <= 200: @@ -256,9 +256,9 @@ def _build_error_result( error: str, *, message_prefix: str = "Evaluation failed", - ) -> EvaluatorResult: - """Create a failed evaluator result from an internal error string.""" - return EvaluatorResult( + ) -> RuleResult: + """Create a failed rule result from an internal error string.""" + return RuleResult( matched=False, confidence=0.0, message=f"{message_prefix}: {error}", @@ -276,10 +276,10 @@ def _skipped_trace(self, node: ConditionNode, reason: str) -> dict[str, Any]: if node.is_leaf(): leaf_parts = node.leaf_parts() if leaf_parts is None: - raise ValueError("Leaf condition must contain selector and evaluator") - selector, evaluator = leaf_parts + raise ValueError("Leaf condition must contain selector and rule") + selector, rule = leaf_parts trace["selector_path"] = selector.path - trace["evaluator_name"] = evaluator.name + trace["rule_name"] = rule.name trace["confidence"] = None trace["error"] = None return trace @@ -296,9 +296,9 @@ async def _evaluate_leaf( request: 
EvaluationRequest, semaphore: asyncio.Semaphore, ) -> _ConditionEvaluation: - """Evaluate a leaf selector/evaluator pair. + """Evaluate a leaf selector/rule pair. - The shared semaphore limits concurrent leaf evaluator executions across + The shared semaphore limits concurrent leaf rule executions across the entire engine run. Composite conditions evaluate serially, so a single control only holds one semaphore slot at a time, but multi-leaf controls may acquire and release that shared slot more than once while @@ -306,29 +306,29 @@ async def _evaluate_leaf( """ leaf_parts = node.leaf_parts() if leaf_parts is None: - raise ValueError("Leaf condition must contain selector and evaluator") - selector, evaluator_spec = leaf_parts + raise ValueError("Leaf condition must contain selector and rule") + selector, rule_spec = leaf_parts selector_path = selector.path or "*" data = select_data(request.step, selector_path) try: async with semaphore: - evaluator = get_evaluator_instance(evaluator_spec) - timeout = evaluator.get_timeout_seconds() + rule = get_rule_instance(rule_spec) + timeout = rule.get_timeout_seconds() if timeout <= 0: - timeout = DEFAULT_EVALUATOR_TIMEOUT + timeout = DEFAULT_RULE_TIMEOUT result = await asyncio.wait_for( - evaluator.evaluate(data), + rule.evaluate(data), timeout=timeout, ) except TimeoutError: - error_msg = f"TimeoutError: Evaluator exceeded {timeout}s timeout" + error_msg = f"TimeoutError: Rule exceeded {timeout}s timeout" logger.warning( - "Evaluator timeout for control '%s' (evaluator: %s): %s", + "Rule timeout for control '%s' (rule: %s): %s", item.name, - evaluator_spec.name, + rule_spec.name, error_msg, exc_info=True, ) @@ -336,9 +336,9 @@ async def _evaluate_leaf( except Exception as e: error_msg = self._format_exception(e) logger.error( - "Evaluator error for control '%s' (evaluator: %s): %s", + "Rule error for control '%s' (rule: %s): %s", item.name, - evaluator_spec.name, + rule_spec.name, error_msg, exc_info=True, ) @@ -349,7 +349,7 
@@ async def _evaluate_leaf( "evaluated": True, "matched": result.matched, "selector_path": selector_path, - "evaluator_name": evaluator_spec.name, + "rule_name": rule_spec.name, "confidence": result.confidence, "error": result.error, "message": self._truncated_message(result.message), @@ -372,17 +372,17 @@ def _build_composite_result( trace: dict[str, Any], metadata: dict[str, Any] | None = None, error: str | None = None, - ) -> EvaluatorResult: - """Create a composite evaluator result with a condition trace.""" + ) -> RuleResult: + """Create a composite rule result with a condition trace.""" result_metadata = dict(metadata or {}) result_metadata["condition_trace"] = trace if error is not None: - return EvaluatorResult( + return RuleResult( matched=False, confidence=0.0, message=( - "Condition evaluation aborted due to a child evaluator error: " + "Condition evaluation aborted due to a child rule error: " f"{error}" ), metadata=result_metadata, @@ -390,7 +390,7 @@ def _build_composite_result( ) message = "Condition tree matched" if matched else "Condition tree did not match" - return EvaluatorResult( + return RuleResult( matched=matched, confidence=confidence, message=message, @@ -406,7 +406,7 @@ def _composite_metadata( """Select stable child metadata to preserve on composite results. The engine_selected_data_preview value in this metadata is not all - evaluator inputs. It is the bounded selected value preview from the leaf + rule inputs. It is the bounded selected value preview from the leaf metadata the engine preserves for the final composite result: - or where one child matches: engine_selected_data_preview comes from the matching child. @@ -418,7 +418,7 @@ def _composite_metadata( first evaluated child. - not: engine_selected_data_preview comes from its child. 
""" - source_result: EvaluatorResult | None = None + source_result: RuleResult | None = None if matched: source_result = next( ( @@ -604,7 +604,7 @@ def get_applicable_controls( control_id=item.id, control_name=item.name, action=control_def.action.decision, - result=EvaluatorResult( + result=RuleResult( matched=False, confidence=0.0, message=( @@ -655,7 +655,7 @@ async def process(self, request: EvaluationRequest) -> EvaluationResponse: matches: list[ControlMatch] = [] is_safe = True deny_found = asyncio.Event() - # The concurrency cap applies to visited leaf evaluator executions, not + # The concurrency cap applies to visited leaf rule executions, not # whole top-level controls. Composite trees are still walked serially. semaphore = asyncio.Semaphore(MAX_CONCURRENT_EVALUATIONS) diff --git a/engine/tests/conftest.py b/engine/tests/conftest.py index 0c669091..35e887a3 100644 --- a/engine/tests/conftest.py +++ b/engine/tests/conftest.py @@ -2,17 +2,17 @@ import pytest -from agent_control_engine import clear_evaluator_cache, reset_evaluator_discovery -from agent_control_evaluators import clear_evaluators +from agent_control_engine import clear_rule_cache, reset_rule_discovery +from agent_control_rules import clear_rules @pytest.fixture(autouse=True) -def clean_evaluator_state() -> None: - """Clean up evaluator registry and discovery state before each test. +def clean_rule_state() -> None: + """Clean up rule registry and discovery state before each test. This fixture runs automatically for all tests to ensure isolation. Tests that mock entry_points won't pollute the registry for other tests. 
""" - clear_evaluators() - reset_evaluator_discovery() - clear_evaluator_cache() + clear_rules() + reset_rule_discovery() + clear_rule_cache() diff --git a/engine/tests/test_core.py b/engine/tests/test_core.py index baa46bab..ef402fea 100644 --- a/engine/tests/test_core.py +++ b/engine/tests/test_core.py @@ -11,18 +11,18 @@ from typing import Any import pytest -from agent_control_engine import clear_evaluator_cache +from agent_control_engine import clear_rule_cache from agent_control_engine.core import ControlEngine, _compile_regex -from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator from agent_control_models import ( ControlAction, ControlDefinition, EvaluationRequest, - EvaluatorResult, - EvaluatorSpec, + RuleResult, + RuleSpec, SteeringContext, Step, ) +from agent_control_rules import Rule, RuleMetadata, register_rule from pydantic import BaseModel # ============================================================================= @@ -31,12 +31,12 @@ class SimpleConfig(BaseModel): - """Simple config for test evaluators.""" + """Simple config for test rules.""" value: str = "default" -# Shared state for coordination between test evaluators +# Shared state for coordination between test rules _execution_log: list[str] = [] _blocker_event: asyncio.Event | None = None @@ -48,19 +48,19 @@ def reset_test_state() -> None: _blocker_event = asyncio.Event() -class AllowEvaluator(Evaluator[SimpleConfig]): - """Evaluator that always allows (matched=False).""" +class AllowRule(Rule[SimpleConfig]): + """Rule that always allows (matched=False).""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-allow", version="1.0.0", description="Always allows", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: _execution_log.append(f"allow:{self.config.value}:start") - result = EvaluatorResult( + result = RuleResult( matched=False, confidence=1.0, 
message="Allowed", @@ -69,19 +69,19 @@ async def evaluate(self, data: Any) -> EvaluatorResult: return result -class DenyEvaluator(Evaluator[SimpleConfig]): - """Evaluator that always denies (matched=True).""" +class DenyRule(Rule[SimpleConfig]): + """Rule that always denies (matched=True).""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-deny", version="1.0.0", description="Always denies", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: _execution_log.append(f"deny:{self.config.value}:start") - result = EvaluatorResult( + result = RuleResult( matched=True, confidence=1.0, message="Denied", @@ -90,26 +90,26 @@ async def evaluate(self, data: Any) -> EvaluatorResult: return result -class BlockerEvaluator(Evaluator[SimpleConfig]): - """Evaluator that blocks until cancelled or event is set. +class BlockerRule(Rule[SimpleConfig]): + """Rule that blocks until cancelled or event is set. Used to test cancellation behavior. 
""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-blocker", version="1.0.0", description="Blocks until cancelled", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: _execution_log.append(f"blocker:{self.config.value}:start") try: # Wait indefinitely (should be cancelled) await _blocker_event.wait() # type: ignore _execution_log.append(f"blocker:{self.config.value}:end") - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="Blocker completed (should not happen in cancel test)", @@ -119,45 +119,45 @@ async def evaluate(self, data: Any) -> EvaluatorResult: raise -class SlowEvaluator(Evaluator[SimpleConfig]): - """Evaluator that sleeps briefly before returning.""" +class SlowRule(Rule[SimpleConfig]): + """Rule that sleeps briefly before returning.""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-slow", version="1.0.0", description="Sleeps then allows", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: _execution_log.append(f"slow:{self.config.value}:start") await asyncio.sleep(0.05) # 50ms _execution_log.append(f"slow:{self.config.value}:end") - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="Slow completed", ) -class MetadataEvaluator(Evaluator[SimpleConfig]): - """Evaluator that emits structured metadata for propagation tests.""" +class MetadataRule(Rule[SimpleConfig]): + """Rule that emits structured metadata for propagation tests.""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-metadata", version="1.0.0", description="Returns metadata while matching by config prefix", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: 
_execution_log.append(f"metadata:{self.config.value}:start") matched = self.config.value.startswith("match") - result = EvaluatorResult( + result = RuleResult( matched=matched, confidence=0.8 if matched else 0.4, message=f"Metadata {self.config.value}", - metadata={"source": self.config.value, "selected_data": f"evaluator:{data}"}, + metadata={"source": self.config.value, "selected_data": f"rule:{data}"}, ) _execution_log.append(f"metadata:{self.config.value}:end") return result @@ -173,34 +173,34 @@ class MockControlWithIdentity: @pytest.fixture(autouse=True) -def setup_test_evaluators(): - """Register test evaluators and reset state before each test.""" +def setup_test_rules(): + """Register test rules and reset state before each test.""" reset_test_state() - clear_evaluator_cache() - - # Register evaluators (may already be registered) - for evaluator_cls in [ - AllowEvaluator, - DenyEvaluator, - BlockerEvaluator, - SlowEvaluator, - MetadataEvaluator, + clear_rule_cache() + + # Register rules (may already be registered) + for rule_cls in [ + AllowRule, + DenyRule, + BlockerRule, + SlowRule, + MetadataRule, ]: try: - register_evaluator(evaluator_cls) + register_rule(rule_cls) except ValueError: pass # Already registered yield reset_test_state() - clear_evaluator_cache() + clear_rule_cache() def make_control( control_id: int, name: str, - evaluator: str, + rule: str, action: str = "deny", config_value: str = "default", *, @@ -239,8 +239,8 @@ def make_control( scope=scope, condition={ "selector": selector or {"path": "*"}, - "evaluator": EvaluatorSpec( - name=evaluator, + "rule": RuleSpec( + name=rule, config={"value": config_value}, ), }, @@ -501,32 +501,32 @@ async def test_no_matches_when_all_allow(self): # ============================================================================= -class ErrorEvaluator(Evaluator[SimpleConfig]): - """Evaluator that always raises an exception.""" +class ErrorRule(Rule[SimpleConfig]): + """Rule that always raises an exception.""" 
- metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-error", version="1.0.0", description="Always raises an error", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: _execution_log.append(f"error:{self.config.value}:start") raise RuntimeError(f"Intentional error from {self.config.value}") class TimeoutConfig(BaseModel): - """Config for timeout evaluator with custom timeout.""" + """Config for timeout rule with custom timeout.""" value: str = "default" timeout_ms: int = 100 # Very short timeout for testing -class TimeoutEvaluator(Evaluator[TimeoutConfig]): - """Evaluator that sleeps longer than its timeout.""" +class TimeoutRule(Rule[TimeoutConfig]): + """Rule that sleeps longer than its timeout.""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-timeout", version="1.0.0", description="Sleeps longer than timeout", @@ -534,12 +534,12 @@ class TimeoutEvaluator(Evaluator[TimeoutConfig]): ) config_model = TimeoutConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: _execution_log.append(f"timeout:{self.config.value}:start") # Sleep for 5 seconds - way longer than the 100ms timeout await asyncio.sleep(5.0) _execution_log.append(f"timeout:{self.config.value}:end") - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="Should never reach here", @@ -550,22 +550,22 @@ class TestErrorHandling: """Tests for error handling - fail-closed for deny controls, error field.""" @pytest.fixture(autouse=True) - def register_error_evaluator(self): - """Register ErrorEvaluator for these tests.""" + def register_error_rule(self): + """Register ErrorRule for these tests.""" try: - register_evaluator(ErrorEvaluator) + register_rule(ErrorRule) except ValueError: pass # Already registered @pytest.mark.asyncio - async def test_evaluator_error_fails_closed_for_deny(self): + 
async def test_rule_error_fails_closed_for_deny(self): """Test that deny controls fail closed when they error. - Given: A deny control with an evaluator that throws an exception + Given: A deny control with a rule that throws an exception When: The engine processes the request Then: The request is marked unsafe (fail-closed) and confidence is 0 """ - # Given: A deny control with an error-throwing evaluator + # Given: A deny control with an error-throwing rule controls = [ make_control(1, "error_control", "test-error", action="deny", config_value="err"), ] @@ -588,7 +588,7 @@ async def test_evaluator_error_fails_closed_for_deny(self): # Error should be captured assert result.errors is not None assert len(result.errors) == 1 - # The evaluator should have started + # The rule should have started assert "error:err:start" in _execution_log @pytest.mark.asyncio @@ -697,17 +697,17 @@ async def raise_unexpected(*_args: object, **_kwargs: object) -> object: assert "unexpected traversal bug" in result.errors[0].result.error @pytest.mark.asyncio - async def test_missing_evaluator_error_sets_error_field(self): - """Test that missing evaluator error sets error field in result. + async def test_missing_rule_error_sets_error_field(self): + """Test that missing rule error sets error field in result. 
- Given: A deny control with an evaluator that doesn't exist + Given: A deny control with a rule that doesn't exist When: The engine processes the request Then: The error field is set, is_safe=False (deny fails closed) """ - # Given: A deny control with non-existent evaluator + # Given: A deny control with non-existent rule controls = [ make_control( - 1, "missing_evaluator", "nonexistent-evaluator", action="deny", config_value="m" + 1, "missing_rule", "nonexistent-rule", action="deny", config_value="m" ), ] engine = ControlEngine(controls) @@ -728,13 +728,13 @@ async def test_missing_evaluator_error_sets_error_field(self): # Error should be captured assert result.errors is not None assert len(result.errors) == 1 - assert result.errors[0].control_name == "missing_evaluator" + assert result.errors[0].control_name == "missing_rule" assert result.errors[0].result.error is not None - assert "nonexistent-evaluator" in result.errors[0].result.error.lower() + assert "nonexistent-rule" in result.errors[0].result.error.lower() @pytest.mark.asyncio - async def test_errors_array_exposes_evaluator_failures(self): - """Test that errors array exposes all evaluator failures. + async def test_errors_array_exposes_rule_failures(self): + """Test that errors array exposes all rule failures. 
Given: Multiple controls, some throw errors, some succeed When: The engine processes the request @@ -964,17 +964,17 @@ async def test_confidence_zero_when_deny_errors_despite_other_successes(self): # ============================================================================= -class PayloadEchoEvaluator(Evaluator[SimpleConfig]): - """Evaluator that inspects full payload when path is omitted ("*").""" +class PayloadEchoRule(Rule[SimpleConfig]): + """Rule that inspects full payload when path is omitted ("*").""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-payload-echo", version="1.0.0", description="Echo payload info", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: # If we received the full payload as JSON, it has dict keys for type/name if isinstance(data, dict): step_type = data.get("type") @@ -990,14 +990,14 @@ async def evaluate(self, data: Any) -> EvaluatorResult: _execution_log.append(f"payload_step:{step_type}:{step_name}") else: _execution_log.append("payload_step:") - return EvaluatorResult(matched=False, confidence=1.0, message="ok") + return RuleResult(matched=False, confidence=1.0, message="ok") class TestSelectorStepScoping: @pytest.fixture(autouse=True) - def register_payload_evaluator(self): + def register_payload_rule(self): try: - register_evaluator(PayloadEchoEvaluator) + register_rule(PayloadEchoRule) except ValueError: pass @@ -1103,7 +1103,7 @@ async def test_or_semantics_names_or_regex(self): @pytest.mark.asyncio async def test_path_optional_defaults_to_star(self): - # Given: path omitted; evaluator should receive full payload + # Given: path omitted; rule should receive full payload controls = [ make_control( 1, @@ -1135,34 +1135,34 @@ def test_invalid_step_name_regex_rejected(self): scope={"step_types": ["tool"], "stages": ["pre"], "step_name_regex": "("}, condition={ "selector": {"path": "input"}, - "evaluator": 
EvaluatorSpec(name="test-allow", config={"value": "x"}), + "rule": RuleSpec(name="test-allow", config={"value": "x"}), }, action={"decision": "observe"}, ) class TestTimeoutEnforcement: - """Tests for per-evaluator timeout enforcement.""" + """Tests for per-rule timeout enforcement.""" @pytest.fixture(autouse=True) - def register_timeout_evaluator(self): - """Register TimeoutEvaluator for these tests.""" + def register_timeout_rule(self): + """Register TimeoutRule for these tests.""" try: - register_evaluator(TimeoutEvaluator) + register_rule(TimeoutRule) except ValueError: pass # Already registered @pytest.mark.asyncio - async def test_evaluator_timeout_is_enforced(self): - """Test that evaluators are killed after their timeout expires. + async def test_rule_timeout_is_enforced(self): + """Test that rules are killed after their timeout expires. - Given: A control with an evaluator that sleeps longer than its timeout + Given: A control with a rule that sleeps longer than its timeout When: The engine processes the request Then: The evaluation times out and error is captured """ import time - # Given: A control with a timeout evaluator (100ms timeout, 5s sleep) + # Given: A control with a timeout rule (100ms timeout, 5s sleep) controls = [ MockControlWithIdentity( id=1, @@ -1174,7 +1174,7 @@ async def test_evaluator_timeout_is_enforced(self): scope={"step_types": ["llm"], "stages": ["pre"]}, condition={ "selector": {"path": "input"}, - "evaluator": EvaluatorSpec( + "rule": RuleSpec( name="test-timeout", config={"value": "t1", "timeout_ms": 100}, ), @@ -1198,7 +1198,7 @@ async def test_evaluator_timeout_is_enforced(self): # Then: Should complete quickly (timeout, not full 5s sleep) assert elapsed < 1.0, f"Expected timeout ~0.1s but took {elapsed:.2f}s" - # And: Evaluator should have started + # And: Rule should have started assert "timeout:t1:start" in _execution_log # But not finished (was killed) assert "timeout:t1:end" not in _execution_log @@ -1214,8 +1214,8 @@ 
async def test_evaluator_timeout_is_enforced(self): assert result.confidence == 0.0 @pytest.mark.asyncio - async def test_timeout_does_not_affect_fast_evaluators(self): - """Test that fast evaluators complete normally without timeout issues. + async def test_timeout_does_not_affect_fast_rules(self): + """Test that fast rules complete normally without timeout issues. Given: A mix of fast and slow (timing out) controls When: The engine processes the request @@ -1234,7 +1234,7 @@ async def test_timeout_does_not_affect_fast_evaluators(self): scope={"step_types": ["llm"], "stages": ["pre"]}, condition={ "selector": {"path": "input"}, - "evaluator": EvaluatorSpec( + "rule": RuleSpec( name="test-timeout", config={"value": "slow", "timeout_ms": 100}, ), @@ -1253,11 +1253,11 @@ async def test_timeout_does_not_affect_fast_evaluators(self): ) result = await engine.process(request) - # Then: Fast evaluator should have completed normally + # Then: Fast rule should have completed normally assert "allow:f1:start" in _execution_log assert "allow:f1:end" in _execution_log - # And: Slow evaluator should have timed out + # And: Slow rule should have timed out assert "timeout:slow:start" in _execution_log assert "timeout:slow:end" not in _execution_log @@ -1271,6 +1271,96 @@ async def test_timeout_does_not_affect_fast_evaluators(self): # Confidence is 0.5 (1 success, 1 error out of 2) assert result.confidence == 0.5 + @pytest.mark.asyncio + async def test_non_positive_rule_timeout_uses_default_rule_timeout( + self, + monkeypatch: pytest.MonkeyPatch, + ): + """Rules reporting non-positive timeouts fall back to DEFAULT_RULE_TIMEOUT.""" + import time + + import agent_control_engine.core as core_module + + monkeypatch.setattr(core_module, "DEFAULT_RULE_TIMEOUT", 0.01) + controls = [ + MockControlWithIdentity( + id=1, + name="fallback_timeout", + control=ControlDefinition( + description="Test fallback timeout", + enabled=True, + execution="server", + scope={"step_types": ["llm"], "stages": 
["pre"]}, + condition={ + "selector": {"path": "input"}, + "rule": RuleSpec( + name="test-timeout", + config={"value": "fallback", "timeout_ms": 0}, + ), + }, + action={"decision": "deny"}, + ), + ) + ] + engine = ControlEngine(controls) + request = EvaluationRequest( + agent_name="00000000-0000-0000-0000-000000000001", + step=Step(type="llm", name="test-step", input="test", output=None), + stage="pre", + ) + + start = time.monotonic() + result = await engine.process(request) + elapsed = time.monotonic() - start + + assert elapsed < 1.0 + assert result.errors is not None + assert "TimeoutError: Rule exceeded 0.01s timeout" in result.errors[0].result.error + + +class _IncompleteLeafNode: + """Runtime-only stub for defensive incomplete-leaf branches.""" + + def is_leaf(self) -> bool: + return True + + def leaf_parts(self) -> None: + return None + + def kind(self) -> str: + return "leaf" + + def children_in_order(self) -> list[object]: + return [] + + +class TestDefensiveConditionBranches: + """Cover defensive branches unreachable through validated Pydantic models.""" + + def test_skipped_trace_rejects_incomplete_leaf(self): + engine = ControlEngine([]) + + with pytest.raises(ValueError, match="Leaf condition must contain selector and rule"): + engine._skipped_trace(_IncompleteLeafNode(), "short-circuit") # type: ignore[arg-type] + + @pytest.mark.asyncio + async def test_evaluate_leaf_rejects_incomplete_leaf(self): + engine = ControlEngine([]) + item = make_control(1, "incomplete", "test-allow") + request = EvaluationRequest( + agent_name="00000000-0000-0000-0000-000000000001", + step=Step(type="llm", name="test-step", input="test", output=None), + stage="pre", + ) + + with pytest.raises(ValueError, match="Leaf condition must contain selector and rule"): + await engine._evaluate_leaf( # type: ignore[arg-type] + item, + _IncompleteLeafNode(), + request, + asyncio.Semaphore(1), + ) + # ============================================================================= # Test: 
Concurrency Limit @@ -1319,7 +1409,7 @@ def test_max_concurrency_env_reads_legacy_name( def test_max_concurrency_env_rejects_non_positive_values( self, monkeypatch: pytest.MonkeyPatch ) -> None: - """The concurrency cap must always allow at least one evaluator.""" + """The concurrency cap must always allow at least one rule.""" import agent_control_engine.core as core_module monkeypatch.setenv("AGENT_CONTROL_MAX_CONCURRENT_EVALUATIONS", "0") @@ -1349,17 +1439,17 @@ async def test_concurrency_limited_to_max(self, monkeypatch: pytest.MonkeyPatch) _max_concurrent = 0 _lock = asyncio.Lock() - class ConcurrencyTracker(Evaluator[SimpleConfig]): - """Evaluator that tracks concurrent execution count.""" + class ConcurrencyTracker(Rule[SimpleConfig]): + """Rule that tracks concurrent execution count.""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="test-concurrency", version="1.0.0", description="Tracks concurrency", ) config_model = SimpleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: nonlocal _concurrent_count, _max_concurrent async with _lock: _concurrent_count += 1 @@ -1367,10 +1457,10 @@ async def evaluate(self, data: Any) -> EvaluatorResult: await asyncio.sleep(0.05) # Small delay to overlap async with _lock: _concurrent_count -= 1 - return EvaluatorResult(matched=False, confidence=1.0, message="ok") + return RuleResult(matched=False, confidence=1.0, message="ok") try: - register_evaluator(ConcurrencyTracker) + register_rule(ConcurrencyTracker) except ValueError: pass @@ -1402,10 +1492,10 @@ class TestConditionTrees: """Tests for recursive condition evaluation and trace metadata.""" @pytest.fixture(autouse=True) - def register_error_evaluator(self): - """Register ErrorEvaluator for these tests.""" + def register_error_rule(self): + """Register ErrorRule for these tests.""" try: - register_evaluator(ErrorEvaluator) + register_rule(ErrorRule) except ValueError: pass @@ -1426,11 
+1516,11 @@ async def test_or_short_circuit_records_skipped_trace(self): "or": [ { "selector": {"path": "input"}, - "evaluator": {"name": "test-deny", "config": {"value": "match"}}, + "rule": {"name": "test-deny", "config": {"value": "match"}}, }, { "selector": {"path": "input"}, - "evaluator": {"name": "test-slow", "config": {"value": "skip"}}, + "rule": {"name": "test-slow", "config": {"value": "skip"}}, }, ] }, @@ -1478,7 +1568,7 @@ async def test_leaf_metadata_includes_selector_selected_data_preview(self): scope={"step_types": ["tool"], "stages": ["pre"]}, condition={ "selector": {"path": "input.city"}, - "evaluator": {"name": "test-deny", "config": {"value": "match"}}, + "rule": {"name": "test-deny", "config": {"value": "match"}}, }, action={"decision": "observe"}, ), @@ -1525,7 +1615,7 @@ async def test_leaf_selected_data_preview_is_bounded_and_redacted(self): scope={"step_types": ["tool"], "stages": ["pre"]}, condition={ "selector": {"path": "input"}, - "evaluator": {"name": "test-deny", "config": {"value": "match"}}, + "rule": {"name": "test-deny", "config": {"value": "match"}}, }, action={"decision": "observe"}, ), @@ -1561,9 +1651,9 @@ async def test_leaf_selected_data_preview_is_bounded_and_redacted(self): assert len(preview["value"]["prompt"]) == 500 @pytest.mark.asyncio - async def test_engine_selected_data_does_not_overwrite_evaluator_metadata(self): - """Engine-owned selector data should not collide with evaluator-owned metadata.""" - # Given: an evaluator that deliberately returns its own selected_data key + async def test_engine_selected_data_does_not_overwrite_rule_metadata(self): + """Engine-owned selector data should not collide with rule-owned metadata.""" + # Given: a rule that deliberately returns its own selected_data key controls = [ MockControlWithIdentity( id=1, @@ -1575,7 +1665,7 @@ async def test_engine_selected_data_does_not_overwrite_evaluator_metadata(self): scope={"step_types": ["llm"], "stages": ["pre"]}, condition={ "selector": 
{"path": "input"}, - "evaluator": {"name": "test-metadata", "config": {"value": "match"}}, + "rule": {"name": "test-metadata", "config": {"value": "match"}}, }, action={"decision": "observe"}, ), @@ -1591,11 +1681,11 @@ async def test_engine_selected_data_does_not_overwrite_evaluator_metadata(self): # When: processing the request result = await engine.process(request) - # Then: evaluator-owned metadata remains intact and engine-owned data is namespaced. + # Then: rule-owned metadata remains intact and engine-owned data is namespaced. assert result.matches is not None metadata = result.matches[0].result.metadata assert metadata is not None - assert metadata["selected_data"] == "evaluator:raw input" + assert metadata["selected_data"] == "rule:raw input" assert metadata["engine_selected_data"] == "raw input" assert metadata["engine_selected_data_preview"] == { "type": "str", @@ -1620,21 +1710,21 @@ async def test_composite_results_preserve_decisive_child_metadata(self): "or": [ { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "test-metadata", "config": {"value": "miss-left"}, }, }, { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "test-metadata", "config": {"value": "match-right"}, }, }, { "selector": {"path": "name"}, - "evaluator": { + "rule": { "name": "test-slow", "config": {"value": "skip-tail"}, }, @@ -1660,7 +1750,7 @@ async def test_composite_results_preserve_decisive_child_metadata(self): metadata = result.matches[0].result.metadata assert metadata is not None assert metadata["source"] == "match-right" - assert metadata["selected_data"] == "evaluator:chosen" + assert metadata["selected_data"] == "rule:chosen" assert metadata["engine_selected_data_preview"] == { "type": "str", "value": "chosen", @@ -1672,7 +1762,7 @@ async def test_composite_results_preserve_decisive_child_metadata(self): @pytest.mark.asyncio async def test_and_condition_all_children_match_records_full_trace(self): """A fully-evaluated AND tree should 
record every child and produce a match.""" - # Given: an AND tree where every leaf evaluator matches + # Given: an AND tree where every leaf rule matches controls = [ MockControlWithIdentity( id=1, @@ -1686,11 +1776,11 @@ async def test_and_condition_all_children_match_records_full_trace(self): "and": [ { "selector": {"path": "input"}, - "evaluator": {"name": "test-deny", "config": {"value": "first"}}, + "rule": {"name": "test-deny", "config": {"value": "first"}}, }, { "selector": {"path": "input"}, - "evaluator": {"name": "test-deny", "config": {"value": "second"}}, + "rule": {"name": "test-deny", "config": {"value": "second"}}, }, ] }, @@ -1736,7 +1826,7 @@ async def test_not_condition_inverts_child_result(self): condition={ "not": { "selector": {"path": "input"}, - "evaluator": {"name": "test-allow", "config": {"value": "child"}}, + "rule": {"name": "test-allow", "config": {"value": "child"}}, } }, action={"decision": "observe"}, @@ -1764,8 +1854,8 @@ async def test_not_condition_inverts_child_result(self): @pytest.mark.asyncio async def test_not_condition_propagates_child_error_trace(self): - """NOT should surface child evaluator failures as composite errors.""" - # Given: a NOT tree whose child evaluator raises an error + """NOT should surface child rule failures as composite errors.""" + # Given: a NOT tree whose child rule raises an error controls = [ MockControlWithIdentity( id=1, @@ -1778,7 +1868,7 @@ async def test_not_condition_propagates_child_error_trace(self): condition={ "not": { "selector": {"path": "input"}, - "evaluator": {"name": "test-error", "config": {"value": "boom"}}, + "rule": {"name": "test-error", "config": {"value": "boom"}}, } }, action={"decision": "observe"}, @@ -1808,7 +1898,7 @@ async def test_not_condition_propagates_child_error_trace(self): @pytest.mark.asyncio async def test_or_condition_all_children_non_match_records_full_trace(self): """A fully-evaluated OR tree should record every child and produce a non-match.""" - # Given: 
an OR tree where every leaf evaluator returns non-match + # Given: an OR tree where every leaf rule returns non-match controls = [ MockControlWithIdentity( id=1, @@ -1822,11 +1912,11 @@ async def test_or_condition_all_children_non_match_records_full_trace(self): "or": [ { "selector": {"path": "input"}, - "evaluator": {"name": "test-allow", "config": {"value": "first"}}, + "rule": {"name": "test-allow", "config": {"value": "first"}}, }, { "selector": {"path": "input"}, - "evaluator": {"name": "test-allow", "config": {"value": "second"}}, + "rule": {"name": "test-allow", "config": {"value": "second"}}, }, ] }, @@ -1859,7 +1949,7 @@ async def test_or_condition_all_children_non_match_records_full_trace(self): @pytest.mark.asyncio async def test_and_error_records_skipped_children_in_trace(self): """Errors in composite conditions should preserve trace context for skipped branches.""" - # Given: an AND tree whose first child evaluator errors + # Given: an AND tree whose first child rule errors controls = [ MockControlWithIdentity( id=1, @@ -1873,11 +1963,11 @@ async def test_and_error_records_skipped_children_in_trace(self): "and": [ { "selector": {"path": "input"}, - "evaluator": {"name": "test-error", "config": {"value": "boom"}}, + "rule": {"name": "test-error", "config": {"value": "boom"}}, }, { "selector": {"path": "input"}, - "evaluator": {"name": "test-slow", "config": {"value": "skip"}}, + "rule": {"name": "test-slow", "config": {"value": "skip"}}, }, ] }, @@ -1905,7 +1995,7 @@ async def test_and_error_records_skipped_children_in_trace(self): assert trace["matched"] is None assert trace["short_circuit_reason"] == "error" assert result.errors[0].result.message.startswith( - "Condition evaluation aborted due to a child evaluator error:" + "Condition evaluation aborted due to a child rule error:" ) assert trace["children"][0]["evaluated"] is True assert "Intentional error from boom" in trace["children"][0]["error"] @@ -1915,7 +2005,7 @@ async def 
test_and_error_records_skipped_children_in_trace(self): @pytest.mark.asyncio async def test_or_error_records_skipped_children_in_trace(self): """OR error traces should be marked indeterminate, not as a definitive non-match.""" - # Given: an OR tree whose first child evaluator errors + # Given: an OR tree whose first child rule errors controls = [ MockControlWithIdentity( id=1, @@ -1929,11 +2019,11 @@ async def test_or_error_records_skipped_children_in_trace(self): "or": [ { "selector": {"path": "input"}, - "evaluator": {"name": "test-error", "config": {"value": "boom"}}, + "rule": {"name": "test-error", "config": {"value": "boom"}}, }, { "selector": {"path": "input"}, - "evaluator": {"name": "test-allow", "config": {"value": "skip"}}, + "rule": {"name": "test-allow", "config": {"value": "skip"}}, }, ] }, @@ -1961,7 +2051,7 @@ async def test_or_error_records_skipped_children_in_trace(self): assert trace["matched"] is None assert trace["short_circuit_reason"] == "error" assert result.errors[0].result.message.startswith( - "Condition evaluation aborted due to a child evaluator error:" + "Condition evaluation aborted due to a child rule error:" ) assert trace["children"][0]["evaluated"] is True assert "Intentional error from boom" in trace["children"][0]["error"] @@ -1977,7 +2067,7 @@ async def test_or_error_records_skipped_children_in_trace(self): def make_control_with_execution( control_id: int, name: str, - evaluator: str, + rule: str, action: str = "deny", config_value: str = "default", *, @@ -2009,8 +2099,8 @@ def make_control_with_execution( scope=scope, condition={ "selector": {"path": path}, - "evaluator": EvaluatorSpec( - name=evaluator, + "rule": RuleSpec( + name=rule, config={"value": config_value}, ), }, @@ -2336,20 +2426,20 @@ class TestSteerErrorHandling: async def test_steer_control_error_non_blocking(self): """Test that steer control errors don't block execution (unlike deny errors). 
- Given: A steer control with an evaluator that errors + Given: A steer control with a rule that errors When: Engine processes the request Then: Result is still safe (steer errors are non-blocking) Error is logged for observability Coverage: Lines 299 (steer_errored = True), 340-344 (logging) """ reset_test_state() - register_evaluator(ErrorEvaluator) + register_rule(ErrorRule) controls = [ make_control( 1, "steer-with-error", - evaluator="test-error", + rule="test-error", config_value="steer", action="steer", steering_context=SteeringContext(message="Steering guidance"), @@ -2398,18 +2488,18 @@ async def test_steer_control_error_non_blocking(self): async def test_deny_control_error_blocks(self): """Test that deny control errors DO block execution (fail closed). - Given: A deny control with an evaluator that errors + Given: A deny control with a rule that errors When: Engine processes the request Then: Result is unsafe (deny errors fail closed) """ reset_test_state() - register_evaluator(ErrorEvaluator) + register_rule(ErrorRule) controls = [ make_control( 1, "deny-with-error", - evaluator="test-error", + rule="test-error", config_value="deny", action="deny", ), @@ -2444,20 +2534,20 @@ async def test_mixed_deny_and_steer_errors(self): Coverage: Lines 299, 340-344 """ reset_test_state() - register_evaluator(ErrorEvaluator) + register_rule(ErrorRule) controls = [ make_control( 1, "deny-error", - evaluator="test-error", + rule="test-error", config_value="deny", action="deny", ), make_control( 2, "steer-error", - evaluator="test-error", + rule="test-error", config_value="steer", action="steer", steering_context=SteeringContext(message="Steer guidance"), @@ -2539,7 +2629,7 @@ class MockControl: }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "test-allow", "config": {"value": "test"}, }, diff --git a/engine/tests/test_discovery.py b/engine/tests/test_discovery.py index 920aa366..9f4c9703 100644 --- a/engine/tests/test_discovery.py 
+++ b/engine/tests/test_discovery.py @@ -1,4 +1,4 @@ -"""Tests for evaluator auto-discovery.""" +"""Tests for rule auto-discovery.""" from typing import Any from unittest.mock import MagicMock, patch @@ -6,130 +6,130 @@ from pydantic import BaseModel from agent_control_engine import ( - discover_evaluators, - ensure_evaluators_discovered, - list_evaluators, - reset_evaluator_discovery, + discover_rules, + ensure_rules_discovered, + list_rules, + reset_rule_discovery, ) -from agent_control_evaluators import ( - Evaluator, - EvaluatorMetadata, - clear_evaluators, - get_evaluator, - register_evaluator, +from agent_control_rules import ( + Rule, + RuleMetadata, + clear_rules, + get_rule, + register_rule, ) -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult -class TestDiscoverEvaluators: - """Tests for discover_evaluators() function.""" +class TestDiscoverRules: + """Tests for discover_rules() function.""" - def test_discover_evaluators_loads_builtins(self) -> None: - """Test that built-in evaluators are loaded.""" - discover_evaluators() + def test_discover_rules_loads_builtins(self) -> None: + """Test that built-in rules are loaded.""" + discover_rules() - evaluators = list_evaluators() - assert "regex" in evaluators - assert "list" in evaluators + rules = list_rules() + assert "regex" in rules + assert "list" in rules - @patch("agent_control_evaluators._discovery.entry_points") - def test_discover_evaluators_loads_entry_points( + @patch("agent_control_rules._discovery.entry_points") + def test_discover_rules_loads_entry_points( self, mock_entry_points: MagicMock ) -> None: - """Test that entry point evaluators are discovered.""" + """Test that entry point rules are discovered.""" - # Create mock evaluator + # Create mock rule class MockConfig(BaseModel): pass - class MockEvaluator(Evaluator[MockConfig]): - metadata = EvaluatorMetadata( - name="mock-ep-evaluator", + class MockRule(Rule[MockConfig]): + metadata = 
RuleMetadata( + name="mock-ep-rule", version="1.0.0", - description="Test evaluator", + description="Test rule", ) config_model = MockConfig - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=0.0, message="test") + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=0.0, message="test") mock_ep = MagicMock() - mock_ep.name = "mock-ep-evaluator" - mock_ep.load.return_value = MockEvaluator + mock_ep.name = "mock-ep-rule" + mock_ep.load.return_value = MockRule mock_entry_points.return_value = [mock_ep] - count = discover_evaluators() + count = discover_rules() - mock_entry_points.assert_called_once_with(group="agent_control.evaluators") - evaluators = list_evaluators() - assert "mock-ep-evaluator" in evaluators + mock_entry_points.assert_called_once_with(group="agent_control.rules") + rules = list_rules() + assert "mock-ep-rule" in rules # Count only includes entry-point registrations (not built-ins loaded via import) assert count >= 1 - @patch("agent_control_evaluators._discovery.entry_points") - def test_discover_evaluators_handles_load_error( + @patch("agent_control_rules._discovery.entry_points") + def test_discover_rules_handles_load_error( self, mock_entry_points: MagicMock ) -> None: - """Test graceful handling of evaluator load errors.""" + """Test graceful handling of rule load errors.""" mock_ep = MagicMock() - mock_ep.name = "bad-evaluator" + mock_ep.name = "bad-rule" mock_ep.load.side_effect = ImportError("Missing dependency") mock_entry_points.return_value = [mock_ep] # Should not raise - discover_evaluators() + discover_rules() - def test_discover_evaluators_only_runs_once(self) -> None: + def test_discover_rules_only_runs_once(self) -> None: """Test that discovery only runs once.""" - count1 = discover_evaluators() - count2 = discover_evaluators() + count1 = discover_rules() + count2 = discover_rules() - # First call loads evaluators, second call 
returns 0 (already discovered) + # First call loads rules, second call returns 0 (already discovered) assert count2 == 0 - # Verify evaluators are available (count may be 0 if no entry-point evaluators) - evaluators = list_evaluators() - assert "regex" in evaluators - assert "list" in evaluators - - def test_ensure_evaluators_discovered_triggers_discovery(self) -> None: - """Test that ensure_evaluators_discovered triggers discovery.""" - ensure_evaluators_discovered() - - evaluators = list_evaluators() - # Should have at least built-in evaluators - assert isinstance(evaluators, dict) - assert "regex" in evaluators - assert "list" in evaluators - - def test_reset_evaluator_discovery_allows_rediscovery(self) -> None: - """Test that reset_evaluator_discovery allows discovery to run again.""" - discover_evaluators() - evaluators1 = list_evaluators() - assert "regex" in evaluators1 + # Verify rules are available (count may be 0 if no entry-point rules) + rules = list_rules() + assert "regex" in rules + assert "list" in rules + + def test_ensure_rules_discovered_triggers_discovery(self) -> None: + """Test that ensure_rules_discovered triggers discovery.""" + ensure_rules_discovered() + + rules = list_rules() + # Should have at least built-in rules + assert isinstance(rules, dict) + assert "regex" in rules + assert "list" in rules + + def test_reset_rule_discovery_allows_rediscovery(self) -> None: + """Test that reset_rule_discovery allows discovery to run again.""" + discover_rules() + rules1 = list_rules() + assert "regex" in rules1 # After reset, discovery should run again - reset_evaluator_discovery() - clear_evaluators() + reset_rule_discovery() + clear_rules() - discover_evaluators() - evaluators2 = list_evaluators() - assert "regex" in evaluators2 - assert "list" in evaluators2 + discover_rules() + rules2 = list_rules() + assert "regex" in rules2 + assert "list" in rules2 - @patch("agent_control_evaluators._discovery.entry_points") - def 
test_discover_evaluators_skips_unavailable( + @patch("agent_control_rules._discovery.entry_points") + def test_discover_rules_skips_unavailable( self, mock_entry_points: MagicMock ) -> None: - """Test that evaluators with is_available() returning False are skipped.""" + """Test that rules with is_available() returning False are skipped.""" class MockConfig(BaseModel): pass - class UnavailableEvaluator(Evaluator[MockConfig]): - metadata = EvaluatorMetadata( - name="unavailable-evaluator", + class UnavailableRule(Rule[MockConfig]): + metadata = RuleMetadata( + name="unavailable-rule", version="1.0.0", - description="Evaluator with missing deps", + description="Rule with missing deps", ) config_model = MockConfig @@ -137,35 +137,35 @@ class UnavailableEvaluator(Evaluator[MockConfig]): def is_available(cls) -> bool: return False # Simulate missing dependency - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=0.0, message="test") + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=0.0, message="test") mock_ep = MagicMock() - mock_ep.name = "unavailable-evaluator" - mock_ep.load.return_value = UnavailableEvaluator + mock_ep.name = "unavailable-rule" + mock_ep.load.return_value = UnavailableRule mock_entry_points.return_value = [mock_ep] - count = discover_evaluators() + count = discover_rules() - # Evaluator should NOT be registered - evaluators = list_evaluators() - assert "unavailable-evaluator" not in evaluators + # Rule should NOT be registered + rules = list_rules() + assert "unavailable-rule" not in rules assert count == 0 - @patch("agent_control_evaluators._discovery.entry_points") - def test_discover_evaluators_registers_available( + @patch("agent_control_rules._discovery.entry_points") + def test_discover_rules_registers_available( self, mock_entry_points: MagicMock ) -> None: - """Test that evaluators with is_available() returning True are registered.""" + 
"""Test that rules with is_available() returning True are registered.""" class MockConfig(BaseModel): pass - class AvailableEvaluator(Evaluator[MockConfig]): - metadata = EvaluatorMetadata( - name="available-evaluator", + class AvailableRule(Rule[MockConfig]): + metadata = RuleMetadata( + name="available-rule", version="1.0.0", - description="Evaluator with all deps", + description="Rule with all deps", ) config_model = MockConfig @@ -173,61 +173,61 @@ class AvailableEvaluator(Evaluator[MockConfig]): def is_available(cls) -> bool: return True - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=0.0, message="test") + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=0.0, message="test") mock_ep = MagicMock() - mock_ep.name = "available-evaluator" - mock_ep.load.return_value = AvailableEvaluator + mock_ep.name = "available-rule" + mock_ep.load.return_value = AvailableRule mock_entry_points.return_value = [mock_ep] - count = discover_evaluators() + count = discover_rules() - # Evaluator should be registered - evaluators = list_evaluators() - assert "available-evaluator" in evaluators + # Rule should be registered + rules = list_rules() + assert "available-rule" in rules assert count == 1 class TestIsAvailable: - """Tests for the is_available() evaluator method.""" + """Tests for the is_available() rule method.""" def test_base_class_is_available_returns_true(self) -> None: - """Test that base Evaluator.is_available() returns True by default.""" + """Test that base Rule.is_available() returns True by default.""" class MockConfig(BaseModel): pass - class TestEvaluator(Evaluator[MockConfig]): - metadata = EvaluatorMetadata( - name="test-evaluator", + class TestRule(Rule[MockConfig]): + metadata = RuleMetadata( + name="test-rule", version="1.0.0", description="Test", ) config_model = MockConfig - async def evaluate(self, data: Any) -> EvaluatorResult: - return 
EvaluatorResult(matched=False, confidence=0.0, message="test") + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=0.0, message="test") # Default is_available() should return True - assert TestEvaluator.is_available() is True + assert TestRule.is_available() is True -class TestRegisterEvaluatorRespectsIsAvailable: - """Tests that @register_evaluator decorator respects is_available().""" +class TestRegisterRuleRespectsIsAvailable: + """Tests that @register_rule decorator respects is_available().""" - def test_register_evaluator_skips_unavailable(self) -> None: - """Test that @register_evaluator skips evaluators where is_available() returns False.""" + def test_register_rule_skips_unavailable(self) -> None: + """Test that @register_rule skips rules where is_available() returns False.""" class MockConfig(BaseModel): pass - @register_evaluator - class UnavailableEvaluator(Evaluator[MockConfig]): - metadata = EvaluatorMetadata( + @register_rule + class UnavailableRule(Rule[MockConfig]): + metadata = RuleMetadata( name="test-unavailable-decorated", version="1.0.0", - description="Evaluator with unavailable deps", + description="Rule with unavailable deps", ) config_model = MockConfig @@ -235,24 +235,24 @@ class UnavailableEvaluator(Evaluator[MockConfig]): def is_available(cls) -> bool: return False # Simulate missing dependency - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=0.0, message="test") + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=0.0, message="test") - # Evaluator should NOT be registered despite using @register_evaluator - assert get_evaluator("test-unavailable-decorated") is None + # Rule should NOT be registered despite using @register_rule + assert get_rule("test-unavailable-decorated") is None - def test_register_evaluator_registers_available(self) -> None: - """Test that @register_evaluator 
registers evaluators where is_available() returns True.""" + def test_register_rule_registers_available(self) -> None: + """Test that @register_rule registers rules where is_available() returns True.""" class MockConfig(BaseModel): pass - @register_evaluator - class AvailableEvaluator(Evaluator[MockConfig]): - metadata = EvaluatorMetadata( + @register_rule + class AvailableRule(Rule[MockConfig]): + metadata = RuleMetadata( name="test-available-decorated", version="1.0.0", - description="Evaluator with all deps", + description="Rule with all deps", ) config_model = MockConfig @@ -260,29 +260,29 @@ class AvailableEvaluator(Evaluator[MockConfig]): def is_available(cls) -> bool: return True - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=0.0, message="test") + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=0.0, message="test") - # Evaluator should be registered - assert get_evaluator("test-available-decorated") is not None + # Rule should be registered + assert get_rule("test-available-decorated") is not None - def test_register_evaluator_default_is_available(self) -> None: - """Test that @register_evaluator works when is_available() is not overridden.""" + def test_register_rule_default_is_available(self) -> None: + """Test that @register_rule works when is_available() is not overridden.""" class MockConfig(BaseModel): pass - @register_evaluator - class DefaultEvaluator(Evaluator[MockConfig]): - metadata = EvaluatorMetadata( + @register_rule + class DefaultRule(Rule[MockConfig]): + metadata = RuleMetadata( name="test-default-available", version="1.0.0", - description="Evaluator with default is_available", + description="Rule with default is_available", ) config_model = MockConfig - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=0.0, message="test") + async def evaluate(self, data: Any) -> RuleResult: 
+ return RuleResult(matched=False, confidence=0.0, message="test") - # Evaluator should be registered (default is_available returns True) - assert get_evaluator("test-default-available") is not None + # Rule should be registered (default is_available returns True) + assert get_rule("test-default-available") is not None diff --git a/engine/tests/test_evaluator_integrations.py b/engine/tests/test_evaluator_integrations.py deleted file mode 100644 index 3bfadd9e..00000000 --- a/engine/tests/test_evaluator_integrations.py +++ /dev/null @@ -1,281 +0,0 @@ -"""Tests for evaluator system integration with the unified architecture. - -These tests verify the evaluator system works correctly with the engine. -""" - -from typing import Any - -# Import to ensure built-in evaluators are registered -import agent_control_evaluators # noqa: F401 -import pytest -from agent_control_engine import get_evaluator_instance -from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator -from agent_control_models import EvaluatorResult, EvaluatorSpec -from pydantic import BaseModel - - -class MockConfig(BaseModel): - """Config for mock evaluator.""" - - threshold: float = 0.5 - - -class MockTestEvaluator(Evaluator[MockConfig]): - """Mock evaluator for engine testing.""" - - metadata = EvaluatorMetadata( - name="test-mock-evaluator", - version="1.0.0", - description="Test evaluator for engine tests", - ) - config_model = MockConfig - - async def evaluate(self, data: Any) -> EvaluatorResult: - """Mock evaluation.""" - value = float(data) if isinstance(data, (int, float)) else 0.0 - matched = value > self.config.threshold - - return EvaluatorResult( - matched=matched, - confidence=1.0, - message=f"Value {value} vs threshold {self.config.threshold}", - metadata={"value": value, "threshold": self.config.threshold}, - ) - - -class TestEvaluatorArchitecture: - """Tests verifying the evaluator architecture.""" - - def test_evaluator_is_abc_subclass(self): - """Test Evaluator 
is an ABC.""" - # Given/When: Checking Evaluator base class - from abc import ABC - - # Then: Should be subclass of ABC - assert issubclass(Evaluator, ABC) - - def test_evaluator_has_required_attributes(self): - """Test evaluators have required class attributes.""" - # Given/When: Checking MockTestEvaluator - # Then: Should have required attributes - assert hasattr(MockTestEvaluator, "metadata") - assert hasattr(MockTestEvaluator, "config_model") - assert MockTestEvaluator.metadata.name == "test-mock-evaluator" - - def test_evaluator_from_dict(self): - """Test creating evaluator from dict config.""" - # Given/When: Creating evaluator from dict - evaluator = MockTestEvaluator.from_dict({"threshold": 0.7}) - - # Then: Config should be parsed correctly - assert isinstance(evaluator.config, MockConfig) - assert evaluator.config.threshold == 0.7 - - -class TestMockEvaluatorEvaluation: - """Tests for mock evaluator evaluation.""" - - @pytest.fixture(autouse=True) - def register_mock(self): - """Register mock evaluator for tests.""" - register_evaluator(MockTestEvaluator) - yield - # Don't clear - other tests need built-in evaluators - - @pytest.mark.asyncio - async def test_evaluate_matched(self): - """Test evaluation when threshold exceeded.""" - # Given: Mock evaluator with threshold 0.5 - config = EvaluatorSpec(name="test-mock-evaluator", config={"threshold": 0.5}) - evaluator = get_evaluator_instance(config) - - # When: Evaluating value above threshold - result = await evaluator.evaluate(0.8) - - # Then: Should match with metadata - assert result.matched is True - assert result.confidence == 1.0 - assert result.metadata["value"] == 0.8 - assert result.metadata["threshold"] == 0.5 - - @pytest.mark.asyncio - async def test_evaluate_not_matched(self): - """Test evaluation when below threshold.""" - # Given: Mock evaluator with threshold 0.9 - config = EvaluatorSpec(name="test-mock-evaluator", config={"threshold": 0.9}) - evaluator = get_evaluator_instance(config) - - # 
When: Evaluating value below threshold - result = await evaluator.evaluate(0.3) - - # Then: Should not match - assert result.matched is False - - @pytest.mark.asyncio - async def test_multiple_evaluations(self): - """Test multiple evaluations with same evaluator.""" - # Given: Mock evaluator with threshold 0.5 - config = EvaluatorSpec(name="test-mock-evaluator", config={"threshold": 0.5}) - evaluator = get_evaluator_instance(config) - - # When: Evaluating multiple values - results = [ - await evaluator.evaluate(0.2), - await evaluator.evaluate(0.6), - await evaluator.evaluate(0.9), - ] - - # Then: Results depend on threshold comparison - assert results[0].matched is False # 0.2 < 0.5 - assert results[1].matched is True # 0.6 > 0.5 - assert results[2].matched is True # 0.9 > 0.5 - - -class TestEvaluatorMetadata: - """Tests for evaluator metadata.""" - - def test_access_metadata(self): - """Test that evaluator metadata is accessible.""" - # Given/When: Accessing MockTestEvaluator metadata - # Then: All fields should be correct - assert MockTestEvaluator.metadata.name == "test-mock-evaluator" - assert MockTestEvaluator.metadata.version == "1.0.0" - assert MockTestEvaluator.metadata.description == "Test evaluator for engine tests" - - def test_config_schema(self): - """Test that config model provides JSON schema.""" - # Given/When: Getting JSON schema from config model - schema = MockTestEvaluator.config_model.model_json_schema() - - # Then: Schema should include threshold property - assert "properties" in schema - assert "threshold" in schema["properties"] - - -class TestBuiltInEvaluators: - """Tests for built-in evaluators.""" - - def test_regex_evaluator_registered(self): - """Test regex evaluator is registered.""" - # Given/When: Getting regex evaluator - from agent_control_engine import list_evaluators - evaluator = list_evaluators().get("regex") - - # Then: Should be registered with correct name - assert evaluator is not None - assert evaluator.metadata.name == 
"regex" - - def test_list_evaluator_registered(self): - """Test list evaluator is registered.""" - # Given/When: Getting list evaluator - from agent_control_engine import list_evaluators - evaluator = list_evaluators().get("list") - - # Then: Should be registered with correct name - assert evaluator is not None - assert evaluator.metadata.name == "list" - - -class TestRegexEvaluatorFlags: - """Tests for regex evaluator flag handling.""" - - @pytest.mark.asyncio - async def test_regex_case_sensitive_by_default(self): - """Test regex is case-sensitive by default. - - Given: A regex pattern without flags - When: Evaluating against different case text - Then: Only exact case matches - """ - # Given: Regex for "SECRET" without flags - config = EvaluatorSpec( - name="regex", - config={"pattern": "SECRET"} - ) - evaluator = get_evaluator_instance(config) - - # When/Then: Exact case matches - result = await evaluator.evaluate("the SECRET is here") - assert result.matched is True - - # When/Then: Different case does NOT match - result = await evaluator.evaluate("the secret is here") - assert result.matched is False - - result = await evaluator.evaluate("the Secret is here") - assert result.matched is False - - @pytest.mark.asyncio - async def test_regex_ignorecase_flag(self): - """Test regex IGNORECASE flag works. 
- - Given: A regex pattern with IGNORECASE flag - When: Evaluating against different case text - Then: All cases match - """ - # Given: Regex for "SECRET" with IGNORECASE flag - config = EvaluatorSpec( - name="regex", - config={"pattern": "SECRET", "flags": ["IGNORECASE"]} - ) - evaluator = get_evaluator_instance(config) - - # When/Then: All case variations should match - result = await evaluator.evaluate("the SECRET is here") - assert result.matched is True - - result = await evaluator.evaluate("the secret is here") - assert result.matched is True - - result = await evaluator.evaluate("the Secret is here") - assert result.matched is True - - result = await evaluator.evaluate("the sEcReT is here") - assert result.matched is True - - @pytest.mark.asyncio - async def test_regex_short_i_flag(self): - """Test regex short 'I' flag works. - - Given: A regex pattern with 'I' flag (short for IGNORECASE) - When: Evaluating against different case text - Then: All cases match - """ - # Given: Regex with short "I" flag - config = EvaluatorSpec( - name="regex", - config={"pattern": "password", "flags": ["I"]} - ) - evaluator = get_evaluator_instance(config) - - # When/Then: All case variations should match - result = await evaluator.evaluate("PASSWORD") - assert result.matched is True - - result = await evaluator.evaluate("password") - assert result.matched is True - - result = await evaluator.evaluate("Password") - assert result.matched is True - - @pytest.mark.asyncio - async def test_regex_ignorecase_lowercase_flag(self): - """Test regex ignorecase flag works with lowercase. 
- - Given: A regex pattern with lowercase 'ignorecase' flag - When: Evaluating against different case text - Then: All cases match - """ - # Given: Regex with lowercase flag variant - config = EvaluatorSpec( - name="regex", - config={"pattern": "admin", "flags": ["ignorecase"]} - ) - evaluator = get_evaluator_instance(config) - - # When/Then: Should work with lowercase flag - result = await evaluator.evaluate("ADMIN") - assert result.matched is True - - result = await evaluator.evaluate("admin") - assert result.matched is True diff --git a/engine/tests/test_evaluators.py b/engine/tests/test_evaluators.py deleted file mode 100644 index 019ab8db..00000000 --- a/engine/tests/test_evaluators.py +++ /dev/null @@ -1,277 +0,0 @@ -"""Tests for unified evaluator factory.""" - -import pytest -from agent_control_engine import ( - clear_evaluator_cache, - get_evaluator_instance, - list_evaluators, -) -from agent_control_models import EvaluatorSpec -from agent_control_evaluators import ( - ListEvaluator, - RegexEvaluator, - RegexEvaluatorConfig, -) - - -class TestRegexEvaluator: - """Tests for the regex evaluator via the evaluator factory.""" - - @pytest.mark.asyncio - async def test_basic_match(self): - """Test regex matches SSN pattern.""" - # Given: A regex evaluator with SSN pattern - config = EvaluatorSpec(name="regex", config={"pattern": r"\d{3}-\d{2}-\d{4}"}) - evaluator = get_evaluator_instance(config) - - # When: Evaluating text containing SSN - result = await evaluator.evaluate("My SSN is 123-45-6789") - - # Then: Should match with high confidence - assert result.matched is True - assert result.confidence == 1.0 - - @pytest.mark.asyncio - async def test_no_match(self): - """Test regex doesn't match when pattern not found.""" - # Given: A regex evaluator with SSN pattern - config = EvaluatorSpec(name="regex", config={"pattern": r"\d{3}-\d{2}-\d{4}"}) - evaluator = get_evaluator_instance(config) - - # When: Evaluating text without pattern - result = await 
evaluator.evaluate("No numbers here") - - # Then: Should not match - assert result.matched is False - assert result.confidence == 1.0 - - @pytest.mark.asyncio - async def test_non_string_input(self): - """Test non-string input is converted to string.""" - # Given: A regex evaluator - config = EvaluatorSpec(name="regex", config={"pattern": r"123"}) - evaluator = get_evaluator_instance(config) - - # When: Evaluating non-string input - result = await evaluator.evaluate(12345) - - # Then: Should match after conversion - assert result.matched is True - - @pytest.mark.asyncio - async def test_none_input(self): - """Test handling of None input.""" - # Given: A regex evaluator - config = EvaluatorSpec(name="regex", config={"pattern": r".*"}) - evaluator = get_evaluator_instance(config) - - # When: Evaluating None - result = await evaluator.evaluate(None) - - # Then: Should not match and return message - assert result.matched is False - assert result.message == "No data to match" - - def test_invalid_regex_pattern(self): - """Test invalid regex pattern raises error.""" - # Given/When: Creating config with invalid pattern - # Then: Should raise ValueError - with pytest.raises(ValueError): - RegexEvaluatorConfig(pattern="[") - - @pytest.mark.asyncio - async def test_empty_pattern_matches_everything(self): - """Test empty pattern matches everything.""" - # Given: A regex evaluator with empty pattern - config = EvaluatorSpec(name="regex", config={"pattern": ""}) - evaluator = get_evaluator_instance(config) - - # When: Evaluating any text - result = await evaluator.evaluate("something") - - # Then: Should match - assert result.matched is True - - -class TestListEvaluator: - """Tests for the list evaluator via the evaluator factory.""" - - @pytest.mark.asyncio - async def test_any_match(self): - """Test list evaluator with any/match logic.""" - # Given: A list evaluator with blocklist items - config = EvaluatorSpec( - name="list", - config={"values": ["bad", "evil"], "logic": 
"any", "match_on": "match"}, - ) - evaluator = get_evaluator_instance(config) - - # When/Then: Blocklist items match, others don't - assert (await evaluator.evaluate("bad")).matched is True - assert (await evaluator.evaluate("evil")).matched is True - assert (await evaluator.evaluate("good")).matched is False - - @pytest.mark.asyncio - async def test_any_no_match(self): - """Test list evaluator as allowlist (any/no_match).""" - # Given: A list evaluator as allowlist - config = EvaluatorSpec( - name="list", - config={"values": ["safe", "ok"], "logic": "any", "match_on": "no_match"}, - ) - evaluator = get_evaluator_instance(config) - - # When/Then: Allowlist items don't match, others do - assert (await evaluator.evaluate("safe")).matched is False - assert (await evaluator.evaluate("ok")).matched is False - assert (await evaluator.evaluate("dangerous")).matched is True - - @pytest.mark.asyncio - async def test_all_match(self): - """Test list evaluator with all/match logic.""" - # Given: A list evaluator with all/match logic - config = EvaluatorSpec( - name="list", - config={"values": ["valid1", "valid2"], "logic": "all", "match_on": "match"}, - ) - evaluator = get_evaluator_instance(config) - - # When/Then: Matches only when all values present - assert (await evaluator.evaluate(["valid1", "valid2"])).matched is True - assert (await evaluator.evaluate(["valid1", "invalid"])).matched is False - assert (await evaluator.evaluate([])).matched is False - - @pytest.mark.asyncio - async def test_case_insensitive(self): - """Test case-insensitive matching.""" - # Given: A case-insensitive list evaluator - config = EvaluatorSpec( - name="list", - config={"values": ["MixedCase"], "case_sensitive": False, "match_on": "match"}, - ) - evaluator = get_evaluator_instance(config) - - # When/Then: Matches regardless of case - assert (await evaluator.evaluate("mixedcase")).matched is True - assert (await evaluator.evaluate("MIXEDCASE")).matched is True - - -class 
TestGetEvaluatorInstance: - """Tests for the get_evaluator_instance factory function.""" - - def test_get_evaluator_instance_returns_correct_type(self): - """Test factory returns correct evaluator type.""" - # Given: An evaluator config - config = EvaluatorSpec(name="regex", config={"pattern": "abc"}) - # When: Getting evaluator - evaluator = get_evaluator_instance(config) - - # Then: Returns correct evaluator type - assert isinstance(evaluator, RegexEvaluator) - assert evaluator.config.pattern == "abc" - - def test_get_evaluator_instance_unknown_evaluator(self): - """Test error when evaluator not found.""" - # Given: Config for nonexistent evaluator - config = EvaluatorSpec(name="nonexistent", config={}) - - # When/Then: Should raise ValueError - with pytest.raises(ValueError, match="not found"): - get_evaluator_instance(config) - - def test_list_evaluators(self): - """Test listing available evaluators.""" - # Given/When: Getting available evaluators - evaluators = list_evaluators() - - # Then: Should include built-in evaluators - assert "regex" in evaluators - assert "list" in evaluators - - -class TestEvaluatorCache: - """Tests for evaluator instance caching.""" - - def setup_method(self): - """Clear cache before each test.""" - clear_evaluator_cache() - - def teardown_method(self): - """Clear cache after each test.""" - clear_evaluator_cache() - - def test_evaluator_cache_hit(self): - """Test that same config returns same cached instance.""" - # Given: An evaluator config - config = EvaluatorSpec(name="regex", config={"pattern": "test"}) - - # When: First call creates instance - evaluator1 = get_evaluator_instance(config) - # When: Second call with same config - evaluator2 = get_evaluator_instance(config) - - # Then: Should return same cached instance - assert evaluator1 is evaluator2, "Same config should return cached instance" - - def test_evaluator_cache_miss_different_config(self): - """Test that different configs return different instances.""" - # Given: 
Two different configs - config1 = EvaluatorSpec(name="regex", config={"pattern": "test1"}) - config2 = EvaluatorSpec(name="regex", config={"pattern": "test2"}) - - # When: Getting evaluators - evaluator1 = get_evaluator_instance(config1) - evaluator2 = get_evaluator_instance(config2) - - # Then: Should return different instances - assert evaluator1 is not evaluator2, "Different configs should return different instances" - - def test_evaluator_cache_miss_different_evaluator(self): - """Test that same config but different evaluators return different instances.""" - # Given: Two configs with different evaluators - config1 = EvaluatorSpec(name="regex", config={"pattern": "bad"}) - config2 = EvaluatorSpec(name="list", config={"values": ["bad"]}) - - # When: Getting evaluators - evaluator1 = get_evaluator_instance(config1) - evaluator2 = get_evaluator_instance(config2) - - # Then: Should return different evaluator types - assert evaluator1 is not evaluator2 - assert isinstance(evaluator1, RegexEvaluator) - assert isinstance(evaluator2, ListEvaluator) - - def test_evaluator_cache_clear_all(self): - """Test that clear_evaluator_cache clears all entries.""" - # Given: Two cached evaluators - config1 = EvaluatorSpec(name="regex", config={"pattern": "test1"}) - config2 = EvaluatorSpec(name="list", config={"values": ["test"]}) - evaluator1a = get_evaluator_instance(config1) - evaluator2a = get_evaluator_instance(config2) - - # When: Clearing cache - clear_evaluator_cache() - - # When: Getting instances again - evaluator1b = get_evaluator_instance(config1) - evaluator2b = get_evaluator_instance(config2) - - # Then: Both should be new instances - assert evaluator1a is not evaluator1b, "Should be new instance after clear" - assert evaluator2a is not evaluator2b, "Should be new instance after clear" - - -class TestCacheSizeClamping: - """Tests for EVALUATOR_CACHE_SIZE clamping behavior.""" - - def test_cache_size_is_clamped_to_minimum(self): - """Verify cache size is clamped to at 
least 1. - - Given: EVALUATOR_CACHE_SIZE constant exists - When: Module is imported - Then: The value should be at least 1 (MIN_CACHE_SIZE) - """ - from agent_control_evaluators._factory import EVALUATOR_CACHE_SIZE, MIN_CACHE_SIZE - - assert EVALUATOR_CACHE_SIZE >= MIN_CACHE_SIZE - assert MIN_CACHE_SIZE == 1 diff --git a/engine/tests/test_rule_integrations.py b/engine/tests/test_rule_integrations.py new file mode 100644 index 00000000..4b08adb1 --- /dev/null +++ b/engine/tests/test_rule_integrations.py @@ -0,0 +1,281 @@ +"""Tests for rule system integration with the unified architecture. + +These tests verify the rule system works correctly with the engine. +""" + +from typing import Any + +# Import to ensure built-in rules are registered +import agent_control_rules # noqa: F401 +import pytest +from agent_control_engine import get_rule_instance +from agent_control_rules import Rule, RuleMetadata, register_rule +from agent_control_models import RuleResult, RuleSpec +from pydantic import BaseModel + + +class MockConfig(BaseModel): + """Config for mock rule.""" + + threshold: float = 0.5 + + +class MockTestRule(Rule[MockConfig]): + """Mock rule for engine testing.""" + + metadata = RuleMetadata( + name="test-mock-rule", + version="1.0.0", + description="Test rule for engine tests", + ) + config_model = MockConfig + + async def evaluate(self, data: Any) -> RuleResult: + """Mock evaluation.""" + value = float(data) if isinstance(data, (int, float)) else 0.0 + matched = value > self.config.threshold + + return RuleResult( + matched=matched, + confidence=1.0, + message=f"Value {value} vs threshold {self.config.threshold}", + metadata={"value": value, "threshold": self.config.threshold}, + ) + + +class TestRuleArchitecture: + """Tests verifying the rule architecture.""" + + def test_rule_is_abc_subclass(self): + """Test Rule is an ABC.""" + # Given/When: Checking Rule base class + from abc import ABC + + # Then: Should be subclass of ABC + assert issubclass(Rule, ABC) + + 
def test_rule_has_required_attributes(self): + """Test rules have required class attributes.""" + # Given/When: Checking MockTestRule + # Then: Should have required attributes + assert hasattr(MockTestRule, "metadata") + assert hasattr(MockTestRule, "config_model") + assert MockTestRule.metadata.name == "test-mock-rule" + + def test_rule_from_dict(self): + """Test creating rule from dict config.""" + # Given/When: Creating rule from dict + rule = MockTestRule.from_dict({"threshold": 0.7}) + + # Then: Config should be parsed correctly + assert isinstance(rule.config, MockConfig) + assert rule.config.threshold == 0.7 + + +class TestMockRuleEvaluation: + """Tests for mock rule evaluation.""" + + @pytest.fixture(autouse=True) + def register_mock(self): + """Register mock rule for tests.""" + register_rule(MockTestRule) + yield + # Don't clear - other tests need built-in rules + + @pytest.mark.asyncio + async def test_evaluate_matched(self): + """Test evaluation when threshold exceeded.""" + # Given: Mock rule with threshold 0.5 + config = RuleSpec(name="test-mock-rule", config={"threshold": 0.5}) + rule = get_rule_instance(config) + + # When: Evaluating value above threshold + result = await rule.evaluate(0.8) + + # Then: Should match with metadata + assert result.matched is True + assert result.confidence == 1.0 + assert result.metadata["value"] == 0.8 + assert result.metadata["threshold"] == 0.5 + + @pytest.mark.asyncio + async def test_evaluate_not_matched(self): + """Test evaluation when below threshold.""" + # Given: Mock rule with threshold 0.9 + config = RuleSpec(name="test-mock-rule", config={"threshold": 0.9}) + rule = get_rule_instance(config) + + # When: Evaluating value below threshold + result = await rule.evaluate(0.3) + + # Then: Should not match + assert result.matched is False + + @pytest.mark.asyncio + async def test_multiple_evaluations(self): + """Test multiple evaluations with same rule.""" + # Given: Mock rule with threshold 0.5 + config = 
RuleSpec(name="test-mock-rule", config={"threshold": 0.5}) + rule = get_rule_instance(config) + + # When: Evaluating multiple values + results = [ + await rule.evaluate(0.2), + await rule.evaluate(0.6), + await rule.evaluate(0.9), + ] + + # Then: Results depend on threshold comparison + assert results[0].matched is False # 0.2 < 0.5 + assert results[1].matched is True # 0.6 > 0.5 + assert results[2].matched is True # 0.9 > 0.5 + + +class TestRuleMetadata: + """Tests for rule metadata.""" + + def test_access_metadata(self): + """Test that rule metadata is accessible.""" + # Given/When: Accessing MockTestRule metadata + # Then: All fields should be correct + assert MockTestRule.metadata.name == "test-mock-rule" + assert MockTestRule.metadata.version == "1.0.0" + assert MockTestRule.metadata.description == "Test rule for engine tests" + + def test_config_schema(self): + """Test that config model provides JSON schema.""" + # Given/When: Getting JSON schema from config model + schema = MockTestRule.config_model.model_json_schema() + + # Then: Schema should include threshold property + assert "properties" in schema + assert "threshold" in schema["properties"] + + +class TestBuiltInRules: + """Tests for built-in rules.""" + + def test_regex_rule_registered(self): + """Test regex rule is registered.""" + # Given/When: Getting regex rule + from agent_control_engine import list_rules + rule = list_rules().get("regex") + + # Then: Should be registered with correct name + assert rule is not None + assert rule.metadata.name == "regex" + + def test_list_rule_registered(self): + """Test list rule is registered.""" + # Given/When: Getting list rule + from agent_control_engine import list_rules + rule = list_rules().get("list") + + # Then: Should be registered with correct name + assert rule is not None + assert rule.metadata.name == "list" + + +class TestRegexRuleFlags: + """Tests for regex rule flag handling.""" + + @pytest.mark.asyncio + async def 
test_regex_case_sensitive_by_default(self): + """Test regex is case-sensitive by default. + + Given: A regex pattern without flags + When: Evaluating against different case text + Then: Only exact case matches + """ + # Given: Regex for "SECRET" without flags + config = RuleSpec( + name="regex", + config={"pattern": "SECRET"} + ) + rule = get_rule_instance(config) + + # When/Then: Exact case matches + result = await rule.evaluate("the SECRET is here") + assert result.matched is True + + # When/Then: Different case does NOT match + result = await rule.evaluate("the secret is here") + assert result.matched is False + + result = await rule.evaluate("the Secret is here") + assert result.matched is False + + @pytest.mark.asyncio + async def test_regex_ignorecase_flag(self): + """Test regex IGNORECASE flag works. + + Given: A regex pattern with IGNORECASE flag + When: Evaluating against different case text + Then: All cases match + """ + # Given: Regex for "SECRET" with IGNORECASE flag + config = RuleSpec( + name="regex", + config={"pattern": "SECRET", "flags": ["IGNORECASE"]} + ) + rule = get_rule_instance(config) + + # When/Then: All case variations should match + result = await rule.evaluate("the SECRET is here") + assert result.matched is True + + result = await rule.evaluate("the secret is here") + assert result.matched is True + + result = await rule.evaluate("the Secret is here") + assert result.matched is True + + result = await rule.evaluate("the sEcReT is here") + assert result.matched is True + + @pytest.mark.asyncio + async def test_regex_short_i_flag(self): + """Test regex short 'I' flag works. 
+ + Given: A regex pattern with 'I' flag (short for IGNORECASE) + When: Evaluating against different case text + Then: All cases match + """ + # Given: Regex with short "I" flag + config = RuleSpec( + name="regex", + config={"pattern": "password", "flags": ["I"]} + ) + rule = get_rule_instance(config) + + # When/Then: All case variations should match + result = await rule.evaluate("PASSWORD") + assert result.matched is True + + result = await rule.evaluate("password") + assert result.matched is True + + result = await rule.evaluate("Password") + assert result.matched is True + + @pytest.mark.asyncio + async def test_regex_ignorecase_lowercase_flag(self): + """Test regex ignorecase flag works with lowercase. + + Given: A regex pattern with lowercase 'ignorecase' flag + When: Evaluating against different case text + Then: All cases match + """ + # Given: Regex with lowercase flag variant + config = RuleSpec( + name="regex", + config={"pattern": "admin", "flags": ["ignorecase"]} + ) + rule = get_rule_instance(config) + + # When/Then: Should work with lowercase flag + result = await rule.evaluate("ADMIN") + assert result.matched is True + + result = await rule.evaluate("admin") + assert result.matched is True diff --git a/engine/tests/test_rules.py b/engine/tests/test_rules.py new file mode 100644 index 00000000..250b7a3a --- /dev/null +++ b/engine/tests/test_rules.py @@ -0,0 +1,277 @@ +"""Tests for unified rule factory.""" + +import pytest +from agent_control_engine import ( + clear_rule_cache, + get_rule_instance, + list_rules, +) +from agent_control_models import RuleSpec +from agent_control_rules import ( + ListRule, + RegexRule, + RegexRuleConfig, +) + + +class TestRegexRule: + """Tests for the regex rule via the rule factory.""" + + @pytest.mark.asyncio + async def test_basic_match(self): + """Test regex matches SSN pattern.""" + # Given: A regex rule with SSN pattern + config = RuleSpec(name="regex", config={"pattern": r"\d{3}-\d{2}-\d{4}"}) + rule = 
get_rule_instance(config) + + # When: Evaluating text containing SSN + result = await rule.evaluate("My SSN is 123-45-6789") + + # Then: Should match with high confidence + assert result.matched is True + assert result.confidence == 1.0 + + @pytest.mark.asyncio + async def test_no_match(self): + """Test regex doesn't match when pattern not found.""" + # Given: A regex rule with SSN pattern + config = RuleSpec(name="regex", config={"pattern": r"\d{3}-\d{2}-\d{4}"}) + rule = get_rule_instance(config) + + # When: Evaluating text without pattern + result = await rule.evaluate("No numbers here") + + # Then: Should not match + assert result.matched is False + assert result.confidence == 1.0 + + @pytest.mark.asyncio + async def test_non_string_input(self): + """Test non-string input is converted to string.""" + # Given: A regex rule + config = RuleSpec(name="regex", config={"pattern": r"123"}) + rule = get_rule_instance(config) + + # When: Evaluating non-string input + result = await rule.evaluate(12345) + + # Then: Should match after conversion + assert result.matched is True + + @pytest.mark.asyncio + async def test_none_input(self): + """Test handling of None input.""" + # Given: A regex rule + config = RuleSpec(name="regex", config={"pattern": r".*"}) + rule = get_rule_instance(config) + + # When: Evaluating None + result = await rule.evaluate(None) + + # Then: Should not match and return message + assert result.matched is False + assert result.message == "No data to match" + + def test_invalid_regex_pattern(self): + """Test invalid regex pattern raises error.""" + # Given/When: Creating config with invalid pattern + # Then: Should raise ValueError + with pytest.raises(ValueError): + RegexRuleConfig(pattern="[") + + @pytest.mark.asyncio + async def test_empty_pattern_matches_everything(self): + """Test empty pattern matches everything.""" + # Given: A regex rule with empty pattern + config = RuleSpec(name="regex", config={"pattern": ""}) + rule = 
get_rule_instance(config) + + # When: Evaluating any text + result = await rule.evaluate("something") + + # Then: Should match + assert result.matched is True + + +class TestListRule: + """Tests for the list rule via the rule factory.""" + + @pytest.mark.asyncio + async def test_any_match(self): + """Test list rule with any/match logic.""" + # Given: A list rule with blocklist items + config = RuleSpec( + name="list", + config={"values": ["bad", "evil"], "logic": "any", "match_on": "match"}, + ) + rule = get_rule_instance(config) + + # When/Then: Blocklist items match, others don't + assert (await rule.evaluate("bad")).matched is True + assert (await rule.evaluate("evil")).matched is True + assert (await rule.evaluate("good")).matched is False + + @pytest.mark.asyncio + async def test_any_no_match(self): + """Test list rule as allowlist (any/no_match).""" + # Given: A list rule as allowlist + config = RuleSpec( + name="list", + config={"values": ["safe", "ok"], "logic": "any", "match_on": "no_match"}, + ) + rule = get_rule_instance(config) + + # When/Then: Allowlist items don't match, others do + assert (await rule.evaluate("safe")).matched is False + assert (await rule.evaluate("ok")).matched is False + assert (await rule.evaluate("dangerous")).matched is True + + @pytest.mark.asyncio + async def test_all_match(self): + """Test list rule with all/match logic.""" + # Given: A list rule with all/match logic + config = RuleSpec( + name="list", + config={"values": ["valid1", "valid2"], "logic": "all", "match_on": "match"}, + ) + rule = get_rule_instance(config) + + # When/Then: Matches only when all values present + assert (await rule.evaluate(["valid1", "valid2"])).matched is True + assert (await rule.evaluate(["valid1", "invalid"])).matched is False + assert (await rule.evaluate([])).matched is False + + @pytest.mark.asyncio + async def test_case_insensitive(self): + """Test case-insensitive matching.""" + # Given: A case-insensitive list rule + config = RuleSpec( + 
name="list", + config={"values": ["MixedCase"], "case_sensitive": False, "match_on": "match"}, + ) + rule = get_rule_instance(config) + + # When/Then: Matches regardless of case + assert (await rule.evaluate("mixedcase")).matched is True + assert (await rule.evaluate("MIXEDCASE")).matched is True + + +class TestGetRuleInstance: + """Tests for the get_rule_instance factory function.""" + + def test_get_rule_instance_returns_correct_type(self): + """Test factory returns correct rule type.""" + # Given: A rule config + config = RuleSpec(name="regex", config={"pattern": "abc"}) + # When: Getting rule + rule = get_rule_instance(config) + + # Then: Returns correct rule type + assert isinstance(rule, RegexRule) + assert rule.config.pattern == "abc" + + def test_get_rule_instance_unknown_rule(self): + """Test error when rule not found.""" + # Given: Config for nonexistent rule + config = RuleSpec(name="nonexistent", config={}) + + # When/Then: Should raise ValueError + with pytest.raises(ValueError, match="not found"): + get_rule_instance(config) + + def test_list_rules(self): + """Test listing available rules.""" + # Given/When: Getting available rules + rules = list_rules() + + # Then: Should include built-in rules + assert "regex" in rules + assert "list" in rules + + +class TestRuleCache: + """Tests for rule instance caching.""" + + def setup_method(self): + """Clear cache before each test.""" + clear_rule_cache() + + def teardown_method(self): + """Clear cache after each test.""" + clear_rule_cache() + + def test_rule_cache_hit(self): + """Test that same config returns same cached instance.""" + # Given: A rule config + config = RuleSpec(name="regex", config={"pattern": "test"}) + + # When: First call creates instance + rule1 = get_rule_instance(config) + # When: Second call with same config + rule2 = get_rule_instance(config) + + # Then: Should return same cached instance + assert rule1 is rule2, "Same config should return cached instance" + + def 
test_rule_cache_miss_different_config(self): + """Test that different configs return different instances.""" + # Given: Two different configs + config1 = RuleSpec(name="regex", config={"pattern": "test1"}) + config2 = RuleSpec(name="regex", config={"pattern": "test2"}) + + # When: Getting rules + rule1 = get_rule_instance(config1) + rule2 = get_rule_instance(config2) + + # Then: Should return different instances + assert rule1 is not rule2, "Different configs should return different instances" + + def test_rule_cache_miss_different_rule(self): + """Test that same config but different rules return different instances.""" + # Given: Two configs with different rules + config1 = RuleSpec(name="regex", config={"pattern": "bad"}) + config2 = RuleSpec(name="list", config={"values": ["bad"]}) + + # When: Getting rules + rule1 = get_rule_instance(config1) + rule2 = get_rule_instance(config2) + + # Then: Should return different rule types + assert rule1 is not rule2 + assert isinstance(rule1, RegexRule) + assert isinstance(rule2, ListRule) + + def test_rule_cache_clear_all(self): + """Test that clear_rule_cache clears all entries.""" + # Given: Two cached rules + config1 = RuleSpec(name="regex", config={"pattern": "test1"}) + config2 = RuleSpec(name="list", config={"values": ["test"]}) + rule1a = get_rule_instance(config1) + rule2a = get_rule_instance(config2) + + # When: Clearing cache + clear_rule_cache() + + # When: Getting instances again + rule1b = get_rule_instance(config1) + rule2b = get_rule_instance(config2) + + # Then: Both should be new instances + assert rule1a is not rule1b, "Should be new instance after clear" + assert rule2a is not rule2b, "Should be new instance after clear" + + +class TestCacheSizeClamping: + """Tests for RULE_CACHE_SIZE clamping behavior.""" + + def test_cache_size_is_clamped_to_minimum(self): + """Verify cache size is clamped to at least 1. 
+ + Given: RULE_CACHE_SIZE constant exists + When: Module is imported + Then: The value should be at least 1 (MIN_CACHE_SIZE) + """ + from agent_control_rules._factory import RULE_CACHE_SIZE, MIN_CACHE_SIZE + + assert RULE_CACHE_SIZE >= MIN_CACHE_SIZE + assert MIN_CACHE_SIZE == 1 diff --git a/evaluators/builtin/README.md b/evaluators/builtin/README.md deleted file mode 100644 index 63023f37..00000000 --- a/evaluators/builtin/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# Agent Control Built-in Evaluators - -Built-in evaluators provide common checks like regex matching, list matching, JSON validation, and SQL validation. They are discovered automatically via Python entry points and used by the server and SDK runtime. - -## What this package provides - -- `regex` evaluator for pattern matching -- `list` evaluator for allow/deny lists -- `json` evaluator for schema validation -- `sql` evaluator for query validation - -## Install - -```bash -pip install agent-control-evaluators -``` - -## Discover evaluators - -```python -from agent_control_evaluators import discover_evaluators, list_evaluators - -discover_evaluators() -print(list_evaluators()) -``` - -Full guide: https://docs.agentcontrol.dev/concepts/evaluators/built-in-evaluators diff --git a/evaluators/builtin/pyproject.toml b/evaluators/builtin/pyproject.toml deleted file mode 100644 index 18e14a87..00000000 --- a/evaluators/builtin/pyproject.toml +++ /dev/null @@ -1,41 +0,0 @@ -[project] -name = "agent-control-evaluators" -version = "8.1.2" -description = "Builtin evaluators for agent-control" -readme = "README.md" -requires-python = ">=3.12" -license = { text = "Apache-2.0" } -authors = [{ name = "Agent Control Team" }] -dependencies = [ - "agent-control-models>=7.5.0", - "pydantic>=2.12.4", - "google-re2>=1.1", - "jsonschema>=4.0.0", - "sqlglot[c]>=30.11.0,<30.12.0", -] - -[project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo>=7.5.0"] -budget = ["agent-control-evaluator-budget>=7.5.0"] 
-cisco = ["agent-control-evaluator-cisco>=7.5.0"] -dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0"] - -[project.entry-points."agent_control.evaluators"] -regex = "agent_control_evaluators.regex:RegexEvaluator" -list = "agent_control_evaluators.list:ListEvaluator" -json = "agent_control_evaluators.json:JSONEvaluator" -sql = "agent_control_evaluators.sql:SQLEvaluator" - -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.hatch.build.targets.wheel] -packages = ["src/agent_control_evaluators"] - -[tool.uv.sources] -agent-control-models = { workspace = true } -# For local dev: use local contrib packages instead of PyPI -agent-control-evaluator-galileo = { path = "../contrib/galileo", editable = true } -agent-control-evaluator-budget = { path = "../contrib/budget", editable = true } -agent-control-evaluator-cisco = { path = "../contrib/cisco", editable = true } diff --git a/evaluators/builtin/src/agent_control_evaluators/__init__.py b/evaluators/builtin/src/agent_control_evaluators/__init__.py deleted file mode 100644 index e1f81afb..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/__init__.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Agent Control Evaluators. - -This package contains builtin evaluator implementations for agent-control. -Built-in evaluators (regex, list, json, sql) are registered automatically on import. - -Available evaluators: - Built-in (no namespace): - - regex: Regular expression matching - - list: List-based value matching - - json: JSON validation - - sql: SQL query validation - -Naming convention: - - Built-in: "regex", "list", "json", "sql" - - External: "provider.name" (e.g., "galileo.luna") - - Agent-scoped: "agent:name" (custom code deployed with agent) - -External evaluators are installed via separate packages (e.g., agent-control-evaluator-galileo). -Custom evaluators are Evaluator classes deployed with the engine. -Their schemas are registered via initAgent for validation purposes. 
-""" - -from importlib.metadata import PackageNotFoundError, version - -try: - __version__ = version("agent-control-evaluators") -except PackageNotFoundError: - __version__ = "0.0.0.dev" - -# Core infrastructure - export from _base and _registry -from agent_control_evaluators._base import ( - Evaluator, - EvaluatorConfig, - EvaluatorMetadata, -) -from agent_control_evaluators._discovery import ( - discover_evaluators, - ensure_evaluators_discovered, - list_evaluators, - reset_evaluator_discovery, -) -from agent_control_evaluators._factory import clear_evaluator_cache, get_evaluator_instance -from agent_control_evaluators._registry import ( - clear_evaluators, - get_all_evaluators, - get_evaluator, - register_evaluator, -) - -# Import built-in evaluators to auto-register them -from agent_control_evaluators.json import JSONEvaluator, JSONEvaluatorConfig -from agent_control_evaluators.list import ListEvaluator, ListEvaluatorConfig -from agent_control_evaluators.regex import RegexEvaluator, RegexEvaluatorConfig -from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig - -__all__ = [ - # Core infrastructure - "Evaluator", - "EvaluatorConfig", - "EvaluatorMetadata", - "register_evaluator", - "get_evaluator", - "get_all_evaluators", - "clear_evaluators", - "discover_evaluators", - "ensure_evaluators_discovered", - "reset_evaluator_discovery", - "list_evaluators", - "get_evaluator_instance", - "clear_evaluator_cache", - # Built-in evaluators - "RegexEvaluator", - "RegexEvaluatorConfig", - "ListEvaluator", - "ListEvaluatorConfig", - "JSONEvaluator", - "JSONEvaluatorConfig", - "SQLEvaluator", - "SQLEvaluatorConfig", -] diff --git a/evaluators/builtin/src/agent_control_evaluators/_discovery.py b/evaluators/builtin/src/agent_control_evaluators/_discovery.py deleted file mode 100644 index a733c2b4..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/_discovery.py +++ /dev/null @@ -1,109 +0,0 @@ -"""Evaluator discovery via entry points.""" - -from 
__future__ import annotations - -import logging -import threading -from importlib.metadata import entry_points -from typing import TYPE_CHECKING, Any - -from agent_control_evaluators._registry import ( - get_all_evaluators, - get_evaluator, - register_evaluator, -) - -if TYPE_CHECKING: - from agent_control_evaluators._base import Evaluator - -logger = logging.getLogger(__name__) - -_DISCOVERY_COMPLETE = False -_DISCOVERY_LOCK = threading.Lock() - - -def discover_evaluators() -> int: - """Discover and register evaluators via entry points. - - All evaluators (built-in and third-party) are discovered via the - 'agent_control.evaluators' entry point group. Evaluators are only registered - if their `is_available()` method returns True. - - Safe to call multiple times - only runs discovery once. - Thread-safe via lock. - - Returns: - Number of evaluators discovered - """ - global _DISCOVERY_COMPLETE - - # Fast path without lock - if _DISCOVERY_COMPLETE: - return 0 - - with _DISCOVERY_LOCK: - # Double-check after acquiring lock - if _DISCOVERY_COMPLETE: - return 0 - - discovered = 0 - - # Discover ALL evaluators (built-in and third-party) via entry points. - # Only register evaluators where is_available() returns True. 
- try: - eps = entry_points(group="agent_control.evaluators") - for ep in eps: - try: - evaluator_class = ep.load() - name = evaluator_class.metadata.name - - # Skip if already registered - if get_evaluator(name) is not None: - continue - - # Check if evaluator dependencies are satisfied - if not evaluator_class.is_available(): - logger.debug(f"Evaluator '{name}' not available, skipping") - continue - - register_evaluator(evaluator_class) - logger.debug(f"Registered evaluator: {name}") - discovered += 1 - except Exception as e: - logger.warning(f"Failed to load evaluator '{ep.name}': {e}") - except Exception as e: - logger.debug(f"Entry point discovery not available: {e}") - - _DISCOVERY_COMPLETE = True - logger.debug(f"Evaluator discovery complete: {discovered} new evaluators") - return discovered - - -def ensure_evaluators_discovered() -> None: - """Ensure evaluator discovery has run. Call this before using evaluators.""" - if not _DISCOVERY_COMPLETE: - discover_evaluators() - - -def reset_evaluator_discovery() -> None: - """Reset discovery state. Useful for testing.""" - global _DISCOVERY_COMPLETE - with _DISCOVERY_LOCK: - _DISCOVERY_COMPLETE = False - - -# ============================================================================= -# Public evaluator API -# ============================================================================= - - -def list_evaluators() -> dict[str, type[Evaluator[Any]]]: - """List all registered evaluators. - - This function ensures evaluator discovery has run before returning results. 
- - Returns: - Dictionary mapping evaluator names to evaluator classes - """ - ensure_evaluators_discovered() - return get_all_evaluators() diff --git a/evaluators/builtin/src/agent_control_evaluators/_factory.py b/evaluators/builtin/src/agent_control_evaluators/_factory.py deleted file mode 100644 index 772903df..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/_factory.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Evaluator factory with instance caching.""" - -from __future__ import annotations - -import json -import logging -import os -from collections import OrderedDict -from typing import TYPE_CHECKING, Any - -from agent_control_evaluators._discovery import list_evaluators - -if TYPE_CHECKING: - from agent_control_models import EvaluatorSpec - - from agent_control_evaluators._base import Evaluator - -logger = logging.getLogger(__name__) - -# Configuration -DEFAULT_CACHE_SIZE = 100 -MIN_CACHE_SIZE = 1 # Minimum to avoid infinite loop in eviction - - -def _parse_cache_size() -> int: - """Parse EVALUATOR_CACHE_SIZE from env with safe fallback.""" - raw = os.environ.get("EVALUATOR_CACHE_SIZE") - if raw is None: - return DEFAULT_CACHE_SIZE - try: - return int(raw) - except ValueError: - logger.warning( - f"Invalid EVALUATOR_CACHE_SIZE '{raw}', using default {DEFAULT_CACHE_SIZE}" - ) - return DEFAULT_CACHE_SIZE - - -EVALUATOR_CACHE_SIZE = max(_parse_cache_size(), MIN_CACHE_SIZE) - -# LRU cache for evaluator instances: cache_key -> Evaluator instance -_EVALUATOR_CACHE: OrderedDict[str, Evaluator[Any]] = OrderedDict() - - -def _config_hash(config: dict[str, Any]) -> str: - """Create a hashable key from config dict.""" - return json.dumps(config, sort_keys=True, default=str) - - -def get_evaluator_instance(evaluator_spec: EvaluatorSpec) -> Evaluator[Any]: - """Get or create a cached evaluator instance from specification. - - Uses LRU caching to reuse evaluator instances with the same config. 
- Cache key is: {evaluator_name}:{config_hash} - - WARNING: Evaluator instances are cached and reused across requests! - Evaluator implementations MUST be stateless - do not store mutable - request-scoped state on the evaluator instance. See Evaluator - docstring for details on safe patterns. - - Args: - evaluator_spec: The evaluator specification with name and config - - Returns: - Evaluator instance (cached or new) - - Raises: - ValueError: If evaluator not found - """ - # Build cache key - cache_key = f"{evaluator_spec.name}:{_config_hash(evaluator_spec.config)}" - - # Check cache - if cache_key in _EVALUATOR_CACHE: - # Move to end (most recently used) - _EVALUATOR_CACHE.move_to_end(cache_key) - logger.debug(f"Cache hit for evaluator: {evaluator_spec.name}") - return _EVALUATOR_CACHE[cache_key] - - # Cache miss - create new instance - evaluators = list_evaluators() - evaluator_cls = evaluators.get(evaluator_spec.name) - - if evaluator_cls is None: - raise ValueError( - f"Evaluator '{evaluator_spec.name}' not found. " - f"Available evaluators: {', '.join(evaluators.keys())}" - ) - - logger.debug(f"Cache miss, creating evaluator: {evaluator_spec.name}") - instance = evaluator_cls.from_dict(evaluator_spec.config) - - # Evict oldest if cache is full - while len(_EVALUATOR_CACHE) >= EVALUATOR_CACHE_SIZE: - evicted_key, _ = _EVALUATOR_CACHE.popitem(last=False) - logger.debug(f"Evicted evaluator from cache: {evicted_key}") - - # Cache the instance - _EVALUATOR_CACHE[cache_key] = instance - return instance - - -def clear_evaluator_cache() -> None: - """Clear all cached evaluator instances. 
Useful for testing.""" - _EVALUATOR_CACHE.clear() diff --git a/evaluators/builtin/src/agent_control_evaluators/_registry.py b/evaluators/builtin/src/agent_control_evaluators/_registry.py deleted file mode 100644 index bd93e63e..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/_registry.py +++ /dev/null @@ -1,87 +0,0 @@ -"""Evaluator registry for registration and lookup.""" - -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING, Any - -if TYPE_CHECKING: - from agent_control_evaluators._base import Evaluator - -logger = logging.getLogger(__name__) - -# ============================================================================= -# Evaluator Registry -# ============================================================================= - -_EVALUATOR_REGISTRY: dict[str, type[Evaluator[Any]]] = {} - - -def register_evaluator( - evaluator_class: type[Evaluator[Any]], -) -> type[Evaluator[Any]]: - """Register an evaluator class by its metadata name. - - Can be used as a decorator or called directly. Respects the evaluator's - is_available() method - evaluators with unavailable dependencies are - silently skipped. - - Args: - evaluator_class: Evaluator class to register - - Returns: - The same evaluator class (for decorator usage) - - Raises: - ValueError: If evaluator name already registered with different class - - Example: - ```python - @register_evaluator - class MyEvaluator(Evaluator[MyConfig]): - metadata = EvaluatorMetadata(name="my-evaluator", ...) - ... 
- ``` - """ - name = evaluator_class.metadata.name - - # Check if evaluator dependencies are satisfied - if not evaluator_class.is_available(): - logger.debug(f"Evaluator '{name}' not available (is_available=False), skipping") - return evaluator_class - - if name in _EVALUATOR_REGISTRY: - # Allow re-registration of same class (e.g., during hot reload) - if _EVALUATOR_REGISTRY[name] is evaluator_class: - return evaluator_class - raise ValueError(f"Evaluator '{name}' is already registered") - - _EVALUATOR_REGISTRY[name] = evaluator_class - logger.debug(f"Registered evaluator: {name} v{evaluator_class.metadata.version}") - return evaluator_class - - -def get_evaluator(name: str) -> type[Evaluator[Any]] | None: - """Get a registered evaluator by name. - - Args: - name: Evaluator name to look up - - Returns: - Evaluator class if found, None otherwise - """ - return _EVALUATOR_REGISTRY.get(name) - - -def get_all_evaluators() -> dict[str, type[Evaluator[Any]]]: - """Get all registered evaluators. - - Returns: - Dictionary mapping evaluator names to evaluator classes - """ - return dict(_EVALUATOR_REGISTRY) - - -def clear_evaluators() -> None: - """Clear all registered evaluators. 
Useful for testing.""" - _EVALUATOR_REGISTRY.clear() diff --git a/evaluators/builtin/src/agent_control_evaluators/json/__init__.py b/evaluators/builtin/src/agent_control_evaluators/json/__init__.py deleted file mode 100644 index a24322f9..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/json/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""JSON validation evaluator.""" - -from agent_control_evaluators.json.config import JSONEvaluatorConfig -from agent_control_evaluators.json.evaluator import JSONEvaluator - -__all__ = ["JSONEvaluator", "JSONEvaluatorConfig"] diff --git a/evaluators/builtin/src/agent_control_evaluators/list/__init__.py b/evaluators/builtin/src/agent_control_evaluators/list/__init__.py deleted file mode 100644 index ff7ad17e..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/list/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""List evaluator for value matching.""" - -from agent_control_evaluators.list.config import ListEvaluatorConfig -from agent_control_evaluators.list.evaluator import ListEvaluator - -__all__ = ["ListEvaluator", "ListEvaluatorConfig"] diff --git a/evaluators/builtin/src/agent_control_evaluators/regex/__init__.py b/evaluators/builtin/src/agent_control_evaluators/regex/__init__.py deleted file mode 100644 index 8a03bcae..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/regex/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Regex evaluator for pattern matching.""" - -from agent_control_evaluators.regex.config import RegexEvaluatorConfig -from agent_control_evaluators.regex.evaluator import RegexEvaluator - -__all__ = ["RegexEvaluator", "RegexEvaluatorConfig"] diff --git a/evaluators/builtin/src/agent_control_evaluators/sql/__init__.py b/evaluators/builtin/src/agent_control_evaluators/sql/__init__.py deleted file mode 100644 index 3f7402e2..00000000 --- a/evaluators/builtin/src/agent_control_evaluators/sql/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""SQL validation evaluator.""" - -from 
agent_control_evaluators.sql.config import SQLEvaluatorConfig -from agent_control_evaluators.sql.evaluator import SQLEvaluator - -__all__ = ["SQLEvaluator", "SQLEvaluatorConfig"] diff --git a/evaluators/builtin/tests/__init__.py b/evaluators/builtin/tests/__init__.py deleted file mode 100644 index 9cf66e72..00000000 --- a/evaluators/builtin/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for agent_control_evaluators package.""" diff --git a/evaluators/builtin/tests/json/__init__.py b/evaluators/builtin/tests/json/__init__.py deleted file mode 100644 index 5f848dd5..00000000 --- a/evaluators/builtin/tests/json/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the JSON evaluator.""" diff --git a/evaluators/builtin/tests/sql/__init__.py b/evaluators/builtin/tests/sql/__init__.py deleted file mode 100644 index 541fa0d1..00000000 --- a/evaluators/builtin/tests/sql/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the SQL evaluator.""" diff --git a/evaluators/builtin/tests/sql/test_sqlglot_runtime.py b/evaluators/builtin/tests/sql/test_sqlglot_runtime.py deleted file mode 100644 index bf00a33b..00000000 --- a/evaluators/builtin/tests/sql/test_sqlglot_runtime.py +++ /dev/null @@ -1,17 +0,0 @@ -"""SQLGlot runtime integration tests.""" - -from sqlglot import exp - -from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig - - -def test_sqlglot_public_imports_support_sql_evaluator(): - """SQLGlot's public API should remain importable with the native extra installed.""" - # Given: the SQL evaluator package imports SQLGlot's public expression module - assert exp.Select is not None - - # When: constructing the SQL evaluator - evaluator = SQLEvaluator(SQLEvaluatorConfig(blocked_operations=["DROP"])) - - # Then: the evaluator can be created without SQLGlot import shadowing failures - assert evaluator.metadata.name == "sql" diff --git a/evaluators/builtin/tests/test_base.py b/evaluators/builtin/tests/test_base.py deleted file mode 100644 index 
776a8d01..00000000 --- a/evaluators/builtin/tests/test_base.py +++ /dev/null @@ -1,140 +0,0 @@ -"""Tests for evaluator base classes. - -Architecture: Evaluators take config at __init__, evaluate() only takes data. -""" - -import pytest -from typing import Any - -from agent_control_evaluators import Evaluator, EvaluatorConfig, EvaluatorMetadata -from agent_control_models import EvaluatorResult - - -class MockConfig(EvaluatorConfig): - """Config model for mock evaluator.""" - - should_match: bool = False - timeout_ms: int = 5000 - - -class MockEvaluator(Evaluator[MockConfig]): - """A mock evaluator for testing.""" - - metadata = EvaluatorMetadata( - name="mock-evaluator", - version="1.0.0", - description="A mock evaluator for testing", - requires_api_key=False, - timeout_ms=5000, - ) - config_model = MockConfig - - async def evaluate(self, data: Any) -> EvaluatorResult: - """Simple mock evaluation.""" - return EvaluatorResult( - matched=self.config.should_match, - confidence=1.0, - message="Mock evaluation", - metadata={"data": str(data)}, - ) - - -class TestEvaluatorMetadata: - """Tests for EvaluatorMetadata dataclass.""" - - def test_metadata_with_defaults(self): - """Test metadata with default values.""" - metadata = EvaluatorMetadata( - name="test-evaluator", - version="1.0.0", - description="Test evaluator", - ) - - assert metadata.name == "test-evaluator" - assert metadata.version == "1.0.0" - assert metadata.description == "Test evaluator" - assert metadata.requires_api_key is False - assert metadata.timeout_ms == 10000 - - def test_metadata_with_all_fields(self): - """Test metadata with all fields specified.""" - metadata = EvaluatorMetadata( - name="full-evaluator", - version="2.0.0", - description="Full evaluator", - requires_api_key=True, - timeout_ms=15000, - ) - - assert metadata.name == "full-evaluator" - assert metadata.version == "2.0.0" - assert metadata.requires_api_key is True - assert metadata.timeout_ms == 15000 - - -class TestEvaluator: - 
"""Tests for Evaluator base class.""" - - def test_evaluator_is_abstract(self): - """Test that Evaluator is an ABC.""" - from abc import ABC - assert issubclass(Evaluator, ABC) - - def test_mock_evaluator_metadata(self): - """Test that mock evaluator has correct metadata.""" - assert MockEvaluator.metadata.name == "mock-evaluator" - assert MockEvaluator.metadata.version == "1.0.0" - assert MockEvaluator.metadata.timeout_ms == 5000 - - @pytest.mark.asyncio - async def test_mock_evaluator_evaluate(self): - """Test mock evaluator evaluation.""" - evaluator = MockEvaluator.from_dict({"should_match": True}) - - result = await evaluator.evaluate("test data") - - assert result.matched is True - assert result.confidence == 1.0 - assert result.metadata["data"] == "test data" - - @pytest.mark.asyncio - async def test_mock_evaluator_evaluate_no_match(self): - """Test mock evaluator evaluation without match.""" - evaluator = MockEvaluator.from_dict({"should_match": False}) - - result = await evaluator.evaluate("test data") - - assert result.matched is False - - def test_evaluator_config_stored(self): - """Test that evaluator stores config.""" - evaluator = MockEvaluator.from_dict({"should_match": True}) - - assert isinstance(evaluator.config, MockConfig) - assert evaluator.config.should_match is True - - def test_get_timeout_seconds_from_config(self): - """Test timeout conversion from config.""" - evaluator = MockEvaluator.from_dict({"timeout_ms": 3000}) - - assert evaluator.get_timeout_seconds() == 3.0 - - def test_get_timeout_seconds_different_values(self): - """Test timeout with different values.""" - evaluator1 = MockEvaluator.from_dict({"timeout_ms": 7500}) - evaluator2 = MockEvaluator.from_dict({"timeout_ms": 1000}) - - assert evaluator1.get_timeout_seconds() == 7.5 - assert evaluator2.get_timeout_seconds() == 1.0 - - def test_get_timeout_seconds_from_default(self): - """Test timeout uses metadata default when not in config.""" - evaluator = MockEvaluator.from_dict({}) # 
No timeout_ms in config - - # MockConfig has default timeout_ms=5000 - assert evaluator.get_timeout_seconds() == 5.0 - - def test_cannot_instantiate_abstract_class(self): - """Test that Evaluator cannot be instantiated directly.""" - with pytest.raises(TypeError, match="abstract"): - Evaluator({}) # type: ignore diff --git a/evaluators/builtin/tests/test_discovery.py b/evaluators/builtin/tests/test_discovery.py deleted file mode 100644 index 62876412..00000000 --- a/evaluators/builtin/tests/test_discovery.py +++ /dev/null @@ -1,187 +0,0 @@ -"""Tests for entry-point-based evaluator discovery.""" - -from __future__ import annotations - -from typing import Any -from unittest.mock import MagicMock, patch - -import pytest -from agent_control_evaluators import ( - Evaluator, - EvaluatorConfig, - EvaluatorMetadata, - clear_evaluators, - discover_evaluators, - ensure_evaluators_discovered, - get_all_evaluators, - list_evaluators, - register_evaluator, - reset_evaluator_discovery, -) -from agent_control_evaluators import _discovery as discovery_module -from agent_control_models import EvaluatorResult - - -class _DiscoveryConfig(EvaluatorConfig): - pass - - -def _make_class(*, name: str, available: bool = True) -> type[Evaluator[_DiscoveryConfig]]: - class _Dummy(Evaluator[_DiscoveryConfig]): - metadata = EvaluatorMetadata(name=name, version="1.0.0", description="") - config_model = _DiscoveryConfig - - @classmethod - def is_available(cls) -> bool: - return available - - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=1.0, message="") - - _Dummy.__name__ = f"Discovery_{name.replace('-', '_')}" - return _Dummy - - -@pytest.fixture -def isolated_discovery(): - """Snapshot registry + discovery flag, restore on teardown.""" - snapshot = dict(get_all_evaluators()) - clear_evaluators() - reset_evaluator_discovery() - yield - clear_evaluators() - reset_evaluator_discovery() - for cls in snapshot.values(): - 
register_evaluator(cls) - - -def _make_fake_entry_point(name: str, evaluator_class: type[Any]) -> MagicMock: - """Build a MagicMock that mimics importlib.metadata.EntryPoint.""" - ep = MagicMock() - ep.name = name - ep.load.return_value = evaluator_class - return ep - - -def test_discover_evaluators_registers_available_classes(isolated_discovery): - """Discover walks the entry-point group and registers each available class.""" - cls = _make_class(name="disc-a") - fake_ep = _make_fake_entry_point("disc-a", cls) - - with patch.object(discovery_module, "entry_points", return_value=[fake_ep]): - count = discover_evaluators() - - assert count == 1 - assert get_all_evaluators().get("disc-a") is cls - - -def test_discover_evaluators_skips_unavailable_classes(isolated_discovery): - """Evaluators whose is_available() is False must NOT be registered.""" - cls = _make_class(name="disc-unavailable", available=False) - fake_ep = _make_fake_entry_point("disc-unavailable", cls) - - with patch.object(discovery_module, "entry_points", return_value=[fake_ep]): - count = discover_evaluators() - - assert count == 0 - assert "disc-unavailable" not in get_all_evaluators() - - -def test_discover_evaluators_skips_already_registered(isolated_discovery): - """Already-registered names are skipped without raising.""" - cls = _make_class(name="disc-existing") - register_evaluator(cls) - - fake_ep = _make_fake_entry_point("disc-existing", cls) - with patch.object(discovery_module, "entry_points", return_value=[fake_ep]): - count = discover_evaluators() - - assert count == 0 - - -def test_discover_evaluators_only_runs_once(isolated_discovery): - """Repeat calls short-circuit on the _DISCOVERY_COMPLETE flag.""" - cls = _make_class(name="disc-once") - fake_ep = _make_fake_entry_point("disc-once", cls) - - with patch.object( - discovery_module, "entry_points", return_value=[fake_ep] - ) as patched: - first = discover_evaluators() - second = discover_evaluators() - - # First call discovers, second 
returns 0 without consulting entry_points. - assert first == 1 - assert second == 0 - assert patched.call_count == 1 - - -def test_discover_evaluators_swallows_load_failures(isolated_discovery): - """A broken entry point is logged and skipped, not propagated.""" - bad_ep = MagicMock() - bad_ep.name = "broken" - bad_ep.load.side_effect = RuntimeError("boom") - - good_cls = _make_class(name="disc-good") - good_ep = _make_fake_entry_point("disc-good", good_cls) - - with patch.object(discovery_module, "entry_points", return_value=[bad_ep, good_ep]): - count = discover_evaluators() - - assert count == 1 - assert get_all_evaluators().get("disc-good") is good_cls - - -def test_discover_evaluators_handles_entry_points_failure(isolated_discovery): - """If entry_points() itself raises, discovery completes with zero results.""" - with patch.object( - discovery_module, - "entry_points", - side_effect=RuntimeError("entry-point system unavailable"), - ): - count = discover_evaluators() - - assert count == 0 - - -def test_reset_evaluator_discovery_allows_rerun(isolated_discovery): - """reset_evaluator_discovery clears the completed flag so discover runs again.""" - cls = _make_class(name="disc-reset") - fake_ep = _make_fake_entry_point("disc-reset", cls) - - with patch.object( - discovery_module, "entry_points", return_value=[fake_ep] - ) as patched: - discover_evaluators() - clear_evaluators() - reset_evaluator_discovery() - count = discover_evaluators() - - assert count == 1 - assert patched.call_count == 2 - - -def test_ensure_evaluators_discovered_runs_once(isolated_discovery): - """ensure_evaluators_discovered is the lazy-init entry point.""" - cls = _make_class(name="disc-ensure") - fake_ep = _make_fake_entry_point("disc-ensure", cls) - - with patch.object( - discovery_module, "entry_points", return_value=[fake_ep] - ) as patched: - ensure_evaluators_discovered() - ensure_evaluators_discovered() - - assert patched.call_count == 1 - assert 
get_all_evaluators().get("disc-ensure") is cls - - -def test_list_evaluators_triggers_discovery(isolated_discovery): - """list_evaluators is the convenience accessor; it must trigger discovery.""" - cls = _make_class(name="disc-list") - fake_ep = _make_fake_entry_point("disc-list", cls) - - with patch.object(discovery_module, "entry_points", return_value=[fake_ep]): - result = list_evaluators() - - assert result.get("disc-list") is cls diff --git a/evaluators/builtin/tests/test_factory.py b/evaluators/builtin/tests/test_factory.py deleted file mode 100644 index 4bba4b82..00000000 --- a/evaluators/builtin/tests/test_factory.py +++ /dev/null @@ -1,172 +0,0 @@ -"""Tests for the LRU-cached evaluator factory.""" - -from __future__ import annotations - -import importlib -from typing import Any - -import pytest -from agent_control_evaluators import ( - Evaluator, - EvaluatorConfig, - EvaluatorMetadata, - clear_evaluator_cache, - clear_evaluators, - get_all_evaluators, - get_evaluator_instance, - register_evaluator, -) -from agent_control_evaluators import _factory as factory_module -from agent_control_models import EvaluatorResult, EvaluatorSpec - - -class _FactoryConfig(EvaluatorConfig): - payload: str = "default" - - -class _FactoryEvaluator(Evaluator[_FactoryConfig]): - metadata = EvaluatorMetadata(name="factory-dummy", version="1.0.0", description="") - config_model = _FactoryConfig - - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=1.0, message="") - - -@pytest.fixture -def isolated_factory(): - """Snapshot registry/cache so factory tests don't leak state.""" - snapshot = dict(get_all_evaluators()) - clear_evaluators() - clear_evaluator_cache() - register_evaluator(_FactoryEvaluator) - yield - clear_evaluator_cache() - clear_evaluators() - for cls in snapshot.values(): - register_evaluator(cls) - - -def test_get_evaluator_instance_returns_evaluator(isolated_factory): - spec = 
EvaluatorSpec(name="factory-dummy", config={"payload": "p1"}) - - instance = get_evaluator_instance(spec) - - assert isinstance(instance, _FactoryEvaluator) - assert instance.config.payload == "p1" - - -def test_get_evaluator_instance_caches_by_config(isolated_factory): - spec_a = EvaluatorSpec(name="factory-dummy", config={"payload": "same"}) - spec_b = EvaluatorSpec(name="factory-dummy", config={"payload": "same"}) - - first = get_evaluator_instance(spec_a) - second = get_evaluator_instance(spec_b) - - # Same config = same cached instance. - assert first is second - - -def test_get_evaluator_instance_treats_different_configs_separately(isolated_factory): - spec_a = EvaluatorSpec(name="factory-dummy", config={"payload": "a"}) - spec_b = EvaluatorSpec(name="factory-dummy", config={"payload": "b"}) - - instance_a = get_evaluator_instance(spec_a) - instance_b = get_evaluator_instance(spec_b) - - assert instance_a is not instance_b - assert instance_a.config.payload == "a" - assert instance_b.config.payload == "b" - - -def test_get_evaluator_instance_raises_for_unknown_evaluator(isolated_factory): - with pytest.raises(ValueError, match="not found"): - get_evaluator_instance(EvaluatorSpec(name="no-such-evaluator", config={})) - - -def test_clear_evaluator_cache_forces_recreation(isolated_factory): - spec = EvaluatorSpec(name="factory-dummy", config={"payload": "p"}) - - first = get_evaluator_instance(spec) - clear_evaluator_cache() - second = get_evaluator_instance(spec) - - assert first is not second - - -def test_get_evaluator_instance_evicts_oldest_when_full(isolated_factory, monkeypatch): - """LRU eviction: when cache is full, the least-recently-used entry is dropped.""" - # Force a tiny cache so we can observe eviction without overhead. 
- monkeypatch.setattr(factory_module, "EVALUATOR_CACHE_SIZE", 2) - - spec_a = EvaluatorSpec(name="factory-dummy", config={"payload": "a"}) - spec_b = EvaluatorSpec(name="factory-dummy", config={"payload": "b"}) - spec_c = EvaluatorSpec(name="factory-dummy", config={"payload": "c"}) - - first_a = get_evaluator_instance(spec_a) - get_evaluator_instance(spec_b) - # Insert third → "a" is the LRU and must be evicted. - get_evaluator_instance(spec_c) - - re_a = get_evaluator_instance(spec_a) - # "a" was evicted: new instance must NOT be the original. - assert re_a is not first_a - - -def test_get_evaluator_instance_moves_hit_to_most_recent( - isolated_factory, monkeypatch -): - """Cache hit must refresh LRU recency so the touched entry isn't evicted next.""" - monkeypatch.setattr(factory_module, "EVALUATOR_CACHE_SIZE", 2) - - spec_a = EvaluatorSpec(name="factory-dummy", config={"payload": "a"}) - spec_b = EvaluatorSpec(name="factory-dummy", config={"payload": "b"}) - spec_c = EvaluatorSpec(name="factory-dummy", config={"payload": "c"}) - - first_a = get_evaluator_instance(spec_a) - get_evaluator_instance(spec_b) - # Touch "a" so "b" becomes the LRU. - re_a = get_evaluator_instance(spec_a) - assert re_a is first_a - - # Inserting "c" should evict "b", not "a". 
- get_evaluator_instance(spec_c) - - refetched_a = get_evaluator_instance(spec_a) - assert refetched_a is first_a # still cached - - -def test_parse_cache_size_uses_default_when_unset(monkeypatch): - monkeypatch.delenv("EVALUATOR_CACHE_SIZE", raising=False) - reloaded = importlib.reload(factory_module) - try: - assert reloaded.EVALUATOR_CACHE_SIZE == factory_module.DEFAULT_CACHE_SIZE - finally: - importlib.reload(factory_module) - - -def test_parse_cache_size_falls_back_on_invalid_value(monkeypatch): - monkeypatch.setenv("EVALUATOR_CACHE_SIZE", "not-a-number") - reloaded = importlib.reload(factory_module) - try: - assert reloaded.EVALUATOR_CACHE_SIZE == reloaded.DEFAULT_CACHE_SIZE - finally: - importlib.reload(factory_module) - - -def test_parse_cache_size_clamps_to_minimum(monkeypatch): - monkeypatch.setenv("EVALUATOR_CACHE_SIZE", "0") - reloaded = importlib.reload(factory_module) - try: - # Anything below MIN_CACHE_SIZE is clamped to avoid infinite eviction loops. - assert reloaded.EVALUATOR_CACHE_SIZE >= reloaded.MIN_CACHE_SIZE - finally: - importlib.reload(factory_module) - - -def test_parse_cache_size_accepts_valid_int(monkeypatch): - monkeypatch.setenv("EVALUATOR_CACHE_SIZE", "42") - reloaded = importlib.reload(factory_module) - try: - assert reloaded.EVALUATOR_CACHE_SIZE == 42 - finally: - importlib.reload(factory_module) diff --git a/evaluators/builtin/tests/test_registry.py b/evaluators/builtin/tests/test_registry.py deleted file mode 100644 index 6b663129..00000000 --- a/evaluators/builtin/tests/test_registry.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Tests for the in-memory evaluator registry.""" - -from __future__ import annotations - -from typing import Any - -import pytest -from agent_control_evaluators import ( - Evaluator, - EvaluatorConfig, - EvaluatorMetadata, - clear_evaluators, - get_all_evaluators, - get_evaluator, - register_evaluator, -) -from agent_control_models import EvaluatorResult - - -class _DummyConfig(EvaluatorConfig): - pass - - -def 
_make_class(*, name: str, available: bool = True) -> type[Evaluator[_DummyConfig]]: - """Build a fresh Evaluator subclass with the supplied metadata name.""" - - class _Dummy(Evaluator[_DummyConfig]): - metadata = EvaluatorMetadata( - name=name, - version="1.0.0", - description="", - ) - config_model = _DummyConfig - - @classmethod - def is_available(cls) -> bool: - return available - - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=1.0, message="") - - _Dummy.__name__ = f"Dummy_{name.replace('-', '_')}" - return _Dummy - - -@pytest.fixture -def isolated_registry(): - """Snapshot and restore the global registry so tests don't leak state.""" - snapshot = dict(get_all_evaluators()) - clear_evaluators() - yield - clear_evaluators() - for cls in snapshot.values(): - register_evaluator(cls) - - -def test_register_and_lookup_evaluator(isolated_registry): - cls = _make_class(name="reg-a") - - register_evaluator(cls) - - assert get_evaluator("reg-a") is cls - - -def test_get_evaluator_returns_none_when_not_registered(isolated_registry): - assert get_evaluator("does-not-exist") is None - - -def test_get_all_evaluators_returns_copy(isolated_registry): - cls = _make_class(name="reg-copy") - register_evaluator(cls) - - snapshot = get_all_evaluators() - snapshot["evil"] = cls # mutate the returned dict - - # Internal registry must not reflect external mutation. - assert "evil" not in get_all_evaluators() - - -def test_register_is_idempotent_for_same_class(isolated_registry): - cls = _make_class(name="reg-idem") - - register_evaluator(cls) - # Registering the exact same class again must not raise. 
- assert register_evaluator(cls) is cls - - -def test_register_rejects_name_collision_with_different_class(isolated_registry): - first = _make_class(name="reg-conflict") - second = _make_class(name="reg-conflict") - register_evaluator(first) - - with pytest.raises(ValueError, match="already registered"): - register_evaluator(second) - - -def test_register_skips_unavailable_evaluators(isolated_registry): - cls = _make_class(name="reg-unavailable", available=False) - - # Should not raise and should not register. - assert register_evaluator(cls) is cls - assert get_evaluator("reg-unavailable") is None - - -def test_clear_evaluators_empties_registry(isolated_registry): - register_evaluator(_make_class(name="reg-c1")) - register_evaluator(_make_class(name="reg-c2")) - assert len(get_all_evaluators()) == 2 - - clear_evaluators() - - assert get_all_evaluators() == {} - - -def test_register_decorator_returns_class(isolated_registry): - cls = _make_class(name="reg-decorator") - # The function is documented as decorator-compatible: it must return the class. - decorated = register_evaluator(cls) - assert decorated is cls diff --git a/evaluators/contrib/README.md b/evaluators/contrib/README.md deleted file mode 100644 index 91beb9b9..00000000 --- a/evaluators/contrib/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Agent Control Contrib Evaluators - -Contributed evaluators and templates for extending Agent Control. 
- -- `galileo/` — Luna evaluator integration -- `template/` — Starter template for adding new evaluators - -Full guide: https://docs.agentcontrol.dev/concepts/evaluators/custom-evaluators diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/__init__.py b/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/__init__.py deleted file mode 100644 index c747c4f8..00000000 --- a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Budget evaluator for per-agent LLM cost and token tracking.""" - -from agent_control_evaluator_budget.budget.config import ( - BudgetEvaluatorConfig, - BudgetLimitRule, - ModelPricing, -) -from agent_control_evaluator_budget.budget.evaluator import BudgetEvaluator -from agent_control_evaluator_budget.budget.memory_store import InMemoryBudgetStore -from agent_control_evaluator_budget.budget.store import BudgetSnapshot, BudgetStore - -# Note: clear_budget_stores is a testing utility and is intentionally not -# re-exported here. 
Import it directly from the evaluator submodule in tests: -# from agent_control_evaluator_budget.budget.evaluator import clear_budget_stores - -__all__ = [ - "BudgetEvaluator", - "BudgetEvaluatorConfig", - "BudgetLimitRule", - "BudgetSnapshot", - "BudgetStore", - "InMemoryBudgetStore", - "ModelPricing", -] diff --git a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/__init__.py b/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/__init__.py deleted file mode 100644 index 9c2e5a5b..00000000 --- a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .config import CiscoAIDefenseConfig -from .evaluator import CiscoAIDefenseEvaluator - -__all__ = ["CiscoAIDefenseEvaluator", "CiscoAIDefenseConfig"] - diff --git a/evaluators/contrib/cisco/tests/__init__.py b/evaluators/contrib/cisco/tests/__init__.py deleted file mode 100644 index e9222bc4..00000000 --- a/evaluators/contrib/cisco/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for the Cisco AI Defense contrib evaluator.""" diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py deleted file mode 100644 index 5606bf5d..00000000 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/__init__.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Agent Control Evaluator - Galileo. - -This package provides Galileo evaluators for agent-control. 
- -Available evaluators: - - galileo.luna: Galileo Luna direct scorer evaluation - -Installation: - pip install agent-control-evaluator-galileo - -Or via the agent-control-evaluators convenience extra: - pip install agent-control-evaluators[galileo] -""" - -from importlib.metadata import PackageNotFoundError, version - -try: - __version__ = version("agent-control-evaluator-galileo") -except PackageNotFoundError: - __version__ = "0.0.0.dev" - -from agent_control_evaluator_galileo.luna import ( - LUNA_AVAILABLE, - GalileoLunaClient, - LunaEvaluator, - LunaEvaluatorConfig, - LunaOperator, - ScorerInvokeRequest, - ScorerInvokeResponse, -) - -__all__ = [ - "GalileoLunaClient", - "ScorerInvokeRequest", - "ScorerInvokeResponse", - "LunaEvaluator", - "LunaEvaluatorConfig", - "LunaOperator", - "LUNA_AVAILABLE", -] diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py b/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py deleted file mode 100644 index b26feaac..00000000 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -"""Galileo Luna direct scorer evaluator.""" - -from agent_control_evaluator_galileo.luna.client import ( - GalileoLunaClient, - ScorerInvokeInputs, - ScorerInvokeRequest, - ScorerInvokeResponse, -) -from agent_control_evaluator_galileo.luna.config import LunaEvaluatorConfig, LunaOperator -from agent_control_evaluator_galileo.luna.evaluator import LUNA_AVAILABLE, LunaEvaluator - -__all__ = [ - "GalileoLunaClient", - "ScorerInvokeInputs", - "ScorerInvokeRequest", - "ScorerInvokeResponse", - "LunaEvaluatorConfig", - "LunaOperator", - "LunaEvaluator", - "LUNA_AVAILABLE", -] diff --git a/examples/README.md b/examples/README.md index ffe9b46a..b51699d4 100644 --- a/examples/README.md +++ b/examples/README.md @@ -12,7 +12,7 @@ This directory contains runnable examples for Agent Control. 
Each example has it | Google ADK Callbacks | Lower-level ADK lifecycle hook integration for manual model and tool guardrails. | https://docs.agentcontrol.dev/examples/google-adk-callbacks | | Google ADK Decorator | Tool-only `@control()` pattern for explicit ADK tool protection. | https://docs.agentcontrol.dev/examples/google-adk-decorator | | Customer Support Agent | Enterprise scenario with PII protection, prompt-injection defense, and multiple tools. | https://docs.agentcontrol.dev/examples/customer-support | -| DeepEval | Build a custom evaluator using DeepEval GEval metrics. | https://docs.agentcontrol.dev/examples/deepeval | +| DeepEval | Build a custom rule using DeepEval GEval metrics. | https://docs.agentcontrol.dev/examples/deepeval | | Galileo Luna Direct | Direct `/scorers/invoke` Luna evaluation with a composite Agent Control condition. | `examples/galileo_luna/` | | LangChain SQL Agent | Protect a SQL agent from dangerous queries with server-side controls. | https://docs.agentcontrol.dev/examples/langchain-sql | | Steer Action Demo | Banking transfer agent showcasing observe, deny, and steer actions. 
| https://docs.agentcontrol.dev/examples/steer-action-demo | diff --git a/examples/agent_control_demo/setup_controls.py b/examples/agent_control_demo/setup_controls.py index 3c3a97c1..b1264d1b 100644 --- a/examples/agent_control_demo/setup_controls.py +++ b/examples/agent_control_demo/setup_controls.py @@ -114,7 +114,7 @@ async def create_regex_control(client: AgentControlClient) -> int: "scope": {"step_types": ["llm"], "stages": ["post"]}, # Check AFTER "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"\b\d{3}-\d{2}-\d{4}\b", # SSN pattern @@ -128,7 +128,7 @@ async def create_regex_control(client: AgentControlClient) -> int: print(f"Creating control: block-ssn-output") print(f" Type: Regex") - print(f" Pattern: {control_definition['condition']['evaluator']['config']['pattern']}") + print(f" Pattern: {control_definition['condition']['rule']['config']['pattern']}") print(f" Stages: {', '.join(control_definition['scope']['stages'])}") print(f" Action: {control_definition['action']['decision']}") @@ -148,7 +148,7 @@ async def create_list_control(client: AgentControlClient) -> int: "scope": {"step_types": ["llm"], "stages": ["pre"]}, # Check BEFORE "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["DROP", "DELETE", "TRUNCATE", "ALTER", "GRANT"], @@ -165,8 +165,8 @@ async def create_list_control(client: AgentControlClient) -> int: print(f"Creating control: block-dangerous-sql") print(f" Type: List") - print(f" Values: {control_definition['condition']['evaluator']['config']['values']}") - print(f" Logic: {control_definition['condition']['evaluator']['config']['logic']}") + print(f" Values: {control_definition['condition']['rule']['config']['values']}") + print(f" Logic: {control_definition['condition']['rule']['config']['logic']}") print(f" Stages: {', '.join(control_definition['scope']['stages'])}") print(f" Action: 
{control_definition['action']['decision']}") @@ -220,12 +220,12 @@ async def list_agent_controls(client: AgentControlClient, agent_name: str) -> li print(f" ID: {ctrl.get('id')}") ctrl_def = ctrl.get("control", {}) print(f" Enabled: {ctrl_def.get('enabled', True)}") - evaluator_name = ( + rule_name = ( ctrl_def.get("condition", {}) - .get("evaluator", {}) + .get("rule", {}) .get("name", "unknown") ) - print(f" Evaluator: {evaluator_name}") + print(f" Rule: {rule_name}") scope = ctrl_def.get("scope", {}) or {} stages = scope.get("stages", []) stage_label = ", ".join(stages) if stages else "unknown" @@ -254,7 +254,7 @@ async def update_control(client: AgentControlClient, control_id: int) -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": [ @@ -303,9 +303,9 @@ async def get_control_data(client: AgentControlClient, control_id: int) -> dict: print(f"✓ Retrieved control {control_id}:") print(f" Description: {data.get('description', 'N/A')}") condition = data.get("condition", {}) - evaluator = condition.get("evaluator", {}) - print(f" Evaluator: {evaluator.get('name', 'N/A')}") - print(f" Values: {evaluator.get('config', {}).get('values', [])}") + rule = condition.get("rule", {}) + print(f" Rule: {rule.get('name', 'N/A')}") + print(f" Values: {rule.get('config', {}).get('values', [])}") print(f" Tags: {data.get('tags', [])}") return data diff --git a/examples/agent_control_demo/update_controls.py b/examples/agent_control_demo/update_controls.py index 5afcd63b..6a07daa3 100644 --- a/examples/agent_control_demo/update_controls.py +++ b/examples/agent_control_demo/update_controls.py @@ -60,7 +60,7 @@ async def allow_ssn(client: AgentControlClient, control_id: int) -> None: "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": 
r"\b\d{3}-\d{2}-\d{4}\b", @@ -102,7 +102,7 @@ async def block_ssn(client: AgentControlClient, control_id: int) -> None: "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"\b\d{3}-\d{2}-\d{4}\b", diff --git a/examples/cisco_ai_defense/README.md b/examples/cisco_ai_defense/README.md index f3766c85..ecfd11f4 100644 --- a/examples/cisco_ai_defense/README.md +++ b/examples/cisco_ai_defense/README.md @@ -60,14 +60,14 @@ uv run chat_inspect_demo.py --debug # also prints raw responses for allowed and 1) Ensure the server is running and you have an API key (X-API-Key) - - Preferred: install the evaluator into the workspace venv, then run the server normally: + - Preferred: install the rule into the workspace venv, then run the server normally: ```bash - uv pip install -e evaluators/contrib/cisco + uv pip install -e rules/contrib/cisco make server-run ``` -2) Install the Cisco AI Defense evaluator (this repo package) into the server environment, or run `make sync` at the repo root if developing locally. Provide `AI_DEFENSE_API_KEY` in the server environment. +2) Install the Cisco AI Defense rule (this repo package) into the server environment, or run `make sync` at the repo root if developing locally. Provide `AI_DEFENSE_API_KEY` in the server environment. 3) Seed controls and attach them to your agent by name: @@ -102,8 +102,8 @@ make -C examples/cisco_ai_defense decorator-all-run ### Troubleshooting -- Evaluator not found: ensure the server has the evaluator package installed and entry points discovered (`/api/v1/evaluators` lists `cisco.ai_defense`). -- Missing keys: set both `AGENT_CONTROL_API_KEY` (server) and `AI_DEFENSE_API_KEY` (server env for evaluator calls). +- Rule not found: ensure the server has the rule package installed and entry points discovered (`/api/v1/rules` lists `cisco.ai_defense`). 
+- Missing keys: set both `AGENT_CONTROL_API_KEY` (server) and `AI_DEFENSE_API_KEY` (server env for rule calls). - If controls with the same names already exist for another agent, this demo uses unique control names derived from your `AGENT_NAME`, so reruns are safe. diff --git a/examples/cisco_ai_defense/chat_guarded_all.py b/examples/cisco_ai_defense/chat_guarded_all.py index 5b3e2fcf..e0daeaac 100644 --- a/examples/cisco_ai_defense/chat_guarded_all.py +++ b/examples/cisco_ai_defense/chat_guarded_all.py @@ -7,7 +7,7 @@ Prereqs: 1) Run the server and assign API keys - 2) Install the Cisco AI Defense evaluator (this repo package) into the server env + 2) Install the Cisco AI Defense rule (this repo package) into the server env 3) Seed controls and attach to the agent with examples/cisco_ai_defense/setup_ai_defense_controls.py 4) Set env: AGENT_CONTROL_URL, AGENT_CONTROL_API_KEY diff --git a/examples/cisco_ai_defense/chat_guarded_post.py b/examples/cisco_ai_defense/chat_guarded_post.py index 1ab72bb7..f345a5c1 100644 --- a/examples/cisco_ai_defense/chat_guarded_post.py +++ b/examples/cisco_ai_defense/chat_guarded_post.py @@ -1,7 +1,7 @@ """Decorator-based example focusing on POST (output) checks. Prereqs: - 1) Run the server with Cisco AI Defense evaluator available + 1) Run the server with Cisco AI Defense rule available 2) Seed controls and attach to the agent with examples/cisco_ai_defense/setup_ai_defense_controls.py 3) Set env: AGENT_CONTROL_URL, AGENT_CONTROL_API_KEY diff --git a/examples/cisco_ai_defense/chat_inspect_demo.py b/examples/cisco_ai_defense/chat_inspect_demo.py index 4577ab8d..5cb610b0 100644 --- a/examples/cisco_ai_defense/chat_inspect_demo.py +++ b/examples/cisco_ai_defense/chat_inspect_demo.py @@ -71,7 +71,7 @@ class InspectOutcome: class ChatInspectClient: """Standalone direct-HTTP client used by the demo. 
- This example intentionally avoids importing the contrib evaluator package so + This example intentionally avoids importing the contrib rule package so the direct API demo can run with only the example environment dependencies. """ diff --git a/examples/cisco_ai_defense/pyproject.toml b/examples/cisco_ai_defense/pyproject.toml index 95185d18..f60fb5d2 100644 --- a/examples/cisco_ai_defense/pyproject.toml +++ b/examples/cisco_ai_defense/pyproject.toml @@ -11,7 +11,7 @@ dependencies = [ # When using SDK from path/editable, engine, models, and telemetry are not vendored "agent-control-engine>=5.2.0", "agent-control-models>=5.2.0", - "agent-control-evaluators>=5.2.0", + "agent-control-rules>=5.2.0", "agent-control-telemetry>=5.2.0", ] @@ -38,5 +38,5 @@ select = ["E", "F", "I"] agent-control-sdk = { path = "../../sdks/python", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/cisco_ai_defense/setup_ai_defense_controls.py b/examples/cisco_ai_defense/setup_ai_defense_controls.py index efcf2ca7..acb7ac71 100644 --- a/examples/cisco_ai_defense/setup_ai_defense_controls.py +++ b/examples/cisco_ai_defense/setup_ai_defense_controls.py @@ -1,15 +1,15 @@ """Setup Cisco AI Defense controls and attach to an agent. This script creates two controls (pre: input, post: output) that use the -external evaluator `cisco.ai_defense`, then attaches them directly to the +external rule `cisco.ai_defense`, then attaches them directly to the specified agent by name. The operations are idempotent and safe to rerun. 
Env: AGENT_CONTROL_URL - server base URL (e.g., http://localhost:8000) AGENT_CONTROL_API_KEY - server API key (sent as X-API-Key) AGENT_NAME - agent name to attach controls to (default: ai-defense-demo) - AI_DEFENSE_API_URL - optional override endpoint for evaluator config - AI_DEFENSE_TIMEOUT_S - optional timeout for evaluator config (default 15) + AI_DEFENSE_API_URL - optional override endpoint for rule config + AI_DEFENSE_TIMEOUT_S - optional timeout for rule config (default 15) """ from __future__ import annotations @@ -21,7 +21,7 @@ from agent_control import Agent, AgentControlClient, agents, controls -EVALUATOR_NAME = "cisco.ai_defense" +RULE_NAME = "cisco.ai_defense" def _headers() -> dict[str, str]: @@ -82,22 +82,22 @@ async def main() -> int: except Exception as e: # noqa: BLE001 print(f"ℹ️ Agent may already exist: {e}") - # Verify evaluator is available - ev = await client.http_client.get("/api/v1/evaluators", headers=_headers()) + # Verify rule is available + ev = await client.http_client.get("/api/v1/rules", headers=_headers()) ev.raise_for_status() data = ev.json() if isinstance(data, dict): - names = set(map(str, (data.get("evaluators", {}) or data).keys())) + names = set(map(str, (data.get("rules", {}) or data).keys())) else: names = set() - if EVALUATOR_NAME not in names: + if RULE_NAME not in names: print( - f"Evaluator '{EVALUATOR_NAME}' not found on server. Ensure the server env has the " - "evaluator installed and entry points discovered." + f"Rule '{RULE_NAME}' not found on server. Ensure the server env has the " + "rule installed and entry points discovered." 
) return 2 - # Build evaluator config shared parts + # Build rule config shared parts base_config: dict[str, Any] = { "api_key_env": "AI_DEFENSE_API_KEY", "timeout_ms": int(timeout_s * 1000), @@ -115,8 +115,8 @@ async def main() -> int: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { - "name": EVALUATOR_NAME, + "rule": { + "name": RULE_NAME, "config": {**base_config, "payload_field": "input"}, }, }, @@ -131,8 +131,8 @@ async def main() -> int: "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { - "name": EVALUATOR_NAME, + "rule": { + "name": RULE_NAME, "config": {**base_config, "payload_field": "output"}, }, }, diff --git a/examples/crewai/README.md b/examples/crewai/README.md index 6d465f31..d525c48b 100644 --- a/examples/crewai/README.md +++ b/examples/crewai/README.md @@ -93,11 +93,11 @@ uv run --active python setup_controls.py uv run --active python -m steering_financial_agent.main ``` -### 2. [Evaluator Showcase](./evaluator_showcase/) -- All 4 Built-in Evaluators +### 2. [Rule Showcase](./rule_showcase/) -- All 4 Built-in Rules -Demonstrates every built-in evaluator in a data-analyst scenario: +Demonstrates every built-in rule in a data-analyst scenario: -| Evaluator | Stage | Purpose | +| Rule | Stage | Purpose | |-----------|-------|---------| | **SQL** | PRE | Block DROP/DELETE, enforce LIMIT, prevent injection | | **LIST** | PRE | Restrict access to sensitive tables | @@ -105,9 +105,9 @@ Demonstrates every built-in evaluator in a data-analyst scenario: | **JSON** | PRE | Validate required fields, enforce constraints, steer for missing data | ```bash -cd examples/crewai/evaluator_showcase +cd examples/crewai/rule_showcase uv run --active python setup_controls.py -uv run --active python -m evaluator_showcase.main +uv run --active python -m rule_showcase.main ``` ### 3. 
[Secure Research Crew](./secure_research_crew/) -- Multi-Agent Crew with Per-Role Policies @@ -129,7 +129,7 @@ A production-quality **3-agent sequential crew** (Researcher, Analyst, Writer) w **5 scenarios** -- all run without LLM calls (direct tool testing): -| # | Scenario | Agent | Evaluator | Action | Result | +| # | Scenario | Agent | Rule | Action | Result | |---|----------|-------|-----------|--------|--------| | 1 | Happy path | All 3 | All | observe | Report generated with sources | | 2 | SQL injection | Researcher | SQL | deny | "Multiple SQL statements not allowed" | @@ -168,9 +168,9 @@ A complete **CrewAI Flow** using `@start`, `@listen`, and `@router` decorators w | 1 | Blog post | low_risk -> auto-publish | Published | | 2 | Press release | high_risk -> compliance review | Steered (exec summary), then published | | 3 | Internal memo | escalation -> human review | Steered: pending manager approval | -| 4 | Invalid request | intake blocked | JSON evaluator: missing fields | -| 5 | Banned topic | draft blocked | LIST evaluator: "insider trading" detected | -| 6 | PII in draft | draft blocked | REGEX evaluator: email/SSN/phone detected | +| 4 | Invalid request | intake blocked | JSON rule: missing fields | +| 5 | Banned topic | draft blocked | LIST rule: "insider trading" detected | +| 6 | PII in draft | draft blocked | REGEX rule: email/SSN/phone detected | ```bash cd examples/crewai/content_publishing_flow @@ -190,10 +190,10 @@ uv run --active python -m content_publishing_flow.main | **deny** action | Yes | Yes | Yes | Yes | | **steer** action | Yes | Yes | Yes | Yes | | **observe** action | Yes | | | | -| Regex evaluator | | Yes | Yes | Yes | -| List evaluator | Yes | Yes | Yes | Yes | -| JSON evaluator | Yes | Yes | Yes | Yes | -| SQL evaluator | | Yes | Yes | | +| Regex rule | | Yes | Yes | Yes | +| List rule | Yes | Yes | Yes | Yes | +| JSON rule | Yes | Yes | Yes | Yes | +| SQL rule | | Yes | Yes | | | Steering context + retry loop | Yes | Yes | 
Yes | Yes | | ControlViolationError handling | Yes | Yes | Yes | Yes | | ControlSteerError handling | Yes | Yes | Yes | Yes | diff --git a/examples/crewai/content_publishing_flow/README.md b/examples/crewai/content_publishing_flow/README.md index db734731..5bf5ae37 100644 --- a/examples/crewai/content_publishing_flow/README.md +++ b/examples/crewai/content_publishing_flow/README.md @@ -6,7 +6,7 @@ A complete CrewAI Flow example demonstrating routing (`@router`), embedded crews - **CrewAI Flows** with `@start`, `@listen`, and `@router` decorators - **Routing logic** that directs content through different pipelines based on type -- **Agent Control integration** at every stage (JSON, LIST, REGEX evaluators + STEER actions) +- **Agent Control integration** at every stage (JSON, LIST, REGEX rules + STEER actions) - **Pydantic state management** across flow stages - **Steering with retry** for corrective actions (e.g., adding missing Executive Summary) - **Human-in-the-loop** via STEER action for manager approval @@ -15,11 +15,11 @@ A complete CrewAI Flow example demonstrating routing (`@router`), embedded crews ``` @start: intake_request - | JSON evaluator: require topic, audience, content_type + | JSON rule: require topic, audience, content_type | @listen(intake_request): research - | Researcher: LIST evaluator (block banned sources) - | Fact-Checker: REGEX evaluator (flag unverified claims) + | Researcher: LIST rule (block banned sources) + | Fact-Checker: REGEX rule (flag unverified claims) | @listen(research): draft_content | Writer: REGEX (block PII), LIST (block banned topics) @@ -51,9 +51,9 @@ A complete CrewAI Flow example demonstrating routing (`@router`), embedded crews | 1 | Blog post | topic + audience + "blog_post" | low_risk | Intake -> Research -> Draft -> Auto-publish | | 2 | Press release | topic + audience + "press_release" | high_risk | Intake -> Research -> Draft -> Compliance review -> Publish | | 3 | Internal memo | topic + audience + 
"internal_memo" | escalation | Intake -> Research -> Draft -> Human review (STEER) | -| 4 | Invalid request | missing fields | blocked | JSON evaluator blocks at intake | -| 5 | Banned topic | draft contains "insider trading" | blocked | LIST evaluator blocks at draft | -| 6 | PII in draft | draft contains email/phone/SSN | blocked | REGEX evaluator blocks at draft | +| 4 | Invalid request | missing fields | blocked | JSON rule blocks at intake | +| 5 | Banned topic | draft contains "insider trading" | blocked | LIST rule blocks at draft | +| 6 | PII in draft | draft contains email/phone/SSN | blocked | REGEX rule blocks at draft | ## Prerequisites @@ -80,7 +80,7 @@ curl http://localhost:8000/health ### 2. Install Dependencies -`agent-control-sdk` and `crewai` have an incompatible transitive dependency on `pydantic` (crewai caps at `<2.12`, the SDK evaluators require `>=2.12.4`). Install in two steps to work around this: +`agent-control-sdk` and `crewai` have an incompatible transitive dependency on `pydantic` (crewai caps at `<2.12`, the SDK rules require `>=2.12.4`). Install in two steps to work around this: ```bash cd examples/crewai/content_publishing_flow @@ -88,8 +88,8 @@ cd examples/crewai/content_publishing_flow # Install crewai and all other deps via normal resolver uv pip install -e . 
-# Install agent-control-sdk separately, skipping the conflicting evaluators dep -# (this example uses server-mode execution and does not need evaluators locally) +# Install agent-control-sdk separately, skipping the conflicting rules dep +# (this example uses server-mode execution and does not need rules locally) uv pip install agent-control-sdk==7.5.0 --no-deps uv pip install httpx pydantic-settings docstring-parser google-re2 jsonschema ``` @@ -117,7 +117,7 @@ uv run --active python -m content_publishing_flow.main ## Controls Reference -| Control Name | Evaluator | Stage | Step | Action | +| Control Name | Rule | Stage | Step | Action | |---|---|---|---|---| | flow-intake-validation | JSON (required_fields) | pre | validate_request | deny | | flow-research-banned-sources | LIST (unreliable sources) | post | research_topic | deny | diff --git a/examples/crewai/content_publishing_flow/setup_controls.py b/examples/crewai/content_publishing_flow/setup_controls.py index e98f80d4..0ed4b932 100644 --- a/examples/crewai/content_publishing_flow/setup_controls.py +++ b/examples/crewai/content_publishing_flow/setup_controls.py @@ -2,11 +2,11 @@ Setup script for Content Publishing Flow controls. 
Creates Agent Control controls for each stage of the CrewAI Flow pipeline: -- Intake: JSON evaluator to validate required fields -- Research: LIST evaluator to block banned sources -- Fact-Check: REGEX evaluator to flag unverified claims -- Draft: REGEX evaluator for PII, LIST evaluator for banned topics -- Compliance: JSON evaluator for legal review fields +- Intake: JSON rule to validate required fields +- Research: LIST rule to block banned sources +- Fact-Check: REGEX rule to flag unverified claims +- Draft: REGEX rule for PII, LIST rule for banned topics +- Compliance: JSON rule for legal review fields - Editor: REGEX for PII cleanup (executive summary check is client-side) - Human Review: STEER action for manager approval on internal memos @@ -98,7 +98,7 @@ async def setup_publishing_controls(): control_ids: list[tuple[int, str]] = [] # ------------------------------------------------------------------ - # INTAKE STAGE: JSON evaluator - require topic, audience, content_type + # INTAKE STAGE: JSON rule - require topic, audience, content_type # ------------------------------------------------------------------ intake_validation = { "description": "Validate content request has required fields (topic, audience, content_type)", @@ -113,7 +113,7 @@ async def setup_publishing_controls(): "selector": { "path": "input.request", }, - "evaluator": { + "rule": { "name": "json", "config": { "required_fields": ["topic", "audience", "content_type"], @@ -126,7 +126,7 @@ async def setup_publishing_controls(): control_ids.append((cid, "flow-intake-validation")) # ------------------------------------------------------------------ - # RESEARCH STAGE: LIST evaluator - block banned sources + # RESEARCH STAGE: LIST rule - block banned sources # ------------------------------------------------------------------ banned_sources = { "description": "Block research that references banned or unreliable sources", @@ -141,7 +141,7 @@ async def setup_publishing_controls(): "selector": { 
"path": "output", }, - "evaluator": { + "rule": { "name": "list", "config": { "values": [ @@ -163,7 +163,7 @@ async def setup_publishing_controls(): control_ids.append((cid, "flow-research-banned-sources")) # ------------------------------------------------------------------ - # FACT-CHECK STAGE: REGEX evaluator - flag unverified claims/URLs + # FACT-CHECK STAGE: REGEX rule - flag unverified claims/URLs # ------------------------------------------------------------------ unverified_claims = { "description": "Flag fact-check results that contain unverified claim markers", @@ -178,7 +178,7 @@ async def setup_publishing_controls(): "selector": { "path": "output", }, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( @@ -196,7 +196,7 @@ async def setup_publishing_controls(): control_ids.append((cid, "flow-factcheck-unverified")) # ------------------------------------------------------------------ - # DRAFT STAGE: REGEX evaluator - block PII in draft content + # DRAFT STAGE: REGEX rule - block PII in draft content # ------------------------------------------------------------------ draft_pii = { "description": "Block drafts containing PII (SSN, email, phone)", @@ -211,7 +211,7 @@ async def setup_publishing_controls(): "selector": { "path": "output", }, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( @@ -230,7 +230,7 @@ async def setup_publishing_controls(): control_ids.append((cid, "flow-draft-pii-block")) # ------------------------------------------------------------------ - # DRAFT STAGE: LIST evaluator - block banned topics + # DRAFT STAGE: LIST rule - block banned topics # ------------------------------------------------------------------ banned_topics = { "description": "Block drafts that contain banned or restricted topics", @@ -245,7 +245,7 @@ async def setup_publishing_controls(): "selector": { "path": "output", }, - "evaluator": { + "rule": { "name": "list", "config": { "values": [ @@ -267,7 +267,7 @@ async def 
setup_publishing_controls(): control_ids.append((cid, "flow-draft-banned-topics")) # ------------------------------------------------------------------ - # COMPLIANCE STAGE: JSON evaluator - require disclaimer + legal_reviewed + # COMPLIANCE STAGE: JSON rule - require disclaimer + legal_reviewed # ------------------------------------------------------------------ legal_review = { "description": "Require disclaimer and legal_reviewed=true in compliance output", @@ -282,7 +282,7 @@ async def setup_publishing_controls(): "selector": { "path": "output", }, - "evaluator": { + "rule": { "name": "json", "config": { "required_fields": ["disclaimer", "legal_reviewed"], @@ -295,7 +295,7 @@ async def setup_publishing_controls(): control_ids.append((cid, "flow-compliance-legal-review")) # ------------------------------------------------------------------ - # EDITOR STAGE: REGEX evaluator - clean PII from edited content + # EDITOR STAGE: REGEX rule - clean PII from edited content # ------------------------------------------------------------------ editor_pii = { "description": "Block edited content that still contains PII", @@ -310,7 +310,7 @@ async def setup_publishing_controls(): "selector": { "path": "output", }, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( @@ -331,7 +331,7 @@ async def setup_publishing_controls(): # NOTE: Executive summary check for press releases is handled # client-side in the flow code (compliance_review stage), because # detecting the ABSENCE of text requires negative lookahead which - # the regex evaluator does not support. + # the regex rule does not support. 
# ------------------------------------------------------------------ # HUMAN REVIEW STAGE: STEER - pause for manager approval @@ -349,7 +349,7 @@ async def setup_publishing_controls(): "selector": { "path": "input.content_type", }, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["internal_memo"], @@ -385,7 +385,7 @@ async def setup_publishing_controls(): "selector": { "path": "input.content", }, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( diff --git a/examples/crewai/content_publishing_flow/src/content_publishing_flow/main.py b/examples/crewai/content_publishing_flow/src/content_publishing_flow/main.py index e16bdc71..0366390d 100644 --- a/examples/crewai/content_publishing_flow/src/content_publishing_flow/main.py +++ b/examples/crewai/content_publishing_flow/src/content_publishing_flow/main.py @@ -7,7 +7,7 @@ Flow Architecture: @start: intake_request -> Validates and classifies the content request - -> Control: JSON evaluator (require topic, audience, content_type) + -> Control: JSON rule (require topic, audience, content_type) @listen(intake_request): research -> 2-agent crew: Researcher + Fact-Checker @@ -636,7 +636,7 @@ def main(): # ================================================================== # SCENARIO 4: Invalid Request - Missing Required Fields # ================================================================== - # Use direct tool call to demonstrate JSON evaluator blocking + # Use direct tool call to demonstrate JSON rule blocking run_direct_tool_scenario( title="SCENARIO 4: Invalid Request (missing fields -> JSON block)", tool_label="Intake Validation", @@ -646,7 +646,7 @@ def main(): ) # ================================================================== - # SCENARIO 5: Banned Topic - LIST evaluator blocks draft + # SCENARIO 5: Banned Topic - LIST rule blocks draft # ================================================================== # Direct tool call with content that contains a banned topic async def 
_write_draft_banned( @@ -677,7 +677,7 @@ async def _write_draft_banned( ) # ================================================================== - # SCENARIO 6: PII in Draft - REGEX evaluator blocks + # SCENARIO 6: PII in Draft - REGEX rule blocks # ================================================================== async def _write_draft_pii( topic: str, audience: str, content_type: str, research: str @@ -728,7 +728,7 @@ async def _write_draft_pii( +-- "escalation" --> human_review Controls Applied: - Intake: JSON evaluator (required fields) + Intake: JSON rule (required fields) Research: LIST (banned sources), REGEX (unverified claims) Draft: REGEX (PII), LIST (banned topics) Compliance: JSON (legal fields), REGEX (PII), STEER (exec summary) @@ -739,9 +739,9 @@ async def _write_draft_pii( 1. Blog post -> low_risk -> auto-publish (happy path) 2. Press release -> high_risk -> compliance review + steering 3. Internal memo -> escalation -> human review (STEER) - 4. Missing fields -> JSON evaluator blocks at intake - 5. Banned topic -> LIST evaluator blocks at draft - 6. PII in draft -> REGEX evaluator blocks at draft + 4. Missing fields -> JSON rule blocks at intake + 5. Banned topic -> LIST rule blocks at draft + 6. PII in draft -> REGEX rule blocks at draft """) diff --git a/examples/crewai/content_publishing_flow/src/content_publishing_flow/tools/validate_request.py b/examples/crewai/content_publishing_flow/src/content_publishing_flow/tools/validate_request.py index 8a071d84..eddf273f 100644 --- a/examples/crewai/content_publishing_flow/src/content_publishing_flow/tools/validate_request.py +++ b/examples/crewai/content_publishing_flow/src/content_publishing_flow/tools/validate_request.py @@ -5,7 +5,7 @@ async def _validate_request(request: dict) -> str: """Validate that the content request has required fields.""" - # The JSON evaluator on the server checks for topic, audience, content_type. + # The JSON rule on the server checks for topic, audience, content_type. 
# If they are present the control passes; if missing it denies. topic = request.get("topic", "") audience = request.get("audience", "") diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/__init__.py b/examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/__init__.py deleted file mode 100644 index 136c55d0..00000000 --- a/examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from evaluator_showcase.tools.run_sql_query import create_sql_tool -from evaluator_showcase.tools.analyze_data import create_analysis_tool - -__all__ = ["create_sql_tool", "create_analysis_tool"] diff --git a/examples/crewai/evaluator_showcase/.env.example b/examples/crewai/rule_showcase/.env.example similarity index 100% rename from examples/crewai/evaluator_showcase/.env.example rename to examples/crewai/rule_showcase/.env.example diff --git a/examples/crewai/evaluator_showcase/README.md b/examples/crewai/rule_showcase/README.md similarity index 69% rename from examples/crewai/evaluator_showcase/README.md rename to examples/crewai/rule_showcase/README.md index 4b25f720..5cb7434c 100644 --- a/examples/crewai/evaluator_showcase/README.md +++ b/examples/crewai/rule_showcase/README.md @@ -1,10 +1,10 @@ -# CrewAI Data Analyst - Evaluator Showcase +# CrewAI Data Analyst - Rule Showcase -Demonstrates all four built-in Agent Control evaluators in a realistic data-analyst scenario using CrewAI. +Demonstrates all four built-in Agent Control rules in a realistic data-analyst scenario using CrewAI. 
-## Evaluators +## Rules -| Evaluator | Stage | Purpose | Example | +| Rule | Stage | Purpose | Example | |-----------|-------|---------|---------| | **SQL** | PRE | Validate query structure and safety | Block DROP, enforce LIMIT | | **LIST** | PRE | Access control via allowlists/blocklists | Restrict sensitive tables | @@ -13,7 +13,7 @@ Demonstrates all four built-in Agent Control evaluators in a realistic data-anal ## Scenarios -### SQL Evaluator +### SQL Rule | # | Query | Outcome | |---|-------|---------| | 1a | `SELECT ... FROM orders LIMIT 10` | ALLOWED | @@ -21,20 +21,20 @@ Demonstrates all four built-in Agent Control evaluators in a realistic data-anal | 1c | `SELECT * FROM orders` | DENIED (missing required LIMIT) | | 1d | `DELETE FROM orders WHERE ...` | DENIED (blocked operation) | -### LIST Evaluator +### LIST Rule | # | Table | Outcome | |---|-------|---------| | 2a | `orders` | ALLOWED (not restricted) | | 2b | `salary_data` | DENIED (restricted table) | | 2c | `audit_log` | DENIED (restricted table) | -### REGEX Evaluator +### REGEX Rule | # | Results Contain | Outcome | |---|-----------------|---------| | 3a | Order data (no PII) | ALLOWED | | 3b | SSN `123-45-6789` + email | DENIED (PII detected post-execution) | -### JSON Evaluator +### JSON Rule | # | Request | Outcome | |---|---------|---------| | 4a | All fields valid | ALLOWED | @@ -44,11 +44,11 @@ Demonstrates all four built-in Agent Control evaluators in a realistic data-anal ## Controls Created -- `sql-safety-check` — SQL evaluator: block destructive ops, enforce LIMIT -- `restrict-sensitive-tables` — LIST evaluator: block salary_data, audit_log, etc. 
-- `pii-in-query-results` — REGEX evaluator: detect SSN/email/credit cards in output -- `validate-analysis-request` — JSON evaluator: require dataset + date_range, constrain max_rows -- `steer-require-purpose` — JSON evaluator with STEER: collect analysis purpose for audit +- `sql-safety-check` — SQL rule: block destructive ops, enforce LIMIT +- `restrict-sensitive-tables` — LIST rule: block salary_data, audit_log, etc. +- `pii-in-query-results` — REGEX rule: detect SSN/email/credit cards in output +- `validate-analysis-request` — JSON rule: require dataset + date_range, constrain max_rows +- `steer-require-purpose` — JSON rule with STEER: collect analysis purpose for audit ## Prerequisites @@ -63,15 +63,15 @@ Demonstrates all four built-in Agent Control evaluators in a realistic data-anal make sync # Navigate to example -cd examples/crewai/evaluator_showcase +cd examples/crewai/rule_showcase # Install example dependencies # Note: agent-control-sdk and crewai have an incompatible transitive dependency on pydantic -# (crewai caps at <2.12, the SDK evaluators require >=2.12.4). Install in two steps: +# (crewai caps at <2.12, the SDK rules require >=2.12.4). Install in two steps: uv pip install -e . 
-# Install agent-control-sdk separately, skipping the conflicting evaluators dep -# (this example uses server-mode execution and does not need evaluators locally) +# Install agent-control-sdk separately, skipping the conflicting rules dep +# (this example uses server-mode execution and does not need rules locally) uv pip install agent-control-sdk==7.5.0 --no-deps uv pip install httpx pydantic-settings docstring-parser google-re2 jsonschema @@ -82,34 +82,34 @@ export OPENAI_API_KEY="your-key" uv run --active python setup_controls.py # Run the demo -uv run --active python -m evaluator_showcase.main +uv run --active python -m rule_showcase.main ``` ## Key Insight -Each evaluator serves a different purpose at a different stage: +Each rule serves a different purpose at a different stage: ``` Request arrives | v ┌─────────────────────┐ - │ SQL Evaluator (PRE) │ Is this query structurally safe? + │ SQL Rule (PRE) │ Is this query structurally safe? └──────────┬──────────┘ v ┌──────────────────────┐ - │ LIST Evaluator (PRE) │ Is the target table allowed? + │ LIST Rule (PRE) │ Is the target table allowed? └──────────┬───────────┘ v ┌──────────────────────┐ - │ JSON Evaluator (PRE) │ Are required fields present and valid? + │ JSON Rule (PRE) │ Are required fields present and valid? └──────────┬───────────┘ v Query Executes | v ┌───────────────────────┐ - │ REGEX Evaluator (POST) │ Do results contain PII patterns? + │ REGEX Rule (POST) │ Do results contain PII patterns? 
└───────────┬───────────┘ v Return Results diff --git a/examples/crewai/evaluator_showcase/pyproject.toml b/examples/crewai/rule_showcase/pyproject.toml similarity index 63% rename from examples/crewai/evaluator_showcase/pyproject.toml rename to examples/crewai/rule_showcase/pyproject.toml index 3b4decc6..1254ab16 100644 --- a/examples/crewai/evaluator_showcase/pyproject.toml +++ b/examples/crewai/rule_showcase/pyproject.toml @@ -1,7 +1,7 @@ [project] -name = "evaluator-showcase" +name = "rule-showcase" version = "0.1.0" -description = "CrewAI agent demonstrating all built-in evaluators: regex, list, json, and sql" +description = "CrewAI agent demonstrating all built-in rules: regex, list, json, and sql" requires-python = ">=3.12" dependencies = [ "crewai[tools]>=1.10.1", @@ -17,15 +17,15 @@ dev = [ ] [project.scripts] -evaluator_showcase = "evaluator_showcase.main:run" -run_crew = "evaluator_showcase.main:run" +rule_showcase = "rule_showcase.main:run" +run_crew = "rule_showcase.main:run" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/evaluator_showcase"] +packages = ["src/rule_showcase"] [tool.crewai] type = "crew" diff --git a/examples/crewai/evaluator_showcase/setup_controls.py b/examples/crewai/rule_showcase/setup_controls.py similarity index 92% rename from examples/crewai/evaluator_showcase/setup_controls.py rename to examples/crewai/rule_showcase/setup_controls.py index dc4ab96d..6ac42adf 100644 --- a/examples/crewai/evaluator_showcase/setup_controls.py +++ b/examples/crewai/rule_showcase/setup_controls.py @@ -1,7 +1,7 @@ """ -Setup script for the CrewAI Evaluator Showcase. +Setup script for the CrewAI Rule Showcase. 
-Creates controls using ALL four built-in evaluators in a realistic +Creates controls using ALL four built-in rules in a realistic data-analyst agent scenario: REGEX - Block PII patterns (SSN, credit cards) in query results @@ -45,7 +45,7 @@ async def setup(): # ── Control Definitions ───────────────────────────────────────── control_defs = [ # ┌─────────────────────────────────────────────────────────┐ - # │ SQL EVALUATOR │ + # │ SQL RULE │ # │ Validates SQL queries for safety before execution. │ # │ Blocks DROP/DELETE/TRUNCATE, enforces LIMIT, │ # │ prevents multi-statement injection. │ @@ -66,7 +66,7 @@ async def setup(): }, "condition": { "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "blocked_operations": [ @@ -88,7 +88,7 @@ async def setup(): }, ), # ┌─────────────────────────────────────────────────────────┐ - # │ LIST EVALUATOR │ + # │ LIST RULE │ # │ Restricts which tables the agent can query. │ # │ Contains-match against the SQL query text to catch │ # │ references to sensitive tables. │ @@ -109,7 +109,7 @@ async def setup(): }, "condition": { "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": [ @@ -128,7 +128,7 @@ async def setup(): }, ), # ┌─────────────────────────────────────────────────────────┐ - # │ REGEX EVALUATOR │ + # │ REGEX RULE │ # │ Scans query RESULTS for PII patterns after execution. │ # │ Catches SSNs, credit card numbers, and email │ # │ addresses that might leak through query results. │ @@ -149,7 +149,7 @@ async def setup(): }, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( @@ -166,7 +166,7 @@ async def setup(): }, ), # ┌─────────────────────────────────────────────────────────┐ - # │ JSON EVALUATOR │ + # │ JSON RULE │ # │ Validates the analysis request structure before the │ # │ agent starts working. 
Ensures required fields exist │ # │ and constraints are met (date range, row limit). │ @@ -187,7 +187,7 @@ async def setup(): }, "condition": { "selector": {"path": "input.request"}, - "evaluator": { + "rule": { "name": "json", "config": { "required_fields": ["dataset", "date_range"], @@ -205,7 +205,7 @@ async def setup(): }, ), # ┌─────────────────────────────────────────────────────────┐ - # │ JSON EVALUATOR (STEER) │ + # │ JSON RULE (STEER) │ # │ When analysis request is missing the optional │ # │ "purpose" field, steer the agent to collect it. │ # │ This demonstrates json + steer together. │ @@ -225,7 +225,7 @@ async def setup(): }, "condition": { "selector": {"path": "input.request"}, - "evaluator": { + "rule": { "name": "json", "config": { "required_fields": ["purpose"], @@ -250,19 +250,19 @@ async def setup(): # ── Create Controls & Associate with Agent ────────────────────── control_ids = [] for name, data in control_defs: - evaluator = data["condition"]["evaluator"]["name"].upper() + rule = data["condition"]["rule"]["name"].upper() decision = data["action"]["decision"].upper() try: result = await controls.create_control(client, name=name, data=data) cid = result["control_id"] - print(f" [{evaluator:5s}|{decision:5s}] Created: {name} (ID: {cid})") + print(f" [{rule:5s}|{decision:5s}] Created: {name} (ID: {cid})") control_ids.append(cid) except Exception as e: if "409" in str(e): clist = await controls.list_controls(client, name=name, limit=1) if clist["controls"]: cid = clist["controls"][0]["id"] - print(f" [{evaluator:5s}|EXIST] Already exists: {name} (ID: {cid})") + print(f" [{rule:5s}|EXIST] Already exists: {name} (ID: {cid})") control_ids.append(cid) else: raise @@ -278,7 +278,7 @@ async def setup(): # ── Summary ───────────────────────────────────────────────────── print() - print("Setup complete! Evaluators configured:") + print("Setup complete! 
Rules configured:") print() print(" SQL - Block destructive ops, enforce LIMIT, prevent injection") print(" LIST - Restrict access to sensitive tables") @@ -286,12 +286,12 @@ async def setup(): print(" JSON - Validate request structure and field constraints") print(" JSON - Steer agent to provide analysis purpose") print() - print("Run the demo: uv run --active python -m evaluator_showcase.main") + print("Run the demo: uv run --active python -m rule_showcase.main") if __name__ == "__main__": print("=" * 60) - print(" CrewAI Data Analyst - Evaluator Showcase Setup") + print(" CrewAI Data Analyst - Rule Showcase Setup") print("=" * 60) print() asyncio.run(setup()) diff --git a/evaluators/builtin/tests/regex/__init__.py b/examples/crewai/rule_showcase/src/rule_showcase/__init__.py similarity index 100% rename from evaluators/builtin/tests/regex/__init__.py rename to examples/crewai/rule_showcase/src/rule_showcase/__init__.py diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/config/agents.yaml b/examples/crewai/rule_showcase/src/rule_showcase/config/agents.yaml similarity index 100% rename from examples/crewai/evaluator_showcase/src/evaluator_showcase/config/agents.yaml rename to examples/crewai/rule_showcase/src/rule_showcase/config/agents.yaml diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/config/tasks.yaml b/examples/crewai/rule_showcase/src/rule_showcase/config/tasks.yaml similarity index 100% rename from examples/crewai/evaluator_showcase/src/evaluator_showcase/config/tasks.yaml rename to examples/crewai/rule_showcase/src/rule_showcase/config/tasks.yaml diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/crew.py b/examples/crewai/rule_showcase/src/rule_showcase/crew.py similarity index 77% rename from examples/crewai/evaluator_showcase/src/evaluator_showcase/crew.py rename to examples/crewai/rule_showcase/src/rule_showcase/crew.py index 8e8942c7..8fbfd773 100644 --- 
a/examples/crewai/evaluator_showcase/src/evaluator_showcase/crew.py +++ b/examples/crewai/rule_showcase/src/rule_showcase/crew.py @@ -1,14 +1,14 @@ -"""CrewBase crew definition for the Evaluator Showcase.""" +"""CrewBase crew definition for the Rule Showcase.""" from crewai import Agent, Crew, Process, Task from crewai.project import CrewBase, agent, crew, task -from evaluator_showcase.tools import create_sql_tool, create_analysis_tool +from rule_showcase.tools import create_sql_tool, create_analysis_tool @CrewBase -class EvaluatorShowcaseCrew: - """Data analyst crew demonstrating all evaluator types.""" +class RuleShowcaseCrew: + """Data analyst crew demonstrating all rule types.""" agents_config = "config/agents.yaml" tasks_config = "config/tasks.yaml" diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/main.py b/examples/crewai/rule_showcase/src/rule_showcase/main.py similarity index 87% rename from examples/crewai/evaluator_showcase/src/evaluator_showcase/main.py rename to examples/crewai/rule_showcase/src/rule_showcase/main.py index 51c1b84c..8ecaa6a3 100644 --- a/examples/crewai/evaluator_showcase/src/evaluator_showcase/main.py +++ b/examples/crewai/rule_showcase/src/rule_showcase/main.py @@ -1,7 +1,7 @@ """ -CrewAI Data Analyst with All Four Built-in Evaluators. +CrewAI Data Analyst with All Four Built-in Rules. -Demonstrates every built-in Agent Control evaluator in a realistic +Demonstrates every built-in Agent Control rule in a realistic data-analyst scenario where a CrewAI crew queries databases and generates reports: @@ -17,17 +17,17 @@ Then run this example: - $ uv run --active evaluator_showcase + $ uv run --active rule_showcase Scenarios: - 1. Safe SELECT query -> SQL evaluator ALLOWS - 2. DROP TABLE injection -> SQL evaluator DENIES - 3. Query without LIMIT -> SQL evaluator DENIES - 4. Query sensitive table -> LIST evaluator DENIES - 5. Query returns PII -> REGEX evaluator DENIES (post-execution) - 6. 
Valid analysis request -> JSON evaluator ALLOWS - 7. Missing required fields -> JSON evaluator DENIES - 8. Missing purpose field -> JSON evaluator STEERS (then allowed) + 1. Safe SELECT query -> SQL rule ALLOWS + 2. DROP TABLE injection -> SQL rule DENIES + 3. Query without LIMIT -> SQL rule DENIES + 4. Query sensitive table -> LIST rule DENIES + 5. Query returns PII -> REGEX rule DENIES (post-execution) + 6. Valid analysis request -> JSON rule ALLOWS + 7. Missing required fields -> JSON rule DENIES + 8. Missing purpose field -> JSON rule STEERS (then allowed) """ import json @@ -35,8 +35,8 @@ import agent_control -from evaluator_showcase.tools import create_sql_tool, create_analysis_tool -from evaluator_showcase.crew import EvaluatorShowcaseCrew +from rule_showcase.tools import create_sql_tool, create_analysis_tool +from rule_showcase.crew import RuleShowcaseCrew # ── Configuration ─────────────────────────────────────────────────────── AGENT_NAME = "crewai-data-analyst" @@ -44,7 +44,7 @@ agent_control.init( agent_name=AGENT_NAME, - agent_description="CrewAI data analyst with all evaluator types", + agent_description="CrewAI data analyst with all rule types", server_url=SERVER_URL, ) @@ -77,11 +77,11 @@ def verify_server(): return False -def run_direct_test(title, evaluator, tool_fn, input_data, expected): +def run_direct_test(title, rule, tool_fn, input_data, expected): """Run a test by calling the tool function directly (bypasses CrewAI LLM).""" print(f"\n{'=' * 60}") print(f" {title}") - print(f" Evaluator: {evaluator}") + print(f" Rule: {rule}") print(f" Expected: {expected}") print(f"{'=' * 60}") @@ -94,8 +94,8 @@ def run_direct_test(title, evaluator, tool_fn, input_data, expected): def main(): print("=" * 60) - print(" CrewAI Data Analyst - Evaluator Showcase") - print(" All 4 Built-in Evaluators: SQL, LIST, REGEX, JSON") + print(" CrewAI Data Analyst - Rule Showcase") + print(" All 4 Built-in Rules: SQL, LIST, REGEX, JSON") print("=" * 60) print() @@ 
-110,10 +110,10 @@ def main(): analysis_tool = create_analysis_tool() # ════════════════════════════════════════════════════════════════ - # SQL EVALUATOR SCENARIOS + # SQL RULE SCENARIOS # ════════════════════════════════════════════════════════════════ print("\n" + "#" * 60) - print(" PART 1: SQL EVALUATOR") + print(" PART 1: SQL RULE") print(" Validates queries before they reach the database") print("#" * 60) @@ -154,10 +154,10 @@ def main(): ) # ════════════════════════════════════════════════════════════════ - # LIST EVALUATOR SCENARIOS + # LIST RULE SCENARIOS # ════════════════════════════════════════════════════════════════ print("\n" + "#" * 60) - print(" PART 2: LIST EVALUATOR") + print(" PART 2: LIST RULE") print(" Restricts access to sensitive tables") print("#" * 60) @@ -189,10 +189,10 @@ def main(): ) # ════════════════════════════════════════════════════════════════ - # REGEX EVALUATOR SCENARIOS + # REGEX RULE SCENARIOS # ════════════════════════════════════════════════════════════════ print("\n" + "#" * 60) - print(" PART 3: REGEX EVALUATOR") + print(" PART 3: REGEX RULE") print(" Scans query results for PII patterns (post-execution)") print("#" * 60) @@ -215,11 +215,11 @@ def main(): ) # ════════════════════════════════════════════════════════════════ - # JSON EVALUATOR SCENARIOS + # JSON RULE SCENARIOS # ════════════════════════════════════════════════════════════════ if os.getenv("OPENAI_API_KEY"): print("\n" + "#" * 60) - print(" PART 4: JSON EVALUATOR") + print(" PART 4: JSON RULE") print(" Validates analysis request structure and constraints") print("#" * 60) @@ -300,7 +300,7 @@ def main(): print(" Agent autonomously handles a multi-step data request") print("#" * 60) - crew = EvaluatorShowcaseCrew().crew() + crew = RuleShowcaseCrew().crew() print("\n Running crew with a safe data request...") result = crew.kickoff( @@ -320,31 +320,31 @@ def main(): print(" Demo Complete!") print("=" * 60) print(""" - Evaluators Demonstrated: + Rules 
Demonstrated: - SQL EVALUATOR (input validation): + SQL RULE (input validation): - Blocked DROP TABLE injection (destructive operation) - Blocked SELECT without LIMIT (require_limit enforced) - Blocked DELETE statement (blocked operation) - Allowed safe SELECT with LIMIT (passed all checks) - LIST EVALUATOR (access control): + LIST RULE (access control): - Blocked query to salary_data (restricted table) - Blocked query to audit_log (restricted table) - Allowed query to orders (not restricted) - REGEX EVALUATOR (output scanning): + REGEX RULE (output scanning): - Blocked results with SSN + email (PII detected post-execution) - Allowed clean results (no PII patterns found) - JSON EVALUATOR (request validation): + JSON RULE (request validation): - Blocked missing required field (date_range absent) - Blocked constraint violation (max_rows > 10000) - Steered to collect missing purpose (STEER action + retry) - Allowed valid complete request (all fields valid) Key Insight: - Each evaluator serves a different purpose: + Each rule serves a different purpose: SQL -> Structural query safety (BEFORE execution) LIST -> Access control / allowlists / blocklists REGEX -> Pattern detection in free-text (AFTER execution) diff --git a/examples/crewai/rule_showcase/src/rule_showcase/tools/__init__.py b/examples/crewai/rule_showcase/src/rule_showcase/tools/__init__.py new file mode 100644 index 00000000..e2299eca --- /dev/null +++ b/examples/crewai/rule_showcase/src/rule_showcase/tools/__init__.py @@ -0,0 +1,4 @@ +from rule_showcase.tools.run_sql_query import create_sql_tool +from rule_showcase.tools.analyze_data import create_analysis_tool + +__all__ = ["create_sql_tool", "create_analysis_tool"] diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/analyze_data.py b/examples/crewai/rule_showcase/src/rule_showcase/tools/analyze_data.py similarity index 96% rename from examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/analyze_data.py rename to 
examples/crewai/rule_showcase/src/rule_showcase/tools/analyze_data.py index 0bff8d58..6d2d9e36 100644 --- a/examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/analyze_data.py +++ b/examples/crewai/rule_showcase/src/rule_showcase/tools/analyze_data.py @@ -21,7 +21,7 @@ async def _analyze_data(request: dict) -> str: """Run data analysis (protected by JSON validation controls). Takes a single dict param so the @control() decorator sends it - as input.request — and the JSON evaluator can check which fields + as input.request — and the JSON rule can check which fields are present or absent. """ dataset = request.get("dataset", "") @@ -60,7 +60,7 @@ def analyze_data_tool(request: str) -> str: return f"Invalid request format. Expected JSON, got: {request!r}" # Build the request dict — only include fields that have values. - # The JSON evaluator checks which fields are PRESENT in this dict, + # The JSON rule checks which fields are PRESENT in this dict, # so omitting a field triggers the "required_fields" check. request_dict: dict = {} if params.get("dataset"): diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/run_sql_query.py b/examples/crewai/rule_showcase/src/rule_showcase/tools/run_sql_query.py similarity index 100% rename from examples/crewai/evaluator_showcase/src/evaluator_showcase/tools/run_sql_query.py rename to examples/crewai/rule_showcase/src/rule_showcase/tools/run_sql_query.py diff --git a/examples/crewai/secure_research_crew/README.md b/examples/crewai/secure_research_crew/README.md index 79fcb49d..54e8da60 100644 --- a/examples/crewai/secure_research_crew/README.md +++ b/examples/crewai/secure_research_crew/README.md @@ -5,7 +5,7 @@ A production-quality example of a 3-agent CrewAI crew where each agent has its o ## What It Demonstrates - **Per-agent policies**: Different controls for different agent roles, all assigned to a single runtime agent and differentiated by `step_names` in control scopes. 
-- **Multiple evaluator types**: SQL, LIST, JSON, JSON Schema, and REGEX evaluators working together. +- **Multiple rule types**: SQL, LIST, JSON, JSON Schema, and REGEX rules working together. - **Deny and steer actions**: Hard blocks for security violations, corrective steering for recoverable issues. - **Idempotent setup**: The setup script handles 409 conflicts gracefully and can be run repeatedly. @@ -40,11 +40,11 @@ A production-quality example of a 3-agent CrewAI crew where each agent has its o +------------------------------------------------------------+ ``` -Each tool's `step_name` matches the `step_names` in its corresponding control scope, so the SQL evaluator only fires for `query_database`, the JSON evaluator only fires for `validate_data`, etc. +Each tool's `step_name` matches the `step_names` in its corresponding control scope, so the SQL rule only fires for `query_database`, the JSON rule only fires for `validate_data`, etc. ## Scenarios -| # | Scenario | Agent | Control | Evaluator | Action | Expected | +| # | Scenario | Agent | Control | Rule | Action | Expected | |---|----------|-------|---------|-----------|--------|----------| | 1 | Happy path | All | All | All | observe | Report generated | | 2 | SQL injection | Researcher | researcher-sql-safety | SQL | deny | Query blocked | @@ -75,14 +75,14 @@ Then from this directory: cd examples/crewai/secure_research_crew ``` -`agent-control-sdk` and `crewai` have an incompatible transitive dependency on `pydantic` (crewai caps at `<2.12`, the SDK evaluators require `>=2.12.4`). Install in two steps to work around this: +`agent-control-sdk` and `crewai` have an incompatible transitive dependency on `pydantic` (crewai caps at `<2.12`, the SDK rules require `>=2.12.4`). Install in two steps to work around this: ```bash # Install crewai and all other deps via normal resolver uv pip install -e . 
-# Install agent-control-sdk separately, skipping the conflicting evaluators dep -# (this example uses server-mode execution and does not need evaluators locally) +# Install agent-control-sdk separately, skipping the conflicting rules dep +# (this example uses server-mode execution and does not need rules locally) uv pip install agent-control-sdk==7.5.0 --no-deps uv pip install httpx pydantic-settings docstring-parser google-re2 jsonschema ``` diff --git a/examples/crewai/secure_research_crew/setup_controls.py b/examples/crewai/secure_research_crew/setup_controls.py index 0d884aac..8fae509e 100644 --- a/examples/crewai/secure_research_crew/setup_controls.py +++ b/examples/crewai/secure_research_crew/setup_controls.py @@ -97,7 +97,7 @@ def researcher_controls() -> list[tuple[str, dict]]: }, "condition": { "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "blocked_operations": ["DROP", "DELETE", "TRUNCATE", "ALTER", "INSERT", "UPDATE"], @@ -121,7 +121,7 @@ def researcher_controls() -> list[tuple[str, dict]]: }, "condition": { "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["salary_data", "admin_users", "credentials", "auth_tokens"], @@ -153,7 +153,7 @@ def analyst_controls() -> list[tuple[str, dict]]: }, "condition": { "selector": {"path": "input.request"}, - "evaluator": { + "rule": { "name": "json", "config": { "required_fields": ["dataset", "findings", "confidence_score"], @@ -181,7 +181,7 @@ def analyst_controls() -> list[tuple[str, dict]]: }, "condition": { "selector": {"path": "input.request"}, - "evaluator": { + "rule": { "name": "json", "config": { "json_schema": { @@ -231,7 +231,7 @@ def writer_controls() -> list[tuple[str, dict]]: }, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( @@ -248,7 +248,7 @@ def writer_controls() -> list[tuple[str, dict]]: } # NOTE: The citation-presence check is 
handled client-side in the tool - # wrapper (write_report in research_crew.py) because the regex evaluator + # wrapper (write_report in research_crew.py) because the regex rule # triggers actions on match, not on non-match. The tool adds citations # if they are missing before returning the report. diff --git a/examples/crewai/secure_research_crew/src/secure_research_crew/main.py b/examples/crewai/secure_research_crew/src/secure_research_crew/main.py index ac0c85e9..f2b2efef 100644 --- a/examples/crewai/secure_research_crew/src/secure_research_crew/main.py +++ b/examples/crewai/secure_research_crew/src/secure_research_crew/main.py @@ -4,15 +4,15 @@ A 3-agent sequential crew where each agent has different controls: 1. Researcher -- queries a simulated database - Controls: SQL evaluator (block DROP/DELETE, enforce LIMIT) - LIST evaluator (block sensitive tables) + Controls: SQL rule (block DROP/DELETE, enforce LIMIT) + LIST rule (block sensitive tables) 2. Analyst -- validates and processes research data - Controls: JSON evaluator (require dataset, findings, confidence_score) + Controls: JSON rule (require dataset, findings, confidence_score) JSON schema steer (add methodology if missing) 3. Writer -- generates the final report - Controls: REGEX evaluator (block PII in output) + Controls: REGEX rule (block PII in output) Client-side citation check (steer if missing) Scenarios: @@ -233,10 +233,10 @@ def main(): print("policies protect each agent in a CrewAI crew:") print() print(" 1. Happy path -- all agents pass controls") - print(" 2. SQL injection -- researcher blocked by SQL evaluator") - print(" 3. Restricted table -- researcher blocked by LIST evaluator") + print(" 2. SQL injection -- researcher blocked by SQL rule") + print(" 3. Restricted table -- researcher blocked by LIST rule") print(" 4. Missing methodology-- analyst steered, then succeeds") - print(" 5. PII in report -- writer blocked by REGEX evaluator") + print(" 5. 
PII in report -- writer blocked by REGEX rule") print() if not verify_setup(): @@ -254,10 +254,10 @@ def main(): header("SUMMARY") print(""" Scenario 1 (Happy Path): All 3 agents passed controls - Scenario 2 (SQL Injection): Researcher BLOCKED by sql evaluator - Scenario 3 (Restricted Table): Researcher BLOCKED by list evaluator + Scenario 2 (SQL Injection): Researcher BLOCKED by sql rule + Scenario 3 (Restricted Table): Researcher BLOCKED by list rule Scenario 4 (Missing Method): Analyst STEERED, then succeeded - Scenario 5 (PII in Report): Writer BLOCKED by regex evaluator + Scenario 5 (PII in Report): Writer BLOCKED by regex rule Controls are enforced per-agent via policies: - data-access-policy -> query_database tool diff --git a/examples/crewai/steering_financial_agent/README.md b/examples/crewai/steering_financial_agent/README.md index 3df24e04..70b0ae03 100644 --- a/examples/crewai/steering_financial_agent/README.md +++ b/examples/crewai/steering_financial_agent/README.md @@ -24,12 +24,12 @@ The key difference: **DENY** raises `ControlViolationError` (permanent), **STEER ## Controls Created -- `deny-sanctioned-countries` — LIST evaluator, blocks OFAC countries -- `deny-high-fraud-score` — JSON evaluator, blocks fraud_score > 0.8 -- `steer-require-2fa` — JSON evaluator with oneOf schema, steers for 2FA -- `steer-require-manager-approval` — JSON evaluator, steers for approval -- `observe-new-recipient` — LIST evaluator, records unknown recipients -- `observe-pii-in-confirmation` — REGEX evaluator, records PII in output +- `deny-sanctioned-countries` — LIST rule, blocks OFAC countries +- `deny-high-fraud-score` — JSON rule, blocks fraud_score > 0.8 +- `steer-require-2fa` — JSON rule with oneOf schema, steers for 2FA +- `steer-require-manager-approval` — JSON rule, steers for approval +- `observe-new-recipient` — LIST rule, records unknown recipients +- `observe-pii-in-confirmation` — REGEX rule, records PII in output ## Prerequisites @@ -48,11 +48,11 @@ cd 
examples/crewai/steering_financial_agent # Install example dependencies # Note: agent-control-sdk and crewai have an incompatible transitive dependency on pydantic -# (crewai caps at <2.12, the SDK evaluators require >=2.12.4). Install in two steps: +# (crewai caps at <2.12, the SDK rules require >=2.12.4). Install in two steps: uv pip install -e . -# Install agent-control-sdk separately, skipping the conflicting evaluators dep -# (this example uses server-mode execution and does not need evaluators locally) +# Install agent-control-sdk separately, skipping the conflicting rules dep +# (this example uses server-mode execution and does not need rules locally) uv pip install agent-control-sdk==7.5.0 --no-deps uv pip install httpx pydantic-settings docstring-parser google-re2 jsonschema diff --git a/examples/crewai/steering_financial_agent/setup_controls.py b/examples/crewai/steering_financial_agent/setup_controls.py index 55e0175c..fcff2529 100644 --- a/examples/crewai/steering_financial_agent/setup_controls.py +++ b/examples/crewai/steering_financial_agent/setup_controls.py @@ -43,7 +43,7 @@ async def setup(): control_defs = [ # ┌─────────────────────────────────────────────────────────┐ # │ DENY: Sanctioned Countries │ - # │ Uses LIST evaluator with contains matching. │ + # │ Uses LIST rule with contains matching. │ # │ Blocks transfers to OFAC-sanctioned destinations. │ # └─────────────────────────────────────────────────────────┘ ( @@ -59,7 +59,7 @@ async def setup(): }, "condition": { "selector": {"path": "input.destination_country"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": [ @@ -80,7 +80,7 @@ async def setup(): ), # ┌─────────────────────────────────────────────────────────┐ # │ DENY: Fraud Score Too High │ - # │ Uses JSON evaluator with field constraints. │ + # │ Uses JSON rule with field constraints. │ # │ Blocks when fraud_score exceeds 0.8 threshold. 
│ # └─────────────────────────────────────────────────────────┘ ( @@ -96,7 +96,7 @@ async def setup(): }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "json", "config": { "field_constraints": { @@ -110,7 +110,7 @@ async def setup(): ), # ┌─────────────────────────────────────────────────────────┐ # │ STEER: Large Transfer → Require 2FA │ - # │ Uses JSON evaluator with oneOf schema. │ + # │ Uses JSON rule with oneOf schema. │ # │ Either amount < $10k OR amount >= $10k with 2FA. │ # │ Provides steering_context so agent knows what to do. │ # └─────────────────────────────────────────────────────────┘ @@ -127,7 +127,7 @@ async def setup(): }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "json", "config": { "json_schema": { @@ -170,7 +170,7 @@ async def setup(): ), # ┌─────────────────────────────────────────────────────────┐ # │ STEER: Very Large Transfer → Manager Approval │ - # │ Uses JSON evaluator with oneOf schema. │ + # │ Uses JSON rule with oneOf schema. │ # │ Either amount < $50k OR amount >= $50k with approval. │ # │ Multi-step: collect justification, then get approval. │ # └─────────────────────────────────────────────────────────┘ @@ -187,7 +187,7 @@ async def setup(): }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "json", "config": { "json_schema": { @@ -231,7 +231,7 @@ async def setup(): ), # ┌─────────────────────────────────────────────────────────┐ # │ OBSERVE: New Recipient │ - # │ Uses LIST evaluator with "not in" logic. │ + # │ Uses LIST rule with "not in" logic. │ # │ Records an advisory event when recipient is unknown │ # │ but does NOT block the transfer. Useful for audit. 
│ # └─────────────────────────────────────────────────────────┘ @@ -248,7 +248,7 @@ async def setup(): }, "condition": { "selector": {"path": "input.recipient"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": [ @@ -266,7 +266,7 @@ async def setup(): ), # ┌─────────────────────────────────────────────────────────┐ # │ OBSERVE: PII in Output │ - # │ Uses REGEX evaluator to detect leaked PII in the │ + # │ Uses REGEX rule to detect leaked PII in the │ # │ transfer confirmation. Records for compliance review. │ # └─────────────────────────────────────────────────────────┘ ( @@ -282,7 +282,7 @@ async def setup(): }, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"(?:\b\d{3}-\d{2}-\d{4}\b|\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b)" @@ -327,16 +327,16 @@ async def setup(): print("Setup complete! Controls created:") print() print(" DENY (hard block, no recovery):") - print(" - Sanctioned countries (list evaluator)") - print(" - Fraud score > 0.8 (json evaluator)") + print(" - Sanctioned countries (list rule)") + print(" - Fraud score > 0.8 (json rule)") print() print(" STEER (guide agent, retry after correction):") - print(" - 2FA required for >= $10k (json evaluator)") - print(" - Manager approval for >= $50k (json evaluator)") + print(" - 2FA required for >= $10k (json rule)") + print(" - Manager approval for >= $50k (json rule)") print() print(" OBSERVE (audit trail, no blocking):") - print(" - New/unknown recipient (list evaluator)") - print(" - PII in confirmation output (regex evaluator)") + print(" - New/unknown recipient (list rule)") + print(" - PII in confirmation output (regex rule)") print() print("Run the demo: uv run --active python -m steering_financial_agent.main") diff --git a/examples/customer_support_agent/setup_demo_controls.py b/examples/customer_support_agent/setup_demo_controls.py index efa4d255..f302dfec 100644 --- 
a/examples/customer_support_agent/setup_demo_controls.py +++ b/examples/customer_support_agent/setup_demo_controls.py @@ -44,7 +44,7 @@ "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\d{3}-\d{2}-\d{4}"}, }, @@ -62,7 +62,7 @@ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"(?i)(ignore.{0,20}(previous|prior|above).{0,20}instructions|you are now|system:|forget everything|disregard)" @@ -82,7 +82,7 @@ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\b\d{4}[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4}\b"}, }, @@ -109,7 +109,7 @@ "selector": { "path": "input.query", }, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"(?i)(select|insert|update|delete|drop|union|--|;)" @@ -135,7 +135,7 @@ "selector": { "path": "*", # Observe entire payload }, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r".*"}, # Always matches }, @@ -162,7 +162,7 @@ "selector": { "path": "input.query", }, - "evaluator": { + "rule": { "name": "regex", "config": { # Simple profanity pattern for demo @@ -189,7 +189,7 @@ "selector": { "path": "input.priority", }, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["high", "critical", "urgent"], @@ -222,7 +222,7 @@ "selector": { "path": "input.description", }, - "evaluator": { + "rule": { "name": "regex", "config": { # Email pattern diff --git a/examples/deepeval/README.md b/examples/deepeval/README.md index c0bfeba6..7cdfcf04 100644 --- a/examples/deepeval/README.md +++ b/examples/deepeval/README.md @@ -1,6 +1,6 @@ # DeepEval Example -Custom evaluator using DeepEval GEval metrics for quality controls. 
+Custom rule using DeepEval GEval metrics for quality controls. ## What this example shows diff --git a/examples/deepeval/__init__.py b/examples/deepeval/__init__.py index 77d5fbc6..ae6105bf 100644 --- a/examples/deepeval/__init__.py +++ b/examples/deepeval/__init__.py @@ -1,14 +1,14 @@ -"""DeepEval GEval evaluator example. +"""DeepEval GEval rule example. -This module demonstrates how to extend the base Evaluator class to create -custom evaluators using external libraries like DeepEval. +This module demonstrates how to extend the base Rule class to create +custom rules using external libraries like DeepEval. """ -from config import DeepEvalEvaluatorConfig, DeepEvalTestCaseParam -from evaluator import DeepEvalEvaluator +from config import DeepEvalRuleConfig, DeepEvalTestCaseParam +from rule import DeepEvalRule __all__ = [ - "DeepEvalEvaluator", - "DeepEvalEvaluatorConfig", + "DeepEvalRule", + "DeepEvalRuleConfig", "DeepEvalTestCaseParam", ] diff --git a/examples/deepeval/config.py b/examples/deepeval/config.py index 5c592667..60055d0e 100644 --- a/examples/deepeval/config.py +++ b/examples/deepeval/config.py @@ -1,4 +1,4 @@ -"""Configuration models for DeepEval GEval evaluator. +"""Configuration models for DeepEval GEval rule. Based on DeepEval's GEval metric: https://deepeval.com/docs/metrics-llm-evals """ @@ -24,8 +24,8 @@ ] -class DeepEvalEvaluatorConfig(BaseModel): - """Configuration for DeepEval GEval evaluator. +class DeepEvalRuleConfig(BaseModel): + """Configuration for DeepEval GEval rule. DeepEval's GEval uses LLM-as-a-judge with chain-of-thoughts (CoT) to evaluate LLM outputs based on custom criteria. 
It's capable of evaluating almost any @@ -33,7 +33,7 @@ class DeepEvalEvaluatorConfig(BaseModel): Example (with criteria): ```python - config = DeepEvalEvaluatorConfig( + config = DeepEvalRuleConfig( name="Correctness", criteria="Determine if the actual output is correct based on the expected output.", evaluation_params=["actual_output", "expected_output"], @@ -43,7 +43,7 @@ class DeepEvalEvaluatorConfig(BaseModel): Example (with evaluation_steps): ```python - config = DeepEvalEvaluatorConfig( + config = DeepEvalRuleConfig( name="Correctness", evaluation_steps=[ "Check whether facts in actual output contradict expected output", @@ -119,7 +119,7 @@ class DeepEvalEvaluatorConfig(BaseModel): ) @model_validator(mode="after") - def validate_criteria_or_steps(self) -> "DeepEvalEvaluatorConfig": + def validate_criteria_or_steps(self) -> "DeepEvalRuleConfig": """Validate that either criteria or evaluation_steps is provided, but not both.""" has_criteria = self.criteria is not None has_steps = self.evaluation_steps is not None and len(self.evaluation_steps) > 0 diff --git a/examples/deepeval/pyproject.toml b/examples/deepeval/pyproject.toml index 081b75c4..6dee62a0 100644 --- a/examples/deepeval/pyproject.toml +++ b/examples/deepeval/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "agent-control-deepeval-example" version = "0.1.0" -description = "Agent Control DeepEval GEval Custom Evaluator Example" +description = "Agent Control DeepEval GEval Custom Rule Example" readme = "README.md" requires-python = ">=3.12" dependencies = [ @@ -10,7 +10,7 @@ dependencies = [ "pydantic>=2.0.0", "httpx>=0.24.0", "google-re2>=1.1", - "agent-control-evaluators>=6.3.0", + "agent-control-rules>=6.3.0", "agent-control-models>=6.3.0", "agent-control-sdk>=6.3.0", ] @@ -18,8 +18,8 @@ dependencies = [ [project.optional-dependencies] dev = [] -[project.entry-points."agent_control.evaluators"] -deepeval-geval = "evaluator:DeepEvalEvaluator" +[project.entry-points."agent_control.rules"] 
+deepeval-geval = "rule:DeepEvalRule" [build-system] requires = ["hatchling"] diff --git a/examples/deepeval/qa_agent.py b/examples/deepeval/qa_agent.py index 328c6181..dc25afcf 100755 --- a/examples/deepeval/qa_agent.py +++ b/examples/deepeval/qa_agent.py @@ -4,7 +4,7 @@ This example demonstrates: 1. Using agent-control SDK with @control() decorator -2. DeepEval GEval evaluators for quality enforcement +2. DeepEval GEval rules for quality enforcement 3. Handling ControlViolationError gracefully The agent is protected by DeepEval-based controls that check: diff --git a/examples/deepeval/evaluator.py b/examples/deepeval/rule.py similarity index 84% rename from examples/deepeval/evaluator.py rename to examples/deepeval/rule.py index 4cbd9573..d5935a32 100644 --- a/examples/deepeval/evaluator.py +++ b/examples/deepeval/rule.py @@ -1,6 +1,6 @@ -"""DeepEval GEval evaluator implementation. +"""DeepEval GEval rule implementation. -This evaluator demonstrates how to extend the base Evaluator class to integrate +This rule demonstrates how to extend the base Rule class to integrate DeepEval's GEval metric for custom LLM-based evaluations. Based on DeepEval documentation: https://deepeval.com/docs/metrics-llm-evals @@ -9,14 +9,14 @@ import logging from typing import Any -from agent_control_evaluators import ( - Evaluator, - EvaluatorMetadata, - register_evaluator, +from agent_control_rules import ( + Rule, + RuleMetadata, + register_rule, ) -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult -from config import DeepEvalEvaluatorConfig +from config import DeepEvalRuleConfig logger = logging.getLogger(__name__) @@ -33,11 +33,11 @@ LLMTestCaseParams = None # type: ignore -@register_evaluator -class DeepEvalEvaluator(Evaluator[DeepEvalEvaluatorConfig]): - """DeepEval GEval evaluator for custom LLM-based evaluations. +@register_rule +class DeepEvalRule(Rule[DeepEvalRuleConfig]): + """DeepEval GEval rule for custom LLM-based evaluations. 
- This evaluator uses DeepEval's GEval metric, which leverages LLM-as-a-judge + This rule uses DeepEval's GEval metric, which leverages LLM-as-a-judge with chain-of-thoughts (CoT) to evaluate LLM outputs based on custom criteria. Features: @@ -49,10 +49,10 @@ class DeepEvalEvaluator(Evaluator[DeepEvalEvaluatorConfig]): Example: ```python - from examples.deepeval import DeepEvalEvaluator, DeepEvalEvaluatorConfig + from examples.deepeval import DeepEvalRule, DeepEvalRuleConfig # Create config - config = DeepEvalEvaluatorConfig( + config = DeepEvalRuleConfig( name="Coherence", criteria="Determine if the response is coherent and logically consistent.", evaluation_params=["actual_output"], @@ -60,11 +60,11 @@ class DeepEvalEvaluator(Evaluator[DeepEvalEvaluatorConfig]): model="gpt-4o", ) - # Create evaluator - evaluator = DeepEvalEvaluator(config) + # Create rule + rule = DeepEvalRule(config) # Evaluate - result = await evaluator.evaluate({ + result = await rule.evaluate({ "actual_output": "The sky is blue because of Rayleigh scattering." }) ``` @@ -73,25 +73,25 @@ class DeepEvalEvaluator(Evaluator[DeepEvalEvaluatorConfig]): OPENAI_API_KEY: Required for GPT model usage. """ - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="deepeval-geval", version="1.0.0", - description="DeepEval GEval custom LLM-based evaluator", + description="DeepEval GEval custom LLM-based rule", requires_api_key=True, timeout_ms=30000, ) - config_model = DeepEvalEvaluatorConfig + config_model = DeepEvalRuleConfig @classmethod def is_available(cls) -> bool: """Check if deepeval dependency is installed.""" return DEEPEVAL_AVAILABLE - def __init__(self, config: DeepEvalEvaluatorConfig) -> None: - """Initialize DeepEval evaluator with configuration. + def __init__(self, config: DeepEvalRuleConfig) -> None: + """Initialize DeepEval rule with configuration. Args: - config: Validated DeepEvalEvaluatorConfig instance. + config: Validated DeepEvalRuleConfig instance. 
Raises: ValueError: If required configuration is invalid. @@ -133,7 +133,7 @@ def _create_geval_metric(self) -> Any: logger.debug(f"[DeepEval] Creating GEval metric with config: {geval_kwargs}") return GEval(**geval_kwargs) - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate data using DeepEval GEval. Args: @@ -141,7 +141,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: the evaluation_params (e.g., {"actual_output": "text"}). Returns: - EvaluatorResult with matched status and metadata. + RuleResult with matched status and metadata. """ try: logger.debug(f"[DeepEval] Evaluating data: {data}") @@ -242,11 +242,11 @@ def _prepare_test_case(self, data: Any) -> Any: logger.debug(f"[DeepEval] Test case kwargs: {test_case_kwargs}") return LLMTestCase(**test_case_kwargs) - def _parse_metric_result(self) -> EvaluatorResult: - """Parse GEval metric results into EvaluatorResult. + def _parse_metric_result(self) -> RuleResult: + """Parse GEval metric results into RuleResult. Returns: - EvaluatorResult with evaluation results. + RuleResult with evaluation results. """ # Get score and reason from the metric score = self._metric.score @@ -256,7 +256,7 @@ def _parse_metric_result(self) -> EvaluatorResult: # NOTE: matched=True means the control should trigger (block the request) # In DeepEval, is_successful=True means quality is GOOD (score >= threshold) # So we want to trigger (matched=True) when quality is BAD (not is_successful) - return EvaluatorResult( + return RuleResult( matched=not is_successful, # Invert: trigger when quality fails confidence=score if score is not None else 0.0, message=reason if reason else f"GEval {self.config.name}: score={score}", @@ -270,18 +270,18 @@ def _parse_metric_result(self) -> EvaluatorResult: }, ) - def _handle_error(self, error: Exception) -> EvaluatorResult: + def _handle_error(self, error: Exception) -> RuleResult: """Handle errors from DeepEval evaluation. 
Args: error: The exception that occurred. Returns: - EvaluatorResult indicating error state. + RuleResult indicating error state. """ error_action = self.config.on_error - return EvaluatorResult( + return RuleResult( matched=(error_action == "deny"), # Fail closed if configured confidence=0.0, message=f"DeepEval evaluation error: {str(error)}", diff --git a/examples/deepeval/setup_controls.py b/examples/deepeval/setup_controls.py index d4959c77..9d183857 100755 --- a/examples/deepeval/setup_controls.py +++ b/examples/deepeval/setup_controls.py @@ -4,7 +4,7 @@ This script: 1. Registers the agent with the server -2. Creates DeepEval GEval evaluator controls for quality checks +2. Creates DeepEval GEval rule controls for quality checks 3. Directly associates controls to the agent The controls demonstrate using DeepEval's LLM-as-a-judge to enforce: @@ -20,22 +20,22 @@ import sys import httpx -# Add the current directory to the path so we can import the evaluator +# Add the current directory to the path so we can import the rule sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -# Import and register the DeepEval evaluator +# Import and register the DeepEval rule # This must be done before creating controls that use it try: - from evaluator import DeepEvalEvaluator + from rule import DeepEvalRule - print(f"✓ DeepEval evaluator loaded: {DeepEvalEvaluator.metadata.name}") + print(f"✓ DeepEval rule loaded: {DeepEvalRule.metadata.name}") - # Note: We don't check is_available() here because the evaluator + # Note: We don't check is_available() here because the rule # may not be used immediately - it just needs to be registered # so the server knows about it when creating control definitions except ImportError as e: - print(f"❌ Error: Cannot import DeepEval evaluator: {e}") + print(f"❌ Error: Cannot import DeepEval rule: {e}") print("\nMake sure you're running from the examples/deepeval directory") print("and that agent-control-models is installed") sys.exit(1) 
@@ -58,7 +58,7 @@ "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "*"}, - "evaluator": { + "rule": { "name": "deepeval-geval", "config": { "name": "Coherence", @@ -88,7 +88,7 @@ "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "*"}, - "evaluator": { + "rule": { "name": "deepeval-geval", "config": { "name": "Relevance", @@ -117,7 +117,7 @@ "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "*"}, - "evaluator": { + "rule": { "name": "deepeval-geval", "config": { "name": "Correctness", diff --git a/examples/galileo_luna/README.md b/examples/galileo_luna/README.md index b81b034f..0527f207 100644 --- a/examples/galileo_luna/README.md +++ b/examples/galileo_luna/README.md @@ -1,12 +1,12 @@ -# Galileo Luna Direct Evaluator Example +# Galileo Luna Direct Rule Example -This example shows an Agent Control agent using the direct Galileo Luna evaluator (`galileo.luna`). The evaluator calls Galileo's `/scorers/invoke` API and applies thresholds locally from the control definition. +This example shows an Agent Control agent using the direct Galileo Luna rule (`galileo.luna`). The rule calls Galileo's `/scorers/invoke` API and applies thresholds locally from the control definition. ## What It Shows - `setup_controls.py` registers an agent and attaches controls. - `demo_agent.py` runs an agent step protected with `@control`. -- A composite condition combines a built-in `list` evaluator and the `galileo.luna` evaluator. +- A composite condition combines a built-in `list` rule and the `galileo.luna` rule. - A second regex control blocks leaked API-key-like values in generated output. 
## Setup diff --git a/examples/galileo_luna/demo_agent.py b/examples/galileo_luna/demo_agent.py index 0b6a0f8a..b41352b1 100644 --- a/examples/galileo_luna/demo_agent.py +++ b/examples/galileo_luna/demo_agent.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Demo agent protected by a direct Galileo Luna evaluator control. +"""Demo agent protected by a direct Galileo Luna rule control. Prerequisites: 1. Start server: make server-run @@ -94,7 +94,7 @@ async def run_demo() -> None: api_secret = os.getenv("GALILEO_API_SECRET_KEY") if not api_key and not api_secret: print( - "Galileo credentials are required for the galileo.luna evaluator. " + "Galileo credentials are required for the galileo.luna rule. " "Set GALILEO_API_KEY for public mode. Deployments using internal " "mode should inject GALILEO_API_SECRET_KEY." ) @@ -108,7 +108,7 @@ async def run_demo() -> None: auth_mode = "public" if api_key else "internal" print("=" * 72) - print("Direct Galileo Luna Evaluator Demo") + print("Direct Galileo Luna Rule Demo") print("=" * 72) print(f"Server: {SERVER_URL}") print(f"Agent: {AGENT_NAME}") diff --git a/examples/galileo_luna/pyproject.toml b/examples/galileo_luna/pyproject.toml index c284d7fa..fb7c2891 100644 --- a/examples/galileo_luna/pyproject.toml +++ b/examples/galileo_luna/pyproject.toml @@ -1,16 +1,16 @@ [project] name = "agent-control-galileo-luna-example" version = "0.1.0" -description = "Agent Control direct Galileo Luna evaluator example" +description = "Agent Control direct Galileo Luna rule example" readme = "README.md" requires-python = ">=3.12" dependencies = [ "agent-control-engine", - "agent-control-evaluators", + "agent-control-rules", "agent-control-models", "agent-control-telemetry", "agent-control-sdk", - "agent-control-evaluator-galileo", + "agent-control-rule-galileo", ] [build-system] @@ -22,8 +22,8 @@ packages = ["."] [tool.uv.sources] agent-control-sdk = { path = "../../sdks/python", editable = true } -agent-control-evaluator-galileo = { path = 
"../../evaluators/contrib/galileo", editable = true } +agent-control-rule-galileo = { path = "../../rules/contrib/galileo", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/galileo_luna/setup_controls.py b/examples/galileo_luna/setup_controls.py index fe1434c8..05af4822 100644 --- a/examples/galileo_luna/setup_controls.py +++ b/examples/galileo_luna/setup_controls.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Create controls for the direct Galileo Luna evaluator demo. +"""Create controls for the direct Galileo Luna rule demo. Prerequisites: - Agent Control server running at AGENT_CONTROL_URL, default http://localhost:8000 @@ -45,7 +45,7 @@ def luna_config() -> dict[str, Any]: - """Build the direct Luna evaluator config used by the composite control.""" + """Build the direct Luna rule config used by the composite control.""" config: dict[str, Any] = { "threshold": LUNA_THRESHOLD, "operator": "gte", @@ -79,7 +79,7 @@ def luna_config() -> dict[str, Any]: "and": [ { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": [ @@ -98,7 +98,7 @@ def luna_config() -> dict[str, Any]: }, { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "galileo.luna", "config": luna_config(), }, @@ -122,7 +122,7 @@ def luna_config() -> dict[str, Any]: }, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\bsk-[A-Za-z0-9_-]{12,}\b"}, }, diff --git a/examples/google_adk_callbacks/pyproject.toml b/examples/google_adk_callbacks/pyproject.toml index a0fd20ff..ed484b5b 100644 --- a/examples/google_adk_callbacks/pyproject.toml 
+++ b/examples/google_adk_callbacks/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.12" dependencies = [ "agent-control-engine", - "agent-control-evaluators", + "agent-control-rules", "agent-control-models", "agent-control-telemetry", "agent-control-sdk", @@ -26,5 +26,5 @@ packages = ["my_agent"] agent-control-sdk = { path = "../../sdks/python", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/google_adk_callbacks/setup_controls.py b/examples/google_adk_callbacks/setup_controls.py index 0aea1754..9e03a5bf 100644 --- a/examples/google_adk_callbacks/setup_controls.py +++ b/examples/google_adk_callbacks/setup_controls.py @@ -23,7 +23,7 @@ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( @@ -49,7 +49,7 @@ }, "condition": { "selector": {"path": "input.city"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["Pyongyang", "Tehran", "Damascus"], @@ -76,7 +76,7 @@ }, "condition": { "selector": {"path": "output.note"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"support@internal\.example|123-45-6789", diff --git a/examples/google_adk_decorator/pyproject.toml b/examples/google_adk_decorator/pyproject.toml index b9cc7567..4bf5ab1e 100644 --- a/examples/google_adk_decorator/pyproject.toml +++ b/examples/google_adk_decorator/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.12" dependencies = [ "agent-control-engine", - "agent-control-evaluators", + "agent-control-rules", "agent-control-models", 
"agent-control-telemetry", "agent-control-sdk", @@ -26,5 +26,5 @@ packages = ["my_agent"] agent-control-sdk = { path = "../../sdks/python", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/google_adk_decorator/setup_controls.py b/examples/google_adk_decorator/setup_controls.py index f3c68e66..4dfb4ec8 100644 --- a/examples/google_adk_decorator/setup_controls.py +++ b/examples/google_adk_decorator/setup_controls.py @@ -31,7 +31,7 @@ def _control_specs(execution: str) -> list[tuple[str, dict[str, Any]]]: }, "condition": { "selector": {"path": "input.city"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["Pyongyang", "Tehran", "Damascus"], @@ -58,7 +58,7 @@ def _control_specs(execution: str) -> list[tuple[str, dict[str, Any]]]: }, "condition": { "selector": {"path": "output.note"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"support@internal\.example|123-45-6789", diff --git a/examples/google_adk_plugin/pyproject.toml b/examples/google_adk_plugin/pyproject.toml index 88b0ccc6..aa0513d8 100644 --- a/examples/google_adk_plugin/pyproject.toml +++ b/examples/google_adk_plugin/pyproject.toml @@ -6,7 +6,7 @@ readme = "README.md" requires-python = ">=3.12" dependencies = [ "agent-control-engine", - "agent-control-evaluators", + "agent-control-rules", "agent-control-models", "agent-control-telemetry", "agent-control-sdk", @@ -26,5 +26,5 @@ packages = ["my_agent"] agent-control-sdk = { path = "../../sdks/python", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators 
= { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/google_adk_plugin/setup_controls.py b/examples/google_adk_plugin/setup_controls.py index 3bfeb483..a4d8ff6d 100644 --- a/examples/google_adk_plugin/setup_controls.py +++ b/examples/google_adk_plugin/setup_controls.py @@ -31,7 +31,7 @@ def _control_specs(execution: str) -> list[tuple[str, dict[str, Any]]]: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": ( @@ -57,7 +57,7 @@ def _control_specs(execution: str) -> list[tuple[str, dict[str, Any]]]: }, "condition": { "selector": {"path": "input.city"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["Pyongyang", "Tehran", "Damascus"], @@ -84,7 +84,7 @@ def _control_specs(execution: str) -> list[tuple[str, dict[str, Any]]]: }, "condition": { "selector": {"path": "output.note"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"support@internal\.example|123-45-6789", diff --git a/examples/langchain/pyproject.toml b/examples/langchain/pyproject.toml index b026a27a..eab90b9c 100644 --- a/examples/langchain/pyproject.toml +++ b/examples/langchain/pyproject.toml @@ -6,7 +6,7 @@ requires-python = ">=3.12" dependencies = [ "agent-control-engine", "agent-control-models", - "agent-control-evaluators", + "agent-control-rules", "agent-control-telemetry", "agent-control-sdk", "langchain>=0.3.0", @@ -43,5 +43,5 @@ include = [ agent-control-sdk = { path = "../../sdks/python", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", 
editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/langchain/setup_sql_controls.py b/examples/langchain/setup_sql_controls.py index c4e4d428..53053ab4 100644 --- a/examples/langchain/setup_sql_controls.py +++ b/examples/langchain/setup_sql_controls.py @@ -78,7 +78,7 @@ async def setup_sql_controls(): "selector": { "path": "input.query" }, - "evaluator": { + "rule": { "name": "sql", "config": { "blocked_operations": ["DROP", "DELETE", "TRUNCATE", "ALTER", "GRANT"], diff --git a/examples/langchain/sql_agent_protection.py b/examples/langchain/sql_agent_protection.py index 67790217..265c4750 100644 --- a/examples/langchain/sql_agent_protection.py +++ b/examples/langchain/sql_agent_protection.py @@ -150,7 +150,7 @@ async def safe_query_tool(query: str): print(error_msg) return error_msg except RuntimeError as e: - # Server-side error (e.g., evaluator not loaded) + # Server-side error (e.g., rule not loaded) error_msg = f"⚠️ Safety check unavailable: {str(e)}" print(error_msg) return error_msg diff --git a/examples/steer_action_demo/pyproject.toml b/examples/steer_action_demo/pyproject.toml index 9fb3be29..c5875ed0 100644 --- a/examples/steer_action_demo/pyproject.toml +++ b/examples/steer_action_demo/pyproject.toml @@ -6,7 +6,7 @@ requires-python = ">=3.12" dependencies = [ "agent-control-engine", "agent-control-models", - "agent-control-evaluators", + "agent-control-rules", "agent-control-telemetry", "agent-control-sdk", "langchain>=0.3.0", @@ -38,5 +38,5 @@ include = [ agent-control-sdk = { path = "../../sdks/python", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git 
a/examples/steer_action_demo/setup_controls.py b/examples/steer_action_demo/setup_controls.py index 7f1efb62..634a7cb6 100644 --- a/examples/steer_action_demo/setup_controls.py +++ b/examples/steer_action_demo/setup_controls.py @@ -53,7 +53,7 @@ async def setup_banking_controls(): "selector": { "path": "input.destination_country" }, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["north korea", "iran", "syria", "cuba", "crimea"], @@ -82,7 +82,7 @@ async def setup_banking_controls(): "selector": { "path": "input" }, - "evaluator": { + "rule": { "name": "json", "config": { "field_constraints": { @@ -117,7 +117,7 @@ async def setup_banking_controls(): "selector": { "path": "input.recipient" }, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["John Smith", "Acme Corp", "Global Suppliers Inc"], @@ -151,7 +151,7 @@ async def setup_banking_controls(): "selector": { "path": "input" }, - "evaluator": { + "rule": { "name": "json", "config": { "json_schema": { @@ -186,7 +186,7 @@ async def setup_banking_controls(): "selector": { "path": "input" }, - "evaluator": { + "rule": { "name": "json", "config": { "json_schema": { diff --git a/examples/strands_agents/.env.example b/examples/strands_agents/.env.example index 82bfe8e8..3cb767a9 100644 --- a/examples/strands_agents/.env.example +++ b/examples/strands_agents/.env.example @@ -4,7 +4,7 @@ AGENT_CONTROL_URL=http://localhost:8000 # OpenAI API Key (required for Strands agents) OPENAI_API_KEY=your_openai_api_key_here -# Galileo API Key (optional - for Galileo evaluators) +# Galileo API Key (optional - for Galileo rules) GALILEO_API_KEY=your_galileo_api_key_here # Galileo Console URL (optional) diff --git a/examples/strands_agents/interactive_demo/interactive_support_demo.py b/examples/strands_agents/interactive_demo/interactive_support_demo.py index c36183d8..a245dc13 100644 --- a/examples/strands_agents/interactive_demo/interactive_support_demo.py +++ 
b/examples/strands_agents/interactive_demo/interactive_support_demo.py @@ -494,12 +494,12 @@ def render_sidebar(): st.markdown(f"**ID:** `{control_id}`") condition = data.get("condition", {}) - if "evaluator" in condition: - evaluator = condition["evaluator"] - st.markdown(f"**Evaluator:** `{evaluator.get('name', 'N/A')}`") + if "rule" in condition: + rule = condition["rule"] + st.markdown(f"**Rule:** `{rule.get('name', 'N/A')}`") - if "config" in evaluator: - config = evaluator["config"] + if "config" in rule: + config = rule["config"] if "metric" in config: st.markdown(f"**Metric:** `{config['metric']}`") if "target_value" in config: diff --git a/examples/strands_agents/interactive_demo/setup_interactive_controls.py b/examples/strands_agents/interactive_demo/setup_interactive_controls.py index cf1517aa..2f16c57b 100644 --- a/examples/strands_agents/interactive_demo/setup_interactive_controls.py +++ b/examples/strands_agents/interactive_demo/setup_interactive_controls.py @@ -49,7 +49,7 @@ }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b|\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b|\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b" @@ -73,7 +73,7 @@ }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"(\bDROP\s+TABLE\b|\bDROP\s+DATABASE\b|--;)" @@ -99,7 +99,7 @@ }, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b|\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b|\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b" @@ -125,7 +125,7 @@ }, "condition": { "selector": {"path": "input"}, # Check entire tool input - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"\b\d{3}-\d{2}-\d{4}\b|\b\d{9}\b|\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b" diff --git a/examples/strands_agents/pyproject.toml 
b/examples/strands_agents/pyproject.toml index 5b707f52..d9965740 100644 --- a/examples/strands_agents/pyproject.toml +++ b/examples/strands_agents/pyproject.toml @@ -7,7 +7,7 @@ requires-python = ">=3.12" dependencies = [ "agent-control-sdk[strands-agents]>=6.3.0", "agent-control-engine>=6.3.0", - "agent-control-evaluators>=6.3.0", + "agent-control-rules>=6.3.0", "agent-control-models>=6.3.0", "agent-control-telemetry>=6.3.0", "openai>=1.0.0", @@ -47,5 +47,5 @@ python_files = ["*_test.py", "test_*.py"] agent-control-sdk = { path = "../../sdks/python", editable = true } agent-control-models = { path = "../../models", editable = true } agent-control-engine = { path = "../../engine", editable = true } -agent-control-evaluators = { path = "../../evaluators/builtin", editable = true } +agent-control-rules = { path = "../../rules/builtin", editable = true } agent-control-telemetry = { path = "../../telemetry", editable = true } diff --git a/examples/strands_agents/steering_demo/setup_email_controls.py b/examples/strands_agents/steering_demo/setup_email_controls.py index 5debaa17..a1488427 100644 --- a/examples/strands_agents/steering_demo/setup_email_controls.py +++ b/examples/strands_agents/steering_demo/setup_email_controls.py @@ -45,7 +45,7 @@ }, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"(\b\d{9,12}\b)|(\d{3}[-\s]?\d{2}[-\s]?\d{4})|(\$[\d,]+\d{3,})" @@ -100,7 +100,7 @@ }, "condition": { "selector": {"path": "input.summary_text"}, - "evaluator": { + "rule": { "name": "regex", "config": { # Match patterns like: api_key, password, secret, token @@ -128,7 +128,7 @@ }, "condition": { "selector": {"path": "input.summary_text"}, - "evaluator": { + "rule": { "name": "regex", "config": { # Match database names, server paths diff --git a/examples/target_context_demo/setup_controls.py b/examples/target_context_demo/setup_controls.py index 5c870845..beefc61f 100644 --- 
a/examples/target_context_demo/setup_controls.py +++ b/examples/target_context_demo/setup_controls.py @@ -46,7 +46,7 @@ "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\b\d{3}-\d{2}-\d{4}\b", "flags": []}, }, @@ -62,7 +62,7 @@ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["DROP TABLE", "rm -rf", "sudo"], diff --git a/examples/typescript_sdk/src/quickstart.ts b/examples/typescript_sdk/src/quickstart.ts index 7c8e7441..bab23e51 100644 --- a/examples/typescript_sdk/src/quickstart.ts +++ b/examples/typescript_sdk/src/quickstart.ts @@ -36,7 +36,7 @@ async function main(): Promise { action: { decision: "deny" }, description: "Block SSN-like patterns in post-step output.", enabled: true, - evaluator: { + rule: { name: "regex", config: { pattern: "\\b\\d{3}-\\d{2}-\\d{4}\\b", @@ -63,7 +63,7 @@ async function main(): Promise { controlId: createdControlId, }); console.log( - `Fetched control config: evaluator=${fetched.data.evaluator.name}, selector=${fetched.data.selector.path ?? "*"}`, + `Fetched control config: rule=${fetched.data.rule.name}, selector=${fetched.data.selector.path ?? 
"*"}`, ); } finally { if (createdControlId !== null) { diff --git a/models/src/agent_control_models/__init__.py b/models/src/agent_control_models/__init__.py index 6b5562a7..5f75ce46 100644 --- a/models/src/agent_control_models/__init__.py +++ b/models/src/agent_control_models/__init__.py @@ -35,10 +35,10 @@ ControlScope, ControlSelector, EnumTemplateParameter, - EvaluatorResult, - EvaluatorSpec, JsonValue, RegexTemplateParameter, + RuleResult, + RuleSpec, SteeringContext, StringListTemplateParameter, StringTemplateParameter, @@ -96,12 +96,11 @@ DeleteControlBindingByKeyResponse, DeleteControlBindingResponse, DeleteControlResponse, - EvaluatorSchema, GetControlBindingResponse, GetControlVersionResponse, GetPolicyControlsResponse, - InitAgentEvaluatorRemoval, InitAgentOverwriteChanges, + InitAgentRuleRemoval, ListAgentsResponse, ListControlBindingsResponse, ListControlsResponse, @@ -114,6 +113,7 @@ PolicyRef, RenderControlTemplateRequest, RenderControlTemplateResponse, + RuleSchema, StepKey, TargetAttachmentRef, UpsertControlBindingRequest, @@ -149,8 +149,8 @@ "ControlScope", "ControlSelector", "ControlDefinitionRuntime", - "EvaluatorSpec", - "EvaluatorResult", + "RuleSpec", + "RuleResult", "SteeringContext", "JsonValue", "TemplateValue", @@ -195,11 +195,11 @@ "DeleteControlBindingByKeyResponse", "DeleteControlBindingResponse", "DeleteControlResponse", - "EvaluatorSchema", + "RuleSchema", "GetControlBindingResponse", "GetControlVersionResponse", "GetPolicyControlsResponse", - "InitAgentEvaluatorRemoval", + "InitAgentRuleRemoval", "InitAgentOverwriteChanges", "ListAgentsResponse", "ListControlBindingsResponse", diff --git a/models/src/agent_control_models/controls.py b/models/src/agent_control_models/controls.py index 1e2bb9e9..f168a868 100644 --- a/models/src/agent_control_models/controls.py +++ b/models/src/agent_control_models/controls.py @@ -19,7 +19,7 @@ class ControlSelector(BaseModel): """Selects data from a Step payload. 
- - path: which slice of the Step to feed into the evaluator. Optional, defaults to "*" + - path: which slice of the Step to feed into the rule. Optional, defaults to "*" meaning the entire Step object. """ @@ -160,28 +160,28 @@ def validate_stages( # ============================================================================= -# Unified Evaluator Spec (used in API) +# Unified Rule Spec (used in API) # ============================================================================= -class EvaluatorSpec(BaseModel): - """Evaluator specification. See GET /evaluators for available evaluators and schemas. +class RuleSpec(BaseModel): + """Rule specification. See GET /rules for available rules and schemas. - Evaluator reference formats: + Rule reference formats: - Built-in: "regex", "list", "json", "sql" - - External: "galileo.luna" (requires agent-control-evaluators[galileo]) - - Agent-scoped: "my-agent:my-evaluator" (validated in endpoint, not here) + - External: "galileo.luna" (requires agent-control-rules[galileo]) + - Agent-scoped: "my-agent:my-rule" (validated in endpoint, not here) """ name: str = Field( ..., min_length=1, - description="Evaluator name or agent-scoped reference (agent:evaluator)", + description="Rule name or agent-scoped reference (agent:rule)", examples=["regex", "list", "my-agent:pii-detector"], ) config: dict[str, Any] = Field( ..., - description="Evaluator-specific configuration", + description="Rule-specific configuration", examples=[ {"pattern": r"\d{3}-\d{2}-\d{4}"}, {"values": ["admin"], "logic": "any"}, @@ -196,49 +196,49 @@ def validate_name(cls, value: str) -> str: normalized = value.strip() if not normalized: - raise ValueError("Evaluator name cannot be empty or whitespace-only.") + raise ValueError("Rule name cannot be empty or whitespace-only.") return normalized @model_validator(mode="after") - def validate_evaluator_config(self) -> Self: - """Validate config against evaluator's schema if evaluator is registered. 
+ def validate_rule_config(self) -> Self: + """Validate config against rule's schema if rule is registered. - Agent-scoped evaluators (format: agent:evaluator) are validated in the + Agent-scoped rules (format: agent:rule) are validated in the endpoint where we have database access to look up the agent's schema. """ - # Agent-scoped evaluators: defer validation to endpoint (needs DB access) + # Agent-scoped rules: defer validation to endpoint (needs DB access) if ":" in self.name: return self - # Built-in evaluators: validate config against evaluator's config_model - # This import is optional - evaluators package may not be installed + # Built-in rules: validate config against rule's config_model + # This import is optional - rules package may not be installed try: - from agent_control_evaluators import ensure_evaluators_discovered, get_evaluator + from agent_control_rules import ensure_rules_discovered, get_rule - # Ensure entry points are loaded before looking up evaluator - ensure_evaluators_discovered() - evaluator_cls = get_evaluator(self.name) - if evaluator_cls: - evaluator_cls.config_model(**self.config) + # Ensure entry points are loaded before looking up rule + ensure_rules_discovered() + rule_cls = get_rule(self.name) + if rule_cls: + rule_cls.config_model(**self.config) except ImportError: - # Evaluators package not installed - skip validation + # Rules package not installed - skip validation pass - # If evaluator not found, allow it (might be a server-side registered evaluator) + # If rule not found, allow it (might be a server-side registered rule) return self -type ConditionLeafParts = tuple[ControlSelector, EvaluatorSpec] +type ConditionLeafParts = tuple[ControlSelector, RuleSpec] @dataclass(frozen=True) class ControlObservabilityIdentity: - """Stable selector/evaluator identity derived from a condition tree.""" + """Stable selector/rule identity derived from a condition tree.""" selector_path: str | None - evaluator_name: str | None + rule_name: 
str | None leaf_count: int - all_evaluators: list[str] + all_rules: list[str] all_selector_paths: list[str] @@ -515,7 +515,7 @@ class ControlAction(BaseModel): description=( "Steering context object for steer actions. Strongly recommended when " "decision='steer' to provide correction suggestions. If not provided, the " - "evaluator result message will be used as fallback." + "rule result message will be used as fallback." ) ) @@ -548,11 +548,11 @@ class ConditionNode(BaseModel): selector: ControlSelector | None = Field( default=None, - description="Leaf selector. Must be provided together with evaluator.", + description="Leaf selector. Must be provided together with rule.", ) - evaluator: EvaluatorSpec | None = Field( + rule: RuleSpec | None = Field( default=None, - description="Leaf evaluator. Must be provided together with selector.", + description="Leaf rule. Must be provided together with selector.", ) and_: list[ConditionNode] | None = Field( default=None, @@ -585,10 +585,10 @@ class ConditionNode(BaseModel): def validate_shape(self) -> Self: """Ensure each node is exactly one of leaf/and/or/not.""" has_selector = self.selector is not None - has_evaluator = self.evaluator is not None - has_leaf = has_selector and has_evaluator - if has_selector != has_evaluator: - raise ValueError("Leaf condition requires both selector and evaluator") + has_rule = self.rule is not None + has_leaf = has_selector and has_rule + if has_selector != has_rule: + raise ValueError("Leaf condition requires both selector and rule") populated = sum( 1 @@ -621,8 +621,8 @@ def kind(self) -> Literal["leaf", "and", "or", "not"]: return "not" def is_leaf(self) -> bool: - """Return True when this node is a leaf selector/evaluator pair.""" - return self.selector is not None and self.evaluator is not None + """Return True when this node is a leaf selector/rule pair.""" + return self.selector is not None and self.rule is not None def children_in_order(self) -> list[ConditionNode]: """Return 
child conditions in evaluation order.""" @@ -644,7 +644,7 @@ def iter_leaves(self) -> Iterator[ConditionNode]: yield from child.iter_leaves() def iter_leaf_parts(self) -> Iterator[ConditionLeafParts]: - """Yield leaf selector/evaluator pairs in left-to-right traversal order.""" + """Yield leaf selector/rule pairs in left-to-right traversal order.""" leaf_parts = self.leaf_parts() if leaf_parts is not None: yield leaf_parts @@ -661,26 +661,26 @@ def max_depth(self) -> int: return 1 + max(child.max_depth() for child in children) def leaf_parts(self) -> ConditionLeafParts | None: - """Return the selector/evaluator pair for leaf nodes.""" + """Return the selector/rule pair for leaf nodes.""" if not self.is_leaf(): return None selector = self.selector - evaluator = self.evaluator - if selector is None or evaluator is None: + rule = self.rule + if selector is None or rule is None: return None - return selector, evaluator + return selector, rule model_config["json_schema_extra"] = { "examples": [ { "selector": {"path": "output"}, - "evaluator": {"name": "regex", "config": {"pattern": r"\d{3}-\d{2}-\d{4}"}}, + "rule": {"name": "regex", "config": {"pattern": r"\d{3}-\d{2}-\d{4}"}}, }, { "and": [ { "selector": {"path": "context.risk_level"}, - "evaluator": { + "rule": { "name": "list", "config": {"values": ["high", "critical"]}, }, @@ -688,7 +688,7 @@ def leaf_parts(self) -> ConditionLeafParts | None: { "not": { "selector": {"path": "context.user_role"}, - "evaluator": { + "rule": { "name": "list", "config": {"values": ["admin", "security"]}, }, @@ -706,20 +706,20 @@ def leaf_parts(self) -> ConditionLeafParts | None: def _build_observability_identity( condition: ConditionNode, ) -> ControlObservabilityIdentity: - """Build a stable selector/evaluator identity for a condition tree.""" - all_evaluators: list[str] = [] + """Build a stable selector/rule identity for a condition tree.""" + all_rules: list[str] = [] all_selector_paths: list[str] = [] - seen_evaluators: set[str] = 
set() + seen_rules: set[str] = set() seen_selector_paths: set[str] = set() leaf_count = 0 - for selector, evaluator in condition.iter_leaf_parts(): + for selector, rule in condition.iter_leaf_parts(): leaf_count += 1 selector_path = selector.path or "*" - if evaluator.name not in seen_evaluators: - seen_evaluators.add(evaluator.name) - all_evaluators.append(evaluator.name) + if rule.name not in seen_rules: + seen_rules.add(rule.name) + all_rules.append(rule.name) if selector_path not in seen_selector_paths: seen_selector_paths.add(selector_path) @@ -727,9 +727,9 @@ def _build_observability_identity( return ControlObservabilityIdentity( selector_path=all_selector_paths[0] if all_selector_paths else None, - evaluator_name=all_evaluators[0] if all_evaluators else None, + rule_name=all_rules[0] if all_rules else None, leaf_count=leaf_count, - all_evaluators=all_evaluators, + all_rules=all_rules, all_selector_paths=all_selector_paths, ) @@ -744,7 +744,7 @@ def iter_condition_leaves(self) -> Iterator[ConditionNode]: yield from self.condition.iter_leaves() def iter_condition_leaf_parts(self) -> Iterator[ConditionLeafParts]: - """Yield leaf selector/evaluator pairs in evaluation order.""" + """Yield leaf selector/rule pairs in evaluation order.""" yield from self.condition.iter_leaf_parts() def observability_identity(self) -> ControlObservabilityIdentity: @@ -753,30 +753,30 @@ def observability_identity(self) -> ControlObservabilityIdentity: def canonicalize_control_payload(data: Any) -> Any: - """Rewrite legacy selector/evaluator payloads into canonical condition shape.""" + """Rewrite legacy selector/rule payloads into canonical condition shape.""" if not isinstance(data, dict): return data has_condition = "condition" in data has_selector = "selector" in data - has_evaluator = "evaluator" in data + has_rule = "rule" in data - if has_condition and (has_selector or has_evaluator): + if has_condition and (has_selector or has_rule): raise ValueError( "Control definition 
mixes canonical condition fields " - "with legacy selector/evaluator fields." + "with legacy selector/rule fields." ) - if has_selector != has_evaluator: + if has_selector != has_rule: raise ValueError( - "Legacy control definition must include both selector and evaluator." + "Legacy control definition must include both selector and rule." ) if not has_condition and has_selector: canonical = dict(data) selector = canonical.pop("selector") - evaluator = canonical.pop("evaluator") + rule = canonical.pop("rule") canonical["condition"] = { "selector": selector, - "evaluator": evaluator, + "rule": rule, } return canonical return data @@ -801,7 +801,7 @@ class ControlDefinitionBase(_ConditionBackedControlMixin, BaseModel): condition: ConditionNode = Field( ..., description=( - "Recursive boolean condition tree. Leaf nodes contain selector + evaluator; " + "Recursive boolean condition tree. Leaf nodes contain selector + rule; " "composite nodes contain and/or/not." ), ) @@ -814,7 +814,7 @@ class ControlDefinitionBase(_ConditionBackedControlMixin, BaseModel): @classmethod def canonicalize_payload(cls, data: Any) -> Any: - """Rewrite legacy selector/evaluator payloads into canonical condition shape.""" + """Rewrite legacy selector/rule payloads into canonical condition shape.""" return canonicalize_control_payload(data) @model_validator(mode="before") @@ -882,7 +882,7 @@ def primary_leaf(self) -> ConditionNode | None: "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": r"\b\d{3}-\d{2}-\d{4}\b", @@ -905,19 +905,19 @@ class ControlDefinitionRuntime(ControlDefinitionBase): model_config = ConfigDict(extra="ignore") -class EvaluatorResult(BaseModel): - """Result from a control evaluator. +class RuleResult(BaseModel): + """Result from a control rule. 
- The `error` field indicates evaluator failures, NOT validation failures: - - Set `error` for: evaluator crashes, timeouts, missing dependencies, external service errors + The `error` field indicates rule failures, NOT validation failures: + - Set `error` for: rule crashes, timeouts, missing dependencies, external service errors - Do NOT set `error` for: invalid input, syntax errors, schema violations, constraint failures - When `error` is set, `matched` must be False (fail-open on evaluator errors). + When `error` is set, `matched` must be False (fail-open on rule errors). When `error` is None, `matched` reflects the actual validation result. This distinction allows: - - Clients to distinguish "data violated rules" from "evaluator is broken" - - Observability systems to monitor evaluator health separately from validation outcomes + - Clients to distinguish "data violated rules" from "rule is broken" + - Observability systems to monitor rule health separately from validation outcomes """ matched: bool = Field(..., description="Whether the pattern matched") @@ -954,8 +954,8 @@ class ControlMatch(BaseModel): action: ActionDecision = Field( ..., description="Action configured for this control" ) - result: EvaluatorResult = Field( - ..., description="Evaluator result (confidence, message, metadata)" + result: RuleResult = Field( + ..., description="Rule result (confidence, message, metadata)" ) steering_context: SteeringContext | None = Field( None, diff --git a/models/src/agent_control_models/errors.py b/models/src/agent_control_models/errors.py index 0db04134..c99d81dd 100644 --- a/models/src/agent_control_models/errors.py +++ b/models/src/agent_control_models/errors.py @@ -63,17 +63,17 @@ class ErrorCode(StrEnum): CONTROL_NOT_FOUND = "CONTROL_NOT_FOUND" CONTROL_VERSION_NOT_FOUND = "CONTROL_VERSION_NOT_FOUND" CONTROL_BINDING_NOT_FOUND = "CONTROL_BINDING_NOT_FOUND" - EVALUATOR_NOT_FOUND = "EVALUATOR_NOT_FOUND" + RULE_NOT_FOUND = "RULE_NOT_FOUND" # Conflict Errors 
(3xx pattern) AGENT_NAME_CONFLICT = "AGENT_NAME_CONFLICT" POLICY_NAME_CONFLICT = "POLICY_NAME_CONFLICT" CONTROL_NAME_CONFLICT = "CONTROL_NAME_CONFLICT" CONTROL_BINDING_CONFLICT = "CONTROL_BINDING_CONFLICT" - EVALUATOR_NAME_CONFLICT = "EVALUATOR_NAME_CONFLICT" + RULE_NAME_CONFLICT = "RULE_NAME_CONFLICT" CONTROL_IN_USE = "CONTROL_IN_USE" CONTROL_TEMPLATE_CONFLICT = "CONTROL_TEMPLATE_CONFLICT" - EVALUATOR_IN_USE = "EVALUATOR_IN_USE" + RULE_IN_USE = "RULE_IN_USE" SCHEMA_INCOMPATIBLE = "SCHEMA_INCOMPATIBLE" # Validation Errors (4xx pattern) @@ -327,19 +327,19 @@ class ProblemDetail(BaseModel): "errors": [ { "resource": "Control", - "field": "data.evaluator.config.threshold", + "field": "data.rule.config.threshold", "code": "type_error", "message": "Expected number, got string", "value": "high", }, { "resource": "Control", - "field": "data.evaluator.name", + "field": "data.rule.name", "code": "not_found", - "message": "Evaluator 'nonexistent' not registered", + "message": "Rule 'nonexistent' not registered", }, ], - "hint": "Check the evaluator configuration against the schema.", + "hint": "Check the rule configuration against the schema.", }, ] } @@ -372,16 +372,16 @@ def make_error_type(error_code: ErrorCode) -> str: ErrorCode.CONTROL_NOT_FOUND: "Control Not Found", ErrorCode.CONTROL_VERSION_NOT_FOUND: "Control Version Not Found", ErrorCode.CONTROL_BINDING_NOT_FOUND: "Control Binding Not Found", - ErrorCode.EVALUATOR_NOT_FOUND: "Evaluator Not Found", + ErrorCode.RULE_NOT_FOUND: "Rule Not Found", # Conflict errors ErrorCode.AGENT_NAME_CONFLICT: "Agent Name Already Exists", ErrorCode.POLICY_NAME_CONFLICT: "Policy Name Already Exists", ErrorCode.CONTROL_NAME_CONFLICT: "Control Name Already Exists", ErrorCode.CONTROL_BINDING_CONFLICT: "Control Binding Already Exists", - ErrorCode.EVALUATOR_NAME_CONFLICT: "Evaluator Name Conflict", + ErrorCode.RULE_NAME_CONFLICT: "Rule Name Conflict", ErrorCode.CONTROL_IN_USE: "Control In Use", ErrorCode.CONTROL_TEMPLATE_CONFLICT: "Control 
Template Conflict", - ErrorCode.EVALUATOR_IN_USE: "Evaluator In Use", + ErrorCode.RULE_IN_USE: "Rule In Use", ErrorCode.SCHEMA_INCOMPATIBLE: "Schema Incompatible", # Validation errors ErrorCode.VALIDATION_ERROR: "Validation Error", diff --git a/models/src/agent_control_models/observability.py b/models/src/agent_control_models/observability.py index dbd11fac..733c02c6 100644 --- a/models/src/agent_control_models/observability.py +++ b/models/src/agent_control_models/observability.py @@ -36,7 +36,7 @@ class ControlExecutionEvent(BaseModel): - Context: agent, control, check stage, applies to - Result: action taken, whether matched, confidence score - Timing: when it happened, how long it took - - Optional details: evaluator name, selector path, errors, metadata + - Optional details: rule name, selector path, errors, metadata Attributes: control_execution_id: Unique ID for this specific control execution @@ -48,11 +48,11 @@ class ControlExecutionEvent(BaseModel): check_stage: "pre" (before execution) or "post" (after execution) applies_to: "llm_call" or "tool_call" action: The action taken (deny, steer, observe) - matched: Whether the control evaluator matched - confidence: Confidence score from the evaluator (0.0-1.0) + matched: Whether the control rule matched + confidence: Confidence score from the rule (0.0-1.0) timestamp: When the control was executed (UTC) execution_duration_ms: How long the control evaluation took - evaluator_name: Name of the evaluator used + rule_name: Name of the rule used selector_path: The selector path used to extract data error_message: Error message if evaluation failed metadata: Additional metadata for extensibility @@ -99,7 +99,7 @@ class ControlExecutionEvent(BaseModel): ..., description="Action taken by the control" ) matched: bool = Field( - ..., description="Whether the evaluator matched (True) or not (False)" + ..., description="Whether the rule matched (True) or not (False)" ) confidence: float = Field( ..., ge=0.0, le=1.0, 
description="Confidence score (0.0 to 1.0)" @@ -115,8 +115,8 @@ class ControlExecutionEvent(BaseModel): ) # Optional details - evaluator_name: str | None = Field( - default=None, description="Name of the evaluator used" + rule_name: str | None = Field( + default=None, description="Name of the rule used" ) selector_path: str | None = Field( default=None, description="Selector path used to extract data" @@ -187,7 +187,7 @@ def normalize_event_action(cls, value: str) -> ActionDecision: "confidence": 0.95, "timestamp": "2025-01-09T10:30:00Z", "execution_duration_ms": 15.3, - "evaluator_name": "regex", + "rule_name": "regex", "selector_path": "input", } ] @@ -499,8 +499,8 @@ class StatsTotals(BaseModel): Attributes: execution_count: Total executions across all controls - match_count: Total matches across all controls (evaluator matched) - non_match_count: Total non-matches across all controls (evaluator didn't match) + match_count: Total matches across all controls (rule matched) + non_match_count: Total non-matches across all controls (rule didn't match) error_count: Total errors across all controls (evaluation failed) action_counts: Breakdown of actions for matched executions timeseries: Time-series data points (only when include_timeseries=true) diff --git a/models/src/agent_control_models/server.py b/models/src/agent_control_models/server.py index a96b0410..c5987cc7 100644 --- a/models/src/agent_control_models/server.py +++ b/models/src/agent_control_models/server.py @@ -39,7 +39,7 @@ def _strip_slug_name(v: str) -> str: { # Legacy flat leaf fields still accepted for raw controls. "selector", - "evaluator", + "rule", } ) @@ -84,17 +84,17 @@ def _parse_control_input(v: Any) -> Any: ] -class EvaluatorSchema(BaseModel): - """Schema for a custom evaluator registered with an agent. +class RuleSchema(BaseModel): + """Schema for a custom rule registered with an agent. - Custom evaluators are Evaluator classes deployed with the engine. 
+ Custom rules are Rule classes deployed with the engine. This schema is registered via initAgent for validation and UI purposes. """ - name: str = Field(..., min_length=1, max_length=255, description="Unique evaluator name") + name: str = Field(..., min_length=1, max_length=255, description="Unique rule name") config_schema: dict[str, Any] = Field( default_factory=dict, - description="JSON Schema for evaluator config validation", + description="JSON Schema for rule config validation", ) description: str | None = Field(None, max_length=1000, description="Optional description") @@ -103,28 +103,28 @@ class ConflictMode(StrEnum): """Conflict handling mode for initAgent registration updates. STRICT preserves compatibility checks and raises conflicts on incompatible changes. - OVERWRITE applies latest-init-wins replacement for steps and evaluators. + OVERWRITE applies latest-init-wins replacement for steps and rules. """ STRICT = "strict" OVERWRITE = "overwrite" -class InitAgentEvaluatorRemoval(BaseModel): - """Details for an evaluator removed during overwrite mode.""" +class InitAgentRuleRemoval(BaseModel): + """Details for a rule removed during overwrite mode.""" - name: str = Field(..., description="Evaluator name removed by overwrite") + name: str = Field(..., description="Rule name removed by overwrite") referenced_by_active_controls: bool = Field( default=False, - description="Whether this evaluator is still referenced by active controls", + description="Whether this rule is still referenced by active controls", ) control_ids: list[int] = Field( default_factory=list, - description="IDs of active controls referencing this evaluator", + description="IDs of active controls referencing this rule", ) control_names: list[str] = Field( default_factory=list, - description="Names of active controls referencing this evaluator", + description="Names of active controls referencing this rule", ) @@ -146,21 +146,21 @@ class InitAgentOverwriteChanges(BaseModel): 
default_factory=list, description="Steps removed by overwrite", ) - evaluators_added: list[str] = Field( + rules_added: list[str] = Field( default_factory=list, - description="Evaluator names added by overwrite", + description="Rule names added by overwrite", ) - evaluators_updated: list[str] = Field( + rules_updated: list[str] = Field( default_factory=list, - description="Existing evaluator names updated by overwrite", + description="Existing rule names updated by overwrite", ) - evaluators_removed: list[str] = Field( + rules_removed: list[str] = Field( default_factory=list, - description="Evaluator names removed by overwrite", + description="Rule names removed by overwrite", ) - evaluator_removals: list[InitAgentEvaluatorRemoval] = Field( + rule_removals: list[InitAgentRuleRemoval] = Field( default_factory=list, - description="Per-evaluator removal details, including active control references", + description="Per-rule removal details, including active control references", ) @@ -189,9 +189,9 @@ class InitAgentRequest(BaseModel): steps: list[StepSchema] = Field( default_factory=list, description="List of steps available to the agent" ) - evaluators: list[EvaluatorSchema] = Field( + rules: list[RuleSchema] = Field( default_factory=list, - description="Custom evaluator schemas for config validation", + description="Custom rule schemas for config validation", ) force_replace: bool = Field( default=False, @@ -205,7 +205,7 @@ class InitAgentRequest(BaseModel): description=( "Conflict handling mode for init registration updates. " "'strict' preserves existing compatibility checks. " - "'overwrite' applies latest-init-wins replacement for steps and evaluators." + "'overwrite' applies latest-init-wins replacement for steps and rules." 
), ) target_type: Annotated[ @@ -254,7 +254,7 @@ def _check_target_pair(self) -> Self: "output_schema": {"results": {"type": "array"}}, } ], - "evaluators": [ + "rules": [ { "name": "pii-detector", "config_schema": { @@ -293,8 +293,8 @@ class GetAgentResponse(BaseModel): """Response containing agent details and registered steps.""" agent: Agent = Field(..., description="Agent metadata") steps: list[StepSchema] = Field(..., description="Steps registered with this agent") - evaluators: list[EvaluatorSchema] = Field( - default_factory=list, description="Custom evaluators registered with this agent" + rules: list[RuleSchema] = Field( + default_factory=list, description="Custom rules registered with this agent" ) @@ -453,13 +453,13 @@ class StepKey(BaseModel): class PatchAgentRequest(BaseModel): - """Request to modify an agent (remove steps/evaluators).""" + """Request to modify an agent (remove steps/rules).""" remove_steps: list[StepKey] = Field( default_factory=list, description="Step identifiers to remove from the agent" ) - remove_evaluators: list[str] = Field( - default_factory=list, description="Evaluator names to remove from the agent" + remove_rules: list[str] = Field( + default_factory=list, description="Rule names to remove from the agent" ) @@ -469,8 +469,8 @@ class PatchAgentResponse(BaseModel): steps_removed: list[StepKey] = Field( default_factory=list, description="Step identifiers that were removed" ) - evaluators_removed: list[str] = Field( - default_factory=list, description="Evaluator names that were removed" + rules_removed: list[str] = Field( + default_factory=list, description="Rule names that were removed" ) @@ -483,7 +483,7 @@ class AgentSummary(BaseModel): ) created_at: str | None = Field(None, description="ISO 8601 timestamp when agent was created") step_count: int = Field(0, description="Number of steps registered with the agent") - evaluator_count: int = Field(0, description="Number of evaluators registered with the agent") + rule_count: int 
= Field(0, description="Number of rules registered with the agent") active_controls_count: int = Field( 0, description="Number of active controls for this agent" ) diff --git a/models/tests/test_actions.py b/models/tests/test_actions.py index 2b0c5775..c54d2d76 100644 --- a/models/tests/test_actions.py +++ b/models/tests/test_actions.py @@ -8,7 +8,7 @@ ControlExecutionEvent, ControlMatch, EventQueryRequest, - EvaluatorResult, + RuleResult, expand_action_filter, ) from agent_control_models.actions import normalize_action, validate_action @@ -171,7 +171,7 @@ def test_control_match_normalizes_legacy(self, legacy: str, expected: str) -> No control_id=1, control_name="test", action=legacy, - result=EvaluatorResult(matched=True, confidence=0.9), + result=RuleResult(matched=True, confidence=0.9), ) # Then: the action is normalized to the canonical value @@ -214,7 +214,7 @@ def test_control_match_rejects_unknown(self) -> None: control_id=1, control_name="test", action="block", - result=EvaluatorResult(matched=True, confidence=0.9), + result=RuleResult(matched=True, confidence=0.9), ) diff --git a/models/tests/test_control_templates.py b/models/tests/test_control_templates.py index 893f342a..1bd420f9 100644 --- a/models/tests/test_control_templates.py +++ b/models/tests/test_control_templates.py @@ -26,7 +26,7 @@ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -56,7 +56,7 @@ def test_control_definition_requires_template_fields_together() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "ok"}, }, @@ -81,7 +81,7 @@ def test_control_definition_rejects_template_values_without_template() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": 
{ + "rule": { "name": "regex", "config": {"pattern": "ok"}, }, @@ -215,7 +215,7 @@ def test_control_definition_can_round_trip_to_template_control_input() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "hello"}, }, @@ -244,7 +244,7 @@ def test_control_definition_to_template_control_input_rejects_raw_control() -> N "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "hello"}, }, @@ -267,7 +267,7 @@ def test_control_definition_runtime_ignores_template_metadata() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "hello"}, }, diff --git a/models/tests/test_controls.py b/models/tests/test_controls.py index dc36e67f..352af672 100644 --- a/models/tests/test_controls.py +++ b/models/tests/test_controls.py @@ -2,10 +2,13 @@ from __future__ import annotations +import builtins + import pytest from agent_control_models import ( ControlDefinition, ControlDefinitionRuntime, + RuleSpec, ) from agent_control_models.controls import ControlDefinitionBase from pydantic import ValidationError @@ -13,23 +16,23 @@ def _leaf( path: str, - evaluator_name: str = "regex", + rule_name: str = "regex", config: dict[str, object] | None = None, ) -> dict[str, object]: return { "selector": {"path": path}, - "evaluator": { - "name": evaluator_name, + "rule": { + "name": rule_name, "config": config or {"pattern": "ok"}, }, } -def test_condition_leaf_requires_selector_and_evaluator() -> None: +def test_condition_leaf_requires_selector_and_rule() -> None: # Given: a leaf condition with only a selector with pytest.raises( ValidationError, - match="Leaf condition requires both selector and evaluator", + match="Leaf condition requires both 
selector and rule", ): # When: validating the control definition ControlDefinition.model_validate( @@ -56,7 +59,7 @@ def test_condition_node_requires_exactly_one_shape() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "ok"}}, + "rule": {"name": "regex", "config": {"pattern": "ok"}}, "and": [_leaf("output")], }, "action": {"decision": "deny"}, @@ -65,13 +68,33 @@ def test_condition_node_requires_exactly_one_shape() -> None: # Then: validation rejects the ambiguous node shape +def test_rule_spec_skips_config_validation_when_rules_package_is_unavailable( + monkeypatch: pytest.MonkeyPatch, +) -> None: + # Given: shared models imported in an environment without agent_control_rules + original_import = builtins.__import__ + + def _raise_for_rules(name: str, *args: object, **kwargs: object) -> object: + if name == "agent_control_rules" or name.startswith("agent_control_rules."): + raise ImportError("rules package unavailable") + return original_import(name, *args, **kwargs) # type: ignore[arg-type] + + monkeypatch.setattr(builtins, "__import__", _raise_for_rules) + + # When: validating a known builtin rule with config that would normally be invalid + spec = RuleSpec.model_validate({"name": "regex", "config": {}}) + + # Then: model validation does not require the optional rules package + assert spec.name == "regex" + + def test_legacy_leaf_payload_is_canonicalized() -> None: - # Given: a legacy flat selector/evaluator payload + # Given: a legacy flat selector/rule payload legacy_payload = { "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "ok"}}, + "rule": {"name": "regex", "config": {"pattern": "ok"}}, "action": {"decision": "deny"}, } @@ -81,18 +104,18 @@ def test_legacy_leaf_payload_is_canonicalized() -> None: # Then: the model dumps back out in 
canonical condition form dumped = control.model_dump(mode="json", exclude_none=True) assert "selector" not in dumped - assert "evaluator" not in dumped + assert "rule" not in dumped assert dumped["condition"]["selector"]["path"] == "input" - assert dumped["condition"]["evaluator"]["name"] == "regex" + assert dumped["condition"]["rule"]["name"] == "regex" def test_runtime_legacy_leaf_payload_is_canonicalized() -> None: - # Given: a legacy flat selector/evaluator payload loaded for runtime evaluation + # Given: a legacy flat selector/rule payload loaded for runtime evaluation legacy_payload = { "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "ok"}}, + "rule": {"name": "regex", "config": {"pattern": "ok"}}, "action": {"decision": "deny"}, } @@ -102,9 +125,9 @@ def test_runtime_legacy_leaf_payload_is_canonicalized() -> None: # Then: runtime parsing uses the same canonical condition shape dumped = control.model_dump(mode="json", exclude_none=True) assert "selector" not in dumped - assert "evaluator" not in dumped + assert "rule" not in dumped assert dumped["condition"]["selector"]["path"] == "input" - assert dumped["condition"]["evaluator"]["name"] == "regex" + assert dumped["condition"]["rule"]["name"] == "regex" def test_mixed_legacy_and_condition_fields_are_rejected() -> None: @@ -114,14 +137,14 @@ def test_mixed_legacy_and_condition_fields_are_rejected() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": _leaf("input"), "selector": {"path": "output"}, - "evaluator": {"name": "regex", "config": {"pattern": "ok"}}, + "rule": {"name": "regex", "config": {"pattern": "ok"}}, "action": {"decision": "deny"}, } with pytest.raises( ValidationError, match="Control definition mixes canonical condition fields " - "with legacy selector/evaluator fields", + "with legacy selector/rule fields", ): # When: validating the mixed payload 
ControlDefinition.model_validate(payload) @@ -135,14 +158,14 @@ def test_runtime_mixed_legacy_and_condition_fields_are_rejected() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": _leaf("input"), "selector": {"path": "output"}, - "evaluator": {"name": "regex", "config": {"pattern": "ok"}}, + "rule": {"name": "regex", "config": {"pattern": "ok"}}, "action": {"decision": "deny"}, } with pytest.raises( ValidationError, match="Control definition mixes canonical condition fields " - "with legacy selector/evaluator fields", + "with legacy selector/rule fields", ): # When: validating the mixed payload through the runtime model ControlDefinitionRuntime.model_validate(payload) @@ -179,7 +202,7 @@ def test_condition_iter_leaves_preserves_left_to_right_order() -> None: { "not": _leaf( "input.role", - evaluator_name="list", + rule_name="list", config={"values": ["admin"]}, ) }, @@ -329,28 +352,28 @@ def test_single_leaf_control_returns_primary_leaf() -> None: # When: asking for the primary leaf primary_leaf = control.primary_leaf() - # Then: the original selector/evaluator pair is returned intact + # Then: the original selector/rule pair is returned intact assert primary_leaf is not None leaf_parts = primary_leaf.leaf_parts() assert leaf_parts is not None - selector, evaluator = leaf_parts + selector, rule = leaf_parts assert selector.path == "input.value" - assert evaluator.name == "regex" + assert rule.name == "regex" def test_condition_observability_identity_uses_first_leaf_and_dedupes() -> None: - # Given: a composite condition tree with repeated selectors/evaluators + # Given: a composite condition tree with repeated selectors/rules control = ControlDefinition.model_validate( { "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "and": [ - _leaf("input.user", evaluator_name="regex"), - _leaf("output.user", evaluator_name="regex"), + _leaf("input.user", rule_name="regex"), + _leaf("output.user", 
rule_name="regex"), _leaf( "output.user", - evaluator_name="list", + rule_name="list", config={"values": ["blocked"]}, ), ] @@ -364,7 +387,7 @@ def test_condition_observability_identity_uses_first_leaf_and_dedupes() -> None: # Then: the first leaf becomes the representative identity and full context stays ordered assert identity.selector_path == "input.user" - assert identity.evaluator_name == "regex" + assert identity.rule_name == "regex" assert identity.leaf_count == 3 - assert identity.all_evaluators == ["regex", "list"] + assert identity.all_rules == ["regex", "list"] assert identity.all_selector_paths == ["input.user", "output.user"] diff --git a/pyproject.toml b/pyproject.toml index b7d87837..dee5b321 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ # This monorepo contains: # - models/ - Shared data models (published as agent-control-models) # - engine/ - Evaluation engine (bundled into SDK and server) -# - evaluators/ - Evaluator implementations (bundled into server) +# - rules/ - Rule implementations (bundled into server) # - sdks/python/ - Python SDK (published as agent-control-sdk) # - server/ - FastAPI server (published as agent-control-server) @@ -20,8 +20,8 @@ members = [ "sdks/python", "telemetry", "engine", - "evaluators/builtin", - # NOTE: evaluators/contrib/* excluded - install separately when needed + "rules/builtin", + # NOTE: rules/contrib/* excluded - install separately when needed ] [tool.uv] @@ -36,7 +36,7 @@ required-environments = [ [tool.uv.sources] agent-control-models = { workspace = true } agent-control-engine = { workspace = true } -agent-control-evaluators = { workspace = true } +agent-control-rules = { workspace = true } agent-control-telemetry = { workspace = true } [tool.ruff] @@ -72,10 +72,10 @@ version_toml = [ "sdks/python/pyproject.toml:project.version", "telemetry/pyproject.toml:project.version", "server/pyproject.toml:project.version", - "evaluators/builtin/pyproject.toml:project.version", - 
"evaluators/contrib/budget/pyproject.toml:project.version", - "evaluators/contrib/cisco/pyproject.toml:project.version", - "evaluators/contrib/galileo/pyproject.toml:project.version", + "rules/builtin/pyproject.toml:project.version", + "rules/contrib/budget/pyproject.toml:project.version", + "rules/contrib/cisco/pyproject.toml:project.version", + "rules/contrib/galileo/pyproject.toml:project.version", ] version_source = "tag" commit_message = "chore(release): v{version}" diff --git a/evaluators/README.md b/rules/README.md similarity index 58% rename from evaluators/README.md rename to rules/README.md index a179b2be..6af89d24 100644 --- a/evaluators/README.md +++ b/rules/README.md @@ -1,16 +1,16 @@ -# Agent Control Evaluators +# Agent Control Rules -Evaluators analyze agent inputs, outputs, and context to determine if controls should trigger. They provide the core logic for policy enforcement. +Rules analyze agent inputs, outputs, and context to determine if controls should trigger. They provide the core logic for policy enforcement. -## What Evaluators Do +## What Rules Do When a control is evaluated: 1. Selector extracts data (e.g., `input.query`, `output.email`) -2. Evaluator analyzes the data against configured rules +2. Rule analyzes the data against configured rules 3. Returns match/no-match decision 4. 
Control action is applied if matched (deny, warn, steer, log) -## Built-in Evaluators +## Built-in Rules ### Regex Pattern matching for text (PII, keywords, SQL injection) @@ -43,38 +43,38 @@ Structured data validation with JSONPath ## Directory Structure -- `builtin/` — Core evaluators (regex, list, json, sql) -- `contrib/` — Community evaluators (PII detection, toxicity, custom) +- `builtin/` — Core rules (regex, list, json, sql) +- `contrib/` — Community rules (PII detection, toxicity, custom) - `src/` — Shared interfaces and utilities -## Adding Custom Evaluators +## Adding Custom Rules -Place custom evaluators in `contrib/` and they'll be loaded automatically: +Place custom rules in `contrib/` and they'll be loaded automatically: ```python -from agent_control_evaluators import Evaluator, EvaluatorConfig, EvaluatorMetadata, register_evaluator -from agent_control_models import EvaluatorResult +from agent_control_rules import Rule, RuleConfig, RuleMetadata, register_rule +from agent_control_models import RuleResult from typing import Any -class MyEvaluatorConfig(EvaluatorConfig): +class MyRuleConfig(RuleConfig): threshold: float = 0.5 -@register_evaluator -class MyEvaluator(Evaluator[MyEvaluatorConfig]): - metadata = EvaluatorMetadata( - name="my-evaluator", +@register_rule +class MyRule(Rule[MyRuleConfig]): + metadata = RuleMetadata( + name="my-rule", version="1.0.0", - description="My custom evaluator", + description="My custom rule", ) - config_model = MyEvaluatorConfig + config_model = MyRuleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: # Your logic here - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message="Rule triggered", ) ``` -Full guide: https://docs.agentcontrol.dev/concepts/evaluators +Full guide: https://docs.agentcontrol.dev/concepts/rules diff --git a/evaluators/builtin/Makefile b/rules/builtin/Makefile similarity index 77% rename from 
evaluators/builtin/Makefile rename to rules/builtin/Makefile index 37abbdd1..7341b113 100644 --- a/evaluators/builtin/Makefile +++ b/rules/builtin/Makefile @@ -1,9 +1,9 @@ .PHONY: help sync test lint lint-fix typecheck build publish -PACKAGE := agent-control-evaluators +PACKAGE := agent-control-rules help: - @echo "Agent Control Evaluators - Makefile commands" + @echo "Agent Control Rules - Makefile commands" @echo "" @echo " make test - run pytest" @echo " make lint - run ruff check" @@ -15,7 +15,7 @@ sync: uv sync test: - uv run pytest --cov=src --cov-report=xml:../../coverage-evaluators.xml -q + uv run pytest --cov=src --cov-report=xml:../../coverage-rules.xml -q lint: uv run ruff check --config ../../pyproject.toml src/ diff --git a/rules/builtin/README.md b/rules/builtin/README.md new file mode 100644 index 00000000..bf46c9c8 --- /dev/null +++ b/rules/builtin/README.md @@ -0,0 +1,27 @@ +# Agent Control Built-in Rules + +Built-in rules provide common checks like regex matching, list matching, JSON validation, and SQL validation. They are discovered automatically via Python entry points and used by the server and SDK runtime. 
+ +## What this package provides + +- `regex` rule for pattern matching +- `list` rule for allow/deny lists +- `json` rule for schema validation +- `sql` rule for query validation + +## Install + +```bash +pip install agent-control-rules +``` + +## Discover rules + +```python +from agent_control_rules import discover_rules, list_rules + +discover_rules() +print(list_rules()) +``` + +Full guide: https://docs.agentcontrol.dev/concepts/rules/built-in-rules diff --git a/rules/builtin/pyproject.toml b/rules/builtin/pyproject.toml new file mode 100644 index 00000000..c5bfc7b5 --- /dev/null +++ b/rules/builtin/pyproject.toml @@ -0,0 +1,41 @@ +[project] +name = "agent-control-rules" +version = "8.1.2" +description = "Builtin rules for agent-control" +readme = "README.md" +requires-python = ">=3.12" +license = { text = "Apache-2.0" } +authors = [{ name = "Agent Control Team" }] +dependencies = [ + "agent-control-models>=7.5.0", + "pydantic>=2.12.4", + "google-re2>=1.1", + "jsonschema>=4.0.0", + "sqlglot[c]>=30.11.0,<30.12.0", +] + +[project.optional-dependencies] +galileo = ["agent-control-rule-galileo>=7.5.0"] +budget = ["agent-control-rule-budget>=7.5.0"] +cisco = ["agent-control-rule-cisco>=7.5.0"] +dev = ["pytest>=8.0.0", "pytest-asyncio>=0.23.0"] + +[project.entry-points."agent_control.rules"] +regex = "agent_control_rules.regex:RegexRule" +list = "agent_control_rules.list:ListRule" +json = "agent_control_rules.json:JSONRule" +sql = "agent_control_rules.sql:SQLRule" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/agent_control_rules"] + +[tool.uv.sources] +agent-control-models = { workspace = true } +# For local dev: use local contrib packages instead of PyPI +agent-control-rule-galileo = { path = "../contrib/galileo", editable = true } +agent-control-rule-budget = { path = "../contrib/budget", editable = true } +agent-control-rule-cisco = { path = "../contrib/cisco", editable = true } 
diff --git a/rules/builtin/src/agent_control_rules/__init__.py b/rules/builtin/src/agent_control_rules/__init__.py new file mode 100644 index 00000000..6bca3cdb --- /dev/null +++ b/rules/builtin/src/agent_control_rules/__init__.py @@ -0,0 +1,80 @@ +"""Agent Control Rules. + +This package contains builtin rule implementations for agent-control. +Built-in rules (regex, list, json, sql) are registered automatically on import. + +Available rules: + Built-in (no namespace): + - regex: Regular expression matching + - list: List-based value matching + - json: JSON validation + - sql: SQL query validation + +Naming convention: + - Built-in: "regex", "list", "json", "sql" + - External: "provider.name" (e.g., "galileo.luna") + - Agent-scoped: "agent:name" (custom code deployed with agent) + +External rules are installed via separate packages (e.g., agent-control-rule-galileo). +Custom rules are Rule classes deployed with the engine. +Their schemas are registered via initAgent for validation purposes. 
+""" + +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("agent-control-rules") +except PackageNotFoundError: + __version__ = "0.0.0.dev" + +# Core infrastructure - export from _base, _discovery, _factory, and _registry +from agent_control_rules._base import ( + Rule, + RuleConfig, + RuleMetadata, +) +from agent_control_rules._discovery import ( + discover_rules, + ensure_rules_discovered, + list_rules, + reset_rule_discovery, +) +from agent_control_rules._factory import clear_rule_cache, get_rule_instance +from agent_control_rules._registry import ( + clear_rules, + get_all_rules, + get_rule, + register_rule, +) + +# Import built-in rules to auto-register them +from agent_control_rules.json import JSONRule, JSONRuleConfig +from agent_control_rules.list import ListRule, ListRuleConfig +from agent_control_rules.regex import RegexRule, RegexRuleConfig +from agent_control_rules.sql import SQLRule, SQLRuleConfig + +__all__ = [ + # Core infrastructure + "Rule", + "RuleConfig", + "RuleMetadata", + "register_rule", + "get_rule", + "get_all_rules", + "clear_rules", + "discover_rules", + "ensure_rules_discovered", + "reset_rule_discovery", + "list_rules", + "get_rule_instance", + "clear_rule_cache", + # Built-in rules + "RegexRule", + "RegexRuleConfig", + "ListRule", + "ListRuleConfig", + "JSONRule", + "JSONRuleConfig", + "SQLRule", + "SQLRuleConfig", +] diff --git a/evaluators/builtin/src/agent_control_evaluators/_base.py b/rules/builtin/src/agent_control_rules/_base.py similarity index 60% rename from evaluators/builtin/src/agent_control_evaluators/_base.py rename to rules/builtin/src/agent_control_rules/_base.py index c32b92a5..0c3b3609 100644 --- a/evaluators/builtin/src/agent_control_evaluators/_base.py +++ b/rules/builtin/src/agent_control_rules/_base.py @@ -1,4 +1,4 @@ -"""Evaluator base classes and metadata.""" +"""Rule base classes and metadata.""" from __future__ import annotations @@ -7,7 +7,7 @@ from dataclasses import dataclass from typing 
import TYPE_CHECKING, Any, ClassVar, Generic, TypeVar -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult from agent_control_models.base import BaseModel if TYPE_CHECKING: @@ -16,17 +16,17 @@ logger = logging.getLogger(__name__) -class EvaluatorConfig(BaseModel): - """Base class for typed evaluator configurations. +class RuleConfig(BaseModel): + """Base class for typed rule configurations. Extends the project's BaseModel to ensure consistent behavior - and enable type checking across all evaluator configs. + and enable type checking across all rule configs. Example: ```python - from agent_control_evaluators import EvaluatorConfig + from agent_control_rules import RuleConfig - class MyEvaluatorConfig(EvaluatorConfig): + class MyRuleConfig(RuleConfig): pattern: str threshold: float = 0.5 ``` @@ -35,18 +35,18 @@ class MyEvaluatorConfig(EvaluatorConfig): pass -ConfigT = TypeVar("ConfigT", bound=EvaluatorConfig) +ConfigT = TypeVar("ConfigT", bound=RuleConfig) @dataclass -class EvaluatorMetadata: - """Metadata about an evaluator. +class RuleMetadata: + """Metadata about a rule. Attributes: - name: Unique evaluator name (e.g., "regex", "galileo.luna") - version: Evaluator version string + name: Unique rule name (e.g., "regex", "galileo.luna") + version: Rule version string description: Human-readable description - requires_api_key: Whether the evaluator requires an API key + requires_api_key: Whether the rule requires an API key timeout_ms: Default timeout in milliseconds """ @@ -57,16 +57,16 @@ class EvaluatorMetadata: timeout_ms: int = 10000 -class Evaluator(ABC, Generic[ConfigT]): # noqa: UP046 - need Python 3.10 compat - """Base class for all evaluators (built-in, external, or custom). +class Rule(ABC, Generic[ConfigT]): # noqa: UP046 - need Python 3.10 compat + """Base class for all rules (built-in, external, or custom). - All evaluators follow the same pattern: + All rules follow the same pattern: 1. 
Define metadata and config_model as class variables 2. Implement evaluate() method - 3. Register with @register_evaluator decorator + 3. Register with @register_rule decorator IMPORTANT - Instance Caching & Thread Safety: - Evaluator instances are cached and reused across multiple evaluate() calls + Rule instances are cached and reused across multiple evaluate() calls when they have the same configuration. This means: - DO NOT store mutable request-scoped state on `self` @@ -81,7 +81,7 @@ def __init__(self, config): async def evaluate(self, data): result = self._compiled_regex.search(data) # OK: uses immutable state - return EvaluatorResult(matched=result is not None, ...) + return RuleResult(matched=result is not None, ...) Bad pattern: def __init__(self, config): @@ -93,28 +93,28 @@ async def evaluate(self, data): Example: ```python - from agent_control_evaluators import ( - Evaluator, - EvaluatorConfig, - EvaluatorMetadata, - register_evaluator, + from agent_control_rules import ( + Rule, + RuleConfig, + RuleMetadata, + register_rule, ) - from agent_control_models import EvaluatorResult + from agent_control_models import RuleResult - class MyConfig(EvaluatorConfig): + class MyConfig(RuleConfig): threshold: float = 0.5 - @register_evaluator - class MyEvaluator(Evaluator[MyConfig]): - metadata = EvaluatorMetadata( - name="my-evaluator", + @register_rule + class MyRule(Rule[MyConfig]): + metadata = RuleMetadata( + name="my-rule", version="1.0.0", - description="My custom evaluator", + description="My custom rule", ) config_model = MyConfig - async def evaluate(self, data: Any) -> EvaluatorResult: - return EvaluatorResult( + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult( matched=len(str(data)) > self.config.threshold, confidence=1.0, message="Evaluation complete" @@ -123,11 +123,11 @@ async def evaluate(self, data: Any) -> EvaluatorResult: """ - metadata: ClassVar[EvaluatorMetadata] - config_model: ClassVar[type[EvaluatorConfig]] + metadata: 
ClassVar[RuleMetadata] + config_model: ClassVar[type[RuleConfig]] def __init__(self, config: ConfigT) -> None: - """Initialize evaluator with validated config. + """Initialize rule with validated config. Args: config: Validated configuration (instance of config_model) @@ -136,7 +136,7 @@ def __init__(self, config: ConfigT) -> None: @classmethod def from_dict(cls, config_dict: dict[str, Any]) -> Self: - """Create evaluator instance from raw config dict. + """Create rule instance from raw config dict. Validates config against config_model before creating instance. @@ -144,20 +144,20 @@ def from_dict(cls, config_dict: dict[str, Any]) -> Self: config_dict: Raw configuration dictionary Returns: - Evaluator instance with validated config + Rule instance with validated config """ validated = cls.config_model(**config_dict) return cls(validated) # type: ignore[arg-type] @abstractmethod - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate data and return result. Args: data: Data extracted by selector from the payload Returns: - EvaluatorResult with matched status, confidence, and message + RuleResult with matched status, confidence, and message """ pass @@ -168,12 +168,12 @@ def get_timeout_seconds(self) -> float: @classmethod def is_available(cls) -> bool: - """Check if evaluator dependencies are satisfied. + """Check if rule dependencies are satisfied. - Override this method for evaluators with optional dependencies. + Override this method for rules with optional dependencies. Return False to skip registration during discovery. 
Returns: - True if evaluator can be used, False otherwise + True if rule can be used, False otherwise """ return True diff --git a/rules/builtin/src/agent_control_rules/_discovery.py b/rules/builtin/src/agent_control_rules/_discovery.py new file mode 100644 index 00000000..cff930be --- /dev/null +++ b/rules/builtin/src/agent_control_rules/_discovery.py @@ -0,0 +1,143 @@ +"""Rule discovery via entry points.""" + +from __future__ import annotations + +import logging +import threading +from importlib import import_module +from importlib.metadata import entry_points +from typing import TYPE_CHECKING, Any + +from agent_control_rules._registry import ( + get_all_rules, + get_rule, + register_rule, +) + +if TYPE_CHECKING: + from agent_control_rules._base import Rule + +logger = logging.getLogger(__name__) + +_DISCOVERY_COMPLETE = False +_DISCOVERY_LOCK = threading.Lock() + +_BUILTIN_RULES = ( + ("regex", "agent_control_rules.regex", "RegexRule"), + ("list", "agent_control_rules.list", "ListRule"), + ("json", "agent_control_rules.json", "JSONRule"), + ("sql", "agent_control_rules.sql", "SQLRule"), +) + + +def _register_rule_class(rule_class: type[Rule[Any]]) -> bool: + name = rule_class.metadata.name + + if get_rule(name) is not None: + return False + + if not rule_class.is_available(): + logger.debug(f"Rule '{name}' not available, skipping") + return False + + register_rule(rule_class) + logger.debug(f"Registered rule: {name}") + return True + + +def _discover_builtin_rules_from_source() -> int: + """Register builtin rules when package entry points are unavailable. + + This fallback is mainly for direct source-tree execution where the package + has not been installed and importlib.metadata has no entry-point metadata. 
+ """ + discovered = 0 + + for entry_point_name, module_name, class_name in _BUILTIN_RULES: + try: + module = import_module(module_name) + rule_class = getattr(module, class_name) + if _register_rule_class(rule_class): + discovered += 1 + except Exception as e: + logger.warning(f"Failed to load rule '{entry_point_name}': {e}") + + return discovered + + +def discover_rules() -> int: + """Discover and register rules via entry points. + + All rules (built-in and third-party) are discovered via the + 'agent_control.rules' entry point group. Rules are only registered + if their `is_available()` method returns True. + + Safe to call multiple times - only runs discovery once. + Thread-safe via lock. + + Returns: + Number of rules discovered + """ + global _DISCOVERY_COMPLETE + + # Fast path without lock + if _DISCOVERY_COMPLETE: + return 0 + + with _DISCOVERY_LOCK: + # Double-check after acquiring lock + if _DISCOVERY_COMPLETE: + return 0 + + discovered = 0 + + # Discover ALL rules (built-in and third-party) via entry points. + # Only register rules where is_available() returns True. + try: + eps = list(entry_points(group="agent_control.rules")) + for ep in eps: + try: + rule_class = ep.load() + if _register_rule_class(rule_class): + discovered += 1 + except Exception as e: + logger.warning(f"Failed to load rule '{ep.name}': {e}") + + if not eps: + discovered += _discover_builtin_rules_from_source() + except Exception as e: + logger.debug(f"Entry point discovery not available: {e}") + + _DISCOVERY_COMPLETE = True + logger.debug(f"Rule discovery complete: {discovered} new rules") + return discovered + + +def ensure_rules_discovered() -> None: + """Ensure rule discovery has run. Call this before using rules.""" + if not _DISCOVERY_COMPLETE: + discover_rules() + + +def reset_rule_discovery() -> None: + """Reset discovery state. 
Useful for testing.""" + global _DISCOVERY_COMPLETE + with _DISCOVERY_LOCK: + _DISCOVERY_COMPLETE = False + + +# ============================================================================= +# Public rule API +# ============================================================================= + + +def list_rules() -> dict[str, type[Rule[Any]]]: + """List all registered rules. + + This function ensures rule discovery has run before returning results. + + Returns: + Dictionary mapping rule names to rule classes + """ + ensure_rules_discovered() + return get_all_rules() diff --git a/rules/builtin/src/agent_control_rules/_factory.py b/rules/builtin/src/agent_control_rules/_factory.py new file mode 100644 index 00000000..6eba1a7d --- /dev/null +++ b/rules/builtin/src/agent_control_rules/_factory.py @@ -0,0 +1,105 @@ +"""Rule factory with instance caching.""" + +from __future__ import annotations + +import json +import logging +import os +from collections import OrderedDict +from typing import TYPE_CHECKING, Any + +from agent_control_rules._discovery import list_rules + +if TYPE_CHECKING: + from agent_control_models import RuleSpec + + from agent_control_rules._base import Rule + +logger = logging.getLogger(__name__) + +# Configuration +DEFAULT_CACHE_SIZE = 100 +MIN_CACHE_SIZE = 1 # Minimum to avoid infinite loop in eviction + + +def _parse_cache_size() -> int: + """Parse RULE_CACHE_SIZE from env with safe fallback.""" + raw = os.environ.get("RULE_CACHE_SIZE") + if raw is None: + return DEFAULT_CACHE_SIZE + try: + return int(raw) + except ValueError: + logger.warning( + f"Invalid RULE_CACHE_SIZE '{raw}', using default {DEFAULT_CACHE_SIZE}" + ) + return DEFAULT_CACHE_SIZE + + +RULE_CACHE_SIZE = max(_parse_cache_size(), MIN_CACHE_SIZE) + +# LRU cache for rule instances: cache_key -> Rule instance +_RULE_CACHE: OrderedDict[str, Rule[Any]] = OrderedDict() + + +def _config_hash(config: dict[str, Any]) -> str: + """Create a hashable key from config dict.""" + return 
json.dumps(config, sort_keys=True, default=str) + + +def get_rule_instance(rule_spec: RuleSpec) -> Rule[Any]: + """Get or create a cached rule instance from specification. + + Uses LRU caching to reuse rule instances with the same config. + Cache key is: {rule_name}:{config_hash} + + WARNING: Rule instances are cached and reused across requests! + Rule implementations MUST be stateless - do not store mutable + request-scoped state on the rule instance. See Rule + docstring for details on safe patterns. + + Args: + rule_spec: The rule specification with name and config + + Returns: + Rule instance (cached or new) + + Raises: + ValueError: If rule not found + """ + # Build cache key + cache_key = f"{rule_spec.name}:{_config_hash(rule_spec.config)}" + + # Check cache + if cache_key in _RULE_CACHE: + # Move to end (most recently used) + _RULE_CACHE.move_to_end(cache_key) + logger.debug(f"Cache hit for rule: {rule_spec.name}") + return _RULE_CACHE[cache_key] + + # Cache miss - create new instance + rules = list_rules() + rule_cls = rules.get(rule_spec.name) + + if rule_cls is None: + raise ValueError( + f"Rule '{rule_spec.name}' not found. " + f"Available rules: {', '.join(rules.keys())}" + ) + + logger.debug(f"Cache miss, creating rule: {rule_spec.name}") + instance = rule_cls.from_dict(rule_spec.config) + + # Evict oldest if cache is full + while len(_RULE_CACHE) >= RULE_CACHE_SIZE: + evicted_key, _ = _RULE_CACHE.popitem(last=False) + logger.debug(f"Evicted rule from cache: {evicted_key}") + + # Cache the instance + _RULE_CACHE[cache_key] = instance + return instance + + +def clear_rule_cache() -> None: + """Clear all cached rule instances. 
Useful for testing.""" + _RULE_CACHE.clear() diff --git a/rules/builtin/src/agent_control_rules/_registry.py b/rules/builtin/src/agent_control_rules/_registry.py new file mode 100644 index 00000000..6b66d606 --- /dev/null +++ b/rules/builtin/src/agent_control_rules/_registry.py @@ -0,0 +1,87 @@ +"""Rule registry for registration and lookup.""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from agent_control_rules._base import Rule + +logger = logging.getLogger(__name__) + +# ============================================================================= +# Rule Registry +# ============================================================================= + +_RULE_REGISTRY: dict[str, type[Rule[Any]]] = {} + + +def register_rule( + rule_class: type[Rule[Any]], +) -> type[Rule[Any]]: + """Register a rule class by its metadata name. + + Can be used as a decorator or called directly. Respects the rule's + is_available() method - rules with unavailable dependencies are + silently skipped. + + Args: + rule_class: Rule class to register + + Returns: + The same rule class (for decorator usage) + + Raises: + ValueError: If rule name already registered with different class + + Example: + ```python + @register_rule + class MyRule(Rule[MyConfig]): + metadata = RuleMetadata(name="my-rule", ...) + ... 
+ ``` + """ + name = rule_class.metadata.name + + # Check if rule dependencies are satisfied + if not rule_class.is_available(): + logger.debug(f"Rule '{name}' not available (is_available=False), skipping") + return rule_class + + if name in _RULE_REGISTRY: + # Allow re-registration of same class (e.g., during hot reload) + if _RULE_REGISTRY[name] is rule_class: + return rule_class + raise ValueError(f"Rule '{name}' is already registered") + + _RULE_REGISTRY[name] = rule_class + logger.debug(f"Registered rule: {name} v{rule_class.metadata.version}") + return rule_class + + +def get_rule(name: str) -> type[Rule[Any]] | None: + """Get a registered rule by name. + + Args: + name: Rule name to look up + + Returns: + Rule class if found, None otherwise + """ + return _RULE_REGISTRY.get(name) + + +def get_all_rules() -> dict[str, type[Rule[Any]]]: + """Get all registered rules. + + Returns: + Dictionary mapping rule names to rule classes + """ + return dict(_RULE_REGISTRY) + + +def clear_rules() -> None: + """Clear all registered rules. 
Useful for testing.""" + _RULE_REGISTRY.clear() diff --git a/rules/builtin/src/agent_control_rules/json/__init__.py b/rules/builtin/src/agent_control_rules/json/__init__.py new file mode 100644 index 00000000..e7b52255 --- /dev/null +++ b/rules/builtin/src/agent_control_rules/json/__init__.py @@ -0,0 +1,6 @@ +"""JSON validation rule.""" + +from agent_control_rules.json.config import JSONRuleConfig +from agent_control_rules.json.rule import JSONRule + +__all__ = ["JSONRule", "JSONRuleConfig"] diff --git a/evaluators/builtin/src/agent_control_evaluators/json/config.py b/rules/builtin/src/agent_control_rules/json/config.py similarity index 96% rename from evaluators/builtin/src/agent_control_evaluators/json/config.py rename to rules/builtin/src/agent_control_rules/json/config.py index fc891430..faa74b49 100644 --- a/evaluators/builtin/src/agent_control_evaluators/json/config.py +++ b/rules/builtin/src/agent_control_rules/json/config.py @@ -1,15 +1,15 @@ -"""Configuration for JSON validation evaluator.""" +"""Configuration for JSON validation rule.""" from typing import Any, Literal import re2 from pydantic import Field, field_validator, model_validator -from agent_control_evaluators._base import EvaluatorConfig +from agent_control_rules._base import RuleConfig -class JSONEvaluatorConfig(EvaluatorConfig): - """Configuration for JSON validation evaluator. +class JSONRuleConfig(RuleConfig): + """Configuration for JSON validation rule. Multiple validation checks can be combined. Checks are evaluated in this order (fail-fast): 1. 
JSON syntax/validity (always - ensures data is valid JSON) @@ -219,7 +219,7 @@ def validate_constraints( return v @model_validator(mode="after") - def validate_has_checks(self) -> "JSONEvaluatorConfig": + def validate_has_checks(self) -> "JSONRuleConfig": """Ensure at least one validation check is configured.""" if not any( [ diff --git a/evaluators/builtin/src/agent_control_evaluators/json/evaluator.py b/rules/builtin/src/agent_control_rules/json/rule.py similarity index 91% rename from evaluators/builtin/src/agent_control_evaluators/json/evaluator.py rename to rules/builtin/src/agent_control_rules/json/rule.py index 35bbf169..dcdce867 100644 --- a/evaluators/builtin/src/agent_control_evaluators/json/evaluator.py +++ b/rules/builtin/src/agent_control_rules/json/rule.py @@ -1,21 +1,21 @@ -"""JSON validation evaluator with schema, type, required field, constraint, and pattern checks.""" +"""JSON validation rule with schema, type, required field, constraint, and pattern checks.""" import asyncio import json from typing import Any import re2 -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult from jsonschema import Draft7Validator -from agent_control_evaluators._base import Evaluator, EvaluatorMetadata -from agent_control_evaluators._registry import register_evaluator -from agent_control_evaluators.json.config import JSONEvaluatorConfig +from agent_control_rules._base import Rule, RuleMetadata +from agent_control_rules._registry import register_rule +from agent_control_rules.json.config import JSONRuleConfig -@register_evaluator -class JSONEvaluator(Evaluator[JSONEvaluatorConfig]): - """Comprehensive JSON validation evaluator. +@register_rule +class JSONRule(Rule[JSONRuleConfig]): + """Comprehensive JSON validation rule. Validates JSON data in this order (fail-fast, simple to complex): 1. 
JSON syntax/validity - Parse and validate JSON structure @@ -42,7 +42,7 @@ class JSONEvaluator(Evaluator[JSONEvaluatorConfig]): {"field_patterns": {"email": "^[a-z0-9._%+-]+@[a-z0-9.-]+\\\\.[a-z]+$"}} """ - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="json", version="1.0.0", description=( @@ -51,13 +51,13 @@ class JSONEvaluator(Evaluator[JSONEvaluatorConfig]): ), timeout_ms=15000, # Longer timeout for schema validation ) - config_model = JSONEvaluatorConfig + config_model = JSONRuleConfig # Instance variables (typed to support None when feature not configured) _schema_validator: Draft7Validator | None _compiled_patterns: dict[str, Any] | None - def __init__(self, config: JSONEvaluatorConfig) -> None: + def __init__(self, config: JSONRuleConfig) -> None: super().__init__(config) # Pre-compile schema validator (thread-safe, immutable) @@ -90,7 +90,7 @@ def __init__(self, config: JSONEvaluatorConfig) -> None: else: self._compiled_patterns = None - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate JSON data against all configured validation checks. Note: Validation is offloaded to a thread executor to avoid blocking @@ -99,7 +99,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: # Offload synchronous validation to thread to avoid blocking event loop return await asyncio.to_thread(self._evaluate_sync, data) - def _evaluate_sync(self, data: Any) -> EvaluatorResult: + def _evaluate_sync(self, data: Any) -> RuleResult: """Synchronous validation logic (called via thread executor).""" # 1. 
JSON Syntax/Validity Check @@ -141,7 +141,7 @@ def _evaluate_sync(self, data: Any) -> EvaluatorResult: return pattern_result # All checks passed - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="JSON validation passed all checks", @@ -166,24 +166,24 @@ def _parse_json(self, data: Any) -> tuple[dict | list | None, str | None]: # Unsupported type return None, f"Unsupported data type: {type(data).__name__}" - def _handle_parse_error(self, error: str) -> EvaluatorResult: + def _handle_parse_error(self, error: str) -> RuleResult: """Handle JSON parse errors based on allow_invalid_json config.""" if self.config.allow_invalid_json: # Allow invalid JSON through as non-match - return EvaluatorResult( + return RuleResult( matched=False, confidence=0.0, message=f"Invalid JSON allowed: {error}", ) else: # Block invalid JSON - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"Invalid JSON blocked: {error}", ) - def _check_schema(self, data: dict | list) -> EvaluatorResult | None: + def _check_schema(self, data: dict | list) -> RuleResult | None: """Validate against JSON Schema. Returns error result or None.""" if not self._schema_validator: return None @@ -203,20 +203,20 @@ def _check_schema(self, data: dict | list) -> EvaluatorResult | None: if len(errors) > 3: message += f" (+{len(errors) - 3} more errors)" - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=message, metadata={"error_count": len(errors), "errors": error_messages}, ) - def _check_types(self, data: dict | list) -> EvaluatorResult | None: + def _check_types(self, data: dict | list) -> RuleResult | None: """Validate field types. 
Returns error result or None.""" if not self.config.field_types: return None if not isinstance(data, dict): - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message="Type checking requires a JSON object, got array/primitive", @@ -260,20 +260,20 @@ def _check_types(self, data: dict | list) -> EvaluatorResult | None: if not errors: return None # Validation passed - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"Type validation failed: {'; '.join(errors[:3])}", metadata={"error_count": len(errors), "errors": errors}, ) - def _check_required(self, data: dict | list) -> EvaluatorResult | None: + def _check_required(self, data: dict | list) -> RuleResult | None: """Validate required fields are present. Returns error result or None.""" if not self.config.required_fields: return None if not isinstance(data, dict): - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message="Required field checking requires a JSON object, got array/primitive", @@ -292,20 +292,20 @@ def _check_required(self, data: dict | list) -> EvaluatorResult | None: if not missing: return None # Validation passed - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"Missing required fields: {', '.join(missing[:5])}", metadata={"missing_count": len(missing), "missing_fields": missing}, ) - def _check_constraints(self, data: dict | list) -> EvaluatorResult | None: + def _check_constraints(self, data: dict | list) -> RuleResult | None: """Validate field constraints (ranges, enums, string length).""" if not self.config.field_constraints: return None if not isinstance(data, dict): - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message="Constraint checking requires a JSON object, got array/primitive", @@ -391,20 +391,20 @@ def _check_constraints(self, data: dict | list) -> EvaluatorResult | None: if not errors: return None # Validation passed - return 
EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"Constraint validation failed: {'; '.join(errors[:3])}", metadata={"error_count": len(errors), "errors": errors}, ) - def _check_patterns(self, data: dict | list) -> EvaluatorResult | None: + def _check_patterns(self, data: dict | list) -> RuleResult | None: """Validate field values match patterns. Returns error result or None.""" if not self._compiled_patterns: return None if not isinstance(data, dict): - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message="Pattern matching requires a JSON object, got array/primitive", @@ -441,7 +441,7 @@ def _check_patterns(self, data: dict | list) -> EvaluatorResult | None: if success: return None # Validation passed - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=message, diff --git a/rules/builtin/src/agent_control_rules/list/__init__.py b/rules/builtin/src/agent_control_rules/list/__init__.py new file mode 100644 index 00000000..cc2aef24 --- /dev/null +++ b/rules/builtin/src/agent_control_rules/list/__init__.py @@ -0,0 +1,6 @@ +"""List rule for value matching.""" + +from agent_control_rules.list.config import ListRuleConfig +from agent_control_rules.list.rule import ListRule + +__all__ = ["ListRule", "ListRuleConfig"] diff --git a/evaluators/builtin/src/agent_control_evaluators/list/config.py b/rules/builtin/src/agent_control_rules/list/config.py similarity index 87% rename from evaluators/builtin/src/agent_control_evaluators/list/config.py rename to rules/builtin/src/agent_control_rules/list/config.py index a6f323d2..adc2bbda 100644 --- a/evaluators/builtin/src/agent_control_evaluators/list/config.py +++ b/rules/builtin/src/agent_control_rules/list/config.py @@ -1,14 +1,14 @@ -"""Configuration for list evaluator.""" +"""Configuration for list rule.""" from typing import Literal from pydantic import Field, field_validator -from agent_control_evaluators._base import EvaluatorConfig 
+from agent_control_rules._base import RuleConfig -class ListEvaluatorConfig(EvaluatorConfig): - """Configuration for list evaluator.""" +class ListRuleConfig(RuleConfig): + """Configuration for list rule.""" values: list[str | int | float] = Field( ..., description="List of values to match against" diff --git a/evaluators/builtin/src/agent_control_evaluators/list/evaluator.py b/rules/builtin/src/agent_control_rules/list/rule.py similarity index 84% rename from evaluators/builtin/src/agent_control_evaluators/list/evaluator.py rename to rules/builtin/src/agent_control_rules/list/rule.py index 27ecc0dc..4aba141c 100644 --- a/evaluators/builtin/src/agent_control_evaluators/list/evaluator.py +++ b/rules/builtin/src/agent_control_rules/list/rule.py @@ -1,19 +1,19 @@ -"""List evaluator for value matching.""" +"""List rule for value matching.""" import re from typing import Any import re2 -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult -from agent_control_evaluators._base import Evaluator, EvaluatorMetadata -from agent_control_evaluators._registry import register_evaluator -from agent_control_evaluators.list.config import ListEvaluatorConfig +from agent_control_rules._base import Rule, RuleMetadata +from agent_control_rules._registry import register_rule +from agent_control_rules.list.config import ListRuleConfig -@register_evaluator -class ListEvaluator(Evaluator[ListEvaluatorConfig]): - """List-based value matching evaluator. +@register_rule +class ListRule(Rule[ListRuleConfig]): + """List-based value matching rule. Checks if data matches values in a list. 
Supports: - any/all logic (match any value vs match all values) @@ -26,14 +26,14 @@ class ListEvaluator(Evaluator[ListEvaluatorConfig]): {"values": ["approved"], "match_on": "no_match"} # Require approval """ - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="list", version="1.0.0", description="List-based value matching with flexible logic", ) - config_model = ListEvaluatorConfig + config_model = ListRuleConfig - def __init__(self, config: ListEvaluatorConfig) -> None: + def __init__(self, config: ListRuleConfig) -> None: super().__init__(config) # Defensive filtering keeps legacy invalid configs from compiling into pathological regexes. normalized_values = [str(v) for v in config.values] @@ -65,14 +65,14 @@ def _build_regex(self) -> Any: return re2.compile(pattern) - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate data against the value list. Args: data: Data to check (string or list of strings) Returns: - EvaluatorResult based on matching logic + RuleResult based on matching logic """ # Normalize input if data is None: @@ -84,7 +84,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: # Short-circuit if input is empty if not input_values: - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="Empty input - control ignored", @@ -93,7 +93,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: # Short-circuit if control values are empty if self._regex is None: - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="Empty control values - control ignored", @@ -127,7 +127,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: if len(matches) > 5: msg += f" (+{len(matches) - 5} more)" - return EvaluatorResult( + return RuleResult( matched=is_match, confidence=1.0, message=msg, diff --git a/evaluators/builtin/src/agent_control_evaluators/py.typed b/rules/builtin/src/agent_control_rules/py.typed 
similarity index 100% rename from evaluators/builtin/src/agent_control_evaluators/py.typed rename to rules/builtin/src/agent_control_rules/py.typed diff --git a/rules/builtin/src/agent_control_rules/regex/__init__.py b/rules/builtin/src/agent_control_rules/regex/__init__.py new file mode 100644 index 00000000..af6e2df1 --- /dev/null +++ b/rules/builtin/src/agent_control_rules/regex/__init__.py @@ -0,0 +1,6 @@ +"""Regex rule for pattern matching.""" + +from agent_control_rules.regex.config import RegexRuleConfig +from agent_control_rules.regex.rule import RegexRule + +__all__ = ["RegexRule", "RegexRuleConfig"] diff --git a/evaluators/builtin/src/agent_control_evaluators/regex/config.py b/rules/builtin/src/agent_control_rules/regex/config.py similarity index 74% rename from evaluators/builtin/src/agent_control_evaluators/regex/config.py rename to rules/builtin/src/agent_control_rules/regex/config.py index ed325096..eb5dabe7 100644 --- a/evaluators/builtin/src/agent_control_evaluators/regex/config.py +++ b/rules/builtin/src/agent_control_rules/regex/config.py @@ -1,13 +1,13 @@ -"""Configuration for regex evaluator.""" +"""Configuration for regex rule.""" import re2 from pydantic import Field, field_validator -from agent_control_evaluators._base import EvaluatorConfig +from agent_control_rules._base import RuleConfig -class RegexEvaluatorConfig(EvaluatorConfig): - """Configuration for regex evaluator.""" +class RegexRuleConfig(RuleConfig): + """Configuration for regex rule.""" pattern: str = Field(..., description="Regular expression pattern (RE2 syntax)") flags: list[str] | None = Field(default=None, description="Regex flags (e.g., ['IGNORECASE'])") diff --git a/evaluators/builtin/src/agent_control_evaluators/regex/evaluator.py b/rules/builtin/src/agent_control_rules/regex/rule.py similarity index 68% rename from evaluators/builtin/src/agent_control_evaluators/regex/evaluator.py rename to rules/builtin/src/agent_control_rules/regex/rule.py index 2348be0f..3d4d8a6c 
100644 --- a/evaluators/builtin/src/agent_control_evaluators/regex/evaluator.py +++ b/rules/builtin/src/agent_control_rules/regex/rule.py @@ -1,18 +1,18 @@ -"""Regex evaluator for pattern matching.""" +"""Regex rule for pattern matching.""" from typing import Any import re2 -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult -from agent_control_evaluators._base import Evaluator, EvaluatorMetadata -from agent_control_evaluators._registry import register_evaluator -from agent_control_evaluators.regex.config import RegexEvaluatorConfig +from agent_control_rules._base import Rule, RuleMetadata +from agent_control_rules._registry import register_rule +from agent_control_rules.regex.config import RegexRuleConfig -@register_evaluator -class RegexEvaluator(Evaluator[RegexEvaluatorConfig]): - """Regular expression pattern matching evaluator. +@register_rule +class RegexRule(Rule[RegexRuleConfig]): + """Regular expression pattern matching rule. Matches data against a regex pattern using Google RE2 for safety (protects against ReDoS attacks). @@ -25,14 +25,14 @@ class RegexEvaluator(Evaluator[RegexEvaluatorConfig]): {"pattern": "secret", "flags": ["IGNORECASE"]} # Case-insensitive """ - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="regex", version="1.0.0", description="Regular expression pattern matching (RE2)", ) - config_model = RegexEvaluatorConfig + config_model = RegexRuleConfig - def __init__(self, config: RegexEvaluatorConfig) -> None: + def __init__(self, config: RegexRuleConfig) -> None: super().__init__(config) # Build pattern with flags pattern = config.pattern @@ -45,17 +45,17 @@ def __init__(self, config: RegexEvaluatorConfig) -> None: # RE2 has limited flag support - other flags are ignored self._regex = re2.compile(pattern) - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate data against the regex pattern. 
Args: data: Data to match against (will be converted to string) Returns: - EvaluatorResult with matched=True if pattern found + RuleResult with matched=True if pattern found """ if data is None: - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="No data to match", @@ -65,7 +65,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: match = self._regex.search(text) is_match = match is not None - return EvaluatorResult( + return RuleResult( matched=is_match, confidence=1.0, message=f"Pattern '{self.config.pattern}' {'found' if is_match else 'not found'}", diff --git a/rules/builtin/src/agent_control_rules/sql/__init__.py b/rules/builtin/src/agent_control_rules/sql/__init__.py new file mode 100644 index 00000000..a900de5b --- /dev/null +++ b/rules/builtin/src/agent_control_rules/sql/__init__.py @@ -0,0 +1,6 @@ +"""SQL validation rule.""" + +from agent_control_rules.sql.config import SQLRuleConfig +from agent_control_rules.sql.rule import SQLRule + +__all__ = ["SQLRule", "SQLRuleConfig"] diff --git a/evaluators/builtin/src/agent_control_evaluators/sql/config.py b/rules/builtin/src/agent_control_rules/sql/config.py similarity index 96% rename from evaluators/builtin/src/agent_control_evaluators/sql/config.py rename to rules/builtin/src/agent_control_rules/sql/config.py index b43b3a48..27783272 100644 --- a/evaluators/builtin/src/agent_control_evaluators/sql/config.py +++ b/rules/builtin/src/agent_control_rules/sql/config.py @@ -1,15 +1,15 @@ -"""Configuration for SQL validation evaluator.""" +"""Configuration for SQL validation rule.""" import warnings from typing import Literal from pydantic import Field, model_validator -from agent_control_evaluators._base import EvaluatorConfig +from agent_control_rules._base import RuleConfig -class SQLEvaluatorConfig(EvaluatorConfig): - """Configuration for comprehensive SQL control evaluator. +class SQLRuleConfig(RuleConfig): + """Configuration for comprehensive SQL control rule. 
Validates SQL query strings using AST-based analysis via sqlglot. Controls are evaluated in order: @@ -149,7 +149,7 @@ class SQLEvaluatorConfig(EvaluatorConfig): ) @model_validator(mode="after") - def validate_config(self) -> "SQLEvaluatorConfig": + def validate_config(self) -> "SQLRuleConfig": """Validate configuration constraints.""" # Validate operation restrictions if self.blocked_operations and self.allowed_operations: diff --git a/evaluators/builtin/src/agent_control_evaluators/sql/evaluator.py b/rules/builtin/src/agent_control_rules/sql/rule.py similarity index 95% rename from evaluators/builtin/src/agent_control_evaluators/sql/evaluator.py rename to rules/builtin/src/agent_control_rules/sql/rule.py index 2bfbcefa..00ae8b89 100644 --- a/evaluators/builtin/src/agent_control_evaluators/sql/evaluator.py +++ b/rules/builtin/src/agent_control_rules/sql/rule.py @@ -1,4 +1,4 @@ -"""Comprehensive SQL validation evaluator. +"""Comprehensive SQL validation rule. Supports multi-statement, operation, table, column, and limit checking. """ @@ -10,12 +10,12 @@ from typing import Any, cast import sqlglot -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult from sqlglot import exp -from agent_control_evaluators._base import Evaluator, EvaluatorMetadata -from agent_control_evaluators._registry import register_evaluator -from agent_control_evaluators.sql.config import SQLEvaluatorConfig +from agent_control_rules._base import Rule, RuleMetadata +from agent_control_rules._registry import register_rule +from agent_control_rules.sql.config import SQLRuleConfig logger = logging.getLogger(__name__) @@ -61,9 +61,9 @@ class RequiredColumnValueRule: context_key: str -@register_evaluator -class SQLEvaluator(Evaluator[SQLEvaluatorConfig]): - """Comprehensive SQL validation evaluator. +@register_rule +class SQLRule(Rule[SQLRuleConfig]): + """Comprehensive SQL validation rule. Validates SQL queries in this order: 1. 
Multi-Statement: Control whether multiple SQL statements are allowed @@ -103,7 +103,7 @@ class SQLEvaluator(Evaluator[SQLEvaluatorConfig]): } """ - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="sql", version="1.0.0", description=( @@ -112,7 +112,7 @@ class SQLEvaluator(Evaluator[SQLEvaluatorConfig]): ), timeout_ms=10000, ) - config_model = SQLEvaluatorConfig + config_model = SQLRuleConfig # SQL operation type mappings DDL_OPERATIONS = { @@ -142,7 +142,7 @@ class SQLEvaluator(Evaluator[SQLEvaluatorConfig]): "SET TRANSACTION", } - def __init__(self, config: SQLEvaluatorConfig) -> None: + def __init__(self, config: SQLRuleConfig) -> None: super().__init__(config) # Pre-process operation controls @@ -397,7 +397,7 @@ def _is_top_level_select(self, select_node: exp.Select) -> bool: def _check_multi_statements( self, parsed: list, query: str - ) -> EvaluatorResult | None: + ) -> RuleResult | None: """Check multi-statement controls. Args: @@ -405,13 +405,13 @@ def _check_multi_statements( query: Original SQL query string Returns: - EvaluatorResult if multi-statement rules are violated, None otherwise + RuleResult if multi-statement rules are violated, None otherwise """ stmt_count = len([stmt for stmt in parsed if stmt is not None]) # Check if multi-statements are disallowed if not self.config.allow_multi_statements and stmt_count > 1: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"Multiple SQL statements not allowed (found {stmt_count} statements)", @@ -420,7 +420,7 @@ def _check_multi_statements( # Check max_statements limit if self.config.max_statements and stmt_count > self.config.max_statements: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -438,7 +438,7 @@ def _check_multi_statements( def _check_limits( self, analyses: list[QueryAnalysis], query: str - ) -> EvaluatorResult | None: + ) -> RuleResult | None: """Check LIMIT clause requirements for all SELECT queries. 
Recursively checks all SELECT statements including subqueries to ensure @@ -449,7 +449,7 @@ def _check_limits( query: Original SQL query string Returns: - EvaluatorResult if LIMIT rules are violated, None otherwise + RuleResult if LIMIT rules are violated, None otherwise """ for analysis in analyses: # Use pre-computed SELECT nodes @@ -473,7 +473,7 @@ def _check_limits( else "All SELECT queries (including subqueries) must have a LIMIT clause" ) - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=message, @@ -500,7 +500,7 @@ def _check_limits( self.config.max_limit and limit_value > self.config.max_limit ): - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -521,7 +521,7 @@ def _check_limits( ): result_window = limit_value + offset_value if result_window > self.config.max_result_window: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -585,7 +585,7 @@ def _extract_offset_value(self, offset_node: exp.Offset) -> int | None: def _check_complexity( self, analyses: list[QueryAnalysis], query: str - ) -> EvaluatorResult | None: + ) -> RuleResult | None: """Check query complexity limits. 
Checks for: @@ -598,14 +598,14 @@ def _check_complexity( query: Original SQL query string Returns: - EvaluatorResult if complexity limits are violated, None otherwise + RuleResult if complexity limits are violated, None otherwise """ for analysis in analyses: # Check subquery depth (pre-computed) if self.config.max_subquery_depth: max_depth = analysis.max_subquery_depth if max_depth > self.config.max_subquery_depth: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -623,7 +623,7 @@ def _check_complexity( if self.config.max_joins: join_count = analysis.join_count if join_count > self.config.max_joins: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -641,7 +641,7 @@ def _check_complexity( if self.config.max_union_count: union_count = analysis.union_count if union_count > self.config.max_union_count: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -855,7 +855,7 @@ def _check_column_values( analyses: list[QueryAnalysis], query: str, context: dict[str, Any], - ) -> EvaluatorResult | None: + ) -> RuleResult | None: """Check required column equality/value rules for multi-tenant filtering.""" if not self._required_column_values: return None @@ -868,7 +868,7 @@ def _check_column_values( top_level_where = self._get_top_level_where_clause(stmt) if top_level_where is None: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -888,7 +888,7 @@ def _check_column_values( for rule in self._required_column_values: if rule.context_key not in context: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -908,7 +908,7 @@ def _check_column_values( expected_value = context[rule.context_key] if not self._is_scalar_context_value(expected_value): - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -928,7 +928,7 @@ def _check_column_values( ) if rule.table is 
None and len(base_tables) > 1: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -971,7 +971,7 @@ def _check_column_values( break if not found_valid_predicate: - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=( @@ -991,7 +991,7 @@ def _check_column_values( return None - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate SQL query against all configured controls. Offloads CPU-bound validation to a thread to avoid blocking the event loop. @@ -1000,36 +1000,36 @@ async def evaluate(self, data: Any) -> EvaluatorResult: data: SQL query string or dict with 'query' key Returns: - EvaluatorResult with matched=True if any control triggers + RuleResult with matched=True if any control triggers """ try: return await asyncio.to_thread(self._evaluate_sync, data) except Exception as e: - # Unexpected evaluator error - fail open with error field set + # Unexpected rule error - fail open with error field set logger.error( - "SQL evaluator unexpected error", + "SQL rule unexpected error", exc_info=True, extra={"error_type": type(e).__name__}, ) - return EvaluatorResult( + return RuleResult( matched=False, confidence=0.0, - message="SQL evaluator encountered an unexpected error", + message="SQL rule encountered an unexpected error", error=f"{type(e).__name__}: {str(e)[:200]}", ) - def _evaluate_sync(self, data: Any) -> EvaluatorResult: + def _evaluate_sync(self, data: Any) -> RuleResult: """Synchronous implementation of SQL validation logic. 
Args: data: SQL query string or dict with 'query' key Returns: - EvaluatorResult with matched=True if any control triggers + RuleResult with matched=True if any control triggers """ # Extract SQL query if data is None: - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="No SQL query provided", @@ -1038,7 +1038,7 @@ def _evaluate_sync(self, data: Any) -> EvaluatorResult: query, context = self._extract_query_and_context(data) if not query.strip(): - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="Empty SQL query", @@ -1067,7 +1067,7 @@ def _evaluate_sync(self, data: Any) -> EvaluatorResult: if not needs_parsing: # No controls triggered - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="No SQL controls triggered", @@ -1090,8 +1090,8 @@ def _evaluate_sync(self, data: Any) -> EvaluatorResult: }, ) - # Invalid SQL fails validation (not an evaluator error, just bad input) - return EvaluatorResult( + # Invalid SQL fails validation (not a rule error, just bad input) + return RuleResult( matched=True, confidence=1.0, message=f"SQL parsing failed: {str(e)[:200]}", @@ -1100,7 +1100,7 @@ def _evaluate_sync(self, data: Any) -> EvaluatorResult: if not parsed or all(stmt is None for stmt in parsed): # Invalid SQL fails validation - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message="Could not parse SQL query: no valid statements found", @@ -1176,7 +1176,7 @@ def _evaluate_sync(self, data: Any) -> EvaluatorResult: return complexity_result # No controls triggered - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="SQL query passed all controls", @@ -1184,7 +1184,7 @@ def _evaluate_sync(self, data: Any) -> EvaluatorResult: def _check_operations( self, analyses: list[QueryAnalysis], query: str - ) -> EvaluatorResult | None: + ) -> RuleResult | None: """Check SQL operations against blocked/allowed lists. 
When both blocked and allowed operations are configured (e.g., via @@ -1196,7 +1196,7 @@ def _check_operations( query: Original SQL query string Returns: - EvaluatorResult if operations violate rules, None otherwise + RuleResult if operations violate rules, None otherwise """ # Collect all operations from all analyses operations = set() @@ -1205,7 +1205,7 @@ def _check_operations( if not operations: # No operations detected - block unknown/unsupported SQL - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message="Could not extract operations from SQL query", @@ -1231,7 +1231,7 @@ def _check_operations( if blocked_found: msg = f"Blocked SQL operations detected: {', '.join(sorted(blocked_found))}" - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=msg, @@ -1314,7 +1314,7 @@ def _get_operation_name(self, stmt: exp.Expression) -> str | None: def _check_tables( self, analyses: list[QueryAnalysis], query: str - ) -> EvaluatorResult | None: + ) -> RuleResult | None: """Check table/schema access against restrictions. Args: @@ -1322,7 +1322,7 @@ def _check_tables( query: Original SQL query string Returns: - EvaluatorResult if tables violate rules, None otherwise + RuleResult if tables violate rules, None otherwise """ # Collect all tables from all analyses tables = [] @@ -1371,7 +1371,7 @@ def _check_tables( if len(violations) > 3: msg += f" (+{len(violations) - 3} more)" - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=msg, @@ -1408,7 +1408,7 @@ def _extract_tables(self, parsed: list) -> list[tuple[str | None, str]]: def _check_columns( self, analyses: list[QueryAnalysis], query: str - ) -> EvaluatorResult | None: + ) -> RuleResult | None: """Check column presence requirements. 
Args: @@ -1416,7 +1416,7 @@ def _check_columns( query: Original SQL query string Returns: - EvaluatorResult if required columns are missing, None otherwise + RuleResult if required columns are missing, None otherwise """ # Early return if no required columns (caller should check, but be defensive) if not self._required_columns: @@ -1440,7 +1440,7 @@ def _check_columns( if not columns: # No columns detected - required columns are missing - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"Required columns not found: {', '.join(self._required_columns)}", @@ -1468,7 +1468,7 @@ def _check_columns( missing = self._required_columns - columns_norm msg = f"Required columns missing: {', '.join(missing)}" - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=msg, diff --git a/rules/builtin/tests/__init__.py b/rules/builtin/tests/__init__.py new file mode 100644 index 00000000..42f846a0 --- /dev/null +++ b/rules/builtin/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for agent_control_rules package.""" diff --git a/rules/builtin/tests/json/__init__.py b/rules/builtin/tests/json/__init__.py new file mode 100644 index 00000000..049ff9fb --- /dev/null +++ b/rules/builtin/tests/json/__init__.py @@ -0,0 +1 @@ +"""Tests for the JSON rule.""" diff --git a/evaluators/builtin/tests/json/test_json.py b/rules/builtin/tests/json/test_json.py similarity index 68% rename from evaluators/builtin/tests/json/test_json.py rename to rules/builtin/tests/json/test_json.py index f1120fa5..291b8d1b 100644 --- a/evaluators/builtin/tests/json/test_json.py +++ b/rules/builtin/tests/json/test_json.py @@ -1,7 +1,7 @@ -"""Tests for JSON validation evaluator.""" +"""Tests for JSON validation rule.""" import pytest -from agent_control_evaluators.json import JSONEvaluator, JSONEvaluatorConfig +from agent_control_rules.json import JSONRule, JSONRuleConfig class TestJSONParsing: @@ -10,40 +10,40 @@ class TestJSONParsing: @pytest.mark.asyncio async 
def test_dict_input(self): """Test that dict input is accepted as-is.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["id"])) - result = await evaluator.evaluate({"id": 123}) + rule = JSONRule(JSONRuleConfig(required_fields=["id"])) + result = await rule.evaluate({"id": 123}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_json_string_input(self): """Test that JSON string input is parsed correctly.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["id"])) - result = await evaluator.evaluate('{"id": 123}') + rule = JSONRule(JSONRuleConfig(required_fields=["id"])) + result = await rule.evaluate('{"id": 123}') assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_invalid_json_blocked_by_default(self): """Test that invalid JSON is blocked by default.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["id"])) - result = await evaluator.evaluate("{invalid json") + rule = JSONRule(JSONRuleConfig(required_fields=["id"])) + result = await rule.evaluate("{invalid json") assert result.matched is True # Blocked by default assert "Invalid JSON blocked" in result.message @pytest.mark.asyncio async def test_invalid_json_allowed_when_configured(self): """Test that invalid JSON is allowed when allow_invalid_json=True.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(required_fields=["id"], allow_invalid_json=True) + rule = JSONRule( + JSONRuleConfig(required_fields=["id"], allow_invalid_json=True) ) - result = await evaluator.evaluate("{invalid json") + result = await rule.evaluate("{invalid json") assert result.matched is False assert "Invalid JSON allowed" in result.message @pytest.mark.asyncio async def test_none_input(self): """Test that None input is handled gracefully.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["id"])) - result = await evaluator.evaluate(None) + rule = JSONRule(JSONRuleConfig(required_fields=["id"])) + 
result = await rule.evaluate(None) assert result.matched is True assert "None" in result.message @@ -59,16 +59,16 @@ async def test_valid_schema(self): "required": ["id", "name"], "properties": {"id": {"type": "integer"}, "name": {"type": "string"}}, } - evaluator = JSONEvaluator(JSONEvaluatorConfig(json_schema=schema)) - result = await evaluator.evaluate({"id": 1, "name": "test"}) + rule = JSONRule(JSONRuleConfig(json_schema=schema)) + result = await rule.evaluate({"id": 1, "name": "test"}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_invalid_schema_missing_required(self): """Test that missing required fields fail schema validation.""" schema = {"type": "object", "required": ["id", "name"]} - evaluator = JSONEvaluator(JSONEvaluatorConfig(json_schema=schema)) - result = await evaluator.evaluate({"id": 1}) + rule = JSONRule(JSONRuleConfig(json_schema=schema)) + result = await rule.evaluate({"id": 1}) assert result.matched is True # Failed assert "Schema validation failed" in result.message assert "'name' is a required property" in result.message @@ -77,8 +77,8 @@ async def test_invalid_schema_missing_required(self): async def test_invalid_schema_wrong_type(self): """Test that wrong type fails schema validation.""" schema = {"type": "object", "properties": {"id": {"type": "integer"}}} - evaluator = JSONEvaluator(JSONEvaluatorConfig(json_schema=schema)) - result = await evaluator.evaluate({"id": "not-an-int"}) + rule = JSONRule(JSONRuleConfig(json_schema=schema)) + result = await rule.evaluate({"id": "not-an-int"}) assert result.matched is True # Failed assert "Schema validation failed" in result.message @@ -95,19 +95,19 @@ async def test_nested_object_validation(self): } }, } - evaluator = JSONEvaluator(JSONEvaluatorConfig(json_schema=schema)) - result = await evaluator.evaluate({"user": {"id": 123}}) + rule = JSONRule(JSONRuleConfig(json_schema=schema)) + result = await rule.evaluate({"user": {"id": 123}}) assert 
result.matched is False # Validation passed @pytest.mark.asyncio async def test_array_validation(self): """Test schema validation on arrays.""" schema = {"type": "array", "items": {"type": "integer"}} - evaluator = JSONEvaluator(JSONEvaluatorConfig(json_schema=schema)) - result = await evaluator.evaluate([1, 2, 3]) + rule = JSONRule(JSONRuleConfig(json_schema=schema)) + result = await rule.evaluate([1, 2, 3]) assert result.matched is False # Validation passed - result = await evaluator.evaluate([1, "not-int", 3]) + result = await rule.evaluate([1, "not-int", 3]) assert result.matched is True # Failed @@ -117,42 +117,42 @@ class TestRequiredFieldsValidation: @pytest.mark.asyncio async def test_all_present(self): """Test that all required fields present passes validation.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["id", "name", "email"])) - result = await evaluator.evaluate({"id": 1, "name": "test", "email": "test@example.com"}) + rule = JSONRule(JSONRuleConfig(required_fields=["id", "name", "email"])) + result = await rule.evaluate({"id": 1, "name": "test", "email": "test@example.com"}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_missing_field(self): """Test that missing required field fails validation.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["id", "name"])) - result = await evaluator.evaluate({"id": 1}) + rule = JSONRule(JSONRuleConfig(required_fields=["id", "name"])) + result = await rule.evaluate({"id": 1}) assert result.matched is True # Failed assert "Missing required fields: name" in result.message @pytest.mark.asyncio async def test_null_allowed(self): """Test that null values are allowed when configured.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(required_fields=["id"], allow_null_required=True) + rule = JSONRule( + JSONRuleConfig(required_fields=["id"], allow_null_required=True) ) - result = await evaluator.evaluate({"id": None}) + result = await 
rule.evaluate({"id": None}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_null_disallowed(self): """Test that null values fail when disallowed.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(required_fields=["id"], allow_null_required=False) + rule = JSONRule( + JSONRuleConfig(required_fields=["id"], allow_null_required=False) ) - result = await evaluator.evaluate({"id": None}) + result = await rule.evaluate({"id": None}) assert result.matched is True # Failed assert "null not allowed" in result.message @pytest.mark.asyncio async def test_nested_required_fields(self): """Test required fields validation on nested paths.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["user.id", "user.email"])) - result = await evaluator.evaluate({"user": {"id": 123, "email": "test@example.com"}}) + rule = JSONRule(JSONRuleConfig(required_fields=["user.id", "user.email"])) + result = await rule.evaluate({"user": {"id": 123, "email": "test@example.com"}}) assert result.matched is False # Validation passed @@ -162,8 +162,8 @@ class TestTypesValidation: @pytest.mark.asyncio async def test_all_types_match(self): """Test that all types matching passes validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_types={ "id": "string", "age": "integer", @@ -175,7 +175,7 @@ async def test_all_types_match(self): } ) ) - result = await evaluator.evaluate( + result = await rule.evaluate( { "id": "123", "age": 25, @@ -191,80 +191,80 @@ async def test_all_types_match(self): @pytest.mark.asyncio async def test_type_mismatch(self): """Test that type mismatch fails validation.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(field_types={"id": "string"})) - result = await evaluator.evaluate({"id": 123}) + rule = JSONRule(JSONRuleConfig(field_types={"id": "string"})) + result = await rule.evaluate({"id": 123}) assert result.matched is True # Failed assert "expected string, got 
integer" in result.message @pytest.mark.asyncio async def test_missing_field(self): """Test that missing field fails type validation.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(field_types={"id": "string"})) - result = await evaluator.evaluate({"name": "test"}) + rule = JSONRule(JSONRuleConfig(field_types={"id": "string"})) + result = await rule.evaluate({"name": "test"}) assert result.matched is True # Failed assert "field not found" in result.message @pytest.mark.asyncio async def test_nested_field_types(self): """Test type checking on nested fields.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_types={"user.id": "integer", "user.name": "string"}) + rule = JSONRule( + JSONRuleConfig(field_types={"user.id": "integer", "user.name": "string"}) ) - result = await evaluator.evaluate({"user": {"id": 123, "name": "test"}}) + result = await rule.evaluate({"user": {"id": 123, "name": "test"}}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_extra_fields_allowed(self): """Test that extra fields are allowed by default.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_types={"id": "string"}, allow_extra_fields=True) + rule = JSONRule( + JSONRuleConfig(field_types={"id": "string"}, allow_extra_fields=True) ) - result = await evaluator.evaluate({"id": "123", "extra": "field"}) + result = await rule.evaluate({"id": "123", "extra": "field"}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_extra_fields_denied(self): """Test that extra fields can be denied.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_types={"id": "string"}, allow_extra_fields=False) + rule = JSONRule( + JSONRuleConfig(field_types={"id": "string"}, allow_extra_fields=False) ) - result = await evaluator.evaluate({"id": "123", "extra": "field"}) + result = await rule.evaluate({"id": "123", "extra": "field"}) assert result.matched is True # Failed assert "Extra fields not allowed" in 
result.message @pytest.mark.asyncio async def test_array_input_fails_type_check(self): """Test that array input fails type checking gracefully.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(field_types={"id": "string"})) - result = await evaluator.evaluate([1, 2, 3]) + rule = JSONRule(JSONRuleConfig(field_types={"id": "string"})) + result = await rule.evaluate([1, 2, 3]) assert result.matched is True # Failed assert "requires a JSON object, got array" in result.message @pytest.mark.asyncio async def test_nested_fields_with_strict_mode_no_extra_fields(self): """Test P1 fix: Nested fields with allow_extra_fields=False should not flag parent containers.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_types={"user.id": "string"}, allow_extra_fields=False, ) ) # Should pass: "user" is a container, "user.id" is the typed leaf field - result = await evaluator.evaluate({"user": {"id": "123"}}) + result = await rule.evaluate({"user": {"id": "123"}}) assert result.matched is False # Validation passed assert "Extra fields" not in result.message @pytest.mark.asyncio async def test_nested_fields_strict_mode_detects_actual_extra_leaf_fields(self): """Test that strict mode still catches actual extra leaf fields in nested objects.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_types={"user.id": "string"}, allow_extra_fields=False, ) ) # Should fail: "user.name" is an extra leaf field not in field_types - result = await evaluator.evaluate({"user": {"id": "123", "name": "test"}}) + result = await rule.evaluate({"user": {"id": "123", "name": "test"}}) assert result.matched is True # Failed assert "Extra fields not allowed" in result.message assert "user.name" in result.message @@ -272,41 +272,41 @@ async def test_nested_fields_strict_mode_detects_actual_extra_leaf_fields(self): @pytest.mark.asyncio async def test_multiple_nested_levels_strict_mode(self): """Test strict mode with 
multiple levels of nesting.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_types={"user.profile.email": "string"}, allow_extra_fields=False, ) ) # Should pass: "user" and "user.profile" are containers - result = await evaluator.evaluate({"user": {"profile": {"email": "test@example.com"}}}) + result = await rule.evaluate({"user": {"profile": {"email": "test@example.com"}}}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_nested_fields_with_required_and_strict_mode(self): """Test nested fields with both required_fields and strict mode.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( required_fields=["user.id", "user.email"], field_types={"user.id": "string"}, allow_extra_fields=False, ) ) # Should pass: both user.id and user.email are allowed (one typed, one required) - result = await evaluator.evaluate({"user": {"id": "123", "email": "test@example.com"}}) + result = await rule.evaluate({"user": {"id": "123", "email": "test@example.com"}}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_strict_mode_top_level_extra_field_still_detected(self): """Test that top-level extra fields are still detected in strict mode.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_types={"id": "string"}, allow_extra_fields=False, ) ) # Should fail: "extra" is a top-level extra field - result = await evaluator.evaluate({"id": "123", "extra": "field"}) + result = await rule.evaluate({"id": "123", "extra": "field"}) assert result.matched is True # Failed assert "Extra fields not allowed" in result.message @@ -317,15 +317,15 @@ async def test_strict_mode_allows_field_constraints_fields(self): Regression test: field_constraints fields should be in the allow-list when allow_extra_fields=False. 
""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_types={"id": "string"}, field_constraints={"score": {"min": 0.0, "max": 1.0}}, allow_extra_fields=False, ) ) # Should pass: "score" is referenced in field_constraints - result = await evaluator.evaluate({"id": "123", "score": 0.5}) + result = await rule.evaluate({"id": "123", "score": 0.5}) assert result.matched is False # Validation passed assert "passed" in result.message.lower() @@ -336,15 +336,15 @@ async def test_strict_mode_allows_field_patterns_fields(self): Regression test: field_patterns fields should be in the allow-list when allow_extra_fields=False. """ - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_types={"id": "string"}, field_patterns={"email": r"^.+@.+$"}, allow_extra_fields=False, ) ) # Should pass: "email" is referenced in field_patterns - result = await evaluator.evaluate({"id": "123", "email": "test@example.com"}) + result = await rule.evaluate({"id": "123", "email": "test@example.com"}) assert result.matched is False # Validation passed assert "passed" in result.message.lower() @@ -355,8 +355,8 @@ async def test_strict_mode_with_all_field_references(self): Ensures that required_fields, field_types, field_constraints, and field_patterns are all included in the allow-list. 
""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( required_fields=["name"], field_types={"id": "string"}, field_constraints={"score": {"min": 0.0, "max": 1.0}}, @@ -365,7 +365,7 @@ async def test_strict_mode_with_all_field_references(self): ) ) # Should pass: all fields are referenced in some config option - result = await evaluator.evaluate({ + result = await rule.evaluate({ "id": "123", "name": "Test", "score": 0.75, @@ -374,7 +374,7 @@ async def test_strict_mode_with_all_field_references(self): assert result.matched is False # Validation passed # Should fail: "extra" is not referenced anywhere - result = await evaluator.evaluate({ + result = await rule.evaluate({ "id": "123", "name": "Test", "score": 0.75, @@ -391,97 +391,97 @@ class TestConstraintsValidation: @pytest.mark.asyncio async def test_numeric_range_within_bounds(self): """Test that numeric value within range passes validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"score": {"min": 0.0, "max": 1.0}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"score": {"min": 0.0, "max": 1.0}}) ) - result = await evaluator.evaluate({"score": 0.75}) + result = await rule.evaluate({"score": 0.75}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_numeric_range_below_min(self): """Test that value below minimum fails validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"score": {"min": 0.0, "max": 1.0}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"score": {"min": 0.0, "max": 1.0}}) ) - result = await evaluator.evaluate({"score": -0.5}) + result = await rule.evaluate({"score": -0.5}) assert result.matched is True # Failed assert "below minimum" in result.message @pytest.mark.asyncio async def test_numeric_range_above_max(self): """Test that value above maximum fails validation.""" - evaluator = JSONEvaluator( - 
JSONEvaluatorConfig(field_constraints={"score": {"min": 0.0, "max": 1.0}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"score": {"min": 0.0, "max": 1.0}}) ) - result = await evaluator.evaluate({"score": 1.5}) + result = await rule.evaluate({"score": 1.5}) assert result.matched is True # Failed assert "above maximum" in result.message @pytest.mark.asyncio async def test_integer_range(self): """Test integer range constraints.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"count": {"min": -10, "max": 5}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"count": {"min": -10, "max": 5}}) ) - result = await evaluator.evaluate({"count": 3}) + result = await rule.evaluate({"count": 3}) assert result.matched is False # Validation passed - result = await evaluator.evaluate({"count": 10}) + result = await rule.evaluate({"count": 10}) assert result.matched is True # Failed @pytest.mark.asyncio async def test_enum_valid_value(self): """Test that valid enum value passes validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"status": {"enum": ["pending", "approved", "rejected"]}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"status": {"enum": ["pending", "approved", "rejected"]}}) ) - result = await evaluator.evaluate({"status": "approved"}) + result = await rule.evaluate({"status": "approved"}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_enum_invalid_value(self): """Test that invalid enum value fails validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"status": {"enum": ["pending", "approved", "rejected"]}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"status": {"enum": ["pending", "approved", "rejected"]}}) ) - result = await evaluator.evaluate({"status": "invalid"}) + result = await rule.evaluate({"status": "invalid"}) assert result.matched is True # Failed assert "not in allowed values" in result.message 
@pytest.mark.asyncio async def test_string_length_within_range(self): """Test that string length within range passes validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"username": {"min_length": 3, "max_length": 20}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"username": {"min_length": 3, "max_length": 20}}) ) - result = await evaluator.evaluate({"username": "test_user"}) + result = await rule.evaluate({"username": "test_user"}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_string_length_too_short(self): """Test that string shorter than minimum fails validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"username": {"min_length": 3, "max_length": 20}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"username": {"min_length": 3, "max_length": 20}}) ) - result = await evaluator.evaluate({"username": "ab"}) + result = await rule.evaluate({"username": "ab"}) assert result.matched is True # Failed assert "below minimum" in result.message @pytest.mark.asyncio async def test_string_length_too_long(self): """Test that string longer than maximum fails validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"username": {"min_length": 3, "max_length": 20}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"username": {"min_length": 3, "max_length": 20}}) ) - result = await evaluator.evaluate({"username": "a" * 25}) + result = await rule.evaluate({"username": "a" * 25}) assert result.matched is True # Failed assert "above maximum" in result.message @pytest.mark.asyncio async def test_mixed_constraints(self): """Test multiple constraint types on different fields.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_constraints={ "score": {"min": 0.0, "max": 1.0}, "status": {"enum": ["active", "inactive"]}, @@ -489,7 +489,7 @@ async def test_mixed_constraints(self): } ) ) - 
result = await evaluator.evaluate({"score": 0.8, "status": "active", "name": "Test"}) + result = await rule.evaluate({"score": 0.8, "status": "active", "name": "Test"}) assert result.matched is False # Validation passed @@ -499,8 +499,8 @@ class TestPatternMatching: @pytest.mark.asyncio async def test_all_patterns_match(self): """Test that all patterns matching passes validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_patterns={ "email": r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]+$", "phone": r"^\+?[1-9]\d{1,14}$", @@ -508,14 +508,14 @@ async def test_all_patterns_match(self): pattern_match_logic="all", ) ) - result = await evaluator.evaluate({"email": "test@example.com", "phone": "+1234567890"}) + result = await rule.evaluate({"email": "test@example.com", "phone": "+1234567890"}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_pattern_fails_all_mode(self): """Test that one pattern failing fails 'all' mode validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_patterns={ "email": r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]+$", "phone": r"^\+?[1-9]\d{1,14}$", @@ -523,15 +523,15 @@ async def test_pattern_fails_all_mode(self): pattern_match_logic="all", ) ) - result = await evaluator.evaluate({"email": "invalid", "phone": "+1234567890"}) + result = await rule.evaluate({"email": "invalid", "phone": "+1234567890"}) assert result.matched is True # Failed assert "Pattern validation failed" in result.message @pytest.mark.asyncio async def test_any_pattern_match(self): """Test that any pattern matching passes 'any' mode validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_patterns={ "email": r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]+$", "phone": r"^\+?[1-9]\d{1,14}$", @@ -539,19 +539,19 @@ async def test_any_pattern_match(self): pattern_match_logic="any", ) ) - result = await 
evaluator.evaluate({"email": "test@example.com", "phone": "invalid"}) + result = await rule.evaluate({"email": "test@example.com", "phone": "invalid"}) assert result.matched is False # Validation passed (email matched) @pytest.mark.asyncio async def test_no_patterns_match_any_mode(self): """Test that no patterns matching fails 'any' mode validation.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_patterns={"email": r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]+$"}, pattern_match_logic="any", ) ) - result = await evaluator.evaluate({"email": "invalid"}) + result = await rule.evaluate({"email": "invalid"}) assert result.matched is True # Failed assert "No patterns matched" in result.message @@ -562,54 +562,54 @@ class TestCombinedValidation: @pytest.mark.asyncio async def test_all_checks_pass(self): """Test that all checks passing results in validation success.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( required_fields=["id", "email"], field_types={"id": "string", "email": "string", "age": "integer"}, field_constraints={"age": {"min": 0, "max": 120}}, field_patterns={"email": r"^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]+$"}, ) ) - result = await evaluator.evaluate({"id": "123", "email": "test@example.com", "age": 30}) + result = await rule.evaluate({"id": "123", "email": "test@example.com", "age": 30}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_fails_at_required_check(self): """Test that validation fails at required fields check.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( required_fields=["id", "email"], field_types={"id": "string", "email": "string"}, ) ) - result = await evaluator.evaluate({"id": "123"}) # Missing email + result = await rule.evaluate({"id": "123"}) # Missing email assert result.matched is True # Failed assert "Missing required fields" in result.message @pytest.mark.asyncio 
async def test_fails_at_type_check(self): """Test that validation fails at type check.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( required_fields=["id"], field_types={"id": "integer"}, ) ) - result = await evaluator.evaluate({"id": "not-an-int"}) + result = await rule.evaluate({"id": "not-an-int"}) assert result.matched is True # Failed assert "Type validation failed" in result.message @pytest.mark.asyncio async def test_fails_at_constraint_check(self): """Test that validation fails at constraint check.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( required_fields=["score"], field_types={"score": "number"}, field_constraints={"score": {"min": 0.0, "max": 1.0}}, ) ) - result = await evaluator.evaluate({"score": 1.5}) + result = await rule.evaluate({"score": 1.5}) assert result.matched is True # Failed assert "Constraint validation failed" in result.message @@ -622,32 +622,32 @@ def test_invalid_schema_rejected(self): from jsonschema.exceptions import SchemaError with pytest.raises(SchemaError): - JSONEvaluatorConfig(json_schema={"type": "invalid-type"}) + JSONRuleConfig(json_schema={"type": "invalid-type"}) def test_invalid_type_name_rejected(self): """Test that invalid type name is rejected at config time.""" with pytest.raises(ValueError, match="Invalid type"): - JSONEvaluatorConfig(field_types={"id": "invalid-type"}) + JSONRuleConfig(field_types={"id": "invalid-type"}) def test_invalid_regex_pattern_rejected(self): """Test that invalid regex pattern is rejected at config time.""" with pytest.raises(ValueError, match="Invalid regex"): - JSONEvaluatorConfig(field_patterns={"email": "["}) # Invalid regex + JSONRuleConfig(field_patterns={"email": "["}) # Invalid regex def test_empty_enum_rejected(self): """Test that empty enum list is rejected at config time.""" with pytest.raises(ValueError, match="non-empty list"): - JSONEvaluatorConfig(field_constraints={"status": 
{"enum": []}}) + JSONRuleConfig(field_constraints={"status": {"enum": []}}) def test_invalid_min_length_type_rejected(self): """Test that non-integer min_length is rejected at config time.""" with pytest.raises(ValueError, match="must be an integer"): - JSONEvaluatorConfig(field_constraints={"name": {"min_length": "invalid"}}) + JSONRuleConfig(field_constraints={"name": {"min_length": "invalid"}}) def test_at_least_one_check_required(self): """Test that at least one validation check must be configured.""" with pytest.raises(ValueError, match="At least one validation check"): - JSONEvaluatorConfig() + JSONRuleConfig() class TestNestedValues: @@ -656,30 +656,30 @@ class TestNestedValues: @pytest.mark.asyncio async def test_deep_nesting(self): """Test validation on deeply nested fields.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(required_fields=["a.b.c.d.e"], field_types={"a.b.c.d.e": "integer"}) + rule = JSONRule( + JSONRuleConfig(required_fields=["a.b.c.d.e"], field_types={"a.b.c.d.e": "integer"}) ) - result = await evaluator.evaluate({"a": {"b": {"c": {"d": {"e": 42}}}}}) + result = await rule.evaluate({"a": {"b": {"c": {"d": {"e": 42}}}}}) assert result.matched is False # Validation passed @pytest.mark.asyncio async def test_missing_intermediate_key(self): """Test that missing intermediate key is handled gracefully.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(required_fields=["a.b.c"])) - result = await evaluator.evaluate({"a": {"x": 1}}) # Missing 'b' + rule = JSONRule(JSONRuleConfig(required_fields=["a.b.c"])) + result = await rule.evaluate({"a": {"x": 1}}) # Missing 'b' assert result.matched is True # Failed assert "Missing required fields" in result.message @pytest.mark.asyncio async def test_constraints_on_nested_fields(self): """Test constraints on nested field paths.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"user.age": {"min": 0, "max": 120}}) + rule = JSONRule( + 
JSONRuleConfig(field_constraints={"user.age": {"min": 0, "max": 120}}) ) - result = await evaluator.evaluate({"user": {"age": 30}}) + result = await rule.evaluate({"user": {"age": 30}}) assert result.matched is False # Validation passed - result = await evaluator.evaluate({"user": {"age": 150}}) + result = await rule.evaluate({"user": {"age": 150}}) assert result.matched is True # Failed @@ -689,46 +689,46 @@ class TestEnumCaseSensitivity: @pytest.mark.asyncio async def test_enum_case_sensitive_default(self): """Test that enum matching is case-sensitive by default.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig(field_constraints={"status": {"enum": ["active", "inactive"]}}) + rule = JSONRule( + JSONRuleConfig(field_constraints={"status": {"enum": ["active", "inactive"]}}) ) # Should fail with "Active" (wrong case) - result = await evaluator.evaluate({"status": "Active"}) + result = await rule.evaluate({"status": "Active"}) assert result.matched is True # Failed validation assert "not in allowed values" in result.message @pytest.mark.asyncio async def test_enum_case_insensitive_enabled(self): """Test case-insensitive enum matching when enabled.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_constraints={"status": {"enum": ["active", "inactive"]}}, case_sensitive_enums=False, ) ) # Should pass with any case - result = await evaluator.evaluate({"status": "Active"}) + result = await rule.evaluate({"status": "Active"}) assert result.matched is False # Validation passed - result = await evaluator.evaluate({"status": "INACTIVE"}) + result = await rule.evaluate({"status": "INACTIVE"}) assert result.matched is False # Validation passed - result = await evaluator.evaluate({"status": "pending"}) + result = await rule.evaluate({"status": "pending"}) assert result.matched is True # Failed - not in enum @pytest.mark.asyncio async def test_enum_case_insensitive_non_strings(self): """Test that non-string enums still use 
exact matching.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_constraints={"code": {"enum": [1, 2, 3]}}, case_sensitive_enums=False, ) ) - result = await evaluator.evaluate({"code": 1}) + result = await rule.evaluate({"code": 1}) assert result.matched is False # Validation passed - result = await evaluator.evaluate({"code": 4}) + result = await rule.evaluate({"code": 4}) assert result.matched is True # Failed validation @@ -738,35 +738,35 @@ class TestPatternFlags: @pytest.mark.asyncio async def test_pattern_case_sensitive_default(self): """Test that pattern matching is case-sensitive by default.""" - evaluator = JSONEvaluator(JSONEvaluatorConfig(field_patterns={"code": "^[A-Z]{3}$"})) - result = await evaluator.evaluate({"code": "ABC"}) + rule = JSONRule(JSONRuleConfig(field_patterns={"code": "^[A-Z]{3}$"})) + result = await rule.evaluate({"code": "ABC"}) assert result.matched is False # Validation passed - result = await evaluator.evaluate({"code": "abc"}) + result = await rule.evaluate({"code": "abc"}) assert result.matched is True # Failed - lowercase @pytest.mark.asyncio async def test_pattern_ignorecase_flag(self): """Test case-insensitive pattern matching with IGNORECASE flag.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_patterns={"code": {"pattern": "^[A-Z]{3}$", "flags": ["IGNORECASE"]}} ) ) - result = await evaluator.evaluate({"code": "ABC"}) + result = await rule.evaluate({"code": "ABC"}) assert result.matched is False # Validation passed - result = await evaluator.evaluate({"code": "abc"}) + result = await rule.evaluate({"code": "abc"}) assert result.matched is False # Validation passed (case-insensitive) - result = await evaluator.evaluate({"code": "AB"}) + result = await rule.evaluate({"code": "AB"}) assert result.matched is True # Failed - wrong length @pytest.mark.asyncio async def test_pattern_mixed_string_and_dict(self): """Test mixed 
string/dict patterns work together.""" - evaluator = JSONEvaluator( - JSONEvaluatorConfig( + rule = JSONRule( + JSONRuleConfig( field_patterns={ "email": { "pattern": "^[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$", @@ -777,5 +777,5 @@ async def test_pattern_mixed_string_and_dict(self): ) ) # Both should work - result = await evaluator.evaluate({"email": "Test@Example.COM", "code": "1234"}) + result = await rule.evaluate({"email": "Test@Example.COM", "code": "1234"}) assert result.matched is False # Validation passed diff --git a/evaluators/builtin/tests/list/test_list.py b/rules/builtin/tests/list/test_list.py similarity index 72% rename from evaluators/builtin/tests/list/test_list.py rename to rules/builtin/tests/list/test_list.py index 3ee950d0..ea80c6a4 100644 --- a/evaluators/builtin/tests/list/test_list.py +++ b/rules/builtin/tests/list/test_list.py @@ -1,44 +1,44 @@ -"""Tests for list evaluator.""" +"""Tests for list rule.""" import pytest from pydantic import ValidationError -from agent_control_evaluators.list import ListEvaluator, ListEvaluatorConfig +from agent_control_rules.list import ListRule, ListRuleConfig -class TestListEvaluatorConfig: - """Tests for list evaluator config validation.""" +class TestListRuleConfig: + """Tests for list rule config validation.""" def test_empty_string_value_rejected(self) -> None: """Test that empty-string list entries are rejected at config validation time.""" - # Given: a list evaluator config with an empty-string value + # Given: a list rule config with an empty-string value # When: constructing the config model with pytest.raises( ValidationError, match="values must not contain empty or whitespace-only strings" ): - ListEvaluatorConfig(values=[""]) + ListRuleConfig(values=[""]) # Then: validation rejects the config (asserted by pytest) def test_whitespace_only_value_rejected(self) -> None: """Test that whitespace-only list entries are rejected at config validation time.""" - # Given: a list evaluator config with a 
whitespace-only value + # Given: a list rule config with a whitespace-only value # When: constructing the config model with pytest.raises( ValidationError, match="values must not contain empty or whitespace-only strings" ): - ListEvaluatorConfig(values=[" "]) + ListRuleConfig(values=[" "]) # Then: validation rejects the config (asserted by pytest) -class TestListEvaluator: - """Tests for list evaluator runtime behavior.""" +class TestListRule: + """Tests for list rule runtime behavior.""" @pytest.mark.asyncio async def test_starts_with_matches_prefix(self) -> None: """Test that starts_with mode triggers on prefix matches.""" - # Given: a starts_with evaluator config - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule config + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control", "/tmp/cache"], logic="any", match_on="match", @@ -48,7 +48,7 @@ async def test_starts_with_matches_prefix(self) -> None: ) # When: evaluating a path under an allowed prefix - result = await evaluator.evaluate("/home/lev/agent-control/server/src/app.py") + result = await rule.evaluate("/home/lev/agent-control/server/src/app.py") # Then: the prefix match triggers assert result.matched is True @@ -57,9 +57,9 @@ async def test_starts_with_matches_prefix(self) -> None: @pytest.mark.asyncio async def test_starts_with_matches_exact_path_value(self) -> None: """Test that starts_with mode matches the configured path value exactly.""" - # Given: a starts_with evaluator config - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule config + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control"], logic="any", match_on="match", @@ -69,7 +69,7 @@ async def test_starts_with_matches_exact_path_value(self) -> None: ) # When: evaluating the exact configured path - result = await evaluator.evaluate("/home/lev/agent-control") + result = await rule.evaluate("/home/lev/agent-control") # Then: the exact path matches assert 
result.matched is True @@ -78,9 +78,9 @@ async def test_starts_with_matches_exact_path_value(self) -> None: @pytest.mark.asyncio async def test_starts_with_no_match_when_prefix_absent(self) -> None: """Test that starts_with mode does not trigger when no prefix matches.""" - # Given: a starts_with evaluator config - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule config + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control", "/tmp/cache"], logic="any", match_on="match", @@ -90,17 +90,17 @@ async def test_starts_with_no_match_when_prefix_absent(self) -> None: ) # When: evaluating a path with no configured prefix - result = await evaluator.evaluate("/var/log/system.log") + result = await rule.evaluate("/var/log/system.log") - # Then: the evaluator does not trigger + # Then: the rule does not trigger assert result.matched is False @pytest.mark.asyncio async def test_starts_with_uses_raw_string_prefix_for_path_like_values(self) -> None: """Test that starts_with is generic string-prefix matching, not path-segment aware.""" - # Given: a starts_with evaluator configured with a path-like prefix - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule configured with a path-like prefix + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control"], logic="any", match_on="match", @@ -110,18 +110,18 @@ async def test_starts_with_uses_raw_string_prefix_for_path_like_values(self) -> ) # When: evaluating a sibling path that shares the same string prefix - result = await evaluator.evaluate("/home/lev/agent-control-old/server") + result = await rule.evaluate("/home/lev/agent-control-old/server") - # Then: the evaluator matches because starts_with is not path-boundary aware + # Then: the rule matches because starts_with is not path-boundary aware assert result.matched is True assert result.metadata["matches"] == ["/home/lev/agent-control-old/server"] @pytest.mark.asyncio async def 
test_starts_with_honors_case_sensitivity(self) -> None: """Test that starts_with mode respects case sensitivity settings.""" - # Given: two starts_with evaluators that differ only by case sensitivity - insensitive = ListEvaluator( - ListEvaluatorConfig( + # Given: two starts_with rules that differ only by case sensitivity + insensitive = ListRule( + ListRuleConfig( values=["/HOME/LEV/AGENT-CONTROL"], logic="any", match_on="match", @@ -129,8 +129,8 @@ async def test_starts_with_honors_case_sensitivity(self) -> None: case_sensitive=False, ) ) - sensitive = ListEvaluator( - ListEvaluatorConfig( + sensitive = ListRule( + ListRuleConfig( values=["/HOME/LEV/AGENT-CONTROL"], logic="any", match_on="match", @@ -143,16 +143,16 @@ async def test_starts_with_honors_case_sensitivity(self) -> None: insensitive_result = await insensitive.evaluate("/home/lev/agent-control/server") sensitive_result = await sensitive.evaluate("/home/lev/agent-control/server") - # Then: only the case-insensitive evaluator matches + # Then: only the case-insensitive rule matches assert insensitive_result.matched is True assert sensitive_result.matched is False @pytest.mark.asyncio async def test_starts_with_supports_no_match_allowlists(self) -> None: """Test that starts_with works with no_match for allowlist-style controls.""" - # Given: a starts_with evaluator configured as an allowlist - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule configured as an allowlist + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control", "/tmp/cache"], logic="any", match_on="no_match", @@ -162,8 +162,8 @@ async def test_starts_with_supports_no_match_allowlists(self) -> None: ) # When: evaluating one allowed and one disallowed path - allowed_result = await evaluator.evaluate("/home/lev/agent-control/server") - denied_result = await evaluator.evaluate("/var/log/system.log") + allowed_result = await rule.evaluate("/home/lev/agent-control/server") + denied_result = await 
rule.evaluate("/var/log/system.log") # Then: only the disallowed path triggers the control assert allowed_result.matched is False @@ -172,9 +172,9 @@ async def test_starts_with_supports_no_match_allowlists(self) -> None: @pytest.mark.asyncio async def test_starts_with_matches_plain_text_prefix(self) -> None: """Test that starts_with mode works for ordinary non-path strings.""" - # Given: a starts_with evaluator config for ordinary strings - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule config for ordinary strings + rule = ListRule( + ListRuleConfig( values=["agent", "control:"], logic="any", match_on="match", @@ -184,8 +184,8 @@ async def test_starts_with_matches_plain_text_prefix(self) -> None: ) # When: evaluating values with and without the configured prefixes - matched_result = await evaluator.evaluate("agent-control") - unmatched_result = await evaluator.evaluate("please control: now") + matched_result = await rule.evaluate("agent-control") + unmatched_result = await rule.evaluate("please control: now") # Then: only the true prefix match triggers assert matched_result.matched is True @@ -195,8 +195,8 @@ async def test_starts_with_matches_plain_text_prefix(self) -> None: async def test_starts_with_escapes_regex_metacharacters(self) -> None: """Test that starts_with treats configured values as literals, not regex.""" # Given: prefixes containing regex metacharacters - evaluator = ListEvaluator( - ListEvaluatorConfig( + rule = ListRule( + ListRuleConfig( values=["release/v1.2+", "[beta]"], logic="any", match_on="match", @@ -206,8 +206,8 @@ async def test_starts_with_escapes_regex_metacharacters(self) -> None: ) # When: evaluating values that begin with those literal prefixes - release_result = await evaluator.evaluate("release/v1.2+rc1") - beta_result = await evaluator.evaluate("[beta] feature flag") + release_result = await rule.evaluate("release/v1.2+rc1") + beta_result = await rule.evaluate("[beta] feature flag") # Then: both 
values match literally assert release_result.matched is True @@ -216,9 +216,9 @@ async def test_starts_with_escapes_regex_metacharacters(self) -> None: @pytest.mark.asyncio async def test_starts_with_supports_list_input_with_any_logic(self) -> None: """Test that starts_with works on list inputs when any item matches.""" - # Given: a starts_with evaluator with any-item semantics - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule with any-item semantics + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control", "agent"], logic="any", match_on="match", @@ -228,18 +228,18 @@ async def test_starts_with_supports_list_input_with_any_logic(self) -> None: ) # When: evaluating a list where only one element matches - result = await evaluator.evaluate(["/var/log/system.log", "agent-control"]) + result = await rule.evaluate(["/var/log/system.log", "agent-control"]) - # Then: the evaluator triggers and reports the matching entry + # Then: the rule triggers and reports the matching entry assert result.matched is True assert result.metadata["matches"] == ["agent-control"] @pytest.mark.asyncio async def test_starts_with_supports_list_input_with_all_logic(self) -> None: """Test that starts_with respects all-item semantics for list inputs.""" - # Given: a starts_with evaluator with all-item semantics - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: a starts_with rule with all-item semantics + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control", "/tmp/cache"], logic="all", match_on="match", @@ -249,10 +249,10 @@ async def test_starts_with_supports_list_input_with_all_logic(self) -> None: ) # When: evaluating one fully matching list and one partially matching list - matching_result = await evaluator.evaluate( + matching_result = await rule.evaluate( ["/home/lev/agent-control/server", "/tmp/cache/build"] ) - partial_result = await evaluator.evaluate( + partial_result = await rule.evaluate( 
["/home/lev/agent-control/server", "/var/log/system.log"] ) @@ -263,9 +263,9 @@ async def test_starts_with_supports_list_input_with_all_logic(self) -> None: @pytest.mark.asyncio async def test_ends_with_matches_suffix(self) -> None: """Test that ends_with mode triggers on suffix matches.""" - # Given: an ends_with evaluator config - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: an ends_with rule config + rule = ListRule( + ListRuleConfig( values=["/SOUL.md", ".py"], logic="any", match_on="match", @@ -275,7 +275,7 @@ async def test_ends_with_matches_suffix(self) -> None: ) # When: evaluating a path with an allowed suffix - result = await evaluator.evaluate("/home/lev/agent-control/SOUL.md") + result = await rule.evaluate("/home/lev/agent-control/SOUL.md") # Then: the suffix match triggers assert result.matched is True @@ -284,9 +284,9 @@ async def test_ends_with_matches_suffix(self) -> None: @pytest.mark.asyncio async def test_ends_with_matches_exact_path_value(self) -> None: """Test that ends_with mode matches the configured path value exactly.""" - # Given: an ends_with evaluator config - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: an ends_with rule config + rule = ListRule( + ListRuleConfig( values=["/home/lev/agent-control/SOUL.md"], logic="any", match_on="match", @@ -296,7 +296,7 @@ async def test_ends_with_matches_exact_path_value(self) -> None: ) # When: evaluating the exact configured path - result = await evaluator.evaluate("/home/lev/agent-control/SOUL.md") + result = await rule.evaluate("/home/lev/agent-control/SOUL.md") # Then: the exact path matches assert result.matched is True @@ -305,9 +305,9 @@ async def test_ends_with_matches_exact_path_value(self) -> None: @pytest.mark.asyncio async def test_ends_with_no_match_when_suffix_absent(self) -> None: """Test that ends_with mode does not trigger when no suffix matches.""" - # Given: an ends_with evaluator config - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: 
an ends_with rule config + rule = ListRule( + ListRuleConfig( values=["/SOUL.md", ".py"], logic="any", match_on="match", @@ -317,17 +317,17 @@ async def test_ends_with_no_match_when_suffix_absent(self) -> None: ) # When: evaluating a path with no configured suffix - result = await evaluator.evaluate("/var/log/system.log") + result = await rule.evaluate("/var/log/system.log") - # Then: the evaluator does not trigger + # Then: the rule does not trigger assert result.matched is False @pytest.mark.asyncio async def test_ends_with_honors_case_sensitivity(self) -> None: """Test that ends_with mode respects case sensitivity settings.""" - # Given: two ends_with evaluators that differ only by case sensitivity - insensitive = ListEvaluator( - ListEvaluatorConfig( + # Given: two ends_with rules that differ only by case sensitivity + insensitive = ListRule( + ListRuleConfig( values=["/SOUL.MD"], logic="any", match_on="match", @@ -335,8 +335,8 @@ async def test_ends_with_honors_case_sensitivity(self) -> None: case_sensitive=False, ) ) - sensitive = ListEvaluator( - ListEvaluatorConfig( + sensitive = ListRule( + ListRuleConfig( values=["/SOUL.MD"], logic="any", match_on="match", @@ -349,16 +349,16 @@ async def test_ends_with_honors_case_sensitivity(self) -> None: insensitive_result = await insensitive.evaluate("/home/lev/agent-control/SOUL.md") sensitive_result = await sensitive.evaluate("/home/lev/agent-control/SOUL.md") - # Then: only the case-insensitive evaluator matches + # Then: only the case-insensitive rule matches assert insensitive_result.matched is True assert sensitive_result.matched is False @pytest.mark.asyncio async def test_ends_with_supports_no_match_allowlists(self) -> None: """Test that ends_with works with no_match for allowlist-style controls.""" - # Given: an ends_with evaluator configured as an allowlist - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: an ends_with rule configured as an allowlist + rule = ListRule( + ListRuleConfig( 
values=[".md", ".txt"], logic="any", match_on="no_match", @@ -368,8 +368,8 @@ async def test_ends_with_supports_no_match_allowlists(self) -> None: ) # When: evaluating one allowed and one disallowed path - allowed_result = await evaluator.evaluate("/home/lev/agent-control/SOUL.md") - denied_result = await evaluator.evaluate("/var/log/system.log") + allowed_result = await rule.evaluate("/home/lev/agent-control/SOUL.md") + denied_result = await rule.evaluate("/var/log/system.log") # Then: only the disallowed path triggers the control assert allowed_result.matched is False @@ -378,9 +378,9 @@ async def test_ends_with_supports_no_match_allowlists(self) -> None: @pytest.mark.asyncio async def test_ends_with_matches_plain_text_suffix(self) -> None: """Test that ends_with mode works for ordinary non-path strings.""" - # Given: an ends_with evaluator config for ordinary strings - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: an ends_with rule config for ordinary strings + rule = ListRule( + ListRuleConfig( values=["-control", ":done"], logic="any", match_on="match", @@ -390,8 +390,8 @@ async def test_ends_with_matches_plain_text_suffix(self) -> None: ) # When: evaluating values with and without the configured suffixes - matched_result = await evaluator.evaluate("agent-control") - unmatched_result = await evaluator.evaluate("done: please") + matched_result = await rule.evaluate("agent-control") + unmatched_result = await rule.evaluate("done: please") # Then: only the true suffix match triggers assert matched_result.matched is True @@ -401,8 +401,8 @@ async def test_ends_with_matches_plain_text_suffix(self) -> None: async def test_ends_with_escapes_regex_metacharacters(self) -> None: """Test that ends_with treats configured values as literals, not regex.""" # Given: suffixes containing regex metacharacters - evaluator = ListEvaluator( - ListEvaluatorConfig( + rule = ListRule( + ListRuleConfig( values=[".tar.gz+", "[beta]"], logic="any", match_on="match", @@ 
-412,8 +412,8 @@ async def test_ends_with_escapes_regex_metacharacters(self) -> None: ) # When: evaluating values that end with those literal suffixes - archive_result = await evaluator.evaluate("release-1.tar.gz+") - beta_result = await evaluator.evaluate("feature-[beta]") + archive_result = await rule.evaluate("release-1.tar.gz+") + beta_result = await rule.evaluate("feature-[beta]") # Then: both values match literally assert archive_result.matched is True @@ -422,9 +422,9 @@ async def test_ends_with_escapes_regex_metacharacters(self) -> None: @pytest.mark.asyncio async def test_ends_with_supports_list_input_with_any_logic(self) -> None: """Test that ends_with works on list inputs when any item matches.""" - # Given: an ends_with evaluator with any-item semantics - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: an ends_with rule with any-item semantics + rule = ListRule( + ListRuleConfig( values=[".md", "-control"], logic="any", match_on="match", @@ -434,18 +434,18 @@ async def test_ends_with_supports_list_input_with_any_logic(self) -> None: ) # When: evaluating a list where only one element matches - result = await evaluator.evaluate(["/var/log/system.log", "agent-control"]) + result = await rule.evaluate(["/var/log/system.log", "agent-control"]) - # Then: the evaluator triggers and reports the matching entry + # Then: the rule triggers and reports the matching entry assert result.matched is True assert result.metadata["matches"] == ["agent-control"] @pytest.mark.asyncio async def test_ends_with_supports_list_input_with_all_logic(self) -> None: """Test that ends_with respects all-item semantics for list inputs.""" - # Given: an ends_with evaluator with all-item semantics - evaluator = ListEvaluator( - ListEvaluatorConfig( + # Given: an ends_with rule with all-item semantics + rule = ListRule( + ListRuleConfig( values=[".md", ".txt"], logic="all", match_on="match", @@ -455,10 +455,10 @@ async def 
test_ends_with_supports_list_input_with_all_logic(self) -> None: ) # When: evaluating one fully matching list and one partially matching list - matching_result = await evaluator.evaluate( + matching_result = await rule.evaluate( ["/home/lev/agent-control/SOUL.md", "/tmp/cache/notes.txt"] ) - partial_result = await evaluator.evaluate( + partial_result = await rule.evaluate( ["/home/lev/agent-control/SOUL.md", "/var/log/system.log"] ) @@ -470,19 +470,19 @@ async def test_ends_with_supports_list_input_with_all_logic(self) -> None: async def test_legacy_empty_string_value_is_ignored_defensively(self) -> None: """Test that legacy invalid configs do not compile into a match-all regex.""" # Given: a legacy invalid config constructed without validation - config = ListEvaluatorConfig.model_construct( + config = ListRuleConfig.model_construct( values=[""], logic="any", match_on="match", match_mode="contains", case_sensitive=False, ) - evaluator = ListEvaluator(config) + rule = ListRule(config) # When: evaluating normal text against the legacy config - result = await evaluator.evaluate("Tell me a joke") + result = await rule.evaluate("Tell me a joke") - # Then: the evaluator ignores the empty control values + # Then: the rule ignores the empty control values assert result.matched is False assert result.message == "Empty control values - control ignored" @@ -490,19 +490,19 @@ async def test_legacy_empty_string_value_is_ignored_defensively(self) -> None: async def test_legacy_whitespace_only_value_is_ignored_defensively(self) -> None: """Test that legacy whitespace-only configs do not compile into pathological regexes.""" # Given: a legacy invalid config with a whitespace-only value - config = ListEvaluatorConfig.model_construct( + config = ListRuleConfig.model_construct( values=[" "], logic="any", match_on="match", match_mode="contains", case_sensitive=False, ) - evaluator = ListEvaluator(config) + rule = ListRule(config) # When: evaluating normal text against the legacy 
config - result = await evaluator.evaluate("Tell me a joke") + result = await rule.evaluate("Tell me a joke") - # Then: the evaluator ignores the empty control values + # Then: the rule ignores the empty control values assert result.matched is False assert result.message == "Empty control values - control ignored" @@ -510,18 +510,18 @@ async def test_legacy_whitespace_only_value_is_ignored_defensively(self) -> None async def test_legacy_empty_string_allowlist_does_not_block_all(self) -> None: """Test that legacy invalid allowlist configs do not block all inputs.""" # Given: a legacy invalid allowlist config constructed without validation - config = ListEvaluatorConfig.model_construct( + config = ListRuleConfig.model_construct( values=[""], logic="any", match_on="no_match", match_mode="contains", case_sensitive=False, ) - evaluator = ListEvaluator(config) + rule = ListRule(config) # When: evaluating normal text against the legacy config - result = await evaluator.evaluate("legitimate_value") + result = await rule.evaluate("legitimate_value") - # Then: the evaluator ignores the empty control values instead of blocking all input + # Then: the rule ignores the empty control values instead of blocking all input assert result.matched is False assert result.message == "Empty control values - control ignored" diff --git a/evaluators/builtin/tests/list/test_list_extra.py b/rules/builtin/tests/list/test_list_extra.py similarity index 58% rename from evaluators/builtin/tests/list/test_list_extra.py rename to rules/builtin/tests/list/test_list_extra.py index ff8fe90a..2f619145 100644 --- a/evaluators/builtin/tests/list/test_list_extra.py +++ b/rules/builtin/tests/list/test_list_extra.py @@ -3,43 +3,43 @@ from __future__ import annotations import pytest -from agent_control_evaluators.list.config import ListEvaluatorConfig -from agent_control_evaluators.list.evaluator import ListEvaluator +from agent_control_rules.list.config import ListRuleConfig +from 
agent_control_rules.list.rule import ListRule @pytest.mark.asyncio async def test_match_mode_contains_uses_word_boundary(): """contains mode matches whole words but rejects sub-word matches.""" - config = ListEvaluatorConfig(values=["admin"], match_mode="contains") - evaluator = ListEvaluator(config) + config = ListRuleConfig(values=["admin"], match_mode="contains") + rule = ListRule(config) - matched = await evaluator.evaluate("the admin user logged in") + matched = await rule.evaluate("the admin user logged in") assert matched.matched is True - not_matched = await evaluator.evaluate("administrator") # sub-word, no boundary + not_matched = await rule.evaluate("administrator") # sub-word, no boundary assert not_matched.matched is False @pytest.mark.asyncio async def test_match_mode_exact_is_the_default(): """No explicit mode uses anchored exact matching.""" - config = ListEvaluatorConfig(values=["admin"]) - evaluator = ListEvaluator(config) + config = ListRuleConfig(values=["admin"]) + rule = ListRule(config) - exact = await evaluator.evaluate("admin") + exact = await rule.evaluate("admin") assert exact.matched is True - partial = await evaluator.evaluate("admin user") # not anchored end + partial = await rule.evaluate("admin user") # not anchored end assert partial.matched is False @pytest.mark.asyncio async def test_data_none_returns_empty_input_message(): """None input is treated as empty and the control is ignored.""" - config = ListEvaluatorConfig(values=["x"]) - evaluator = ListEvaluator(config) + config = ListRuleConfig(values=["x"]) + rule = ListRule(config) - result = await evaluator.evaluate(None) + result = await rule.evaluate(None) assert result.matched is False assert result.message == "Empty input - control ignored" @@ -49,13 +49,13 @@ async def test_data_none_returns_empty_input_message(): @pytest.mark.asyncio async def test_message_truncates_match_list_at_five(): """More than five matches collapse into a ``(+N more)`` suffix.""" - config = 
ListEvaluatorConfig( + config = ListRuleConfig( values=["a", "b", "c", "d", "e", "f", "g"], logic="any", ) - evaluator = ListEvaluator(config) + rule = ListRule(config) - result = await evaluator.evaluate(["a", "b", "c", "d", "e", "f", "g"]) + result = await rule.evaluate(["a", "b", "c", "d", "e", "f", "g"]) assert result.matched is True # First five matches appear, the rest summarized. diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/__init__.py b/rules/builtin/tests/regex/__init__.py similarity index 100% rename from evaluators/contrib/budget/src/agent_control_evaluator_budget/__init__.py rename to rules/builtin/tests/regex/__init__.py diff --git a/evaluators/builtin/tests/regex/test_regex.py b/rules/builtin/tests/regex/test_regex.py similarity index 62% rename from evaluators/builtin/tests/regex/test_regex.py rename to rules/builtin/tests/regex/test_regex.py index 9df69560..1563b250 100644 --- a/evaluators/builtin/tests/regex/test_regex.py +++ b/rules/builtin/tests/regex/test_regex.py @@ -1,45 +1,45 @@ -"""Tests for the regex evaluator and its config validation.""" +"""Tests for the regex rule and its config validation.""" from __future__ import annotations import pytest -from agent_control_evaluators.regex.config import RegexEvaluatorConfig -from agent_control_evaluators.regex.evaluator import RegexEvaluator +from agent_control_rules.regex.config import RegexRuleConfig +from agent_control_rules.regex.rule import RegexRule class TestRegexConfig: """Pattern validation rejects invalid RE2 syntax at config time.""" def test_valid_pattern_accepted(self): - config = RegexEvaluatorConfig(pattern=r"\d{3}-\d{2}-\d{4}") + config = RegexRuleConfig(pattern=r"\d{3}-\d{2}-\d{4}") assert config.pattern == r"\d{3}-\d{2}-\d{4}" def test_empty_pattern_accepted(self): # Empty string is technically a valid RE2 pattern (matches everything). 
- config = RegexEvaluatorConfig(pattern="") + config = RegexRuleConfig(pattern="") assert config.pattern == "" def test_invalid_pattern_rejected(self): with pytest.raises(ValueError, match="Invalid regex pattern"): - RegexEvaluatorConfig(pattern="[invalid(regex") + RegexRuleConfig(pattern="[invalid(regex") def test_flags_default_to_none(self): - config = RegexEvaluatorConfig(pattern=r"\d+") + config = RegexRuleConfig(pattern=r"\d+") assert config.flags is None def test_flags_can_be_specified(self): - config = RegexEvaluatorConfig(pattern="secret", flags=["IGNORECASE"]) + config = RegexRuleConfig(pattern="secret", flags=["IGNORECASE"]) assert config.flags == ["IGNORECASE"] -class TestRegexEvaluator: +class TestRegexRule: """Pattern matching against arbitrary data.""" @pytest.mark.asyncio async def test_match_returns_matched_true(self): - evaluator = RegexEvaluator.from_dict({"pattern": r"\d{3}-\d{4}"}) + rule = RegexRule.from_dict({"pattern": r"\d{3}-\d{4}"}) - result = await evaluator.evaluate("call 555-1234 today") + result = await rule.evaluate("call 555-1234 today") assert result.matched is True assert result.confidence == 1.0 @@ -48,18 +48,18 @@ async def test_match_returns_matched_true(self): @pytest.mark.asyncio async def test_no_match_returns_matched_false(self): - evaluator = RegexEvaluator.from_dict({"pattern": r"\d{3}-\d{4}"}) + rule = RegexRule.from_dict({"pattern": r"\d{3}-\d{4}"}) - result = await evaluator.evaluate("no numbers here") + result = await rule.evaluate("no numbers here") assert result.matched is False assert "not found" in result.message @pytest.mark.asyncio async def test_none_data_returns_no_data_message(self): - evaluator = RegexEvaluator.from_dict({"pattern": r".*"}) + rule = RegexRule.from_dict({"pattern": r".*"}) - result = await evaluator.evaluate(None) + result = await rule.evaluate(None) assert result.matched is False assert result.message == "No data to match" @@ -67,49 +67,49 @@ async def 
test_none_data_returns_no_data_message(self): @pytest.mark.asyncio async def test_non_string_data_is_coerced(self): """Non-string inputs are stringified before matching.""" - evaluator = RegexEvaluator.from_dict({"pattern": r"^42$"}) + rule = RegexRule.from_dict({"pattern": r"^42$"}) - result = await evaluator.evaluate(42) + result = await rule.evaluate(42) assert result.matched is True @pytest.mark.asyncio async def test_ignorecase_flag_short_form(self): """The ``I`` short form is treated the same as ``IGNORECASE``.""" - evaluator = RegexEvaluator.from_dict( + rule = RegexRule.from_dict( {"pattern": "SECRET", "flags": ["I"]}, ) - result = await evaluator.evaluate("the secret value") + result = await rule.evaluate("the secret value") assert result.matched is True @pytest.mark.asyncio async def test_ignorecase_flag_long_form(self): - evaluator = RegexEvaluator.from_dict( + rule = RegexRule.from_dict( {"pattern": "secret", "flags": ["IGNORECASE"]}, ) - result = await evaluator.evaluate("THE SECRET VALUE") + result = await rule.evaluate("THE SECRET VALUE") assert result.matched is True @pytest.mark.asyncio async def test_unknown_flag_is_ignored(self): """RE2 supports a narrow flag set; unknown flag names must not raise.""" - evaluator = RegexEvaluator.from_dict( + rule = RegexRule.from_dict( {"pattern": "x", "flags": ["MULTILINE"]}, ) - result = await evaluator.evaluate("xyz") + result = await rule.evaluate("xyz") # Should still work — unknown flag is silently dropped, not an error. 
assert result.matched is True @pytest.mark.asyncio async def test_case_sensitive_by_default(self): - evaluator = RegexEvaluator.from_dict({"pattern": "Secret"}) + rule = RegexRule.from_dict({"pattern": "Secret"}) - result = await evaluator.evaluate("the secret value") + result = await rule.evaluate("the secret value") assert result.matched is False diff --git a/rules/builtin/tests/sql/__init__.py b/rules/builtin/tests/sql/__init__.py new file mode 100644 index 00000000..53aeb828 --- /dev/null +++ b/rules/builtin/tests/sql/__init__.py @@ -0,0 +1 @@ +"""Tests for the SQL rule.""" diff --git a/evaluators/builtin/tests/sql/test_sql.py b/rules/builtin/tests/sql/test_sql.py similarity index 72% rename from evaluators/builtin/tests/sql/test_sql.py rename to rules/builtin/tests/sql/test_sql.py index c7eebe9a..1df5fdb2 100644 --- a/evaluators/builtin/tests/sql/test_sql.py +++ b/rules/builtin/tests/sql/test_sql.py @@ -1,21 +1,21 @@ -"""Tests for SQL evaluator.""" +"""Tests for SQL rule.""" from unittest.mock import patch import pytest from pydantic import ValidationError -from agent_control_models import EvaluatorResult -from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig +from agent_control_models import RuleResult +from agent_control_rules.sql import SQLRule, SQLRuleConfig -class TestEvaluatorResultValidator: - """Tests for EvaluatorResult model validator.""" +class TestRuleResultValidator: + """Tests for RuleResult model validator.""" def test_error_with_matched_true_raises_validation_error(self): """Should raise ValidationError when error is set with matched=True.""" with pytest.raises(ValidationError) as exc_info: - EvaluatorResult( + RuleResult( matched=True, confidence=0.5, error="Some error", @@ -24,7 +24,7 @@ def test_error_with_matched_true_raises_validation_error(self): def test_error_with_matched_false_is_valid(self): """Should allow error when matched=False.""" - result = EvaluatorResult( + result = RuleResult( matched=False, 
confidence=0.0, error="Some error", @@ -35,7 +35,7 @@ def test_error_with_matched_false_is_valid(self): def test_no_error_with_matched_true_is_valid(self): """Should allow matched=True when no error is set.""" - result = EvaluatorResult( + result = RuleResult( matched=True, confidence=1.0, message="Blocked", @@ -45,20 +45,20 @@ def test_no_error_with_matched_true_is_valid(self): assert result.matched is True -class TestEvaluatorErrorHandling: - """Tests for evaluator error handling (unexpected exceptions).""" +class TestRuleErrorHandling: + """Tests for rule error handling (unexpected exceptions).""" @pytest.mark.asyncio async def test_unexpected_exception_returns_error(self): - """Should return error field when evaluator encounters unexpected exception.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + """Should return error field when rule encounters unexpected exception.""" + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) # Simulate an unexpected exception in the internal method with patch.object( - evaluator, "_evaluate_sync", side_effect=RuntimeError("Unexpected failure") + rule, "_evaluate_sync", side_effect=RuntimeError("Unexpected failure") ): - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") # Check error first (convention: error field takes precedence) assert result.error is not None @@ -72,13 +72,13 @@ async def test_unexpected_exception_returns_error(self): @pytest.mark.asyncio async def test_memory_error_returns_error(self): """Should handle MemoryError gracefully.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) with patch.object( - evaluator, "_evaluate_sync", side_effect=MemoryError("Out of memory") + rule, "_evaluate_sync", side_effect=MemoryError("Out of memory") ): - result = await 
evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") # Check error first assert result.error is not None @@ -88,31 +88,31 @@ async def test_memory_error_returns_error(self): @pytest.mark.asyncio async def test_keyboard_interrupt_propagates(self): """KeyboardInterrupt should propagate (not be caught).""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) with patch.object( - evaluator, "_evaluate_sync", side_effect=KeyboardInterrupt() + rule, "_evaluate_sync", side_effect=KeyboardInterrupt() ): with pytest.raises(KeyboardInterrupt): - await evaluator.evaluate("SELECT * FROM users") + await rule.evaluate("SELECT * FROM users") @pytest.mark.asyncio async def test_normal_validation_still_works_after_error(self): - """Evaluator should continue working after an error.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + """Rule should continue working after an error.""" + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) # First call fails with patch.object( - evaluator, "_evaluate_sync", side_effect=RuntimeError("Temporary failure") + rule, "_evaluate_sync", side_effect=RuntimeError("Temporary failure") ): - error_result = await evaluator.evaluate("SELECT * FROM users") + error_result = await rule.evaluate("SELECT * FROM users") assert error_result.error is not None # Second call should work normally (no patch) - normal_result = await evaluator.evaluate("SELECT * FROM users") + normal_result = await rule.evaluate("SELECT * FROM users") assert normal_result.error is None assert normal_result.matched is False @@ -123,10 +123,10 @@ class TestSQLMultiStatement: @pytest.mark.asyncio async def test_allow_multi_statements_by_default(self): """Should allow multiple statements by default.""" - config = SQLEvaluatorConfig() - evaluator = 
SQLEvaluator(config) + config = SQLRuleConfig() + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users; SELECT * FROM orders" ) assert result.error is None @@ -135,16 +135,16 @@ async def test_allow_multi_statements_by_default(self): @pytest.mark.asyncio async def test_block_multi_statements_when_disabled(self): """Should block multiple statements when allow_multi_statements=False.""" - config = SQLEvaluatorConfig(allow_multi_statements=False) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allow_multi_statements=False) + rule = SQLRule(config) # Single statement should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False # Multiple statements should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users; DELETE FROM logs" ) assert result.error is None @@ -155,18 +155,18 @@ async def test_block_multi_statements_when_disabled(self): @pytest.mark.asyncio async def test_max_statements_limit(self): """Should enforce max_statements limit.""" - config = SQLEvaluatorConfig(max_statements=2) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_statements=2) + rule = SQLRule(config) # 2 statements should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users; SELECT * FROM orders" ) assert result.error is None assert result.matched is False # 3 statements should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users; SELECT * FROM orders; SELECT 1" ) assert result.error is None @@ -179,7 +179,7 @@ async def test_max_statements_with_allow_false(self): """Should validate that max_statements requires allow_multi_statements.""" # This should raise a validation error during config creation with pytest.raises(ValueError, match="max_statements is 
only applicable"): - SQLEvaluatorConfig(allow_multi_statements=False, max_statements=3) + SQLRuleConfig(allow_multi_statements=False, max_statements=3) class TestSQLOperations: @@ -188,10 +188,10 @@ class TestSQLOperations: @pytest.mark.asyncio async def test_block_drop_operation(self): """Should block DROP operations.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate("DROP TABLE users") + result = await rule.evaluate("DROP TABLE users") assert result.error is None assert result.matched is True @@ -201,122 +201,122 @@ async def test_block_drop_operation(self): @pytest.mark.asyncio async def test_block_multiple_dangerous_operations(self): """Should block multiple dangerous operations.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( blocked_operations=["DROP", "DELETE", "TRUNCATE"] ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Test DROP - result = await evaluator.evaluate("DROP TABLE users") + result = await rule.evaluate("DROP TABLE users") assert result.error is None assert result.matched is True # Test DELETE - result = await evaluator.evaluate("DELETE FROM users WHERE id = 1") + result = await rule.evaluate("DELETE FROM users WHERE id = 1") assert result.error is None assert result.matched is True # Test TRUNCATE - result = await evaluator.evaluate("TRUNCATE TABLE users") + result = await rule.evaluate("TRUNCATE TABLE users") assert result.error is None assert result.matched is True @pytest.mark.asyncio async def test_allow_safe_operations_when_blocking_dangerous(self): """Should allow safe operations when blocking dangerous ones.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( blocked_operations=["DROP", "DELETE", "TRUNCATE"] ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) - result = await evaluator.evaluate("SELECT * FROM users") + result = await 
rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False - result = await evaluator.evaluate("INSERT INTO users (name) VALUES ('test')") + result = await rule.evaluate("INSERT INTO users (name) VALUES ('test')") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_allowlist_mode_select_only(self): """Should allow only SELECT when in allowlist mode.""" - config = SQLEvaluatorConfig(allowed_operations=["SELECT"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_operations=["SELECT"]) + rule = SQLRule(config) # SELECT should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False # Other operations should be blocked - result = await evaluator.evaluate("INSERT INTO users (name) VALUES ('test')") + result = await rule.evaluate("INSERT INTO users (name) VALUES ('test')") assert result.error is None assert result.matched is True assert "INSERT" in result.metadata["blocked"] - result = await evaluator.evaluate("UPDATE users SET name = 'new' WHERE id = 1") + result = await rule.evaluate("UPDATE users SET name = 'new' WHERE id = 1") assert result.error is None assert result.matched is True - result = await evaluator.evaluate("DELETE FROM users WHERE id = 1") + result = await rule.evaluate("DELETE FROM users WHERE id = 1") assert result.error is None assert result.matched is True @pytest.mark.asyncio async def test_block_ddl_flag(self): """Should block all DDL operations when block_ddl=True.""" - config = SQLEvaluatorConfig(block_ddl=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(block_ddl=True) + rule = SQLRule(config) # DDL operations should be blocked - result = await evaluator.evaluate("CREATE TABLE test (id INT)") + result = await rule.evaluate("CREATE TABLE test (id INT)") assert result.error is None assert result.matched is True - 
result = await evaluator.evaluate("ALTER TABLE users ADD COLUMN age INT") + result = await rule.evaluate("ALTER TABLE users ADD COLUMN age INT") assert result.error is None assert result.matched is True - result = await evaluator.evaluate("DROP TABLE users") + result = await rule.evaluate("DROP TABLE users") assert result.error is None assert result.matched is True - result = await evaluator.evaluate("TRUNCATE TABLE users") + result = await rule.evaluate("TRUNCATE TABLE users") assert result.error is None assert result.matched is True # DML operations should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_block_dcl_flag(self): """Should block all DCL operations when block_dcl=True.""" - config = SQLEvaluatorConfig(block_dcl=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(block_dcl=True) + rule = SQLRule(config) # DCL operations should be blocked - result = await evaluator.evaluate("GRANT SELECT ON users TO user1") + result = await rule.evaluate("GRANT SELECT ON users TO user1") assert result.error is None assert result.matched is True - result = await evaluator.evaluate("REVOKE SELECT ON users FROM user1") + result = await rule.evaluate("REVOKE SELECT ON users FROM user1") assert result.error is None assert result.matched is True # Other operations should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_multiple_statements(self): """Should detect blocked operations in multiple statements.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await 
rule.evaluate( "SELECT * FROM users; DROP TABLE users; SELECT 1" ) assert result.error is None @@ -330,20 +330,20 @@ class TestSQLTableAccess: @pytest.mark.asyncio async def test_allow_specific_tables(self): """Should allow only specific tables.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "orders"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "orders"]) + rule = SQLRule(config) # Allowed tables should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False - result = await evaluator.evaluate("SELECT * FROM orders") + result = await rule.evaluate("SELECT * FROM orders") assert result.error is None assert result.matched is False # Other tables should be blocked - result = await evaluator.evaluate("SELECT * FROM admin") + result = await rule.evaluate("SELECT * FROM admin") assert result.error is None assert result.matched is True assert "admin" in result.message @@ -351,86 +351,86 @@ async def test_allow_specific_tables(self): @pytest.mark.asyncio async def test_block_specific_tables(self): """Should block specific tables.""" - config = SQLEvaluatorConfig(blocked_tables=["admin", "secrets"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_tables=["admin", "secrets"]) + rule = SQLRule(config) # Blocked tables should be blocked - result = await evaluator.evaluate("SELECT * FROM admin") + result = await rule.evaluate("SELECT * FROM admin") assert result.error is None assert result.matched is True - result = await evaluator.evaluate("SELECT * FROM secrets") + result = await rule.evaluate("SELECT * FROM secrets") assert result.error is None assert result.matched is True # Other tables should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False 
@pytest.mark.asyncio async def test_block_system_schemas(self): """Should block system schemas.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( blocked_schemas=["pg_catalog", "information_schema"] ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # System schemas should be blocked - result = await evaluator.evaluate("SELECT * FROM pg_catalog.pg_tables") + result = await rule.evaluate("SELECT * FROM pg_catalog.pg_tables") assert result.error is None assert result.matched is True assert "pg_catalog" in result.message - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM information_schema.tables" ) assert result.error is None assert result.matched is True # Regular queries should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_qualified_table_names(self): """Should handle qualified table names (schema.table).""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( allowed_schemas=["public"], blocked_tables=["admin"] ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Public schema should pass - result = await evaluator.evaluate("SELECT * FROM public.users") + result = await rule.evaluate("SELECT * FROM public.users") assert result.error is None assert result.matched is False # Non-public schema should be blocked - result = await evaluator.evaluate("SELECT * FROM private.users") + result = await rule.evaluate("SELECT * FROM private.users") assert result.error is None assert result.matched is True # Blocked table even in allowed schema should be blocked - result = await evaluator.evaluate("SELECT * FROM public.admin") + result = await rule.evaluate("SELECT * FROM public.admin") assert result.error is None assert result.matched is True @pytest.mark.asyncio async def test_multiple_tables_in_query(self): """Should check all tables in a 
query.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "orders"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "orders"]) + rule = SQLRule(config) # All allowed tables - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users JOIN orders ON users.id = orders.user_id" ) assert result.error is None assert result.matched is False # One disallowed table - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users JOIN admin ON users.id = admin.user_id" ) assert result.error is None @@ -441,36 +441,36 @@ async def test_multiple_tables_in_query(self): async def test_case_sensitivity_tables(self): """Should respect case sensitivity setting for tables.""" # Case insensitive (default) - config = SQLEvaluatorConfig( + config = SQLRuleConfig( blocked_tables=["admin"], case_sensitive=False ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) - result = await evaluator.evaluate("SELECT * FROM Admin") + result = await rule.evaluate("SELECT * FROM Admin") assert result.error is None assert result.matched is True - result = await evaluator.evaluate("SELECT * FROM ADMIN") + result = await rule.evaluate("SELECT * FROM ADMIN") assert result.error is None assert result.matched is True # Case sensitive - config = SQLEvaluatorConfig(blocked_tables=["admin"], case_sensitive=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_tables=["admin"], case_sensitive=True) + rule = SQLRule(config) - result = await evaluator.evaluate("SELECT * FROM admin") + result = await rule.evaluate("SELECT * FROM admin") assert result.error is None assert result.matched is True - result = await evaluator.evaluate("SELECT * FROM Admin") + result = await rule.evaluate("SELECT * FROM Admin") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_cte_not_treated_as_table_violation(self): """Should not treat CTEs as unauthorized table 
access.""" - config = SQLEvaluatorConfig(allowed_tables=["users"], case_sensitive=False) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users"], case_sensitive=False) + rule = SQLRule(config) # CTE 'temp_users' is defined locally, not in allowed_tables # This should pass because CTEs are not external tables @@ -480,7 +480,7 @@ async def test_cte_not_treated_as_table_violation(self): ) SELECT * FROM temp_users """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is False @@ -491,7 +491,7 @@ async def test_cte_not_treated_as_table_violation(self): premium_users AS (SELECT * FROM active_users WHERE premium = true) SELECT * FROM premium_users """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is False @@ -502,7 +502,7 @@ async def test_cte_not_treated_as_table_violation(self): ) SELECT * FROM temp_data """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is True assert "admin" in result.message @@ -514,26 +514,26 @@ class TestSQLColumnPresence: @pytest.mark.asyncio async def test_require_column_in_where_clause(self): """Should require specific column in WHERE clause.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where" ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Query with tenant_id in WHERE - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE tenant_id = 123" ) assert result.error is None assert result.matched is False # Query without tenant_id in WHERE - should be blocked - result = await evaluator.evaluate("SELECT * FROM users WHERE id = 1") + result = await rule.evaluate("SELECT * FROM users WHERE id = 1") assert result.error is None assert result.matched 
is True assert "tenant_id" in result.message # Query with tenant_id in SELECT but not WHERE - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT tenant_id FROM users WHERE id = 1" ) assert result.error is None @@ -542,22 +542,22 @@ async def test_require_column_in_where_clause(self): @pytest.mark.asyncio async def test_require_column_in_select_clause(self): """Should require specific column in SELECT clause.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["id", "created_at"], column_presence_logic="all", column_context="select" ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Query with both columns in SELECT - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT id, name, created_at FROM users" ) assert result.error is None assert result.matched is False # Query missing one column - should be blocked - result = await evaluator.evaluate("SELECT id, name FROM users") + result = await rule.evaluate("SELECT id, name FROM users") assert result.error is None assert result.matched is True assert "created_at" in result.message @@ -565,72 +565,72 @@ async def test_require_column_in_select_clause(self): @pytest.mark.asyncio async def test_require_column_anywhere(self): """Should require column anywhere in query.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["user_id"], column_context=None ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Column in SELECT - should pass - result = await evaluator.evaluate("SELECT user_id FROM logs") + result = await rule.evaluate("SELECT user_id FROM logs") assert result.error is None assert result.matched is False # Column in WHERE - should pass - result = await evaluator.evaluate("SELECT * FROM logs WHERE user_id = 1") + result = await rule.evaluate("SELECT * FROM logs WHERE user_id = 1") assert result.error is None assert result.matched is False # Column not present - 
should be blocked - result = await evaluator.evaluate("SELECT * FROM logs WHERE id = 1") + result = await rule.evaluate("SELECT * FROM logs WHERE id = 1") assert result.error is None assert result.matched is True @pytest.mark.asyncio async def test_column_presence_any_logic(self): """Should require at least one column with 'any' logic.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["user_id", "admin_id"], column_presence_logic="any", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Has user_id - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM logs WHERE user_id = 1" ) assert result.error is None assert result.matched is False # Has admin_id - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM logs WHERE admin_id = 1" ) assert result.error is None assert result.matched is False # Has neither - should be blocked - result = await evaluator.evaluate("SELECT * FROM logs WHERE id = 1") + result = await rule.evaluate("SELECT * FROM logs WHERE id = 1") assert result.error is None assert result.matched is True @pytest.mark.asyncio async def test_column_presence_all_logic(self): """Should require all columns with 'all' logic.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["user_id", "timestamp"], column_presence_logic="all", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Has both columns - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM logs WHERE user_id = 1 AND timestamp > '2024-01-01'" ) assert result.error is None assert result.matched is False # Has only one column - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM logs WHERE user_id = 1" ) assert result.error is None @@ -641,34 +641,34 @@ async def test_column_presence_all_logic(self): async def test_case_sensitivity_columns(self): 
"""Should respect case sensitivity for columns.""" # Case insensitive (default) - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", case_sensitive=False, ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE Tenant_ID = 123" ) assert result.error is None assert result.matched is False # Case sensitive - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", case_sensitive=True, ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE tenant_id = 123" ) assert result.error is None assert result.matched is False - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE Tenant_ID = 123" ) assert result.error is None @@ -678,14 +678,14 @@ async def test_case_sensitivity_columns(self): async def test_column_extraction_with_join_queries(self): """Should extract columns from JOIN queries correctly.""" # Test WHERE context with JOIN - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # JOIN with tenant_id in WHERE - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT users.id, orders.total FROM users " "JOIN orders ON users.id = orders.user_id " "WHERE users.tenant_id = 123" @@ -694,7 +694,7 @@ async def test_column_extraction_with_join_queries(self): assert result.matched is False # JOIN without tenant_id in WHERE - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT users.id, orders.total FROM users " "JOIN orders ON users.id = orders.user_id " "WHERE orders.id = 1" @@ -704,15 +704,15 @@ async def 
test_column_extraction_with_join_queries(self): assert "tenant_id" in result.message # Test SELECT context with JOIN - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["user_id", "tenant_id"], column_context="select", column_presence_logic="all", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # JOIN with both required columns in SELECT - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT users.user_id, users.tenant_id, orders.total " "FROM users JOIN orders ON users.id = orders.user_id" ) @@ -720,7 +720,7 @@ async def test_column_extraction_with_join_queries(self): assert result.matched is False # JOIN missing one required column in SELECT - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT users.user_id, orders.total " "FROM users JOIN orders ON users.id = orders.user_id" ) @@ -729,14 +729,14 @@ async def test_column_extraction_with_join_queries(self): assert "tenant_id" in result.message # Test columns anywhere (None context) with JOIN - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context=None, ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # tenant_id in SELECT - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT users.tenant_id, orders.total FROM users " "JOIN orders ON users.id = orders.user_id" ) @@ -744,7 +744,7 @@ async def test_column_extraction_with_join_queries(self): assert result.matched is False # tenant_id in WHERE - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT users.id, orders.total FROM users " "JOIN orders ON users.id = orders.user_id " "WHERE users.tenant_id = 123" @@ -753,7 +753,7 @@ async def test_column_extraction_with_join_queries(self): assert result.matched is False # tenant_id in JOIN condition - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( 
"SELECT users.id, orders.total FROM users " "JOIN orders ON users.tenant_id = orders.tenant_id" ) @@ -761,7 +761,7 @@ async def test_column_extraction_with_join_queries(self): assert result.matched is False # tenant_id not present anywhere - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT users.id, orders.total FROM users " "JOIN orders ON users.id = orders.user_id " "WHERE orders.status = 'active'" @@ -777,16 +777,16 @@ class TestSQLLimits: @pytest.mark.asyncio async def test_require_limit_on_select(self): """Should require LIMIT clause on SELECT queries.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # SELECT with LIMIT should pass - result = await evaluator.evaluate("SELECT * FROM users LIMIT 100") + result = await rule.evaluate("SELECT * FROM users LIMIT 100") assert result.error is None assert result.matched is False # SELECT without LIMIT should be blocked - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is True assert "LIMIT" in result.message @@ -795,38 +795,38 @@ async def test_require_limit_on_select(self): @pytest.mark.asyncio async def test_require_limit_only_affects_select(self): """Should only check LIMIT on SELECT statements.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # INSERT without LIMIT should pass (LIMIT not applicable) - result = await evaluator.evaluate( + result = await rule.evaluate( "INSERT INTO users (name) VALUES ('test')" ) assert result.error is None assert result.matched is False # DELETE without LIMIT should pass - result = await evaluator.evaluate("DELETE FROM users WHERE id = 1") + result = await rule.evaluate("DELETE FROM users WHERE id = 1") assert 
result.error is None assert result.matched is False @pytest.mark.asyncio async def test_max_limit_enforcement(self): """Should enforce maximum LIMIT value.""" - config = SQLEvaluatorConfig(max_limit=1000) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_limit=1000) + rule = SQLRule(config) # LIMIT within bounds should pass - result = await evaluator.evaluate("SELECT * FROM users LIMIT 100") + result = await rule.evaluate("SELECT * FROM users LIMIT 100") assert result.error is None assert result.matched is False - result = await evaluator.evaluate("SELECT * FROM users LIMIT 1000") + result = await rule.evaluate("SELECT * FROM users LIMIT 1000") assert result.error is None assert result.matched is False # LIMIT exceeding max should be blocked - result = await evaluator.evaluate("SELECT * FROM users LIMIT 10000") + result = await rule.evaluate("SELECT * FROM users LIMIT 10000") assert result.error is None assert result.matched is True assert "10000" in result.message @@ -837,18 +837,18 @@ async def test_max_limit_enforcement(self): @pytest.mark.asyncio async def test_limit_with_offset(self): """Should handle LIMIT with OFFSET correctly.""" - config = SQLEvaluatorConfig(max_limit=1000) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_limit=1000) + rule = SQLRule(config) # LIMIT + OFFSET within bounds should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users LIMIT 100 OFFSET 50" ) assert result.error is None assert result.matched is False # LIMIT exceeding max with OFFSET should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users LIMIT 5000 OFFSET 10" ) assert result.error is None @@ -857,33 +857,33 @@ async def test_limit_with_offset(self): @pytest.mark.asyncio async def test_limit_all_allowed(self): """Should allow LIMIT ALL (indeterminate limits are allowed).""" - config = SQLEvaluatorConfig(max_limit=1000) - evaluator = SQLEvaluator(config) + 
config = SQLRuleConfig(max_limit=1000) + rule = SQLRule(config) # LIMIT ALL should be allowed (indeterminate limits are skipped) - result = await evaluator.evaluate("SELECT * FROM users LIMIT ALL") + result = await rule.evaluate("SELECT * FROM users LIMIT ALL") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_require_and_max_limit_combined(self): """Should enforce both require_limit and max_limit.""" - config = SQLEvaluatorConfig(require_limit=True, max_limit=500) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True, max_limit=500) + rule = SQLRule(config) # Valid query with LIMIT - result = await evaluator.evaluate("SELECT * FROM users LIMIT 100") + result = await rule.evaluate("SELECT * FROM users LIMIT 100") assert result.error is None assert result.matched is False # Missing LIMIT - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is True assert "must have a LIMIT" in result.message # LIMIT too high - result = await evaluator.evaluate("SELECT * FROM users LIMIT 1000") + result = await rule.evaluate("SELECT * FROM users LIMIT 1000") assert result.error is None assert result.matched is True assert "exceeds maximum" in result.message @@ -891,18 +891,18 @@ async def test_require_and_max_limit_combined(self): @pytest.mark.asyncio async def test_multi_select_statements_limit_check(self): """Should check LIMIT on all SELECT statements.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # All SELECTs have LIMIT - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users LIMIT 10; SELECT * FROM orders LIMIT 20" ) assert result.error is None assert result.matched is False # One SELECT missing LIMIT - should be blocked - result = await 
evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users LIMIT 10; SELECT * FROM orders" ) assert result.error is None @@ -915,24 +915,24 @@ class TestCombinedControls: @pytest.mark.asyncio async def test_operation_and_table_restrictions(self): """Should enforce both operation and table restrictions.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( allowed_operations=["SELECT"], allowed_tables=["users", "orders"], ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Both constraints satisfied - should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False # Invalid operation - should be blocked - result = await evaluator.evaluate("DELETE FROM users WHERE id = 1") + result = await rule.evaluate("DELETE FROM users WHERE id = 1") assert result.error is None assert result.matched is True # Invalid table - should be blocked - result = await evaluator.evaluate("SELECT * FROM admin") + result = await rule.evaluate("SELECT * FROM admin") assert result.error is None assert result.matched is True @@ -947,40 +947,40 @@ async def test_allowlist_with_block_ddl_enforces_both(self): This tests the fix for the critical security bug where the allowlist was ignored when combined with block_ddl/block_dcl. 
""" - config = SQLEvaluatorConfig(allowed_operations=["SELECT"], block_ddl=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_operations=["SELECT"], block_ddl=True) + rule = SQLRule(config) # SELECT should pass (in allowlist, not DDL) - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False # DROP should be blocked (DDL) - result = await evaluator.evaluate("DROP TABLE users") + result = await rule.evaluate("DROP TABLE users") assert result.error is None assert result.matched is True assert "DROP" in result.metadata["blocked"] # INSERT should be blocked (not in allowlist) - result = await evaluator.evaluate("INSERT INTO users (name) VALUES ('test')") + result = await rule.evaluate("INSERT INTO users (name) VALUES ('test')") assert result.error is None assert result.matched is True assert "INSERT" in result.metadata["blocked"] # UPDATE should be blocked (not in allowlist) - result = await evaluator.evaluate("UPDATE users SET name = 'new'") + result = await rule.evaluate("UPDATE users SET name = 'new'") assert result.error is None assert result.matched is True assert "UPDATE" in result.metadata["blocked"] # DELETE should be blocked (not in allowlist) - result = await evaluator.evaluate("DELETE FROM users WHERE id = 1") + result = await rule.evaluate("DELETE FROM users WHERE id = 1") assert result.error is None assert result.matched is True assert "DELETE" in result.metadata["blocked"] # TRUNCATE should be blocked (both DDL and not in allowlist) - result = await evaluator.evaluate("TRUNCATE TABLE users") + result = await rule.evaluate("TRUNCATE TABLE users") assert result.error is None assert result.matched is True assert "TRUNCATE" in result.metadata["blocked"] @@ -988,22 +988,22 @@ async def test_allowlist_with_block_ddl_enforces_both(self): @pytest.mark.asyncio async def test_allowlist_with_block_dcl_enforces_both(self): """Test 
allowed_operations + block_dcl combination.""" - config = SQLEvaluatorConfig(allowed_operations=["SELECT"], block_dcl=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_operations=["SELECT"], block_dcl=True) + rule = SQLRule(config) # SELECT should pass - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False # GRANT should be blocked (DCL) - result = await evaluator.evaluate("GRANT SELECT ON users TO user1") + result = await rule.evaluate("GRANT SELECT ON users TO user1") assert result.error is None assert result.matched is True assert "GRANT" in result.metadata["blocked"] # INSERT should be blocked (not in allowlist) - result = await evaluator.evaluate("INSERT INTO users (name) VALUES ('test')") + result = await rule.evaluate("INSERT INTO users (name) VALUES ('test')") assert result.error is None assert result.matched is True assert "INSERT" in result.metadata["blocked"] @@ -1011,27 +1011,27 @@ async def test_allowlist_with_block_dcl_enforces_both(self): @pytest.mark.asyncio async def test_operation_and_column_presence(self): """Should enforce operation restrictions and column presence.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( allowed_operations=["SELECT"], required_columns=["tenant_id"], column_context="where", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Both constraints satisfied - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE tenant_id = 123" ) assert result.error is None assert result.matched is False # Missing column - should be blocked - result = await evaluator.evaluate("SELECT * FROM users WHERE id = 1") + result = await rule.evaluate("SELECT * FROM users WHERE id = 1") assert result.error is None assert result.matched is True # Invalid operation - should be blocked - result = await evaluator.evaluate( + result = await 
rule.evaluate( "DELETE FROM users WHERE tenant_id = 123" ) assert result.error is None @@ -1040,7 +1040,7 @@ async def test_operation_and_column_presence(self): @pytest.mark.asyncio async def test_all_features_combined(self): """Should enforce all validation types together.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( allowed_operations=["SELECT", "INSERT"], allowed_tables=["users", "orders"], required_columns=["tenant_id"], @@ -1048,45 +1048,45 @@ async def test_all_features_combined(self): require_limit=True, max_limit=1000, ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # All constraints satisfied - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE tenant_id = 123 LIMIT 100" ) assert result.error is None assert result.matched is False # Missing LIMIT - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE tenant_id = 123" ) assert result.error is None assert result.matched is True # LIMIT too high - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE tenant_id = 123 LIMIT 5000" ) assert result.error is None assert result.matched is True # Operation violation - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "DELETE FROM users WHERE tenant_id = 123" ) assert result.error is None assert result.matched is True # Table violation - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM admin WHERE tenant_id = 123 LIMIT 100" ) assert result.error is None assert result.matched is True # Column violation - should be blocked - result = await evaluator.evaluate("SELECT * FROM users WHERE id = 1 LIMIT 100") + result = await rule.evaluate("SELECT * FROM users WHERE id = 1 LIMIT 100") assert result.error is None assert result.matched is True @@ -1097,10 +1097,10 @@ class 
TestEdgeCases: @pytest.mark.asyncio async def test_none_input(self): """Should handle None input.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate(None) + result = await rule.evaluate(None) assert result.error is None assert result.matched is False assert "No SQL query" in result.message @@ -1108,10 +1108,10 @@ async def test_none_input(self): @pytest.mark.asyncio async def test_empty_string(self): """Should handle empty string input.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate("") + result = await rule.evaluate("") assert result.error is None assert result.matched is False assert "Empty" in result.message @@ -1119,10 +1119,10 @@ async def test_empty_string(self): @pytest.mark.asyncio async def test_whitespace_only(self): """Should handle whitespace-only input.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate(" ") + result = await rule.evaluate(" ") assert result.error is None assert result.matched is False assert "Empty" in result.message @@ -1130,53 +1130,53 @@ async def test_whitespace_only(self): @pytest.mark.asyncio async def test_malformed_sql_blocked(self): """Should block malformed SQL (invalid SQL fails validation).""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate("This is not valid SQL at all!!!") + result = await rule.evaluate("This is not valid SQL at all!!!") assert result.error is None assert result.matched 
is True # Invalid SQL is blocked assert result.confidence == 1.0 - assert result.error is None # Not a evaluator error, just bad input + assert result.error is None # Not a rule error, just bad input assert "pars" in result.message.lower() @pytest.mark.asyncio async def test_empty_config(self): """Should pass all queries with empty config.""" - config = SQLEvaluatorConfig() - evaluator = SQLEvaluator(config) + config = SQLRuleConfig() + rule = SQLRule(config) - result = await evaluator.evaluate("DROP TABLE users") + result = await rule.evaluate("DROP TABLE users") assert result.error is None assert result.matched is False - result = await evaluator.evaluate("SELECT * FROM admin") + result = await rule.evaluate("SELECT * FROM admin") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_dict_input_with_query_key(self): """Should extract query from dict with 'query' key.""" - config = SQLEvaluatorConfig(blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate({"query": "DROP TABLE users"}) + result = await rule.evaluate({"query": "DROP TABLE users"}) assert result.error is None assert result.matched is True @pytest.mark.asyncio async def test_non_table_query_with_table_restrictions(self): """Should allow non-table queries even with table restrictions.""" - config = SQLEvaluatorConfig(allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users"]) + rule = SQLRule(config) # SELECT without FROM clause - result = await evaluator.evaluate("SELECT 1") + result = await rule.evaluate("SELECT 1") assert result.error is None assert result.matched is False # SELECT with expression - result = await evaluator.evaluate("SELECT 1 + 1 AS result") + result = await rule.evaluate("SELECT 1 + 1 AS result") assert result.error is None assert result.matched is False @@ -1187,11 
+1187,11 @@ class TestSQLSubqueries: @pytest.mark.asyncio async def test_subquery_in_where_clause(self): """Should extract tables from subqueries in WHERE clause.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "orders"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "orders"]) + rule = SQLRule(config) # Subquery with allowed tables - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM orders WHERE total > 100)" ) @@ -1199,7 +1199,7 @@ async def test_subquery_in_where_clause(self): assert result.matched is False # Subquery with blocked table - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM admin WHERE active = true)" ) @@ -1210,11 +1210,11 @@ async def test_subquery_in_where_clause(self): @pytest.mark.asyncio async def test_subquery_in_from_clause(self): """Should extract tables from subqueries in FROM clause.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "orders"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "orders"]) + rule = SQLRule(config) # Derived table with allowed table - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM " "(SELECT * FROM users WHERE active = true) AS active_users" ) @@ -1222,7 +1222,7 @@ async def test_subquery_in_from_clause(self): assert result.matched is False # Derived table with blocked table - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM " "(SELECT * FROM admin WHERE role = 'super') AS admins" ) @@ -1233,11 +1233,11 @@ async def test_subquery_in_from_clause(self): @pytest.mark.asyncio async def test_correlated_subquery(self): """Should handle correlated subqueries correctly.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "orders"]) - 
evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "orders"]) + rule = SQLRule(config) # Correlated subquery with allowed tables - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users u WHERE EXISTS " "(SELECT 1 FROM orders o WHERE o.user_id = u.id)" ) @@ -1245,7 +1245,7 @@ async def test_correlated_subquery(self): assert result.matched is False # Correlated subquery with blocked table - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users u WHERE EXISTS " "(SELECT 1 FROM secrets s WHERE s.user_id = u.id)" ) @@ -1256,11 +1256,11 @@ async def test_correlated_subquery(self): @pytest.mark.asyncio async def test_nested_subqueries(self): """Should handle deeply nested subqueries.""" - config = SQLEvaluatorConfig(blocked_tables=["admin", "secrets"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_tables=["admin", "secrets"]) + rule = SQLRule(config) # Nested subqueries without blocked tables - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM orders WHERE id IN " "(SELECT order_id FROM payments WHERE status = 'completed'))" @@ -1269,7 +1269,7 @@ async def test_nested_subqueries(self): assert result.matched is False # Nested subquery with blocked table in innermost - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM orders WHERE id IN " "(SELECT order_id FROM admin WHERE verified = true))" @@ -1279,7 +1279,7 @@ async def test_nested_subqueries(self): assert "admin" in result.message # Nested subquery with blocked table in middle - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM secrets WHERE id IN " "(SELECT secret_id FROM logs))" @@ -1291,11 
+1291,11 @@ async def test_nested_subqueries(self): @pytest.mark.asyncio async def test_subquery_with_blocked_operations(self): """Should detect blocked operations in subqueries.""" - config = SQLEvaluatorConfig(blocked_operations=["DELETE", "DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DELETE", "DROP"]) + rule = SQLRule(config) # SELECT with subquery - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM orders)" ) @@ -1303,7 +1303,7 @@ async def test_subquery_with_blocked_operations(self): assert result.matched is False # DELETE in main query - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "DELETE FROM users WHERE id IN " "(SELECT user_id FROM orders WHERE total < 10)" ) @@ -1311,12 +1311,12 @@ async def test_subquery_with_blocked_operations(self): assert result.matched is True assert "DELETE" in result.metadata["blocked"] - # NOTE: The following is a KNOWN LIMITATION of the SQL evaluator + # NOTE: The following is a KNOWN LIMITATION of the SQL rule # DELETE in subquery - SHOULD be blocked but currently ISN'T # Blocked operations in subqueries are not currently detected # This test is commented out until this limitation is addressed # - # result = await evaluator.evaluate( + # result = await rule.evaluate( # "SELECT * FROM users WHERE id NOT IN " # "(DELETE FROM orders WHERE total = 0 RETURNING user_id)" # ) @@ -1326,14 +1326,14 @@ async def test_subquery_with_blocked_operations(self): @pytest.mark.asyncio async def test_subquery_with_column_requirements(self): """Should check column requirements in subqueries.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Column in outer query WHERE - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( 
"SELECT * FROM users WHERE tenant_id = 123 AND id IN " "(SELECT user_id FROM orders WHERE total > 100)" ) @@ -1350,15 +1350,15 @@ async def test_subquery_with_column_requirements(self): @pytest.mark.asyncio async def test_subquery_with_column_in_select(self): """Should extract columns from subquery SELECT clauses with scope=all.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["user_id"], column_context="select", column_context_scope="top_level", # Old behavior: only check outer SELECT ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Column in outer SELECT - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT user_id, name FROM users WHERE id IN " "(SELECT id FROM orders)" ) @@ -1366,7 +1366,7 @@ async def test_subquery_with_column_in_select(self): assert result.matched is False # Column only in subquery SELECT - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT id, name FROM users WHERE id IN " "(SELECT user_id FROM orders)" ) @@ -1377,11 +1377,11 @@ async def test_subquery_with_column_in_select(self): @pytest.mark.asyncio async def test_multiple_subqueries(self): """Should handle multiple subqueries in same query.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "orders", "payments"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "orders", "payments"]) + rule = SQLRule(config) # Multiple subqueries with allowed tables - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM orders) AND id IN " "(SELECT user_id FROM payments)" @@ -1390,7 +1390,7 @@ async def test_multiple_subqueries(self): assert result.matched is False # One subquery with disallowed table - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE id IN " "(SELECT user_id FROM orders) AND 
id IN " "(SELECT user_id FROM admin)" @@ -1402,11 +1402,11 @@ async def test_multiple_subqueries(self): @pytest.mark.asyncio async def test_subquery_in_join(self): """Should handle subqueries used in JOIN clauses.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "orders"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "orders"]) + rule = SQLRule(config) # Subquery in JOIN with allowed table - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT u.* FROM users u " "JOIN (SELECT user_id, COUNT(*) as order_count FROM orders " "GROUP BY user_id) o ON u.id = o.user_id" @@ -1415,7 +1415,7 @@ async def test_subquery_in_join(self): assert result.matched is False # Subquery in JOIN with blocked table - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT u.* FROM users u " "JOIN (SELECT user_id, role FROM admin) a ON u.id = a.user_id" ) @@ -1426,11 +1426,11 @@ async def test_subquery_in_join(self): @pytest.mark.asyncio async def test_union_with_subqueries(self): """Should handle UNION with subqueries.""" - config = SQLEvaluatorConfig(allowed_tables=["users", "customers"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(allowed_tables=["users", "customers"]) + rule = SQLRule(config) # UNION with allowed tables - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT id, name FROM users " "UNION " "SELECT id, name FROM customers" @@ -1439,7 +1439,7 @@ async def test_union_with_subqueries(self): assert result.matched is False # UNION with blocked table - should be blocked - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT id, name FROM users " "UNION " "SELECT id, name FROM admin" @@ -1454,33 +1454,33 @@ class TestSQLDialectConfiguration: def test_dialect_defaults_to_postgres(self): """Should default to postgres dialect.""" - config = SQLEvaluatorConfig() + config = SQLRuleConfig() 
assert config.dialect == "postgres" def test_dialect_can_be_set_to_mysql(self): """Should accept mysql dialect.""" - config = SQLEvaluatorConfig(dialect="mysql") + config = SQLRuleConfig(dialect="mysql") assert config.dialect == "mysql" def test_dialect_can_be_set_to_tsql(self): """Should accept tsql dialect.""" - config = SQLEvaluatorConfig(dialect="tsql") + config = SQLRuleConfig(dialect="tsql") assert config.dialect == "tsql" def test_dialect_can_be_set_to_oracle(self): """Should accept oracle dialect.""" - config = SQLEvaluatorConfig(dialect="oracle") + config = SQLRuleConfig(dialect="oracle") assert config.dialect == "oracle" def test_dialect_can_be_set_to_sqlite(self): """Should accept sqlite dialect.""" - config = SQLEvaluatorConfig(dialect="sqlite") + config = SQLRuleConfig(dialect="sqlite") assert config.dialect == "sqlite" def test_invalid_dialect_raises_error(self): """Should reject invalid dialect.""" with pytest.raises(Exception): - SQLEvaluatorConfig(dialect="invalid_dialect") + SQLRuleConfig(dialect="invalid_dialect") class TestSQLDialectParsing: @@ -1490,22 +1490,22 @@ class TestSQLDialectParsing: @pytest.mark.asyncio async def test_postgres_double_quoted_identifiers(self): """PostgreSQL should parse double-quoted identifiers.""" - config = SQLEvaluatorConfig(dialect="postgres", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="postgres", allowed_tables=["users"]) + rule = SQLRule(config) # Double quotes in PostgreSQL - result = await evaluator.evaluate('SELECT * FROM "users"') + result = await rule.evaluate('SELECT * FROM "users"') assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_postgres_case_sensitive_identifiers_quoted(self): """PostgreSQL preserves case in quoted identifiers.""" - config = SQLEvaluatorConfig(dialect="postgres", allowed_tables=["Users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="postgres", 
allowed_tables=["Users"]) + rule = SQLRule(config) # Quoted identifier in PostgreSQL preserves case - result = await evaluator.evaluate('SELECT * FROM "Users"') + result = await rule.evaluate('SELECT * FROM "Users"') assert result.error is None assert result.matched is False @@ -1513,22 +1513,22 @@ async def test_postgres_case_sensitive_identifiers_quoted(self): @pytest.mark.asyncio async def test_mysql_backtick_identifiers(self): """MySQL should parse backtick-quoted identifiers.""" - config = SQLEvaluatorConfig(dialect="mysql", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="mysql", allowed_tables=["users"]) + rule = SQLRule(config) # Backticks in MySQL - result = await evaluator.evaluate("SELECT * FROM `users`") + result = await rule.evaluate("SELECT * FROM `users`") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_mysql_column_alias_syntax(self): """MySQL should parse column aliases correctly.""" - config = SQLEvaluatorConfig(dialect="mysql", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="mysql", allowed_tables=["users"]) + rule = SQLRule(config) # MySQL-specific alias syntax - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT id as `user_id`, name as `user_name` FROM users" ) assert result.error is None @@ -1538,26 +1538,26 @@ async def test_mysql_column_alias_syntax(self): @pytest.mark.asyncio async def test_tsql_bracket_quoted_identifiers(self): """T-SQL should parse bracket-quoted identifiers.""" - config = SQLEvaluatorConfig(dialect="tsql", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="tsql", allowed_tables=["users"]) + rule = SQLRule(config) # Brackets in T-SQL - result = await evaluator.evaluate("SELECT * FROM [users]") + result = await rule.evaluate("SELECT * FROM [users]") assert result.error is None assert result.matched is False 
@pytest.mark.asyncio async def test_tsql_column_with_spaces(self): """T-SQL should parse column names with spaces in brackets.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( dialect="tsql", allowed_tables=["users"], required_columns=["user id"], ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # T-SQL with spaces in column name using brackets - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT [user id], name FROM [users] WHERE [user id] = 1" ) assert result.error is None @@ -1567,22 +1567,22 @@ async def test_tsql_column_with_spaces(self): @pytest.mark.asyncio async def test_oracle_double_quoted_identifiers(self): """Oracle should parse double-quoted identifiers.""" - config = SQLEvaluatorConfig(dialect="oracle", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="oracle", allowed_tables=["users"]) + rule = SQLRule(config) # Double quotes in Oracle - result = await evaluator.evaluate('SELECT * FROM "users"') + result = await rule.evaluate('SELECT * FROM "users"') assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_oracle_line_comment_syntax(self): """Oracle should parse -- line comments.""" - config = SQLEvaluatorConfig(dialect="oracle", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="oracle", allowed_tables=["users"]) + rule = SQLRule(config) # Oracle -- comment syntax - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users -- get all users\n WHERE id > 0" ) assert result.error is None @@ -1592,22 +1592,22 @@ async def test_oracle_line_comment_syntax(self): @pytest.mark.asyncio async def test_sqlite_double_quoted_identifiers(self): """SQLite should parse double-quoted identifiers.""" - config = SQLEvaluatorConfig(dialect="sqlite", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="sqlite", 
allowed_tables=["users"]) + rule = SQLRule(config) # Double quotes in SQLite - result = await evaluator.evaluate('SELECT * FROM "users"') + result = await rule.evaluate('SELECT * FROM "users"') assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_sqlite_autoincrement_syntax(self): """SQLite should parse AUTOINCREMENT syntax.""" - config = SQLEvaluatorConfig(dialect="sqlite", block_ddl=False) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="sqlite", block_ddl=False) + rule = SQLRule(config) # SQLite AUTOINCREMENT syntax - result = await evaluator.evaluate( + result = await rule.evaluate( "CREATE TABLE users (id INTEGER PRIMARY KEY AUTOINCREMENT, " "name TEXT)" ) @@ -1622,10 +1622,10 @@ class TestSQLDialectIntegration: async def test_dialect_with_blocked_operations(self): """Should enforce blocked_operations across dialects.""" for dialect in ["postgres", "mysql", "tsql", "oracle", "sqlite"]: - config = SQLEvaluatorConfig(dialect=dialect, blocked_operations=["DROP"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect=dialect, blocked_operations=["DROP"]) + rule = SQLRule(config) - result = await evaluator.evaluate("DROP TABLE users") + result = await rule.evaluate("DROP TABLE users") assert result.error is None assert result.matched is True assert "DROP" in result.metadata["blocked"] @@ -1634,18 +1634,18 @@ async def test_dialect_with_blocked_operations(self): async def test_dialect_with_table_restrictions(self): """Should enforce table restrictions across dialects.""" for dialect in ["postgres", "mysql", "tsql", "oracle", "sqlite"]: - config = SQLEvaluatorConfig( + config = SQLRuleConfig( dialect=dialect, allowed_tables=["users", "orders"] ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Allowed table - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False # 
Blocked table - result = await evaluator.evaluate("SELECT * FROM admin") + result = await rule.evaluate("SELECT * FROM admin") assert result.error is None assert result.matched is True @@ -1653,18 +1653,18 @@ async def test_dialect_with_table_restrictions(self): async def test_dialect_with_limit_enforcement(self): """Should enforce LIMIT constraints across dialects.""" for dialect in ["postgres", "mysql", "tsql", "oracle", "sqlite"]: - config = SQLEvaluatorConfig( + config = SQLRuleConfig( dialect=dialect, require_limit=True, max_limit=100 ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # No LIMIT - should fail - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is True # With LIMIT - should pass - result = await evaluator.evaluate("SELECT * FROM users LIMIT 50") + result = await rule.evaluate("SELECT * FROM users LIMIT 50") assert result.error is None assert result.matched is False @@ -1672,22 +1672,22 @@ async def test_dialect_with_limit_enforcement(self): async def test_dialect_with_column_requirements(self): """Should enforce column requirements across dialects.""" for dialect in ["postgres", "mysql", "tsql", "oracle", "sqlite"]: - config = SQLEvaluatorConfig( + config = SQLRuleConfig( dialect=dialect, required_columns=["tenant_id"], column_context="where", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # With required column - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users WHERE tenant_id = 1" ) assert result.error is None assert result.matched is False # Without required column - should fail - result = await evaluator.evaluate("SELECT * FROM users WHERE id = 1") + result = await rule.evaluate("SELECT * FROM users WHERE id = 1") assert result.error is None assert result.matched is True @@ -1698,27 +1698,27 @@ class TestSQLDialectEdgeCases: @pytest.mark.asyncio async 
def test_mysql_case_insensitive_table_names(self): """MySQL table names are case-insensitive on most systems.""" - config = SQLEvaluatorConfig(dialect="mysql", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="mysql", allowed_tables=["users"]) + rule = SQLRule(config) # Both should work (MySQL normalizes to lowercase) - result = await evaluator.evaluate("SELECT * FROM users") + result = await rule.evaluate("SELECT * FROM users") assert result.error is None assert result.matched is False - result = await evaluator.evaluate("SELECT * FROM USERS") + result = await rule.evaluate("SELECT * FROM USERS") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_tsql_function_syntax(self): """T-SQL has different function syntax than standard SQL.""" - config = SQLEvaluatorConfig(dialect="tsql", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="tsql", allowed_tables=["users"]) + rule = SQLRule(config) # T-SQL datetime function - using DATEADD instead of GETDATE # since GETDATE is already parsed correctly - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT TOP 10 * FROM users " "WHERE created > DATEADD(day, -7, GETDATE())" ) @@ -1728,10 +1728,10 @@ async def test_tsql_function_syntax(self): @pytest.mark.asyncio async def test_oracle_schema_prefix(self): """Oracle uses schema.table.column notation.""" - config = SQLEvaluatorConfig(dialect="oracle", allowed_tables=["users"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="oracle", allowed_tables=["users"]) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT u.id, u.name FROM schema.users u" ) assert result.error is None @@ -1741,24 +1741,24 @@ async def test_oracle_schema_prefix(self): async def test_dialect_with_unicode_identifiers(self): """All dialects should handle unicode in identifiers.""" for dialect 
in ["postgres", "mysql", "tsql", "oracle", "sqlite"]: - config = SQLEvaluatorConfig( + config = SQLRuleConfig( dialect=dialect, allowed_tables=["usuarios"], # Spanish for "users" ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) - result = await evaluator.evaluate("SELECT * FROM usuarios") + result = await rule.evaluate("SELECT * FROM usuarios") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_sqlite_with_complex_query(self): """SQLite should handle complex queries correctly.""" - config = SQLEvaluatorConfig(dialect="sqlite", allowed_tables=["users", "orders"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(dialect="sqlite", allowed_tables=["users", "orders"]) + rule = SQLRule(config) # Complex SQLite query with JOIN and WHERE - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT u.name, o.total FROM users u " "JOIN orders o ON u.id = o.user_id " "WHERE o.total > 100" @@ -1773,11 +1773,11 @@ class TestOperationSecurityBypass: @pytest.mark.asyncio async def test_delete_in_cte_is_detected(self): """DELETE in CTE should be detected and blocked.""" - config = SQLEvaluatorConfig(blocked_operations=["DELETE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DELETE"]) + rule = SQLRule(config) # DELETE hidden in CTE - result = await evaluator.evaluate( + result = await rule.evaluate( "WITH deleted AS (" "DELETE FROM users WHERE id = 1 RETURNING *" ") SELECT * FROM deleted" @@ -1789,10 +1789,10 @@ async def test_delete_in_cte_is_detected(self): @pytest.mark.asyncio async def test_update_in_cte_is_detected(self): """UPDATE in CTE should be detected.""" - config = SQLEvaluatorConfig(blocked_operations=["UPDATE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["UPDATE"]) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "WITH updated AS (" "UPDATE users SET name = 'test' WHERE 
id = 1 RETURNING *" ") SELECT * FROM updated" @@ -1804,10 +1804,10 @@ async def test_update_in_cte_is_detected(self): @pytest.mark.asyncio async def test_insert_in_nested_cte_is_detected(self): """INSERT in nested CTE should be detected.""" - config = SQLEvaluatorConfig(blocked_operations=["INSERT"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["INSERT"]) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "WITH outer_cte AS (" " WITH inner_cte AS (" " INSERT INTO users (name) VALUES ('test') RETURNING *" @@ -1821,11 +1821,11 @@ async def test_insert_in_nested_cte_is_detected(self): @pytest.mark.asyncio async def test_select_with_delete_subquery_in_from(self): """DELETE in SELECT's FROM subquery should be detected.""" - config = SQLEvaluatorConfig(blocked_operations=["DELETE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DELETE"]) + rule = SQLRule(config) # Use nested SELECT with CTE pattern - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM (" " WITH deleted AS (DELETE FROM users WHERE id = 1 RETURNING *) " " SELECT * FROM deleted" @@ -1838,10 +1838,10 @@ async def test_select_with_delete_subquery_in_from(self): @pytest.mark.asyncio async def test_multiple_operations_in_ctes(self): """Multiple different operations in CTEs should all be detected.""" - config = SQLEvaluatorConfig(blocked_operations=["DELETE", "UPDATE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DELETE", "UPDATE"]) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "WITH deleted AS (DELETE FROM users WHERE id = 1 RETURNING *), " "updated AS (UPDATE orders SET status = 'done' WHERE id = 2 RETURNING *) " "SELECT * FROM deleted UNION ALL SELECT * FROM updated" @@ -1858,15 +1858,15 @@ class TestMultiTenantRLSSecurityBypass: @pytest.mark.asyncio async def 
test_top_level_scope_blocks_subquery_tenant_filter(self): """top_level scope requires tenant_id in outer WHERE, not subquery.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", column_context_scope="top_level", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # tenant_id only in subquery - should FAIL with top_level scope - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users " "WHERE id IN (SELECT user_id FROM orders WHERE tenant_id = 123)" ) @@ -1877,15 +1877,15 @@ async def test_top_level_scope_blocks_subquery_tenant_filter(self): @pytest.mark.asyncio async def test_top_level_scope_passes_with_outer_tenant_filter(self): """top_level scope should pass when tenant_id in outer WHERE.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", column_context_scope="top_level", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # tenant_id in outer WHERE - should PASS - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users " "WHERE tenant_id = 123 AND id IN (SELECT user_id FROM orders)" ) @@ -1895,15 +1895,15 @@ async def test_top_level_scope_passes_with_outer_tenant_filter(self): @pytest.mark.asyncio async def test_all_scope_backward_compatible(self): """'all' scope should find tenant_id in any WHERE (backward compatible).""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", column_context_scope="all", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # tenant_id in subquery - should PASS with 'all' scope - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users " "WHERE id IN (SELECT user_id FROM orders WHERE tenant_id = 123)" ) @@ -1913,15 +1913,15 @@ async def test_all_scope_backward_compatible(self): @pytest.mark.asyncio async def 
test_default_scope_is_all(self): """Default column_context_scope should be 'all' for backward compatibility.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", # column_context_scope not specified, should default to "all" ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # Should behave like scope="all" - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users " "WHERE id IN (SELECT user_id FROM orders WHERE tenant_id = 123)" ) @@ -1931,15 +1931,15 @@ async def test_default_scope_is_all(self): @pytest.mark.asyncio async def test_select_context_with_top_level_scope(self): """top_level scope with select context only checks outer SELECT.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="select", column_context_scope="top_level", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # tenant_id only in subquery SELECT - should FAIL - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT id, name FROM users " "WHERE id IN (SELECT tenant_id FROM orders)" ) @@ -1953,10 +1953,10 @@ class TestRequiredColumnValues: @pytest.mark.asyncio async def test_exact_equality_match_passes(self): """Required column value match in top-level WHERE should pass.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE user_id = 'u1'", "context": {"user_id": "u1"}} ) assert result.error is None @@ -1964,11 +1964,11 @@ async def test_exact_equality_match_passes(self): @pytest.mark.asyncio async def test_full_step_payload_format_passes(self): - """Full step payload should provide query and context to the evaluator.""" - config = 
SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + """Full step payload should provide query and context to the rule.""" + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "type": "tool", "name": "execute_sql", @@ -1982,10 +1982,10 @@ async def test_full_step_payload_format_passes(self): @pytest.mark.asyncio async def test_case_insensitive_column_match_passes(self): """Column matching should follow case-insensitive default behavior.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE User_ID = 'u1'", "context": {"user_id": "u1"}} ) assert result.error is None @@ -1994,10 +1994,10 @@ async def test_case_insensitive_column_match_passes(self): @pytest.mark.asyncio async def test_numeric_literal_match_passes(self): """Numeric SQL literals should match equivalent scalar context values.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE user_id = 123", "context": {"user_id": 123}} ) assert result.error is None @@ -2006,10 +2006,10 @@ async def test_numeric_literal_match_passes(self): @pytest.mark.asyncio async def test_value_mismatch_is_blocked(self): """Mismatched literal value should be blocked.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + 
rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE user_id = 'u2'", "context": {"user_id": "u1"}} ) assert result.error is None @@ -2019,10 +2019,10 @@ async def test_value_mismatch_is_blocked(self): @pytest.mark.asyncio async def test_neq_operator_is_blocked(self): """Negated comparison should not satisfy required_column_values.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE user_id != 'u1'", "context": {"user_id": "u1"}} ) assert result.error is None @@ -2032,10 +2032,10 @@ async def test_neq_operator_is_blocked(self): @pytest.mark.asyncio async def test_not_wrapper_is_blocked(self): """NOT-wrapped equality should be blocked.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "query": "SELECT * FROM users WHERE NOT user_id = 'u1'", "context": {"user_id": "u1"}, @@ -2048,10 +2048,10 @@ async def test_not_wrapper_is_blocked(self): @pytest.mark.asyncio async def test_or_bypass_is_blocked(self): """OR-branch predicates should be rejected for tenant checks.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "query": "SELECT * FROM users WHERE user_id = 'u1' OR 1 = 1", "context": {"user_id": "u1"}, @@ -2064,10 +2064,10 @@ async def test_or_bypass_is_blocked(self): 
@pytest.mark.asyncio async def test_parameterized_rhs_is_blocked(self): """Parameterized comparisons should fail closed.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE user_id = $1", "context": {"user_id": "u1"}} ) assert result.error is None @@ -2077,10 +2077,10 @@ async def test_parameterized_rhs_is_blocked(self): @pytest.mark.asyncio async def test_function_rhs_is_blocked(self): """Function-based RHS should fail closed.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "query": "SELECT * FROM users WHERE user_id = get_user_id()", "context": {"user_id": "u1"}, @@ -2093,10 +2093,10 @@ async def test_function_rhs_is_blocked(self): @pytest.mark.asyncio async def test_missing_context_key_is_blocked(self): """Missing context key should fail closed.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate({"query": "SELECT * FROM users WHERE user_id = 'u1'"}) + result = await rule.evaluate({"query": "SELECT * FROM users WHERE user_id = 'u1'"}) assert result.error is None assert result.matched is True assert result.metadata["violation"] == "missing_context_value" @@ -2104,10 +2104,10 @@ async def test_missing_context_key_is_blocked(self): @pytest.mark.asyncio async def test_non_scalar_context_value_is_blocked(self): """Context values must be scalar.""" - config = 
SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE user_id = 'u1'", "context": {"user_id": ["u1"]}} ) assert result.error is None @@ -2117,10 +2117,10 @@ async def test_non_scalar_context_value_is_blocked(self): @pytest.mark.asyncio async def test_none_context_value_is_blocked(self): """None context values must be rejected.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users WHERE user_id = 'u1'", "context": {"user_id": None}} ) assert result.error is None @@ -2130,10 +2130,10 @@ async def test_none_context_value_is_blocked(self): @pytest.mark.asyncio async def test_missing_top_level_where_is_blocked(self): """Table-accessing statements must have top-level WHERE.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( {"query": "SELECT * FROM users", "context": {"user_id": "u1"}} ) assert result.error is None @@ -2143,10 +2143,10 @@ async def test_missing_top_level_where_is_blocked(self): @pytest.mark.asyncio async def test_subquery_only_predicate_is_blocked(self): """Predicates only inside subqueries must not satisfy the rule.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - 
result = await evaluator.evaluate( + result = await rule.evaluate( { "query": ( "SELECT * FROM users WHERE id IN " @@ -2162,10 +2162,10 @@ async def test_subquery_only_predicate_is_blocked(self): @pytest.mark.asyncio async def test_table_qualified_rule_with_alias_resolves_to_base_table(self): """Qualified rules should resolve aliases to base tables.""" - config = SQLEvaluatorConfig(required_column_values={"users.user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"users.user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "query": "SELECT * FROM users u WHERE u.user_id = 'u1'", "context": {"user_id": "u1"}, @@ -2177,10 +2177,10 @@ async def test_table_qualified_rule_with_alias_resolves_to_base_table(self): @pytest.mark.asyncio async def test_table_alias_spoofing_is_blocked(self): """Alias spoofing should not satisfy qualified base-table rules.""" - config = SQLEvaluatorConfig(required_column_values={"users.user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"users.user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "query": "SELECT * FROM orders AS users WHERE users.user_id = 'u1'", "context": {"user_id": "u1"}, @@ -2193,10 +2193,10 @@ async def test_table_alias_spoofing_is_blocked(self): @pytest.mark.asyncio async def test_unqualified_column_with_multi_table_query_is_blocked(self): """Unqualified rules should fail closed when statement has multiple tables.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "query": ( "SELECT * FROM users u JOIN orders o ON u.id = o.user_id " @@ -2212,10 +2212,10 @@ 
async def test_unqualified_column_with_multi_table_query_is_blocked(self): @pytest.mark.asyncio async def test_multi_statement_requires_each_statement_to_be_scoped(self): """Each table-accessing statement must satisfy required_column_values.""" - config = SQLEvaluatorConfig(required_column_values={"user_id": "user_id"}) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(required_column_values={"user_id": "user_id"}) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( { "query": ( "SELECT * FROM users WHERE user_id = 'u1'; " @@ -2232,7 +2232,7 @@ async def test_multi_statement_requires_each_statement_to_be_scoped(self): def test_required_column_values_warns_with_select_context(self): """Value-based rules should warn when select context is configured.""" with pytest.warns(UserWarning, match="value checks only apply to WHERE"): - SQLEvaluatorConfig( + SQLRuleConfig( required_columns=["user_id"], column_context="select", required_column_values={"user_id": "user_id"}, @@ -2245,11 +2245,11 @@ class TestLimitBypassSubqueries: @pytest.mark.asyncio async def test_subquery_without_limit_is_blocked(self): """Subquery without LIMIT should be blocked when require_limit=True.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # Outer has LIMIT, inner doesn't - should FAIL - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM (SELECT * FROM huge_table) AS t LIMIT 10" ) assert result.error is None @@ -2259,10 +2259,10 @@ async def test_subquery_without_limit_is_blocked(self): @pytest.mark.asyncio async def test_all_subqueries_with_limit_passes(self): """All SELECTs with LIMIT should pass.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await 
rule.evaluate( "SELECT * FROM (SELECT * FROM users LIMIT 100) AS t LIMIT 10" ) assert result.error is None @@ -2271,11 +2271,11 @@ async def test_all_subqueries_with_limit_passes(self): @pytest.mark.asyncio async def test_nested_subqueries_all_need_limit(self): """All nested subqueries need LIMIT.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # Deepest subquery missing LIMIT - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM (" " SELECT * FROM (" " SELECT * FROM users" @@ -2289,11 +2289,11 @@ async def test_nested_subqueries_all_need_limit(self): @pytest.mark.asyncio async def test_max_limit_enforced_on_subqueries(self): """max_limit should be enforced on all subqueries.""" - config = SQLEvaluatorConfig(require_limit=True, max_limit=100) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True, max_limit=100) + rule = SQLRule(config) # Subquery exceeds max_limit - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM (SELECT * FROM users LIMIT 500) AS t LIMIT 10" ) assert result.error is None @@ -2303,10 +2303,10 @@ async def test_max_limit_enforced_on_subqueries(self): @pytest.mark.asyncio async def test_cte_without_limit_is_blocked(self): """CTE SELECT without LIMIT should be blocked.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "WITH user_data AS (SELECT * FROM users) " "SELECT * FROM user_data LIMIT 10" ) @@ -2317,16 +2317,16 @@ async def test_cte_without_limit_is_blocked(self): @pytest.mark.asyncio async def test_max_result_window_enforced(self): """max_result_window should prevent deep pagination.""" - config = SQLEvaluatorConfig(max_result_window=10000) - evaluator = SQLEvaluator(config) 
+ config = SQLRuleConfig(max_result_window=10000) + rule = SQLRule(config) # Within limit: 100 + 9900 = 10000 - should PASS - result = await evaluator.evaluate("SELECT * FROM users LIMIT 100 OFFSET 9900") + result = await rule.evaluate("SELECT * FROM users LIMIT 100 OFFSET 9900") assert result.error is None assert result.matched is False # Exceeds limit: 10 + 10000 = 10010 > 10000 - should FAIL - result = await evaluator.evaluate("SELECT * FROM users LIMIT 10 OFFSET 10000") + result = await rule.evaluate("SELECT * FROM users LIMIT 10 OFFSET 10000") assert result.error is None assert result.matched is True assert ( @@ -2336,22 +2336,22 @@ async def test_max_result_window_enforced(self): @pytest.mark.asyncio async def test_large_offset_without_max_result_window(self): """Without max_result_window, large OFFSET should be allowed.""" - config = SQLEvaluatorConfig() # No max_result_window - evaluator = SQLEvaluator(config) + config = SQLRuleConfig() # No max_result_window + rule = SQLRule(config) # Large OFFSET but no restriction - should PASS - result = await evaluator.evaluate("SELECT * FROM users LIMIT 10 OFFSET 1000000") + result = await rule.evaluate("SELECT * FROM users LIMIT 10 OFFSET 1000000") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_max_result_window_on_subqueries(self): """max_result_window should be enforced on subqueries.""" - config = SQLEvaluatorConfig(max_result_window=1000) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_result_window=1000) + rule = SQLRule(config) # Subquery exceeds max_result_window - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM (" "SELECT * FROM users LIMIT 10 OFFSET 1000" ") AS t LIMIT 10" @@ -2365,17 +2365,17 @@ async def test_max_result_window_on_subqueries(self): @pytest.mark.asyncio async def test_max_limit_and_max_result_window_together(self): """Both max_limit and max_result_window should be enforced.""" - config = 
SQLEvaluatorConfig(max_limit=100, max_result_window=10000) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_limit=100, max_result_window=10000) + rule = SQLRule(config) # Exceeds max_limit - should FAIL - result = await evaluator.evaluate("SELECT * FROM users LIMIT 500") + result = await rule.evaluate("SELECT * FROM users LIMIT 500") assert result.error is None assert result.matched is True assert "500" in result.message # Within max_limit but exceeds max_result_window - should FAIL - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT * FROM users LIMIT 100 OFFSET 10000" ) assert result.error is None @@ -2385,7 +2385,7 @@ async def test_max_limit_and_max_result_window_together(self): ) # Within both limits - should PASS - result = await evaluator.evaluate("SELECT * FROM users LIMIT 100 OFFSET 9000") + result = await rule.evaluate("SELECT * FROM users LIMIT 100 OFFSET 9000") assert result.error is None assert result.matched is False @@ -2396,44 +2396,44 @@ class TestSelectColumnExtractionFixed: @pytest.mark.asyncio async def test_column_in_function_is_extracted(self): """Columns in functions should be extracted.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["user_id"], column_context="select", column_context_scope="top_level", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # user_id in COUNT() function - should be extracted - result = await evaluator.evaluate("SELECT COUNT(user_id), name FROM users") + result = await rule.evaluate("SELECT COUNT(user_id), name FROM users") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_column_in_expression_is_extracted(self): """Columns in expressions should be extracted.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["price"], column_context="select", column_context_scope="top_level", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) # price in arithmetic 
expression - result = await evaluator.evaluate("SELECT price * 1.1, name FROM products") + result = await rule.evaluate("SELECT price * 1.1, name FROM products") assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_column_in_case_is_extracted(self): """Columns in CASE expressions should be extracted.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["status"], column_context="select", column_context_scope="top_level", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT CASE WHEN status = 'active' THEN 1 ELSE 0 END FROM users" ) assert result.error is None @@ -2442,15 +2442,15 @@ async def test_column_in_case_is_extracted(self): @pytest.mark.asyncio async def test_multiple_columns_in_coalesce(self): """Multiple columns in COALESCE should be extracted.""" - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["user_id", "guest_id"], column_presence_logic="any", column_context="select", column_context_scope="top_level", ) - evaluator = SQLEvaluator(config) + rule = SQLRule(config) - result = await evaluator.evaluate( + result = await rule.evaluate( "SELECT COALESCE(user_id, guest_id) FROM sessions" ) assert result.error is None @@ -2463,10 +2463,10 @@ class TestNewOperationDetection: @pytest.mark.asyncio async def test_commit_operation_detected(self): """COMMIT should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["COMMIT"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["COMMIT"]) + rule = SQLRule(config) - result = await evaluator.evaluate("COMMIT") + result = await rule.evaluate("COMMIT") assert result.error is None assert result.matched is True assert "COMMIT" in result.metadata["blocked"] @@ -2474,10 +2474,10 @@ async def test_commit_operation_detected(self): @pytest.mark.asyncio async def 
test_rollback_operation_detected(self): """ROLLBACK should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["ROLLBACK"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["ROLLBACK"]) + rule = SQLRule(config) - result = await evaluator.evaluate("ROLLBACK") + result = await rule.evaluate("ROLLBACK") assert result.error is None assert result.matched is True assert "ROLLBACK" in result.metadata["blocked"] @@ -2485,10 +2485,10 @@ async def test_rollback_operation_detected(self): @pytest.mark.asyncio async def test_show_operation_detected(self): """SHOW parses to COMMAND (sqlglot fallback for unsupported syntax).""" - config = SQLEvaluatorConfig(blocked_operations=["COMMAND"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["COMMAND"]) + rule = SQLRule(config) - result = await evaluator.evaluate("SHOW TABLES") + result = await rule.evaluate("SHOW TABLES") assert result.error is None assert result.matched is True assert "COMMAND" in result.metadata["blocked"] @@ -2496,10 +2496,10 @@ async def test_show_operation_detected(self): @pytest.mark.asyncio async def test_describe_operation_detected(self): """DESCRIBE should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["DESCRIBE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DESCRIBE"]) + rule = SQLRule(config) - result = await evaluator.evaluate("DESCRIBE users") + result = await rule.evaluate("DESCRIBE users") assert result.error is None assert result.matched is True assert "DESCRIBE" in result.metadata["blocked"] @@ -2507,10 +2507,10 @@ async def test_describe_operation_detected(self): @pytest.mark.asyncio async def test_set_operation_detected(self): """SET should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["SET"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["SET"]) + rule = SQLRule(config) - 
result = await evaluator.evaluate("SET search_path = public") + result = await rule.evaluate("SET search_path = public") assert result.error is None assert result.matched is True assert "SET" in result.metadata["blocked"] @@ -2518,10 +2518,10 @@ async def test_set_operation_detected(self): @pytest.mark.asyncio async def test_use_operation_detected(self): """USE should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["USE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["USE"]) + rule = SQLRule(config) - result = await evaluator.evaluate("USE database_name") + result = await rule.evaluate("USE database_name") assert result.error is None assert result.matched is True assert "USE" in result.metadata["blocked"] @@ -2529,10 +2529,10 @@ async def test_use_operation_detected(self): @pytest.mark.asyncio async def test_copy_operation_detected(self): """COPY should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["COPY"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["COPY"]) + rule = SQLRule(config) - result = await evaluator.evaluate("COPY users TO '/tmp/users.csv'") + result = await rule.evaluate("COPY users TO '/tmp/users.csv'") assert result.error is None assert result.matched is True assert "COPY" in result.metadata["blocked"] @@ -2541,23 +2541,23 @@ async def test_copy_operation_detected(self): async def test_lock_operation_fails_to_parse(self): """LOCK TABLE fails to parse in sqlglot - blocked as invalid SQL.""" # Need a control configured for parsing to be attempted - config = SQLEvaluatorConfig(blocked_operations=["DELETE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DELETE"]) + rule = SQLRule(config) # LOCK TABLE doesn't parse, so it's blocked as invalid SQL - result = await evaluator.evaluate("LOCK TABLE users IN ACCESS EXCLUSIVE MODE") + result = await rule.evaluate("LOCK TABLE users IN ACCESS 
EXCLUSIVE MODE") assert result.error is None assert result.matched is True # Invalid SQL is blocked - assert result.error is None # Not a evaluator error + assert result.error is None # Not a rule error assert "pars" in result.message.lower() @pytest.mark.asyncio async def test_analyze_operation_detected(self): """ANALYZE should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["ANALYZE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["ANALYZE"]) + rule = SQLRule(config) - result = await evaluator.evaluate("ANALYZE users") + result = await rule.evaluate("ANALYZE users") assert result.error is None assert result.matched is True assert "ANALYZE" in result.metadata["blocked"] @@ -2565,10 +2565,10 @@ async def test_analyze_operation_detected(self): @pytest.mark.asyncio async def test_comment_operation_detected(self): """COMMENT should be detected and blockable.""" - config = SQLEvaluatorConfig(blocked_operations=["COMMENT"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["COMMENT"]) + rule = SQLRule(config) - result = await evaluator.evaluate("COMMENT ON TABLE users IS 'User data'") + result = await rule.evaluate("COMMENT ON TABLE users IS 'User data'") assert result.error is None assert result.matched is True assert "COMMENT" in result.metadata["blocked"] @@ -2580,8 +2580,8 @@ class TestQueryComplexityLimits: @pytest.mark.asyncio async def test_subquery_depth_limit_enforced(self): """Deeply nested subqueries should be blocked.""" - config = SQLEvaluatorConfig(max_subquery_depth=2) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_subquery_depth=2) + rule = SQLRule(config) # Depth 3: exceeds limit query = """ @@ -2593,7 +2593,7 @@ async def test_subquery_depth_limit_enforced(self): ) AS level2 ) AS level1 """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is True assert 
"subquery depth" in result.message.lower() @@ -2603,8 +2603,8 @@ async def test_subquery_depth_limit_enforced(self): @pytest.mark.asyncio async def test_subquery_depth_within_limit(self): """Shallow subqueries should pass.""" - config = SQLEvaluatorConfig(max_subquery_depth=2) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_subquery_depth=2) + rule = SQLRule(config) # Depth 2: at limit query = """ @@ -2614,15 +2614,15 @@ async def test_subquery_depth_within_limit(self): ) AS level2 ) AS level1 """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is False @pytest.mark.asyncio async def test_max_joins_enforced(self): """Too many joins should be blocked.""" - config = SQLEvaluatorConfig(max_joins=3) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_joins=3) + rule = SQLRule(config) # 4 joins: exceeds limit query = """ @@ -2632,7 +2632,7 @@ async def test_max_joins_enforced(self): JOIN categories ON products.category_id = categories.id JOIN brands ON products.brand_id = brands.id """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is True assert "JOIN" in result.message @@ -2642,8 +2642,8 @@ async def test_max_joins_enforced(self): @pytest.mark.asyncio async def test_max_joins_within_limit(self): """Reasonable number of joins should pass.""" - config = SQLEvaluatorConfig(max_joins=3) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_joins=3) + rule = SQLRule(config) # 3 joins: at limit query = """ @@ -2652,15 +2652,15 @@ async def test_max_joins_within_limit(self): JOIN products ON orders.product_id = products.id JOIN categories ON products.category_id = categories.id """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is False @pytest.mark.asyncio async def 
test_max_union_count_enforced(self): """Too many UNION operations should be blocked.""" - config = SQLEvaluatorConfig(max_union_count=2) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_union_count=2) + rule = SQLRule(config) # 3 UNIONs: exceeds limit query = """ @@ -2672,7 +2672,7 @@ async def test_max_union_count_enforced(self): UNION ALL SELECT * FROM partners """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is True assert "set operations" in result.message.lower() @@ -2682,8 +2682,8 @@ async def test_max_union_count_enforced(self): @pytest.mark.asyncio async def test_max_union_count_within_limit(self): """Reasonable UNION chains should pass.""" - config = SQLEvaluatorConfig(max_union_count=2) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_union_count=2) + rule = SQLRule(config) # 2 UNIONs: at limit query = """ @@ -2693,7 +2693,7 @@ async def test_max_union_count_within_limit(self): UNION ALL SELECT * FROM vendors """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is False @@ -2704,8 +2704,8 @@ class TestEdgeCasesAlreadyFixed: @pytest.mark.asyncio async def test_union_all_parts_checked_for_limit(self): """Issue #19: All parts of UNION should be checked for LIMIT.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # One part missing LIMIT - should fail query = """ @@ -2713,7 +2713,7 @@ async def test_union_all_parts_checked_for_limit(self): UNION ALL SELECT * FROM customers """ - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is True assert "LIMIT" in result.message @@ -2721,17 +2721,17 @@ async def test_union_all_parts_checked_for_limit(self): @pytest.mark.asyncio async def 
test_insert_select_validated(self): """Issue #20: SELECT in INSERT...SELECT should be validated.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # INSERT...SELECT without LIMIT - should fail - result = await evaluator.evaluate("INSERT INTO backup SELECT * FROM users") + result = await rule.evaluate("INSERT INTO backup SELECT * FROM users") assert result.error is None assert result.matched is True assert "LIMIT" in result.message # INSERT...SELECT with LIMIT - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "INSERT INTO backup SELECT * FROM users LIMIT 100" ) assert result.error is None @@ -2740,11 +2740,11 @@ async def test_insert_select_validated(self): @pytest.mark.asyncio async def test_create_view_validated(self): """Issue #21: SELECT in CREATE VIEW should be validated.""" - config = SQLEvaluatorConfig(require_limit=True) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(require_limit=True) + rule = SQLRule(config) # CREATE VIEW without LIMIT - should fail - result = await evaluator.evaluate( + result = await rule.evaluate( "CREATE VIEW active_users AS SELECT * FROM users WHERE active = true" ) assert result.error is None @@ -2752,7 +2752,7 @@ async def test_create_view_validated(self): assert "LIMIT" in result.message # CREATE VIEW with LIMIT - should pass - result = await evaluator.evaluate( + result = await rule.evaluate( "CREATE VIEW active_users AS SELECT * FROM users WHERE active = true LIMIT 1000" ) assert result.error is None @@ -2765,12 +2765,12 @@ class TestEnhancedMetadata: @pytest.mark.asyncio async def test_short_query_metadata(self): """Short queries should have full snippet.""" - config = SQLEvaluatorConfig(blocked_operations=["DELETE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DELETE"]) + rule = SQLRule(config) # Blocked operation to trigger metadata query 
= "DELETE FROM users" - result = await evaluator.evaluate(query) + result = await rule.evaluate(query) assert result.error is None assert result.matched is True assert "query_snippet" in result.metadata or "query" in result.metadata @@ -2783,13 +2783,13 @@ async def test_short_query_metadata(self): @pytest.mark.asyncio async def test_long_query_smart_truncation(self): """Long queries should have beginning and end with ellipsis.""" - config = SQLEvaluatorConfig(max_limit=10) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(max_limit=10) + rule = SQLRule(config) # Create a very long query that violates max_limit long_query = "SELECT " + ", ".join(f"col{i}" for i in range(100)) + " FROM users WHERE " + " AND ".join(f"field{i} = {i}" for i in range(50)) + " LIMIT 1000" - result = await evaluator.evaluate(long_query) + result = await rule.evaluate(long_query) assert result.error is None assert result.matched is True assert result.metadata is not None @@ -2802,13 +2802,13 @@ async def test_long_query_smart_truncation(self): @pytest.mark.asyncio async def test_query_hash_consistent(self): """Same query should produce same hash.""" - config = SQLEvaluatorConfig(blocked_operations=["DELETE"]) - evaluator = SQLEvaluator(config) + config = SQLRuleConfig(blocked_operations=["DELETE"]) + rule = SQLRule(config) query = "DELETE FROM users WHERE id = 1" - result1 = await evaluator.evaluate(query) - result2 = await evaluator.evaluate(query) + result1 = await rule.evaluate(query) + result2 = await rule.evaluate(query) assert result1.matched is True assert result2.matched is True diff --git a/evaluators/builtin/tests/sql/test_sql_config_validation.py b/rules/builtin/tests/sql/test_sql_config_validation.py similarity index 85% rename from evaluators/builtin/tests/sql/test_sql_config_validation.py rename to rules/builtin/tests/sql/test_sql_config_validation.py index 8842ed4f..cdcddc4e 100644 --- a/evaluators/builtin/tests/sql/test_sql_config_validation.py +++ 
b/rules/builtin/tests/sql/test_sql_config_validation.py @@ -1,11 +1,11 @@ -"""Targeted tests for SQLEvaluatorConfig validate_config branches.""" +"""Targeted tests for SQLRuleConfig validate_config branches.""" from __future__ import annotations import warnings import pytest -from agent_control_evaluators.sql.config import SQLEvaluatorConfig +from agent_control_rules.sql.config import SQLRuleConfig class TestConflictingRestrictions: @@ -13,21 +13,21 @@ class TestConflictingRestrictions: def test_blocked_and_allowed_operations_conflict(self): with pytest.raises(ValueError, match="blocked_operations and allowed_operations"): - SQLEvaluatorConfig( + SQLRuleConfig( blocked_operations=["DELETE"], allowed_operations=["SELECT"], ) def test_blocked_and_allowed_tables_conflict(self): with pytest.raises(ValueError, match="allowed_tables and blocked_tables"): - SQLEvaluatorConfig( + SQLRuleConfig( allowed_tables=["users"], blocked_tables=["secrets"], ) def test_blocked_and_allowed_schemas_conflict(self): with pytest.raises(ValueError, match="allowed_schemas and blocked_schemas"): - SQLEvaluatorConfig( + SQLRuleConfig( allowed_schemas=["public"], blocked_schemas=["internal"], ) @@ -38,15 +38,15 @@ class TestLimitBounds: def test_max_limit_must_be_positive(self): with pytest.raises(ValueError, match="max_limit must be a positive integer"): - SQLEvaluatorConfig(max_limit=0) + SQLRuleConfig(max_limit=0) def test_max_limit_negative_rejected(self): with pytest.raises(ValueError, match="max_limit must be a positive integer"): - SQLEvaluatorConfig(max_limit=-5) + SQLRuleConfig(max_limit=-5) def test_max_statements_must_be_positive(self): with pytest.raises(ValueError, match="max_statements must be a positive integer"): - SQLEvaluatorConfig( + SQLRuleConfig( allow_multi_statements=True, max_statements=0, ) @@ -57,11 +57,11 @@ class TestColumnControls: def test_column_context_without_required_columns_warns(self): with pytest.warns(UserWarning, match="column_context is set but 
required_columns"): - SQLEvaluatorConfig(column_context="where") + SQLRuleConfig(column_context="where") def test_required_column_values_rejects_empty_column_ref(self): with pytest.raises(ValueError, match="empty column reference"): - SQLEvaluatorConfig( + SQLRuleConfig( required_columns=["tenant_id"], required_column_values={" ": "tenant_id"}, ) @@ -70,7 +70,7 @@ def test_required_column_values_rejects_malformed_qualified_ref(self): with pytest.raises( ValueError, match="'table.column' format when qualified" ): - SQLEvaluatorConfig( + SQLRuleConfig( required_columns=["tenant_id"], required_column_values={"users.": "tenant_id"}, ) @@ -79,14 +79,14 @@ def test_required_column_values_rejects_blank_qualified_table_side(self): with pytest.raises( ValueError, match="'table.column' format when qualified" ): - SQLEvaluatorConfig( + SQLRuleConfig( required_columns=["tenant_id"], required_column_values={".tenant_id": "tenant_id"}, ) def test_required_column_values_rejects_empty_context_key(self): with pytest.raises(ValueError, match="empty context key"): - SQLEvaluatorConfig( + SQLRuleConfig( required_columns=["tenant_id"], required_column_values={"users.tenant_id": " "}, ) @@ -95,7 +95,7 @@ def test_valid_required_column_values_accepted(self): """Sanity check: a valid combination passes without raising.""" with warnings.catch_warnings(): warnings.simplefilter("error") # promote any warning to a failure - config = SQLEvaluatorConfig( + config = SQLRuleConfig( required_columns=["tenant_id"], column_context="where", required_column_values={"users.tenant_id": "tenant_id"}, diff --git a/rules/builtin/tests/sql/test_sqlglot_runtime.py b/rules/builtin/tests/sql/test_sqlglot_runtime.py new file mode 100644 index 00000000..8a017caf --- /dev/null +++ b/rules/builtin/tests/sql/test_sqlglot_runtime.py @@ -0,0 +1,17 @@ +"""SQLGlot runtime integration tests.""" + +from sqlglot import exp + +from agent_control_rules.sql import SQLRule, SQLRuleConfig + + +def 
test_sqlglot_public_imports_support_sql_rule(): + """SQLGlot's public API should remain importable with the native extra installed.""" + # Given: the SQL rule package imports SQLGlot's public expression module + assert exp.Select is not None + + # When: constructing the SQL rule + rule = SQLRule(SQLRuleConfig(blocked_operations=["DROP"])) + + # Then: the rule can be created without SQLGlot import shadowing failures + assert rule.metadata.name == "sql" diff --git a/rules/builtin/tests/test_base.py b/rules/builtin/tests/test_base.py new file mode 100644 index 00000000..911874f6 --- /dev/null +++ b/rules/builtin/tests/test_base.py @@ -0,0 +1,140 @@ +"""Tests for rule base classes. + +Architecture: Rules take config at __init__, evaluate() only takes data. +""" + +import pytest +from typing import Any + +from agent_control_rules import Rule, RuleConfig, RuleMetadata +from agent_control_models import RuleResult + + +class MockConfig(RuleConfig): + """Config model for mock rule.""" + + should_match: bool = False + timeout_ms: int = 5000 + + +class MockRule(Rule[MockConfig]): + """A mock rule for testing.""" + + metadata = RuleMetadata( + name="mock-rule", + version="1.0.0", + description="A mock rule for testing", + requires_api_key=False, + timeout_ms=5000, + ) + config_model = MockConfig + + async def evaluate(self, data: Any) -> RuleResult: + """Simple mock evaluation.""" + return RuleResult( + matched=self.config.should_match, + confidence=1.0, + message="Mock evaluation", + metadata={"data": str(data)}, + ) + + +class TestRuleMetadata: + """Tests for RuleMetadata dataclass.""" + + def test_metadata_with_defaults(self): + """Test metadata with default values.""" + metadata = RuleMetadata( + name="test-rule", + version="1.0.0", + description="Test rule", + ) + + assert metadata.name == "test-rule" + assert metadata.version == "1.0.0" + assert metadata.description == "Test rule" + assert metadata.requires_api_key is False + assert metadata.timeout_ms == 10000 + + def 
test_metadata_with_all_fields(self): + """Test metadata with all fields specified.""" + metadata = RuleMetadata( + name="full-rule", + version="2.0.0", + description="Full rule", + requires_api_key=True, + timeout_ms=15000, + ) + + assert metadata.name == "full-rule" + assert metadata.version == "2.0.0" + assert metadata.requires_api_key is True + assert metadata.timeout_ms == 15000 + + +class TestRule: + """Tests for Rule base class.""" + + def test_rule_is_abstract(self): + """Test that Rule is an ABC.""" + from abc import ABC + assert issubclass(Rule, ABC) + + def test_mock_rule_metadata(self): + """Test that mock rule has correct metadata.""" + assert MockRule.metadata.name == "mock-rule" + assert MockRule.metadata.version == "1.0.0" + assert MockRule.metadata.timeout_ms == 5000 + + @pytest.mark.asyncio + async def test_mock_rule_evaluate(self): + """Test mock rule evaluation.""" + rule = MockRule.from_dict({"should_match": True}) + + result = await rule.evaluate("test data") + + assert result.matched is True + assert result.confidence == 1.0 + assert result.metadata["data"] == "test data" + + @pytest.mark.asyncio + async def test_mock_rule_evaluate_no_match(self): + """Test mock rule evaluation without match.""" + rule = MockRule.from_dict({"should_match": False}) + + result = await rule.evaluate("test data") + + assert result.matched is False + + def test_rule_config_stored(self): + """Test that rule stores config.""" + rule = MockRule.from_dict({"should_match": True}) + + assert isinstance(rule.config, MockConfig) + assert rule.config.should_match is True + + def test_get_timeout_seconds_from_config(self): + """Test timeout conversion from config.""" + rule = MockRule.from_dict({"timeout_ms": 3000}) + + assert rule.get_timeout_seconds() == 3.0 + + def test_get_timeout_seconds_different_values(self): + """Test timeout with different values.""" + rule1 = MockRule.from_dict({"timeout_ms": 7500}) + rule2 = MockRule.from_dict({"timeout_ms": 1000}) + + assert 
rule1.get_timeout_seconds() == 7.5 + assert rule2.get_timeout_seconds() == 1.0 + + def test_get_timeout_seconds_from_default(self): + """Test timeout uses metadata default when not in config.""" + rule = MockRule.from_dict({}) # No timeout_ms in config + + # MockConfig has default timeout_ms=5000 + assert rule.get_timeout_seconds() == 5.0 + + def test_cannot_instantiate_abstract_class(self): + """Test that Rule cannot be instantiated directly.""" + with pytest.raises(TypeError, match="abstract"): + Rule({}) # type: ignore diff --git a/evaluators/builtin/tests/test_contrib_packages.py b/rules/builtin/tests/test_contrib_packages.py similarity index 88% rename from evaluators/builtin/tests/test_contrib_packages.py rename to rules/builtin/tests/test_contrib_packages.py index 9f25186d..92f1a9d3 100644 --- a/evaluators/builtin/tests/test_contrib_packages.py +++ b/rules/builtin/tests/test_contrib_packages.py @@ -37,9 +37,9 @@ def test_discover_contrib_packages_returns_expected_metadata() -> None: packages = module.discover_contrib_packages() assert [(package.name, package.package, package.extra) for package in packages] == [ - ("budget", "agent-control-evaluator-budget", "budget"), - ("cisco", "agent-control-evaluator-cisco", "cisco"), - ("galileo", "agent-control-evaluator-galileo", "galileo"), + ("budget", "agent-control-rule-budget", "budget"), + ("cisco", "agent-control-rule-cisco", "cisco"), + ("galileo", "agent-control-rule-galileo", "galileo"), ] diff --git a/rules/builtin/tests/test_discovery.py b/rules/builtin/tests/test_discovery.py new file mode 100644 index 00000000..7b10a714 --- /dev/null +++ b/rules/builtin/tests/test_discovery.py @@ -0,0 +1,211 @@ +"""Tests for entry-point-based rule discovery.""" + +from __future__ import annotations + +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest +from agent_control_models import RuleResult +from agent_control_rules import ( + Rule, + RuleConfig, + RuleMetadata, + clear_rules, + 
discover_rules, + ensure_rules_discovered, + get_all_rules, + list_rules, + register_rule, + reset_rule_discovery, +) +from agent_control_rules import _discovery as discovery_module + + +class _DiscoveryConfig(RuleConfig): + pass + + +def _make_class(*, name: str, available: bool = True) -> type[Rule[_DiscoveryConfig]]: + class _Dummy(Rule[_DiscoveryConfig]): + metadata = RuleMetadata(name=name, version="1.0.0", description="") + config_model = _DiscoveryConfig + + @classmethod + def is_available(cls) -> bool: + return available + + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=1.0, message="") + + _Dummy.__name__ = f"Discovery_{name.replace('-', '_')}" + return _Dummy + + +@pytest.fixture +def isolated_discovery(): + """Snapshot registry + discovery flag, restore on teardown.""" + snapshot = dict(get_all_rules()) + clear_rules() + reset_rule_discovery() + yield + clear_rules() + reset_rule_discovery() + for cls in snapshot.values(): + register_rule(cls) + + +def _make_fake_entry_point(name: str, rule_class: type[Any]) -> MagicMock: + """Build a MagicMock that mimics importlib.metadata.EntryPoint.""" + ep = MagicMock() + ep.name = name + ep.load.return_value = rule_class + return ep + + +def test_discover_rules_registers_available_classes(isolated_discovery): + """Discover walks the entry-point group and registers each available class.""" + cls = _make_class(name="disc-a") + fake_ep = _make_fake_entry_point("disc-a", cls) + + with patch.object(discovery_module, "entry_points", return_value=[fake_ep]): + count = discover_rules() + + assert count == 1 + assert get_all_rules().get("disc-a") is cls + + +def test_discover_rules_skips_unavailable_classes(isolated_discovery): + """Rules whose is_available() is False must NOT be registered.""" + cls = _make_class(name="disc-unavailable", available=False) + fake_ep = _make_fake_entry_point("disc-unavailable", cls) + + with patch.object(discovery_module, "entry_points", 
return_value=[fake_ep]): + count = discover_rules() + + assert count == 0 + assert "disc-unavailable" not in get_all_rules() + + +def test_discover_rules_skips_already_registered(isolated_discovery): + """Already-registered names are skipped without raising.""" + cls = _make_class(name="disc-existing") + register_rule(cls) + + fake_ep = _make_fake_entry_point("disc-existing", cls) + with patch.object(discovery_module, "entry_points", return_value=[fake_ep]): + count = discover_rules() + + assert count == 0 + + +def test_discover_rules_only_runs_once(isolated_discovery): + """Repeat calls short-circuit on the _DISCOVERY_COMPLETE flag.""" + cls = _make_class(name="disc-once") + fake_ep = _make_fake_entry_point("disc-once", cls) + + with patch.object( + discovery_module, "entry_points", return_value=[fake_ep] + ) as patched: + first = discover_rules() + second = discover_rules() + + # First call discovers, second returns 0 without consulting entry_points. + assert first == 1 + assert second == 0 + assert patched.call_count == 1 + + +def test_discover_rules_swallows_load_failures(isolated_discovery): + """A broken entry point is logged and skipped, not propagated.""" + bad_ep = MagicMock() + bad_ep.name = "broken" + bad_ep.load.side_effect = RuntimeError("boom") + + good_cls = _make_class(name="disc-good") + good_ep = _make_fake_entry_point("disc-good", good_cls) + + with patch.object(discovery_module, "entry_points", return_value=[bad_ep, good_ep]): + count = discover_rules() + + assert count == 1 + assert get_all_rules().get("disc-good") is good_cls + + +def test_discover_rules_handles_entry_points_failure(isolated_discovery): + """If entry_points() itself raises, discovery completes with zero results.""" + with patch.object( + discovery_module, + "entry_points", + side_effect=RuntimeError("entry-point system unavailable"), + ): + count = discover_rules() + + assert count == 0 + + +def test_discover_rules_falls_back_to_builtin_source_imports(isolated_discovery): + 
"""Source-tree runs without entry point metadata still load builtin rules.""" + with patch.object(discovery_module, "entry_points", return_value=[]): + count = discover_rules() + + assert count == 4 + assert set(get_all_rules()) == {"json", "list", "regex", "sql"} + + +def test_builtin_source_discovery_skips_broken_imports( + isolated_discovery, + monkeypatch: pytest.MonkeyPatch, +): + """A broken builtin source import is logged and skipped.""" + monkeypatch.setattr( + discovery_module, + "_BUILTIN_RULES", + (("broken", "agent_control_rules.does_not_exist", "MissingRule"),), + ) + + assert discovery_module._discover_builtin_rules_from_source() == 0 + assert get_all_rules() == {} + + +def test_reset_rule_discovery_allows_rerun(isolated_discovery): + """reset_rule_discovery clears the completed flag so discover runs again.""" + cls = _make_class(name="disc-reset") + fake_ep = _make_fake_entry_point("disc-reset", cls) + + with patch.object( + discovery_module, "entry_points", return_value=[fake_ep] + ) as patched: + discover_rules() + clear_rules() + reset_rule_discovery() + count = discover_rules() + + assert count == 1 + assert patched.call_count == 2 + + +def test_ensure_rules_discovered_runs_once(isolated_discovery): + """ensure_rules_discovered is the lazy-init entry point.""" + cls = _make_class(name="disc-ensure") + fake_ep = _make_fake_entry_point("disc-ensure", cls) + + with patch.object( + discovery_module, "entry_points", return_value=[fake_ep] + ) as patched: + ensure_rules_discovered() + ensure_rules_discovered() + + assert patched.call_count == 1 + assert get_all_rules().get("disc-ensure") is cls + + +def test_list_rules_triggers_discovery(isolated_discovery): + """list_rules is the convenience accessor; it must trigger discovery.""" + cls = _make_class(name="disc-list") + fake_ep = _make_fake_entry_point("disc-list", cls) + + with patch.object(discovery_module, "entry_points", return_value=[fake_ep]): + result = list_rules() + + assert 
result.get("disc-list") is cls diff --git a/rules/builtin/tests/test_factory.py b/rules/builtin/tests/test_factory.py new file mode 100644 index 00000000..5eb1afa5 --- /dev/null +++ b/rules/builtin/tests/test_factory.py @@ -0,0 +1,172 @@ +"""Tests for the LRU-cached rule factory.""" + +from __future__ import annotations + +import importlib +from typing import Any + +import pytest +from agent_control_rules import ( + Rule, + RuleConfig, + RuleMetadata, + clear_rule_cache, + clear_rules, + get_all_rules, + get_rule_instance, + register_rule, +) +from agent_control_rules import _factory as factory_module +from agent_control_models import RuleResult, RuleSpec + + +class _FactoryConfig(RuleConfig): + payload: str = "default" + + +class _FactoryRule(Rule[_FactoryConfig]): + metadata = RuleMetadata(name="factory-dummy", version="1.0.0", description="") + config_model = _FactoryConfig + + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=1.0, message="") + + +@pytest.fixture +def isolated_factory(): + """Snapshot registry/cache so factory tests don't leak state.""" + snapshot = dict(get_all_rules()) + clear_rules() + clear_rule_cache() + register_rule(_FactoryRule) + yield + clear_rule_cache() + clear_rules() + for cls in snapshot.values(): + register_rule(cls) + + +def test_get_rule_instance_returns_rule(isolated_factory): + spec = RuleSpec(name="factory-dummy", config={"payload": "p1"}) + + instance = get_rule_instance(spec) + + assert isinstance(instance, _FactoryRule) + assert instance.config.payload == "p1" + + +def test_get_rule_instance_caches_by_config(isolated_factory): + spec_a = RuleSpec(name="factory-dummy", config={"payload": "same"}) + spec_b = RuleSpec(name="factory-dummy", config={"payload": "same"}) + + first = get_rule_instance(spec_a) + second = get_rule_instance(spec_b) + + # Same config = same cached instance. 
+ assert first is second + + +def test_get_rule_instance_treats_different_configs_separately(isolated_factory): + spec_a = RuleSpec(name="factory-dummy", config={"payload": "a"}) + spec_b = RuleSpec(name="factory-dummy", config={"payload": "b"}) + + instance_a = get_rule_instance(spec_a) + instance_b = get_rule_instance(spec_b) + + assert instance_a is not instance_b + assert instance_a.config.payload == "a" + assert instance_b.config.payload == "b" + + +def test_get_rule_instance_raises_for_unknown_rule(isolated_factory): + with pytest.raises(ValueError, match="not found"): + get_rule_instance(RuleSpec(name="no-such-rule", config={})) + + +def test_clear_rule_cache_forces_recreation(isolated_factory): + spec = RuleSpec(name="factory-dummy", config={"payload": "p"}) + + first = get_rule_instance(spec) + clear_rule_cache() + second = get_rule_instance(spec) + + assert first is not second + + +def test_get_rule_instance_evicts_oldest_when_full(isolated_factory, monkeypatch): + """LRU eviction: when cache is full, the least-recently-used entry is dropped.""" + # Force a tiny cache so we can observe eviction without overhead. + monkeypatch.setattr(factory_module, "RULE_CACHE_SIZE", 2) + + spec_a = RuleSpec(name="factory-dummy", config={"payload": "a"}) + spec_b = RuleSpec(name="factory-dummy", config={"payload": "b"}) + spec_c = RuleSpec(name="factory-dummy", config={"payload": "c"}) + + first_a = get_rule_instance(spec_a) + get_rule_instance(spec_b) + # Insert third → "a" is the LRU and must be evicted. + get_rule_instance(spec_c) + + re_a = get_rule_instance(spec_a) + # "a" was evicted: new instance must NOT be the original. 
+ assert re_a is not first_a + + +def test_get_rule_instance_moves_hit_to_most_recent( + isolated_factory, monkeypatch +): + """Cache hit must refresh LRU recency so the touched entry isn't evicted next.""" + monkeypatch.setattr(factory_module, "RULE_CACHE_SIZE", 2) + + spec_a = RuleSpec(name="factory-dummy", config={"payload": "a"}) + spec_b = RuleSpec(name="factory-dummy", config={"payload": "b"}) + spec_c = RuleSpec(name="factory-dummy", config={"payload": "c"}) + + first_a = get_rule_instance(spec_a) + get_rule_instance(spec_b) + # Touch "a" so "b" becomes the LRU. + re_a = get_rule_instance(spec_a) + assert re_a is first_a + + # Inserting "c" should evict "b", not "a". + get_rule_instance(spec_c) + + refetched_a = get_rule_instance(spec_a) + assert refetched_a is first_a # still cached + + +def test_parse_cache_size_uses_default_when_unset(monkeypatch): + monkeypatch.delenv("RULE_CACHE_SIZE", raising=False) + reloaded = importlib.reload(factory_module) + try: + assert reloaded.RULE_CACHE_SIZE == factory_module.DEFAULT_CACHE_SIZE + finally: + importlib.reload(factory_module) + + +def test_parse_cache_size_falls_back_on_invalid_value(monkeypatch): + monkeypatch.setenv("RULE_CACHE_SIZE", "not-a-number") + reloaded = importlib.reload(factory_module) + try: + assert reloaded.RULE_CACHE_SIZE == reloaded.DEFAULT_CACHE_SIZE + finally: + importlib.reload(factory_module) + + +def test_parse_cache_size_clamps_to_minimum(monkeypatch): + monkeypatch.setenv("RULE_CACHE_SIZE", "0") + reloaded = importlib.reload(factory_module) + try: + # Anything below MIN_CACHE_SIZE is clamped to avoid infinite eviction loops. 
+ assert reloaded.RULE_CACHE_SIZE >= reloaded.MIN_CACHE_SIZE + finally: + importlib.reload(factory_module) + + +def test_parse_cache_size_accepts_valid_int(monkeypatch): + monkeypatch.setenv("RULE_CACHE_SIZE", "42") + reloaded = importlib.reload(factory_module) + try: + assert reloaded.RULE_CACHE_SIZE == 42 + finally: + importlib.reload(factory_module) diff --git a/rules/builtin/tests/test_package_exports.py b/rules/builtin/tests/test_package_exports.py new file mode 100644 index 00000000..1bd9bad5 --- /dev/null +++ b/rules/builtin/tests/test_package_exports.py @@ -0,0 +1,36 @@ +"""Coverage for package-level exports and local-source metadata fallbacks.""" + +from __future__ import annotations + +import importlib.metadata +import importlib.util +from pathlib import Path + + +def test_package_version_falls_back_when_distribution_metadata_is_absent( + monkeypatch, +) -> None: + """Local source-tree imports should work before the package is installed.""" + + def _raise_not_found(_: str) -> str: + raise importlib.metadata.PackageNotFoundError("agent-control-rules") + + monkeypatch.setattr(importlib.metadata, "version", _raise_not_found) + + init_path = ( + Path(__file__).resolve().parents[1] + / "src" + / "agent_control_rules" + / "__init__.py" + ) + spec = importlib.util.spec_from_file_location( + "_agent_control_rules_version_probe", + init_path, + ) + assert spec is not None + assert spec.loader is not None + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + assert module.__version__ == "0.0.0.dev" diff --git a/rules/builtin/tests/test_registry.py b/rules/builtin/tests/test_registry.py new file mode 100644 index 00000000..b1985b00 --- /dev/null +++ b/rules/builtin/tests/test_registry.py @@ -0,0 +1,119 @@ +"""Tests for the in-memory rule registry.""" + +from __future__ import annotations + +from typing import Any + +import pytest +from agent_control_rules import ( + Rule, + RuleConfig, + RuleMetadata, + clear_rules, + 
get_all_rules, + get_rule, + register_rule, +) +from agent_control_models import RuleResult + + +class _DummyConfig(RuleConfig): + pass + + +def _make_class(*, name: str, available: bool = True) -> type[Rule[_DummyConfig]]: + """Build a fresh Rule subclass with the supplied metadata name.""" + + class _Dummy(Rule[_DummyConfig]): + metadata = RuleMetadata( + name=name, + version="1.0.0", + description="", + ) + config_model = _DummyConfig + + @classmethod + def is_available(cls) -> bool: + return available + + async def evaluate(self, data: Any) -> RuleResult: + return RuleResult(matched=False, confidence=1.0, message="") + + _Dummy.__name__ = f"Dummy_{name.replace('-', '_')}" + return _Dummy + + +@pytest.fixture +def isolated_registry(): + """Snapshot and restore the global registry so tests don't leak state.""" + snapshot = dict(get_all_rules()) + clear_rules() + yield + clear_rules() + for cls in snapshot.values(): + register_rule(cls) + + +def test_register_and_lookup_rule(isolated_registry): + cls = _make_class(name="reg-a") + + register_rule(cls) + + assert get_rule("reg-a") is cls + + +def test_get_rule_returns_none_when_not_registered(isolated_registry): + assert get_rule("does-not-exist") is None + + +def test_get_all_rules_returns_copy(isolated_registry): + cls = _make_class(name="reg-copy") + register_rule(cls) + + snapshot = get_all_rules() + snapshot["evil"] = cls # mutate the returned dict + + # Internal registry must not reflect external mutation. + assert "evil" not in get_all_rules() + + +def test_register_is_idempotent_for_same_class(isolated_registry): + cls = _make_class(name="reg-idem") + + register_rule(cls) + # Registering the exact same class again must not raise. 
+ assert register_rule(cls) is cls + + +def test_register_rejects_name_collision_with_different_class(isolated_registry): + first = _make_class(name="reg-conflict") + second = _make_class(name="reg-conflict") + register_rule(first) + + with pytest.raises(ValueError, match="already registered"): + register_rule(second) + + +def test_register_skips_unavailable_rules(isolated_registry): + cls = _make_class(name="reg-unavailable", available=False) + + # Should not raise and should not register. + assert register_rule(cls) is cls + assert get_rule("reg-unavailable") is None + + +def test_clear_rules_empties_registry(isolated_registry): + register_rule(_make_class(name="reg-c1")) + register_rule(_make_class(name="reg-c2")) + assert len(get_all_rules()) == 2 + + clear_rules() + + assert get_all_rules() == {} + + +def test_register_decorator_returns_class(isolated_registry): + cls = _make_class(name="reg-decorator") + # The function is documented as decorator-compatible: it must return the class. + decorated = register_rule(cls) + assert decorated is cls diff --git a/rules/contrib/README.md b/rules/contrib/README.md new file mode 100644 index 00000000..78977950 --- /dev/null +++ b/rules/contrib/README.md @@ -0,0 +1,8 @@ +# Agent Control Contrib Rules + +Contributed rules and templates for extending Agent Control. 
+ +- `galileo/` — Luna rule integration +- `template/` — Starter template for adding new rules + +Full guide: https://docs.agentcontrol.dev/concepts/rules/custom-rules diff --git a/evaluators/contrib/budget/Makefile b/rules/contrib/budget/Makefile similarity index 83% rename from evaluators/contrib/budget/Makefile rename to rules/contrib/budget/Makefile index a6b0c609..43dcde91 100644 --- a/evaluators/contrib/budget/Makefile +++ b/rules/contrib/budget/Makefile @@ -1,7 +1,7 @@ .PHONY: help test lint lint-fix typecheck check build help: - @echo "Agent Control Evaluator - Budget - Makefile commands" + @echo "Agent Control Rule - Budget - Makefile commands" @echo "" @echo " make test - run pytest" @echo " make lint - run ruff check" @@ -11,7 +11,7 @@ help: @echo " make build - build package" test: - uv run --with pytest --with pytest-asyncio --with pytest-cov pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-budget.xml -q + uv run --with pytest --with pytest-asyncio --with pytest-cov pytest tests --cov=src --cov-report=xml:../../../coverage-rules-budget.xml -q lint: uv run --with ruff ruff check --config ../../../pyproject.toml src/ diff --git a/evaluators/contrib/budget/README.md b/rules/contrib/budget/README.md similarity index 74% rename from evaluators/contrib/budget/README.md rename to rules/contrib/budget/README.md index c83de1ab..12dfcb44 100644 --- a/evaluators/contrib/budget/README.md +++ b/rules/contrib/budget/README.md @@ -1,35 +1,35 @@ -# agent-control-evaluator-budget +# agent-control-rule-budget -Budget evaluator for agent-control that tracks cumulative LLM token and cost usage per scope and time window. +Budget rule for agent-control that tracks cumulative LLM token and cost usage per scope and time window. 
## Install ```bash -pip install "agent-control-evaluators[budget]" +pip install "agent-control-rules[budget]" ``` Fallback direct wheel install: ```bash -pip install agent-control-evaluator-budget +pip install agent-control-rule-budget ``` For local development: ```bash -uv pip install -e evaluators/contrib/budget +uv pip install -e rules/contrib/budget ``` ## Quickstart ```python -from agent_control_evaluator_budget.budget import ( - BudgetEvaluatorConfig, +from agent_control_rule_budget.budget import ( + BudgetRuleConfig, BudgetLimitRule, ModelPricing, ) -config = BudgetEvaluatorConfig( +config = BudgetRuleConfig( budget_id="support-daily", limits=[ BudgetLimitRule( @@ -59,7 +59,7 @@ config = BudgetEvaluatorConfig( ) ``` -The evaluator reads token usage from standard fields such as `usage.input_tokens` and `usage.output_tokens`. Configure `token_path` only when your event shape uses a custom location. +The rule reads token usage from standard fields such as `usage.input_tokens` and `usage.output_tokens`. Configure `token_path` only when your event shape uses a custom location. ## Scope and group_by @@ -85,7 +85,7 @@ With `metadata_paths={"user_id": "metadata.user_id"}`, each user gets a separate `budget_id` identifies the accumulated budget pool. -Evaluators with the same `budget_id` share accumulated spend and token totals across all evaluator instances. Each evaluator still evaluates using its own configured rules -- the shared state is the bucket (the rolling sum), not the rule set. Evaluators with different `budget_id` values are fully isolated. +Rules with the same `budget_id` share accumulated spend and token totals across all rule instances. Each rule still evaluates using its own configured limits -- the shared state is the bucket (the rolling sum), not the set of limits. Rules with different `budget_id` values are fully isolated. Use stable names such as `support-daily`, `billing-global`, or `tenant-acme-monthly`.
Avoid generating a new `budget_id` per request unless each request should have an isolated budget. @@ -103,10 +103,10 @@ Pricing and `model_path` are required when any rule uses `limit_unit="usd_cents" ## Dual Ceiling Pattern -Use two evaluators when cost and token ceilings need independent control records or different `budget_id` pools: +Use two rules when cost and token ceilings need independent control records or different `budget_id` pools: ```python -cost_config = BudgetEvaluatorConfig( +cost_config = BudgetRuleConfig( budget_id="support-cost-daily", limits=[ BudgetLimitRule( @@ -124,7 +124,7 @@ cost_config = BudgetEvaluatorConfig( metadata_paths={"agent": "metadata.agent", "user_id": "metadata.user_id"}, ) -token_config = BudgetEvaluatorConfig( +token_config = BudgetRuleConfig( budget_id="support-token-daily", limits=[ BudgetLimitRule( @@ -139,7 +139,7 @@ token_config = BudgetEvaluatorConfig( ) ``` -This pattern lets cost and token budgets reset, alert, and roll out independently. A single evaluator can also contain both rules when one shared pool and one control result are sufficient. +This pattern lets cost and token budgets reset, alert, and roll out independently. A single rule can also contain both limits when one shared pool and one control result are sufficient.
## Limitations diff --git a/evaluators/contrib/budget/pyproject.toml b/rules/contrib/budget/pyproject.toml similarity index 63% rename from evaluators/contrib/budget/pyproject.toml rename to rules/contrib/budget/pyproject.toml index 1af20d5e..3b0808a5 100644 --- a/evaluators/contrib/budget/pyproject.toml +++ b/rules/contrib/budget/pyproject.toml @@ -1,13 +1,13 @@ [project] -name = "agent-control-evaluator-budget" +name = "agent-control-rule-budget" version = "8.1.2" -description = "Budget evaluator for agent-control -- cumulative LLM cost and token tracking" +description = "Budget rule for agent-control -- cumulative LLM cost and token tracking" readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "Agent Control Team" }] dependencies = [ - "agent-control-evaluators>=7.5.0", + "agent-control-rules>=7.5.0", "agent-control-models>=7.5.0", ] @@ -19,15 +19,15 @@ dev = [ "mypy>=1.8.0", ] -[project.entry-points."agent_control.evaluators"] -budget = "agent_control_evaluator_budget.budget:BudgetEvaluator" +[project.entry-points."agent_control.rules"] +budget = "agent_control_rule_budget.budget:BudgetRule" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/agent_control_evaluator_budget"] +packages = ["src/agent_control_rule_budget"] [tool.ruff] line-length = 100 @@ -37,7 +37,7 @@ target-version = "py312" select = ["E", "F", "I"] [tool.uv.sources] -agent-control-evaluators = { path = "../../builtin", editable = true } +agent-control-rules = { path = "../../builtin", editable = true } agent-control-models = { path = "../../../models", editable = true } [dependency-groups] diff --git a/evaluators/contrib/budget/tests/__init__.py b/rules/contrib/budget/src/agent_control_rule_budget/__init__.py similarity index 100% rename from evaluators/contrib/budget/tests/__init__.py rename to rules/contrib/budget/src/agent_control_rule_budget/__init__.py diff --git 
a/rules/contrib/budget/src/agent_control_rule_budget/budget/__init__.py b/rules/contrib/budget/src/agent_control_rule_budget/budget/__init__.py new file mode 100644 index 00000000..64a5ba79 --- /dev/null +++ b/rules/contrib/budget/src/agent_control_rule_budget/budget/__init__.py @@ -0,0 +1,24 @@ +"""Budget rule for per-agent LLM cost and token tracking.""" + +from agent_control_rule_budget.budget.config import ( + BudgetLimitRule, + BudgetRuleConfig, + ModelPricing, +) +from agent_control_rule_budget.budget.memory_store import InMemoryBudgetStore +from agent_control_rule_budget.budget.rule import BudgetRule +from agent_control_rule_budget.budget.store import BudgetSnapshot, BudgetStore + +# Note: clear_budget_stores is a testing utility and is intentionally not +# re-exported here. Import it directly from the rule submodule in tests: +# from agent_control_rule_budget.budget.rule import clear_budget_stores + +__all__ = [ + "BudgetRule", + "BudgetRuleConfig", + "BudgetLimitRule", + "BudgetSnapshot", + "BudgetStore", + "InMemoryBudgetStore", + "ModelPricing", +] diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py b/rules/contrib/budget/src/agent_control_rule_budget/budget/config.py similarity index 91% rename from evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py rename to rules/contrib/budget/src/agent_control_rule_budget/budget/config.py index 795044be..1c27036f 100644 --- a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/config.py +++ b/rules/contrib/budget/src/agent_control_rule_budget/budget/config.py @@ -1,10 +1,10 @@ -"""Configuration for the budget evaluator.""" +"""Configuration for the budget rule.""" from __future__ import annotations from typing import Literal -from agent_control_evaluators._base import EvaluatorConfig +from agent_control_rules._base import RuleConfig from pydantic import Field, field_validator, model_validator # 
--------------------------------------------------------------------------- @@ -17,19 +17,19 @@ WINDOW_MONTHLY = 2592000 # 30 days -class ModelPricing(EvaluatorConfig): +class ModelPricing(RuleConfig): """Per-model token pricing in cents per 1K tokens.""" input_per_1k: float = 0.0 output_per_1k: float = 0.0 -class BudgetLimitRule(EvaluatorConfig): +class BudgetLimitRule(RuleConfig): """A single budget limit rule. Each rule defines a ceiling for a combination of scope dimensions and time window. Multiple rules can apply to the same step -- the - evaluator checks all of them and triggers on the first breach. + rule checks all of them and triggers on the first breach. Attributes: scope: Static scope dimensions that must match for this rule @@ -68,8 +68,8 @@ def validate_window_seconds(cls, v: int | None) -> int | None: return v -class BudgetEvaluatorConfig(EvaluatorConfig): - """Configuration for the budget evaluator. +class BudgetRuleConfig(RuleConfig): + """Configuration for the budget rule. Attributes: limits: List of budget limit rules. Each is checked independently. 
@@ -111,7 +111,7 @@ class BudgetEvaluatorConfig(EvaluatorConfig): metadata_paths: dict[str, str] = Field(default_factory=dict) @model_validator(mode="after") - def require_pricing_for_cost_rules(self) -> BudgetEvaluatorConfig: + def require_pricing_for_cost_rules(self) -> BudgetRuleConfig: has_cost_rule = any(rule.limit_unit == "usd_cents" for rule in self.limits) if has_cost_rule and self.pricing is None: raise ValueError('pricing is required when any rule uses limit_unit="usd_cents"') diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/memory_store.py b/rules/contrib/budget/src/agent_control_rule_budget/budget/memory_store.py similarity index 100% rename from evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/memory_store.py rename to rules/contrib/budget/src/agent_control_rule_budget/budget/memory_store.py diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/evaluator.py b/rules/contrib/budget/src/agent_control_rule_budget/budget/rule.py similarity index 87% rename from evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/evaluator.py rename to rules/contrib/budget/src/agent_control_rule_budget/budget/rule.py index c4380903..eb17a90e 100644 --- a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/evaluator.py +++ b/rules/contrib/budget/src/agent_control_rule_budget/budget/rule.py @@ -1,11 +1,11 @@ -"""Budget evaluator -- tracks cumulative LLM token/cost usage. +"""Budget rule -- tracks cumulative LLM token/cost usage. -Deterministic evaluator: confidence is always 1.0, matched is True when +Deterministic rule: confidence is always 1.0, matched is True when any configured limit is exceeded. Utilization ratio and spend breakdown are returned in result metadata, not in confidence. -The evaluator is stateless. Budget state lives in a module-level store -registry, independent of the evaluator instance cache in _factory.py. +The rule is stateless. 
Budget state lives in a module-level store +registry, independent of the rule instance cache in _factory.py. This prevents silent state loss on LRU eviction and avoids cross-control leakage when different controls use different budget_id values. """ @@ -18,11 +18,11 @@ from importlib.metadata import PackageNotFoundError, version from typing import Any -from agent_control_evaluators._base import Evaluator, EvaluatorMetadata -from agent_control_evaluators._registry import register_evaluator -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult +from agent_control_rules._base import Rule, RuleMetadata +from agent_control_rules._registry import register_rule -from .config import BudgetEvaluatorConfig, ModelPricing +from .config import BudgetRuleConfig, ModelPricing from .memory_store import InMemoryBudgetStore, _scope_matches from .store import BudgetStore @@ -32,7 +32,7 @@ def _resolve_package_version() -> str: """Return the installed package version, or a dev fallback during local imports.""" try: - return version("agent-control-evaluator-budget") + return version("agent-control-rule-budget") except PackageNotFoundError: return "0.0.0.dev" @@ -42,7 +42,7 @@ def _resolve_package_version() -> str: # --------------------------------------------------------------------------- # Module-level store registry # -# Decoupled from the evaluator instance cache so that LRU eviction in +# Decoupled from the rule instance cache so that LRU eviction in # _factory.py does not destroy accumulated budget state. The registry # is keyed by budget_id. Controls with the same budget_id intentionally # share accumulated spend; different budget_id values are isolated. 
@@ -55,7 +55,7 @@ def _resolve_package_version() -> str: _STORE_REGISTRY_LOCK = threading.Lock() -def get_or_create_store(config: BudgetEvaluatorConfig) -> BudgetStore: +def get_or_create_store(config: BudgetRuleConfig) -> BudgetStore: """Get or create a store for the given config, thread-safe.""" key = f"budget:{config.budget_id}" with _STORE_REGISTRY_LOCK: @@ -175,32 +175,32 @@ def _extract_metadata(data: Any, metadata_paths: dict[str, str]) -> dict[str, st # --------------------------------------------------------------------------- -# Evaluator +# Rule # --------------------------------------------------------------------------- -@register_evaluator -class BudgetEvaluator(Evaluator[BudgetEvaluatorConfig]): +@register_rule +class BudgetRule(Rule[BudgetRuleConfig]): """Tracks cumulative LLM token and cost usage per scope and time window. - Deterministic evaluator: matched=True when any configured limit is + Deterministic rule: matched=True when any configured limit is exceeded, confidence=1.0 always. - The evaluator is stateless. Budget state is managed by a module-level - store registry (get_or_create_store), not by the evaluator instance. + The rule is stateless. Budget state is managed by a module-level + store registry (get_or_create_store), not by the rule instance. 
""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="budget", version=_PACKAGE_VERSION, description="Cumulative LLM token and cost budget tracking", ) - config_model = BudgetEvaluatorConfig + config_model = BudgetRuleConfig - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate step data against all configured budget limits.""" if data is None: - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="No data to evaluate", @@ -238,7 +238,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: else: block_reason = f"Unknown model: {model}" if self.config.unknown_model_behavior == "block": - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"{block_reason} (blocked)", @@ -249,7 +249,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: }, ) logger.warning( - "Budget evaluator: %s, treating cost as 0 " + "Budget rule: %s, treating cost as 0 " "(unknown_model_behavior=warn)", block_reason, ) @@ -281,7 +281,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: if breached: first = breached[0] - return EvaluatorResult( + return RuleResult( matched=True, confidence=1.0, message=f"Budget exceeded (utilization={first['utilization']:.0%})", @@ -295,7 +295,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: ) max_util = max((s["utilization"] for s in all_snaps), default=0.0) - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message=f"Within budget (utilization={max_util:.0%})", diff --git a/evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/store.py b/rules/contrib/budget/src/agent_control_rule_budget/budget/store.py similarity index 100% rename from evaluators/contrib/budget/src/agent_control_evaluator_budget/budget/store.py rename to rules/contrib/budget/src/agent_control_rule_budget/budget/store.py diff --git 
a/evaluators/contrib/budget/tests/budget/__init__.py b/rules/contrib/budget/tests/__init__.py similarity index 100% rename from evaluators/contrib/budget/tests/budget/__init__.py rename to rules/contrib/budget/tests/__init__.py diff --git a/evaluators/contrib/galileo/tests/__init__.py b/rules/contrib/budget/tests/budget/__init__.py similarity index 100% rename from evaluators/contrib/galileo/tests/__init__.py rename to rules/contrib/budget/tests/budget/__init__.py diff --git a/evaluators/contrib/budget/tests/budget/test_budget.py b/rules/contrib/budget/tests/budget/test_budget.py similarity index 90% rename from evaluators/contrib/budget/tests/budget/test_budget.py rename to rules/contrib/budget/tests/budget/test_budget.py index 68e5cf24..ab1e22e0 100644 --- a/evaluators/contrib/budget/tests/budget/test_budget.py +++ b/rules/contrib/budget/tests/budget/test_budget.py @@ -1,4 +1,4 @@ -"""Tests for the budget evaluator (contrib). +"""Tests for the budget rule (contrib). Given/When/Then comment style per reviewer request. 
""" @@ -12,22 +12,22 @@ import pytest from pydantic import ValidationError -import agent_control_evaluator_budget.budget.evaluator as budget_evaluator_module -from agent_control_evaluator_budget.budget.config import ( +import agent_control_rule_budget.budget.rule as budget_rule_module +from agent_control_rule_budget.budget.config import ( WINDOW_DAILY, WINDOW_MONTHLY, WINDOW_WEEKLY, - BudgetEvaluatorConfig, + BudgetRuleConfig, BudgetLimitRule, ModelPricing, ) -from agent_control_evaluator_budget.budget.evaluator import ( - BudgetEvaluator, +from agent_control_rule_budget.budget.rule import ( + BudgetRule, _extract_tokens, clear_budget_stores, get_or_create_store, ) -from agent_control_evaluator_budget.budget.memory_store import ( +from agent_control_rule_budget.budget.memory_store import ( InMemoryBudgetStore, _build_scope_key, _compute_utilization, @@ -42,7 +42,7 @@ def _clean_store_registry() -> None: def test_metadata_version_matches_distribution_version() -> None: - assert BudgetEvaluator.metadata.version == version("agent-control-evaluator-budget") + assert BudgetRule.metadata.version == version("agent-control-rule-budget") def test_metadata_version_falls_back_without_distribution( @@ -51,9 +51,9 @@ def test_metadata_version_falls_back_without_distribution( def _raise_not_found(_: str) -> str: raise PackageNotFoundError - monkeypatch.setattr(budget_evaluator_module, "version", _raise_not_found) + monkeypatch.setattr(budget_rule_module, "version", _raise_not_found) - assert budget_evaluator_module._resolve_package_version() == "0.0.0.dev" + assert budget_rule_module._resolve_package_version() == "0.0.0.dev" # --------------------------------------------------------------------------- @@ -333,7 +333,7 @@ def test_compute_utilization_negative_clamped_to_zero(self) -> None: def test_parse_period_key_valid(self) -> None: # Given: well-formed period key / Then: parsed tuple - from agent_control_evaluator_budget.budget.memory_store import _parse_period_key + from 
agent_control_rule_budget.budget.memory_store import _parse_period_key assert _parse_period_key("P86400:19675") == (86400, 19675) assert _parse_period_key("P3600:0") == (3600, 0) @@ -342,7 +342,7 @@ def test_parse_period_key_malformed(self) -> None: # Given: empty, missing, or non-numeric period keys # When: parsed # Then: None returned (never raises) - from agent_control_evaluator_budget.budget.memory_store import _parse_period_key + from agent_control_rule_budget.budget.memory_store import _parse_period_key assert _parse_period_key("") is None # cumulative sentinel assert _parse_period_key("P") is None # no separator @@ -473,7 +473,7 @@ def test_token_only_rule(self) -> None: def test_empty_limits_rejected(self) -> None: # Given/When: empty limits list / Then: rejected with pytest.raises(ValidationError): - BudgetEvaluatorConfig(limits=[]) + BudgetRuleConfig(limits=[]) def test_window_constants(self) -> None: # Given/When/Then: constants have expected values @@ -487,31 +487,31 @@ def test_model_pricing_validation_requires_pricing_for_cost_rules(self) -> None: # Given: a cost-based rule without pricing # When/Then: config validation rejects it with pytest.raises(ValidationError, match="pricing is required"): - BudgetEvaluatorConfig(limits=[BudgetLimitRule(limit=100)]) + BudgetRuleConfig(limits=[BudgetLimitRule(limit=100)]) def test_model_pricing_token_rule_no_pricing_ok(self) -> None: # Given: a token-only rule without pricing # When: config is created - config = BudgetEvaluatorConfig(limits=[BudgetLimitRule(limit=100, limit_unit="tokens")]) + config = BudgetRuleConfig(limits=[BudgetLimitRule(limit=100, limit_unit="tokens")]) # Then: no pricing table is required assert config.pricing is None # --------------------------------------------------------------------------- -# BudgetEvaluator integration +# BudgetRule integration # --------------------------------------------------------------------------- -class TestBudgetEvaluator: - def _make_evaluator(self, 
**kwargs: Any) -> BudgetEvaluator: - config = BudgetEvaluatorConfig(**kwargs) - return BudgetEvaluator(config) +class TestBudgetRule: + def _make_rule(self, **kwargs: Any) -> BudgetRule: + config = BudgetRuleConfig(**kwargs) + return BudgetRule(config) @pytest.mark.asyncio async def test_single_call_under_budget(self) -> None: - # Given: evaluator with 1000-token limit (token-only, no pricing needed) - ev = self._make_evaluator(limits=[{"limit": 1000, "limit_unit": "tokens"}]) + # Given: rule with 1000-token limit (token-only, no pricing needed) + ev = self._make_rule(limits=[{"limit": 1000, "limit_unit": "tokens"}]) # When: evaluate with usage data result = await ev.evaluate({"usage": {"input_tokens": 100, "output_tokens": 50}}) @@ -522,8 +522,8 @@ async def test_single_call_under_budget(self) -> None: @pytest.mark.asyncio async def test_accumulate_past_budget(self) -> None: - # Given: evaluator with 50-cent limit and pricing table - ev = self._make_evaluator( + # Given: rule with 50-cent limit and pricing table + ev = self._make_rule( limits=[{"limit": 50}], pricing={"gpt-4": {"input_per_1k": 30.0, "output_per_1k": 60.0}}, model_path="model", @@ -543,7 +543,7 @@ async def test_accumulate_past_budget(self) -> None: @pytest.mark.asyncio async def test_group_by_user(self) -> None: # Given: per-user 1000-cent budget with pricing table - ev = self._make_evaluator( + ev = self._make_rule( limits=[{"group_by": "user_id", "limit": 1000}], pricing={"gpt-4": {"input_per_1k": 200.0, "output_per_1k": 0.0}}, model_path="model", @@ -569,7 +569,7 @@ def _step(tokens: int, user: str) -> dict: @pytest.mark.asyncio async def test_token_only_limit(self) -> None: # Given: 500 token limit - ev = self._make_evaluator(limits=[{"limit": 500, "limit_unit": "tokens"}]) + ev = self._make_rule(limits=[{"limit": 500, "limit_unit": "tokens"}]) # When: consume 600 tokens result = await ev.evaluate({"usage": {"input_tokens": 300, "output_tokens": 300}}) @@ -579,15 +579,15 @@ async def 
test_token_only_limit(self) -> None: @pytest.mark.asyncio async def test_no_data_returns_not_matched(self) -> None: - # Given: evaluator / When: None data / Then: not matched - ev = self._make_evaluator(limits=[{"limit": 1000}], pricing={}, model_path="model") + # Given: rule / When: None data / Then: not matched + ev = self._make_rule(limits=[{"limit": 1000}], pricing={}, model_path="model") result = await ev.evaluate(None) assert result.matched is False @pytest.mark.asyncio async def test_confidence_always_one(self) -> None: - # Given: evaluator with 1000-cent limit and pricing table - ev = self._make_evaluator( + # Given: rule with 1000-cent limit and pricing table + ev = self._make_rule( limits=[{"limit": 1000}], pricing={"gpt-4": {"input_per_1k": 200.0, "output_per_1k": 0.0}}, model_path="model", @@ -606,8 +606,8 @@ def _step(tokens: int) -> dict: @pytest.mark.asyncio async def test_cost_computed_from_pricing_table(self) -> None: - # Given: evaluator with pricing table and 100-cent cost limit - ev = self._make_evaluator( + # Given: rule with pricing table and 100-cent cost limit + ev = self._make_rule( limits=[{"limit": 100}], pricing={"gpt-4": {"input_per_1k": 30.0, "output_per_1k": 60.0}}, model_path="model", @@ -629,8 +629,8 @@ async def test_cost_computed_from_pricing_table(self) -> None: @pytest.mark.asyncio async def test_unknown_model_cost_zero(self) -> None: - # Given: evaluator with warn mode and data from an unknown model - ev = self._make_evaluator( + # Given: rule with warn mode and data from an unknown model + ev = self._make_rule( limits=[{"limit": 100}], pricing={"gpt-4": {"input_per_1k": 30.0, "output_per_1k": 60.0}}, model_path="model", @@ -652,8 +652,8 @@ async def test_unknown_model_cost_zero(self) -> None: @pytest.mark.asyncio async def test_small_cost_no_overcount(self) -> None: - # Given: evaluator with 1-cent limit, pricing yields 0.003 cents per call - ev = self._make_evaluator( + # Given: rule with 1-cent limit, pricing yields 0.003 
cents per call + ev = self._make_rule( limits=[{"limit": 1}], pricing={"gpt-4": {"input_per_1k": 0.03, "output_per_1k": 0.0}}, model_path="model", @@ -671,24 +671,24 @@ async def test_small_cost_no_overcount(self) -> None: class TestBudgetIdSemantics: @pytest.mark.asyncio async def test_same_budget_id_shares_store(self) -> None: - # Given: two evaluators with the same budget_id - config1 = BudgetEvaluatorConfig( + # Given: two rules with the same budget_id + config1 = BudgetRuleConfig( limits=[{"limit": 100}], budget_id="shared", pricing={"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}}, model_path="model", ) - config2 = BudgetEvaluatorConfig( + config2 = BudgetRuleConfig( limits=[{"limit": 100}], budget_id="shared", pricing={"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}}, model_path="model", ) - ev1 = BudgetEvaluator(config1) - ev2 = BudgetEvaluator(config2) + ev1 = BudgetRule(config1) + ev2 = BudgetRule(config2) step = {"model": "gpt-4", "usage": {"input_tokens": 500, "output_tokens": 0}} - # When: each evaluator records a 50-cent call + # When: each rule records a 50-cent call first = await ev1.evaluate(step) second = await ev2.evaluate(step) @@ -698,24 +698,24 @@ async def test_same_budget_id_shares_store(self) -> None: @pytest.mark.asyncio async def test_different_budget_id_isolates_store(self) -> None: - # Given: two evaluators with different budget_id values - config1 = BudgetEvaluatorConfig( + # Given: two rules with different budget_id values + config1 = BudgetRuleConfig( limits=[{"limit": 100}], budget_id="pool-a", pricing={"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}}, model_path="model", ) - config2 = BudgetEvaluatorConfig( + config2 = BudgetRuleConfig( limits=[{"limit": 100}], budget_id="pool-b", pricing={"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}}, model_path="model", ) - ev1 = BudgetEvaluator(config1) - ev2 = BudgetEvaluator(config2) + ev1 = BudgetRule(config1) + ev2 = BudgetRule(config2) step = {"model": 
"gpt-4", "usage": {"input_tokens": 500, "output_tokens": 0}} - # When: each evaluator records a 50-cent call + # When: each rule records a 50-cent call first = await ev1.evaluate(step) second = await ev2.evaluate(step) @@ -728,19 +728,19 @@ class TestUnknownModelBehavior: @pytest.mark.asyncio async def test_unknown_model_block_default(self) -> None: # Given: a cost rule with pricing that does not include the incoming model - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[{"limit": 100}], pricing={"gpt-4": {"input_per_1k": 10.0, "output_per_1k": 20.0}}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: the step uses an unknown model - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "unknown-model", "usage": {"input_tokens": 100, "output_tokens": 50}} ) - # Then: the evaluator fails closed and reports the unknown model + # Then: the rule fails closed and reports the unknown model assert result.matched is True assert result.metadata is not None assert result.metadata["unknown_model"] == "unknown-model" @@ -748,20 +748,20 @@ async def test_unknown_model_block_default(self) -> None: @pytest.mark.asyncio async def test_unknown_model_warn(self) -> None: # Given: a cost rule configured to warn on unknown models - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[{"limit": 100}], pricing={"gpt-4": {"input_per_1k": 10.0, "output_per_1k": 20.0}}, model_path="model", unknown_model_behavior="warn", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: the step uses an unknown model - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "unknown-model", "usage": {"input_tokens": 100, "output_tokens": 50}} ) - # Then: the evaluator treats cost as 0 and does not block + # Then: the rule treats cost as 0 and does not block assert result.matched is False assert result.metadata is not None assert result.metadata["cost"] == 
0.0 @@ -771,15 +771,15 @@ async def test_unknown_model_warn(self) -> None: async def test_unknown_model_token_only_unaffected(self) -> None: # Given: a token-only rule with a pricing table that does not include # the incoming model and the default block setting - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[{"limit": 1000, "limit_unit": "tokens"}], pricing={}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: the step uses an unknown model below the token limit - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "unknown-model", "usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -791,15 +791,15 @@ async def test_unknown_model_token_only_unaffected(self) -> None: @pytest.mark.asyncio async def test_pricing_lookup_is_case_sensitive(self) -> None: # Given: pricing for lowercase gpt-4 - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[{"limit": 100}], pricing={"gpt-4": {"input_per_1k": 10.0, "output_per_1k": 20.0}}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: the step uses a differently cased model name - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "GPT-4", "usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -811,15 +811,15 @@ async def test_pricing_lookup_is_case_sensitive(self) -> None: @pytest.mark.asyncio async def test_known_model_not_blocked(self) -> None: # Given: a cost rule whose pricing includes the incoming model - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[{"limit": 100}], pricing={"gpt-4": {"input_per_1k": 10.0, "output_per_1k": 20.0}}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: the step uses the known model - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "gpt-4", "usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -837,7 +837,7 @@ 
async def test_known_model_not_blocked(self) -> None: class TestStoreRegistry: def test_same_config_returns_same_store(self) -> None: # Given: two configs with identical parameters - config = BudgetEvaluatorConfig(limits=[{"limit": 1000}], pricing={}, model_path="model") + config = BudgetRuleConfig(limits=[{"limit": 1000}], pricing={}, model_path="model") # When: get store twice store1 = get_or_create_store(config) @@ -848,10 +848,10 @@ def test_same_config_returns_same_store(self) -> None: def test_different_budget_id_returns_different_store(self) -> None: # Given: two configs with different budget ids - config1 = BudgetEvaluatorConfig( + config1 = BudgetRuleConfig( limits=[{"limit": 1000}], budget_id="a", pricing={}, model_path="model", ) - config2 = BudgetEvaluatorConfig( + config2 = BudgetRuleConfig( limits=[{"limit": 1000}], budget_id="b", pricing={}, model_path="model", ) @@ -864,7 +864,7 @@ def test_different_budget_id_returns_different_store(self) -> None: def test_clear_budget_stores(self) -> None: # Given: a registered store - config = BudgetEvaluatorConfig(limits=[{"limit": 1000}], pricing={}, model_path="model") + config = BudgetRuleConfig(limits=[{"limit": 1000}], pricing={}, model_path="model") store1 = get_or_create_store(config) # When: clear all stores @@ -875,15 +875,15 @@ def test_clear_budget_stores(self) -> None: assert store1 is not store2 @pytest.mark.asyncio - async def test_evaluator_uses_registry(self) -> None: - # Given: two evaluators with same config - config = BudgetEvaluatorConfig( + async def test_rule_uses_registry(self) -> None: + # Given: two rules with same config + config = BudgetRuleConfig( limits=[{"limit": 100}], pricing={"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}}, model_path="model", ) - ev1 = BudgetEvaluator(config) - ev2 = BudgetEvaluator(config) + ev1 = BudgetRule(config) + ev2 = BudgetRule(config) # When: ev1 records usage, ev2 checks step = {"model": "gpt-4", "usage": {"input_tokens": 500, "output_tokens": 
0}} @@ -897,28 +897,28 @@ async def test_evaluator_uses_registry(self) -> None: async def test_same_budget_id_shares_buckets_but_not_rules(self) -> None: # Given: two configs sharing budget_id but using different limits pricing = {"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}} - config1 = BudgetEvaluatorConfig( + config1 = BudgetRuleConfig( limits=[{"limit": 100}], budget_id="shared", pricing=pricing, model_path="model", ) - config2 = BudgetEvaluatorConfig( + config2 = BudgetRuleConfig( limits=[{"limit": 1000}], budget_id="shared", pricing=pricing, model_path="model", ) - ev1 = BudgetEvaluator(config1) - ev2 = BudgetEvaluator(config2) + ev1 = BudgetRule(config1) + ev2 = BudgetRule(config2) step = {"model": "gpt-4", "usage": {"input_tokens": 600, "output_tokens": 0}} - # When: the first evaluator records 60 cents, then the second records + # When: the first rule records 60 cents, then the second records # another 60 cents into the same budget bucket first = await ev1.evaluate(step) second = await ev2.evaluate(step) - # Then: the second evaluator sees shared bucket state (120 cents) but + # Then: the second rule sees shared bucket state (120 cents) but # evaluates against its own 1000-cent rule, not config1's 100-cent rule. 
assert first.matched is False assert second.matched is False @@ -948,7 +948,7 @@ def test_scope_key_no_collision(self) -> None: assert key1 != key2 def test_extract_by_path_rejects_dunder(self) -> None: - from agent_control_evaluator_budget.budget.evaluator import _extract_by_path + from agent_control_rule_budget.budget.rule import _extract_by_path assert _extract_by_path({"a": 1}, "__class__") is None @@ -1049,7 +1049,7 @@ def test_extract_tokens_rejects_bool(self) -> None: class TestStoreRegistryRobustness: def test_concurrent_get_or_create_store(self) -> None: # Given: 10 threads requesting the same config concurrently - config = BudgetEvaluatorConfig(limits=[{"limit": 1000}], pricing={}, model_path="model") + config = BudgetRuleConfig(limits=[{"limit": 1000}], pricing={}, model_path="model") stores: list[Any] = [] lock = threading.Lock() @@ -1070,31 +1070,31 @@ def get_store() -> None: assert all(s is stores[0] for s in stores) @pytest.mark.asyncio - async def test_evaluator_cache_eviction_preserves_budget_state(self) -> None: - # Given: evaluator that has recorded usage - from agent_control_evaluators._factory import ( - clear_evaluator_cache, + async def test_rule_cache_eviction_preserves_budget_state(self) -> None: + # Given: rule that has recorded usage + from agent_control_rules._factory import ( + clear_rule_cache, ) - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[{"limit": 1000}], pricing={"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}}, model_path="model", ) - ev = BudgetEvaluator(config) + ev = BudgetRule(config) step = {"model": "gpt-4", "usage": {"input_tokens": 500, "output_tokens": 0}} await ev.evaluate(step) - # When: simulate LRU eviction by clearing the evaluator cache - clear_evaluator_cache() + # When: simulate LRU eviction by clearing the rule cache + clear_rule_cache() - # Then: budget state survives (stored in module-level registry, not on evaluator) - ev2 = BudgetEvaluator(config) + # Then: budget state 
survives (stored in module-level registry, not on rule) + ev2 = BudgetRule(config) result = await ev2.evaluate(step) # 500 tokens * 100 cents/1k = 50.0 cents per call. # Two calls = 100.0 cents total. limit=1000, so not exceeded. - # Key assertion: state IS preserved across evaluator re-creation. + # Key assertion: state IS preserved across rule re-creation. assert result.metadata is not None assert result.metadata["cost"] == pytest.approx(50.0, abs=0.1) # The all_snapshots should show accumulated spend from both calls @@ -1144,10 +1144,10 @@ def test_limits_order_does_not_affect_same_budget_id_store_identity(self) -> Non # Given: two configs with same budget_id and rules in different order rule_a = {"limit": 1000, "scope": {"agent": "a"}} rule_b = {"limit": 2000, "scope": {"agent": "b"}} - config1 = BudgetEvaluatorConfig( + config1 = BudgetRuleConfig( limits=[rule_a, rule_b], budget_id="ordered", pricing={}, model_path="model", ) - config2 = BudgetEvaluatorConfig( + config2 = BudgetRuleConfig( limits=[rule_b, rule_a], budget_id="ordered", pricing={}, model_path="model", ) @@ -1161,7 +1161,7 @@ def test_limits_order_does_not_affect_same_budget_id_store_identity(self) -> Non class TestEstimateCostEdgeCases: def test_nan_rate_returns_zero(self) -> None: - from agent_control_evaluator_budget.budget.evaluator import _estimate_cost + from agent_control_rule_budget.budget.rule import _estimate_cost # Given: pricing table with NaN rate pricing = {"gpt-4": ModelPricing(input_per_1k=float("nan"), output_per_1k=0.0)} @@ -1173,7 +1173,7 @@ def test_nan_rate_returns_zero(self) -> None: assert cost == 0.0 def test_inf_rate_returns_zero(self) -> None: - from agent_control_evaluator_budget.budget.evaluator import _estimate_cost + from agent_control_rule_budget.budget.rule import _estimate_cost # Given: pricing table with Inf rate pricing = {"gpt-4": ModelPricing(input_per_1k=float("inf"), output_per_1k=0.0)} @@ -1185,7 +1185,7 @@ def test_inf_rate_returns_zero(self) -> None: 
assert cost == 0.0 def test_negative_rate_returns_zero(self) -> None: - from agent_control_evaluator_budget.budget.evaluator import _estimate_cost + from agent_control_rule_budget.budget.rule import _estimate_cost # Given: pricing table with negative rate pricing = {"gpt-4": ModelPricing(input_per_1k=-10.0, output_per_1k=0.0)} @@ -1205,9 +1205,9 @@ def test_negative_rate_returns_zero(self) -> None: class TestNestedModelPath: @pytest.mark.asyncio async def test_nested_model_path(self) -> None: - # Given: evaluator with nested model_path - ev = BudgetEvaluator( - BudgetEvaluatorConfig( + # Given: rule with nested model_path + ev = BudgetRule( + BudgetRuleConfig( limits=[{"limit": 1000}], pricing={"gpt-4": {"input_per_1k": 100.0, "output_per_1k": 0.0}}, model_path="llm.model_name", @@ -1584,7 +1584,7 @@ def test_cost_rule_without_model_path_rejected(self) -> None: # Given: a cost-based rule with pricing but no model_path # When/Then: config validation rejects it with pytest.raises(ValidationError, match="model_path is required"): - BudgetEvaluatorConfig( + BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, ) @@ -1592,7 +1592,7 @@ def test_cost_rule_without_model_path_rejected(self) -> None: def test_token_rule_without_model_path_ok(self) -> None: # Given: a token-only rule without model_path # When: config is created - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=100, limit_unit="tokens")], ) @@ -1601,7 +1601,7 @@ def test_token_rule_without_model_path_ok(self) -> None: def test_cost_rule_with_model_path_accepted(self) -> None: # Given: a cost-based rule with pricing and model_path - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="model", @@ -1614,7 +1614,7 @@ def test_cost_rule_with_empty_model_path_rejected(self) 
-> None: # Given: cost rule with model_path="" (empty string is falsy) # When/Then: validator rejects it with pytest.raises(ValidationError, match="model_path is required"): - BudgetEvaluatorConfig( + BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="", @@ -1624,7 +1624,7 @@ def test_cost_rule_with_whitespace_model_path_rejected(self) -> None: # Given: cost rule with model_path=" " (whitespace-only is stripped) # When/Then: validator rejects it with pytest.raises(ValidationError, match="model_path is required"): - BudgetEvaluatorConfig( + BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path=" ", @@ -1635,15 +1635,15 @@ class TestModelPathRuntimeExtraction: @pytest.mark.asyncio async def test_model_field_missing_blocks_when_cost_rule_matches(self) -> None: # Given: cost rule with model_path, but data has no "model" field - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: step data omits the model field entirely - result = await evaluator.evaluate( + result = await rule.evaluate( {"usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1655,15 +1655,15 @@ async def test_model_field_missing_blocks_when_cost_rule_matches(self) -> None: @pytest.mark.asyncio async def test_model_field_missing_block_message(self) -> None: # Given: cost rule with model_path, data has no model field - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: step omits model field - result = await 
evaluator.evaluate( + result = await rule.evaluate( {"usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1674,15 +1674,15 @@ async def test_model_field_missing_block_message(self) -> None: @pytest.mark.asyncio async def test_unknown_model_block_message(self) -> None: # Given: cost rule with model_path, unknown model in data - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: step has model not in pricing - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "unknown-model", "usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1693,16 +1693,16 @@ async def test_unknown_model_block_message(self) -> None: @pytest.mark.asyncio async def test_model_field_missing_warn_mode(self) -> None: # Given: cost rule with model_path, warn mode, data has no model - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="model", unknown_model_behavior="warn", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: step data omits the model field - result = await evaluator.evaluate( + result = await rule.evaluate( {"usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1716,14 +1716,14 @@ async def test_model_field_missing_token_only_with_model_path(self) -> None: # Given: token-only rule, model_path IS set, data has no model field # This exercises Branch B: model_path_configured=True, model=None, # has_matching_cost_rule=False (token rule only). 
- config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=1000, limit_unit="tokens")], model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: step has no "model" field - result = await evaluator.evaluate( + result = await rule.evaluate( {"usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1737,13 +1737,13 @@ async def test_model_field_missing_token_only_with_model_path(self) -> None: @pytest.mark.asyncio async def test_model_field_missing_token_only_unaffected(self) -> None: # Given: token-only rule, model_path not set, data has no model - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=1000, limit_unit="tokens")], ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: step data with no model - result = await evaluator.evaluate( + result = await rule.evaluate( {"usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1753,15 +1753,15 @@ async def test_model_field_missing_token_only_unaffected(self) -> None: @pytest.mark.asyncio async def test_empty_pricing_with_model_triggers_block(self) -> None: # Given: cost rule, pricing={} (not None), model IS present - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(limit=100)], pricing={}, model_path="model", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: model present but not in empty pricing table - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "gpt-4", "usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1774,8 +1774,8 @@ async def test_empty_pricing_with_model_triggers_block(self) -> None: class TestScopedUnknownModelBlock: @pytest.mark.asyncio async def test_unknown_model_not_blocked_when_only_token_rule_matches(self) -> None: - # Given: evaluator with a token-only rule for scope A and a cost rule for scope B - config = BudgetEvaluatorConfig( + # Given: rule with a 
token-only rule for scope A and a cost rule for scope B + config = BudgetRuleConfig( limits=[ BudgetLimitRule(scope={"agent": "a"}, limit=1000, limit_unit="tokens"), BudgetLimitRule(scope={"agent": "b"}, limit=100), @@ -1784,10 +1784,10 @@ async def test_unknown_model_not_blocked_when_only_token_rule_matches(self) -> N model_path="model", metadata_paths={"agent": "agent"}, ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: scope A step uses an unknown model (only token rule applies) - result = await evaluator.evaluate( + result = await rule.evaluate( { "agent": "a", "model": "unknown-model", @@ -1803,7 +1803,7 @@ async def test_unknown_model_not_blocked_when_only_token_rule_matches(self) -> N @pytest.mark.asyncio async def test_unknown_model_blocked_when_cost_rule_matches(self) -> None: # Given: same config but step targets scope B (where cost rule lives) - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[ BudgetLimitRule(scope={"agent": "a"}, limit=1000, limit_unit="tokens"), BudgetLimitRule(scope={"agent": "b"}, limit=100), @@ -1812,10 +1812,10 @@ async def test_unknown_model_blocked_when_cost_rule_matches(self) -> None: model_path="model", metadata_paths={"agent": "agent"}, ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: scope B step uses an unknown model (cost rule applies) - result = await evaluator.evaluate( + result = await rule.evaluate( { "agent": "b", "model": "unknown-model", @@ -1831,7 +1831,7 @@ async def test_unknown_model_blocked_when_cost_rule_matches(self) -> None: @pytest.mark.asyncio async def test_unknown_model_no_matching_rules_at_all(self) -> None: # Given: cost rule scoped to agent=b, step from agent=c (no match) - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[ BudgetLimitRule(scope={"agent": "b"}, limit=100), ], @@ -1839,10 +1839,10 @@ async def test_unknown_model_no_matching_rules_at_all(self) -> None: model_path="model", metadata_paths={"agent": 
"agent"}, ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: step from agent=c with unknown model - result = await evaluator.evaluate( + result = await rule.evaluate( { "agent": "c", "model": "unknown-model", @@ -1856,17 +1856,17 @@ async def test_unknown_model_no_matching_rules_at_all(self) -> None: @pytest.mark.asyncio async def test_warn_mode_scoped_no_warning_when_scope_mismatches(self) -> None: # Given: cost rule scoped to agent=b, warn mode - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(scope={"agent": "b"}, limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="model", metadata_paths={"agent": "agent"}, unknown_model_behavior="warn", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: scope A step with unknown model (cost rule is scoped to B) - result = await evaluator.evaluate( + result = await rule.evaluate( { "agent": "a", "model": "unknown-model", @@ -1880,7 +1880,7 @@ async def test_warn_mode_scoped_no_warning_when_scope_mismatches(self) -> None: @pytest.mark.asyncio async def test_mixed_global_rules_warn_mode_token_accumulates(self) -> None: # Given: global cost rule (warn) + global token rule, unknown model - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[ BudgetLimitRule(limit=100), BudgetLimitRule(limit=1000, limit_unit="tokens"), @@ -1889,10 +1889,10 @@ async def test_mixed_global_rules_warn_mode_token_accumulates(self) -> None: model_path="model", unknown_model_behavior="warn", ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: unknown model with tokens - result = await evaluator.evaluate( + result = await rule.evaluate( {"model": "unknown-model", "usage": {"input_tokens": 100, "output_tokens": 50}} ) @@ -1907,16 +1907,16 @@ async def test_mixed_global_rules_warn_mode_token_accumulates(self) -> None: @pytest.mark.asyncio async def 
test_group_by_unknown_model_block_no_bucket_created(self) -> None: # Given: group_by cost rule, unknown model - config = BudgetEvaluatorConfig( + config = BudgetRuleConfig( limits=[BudgetLimitRule(group_by="user_id", limit=100)], pricing={"gpt-4": ModelPricing(input_per_1k=10.0, output_per_1k=20.0)}, model_path="model", metadata_paths={"user_id": "user_id"}, ) - evaluator = BudgetEvaluator(config) + rule = BudgetRule(config) # When: unknown model is blocked (no bucket created) - blocked = await evaluator.evaluate( + blocked = await rule.evaluate( { "user_id": "u1", "model": "unknown", @@ -1926,7 +1926,7 @@ async def test_group_by_unknown_model_block_no_bucket_created(self) -> None: assert blocked.matched is True # When: known model follows -- bucket starts fresh - result = await evaluator.evaluate( + result = await rule.evaluate( { "user_id": "u1", "model": "gpt-4", @@ -1947,7 +1947,7 @@ def test_subclass_with_sync_override_rejected_at_class_creation(self) -> None: # Then: TypeError is raised, surfacing the contract violation at # class-creation time rather than failing silently at the first # `await` call site in production. 
- from agent_control_evaluator_budget.budget.store import BudgetSnapshot, BudgetStore + from agent_control_rule_budget.budget.store import BudgetSnapshot, BudgetStore with pytest.raises(TypeError, match="must be an async def"): @@ -1965,7 +1965,7 @@ def record_and_check( # noqa: D401, ANN001 def test_subclass_with_async_override_accepted(self) -> None: # Given/When: a subclass that overrides with async def # Then: class creation succeeds and the subclass can be instantiated - from agent_control_evaluator_budget.budget.store import BudgetSnapshot, BudgetStore + from agent_control_rule_budget.budget.store import BudgetSnapshot, BudgetStore class GoodStore(BudgetStore): async def record_and_check( @@ -1986,7 +1986,7 @@ def test_subclass_without_override_accepted_at_class_creation(self) -> None: # Given/When: a subclass that does NOT override record_and_check # Then: class creation succeeds (__init_subclass__ method=None path). # ABC enforces the abstractmethod at instantiation, not class creation. 
- from agent_control_evaluator_budget.budget.store import BudgetStore + from agent_control_rule_budget.budget.store import BudgetStore class PartialStore(BudgetStore): pass # no override; abstractmethod prevents instantiation @@ -2000,7 +2000,7 @@ def test_mixin_sync_override_rejected(self) -> None: # that inherits it via MRO without overriding in its own __dict__ # When: class creation is attempted # Then: __init_subclass__ walks MRO and catches the sync mixin override - from agent_control_evaluator_budget.budget.store import BudgetStore + from agent_control_rule_budget.budget.store import BudgetStore class SyncMixin: def record_and_check(self, rules, scope, input_tokens, output_tokens, cost): diff --git a/evaluators/contrib/cisco/Makefile b/rules/contrib/cisco/Makefile similarity index 84% rename from evaluators/contrib/cisco/Makefile rename to rules/contrib/cisco/Makefile index 0f64617a..cc97f9a8 100644 --- a/evaluators/contrib/cisco/Makefile +++ b/rules/contrib/cisco/Makefile @@ -1,9 +1,9 @@ .PHONY: help test lint lint-fix typecheck check build -PACKAGE := agent-control-evaluator-cisco +PACKAGE := agent-control-rule-cisco help: - @echo "Agent Control Evaluator - Cisco AI Defense - Makefile commands" + @echo "Agent Control Rule - Cisco AI Defense - Makefile commands" @echo " make test - run pytest" @echo " make lint - run ruff check" @echo " make lint-fix - run ruff check --fix" @@ -12,7 +12,7 @@ help: @echo " make build - build package" test: - uv run --with pytest --with pytest-asyncio --with pytest-cov --package $(PACKAGE) pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-cisco.xml -q + uv run --with pytest --with pytest-asyncio --with pytest-cov --package $(PACKAGE) pytest tests --cov=src --cov-report=xml:../../../coverage-rules-cisco.xml -q lint: uv run --with ruff --package $(PACKAGE) ruff check --config ../../../pyproject.toml src/ diff --git a/evaluators/contrib/cisco/README.md b/rules/contrib/cisco/README.md similarity index 79% 
rename from evaluators/contrib/cisco/README.md rename to rules/contrib/cisco/README.md index a43edb18..34de66d3 100644 --- a/evaluators/contrib/cisco/README.md +++ b/rules/contrib/cisco/README.md @@ -1,6 +1,6 @@ -# Agent Control Evaluator - Cisco AI Defense +# Agent Control Rule - Cisco AI Defense -External evaluator that calls Cisco AI Defense Chat Inspection via REST and maps `InspectResponse.is_safe` to Agent Control decisions. +External rule that calls Cisco AI Defense Chat Inspection via REST and maps `InspectResponse.is_safe` to Agent Control decisions. - Entry point name: `cisco.ai_defense` - Transport: direct HTTP (httpx) @@ -10,29 +10,29 @@ External evaluator that calls Cisco AI Defense Chat Inspection via REST and maps Canonical install path: ```bash -pip install "agent-control-evaluators[cisco]" +pip install "agent-control-rules[cisco]" ``` Fallback direct wheel install: ```bash -pip install agent-control-evaluator-cisco +pip install agent-control-rule-cisco ``` For local development: ```bash -uv pip install -e evaluators/contrib/cisco +uv pip install -e rules/contrib/cisco ``` - Build wheel from the repo root (contrib package only): ```bash make engine-build - (cd evaluators/contrib/cisco && make build) + (cd rules/contrib/cisco && make build) ``` -To run the server with this evaluator enabled, see `examples/cisco_ai_defense/README.md` for setup and seeding instructions. +To run the server with this rule enabled, see `examples/cisco_ai_defense/README.md` for setup and seeding instructions. 
## Configuration @@ -42,7 +42,7 @@ Set the `AI_DEFENSE_API_KEY` environment variable: export AI_DEFENSE_API_KEY="" ``` -Evaluator config fields (all optional unless stated): +Rule config fields (all optional unless stated): - `api_key_env: str = "AI_DEFENSE_API_KEY"` - `region: "us" | "ap" | "eu" | None = "us"` (ignored if `api_url` set) @@ -57,7 +57,7 @@ Evaluator config fields (all optional unless stated): - `inspect_config: dict[str, Any] | None = None` (forwarded to API per OpenAPI spec) - `include_raw_response: bool = false` (when true, includes the full provider response under `metadata.raw`) -## Available Evaluators +## Available Rules | Name | Description | |------|-------------| @@ -65,7 +65,7 @@ Evaluator config fields (all optional unless stated): Behavior mapping: -- `is_safe == false` → `EvaluatorResult.matched = true` (e.g., a `deny` action will block) +- `is_safe == false` → `RuleResult.matched = true` (e.g., a `deny` action will block) - `is_safe == true` → `matched = false` - Errors or invalid responses → `matched = (on_error == "deny")`; error details in `metadata` (no `error` field is set; engine honors `matched` per `on_error`) @@ -81,7 +81,7 @@ Example using `messages_strategy: "history"` (for inputs that already have a `me "scope": { "step_types": ["llm"], "stages": ["pre", "post"] }, "condition": { "selector": { "path": "input" }, - "evaluator": { + "rule": { "name": "cisco.ai_defense", "config": { "api_key_env": "AI_DEFENSE_API_KEY", @@ -105,7 +105,7 @@ Example using `messages_strategy: "history"` (for inputs that already have a `me "scope": { "step_types": ["llm"], "stages": ["pre", "post"] }, "condition": { "selector": { "path": "input" }, - "evaluator": { + "rule": { "name": "cisco.ai_defense", "config": { "api_key_env": "AI_DEFENSE_API_KEY", @@ -124,20 +124,20 @@ Example using `messages_strategy: "history"` (for inputs that already have a `me ## Usage -Once installed, the evaluator is automatically discovered: +Once installed, the rule 
is automatically discovered: ```python -from agent_control_evaluators import discover_evaluators, get_evaluator +from agent_control_rules import discover_rules, get_rule -discover_evaluators() -CiscoAIDefenseEvaluator = get_evaluator("cisco.ai_defense") +discover_rules() +CiscoAIDefenseRule = get_rule("cisco.ai_defense") ``` Or import directly: ```python import asyncio -from agent_control_evaluator_cisco.ai_defense import CiscoAIDefenseEvaluator, CiscoAIDefenseConfig +from agent_control_rule_cisco.ai_defense import CiscoAIDefenseRule, CiscoAIDefenseConfig cfg = CiscoAIDefenseConfig( region="us", @@ -146,7 +146,7 @@ cfg = CiscoAIDefenseConfig( messages_strategy="history", payload_field="input", ) -ev = CiscoAIDefenseEvaluator(cfg) +ev = CiscoAIDefenseRule(cfg) async def main(): data = {"messages": [{"role": "user", "content": "tell me how to hack wifi"}]} @@ -160,7 +160,7 @@ asyncio.run(main()) - Auth header: `X-Cisco-AI-Defense-API-Key: ` - Regions and endpoint path follow the Cisco AI Defense API spec - For custom deployments, set `api_url` to the full Chat Inspection endpoint. -- The evaluator validates the API key at construction and raises if missing. +- The rule validates the API key at construction and raises if missing. - `is_available()` returns false if `httpx` is not installed; discovery will skip registration. - `messages_strategy: "history"` forwards the full message array when present; consider `messages_strategy: "single"` if payload size is a concern. 
@@ -169,7 +169,7 @@ asyncio.run(main()) - Cisco AI Defense Inspection API reference: https://developer.cisco.com/docs/ai-defense-inspection/introduction/ - Cisco Security Console (get API Key): https://security.cisco.com - Cisco AI Defense User Guide: https://securitydocs.cisco.com/docs/ai-def/user/97384.dita -- Regional API base URLs used by this evaluator: +- Regional API base URLs used by this rule: - US: `https://us.api.inspect.aidefense.security.cisco.com` - AP: `https://ap.api.inspect.aidefense.security.cisco.com` - EU: `https://eu.api.inspect.aidefense.security.cisco.com` diff --git a/evaluators/contrib/cisco/pyproject.toml b/rules/contrib/cisco/pyproject.toml similarity index 63% rename from evaluators/contrib/cisco/pyproject.toml rename to rules/contrib/cisco/pyproject.toml index 74a35e42..08dac253 100644 --- a/evaluators/contrib/cisco/pyproject.toml +++ b/rules/contrib/cisco/pyproject.toml @@ -1,13 +1,13 @@ [project] -name = "agent-control-evaluator-cisco" +name = "agent-control-rule-cisco" version = "8.1.2" -description = "Cisco AI Defense evaluator for agent-control" +description = "Cisco AI Defense rule for agent-control" readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "Cisco AI Defense Team" }] dependencies = [ - "agent-control-evaluators>=7.5.0", + "agent-control-rules>=7.5.0", "agent-control-models>=7.5.0", "httpx>=0.24.0", ] @@ -21,15 +21,15 @@ dev = [ "mypy>=1.8.0", ] -[project.entry-points."agent_control.evaluators"] -"cisco.ai_defense" = "agent_control_evaluator_cisco.ai_defense:CiscoAIDefenseEvaluator" +[project.entry-points."agent_control.rules"] +"cisco.ai_defense" = "agent_control_rule_cisco.ai_defense:CiscoAIDefenseRule" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/agent_control_evaluator_cisco"] +packages = ["src/agent_control_rule_cisco"] [tool.ruff] line-length = 100 @@ -39,5 +39,5 @@ target-version = 
"py312" select = ["E", "F", "I"] [tool.uv.sources] -agent-control-evaluators = { path = "../../builtin", editable = true } +agent-control-rules = { path = "../../builtin", editable = true } agent-control-models = { path = "../../../models", editable = true } diff --git a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/__init__.py b/rules/contrib/cisco/src/agent_control_rule_cisco/__init__.py similarity index 100% rename from evaluators/contrib/cisco/src/agent_control_evaluator_cisco/__init__.py rename to rules/contrib/cisco/src/agent_control_rule_cisco/__init__.py diff --git a/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/__init__.py b/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/__init__.py new file mode 100644 index 00000000..537b76ac --- /dev/null +++ b/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/__init__.py @@ -0,0 +1,5 @@ +from .config import CiscoAIDefenseConfig +from .rule import CiscoAIDefenseRule + +__all__ = ["CiscoAIDefenseRule", "CiscoAIDefenseConfig"] + diff --git a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/client.py b/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/client.py similarity index 100% rename from evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/client.py rename to rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/client.py diff --git a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/config.py b/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/config.py similarity index 88% rename from evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/config.py rename to rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/config.py index 69cafbf3..48c36b41 100644 --- a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/config.py +++ b/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/config.py @@ -2,12 +2,12 @@ from typing import Any, Literal 
-from agent_control_evaluators import EvaluatorConfig +from agent_control_rules import RuleConfig from pydantic import Field -class CiscoAIDefenseConfig(EvaluatorConfig): - """Configuration for Cisco AI Defense evaluator (REST). +class CiscoAIDefenseConfig(RuleConfig): + """Configuration for Cisco AI Defense rule (REST). Attributes: api_key_env: Env var name for API key diff --git a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/evaluator.py b/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/rule.py similarity index 88% rename from evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/evaluator.py rename to rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/rule.py index adbab9e5..a33ea642 100644 --- a/evaluators/contrib/cisco/src/agent_control_evaluator_cisco/ai_defense/evaluator.py +++ b/rules/contrib/cisco/src/agent_control_rule_cisco/ai_defense/rule.py @@ -5,12 +5,12 @@ from importlib.metadata import PackageNotFoundError, version from typing import Any -from agent_control_evaluators import ( - Evaluator, - EvaluatorMetadata, - register_evaluator, +from agent_control_models import RuleResult +from agent_control_rules import ( + Rule, + RuleMetadata, + register_rule, ) -from agent_control_models import EvaluatorResult from .client import AI_DEFENSE_HTTPX_AVAILABLE, REGION_BASE_URLS, AIDefenseClient, build_endpoint from .config import CiscoAIDefenseConfig @@ -19,7 +19,7 @@ def _resolve_package_version() -> str: """Return the installed package version, or a dev fallback during local imports.""" try: - return version("agent-control-evaluator-cisco") + return version("agent-control-rule-cisco") except PackageNotFoundError: return "0.0.0.dev" @@ -102,14 +102,14 @@ def _stringify_message_content(value: Any) -> str: return str(value) -@register_evaluator -class CiscoAIDefenseEvaluator(Evaluator[CiscoAIDefenseConfig]): - """Cisco AI Defense evaluator. 
+@register_rule +class CiscoAIDefenseRule(Rule[CiscoAIDefenseConfig]): + """Cisco AI Defense rule. - Maps InspectResponse.is_safe to EvaluatorResult.matched. + Maps InspectResponse.is_safe to RuleResult.matched. """ - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="cisco.ai_defense", version=_PACKAGE_VERSION, description="Cisco AI Defense Chat Inspection integration", @@ -121,7 +121,7 @@ class CiscoAIDefenseEvaluator(Evaluator[CiscoAIDefenseConfig]): @classmethod def is_available(cls) -> bool: - """Evaluator is available only if httpx dependency exists.""" + """Rule is available only if httpx dependency exists.""" return AI_DEFENSE_HTTPX_AVAILABLE def __init__(self, config: CiscoAIDefenseConfig) -> None: @@ -152,10 +152,10 @@ def __init__(self, config: CiscoAIDefenseConfig) -> None: timeout_s=timeout_s, ) - async def evaluate(self, data: Any) -> EvaluatorResult: # noqa: D401 + async def evaluate(self, data: Any) -> RuleResult: # noqa: D401 # Null input: do not call external service; treat as no data if data is None: - return EvaluatorResult(matched=False, confidence=1.0, message="No data") + return RuleResult(matched=False, confidence=1.0, message="No data") messages = _build_messages( data, @@ -163,7 +163,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: # noqa: D401 payload_field=self.config.payload_field, ) if not messages: - return EvaluatorResult(matched=False, confidence=1.0, message="No data to inspect") + return RuleResult(matched=False, confidence=1.0, message="No data to inspect") # Call REST API for Chat Inspection try: @@ -187,21 +187,21 @@ async def evaluate(self, data: Any) -> EvaluatorResult: # noqa: D401 } if self.config.include_raw_response: meta["raw"] = response - return EvaluatorResult( + return RuleResult( matched=matched, confidence=1.0, message=msg, metadata=meta, ) - # If no boolean is present, consider it an evaluator error + # If no boolean is present, consider it a rule error fallback = self.config.on_error 
matched = fallback == "deny" error_message = "Cisco AI Defense response missing 'is_safe'" meta2: dict[str, Any] = {"fallback_action": fallback} if self.config.include_raw_response: meta2["raw"] = response - return EvaluatorResult( + return RuleResult( matched=matched, confidence=0.0, message=error_message, @@ -212,7 +212,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: # noqa: D401 fallback = self.config.on_error matched = fallback == "deny" error_detail = str(e) - return EvaluatorResult( + return RuleResult( matched=matched, confidence=0.0, message=f"Cisco AI Defense evaluation error: {error_detail}", diff --git a/rules/contrib/cisco/tests/__init__.py b/rules/contrib/cisco/tests/__init__.py new file mode 100644 index 00000000..5f1c35ce --- /dev/null +++ b/rules/contrib/cisco/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for the Cisco AI Defense contrib rule.""" diff --git a/evaluators/contrib/cisco/tests/test_client.py b/rules/contrib/cisco/tests/test_client.py similarity index 93% rename from evaluators/contrib/cisco/tests/test_client.py rename to rules/contrib/cisco/tests/test_client.py index 0d2c8c26..bf61ee9d 100644 --- a/evaluators/contrib/cisco/tests/test_client.py +++ b/rules/contrib/cisco/tests/test_client.py @@ -7,7 +7,7 @@ import pytest -from agent_control_evaluator_cisco.ai_defense.client import ( +from agent_control_rule_cisco.ai_defense.client import ( AIDefenseClient, build_endpoint, ) @@ -42,7 +42,7 @@ async def aclose(self) -> None: self.is_closed = True # Patch the constructor used by the module - from agent_control_evaluator_cisco.ai_defense import client as client_mod + from agent_control_rule_cisco.ai_defense import client as client_mod monkeypatch.setattr(client_mod.httpx, "AsyncClient", FakeAsyncClient, raising=True) @@ -92,7 +92,7 @@ async def post(self, *_: Any, **__: Any): async def aclose(self) -> None: self.is_closed = True - from agent_control_evaluator_cisco.ai_defense import client as client_mod + from 
agent_control_rule_cisco.ai_defense import client as client_mod monkeypatch.setattr(client_mod.httpx, "AsyncClient", FakeAsyncClient, raising=True) @@ -124,7 +124,7 @@ async def post(self, *_: Any, **__: Any): async def aclose(self) -> None: self.is_closed = True - from agent_control_evaluator_cisco.ai_defense import client as client_mod + from agent_control_rule_cisco.ai_defense import client as client_mod # Patch AsyncClient and also patch the exception class to a stable local one monkeypatch.setattr(client_mod, "httpx", SimpleNamespace(AsyncClient=FakeAsyncClient, HTTPStatusError=FakeHTTPError)) @@ -148,7 +148,7 @@ def __init__(self, *_, **kwargs: Any): async def aclose(self) -> None: self.is_closed = True - from agent_control_evaluator_cisco.ai_defense import client as client_mod + from agent_control_rule_cisco.ai_defense import client as client_mod monkeypatch.setattr(client_mod.httpx, "AsyncClient", FakeAsyncClient, raising=True) @@ -210,7 +210,7 @@ def find_spec(self, fullname, path=None, target=None): # type: ignore[no-untype sys.meta_path.insert(0, ImportBlocker()) try: - from agent_control_evaluator_cisco.ai_defense import client as client_mod + from agent_control_rule_cisco.ai_defense import client as client_mod importlib.reload(client_mod) diff --git a/evaluators/contrib/cisco/tests/test_evaluator.py b/rules/contrib/cisco/tests/test_rule.py similarity index 88% rename from evaluators/contrib/cisco/tests/test_evaluator.py rename to rules/contrib/cisco/tests/test_rule.py index 976b1941..84ec5360 100644 --- a/evaluators/contrib/cisco/tests/test_evaluator.py +++ b/rules/contrib/cisco/tests/test_rule.py @@ -1,14 +1,14 @@ from importlib.metadata import PackageNotFoundError, version import pytest -import agent_control_evaluator_cisco.ai_defense.evaluator as cisco_evaluator_module +import agent_control_rule_cisco.ai_defense.rule as cisco_rule_module from pydantic import ValidationError -from agent_control_evaluator_cisco.ai_defense import ( - 
CiscoAIDefenseEvaluator, +from agent_control_rule_cisco.ai_defense import ( + CiscoAIDefenseRule, CiscoAIDefenseConfig, ) -from agent_control_evaluator_cisco.ai_defense.client import AIDefenseClient +from agent_control_rule_cisco.ai_defense.client import AIDefenseClient @pytest.fixture(autouse=True) @@ -17,7 +17,7 @@ def _env_api_key(monkeypatch: pytest.MonkeyPatch) -> None: def test_metadata_version_matches_distribution_version() -> None: - assert CiscoAIDefenseEvaluator.metadata.version == version("agent-control-evaluator-cisco") + assert CiscoAIDefenseRule.metadata.version == version("agent-control-rule-cisco") def test_metadata_version_falls_back_without_distribution( @@ -26,15 +26,15 @@ def test_metadata_version_falls_back_without_distribution( def _raise_not_found(_: str) -> str: raise PackageNotFoundError - monkeypatch.setattr(cisco_evaluator_module, "version", _raise_not_found) + monkeypatch.setattr(cisco_rule_module, "version", _raise_not_found) - assert cisco_evaluator_module._resolve_package_version() == "0.0.0.dev" + assert cisco_rule_module._resolve_package_version() == "0.0.0.dev" @pytest.mark.asyncio async def test_none_input_returns_no_data() -> None: cfg = CiscoAIDefenseConfig() - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) res = await ev.evaluate(None) assert res.matched is False assert res.error is None @@ -48,7 +48,7 @@ async def fake_chat_inspect(self: AIDefenseClient, **kwargs): monkeypatch.setattr(AIDefenseClient, "chat_inspect", fake_chat_inspect, raising=True) cfg = CiscoAIDefenseConfig() - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) res = await ev.evaluate("bad content") assert res.matched is True assert res.metadata and res.metadata.get("severity") == "HIGH" @@ -62,7 +62,7 @@ async def fake_chat_inspect(self: AIDefenseClient, **kwargs): monkeypatch.setattr(AIDefenseClient, "chat_inspect", fake_chat_inspect, raising=True) cfg = CiscoAIDefenseConfig() - ev = CiscoAIDefenseEvaluator(cfg) + ev = 
CiscoAIDefenseRule(cfg) res = await ev.evaluate("ok content") assert res.matched is False assert res.metadata and res.metadata.get("severity") == "LOW" @@ -77,7 +77,7 @@ async def boom(self: AIDefenseClient, **kwargs): monkeypatch.setattr(AIDefenseClient, "chat_inspect", boom, raising=True) cfg = CiscoAIDefenseConfig(on_error="deny") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) res = await ev.evaluate("anything") assert res.matched is True # fail-closed assert res.metadata and res.metadata.get("fallback_action") == "deny" @@ -88,7 +88,7 @@ def test_missing_api_key_raises_on_init(monkeypatch: pytest.MonkeyPatch) -> None cfg = CiscoAIDefenseConfig() with pytest.raises(ValueError, match="Missing Cisco AI Defense API key"): - CiscoAIDefenseEvaluator(cfg) + CiscoAIDefenseRule(cfg) @pytest.mark.asyncio @@ -99,7 +99,7 @@ async def fake(self: AIDefenseClient, **kwargs): monkeypatch.setattr(AIDefenseClient, "chat_inspect", fake, raising=True) cfg = CiscoAIDefenseConfig(on_error="allow") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) res = await ev.evaluate("text") assert res.matched is False assert res.error == "Cisco AI Defense response missing 'is_safe'" @@ -118,7 +118,7 @@ async def success(self: AIDefenseClient, **kwargs): monkeypatch.setattr(AIDefenseClient, "chat_inspect", success, raising=True) cfg = CiscoAIDefenseConfig(include_raw_response=True) - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) res = await ev.evaluate("ok") assert res.matched is False assert res.metadata and res.metadata.get("classifications") == {"a": 1} @@ -131,7 +131,7 @@ async def no_bool(self: AIDefenseClient, **kwargs): monkeypatch.setattr(AIDefenseClient, "chat_inspect", no_bool, raising=True) cfg2 = CiscoAIDefenseConfig(include_raw_response=True, on_error="allow") - ev2 = CiscoAIDefenseEvaluator(cfg2) + ev2 = CiscoAIDefenseRule(cfg2) res2 = await ev2.evaluate("x") assert res2.matched is False assert res2.metadata and 
res2.metadata.get("fallback_action") == "allow" @@ -149,7 +149,7 @@ async def capture(self: AIDefenseClient, **_): monkeypatch.setattr(AIDefenseClient, "chat_inspect", capture, raising=True) cfg = CiscoAIDefenseConfig(api_url="https://example.com/custom/chat") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) _ = await ev.evaluate("text") assert captured["endpoint_url"] == "https://example.com/custom/chat" @@ -162,7 +162,7 @@ async def boom(self: AIDefenseClient, **kwargs): monkeypatch.setattr(AIDefenseClient, "chat_inspect", boom, raising=True) cfg = CiscoAIDefenseConfig(on_error="allow") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) res = await ev.evaluate("anything") assert res.matched is False assert res.error == "network down" @@ -180,7 +180,7 @@ async def capture(self: AIDefenseClient, messages, **_): monkeypatch.setattr(AIDefenseClient, "chat_inspect", capture, raising=True) cfg = CiscoAIDefenseConfig(messages_strategy="history") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) data = {"messages": [{"role": "user", "content": "hello"}]} _ = await ev.evaluate(data) assert captured["messages"] == data["messages"] @@ -197,7 +197,7 @@ async def capture(self: AIDefenseClient, messages, **_): monkeypatch.setattr(AIDefenseClient, "chat_inspect", capture, raising=True) cfg = CiscoAIDefenseConfig(payload_field="output") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) _ = await ev.evaluate("some output text") assert captured["messages"][0]["role"] == "assistant" @@ -213,7 +213,7 @@ async def capture(self: AIDefenseClient, messages, **_): monkeypatch.setattr(AIDefenseClient, "chat_inspect", capture, raising=True) cfg = CiscoAIDefenseConfig(messages_strategy="single", payload_field="input") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) _ = await ev.evaluate("hello world") assert captured["messages"] == [{"role": "user", "content": "hello world"}] @@ -229,7 +229,7 @@ async def 
capture(self: AIDefenseClient, messages, **_): monkeypatch.setattr(AIDefenseClient, "chat_inspect", capture, raising=True) cfg = CiscoAIDefenseConfig(messages_strategy="single", payload_field="input") - ev = CiscoAIDefenseEvaluator(cfg) + ev = CiscoAIDefenseRule(cfg) _ = await ev.evaluate({"input": "hello world", "extra": "ignored"}) assert captured["messages"] == [{"role": "user", "content": "hello world"}] diff --git a/evaluators/contrib/galileo/Makefile b/rules/contrib/galileo/Makefile similarity index 86% rename from evaluators/contrib/galileo/Makefile rename to rules/contrib/galileo/Makefile index 89d61ac0..25229ca9 100644 --- a/evaluators/contrib/galileo/Makefile +++ b/rules/contrib/galileo/Makefile @@ -1,9 +1,9 @@ .PHONY: help sync test lint lint-fix typecheck check build publish -PACKAGE := agent-control-evaluator-galileo +PACKAGE := agent-control-rule-galileo help: - @echo "Agent Control Evaluator - Galileo - Makefile commands" + @echo "Agent Control Rule - Galileo - Makefile commands" @echo "" @echo " make test - run pytest" @echo " make lint - run ruff check" @@ -16,7 +16,7 @@ sync: uv sync test: - uv run --with pytest --with pytest-asyncio --with pytest-cov --package $(PACKAGE) pytest tests --cov=src --cov-report=xml:../../../coverage-evaluators-galileo.xml -q + uv run --with pytest --with pytest-asyncio --with pytest-cov --package $(PACKAGE) pytest tests --cov=src --cov-report=xml:../../../coverage-rules-galileo.xml -q lint: uv run --with ruff --package $(PACKAGE) ruff check --config ../../../pyproject.toml src/ diff --git a/evaluators/contrib/galileo/README.md b/rules/contrib/galileo/README.md similarity index 62% rename from evaluators/contrib/galileo/README.md rename to rules/contrib/galileo/README.md index f8461f2a..2873bb51 100644 --- a/evaluators/contrib/galileo/README.md +++ b/rules/contrib/galileo/README.md @@ -1,21 +1,21 @@ -# Galileo Luna Evaluator +# Galileo Luna Rule -Integration package for Galileo Luna evaluator. 
+Integration package for Galileo Luna rule. ## Migrating from Luna2 -The `galileo.luna2` evaluator ID has been removed. Existing controls that use -`galileo.luna2` should migrate to `galileo.luna` and update their evaluator +The `galileo.luna2` rule ID has been removed. Existing controls that use +`galileo.luna2` should migrate to `galileo.luna` and update their rule configuration to the direct Luna scorer fields (`scorer_label`, `scorer_id`, or `scorer_version_id`, plus `threshold` and `operator`). If you still need the -legacy Luna2 evaluator, pin `agent-control-evaluator-galileo <8`. +legacy Luna2 rule, pin the pre-rename package `agent-control-evaluator-galileo <8`. ## Install Canonical install path: ```bash -pip install "agent-control-evaluators[galileo]" +pip install "agent-control-rules[galileo]" ``` Grandfathered convenience aliases remain available: @@ -27,9 +27,9 @@ pip install "agent-control-sdk[galileo]" Fallback direct wheel install: ```bash -pip install agent-control-evaluator-galileo +pip install agent-control-rule-galileo ``` -See full documentation in: https://docs.agentcontrol.dev/concepts/evaluators/contributing-evaluator +See full documentation in: https://docs.agentcontrol.dev/concepts/rules/contributing-rule Example with usage: https://docs.agentcontrol.dev/examples/galileo-luna diff --git a/evaluators/contrib/galileo/pyproject.toml b/rules/contrib/galileo/pyproject.toml similarity index 63% rename from evaluators/contrib/galileo/pyproject.toml rename to rules/contrib/galileo/pyproject.toml index a6e7a191..d474fd26 100644 --- a/evaluators/contrib/galileo/pyproject.toml +++ b/rules/contrib/galileo/pyproject.toml @@ -1,13 +1,13 @@ [project] -name = "agent-control-evaluator-galileo" +name = "agent-control-rule-galileo" version = "8.1.2" -description = "Galileo Luna evaluator for agent-control" +description = "Galileo Luna rule for agent-control" readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "Agent Control Team" }] 
dependencies = [ - "agent-control-evaluators>=7.5.0", + "agent-control-rules>=7.5.0", "agent-control-models>=7.5.0", "httpx>=0.24.0", "pydantic>=2.12.4", @@ -22,17 +22,17 @@ dev = [ "mypy>=1.8.0", ] -[project.entry-points."agent_control.evaluators"] -"galileo.luna" = "agent_control_evaluator_galileo.luna:LunaEvaluator" +[project.entry-points."agent_control.rules"] +"galileo.luna" = "agent_control_rule_galileo.luna:LunaRule" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/agent_control_evaluator_galileo"] +packages = ["src/agent_control_rule_galileo"] # For local dev, use override to resolve from workspace [tool.uv.sources] -agent-control-evaluators = { path = "../../builtin", editable = true } +agent-control-rules = { path = "../../builtin", editable = true } agent-control-models = { path = "../../../models", editable = true } diff --git a/rules/contrib/galileo/src/agent_control_rule_galileo/__init__.py b/rules/contrib/galileo/src/agent_control_rule_galileo/__init__.py new file mode 100644 index 00000000..3949581d --- /dev/null +++ b/rules/contrib/galileo/src/agent_control_rule_galileo/__init__.py @@ -0,0 +1,40 @@ +"""Agent Control Rule - Galileo. + +This package provides Galileo rules for agent-control. 
+ +Available rules: + - galileo.luna: Galileo Luna direct scorer evaluation + +Installation: + pip install agent-control-rule-galileo + +Or via the agent-control-rules convenience extra: + pip install agent-control-rules[galileo] +""" + +from importlib.metadata import PackageNotFoundError, version + +try: + __version__ = version("agent-control-rule-galileo") +except PackageNotFoundError: + __version__ = "0.0.0.dev" + +from agent_control_rule_galileo.luna import ( + LUNA_AVAILABLE, + GalileoLunaClient, + LunaOperator, + LunaRule, + LunaRuleConfig, + ScorerInvokeRequest, + ScorerInvokeResponse, +) + +__all__ = [ + "GalileoLunaClient", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaRule", + "LunaRuleConfig", + "LunaOperator", + "LUNA_AVAILABLE", +] diff --git a/rules/contrib/galileo/src/agent_control_rule_galileo/luna/__init__.py b/rules/contrib/galileo/src/agent_control_rule_galileo/luna/__init__.py new file mode 100644 index 00000000..fc253c91 --- /dev/null +++ b/rules/contrib/galileo/src/agent_control_rule_galileo/luna/__init__.py @@ -0,0 +1,21 @@ +"""Galileo Luna direct scorer rule.""" + +from agent_control_rule_galileo.luna.client import ( + GalileoLunaClient, + ScorerInvokeInputs, + ScorerInvokeRequest, + ScorerInvokeResponse, +) +from agent_control_rule_galileo.luna.config import LunaOperator, LunaRuleConfig +from agent_control_rule_galileo.luna.rule import LUNA_AVAILABLE, LunaRule + +__all__ = [ + "GalileoLunaClient", + "ScorerInvokeInputs", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaRuleConfig", + "LunaOperator", + "LunaRule", + "LUNA_AVAILABLE", +] diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py b/rules/contrib/galileo/src/agent_control_rule_galileo/luna/client.py similarity index 100% rename from evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/client.py rename to rules/contrib/galileo/src/agent_control_rule_galileo/luna/client.py diff --git 
a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py b/rules/contrib/galileo/src/agent_control_rule_galileo/luna/config.py similarity index 93% rename from evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py rename to rules/contrib/galileo/src/agent_control_rule_galileo/luna/config.py index 788fa24c..d71cfbe5 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/config.py +++ b/rules/contrib/galileo/src/agent_control_rule_galileo/luna/config.py @@ -4,8 +4,8 @@ from typing import Literal -from agent_control_evaluators import EvaluatorConfig from agent_control_models import JSONObject, JSONValue +from agent_control_rules import RuleConfig from pydantic import Field, model_validator LunaOperator = Literal["gt", "gte", "lt", "lte", "eq", "ne", "contains", "any"] @@ -28,14 +28,14 @@ def coerce_number(value: JSONValue) -> float | None: return None -class LunaEvaluatorConfig(EvaluatorConfig): +class LunaRuleConfig(RuleConfig): """Configuration for direct Luna scorer evaluation. Attributes: scorer_label: Preset, registered, or fine-tuned scorer label. scorer_id: Optional Galileo scorer identifier. scorer_version_id: Optional Galileo scorer version identifier. - threshold: Local threshold used by the evaluator for comparison. + threshold: Local threshold used by the rule for comparison. operator: Local comparison operator. Numeric operators use threshold as a number. scorer_config: Optional scorer-specific config sent as ``config``. payload_field: Explicit scorer input side for scalar selected data. 
@@ -86,7 +86,7 @@ class LunaEvaluatorConfig(EvaluatorConfig): ) @model_validator(mode="after") - def validate_threshold(self) -> LunaEvaluatorConfig: + def validate_threshold(self) -> LunaRuleConfig: """Validate threshold compatibility with the configured operator.""" if not (self.scorer_label or self.scorer_id or self.scorer_version_id): raise ValueError( diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py b/rules/contrib/galileo/src/agent_control_rule_galileo/luna/rule.py similarity index 90% rename from evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py rename to rules/contrib/galileo/src/agent_control_rule_galileo/luna/rule.py index 1221cedb..bbd9491d 100644 --- a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/luna/evaluator.py +++ b/rules/contrib/galileo/src/agent_control_rule_galileo/luna/rule.py @@ -1,4 +1,4 @@ -"""Direct Galileo Luna evaluator implementation.""" +"""Direct Galileo Luna rule implementation.""" from __future__ import annotations @@ -9,11 +9,11 @@ from typing import Any import httpx -from agent_control_evaluators import Evaluator, EvaluatorMetadata, register_evaluator -from agent_control_models import EvaluatorResult, JSONValue +from agent_control_models import JSONValue, RuleResult +from agent_control_rules import Rule, RuleMetadata, register_rule from .client import GalileoLunaClient, ScorerInvokeResponse -from .config import LunaEvaluatorConfig, coerce_number +from .config import LunaRuleConfig, coerce_number logger = logging.getLogger(__name__) @@ -21,7 +21,7 @@ def _resolve_package_version() -> str: """Return the installed package version, or a dev fallback during local imports.""" try: - return version("agent-control-evaluator-galileo") + return version("agent-control-rule-galileo") except PackageNotFoundError: return "0.0.0.dev" @@ -102,29 +102,29 @@ def _http_status_error_metadata(error: httpx.HTTPStatusError) -> dict[str, Any]: return {key: value 
for key, value in metadata.items() if value is not None} -@register_evaluator -class LunaEvaluator(Evaluator[LunaEvaluatorConfig]): - """Galileo Luna evaluator using the direct scorer invocation API.""" +@register_rule +class LunaRule(Rule[LunaRuleConfig]): + """Galileo Luna rule using the direct scorer invocation API.""" - metadata = EvaluatorMetadata( + metadata = RuleMetadata( name="galileo.luna", version=_PACKAGE_VERSION, description="Galileo Luna direct scorer evaluation", requires_api_key=True, timeout_ms=10000, ) - config_model = LunaEvaluatorConfig + config_model = LunaRuleConfig @classmethod def is_available(cls) -> bool: """Check whether required runtime dependencies are available.""" return LUNA_AVAILABLE - def __init__(self, config: LunaEvaluatorConfig) -> None: - """Initialize the direct Luna evaluator. + def __init__(self, config: LunaRuleConfig) -> None: + """Initialize the direct Luna rule. Args: - config: Validated LunaEvaluatorConfig instance. + config: Validated LunaRuleConfig instance. Raises: ValueError: If neither GALILEO_API_SECRET_KEY nor GALILEO_API_KEY is set. @@ -193,18 +193,18 @@ def _score_matches(self, score: JSONValue) -> bool: raise ValueError(f"Unsupported Luna operator: {operator}") - async def evaluate(self, data: Any) -> EvaluatorResult: + async def evaluate(self, data: Any) -> RuleResult: """Evaluate selected data with Galileo Luna direct scorer invocation. Args: data: The data selected from the runtime step. Returns: - EvaluatorResult with local threshold decision and scorer metadata. + RuleResult with local threshold decision and scorer metadata. 
""" input_text, output_text = self._prepare_payload(data) if not (_has_text(input_text) or _has_text(output_text)): - return EvaluatorResult( + return RuleResult( matched=False, confidence=1.0, message="No data to score with Luna", @@ -230,7 +230,7 @@ async def evaluate(self, data: Any) -> EvaluatorResult: operator = self.config.operator threshold = self.config.threshold state = "triggered" if matched else "not triggered" - return EvaluatorResult( + return RuleResult( matched=matched, confidence=_confidence_from_score(response.score), message=( @@ -278,7 +278,7 @@ def _metadata( def _handle_error( self, error: Exception, - ) -> EvaluatorResult: + ) -> RuleResult: error_detail = str(error) metadata: dict[str, Any] = { "error_type": type(error).__name__, @@ -289,7 +289,7 @@ def _handle_error( if isinstance(error, httpx.HTTPStatusError): metadata.update(_http_status_error_metadata(error)) - return EvaluatorResult( + return RuleResult( matched=False, confidence=0.0, message=f"Luna evaluation error: {error_detail}", diff --git a/evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed b/rules/contrib/galileo/src/agent_control_rule_galileo/py.typed similarity index 100% rename from evaluators/contrib/galileo/src/agent_control_evaluator_galileo/py.typed rename to rules/contrib/galileo/src/agent_control_rule_galileo/py.typed diff --git a/examples/crewai/evaluator_showcase/src/evaluator_showcase/__init__.py b/rules/contrib/galileo/tests/__init__.py similarity index 100% rename from examples/crewai/evaluator_showcase/src/evaluator_showcase/__init__.py rename to rules/contrib/galileo/tests/__init__.py diff --git a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py b/rules/contrib/galileo/tests/test_luna_coverage_gaps.py similarity index 73% rename from evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py rename to rules/contrib/galileo/tests/test_luna_coverage_gaps.py index e1518eec..082eef0b 100644 --- 
a/evaluators/contrib/galileo/tests/test_luna_coverage_gaps.py +++ b/rules/contrib/galileo/tests/test_luna_coverage_gaps.py @@ -1,7 +1,7 @@ -"""Targeted tests filling coverage gaps in luna/evaluator.py and luna/client.py. +"""Targeted tests filling coverage gaps in luna/rule.py and luna/client.py. These tests cover the small utility functions and rare branches that the -integration-style tests in ``test_luna_evaluator.py`` skip past. +integration-style tests in ``test_luna_rule.py`` skip past. """ from __future__ import annotations @@ -15,7 +15,7 @@ # ============================================================================= -# luna/evaluator.py: utility helpers +# luna/rule.py: utility helpers # ============================================================================= @@ -23,30 +23,30 @@ class TestCoercePayloadText: """``_coerce_payload_text`` normalises arbitrary values to strings.""" def test_none_returns_none(self): - from agent_control_evaluator_galileo.luna.evaluator import _coerce_payload_text + from agent_control_rule_galileo.luna.rule import _coerce_payload_text assert _coerce_payload_text(None) is None def test_string_passed_through(self): - from agent_control_evaluator_galileo.luna.evaluator import _coerce_payload_text + from agent_control_rule_galileo.luna.rule import _coerce_payload_text assert _coerce_payload_text("hello") == "hello" @pytest.mark.parametrize("value", [42, 3.14, True]) def test_scalars_stringified(self, value): - from agent_control_evaluator_galileo.luna.evaluator import _coerce_payload_text + from agent_control_rule_galileo.luna.rule import _coerce_payload_text assert _coerce_payload_text(value) == str(value) def test_dict_is_json_serialized(self): - from agent_control_evaluator_galileo.luna.evaluator import _coerce_payload_text + from agent_control_rule_galileo.luna.rule import _coerce_payload_text result = _coerce_payload_text({"a": 1, "b": 2}) assert json.loads(result) == {"a": 1, "b": 2} def 
test_unserialisable_falls_back_to_str(self): - from agent_control_evaluator_galileo.luna.evaluator import _coerce_payload_text + from agent_control_rule_galileo.luna.rule import _coerce_payload_text class CannotJson: def __repr__(self): @@ -65,12 +65,12 @@ class TestExtractDictText: """``_extract_dict_text`` returns ``None`` for missing keys.""" def test_missing_key_returns_none(self): - from agent_control_evaluator_galileo.luna.evaluator import _extract_dict_text + from agent_control_rule_galileo.luna.rule import _extract_dict_text assert _extract_dict_text({}, "absent") is None def test_present_key_coerced(self): - from agent_control_evaluator_galileo.luna.evaluator import _extract_dict_text + from agent_control_rule_galileo.luna.rule import _extract_dict_text assert _extract_dict_text({"x": 7}, "x") == "7" @@ -79,34 +79,34 @@ class TestContains: """``_contains`` supports str/list and dict values against a threshold.""" def test_none_threshold_is_no_match(self): - from agent_control_evaluator_galileo.luna.evaluator import _contains + from agent_control_rule_galileo.luna.rule import _contains assert _contains("anything", None) is False def test_string_contains_substring(self): - from agent_control_evaluator_galileo.luna.evaluator import _contains + from agent_control_rule_galileo.luna.rule import _contains assert _contains("hello world", "world") is True assert _contains("hello world", "absent") is False def test_list_contains_value(self): - from agent_control_evaluator_galileo.luna.evaluator import _contains + from agent_control_rule_galileo.luna.rule import _contains assert _contains(["a", "b", "c"], "b") is True assert _contains(["a", "b", "c"], "z") is False def test_dict_threshold_does_not_match_key(self): - from agent_control_evaluator_galileo.luna.evaluator import _contains + from agent_control_rule_galileo.luna.rule import _contains assert _contains({"toxicity": 0.9}, "toxicity") is False def test_dict_threshold_matches_value(self): - from 
agent_control_evaluator_galileo.luna.evaluator import _contains + from agent_control_rule_galileo.luna.rule import _contains assert _contains({"label": "flagged"}, "flagged") is True def test_other_types_return_false(self): - from agent_control_evaluator_galileo.luna.evaluator import _contains + from agent_control_rule_galileo.luna.rule import _contains # Non-iterable score => no match. assert _contains(42, 42) is False @@ -116,44 +116,44 @@ class TestConfidenceFromScore: """``_confidence_from_score`` maps a raw score to [0, 1].""" def test_true_bool_maps_to_one(self): - from agent_control_evaluator_galileo.luna.evaluator import _confidence_from_score + from agent_control_rule_galileo.luna.rule import _confidence_from_score assert _confidence_from_score(True) == 1.0 def test_false_bool_maps_to_zero(self): - from agent_control_evaluator_galileo.luna.evaluator import _confidence_from_score + from agent_control_rule_galileo.luna.rule import _confidence_from_score assert _confidence_from_score(False) == 0.0 def test_in_range_number_returned_as_is(self): - from agent_control_evaluator_galileo.luna.evaluator import _confidence_from_score + from agent_control_rule_galileo.luna.rule import _confidence_from_score assert _confidence_from_score(0.42) == 0.42 def test_out_of_range_falls_back_to_one(self): - from agent_control_evaluator_galileo.luna.evaluator import _confidence_from_score + from agent_control_rule_galileo.luna.rule import _confidence_from_score # Above 1.0 → fall back to default confidence assert _confidence_from_score(7.2) == 1.0 def test_non_numeric_falls_back_to_one(self): - from agent_control_evaluator_galileo.luna.evaluator import _confidence_from_score + from agent_control_rule_galileo.luna.rule import _confidence_from_score assert _confidence_from_score("not-a-number") == 1.0 # ============================================================================= -# luna/evaluator.py: _score_matches operator branches +# luna/rule.py: _score_matches operator 
branches # ============================================================================= @pytest.fixture -def luna_evaluator(monkeypatch): - """A ready-to-use LunaEvaluator instance with auth env wired up.""" +def luna_rule(monkeypatch): + """A ready-to-use LunaRule instance with auth env wired up.""" monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_rule_galileo.luna import LunaRule - return LunaEvaluator.from_dict( + return LunaRule.from_dict( {"scorer_label": "toxicity", "threshold": 0.5, "operator": "gte"} ) @@ -163,35 +163,35 @@ class TestScoreMatchesOperators: def _make(self, operator, threshold, monkeypatch): monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_rule_galileo.luna import LunaRule if operator in {"eq", "ne", "contains"}: threshold_value = threshold else: threshold_value = threshold - return LunaEvaluator.from_dict( + return LunaRule.from_dict( {"scorer_label": "toxicity", "threshold": threshold_value, "operator": operator} ) def test_any_truthy_score_matches(self, monkeypatch): - evaluator = self._make("any", 0.5, monkeypatch) - assert evaluator._score_matches(1) is True - assert evaluator._score_matches(0) is False + rule = self._make("any", 0.5, monkeypatch) + assert rule._score_matches(1) is True + assert rule._score_matches(0) is False def test_eq_matches_threshold(self, monkeypatch): - evaluator = self._make("eq", "flagged", monkeypatch) - assert evaluator._score_matches("flagged") is True - assert evaluator._score_matches("safe") is False + rule = self._make("eq", "flagged", monkeypatch) + assert rule._score_matches("flagged") is True + assert rule._score_matches("safe") is False def test_ne_matches_when_different(self, monkeypatch): - evaluator = self._make("ne", "flagged", monkeypatch) - assert evaluator._score_matches("safe") is True - assert 
evaluator._score_matches("flagged") is False + rule = self._make("ne", "flagged", monkeypatch) + assert rule._score_matches("safe") is True + assert rule._score_matches("flagged") is False def test_contains_matches_substring(self, monkeypatch): - evaluator = self._make("contains", "flag", monkeypatch) - assert evaluator._score_matches("flagged") is True - assert evaluator._score_matches("clean") is False + rule = self._make("contains", "flag", monkeypatch) + assert rule._score_matches("flagged") is True + assert rule._score_matches("clean") is False def test_numeric_operators_all_branches(self, monkeypatch): for op, expectations in [ @@ -200,18 +200,18 @@ def test_numeric_operators_all_branches(self, monkeypatch): ("lt", [(0.4, True), (0.5, False)]), ("lte", [(0.5, True), (0.6, False)]), ]: - evaluator = self._make(op, 0.5, monkeypatch) + rule = self._make(op, 0.5, monkeypatch) for score, expected in expectations: - assert evaluator._score_matches(score) is expected, (op, score) + assert rule._score_matches(score) is expected, (op, score) def test_numeric_operator_rejects_non_numeric_score(self, monkeypatch): - evaluator = self._make("gte", 0.5, monkeypatch) + rule = self._make("gte", 0.5, monkeypatch) with pytest.raises(ValueError, match="not numeric"): - evaluator._score_matches("not-a-number") + rule._score_matches("not-a-number") # ============================================================================= -# luna/evaluator.py: payload preparation + aclose +# luna/rule.py: payload preparation + aclose # ============================================================================= @@ -220,20 +220,20 @@ class TestPreparePayload: def test_scalar_routed_to_input_when_label_lacks_output(self, monkeypatch): monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_rule_galileo.luna import LunaRule - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + rule 
= LunaRule.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) - input_text, output_text = evaluator._prepare_payload("hello") + input_text, output_text = rule._prepare_payload("hello") assert input_text == "hello" assert output_text is None def test_scalar_routed_to_output_when_payload_field_is_output(self, monkeypatch): monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_rule_galileo.luna import LunaRule - evaluator = LunaEvaluator.from_dict( + rule = LunaRule.from_dict( { "scorer_label": "toxicity", "threshold": 0.5, @@ -241,7 +241,7 @@ def test_scalar_routed_to_output_when_payload_field_is_output(self, monkeypatch) } ) - input_text, output_text = evaluator._prepare_payload("hello") + input_text, output_text = rule._prepare_payload("hello") assert input_text is None assert output_text == "hello" @@ -251,22 +251,22 @@ def test_scalar_output_label_without_payload_field_still_defaults_to_input( monkeypatch, ): monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_rule_galileo.luna import LunaRule - evaluator = LunaEvaluator.from_dict( + rule = LunaRule.from_dict( {"scorer_label": "output_correctness", "threshold": 0.5} ) - input_text, output_text = evaluator._prepare_payload("hello") + input_text, output_text = rule._prepare_payload("hello") assert input_text == "hello" assert output_text is None def test_structured_payload_uses_input_output_keys_over_payload_field(self, monkeypatch): monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_rule_galileo.luna import LunaRule - evaluator = LunaEvaluator.from_dict( + rule = LunaRule.from_dict( { "scorer_label": "toxicity", "threshold": 0.5, @@ -274,7 +274,7 @@ def test_structured_payload_uses_input_output_keys_over_payload_field(self, monk } ) - input_text, output_text = 
evaluator._prepare_payload( + input_text, output_text = rule._prepare_payload( {"input": "prompt", "output": "answer"} ) @@ -283,31 +283,31 @@ def test_structured_payload_uses_input_output_keys_over_payload_field(self, monk @pytest.mark.asyncio -async def test_evaluator_aclose_closes_underlying_client(monkeypatch): +async def test_rule_aclose_closes_underlying_client(monkeypatch): """``aclose`` must release the eagerly-created client without clearing it.""" monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator + from agent_control_rule_galileo.luna import LunaRule - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + rule = LunaRule.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) fake = MagicMock() fake.close = AsyncMock() - evaluator._client = fake + rule._client = fake - await evaluator.aclose() + await rule.aclose() fake.close.assert_awaited_once() - assert evaluator._client is fake + assert rule._client is fake @pytest.mark.asyncio -async def test_evaluator_handles_non_success_status(monkeypatch): +async def test_rule_handles_non_success_status(monkeypatch): """A non-success status from the scorer must surface as an error result.""" monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna import LunaRule, ScorerInvokeResponse + from agent_control_rule_galileo.luna.client import GalileoLunaClient - evaluator = LunaEvaluator.from_dict( + rule = LunaRule.from_dict( {"scorer_label": "toxicity", "threshold": 0.5, "operator": "gte"} ) @@ -319,7 +319,7 @@ async def test_evaluator_handles_non_success_status(monkeypatch): error_message="upstream timeout", ) - result = await evaluator.evaluate("hello") + result = await rule.evaluate("hello") assert result.matched is False 
assert result.error is not None @@ -327,7 +327,7 @@ async def test_evaluator_handles_non_success_status(monkeypatch): # ============================================================================= -# luna/evaluator.py: package version fallback +# luna/rule.py: package version fallback # ============================================================================= @@ -335,10 +335,10 @@ def test_resolve_package_version_falls_back_when_metadata_missing(): """The dev fallback must trigger when the package isn't installed by metadata.""" from importlib.metadata import PackageNotFoundError - from agent_control_evaluator_galileo.luna import evaluator as evaluator_module + from agent_control_rule_galileo.luna import rule as rule_module - with patch.object(evaluator_module, "version", side_effect=PackageNotFoundError): - result = evaluator_module._resolve_package_version() + with patch.object(rule_module, "version", side_effect=PackageNotFoundError): + result = rule_module._resolve_package_version() assert result == "0.0.0.dev" @@ -352,32 +352,32 @@ class TestAsFloatOrNone: """``_as_float_or_none`` parses scalar values; strings may fail.""" def test_returns_none_for_bool(self): - from agent_control_evaluator_galileo.luna.client import _as_float_or_none + from agent_control_rule_galileo.luna.client import _as_float_or_none assert _as_float_or_none(True) is None def test_returns_none_for_none(self): - from agent_control_evaluator_galileo.luna.client import _as_float_or_none + from agent_control_rule_galileo.luna.client import _as_float_or_none assert _as_float_or_none(None) is None def test_returns_float_for_int(self): - from agent_control_evaluator_galileo.luna.client import _as_float_or_none + from agent_control_rule_galileo.luna.client import _as_float_or_none assert _as_float_or_none(7) == 7.0 def test_returns_float_for_string_number(self): - from agent_control_evaluator_galileo.luna.client import _as_float_or_none + from agent_control_rule_galileo.luna.client import 
_as_float_or_none assert _as_float_or_none("0.42") == 0.42 def test_returns_none_for_unparseable_string(self): - from agent_control_evaluator_galileo.luna.client import _as_float_or_none + from agent_control_rule_galileo.luna.client import _as_float_or_none assert _as_float_or_none("not-a-number") is None def test_returns_none_for_other_types(self): - from agent_control_evaluator_galileo.luna.client import _as_float_or_none + from agent_control_rule_galileo.luna.client import _as_float_or_none assert _as_float_or_none([1, 2]) is None @@ -386,35 +386,35 @@ class TestHasValue: """``_has_value`` is the "is this scorable" predicate.""" def test_none_is_empty(self): - from agent_control_evaluator_galileo.luna.client import _has_value + from agent_control_rule_galileo.luna.client import _has_value assert _has_value(None) is False def test_empty_string_is_empty(self): - from agent_control_evaluator_galileo.luna.client import _has_value + from agent_control_rule_galileo.luna.client import _has_value assert _has_value("") is False assert _has_value(" ") is False def test_non_empty_string_has_value(self): - from agent_control_evaluator_galileo.luna.client import _has_value + from agent_control_rule_galileo.luna.client import _has_value assert _has_value("hi") is True def test_empty_list_or_dict_is_empty(self): - from agent_control_evaluator_galileo.luna.client import _has_value + from agent_control_rule_galileo.luna.client import _has_value assert _has_value([]) is False assert _has_value({}) is False def test_non_empty_list_or_dict_has_value(self): - from agent_control_evaluator_galileo.luna.client import _has_value + from agent_control_rule_galileo.luna.client import _has_value assert _has_value([1]) is True assert _has_value({"k": "v"}) is True def test_scalar_other_types_have_value(self): - from agent_control_evaluator_galileo.luna.client import _has_value + from agent_control_rule_galileo.luna.client import _has_value assert _has_value(42) is True assert _has_value(0) 
is True # 0 is a real value, not empty @@ -425,7 +425,7 @@ class TestScorerInvokeRequestValidation: """``ScorerInvokeRequest`` rejects malformed input combos.""" def test_missing_all_identifiers_raises(self): - from agent_control_evaluator_galileo.luna.client import ( + from agent_control_rule_galileo.luna.client import ( ScorerInvokeInputs, ScorerInvokeRequest, ) @@ -444,7 +444,7 @@ def test_client_raises_when_no_credentials(monkeypatch): "GALILEO_LUNA_AUTH_MODE", ): monkeypatch.delenv(name, raising=False) - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY"): GalileoLunaClient() @@ -455,7 +455,7 @@ def test_client_requires_explicit_mode_when_both_credentials_are_present(monkeyp monkeypatch.setenv("GALILEO_API_KEY", "public-key") monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") monkeypatch.delenv("GALILEO_LUNA_AUTH_MODE", raising=False) - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient with pytest.raises( ValueError, match="Both a Galileo API key and a Galileo API secret are configured" @@ -468,7 +468,7 @@ def test_client_uses_explicit_public_mode_when_both_credentials_are_present(monk monkeypatch.setenv("GALILEO_API_KEY", "public-key") monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "public") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): client = GalileoLunaClient() @@ -484,7 +484,7 @@ def test_client_uses_explicit_internal_mode_when_both_credentials_are_present(mo monkeypatch.setenv("GALILEO_API_KEY", "public-key") 
monkeypatch.setenv("GALILEO_API_SECRET_KEY", "internal-secret") monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "internal") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): client = GalileoLunaClient() @@ -501,7 +501,7 @@ def test_client_rejects_mode_without_matching_credential(monkeypatch): monkeypatch.delenv("GALILEO_API_SECRET", raising=False) monkeypatch.setenv("GALILEO_API_KEY", "public-key") monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "internal") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY"): @@ -512,7 +512,7 @@ def test_client_rejects_invalid_auth_mode(monkeypatch): """Invalid auth mode values should fail during client initialization.""" monkeypatch.setenv("GALILEO_API_KEY", "public-key") monkeypatch.setenv("GALILEO_LUNA_AUTH_MODE", "sideways") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient with pytest.warns(DeprecationWarning, match="GALILEO_LUNA_AUTH_MODE is deprecated"): with pytest.raises(ValueError, match="GALILEO_LUNA_AUTH_MODE"): @@ -527,7 +527,7 @@ def _client(self, monkeypatch): monkeypatch.delenv("GALILEO_API_SECRET", raising=False) monkeypatch.delenv("GALILEO_LUNA_AUTH_MODE", raising=False) monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient return GalileoLunaClient() @@ -582,7 +582,7 @@ async def test_get_client_adds_api_key_header_when_no_secret(monkeypatch): 
monkeypatch.delenv("GALILEO_API_SECRET_KEY", raising=False) monkeypatch.delenv("GALILEO_API_SECRET", raising=False) monkeypatch.setenv("GALILEO_API_KEY", "public-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() http_client = await client._get_client() @@ -595,7 +595,7 @@ async def test_get_client_adds_api_key_header_when_no_secret(monkeypatch): @pytest.mark.asyncio async def test_invoke_rejects_missing_scorer_identifier(monkeypatch): monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() try: @@ -609,7 +609,7 @@ async def test_invoke_rejects_missing_scorer_identifier(monkeypatch): async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): """A non-object JSON body must surface as a clear RuntimeError.""" monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -633,7 +633,7 @@ async def test_invoke_raises_when_response_is_not_a_json_object(monkeypatch): async def test_invoke_propagates_http_status_error(monkeypatch): """The client logs and re-raises HTTP status errors.""" monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -662,7 +662,7 @@ async def test_invoke_propagates_http_status_error(monkeypatch): async def test_invoke_propagates_request_error(monkeypatch): """RequestError is logged and re-raised so callers can decide policy.""" monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from 
agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient client = GalileoLunaClient() @@ -682,7 +682,7 @@ async def test_invoke_propagates_request_error(monkeypatch): async def test_client_async_context_manager_closes_on_exit(monkeypatch): """Entering/exiting the async context manager must close the client.""" monkeypatch.setenv("GALILEO_API_KEY", "test-key") - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + from agent_control_rule_galileo.luna.client import GalileoLunaClient async with GalileoLunaClient() as client: # Trigger lazy client creation so close() has work to do. diff --git a/evaluators/contrib/galileo/tests/test_luna_evaluator.py b/rules/contrib/galileo/tests/test_luna_rule.py similarity index 83% rename from evaluators/contrib/galileo/tests/test_luna_evaluator.py rename to rules/contrib/galileo/tests/test_luna_rule.py index 86328e1c..29abfdf6 100644 --- a/evaluators/contrib/galileo/tests/test_luna_evaluator.py +++ b/rules/contrib/galileo/tests/test_luna_rule.py @@ -1,4 +1,4 @@ -"""Tests for the direct Galileo Luna evaluator and client.""" +"""Tests for the direct Galileo Luna rule and client.""" from __future__ import annotations @@ -11,7 +11,7 @@ import httpx import pytest -from agent_control_models import EvaluatorResult +from agent_control_models import RuleResult from pydantic import ValidationError @@ -21,14 +21,14 @@ def _decode_jwt_payload(token: str) -> dict[str, object]: return json.loads(urlsafe_b64decode(padded.encode()).decode()) -class TestLunaEvaluatorConfig: - """Tests for direct Luna evaluator configuration.""" +class TestLunaRuleConfig: + """Tests for direct Luna rule configuration.""" def test_config_accepts_direct_scorer_fields(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + from agent_control_rule_galileo.luna import LunaRuleConfig # Given: a direct scorer config with local 
thresholding - config = LunaEvaluatorConfig( + config = LunaRuleConfig( scorer_label="toxicity", scorer_id="scorer-123", scorer_version_id="version-123", @@ -47,32 +47,32 @@ def test_config_accepts_direct_scorer_fields(self) -> None: assert config.payload_field == "input" def test_config_accepts_scorer_id_without_label(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + from agent_control_rule_galileo.luna import LunaRuleConfig - config = LunaEvaluatorConfig(scorer_id="scorer-123") + config = LunaRuleConfig(scorer_id="scorer-123") assert config.scorer_id == "scorer-123" assert config.scorer_label is None def test_config_requires_a_scorer_identifier(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + from agent_control_rule_galileo.luna import LunaRuleConfig with pytest.raises(ValidationError, match="one of scorer_label"): - LunaEvaluatorConfig(threshold=0.5) + LunaRuleConfig(threshold=0.5) def test_numeric_operator_requires_numeric_threshold(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluatorConfig + from agent_control_rule_galileo.luna import LunaRuleConfig # Given/When/Then: numeric local comparison rejects non-numeric thresholds with pytest.raises(ValidationError, match="numeric threshold"): - LunaEvaluatorConfig(scorer_label="toxicity", threshold="high", operator="gte") + LunaRuleConfig(scorer_label="toxicity", threshold="high", operator="gte") class TestGalileoLunaClient: """Tests for the GalileoLunaClient HTTP contract.""" def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: - from agent_control_evaluator_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest + from agent_control_rule_galileo.luna import ScorerInvokeInputs, ScorerInvokeRequest # Given: a scorer request with scorer config request = ScorerInvokeRequest( @@ -97,7 +97,7 @@ def test_scorer_invoke_request_matches_api_schema_shape(self) -> None: 
@pytest.mark.parametrize("empty_value", ["", " ", {}, []]) def test_scorer_invoke_request_requires_input_or_output(self, empty_value: object) -> None: - from agent_control_evaluator_galileo.luna import ScorerInvokeRequest + from agent_control_rule_galileo.luna import ScorerInvokeRequest # Given/When/Then: the request mirrors API validation with pytest.raises( @@ -109,7 +109,7 @@ def test_scorer_invoke_request_requires_input_or_output(self, empty_value: objec ) def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: - from agent_control_evaluator_galileo.luna import ScorerInvokeResponse + from agent_control_rule_galileo.luna import ScorerInvokeResponse # Given: an API scorer invoke response response = ScorerInvokeResponse.from_dict( @@ -134,7 +134,7 @@ def test_scorer_invoke_response_matches_api_schema_shape(self) -> None: assert response.raw_response["scorer_label"] == "toxicity" def test_client_uses_protect_api_url_derivation(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: the same console URL shape used by Protect with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): @@ -144,7 +144,7 @@ def test_client_uses_protect_api_url_derivation(self) -> None: assert client.api_base == "https://api.demo-v2.galileocloud.io" def test_client_uses_galileo_api_url_when_set(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: an explicit custom-environment API URL with patch.dict( @@ -161,7 +161,7 @@ def test_client_uses_galileo_api_url_when_set(self) -> None: assert client.api_base == "https://api-test-luna.example.com" def test_client_uses_luna_api_url_when_set(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: a Luna-specific API 
URL and a general API URL are both configured with patch.dict( @@ -179,7 +179,7 @@ def test_client_uses_luna_api_url_when_set(self) -> None: assert client.api_base == "https://luna-api.example.com" def test_client_uses_luna_api_url_for_internal_auth(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: internal auth and both Luna-specific and general API URLs are configured with patch.dict( @@ -197,7 +197,7 @@ def test_client_uses_luna_api_url_for_internal_auth(self) -> None: assert client.api_base == "https://internal-api.example.com" def test_client_derives_api_url_from_console_dash_hostname(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: a console- hostname with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): @@ -207,7 +207,7 @@ def test_client_derives_api_url_from_console_dash_hostname(self) -> None: assert client.api_base == "https://api-test-luna.example.com" def test_client_strips_whitespace_from_env_url(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: a URL override padded with whitespace and a trailing slash with patch.dict( @@ -226,7 +226,7 @@ def test_client_strips_whitespace_from_env_url(self) -> None: def test_client_warns_when_deprecated_auth_mode_env_is_set( self, caplog: pytest.LogCaptureFixture ) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: the deprecated auth-mode environment variable caplog.set_level(logging.WARNING) @@ -243,7 +243,7 @@ def test_client_warns_when_deprecated_auth_mode_env_is_set( assert "GALILEO_LUNA_AUTH_MODE is deprecated" in caplog.text def test_client_rejects_unreadable_ca_bundle(self) -> 
None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: a CA bundle path that does not exist with patch.dict( @@ -261,8 +261,8 @@ def test_client_rejects_unreadable_ca_bundle(self) -> None: @pytest.mark.asyncio async def test_client_applies_ca_bundle_and_connection_limits(self) -> None: import certifi - from agent_control_evaluator_galileo.luna import GalileoLunaClient - from agent_control_evaluator_galileo.luna.client import ( + from agent_control_rule_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna.client import ( DEFAULT_KEEPALIVE_EXPIRY_SECS, DEFAULT_MAX_CONNECTIONS, DEFAULT_MAX_KEEPALIVE_CONNECTIONS, @@ -282,7 +282,7 @@ def recording_client(**kwargs: object) -> httpx.AsyncClient: ) with patch( - "agent_control_evaluator_galileo.luna.client.httpx.AsyncClient", recording_client + "agent_control_rule_galileo.luna.client.httpx.AsyncClient", recording_client ): try: await client._get_client() @@ -300,7 +300,7 @@ def recording_client(**kwargs: object) -> httpx.AsyncClient: @pytest.mark.asyncio async def test_client_applies_connection_tuning_env(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient captured: dict[str, object] = {} real_async_client = httpx.AsyncClient @@ -322,7 +322,7 @@ def recording_client(**kwargs: object) -> httpx.AsyncClient: client = GalileoLunaClient(console_url="https://console.example.com") with patch( - "agent_control_evaluator_galileo.luna.client.httpx.AsyncClient", recording_client + "agent_control_rule_galileo.luna.client.httpx.AsyncClient", recording_client ): try: await client._get_client() @@ -339,8 +339,8 @@ def recording_client(**kwargs: object) -> httpx.AsyncClient: assert limits.max_keepalive_connections == 4 def test_client_ignores_empty_connection_tuning_env(self) -> None: - from agent_control_evaluator_galileo.luna 
import GalileoLunaClient - from agent_control_evaluator_galileo.luna.client import ( + from agent_control_rule_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna.client import ( DEFAULT_CLIENT_POOL_SIZE, DEFAULT_KEEPALIVE_EXPIRY_SECS, DEFAULT_MAX_CONNECTIONS, @@ -367,8 +367,8 @@ def test_client_ignores_empty_connection_tuning_env(self) -> None: @pytest.mark.asyncio async def test_client_pool_size_rotates_across_http_clients(self) -> None: - import agent_control_evaluator_galileo.luna.client as luna_client_module - from agent_control_evaluator_galileo.luna import GalileoLunaClient + import agent_control_rule_galileo.luna.client as luna_client_module + from agent_control_rule_galileo.luna import GalileoLunaClient class FakeAsyncClient: def __init__(self, **kwargs: object) -> None: @@ -408,7 +408,7 @@ def recording_client(**kwargs: object) -> FakeAsyncClient: @pytest.mark.asyncio async def test_pooled_client_selection_waits_for_client_lock(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient class FakeAsyncClient: is_closed = False @@ -446,7 +446,7 @@ async def aclose(self) -> None: @pytest.mark.asyncio async def test_close_waits_for_client_lock_before_resetting_state(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient class FakeAsyncClient: is_closed = False @@ -508,7 +508,7 @@ async def aclose(self) -> None: def test_client_reports_invalid_connection_tuning_env( self, env_values: dict[str, str], expected: str ) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient env = {"GALILEO_API_SECRET_KEY": "test-secret"} | env_values with patch.dict(os.environ, env, clear=True): @@ -519,7 +519,7 @@ def test_client_reports_invalid_connection_tuning_env( @pytest.mark.asyncio 
async def test_client_posts_to_scorers_invoke_without_protect_fields(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient captured: dict[str, object] = {} @@ -575,7 +575,7 @@ def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio async def test_client_uses_internal_jwt_when_api_secret_is_set(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient captured: dict[str, object] = {} @@ -625,7 +625,7 @@ def handler(request: httpx.Request) -> httpx.Response: @pytest.mark.asyncio async def test_client_uses_internal_jwt_without_api_key(self) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: a Luna client configured with internal JWT auth with patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True): @@ -662,7 +662,7 @@ def handler(request: httpx.Request) -> httpx.Response: async def test_client_rejects_missing_input_and_output_values( self, empty_value: object ) -> None: - from agent_control_evaluator_galileo.luna import GalileoLunaClient + from agent_control_rule_galileo.luna import GalileoLunaClient # Given: a Luna client and scorer input values that API treats as missing with patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}, clear=True): @@ -673,44 +673,44 @@ async def test_client_rejects_missing_input_and_output_values( await client.invoke(scorer_label="toxicity", input=empty_value, output=empty_value) -class TestLunaEvaluator: - """Tests for direct Luna evaluator behavior.""" +class TestLunaRule: + """Tests for direct Luna rule behavior.""" @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) - def test_evaluator_metadata(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator + def test_rule_metadata(self) -> 
None: + from agent_control_rule_galileo.luna import LunaRule - assert LunaEvaluator.metadata.name == "galileo.luna" - assert LunaEvaluator.metadata.requires_api_key is True + assert LunaRule.metadata.name == "galileo.luna" + assert LunaRule.metadata.requires_api_key is True @patch.dict(os.environ, {}, clear=True) - def test_evaluator_init_without_auth_raises(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator + def test_rule_init_without_auth_raises(self) -> None: + from agent_control_rule_galileo.luna import LunaRule with pytest.raises(ValueError, match="GALILEO_API_SECRET_KEY or GALILEO_API_KEY"): - LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + LunaRule.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) @patch.dict(os.environ, {"GALILEO_API_SECRET_KEY": "test-secret"}, clear=True) - def test_evaluator_init_accepts_api_secret(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator + def test_rule_init_accepts_api_secret(self) -> None: + from agent_control_rule_galileo.luna import LunaRule - evaluator = LunaEvaluator.from_dict( + rule = LunaRule.from_dict( { "scorer_label": "toxicity", "threshold": 0.5, } ) - assert evaluator.config.scorer_label == "toxicity" + assert rule.config.scorer_label == "toxicity" @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) @pytest.mark.asyncio - async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + async def test_rule_applies_threshold_locally_to_raw_score(self) -> None: + from agent_control_rule_galileo.luna import LunaRule, ScorerInvokeResponse + from agent_control_rule_galileo.luna.client import GalileoLunaClient - # Given: a direct Luna evaluator and a raw successful scorer response - evaluator = LunaEvaluator.from_dict( + # Given: a direct Luna rule 
and a raw successful scorer response + rule = LunaRule.from_dict( { "scorer_label": "toxicity", "threshold": 0.7, @@ -728,7 +728,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: ) # When: evaluating a full step payload - result = await evaluator.evaluate( + result = await rule.evaluate( { "input": "user prompt", "output": "model answer", @@ -736,7 +736,7 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: ) # Then: the raw score is thresholded locally and no Protect fields are sent - assert isinstance(result, EvaluatorResult) + assert isinstance(result, RuleResult) assert result.matched is True assert result.confidence == 0.82 assert result.metadata == { @@ -758,12 +758,12 @@ async def test_evaluator_applies_threshold_locally_to_raw_score(self) -> None: @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) @pytest.mark.asyncio - async def test_evaluator_returns_non_match_below_threshold(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator, ScorerInvokeResponse - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + async def test_rule_returns_non_match_below_threshold(self) -> None: + from agent_control_rule_galileo.luna import LunaRule, ScorerInvokeResponse + from agent_control_rule_galileo.luna.client import GalileoLunaClient # Given: a raw scorer value below the local threshold - evaluator = LunaEvaluator.from_dict( + rule = LunaRule.from_dict( {"scorer_label": "toxicity", "threshold": 0.7, "operator": "gte"} ) @@ -775,7 +775,7 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: ) # When: evaluating selected scalar data - result = await evaluator.evaluate("hello") + result = await rule.evaluate("hello") # Then: the control does not match assert result.matched is False @@ -791,16 +791,16 @@ async def test_evaluator_returns_non_match_below_threshold(self) -> None: @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) 
@pytest.mark.asyncio @pytest.mark.parametrize("data", ["", " "]) - async def test_evaluator_does_not_call_api_for_empty_data(self, data: str) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + async def test_rule_does_not_call_api_for_empty_data(self, data: str) -> None: + from agent_control_rule_galileo.luna import LunaRule + from agent_control_rule_galileo.luna.client import GalileoLunaClient - # Given: an evaluator and empty selected data - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + # Given: a rule and empty selected data + rule = LunaRule.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: # When: evaluating empty data - result = await evaluator.evaluate(data) + result = await rule.evaluate(data) # Then: no remote scorer call is made assert result.matched is False @@ -810,20 +810,20 @@ async def test_evaluator_does_not_call_api_for_empty_data(self, data: str) -> No @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) @pytest.mark.asyncio - async def test_evaluator_fail_open_sets_error(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + async def test_rule_fail_open_sets_error(self) -> None: + from agent_control_rule_galileo.luna import LunaRule + from agent_control_rule_galileo.luna.client import GalileoLunaClient # Given: fixed fail-open behavior for scorer errors - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + rule = LunaRule.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) with patch.object(GalileoLunaClient, "invoke", new_callable=AsyncMock) as mock_invoke: mock_invoke.side_effect = RuntimeError("service unavailable") # When: the scorer call fails - result = await 
evaluator.evaluate("hello") + result = await rule.evaluate("hello") - # Then: the evaluator reports an infrastructure error without matching + # Then: the rule reports an infrastructure error without matching assert result.matched is False assert result.error == "service unavailable" assert result.metadata is not None @@ -833,11 +833,11 @@ async def test_evaluator_fail_open_sets_error(self) -> None: @patch.dict(os.environ, {"GALILEO_API_KEY": "test-key"}) @pytest.mark.asyncio - async def test_evaluator_error_metadata_includes_http_status_context(self) -> None: - from agent_control_evaluator_galileo.luna import LunaEvaluator - from agent_control_evaluator_galileo.luna.client import GalileoLunaClient + async def test_rule_error_metadata_includes_http_status_context(self) -> None: + from agent_control_rule_galileo.luna import LunaRule + from agent_control_rule_galileo.luna.client import GalileoLunaClient - evaluator = LunaEvaluator.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) + rule = LunaRule.from_dict({"scorer_label": "toxicity", "threshold": 0.5}) request = httpx.Request( "POST", "https://api.example.test/internal/scorers/invoke?token=secret", @@ -856,7 +856,7 @@ async def test_evaluator_error_metadata_includes_http_status_context(self) -> No response=response, ) - result = await evaluator.evaluate("hello") + result = await rule.evaluate("hello") assert result.matched is False assert result.metadata is not None diff --git a/rules/contrib/galileo/tests/test_package_exports.py b/rules/contrib/galileo/tests/test_package_exports.py new file mode 100644 index 00000000..8933581d --- /dev/null +++ b/rules/contrib/galileo/tests/test_package_exports.py @@ -0,0 +1,37 @@ +"""Coverage for package-level exports and local-source metadata fallbacks.""" + +from __future__ import annotations + +import importlib.metadata +import importlib.util +from pathlib import Path + + +def test_package_version_falls_back_when_distribution_metadata_is_absent( + monkeypatch, +) -> 
None: + """Local source-tree imports should work before the package is installed.""" + + def _raise_not_found(_: str) -> str: + raise importlib.metadata.PackageNotFoundError("agent-control-rule-galileo") + + monkeypatch.setattr(importlib.metadata, "version", _raise_not_found) + + init_path = ( + Path(__file__).resolve().parents[1] + / "src" + / "agent_control_rule_galileo" + / "__init__.py" + ) + monkeypatch.syspath_prepend(str(init_path.parents[1])) + spec = importlib.util.spec_from_file_location( + "_agent_control_rule_galileo_version_probe", + init_path, + ) + assert spec is not None + assert spec.loader is not None + + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + + assert module.__version__ == "0.0.0.dev" diff --git a/evaluators/contrib/template/Makefile b/rules/contrib/template/Makefile similarity index 87% rename from evaluators/contrib/template/Makefile rename to rules/contrib/template/Makefile index d6944fc5..1e41a4d4 100644 --- a/evaluators/contrib/template/Makefile +++ b/rules/contrib/template/Makefile @@ -1,10 +1,10 @@ .PHONY: help test lint lint-fix typecheck check build PACKAGE_DIR := $(notdir $(abspath $(CURDIR))) -COVERAGE_XML := ../../../coverage-evaluators-$(PACKAGE_DIR).xml +COVERAGE_XML := ../../../coverage-rules-$(PACKAGE_DIR).xml help: - @echo "Agent Control contrib evaluator template" + @echo "Agent Control contrib rule template" @echo "" @echo " make test - run pytest" @echo " make lint - run ruff check" diff --git a/evaluators/contrib/template/README.md b/rules/contrib/template/README.md similarity index 59% rename from evaluators/contrib/template/README.md rename to rules/contrib/template/README.md index 7b03540d..81135610 100644 --- a/evaluators/contrib/template/README.md +++ b/rules/contrib/template/README.md @@ -1,6 +1,6 @@ -# Contrib Evaluator Template +# Contrib Rule Template -This directory is scaffolding for a new contrib evaluator package. 
+This directory is scaffolding for a new contrib rule package. It is intentionally excluded from repo automation until you convert it into a real package. In particular, `template/` does not participate in root `make check`, CI, semantic-release, or @@ -12,19 +12,19 @@ publishing because it ships a `pyproject.toml.template` placeholder instead of a Pick `` as a short lowercase single-word identifier such as `galileo`, `cisco`, or `budget`. That same value should appear in the steady-state package shape: -- directory: `evaluators/contrib//` -- pip package: `agent-control-evaluator-` -- Python module: `agent_control_evaluator_` -- extra name: `agent-control-evaluators[]` +- directory: `rules/contrib//` +- pip package: `agent-control-rule-` +- Python module: `agent_control_rule_` +- extra name: `agent-control-rules[]` The template uses `{{NAME}}` for that package identifier. It does not use `{{ORG}}`. -Keep the public evaluator reference separate from the package identifier: +Keep the public rule reference separate from the package identifier: -- `{{ENTRY_POINT}}` is the user-facing evaluator name and should match - `EvaluatorMetadata.name` in your package code. -- Single-evaluator packages can keep that public name flat, such as `budget`. -- Packages that expose a family of evaluator ids should namespace it, such as +- `{{ENTRY_POINT}}` is the user-facing rule name and should match + `RuleMetadata.name` in your package code. +- Single-rule packages can keep that public name flat, such as `budget`. +- Packages that expose a family of rule ids should namespace it, such as `cisco.ai_defense` or `galileo.luna`. ## Scaffold a new contrib package @@ -32,32 +32,32 @@ Keep the public evaluator reference separate from the package identifier: 1. 
Copy the template and rename the manifest: ```bash - cp -r evaluators/contrib/template evaluators/contrib/ - mv evaluators/contrib//pyproject.toml.template \ - evaluators/contrib//pyproject.toml + cp -r rules/contrib/template rules/contrib/ + mv rules/contrib//pyproject.toml.template \ + rules/contrib//pyproject.toml ``` 2. Replace placeholders in `pyproject.toml`: - `{{NAME}}` -> contrib package identifier - - `{{ENTRY_POINT}}` -> public evaluator reference / `EvaluatorMetadata.name` - - `{{EVALUATOR}}` -> evaluator module path segment (for example `budget` or `ai_defense`) - - `{{CLASS}}` -> evaluator class name + - `{{ENTRY_POINT}}` -> public rule reference / `RuleMetadata.name` + - `{{RULE}}` -> rule module path segment (for example `budget` or `ai_defense`) + - `{{CLASS}}` -> rule class name - `{{AUTHOR}}` -> authoring team - For a package with one primary evaluator, `{{ENTRY_POINT}}` is often just ``. For a - package that groups provider-specific evaluators, use `.`. + For a package with one primary rule, `{{ENTRY_POINT}}` is often just ``. For a + package that groups provider-specific rules, use `.`. The template starts new packages at `0.1.0`; change that if your release plan differs. Also replace the copied `README.md` with package-specific install, configuration, and usage docs before your first build or publish. Then confirm the package `version` reflects your - release plan and that the `agent-control-evaluators` / `agent-control-models` dependency + release plan and that the `agent-control-rules` / `agent-control-models` dependency floors match the compatibility floor you intend to support. Keep those dependency floors aligned with the builtin extra you add below before you commit the new package. 3. Add package code and tests: - - `src/agent_control_evaluator_/` + - `src/agent_control_rule_/` - `tests/` 4. 
Validate the package locally: @@ -76,11 +76,11 @@ Keep the public evaluator reference separate from the package identifier: Contributor-facing and user-facing package docs should treat this as the canonical install path: ```bash -pip install "agent-control-evaluators[]" +pip install "agent-control-rules[]" ``` -Direct wheel installs such as `pip install agent-control-evaluator-` can still be -documented, but they are secondary to the extra on `agent-control-evaluators`. +Direct wheel installs such as `pip install agent-control-rule-` can still be +documented, but they are secondary to the extra on `agent-control-rules`. In `pyproject.toml`, replace `` intentionally before the first build. For an in-repo contrib package on the shared Agent Control release train, @@ -91,29 +91,29 @@ choose and document the minimum supported Agent Control version explicitly. After the new package exists as a real contrib package, wire it into the repo contract: -1. Add the extra to `evaluators/builtin/pyproject.toml`: +1. Add the extra to `rules/builtin/pyproject.toml`: ```toml [project.optional-dependencies] - = ["agent-control-evaluator->="] + = ["agent-control-rule->="] ``` Keep this extra on the current monorepo release line. The release build rewrites builtin dependency floors to the active release version before publishing - `agent-control-evaluators`, so a lower source floor here would not survive into the + `agent-control-rules`, so a lower source floor here would not survive into the published extra metadata. -2. Add the workspace source pin to `evaluators/builtin/pyproject.toml`: +2. Add the workspace source pin to `rules/builtin/pyproject.toml`: ```toml [tool.uv.sources] - agent-control-evaluator- = { path = "../contrib/", editable = true } + agent-control-rule- = { path = "../contrib/", editable = true } ``` 3. 
Add the package to `tool.semantic_release.version_toml` in the root `pyproject.toml`: ```toml - "evaluators/contrib//pyproject.toml:project.version", + "rules/contrib//pyproject.toml:project.version", ``` The repo's release automation discovers real contrib packages automatically via @@ -123,4 +123,4 @@ After the new package exists as a real contrib package, wire it into the repo co Until those steps are done, the package is still scaffolding rather than a real contrib package. -Docs: https://docs.agentcontrol.dev/concepts/evaluators/contributing-evaluator +Docs: https://docs.agentcontrol.dev/concepts/rules/contributing-rule diff --git a/evaluators/contrib/template/pyproject.toml.template b/rules/contrib/template/pyproject.toml.template similarity index 57% rename from evaluators/contrib/template/pyproject.toml.template rename to rules/contrib/template/pyproject.toml.template index 2ca97af4..26ebd1b6 100644 --- a/evaluators/contrib/template/pyproject.toml.template +++ b/rules/contrib/template/pyproject.toml.template @@ -1,13 +1,13 @@ [project] -name = "agent-control-evaluator-{{NAME}}" +name = "agent-control-rule-{{NAME}}" version = "0.1.0" -description = "{{NAME}} evaluators for agent-control" +description = "{{NAME}} rules for agent-control" readme = "README.md" requires-python = ">=3.12" license = { text = "Apache-2.0" } authors = [{ name = "{{AUTHOR}}" }] dependencies = [ - "agent-control-evaluators>=", + "agent-control-rules>=", "agent-control-models>=", # Add your package-specific dependencies here ] @@ -21,18 +21,18 @@ dev = [ "mypy>=1.8.0", ] -[project.entry-points."agent_control.evaluators"] -# Keep this aligned with EvaluatorMetadata.name (for example "budget" or +[project.entry-points."agent_control.rules"] +# Keep this aligned with RuleMetadata.name (for example "budget" or # "cisco.ai_defense"). 
-"{{ENTRY_POINT}}" = "agent_control_evaluator_{{NAME}}.{{EVALUATOR}}:{{CLASS}}" +"{{ENTRY_POINT}}" = "agent_control_rule_{{NAME}}.{{RULE}}:{{CLASS}}" [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/agent_control_evaluator_{{NAME}}"] +packages = ["src/agent_control_rule_{{NAME}}"] [tool.uv.sources] -agent-control-evaluators = { path = "../../builtin", editable = true } +agent-control-rules = { path = "../../builtin", editable = true } agent-control-models = { path = "../../../models", editable = true } diff --git a/scripts/build.py b/scripts/build.py index 1cdc0cd4..246056c2 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -6,7 +6,7 @@ then cleans up afterward. This allows the published wheels to be self-contained. Usage: - python scripts/build.py [models|evaluators|sdk|server|contrib|all|] + python scripts/build.py [models|rules|sdk|server|contrib|all|] """ from __future__ import annotations @@ -160,7 +160,7 @@ def build_sdk() -> None: set_package_version(sdk_pyproject, version) sync_dependency_floors( sdk_pyproject, - ["agent-control-evaluators", *discover_contrib_distribution_names()], + ["agent-control-rules", *discover_contrib_distribution_names()], version, ) @@ -177,7 +177,7 @@ def build_sdk() -> None: def build_server() -> None: """Build agent-control-server with vendored packages. - Note: evaluators are NOT vendored - server uses agent-control-evaluators as a + Note: rules are NOT vendored - server uses agent-control-rules as a runtime dependency to avoid duplicate module conflicts with contrib extras. 
""" version = get_global_version() @@ -226,7 +226,7 @@ def build_server() -> None: set_package_version(server_pyproject, version) sync_dependency_floors( server_pyproject, - ["agent-control-evaluators", *discover_contrib_distribution_names()], + ["agent-control-rules", *discover_contrib_distribution_names()], version, ) @@ -240,32 +240,32 @@ def build_server() -> None: shutil.rmtree(target) -def build_evaluators() -> None: - """Build agent-control-evaluators (standalone, no vendoring needed).""" +def build_rules() -> None: + """Build agent-control-rules (standalone, no vendoring needed).""" build_python_package( - "agent-control-evaluators", - ROOT / "evaluators" / "builtin", + "agent-control-rules", + ROOT / "rules" / "builtin", get_global_version(), ["agent-control-models", *discover_contrib_distribution_names()], ) def build_contrib_package(package: ContribPackage, version: str) -> None: - """Build a discovered contrib evaluator package.""" + """Build a discovered contrib rule package.""" build_python_package( package.package, ROOT / Path(package.directory), version, - ["agent-control-evaluators", "agent-control-models"], + ["agent-control-rules", "agent-control-models"], ) def build_contrib() -> None: - """Build all discovered contrib evaluator packages.""" + """Build all discovered contrib rule packages.""" version = get_global_version() packages = discover_contrib_packages() if not packages: - print("No contrib evaluator packages discovered.") + print("No contrib rule packages discovered.") return package_names = ", ".join(package.name for package in packages) @@ -275,7 +275,7 @@ def build_contrib() -> None: def build_named_contrib_package(target: str) -> None: - """Build one discovered contrib evaluator package by name.""" + """Build one discovered contrib rule package by name.""" packages = discover_contrib_by_name() package = packages.get(target) if package is None: @@ -291,7 +291,7 @@ def build_all() -> None: """Build all packages.""" print(f"Building all 
packages (version {get_global_version()})\n") build_models() - build_evaluators() + build_rules() build_contrib() build_sdk() build_server() @@ -302,7 +302,7 @@ def usage() -> str: """Return the CLI usage string.""" return ( "Usage: python scripts/build.py " - "[models|evaluators|sdk|server|contrib|all|]" + "[models|rules|sdk|server|contrib|all|]" ) @@ -311,8 +311,8 @@ def usage() -> str: if target == "models": build_models() - elif target == "evaluators": - build_evaluators() + elif target == "rules": + build_rules() elif target == "sdk": build_sdk() elif target == "server": diff --git a/scripts/contrib_packages.py b/scripts/contrib_packages.py index 2f5d5e9b..421b4b6e 100644 --- a/scripts/contrib_packages.py +++ b/scripts/contrib_packages.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Discover and verify real contrib evaluator packages.""" +"""Discover and verify real contrib rule packages.""" from __future__ import annotations @@ -11,10 +11,9 @@ from pathlib import Path from typing import Any - -EVALUATOR_ENTRY_GROUP = "agent_control.evaluators" +RULE_ENTRY_GROUP = "agent_control.rules" REPO_ROOT = Path(__file__).resolve().parent.parent -CONTRIB_ROOT = REPO_ROOT / "evaluators" / "contrib" +CONTRIB_ROOT = REPO_ROOT / "rules" / "contrib" class ContribPackagesError(Exception): @@ -23,13 +22,13 @@ class ContribPackagesError(Exception): @dataclass(frozen=True) class ContribPackage: - """Normalized metadata for a real contrib evaluator package.""" + """Normalized metadata for a real contrib rule package.""" name: str directory: str package: str extra: str - entry_group: str = EVALUATOR_ENTRY_GROUP + entry_group: str = RULE_ENTRY_GROUP @property def version_toml_entry(self) -> str: @@ -117,7 +116,7 @@ def dependency_name(requirement: str) -> str: def discover_contrib_packages() -> list[ContribPackage]: - """Discover real contrib evaluator packages under evaluators/contrib.""" + """Discover real contrib rule packages under rules/contrib.""" packages: list[ContribPackage] 
= [] @@ -148,14 +147,14 @@ def discover_contrib_packages() -> list[ContribPackage]: path=manifest_path, parent_description="project", ) - evaluator_entries = entry_points.get(EVALUATOR_ENTRY_GROUP) - if not isinstance(evaluator_entries, dict) or not evaluator_entries: + rule_entries = entry_points.get(RULE_ENTRY_GROUP) + if not isinstance(rule_entries, dict) or not rule_entries: raise ContribPackagesError( f"{display_path(manifest_path)} must define at least one " - f'[project.entry-points."{EVALUATOR_ENTRY_GROUP}"] entry.' + f'[project.entry-points."{RULE_ENTRY_GROUP}"] entry.' ) - expected_package_name = f"agent-control-evaluator-{candidate.name}" + expected_package_name = f"agent-control-rule-{candidate.name}" if project_name != expected_package_name: raise ContribPackagesError( f"{display_path(manifest_path)} declares project.name = {project_name!r}, " @@ -178,12 +177,17 @@ def verify_contrib_packages(packages: list[ContribPackage]) -> list[str]: """Return human-readable verification errors for contrib wiring drift.""" root_pyproject_path = REPO_ROOT / "pyproject.toml" - builtin_pyproject_path = REPO_ROOT / "evaluators" / "builtin" / "pyproject.toml" + builtin_pyproject_path = REPO_ROOT / "rules" / "builtin" / "pyproject.toml" root_pyproject = load_toml(root_pyproject_path) builtin_pyproject = load_toml(builtin_pyproject_path) - tool_table = require_table(root_pyproject, "tool", path=root_pyproject_path, parent_description="") + tool_table = require_table( + root_pyproject, + "tool", + path=root_pyproject_path, + parent_description="", + ) semantic_release = require_table( tool_table, "semantic_release", @@ -191,7 +195,9 @@ def verify_contrib_packages(packages: list[ContribPackage]) -> list[str]: parent_description="tool", ) version_toml = semantic_release.get("version_toml") - if not isinstance(version_toml, list) or not all(isinstance(item, str) for item in version_toml): + if not isinstance(version_toml, list) or not all( + isinstance(item, str) for item in 
version_toml + ): raise ContribPackagesError( f"{display_path(root_pyproject_path)} must define [tool.semantic_release].version_toml " "as a list of strings." @@ -243,14 +249,15 @@ def verify_contrib_packages(packages: list[ContribPackage]) -> list[str]: errors.append( f"Missing builtin extra for contrib package {package.name!r}: " f"add [project.optional-dependencies].{package.extra} = " - f"[\"{package.package}>=\"] in {display_path(builtin_pyproject_path)}." + f"[\"{package.package}>=\"] in " + f"{display_path(builtin_pyproject_path)}." ) elif not isinstance(extra_dependencies, list) or not all( isinstance(item, str) for item in extra_dependencies ): errors.append( - f"Builtin extra {package.extra!r} in {display_path(builtin_pyproject_path)} must be " - "a list of dependency strings." + f"Builtin extra {package.extra!r} in " + f"{display_path(builtin_pyproject_path)} must be a list of dependency strings." ) else: dependency_names = {dependency_name(item) for item in extra_dependencies} @@ -323,7 +330,7 @@ def build_parser() -> argparse.ArgumentParser: """Build the CLI parser.""" parser = argparse.ArgumentParser( - description="Discover and verify real contrib evaluator packages." + description="Discover and verify real contrib rule packages." 
) parser.add_argument( "command", diff --git a/scripts/tests/test_build.py b/scripts/tests/test_build.py index eeb8087c..64696ebc 100644 --- a/scripts/tests/test_build.py +++ b/scripts/tests/test_build.py @@ -16,13 +16,13 @@ def test_sync_dependency_floors_updates_internal_minimums(tmp_path: Path) -> Non """ [project] dependencies = [ - "agent-control-evaluators>=7.5.0", + "agent-control-rules>=7.5.0", "agent-control-models>=7.5.0,<8.0.0", "httpx>=0.28.0", ] [project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo>=7.5.0"] +galileo = ["agent-control-rule-galileo>=7.5.0"] """.strip() ) @@ -30,9 +30,9 @@ def test_sync_dependency_floors_updates_internal_minimums(tmp_path: Path) -> Non build.sync_dependency_floors( pyproject_path, [ - "agent-control-evaluators", + "agent-control-rules", "agent-control-models", - "agent-control-evaluator-galileo", + "agent-control-rule-galileo", ], "7.6.0", ) @@ -42,13 +42,13 @@ def test_sync_dependency_floors_updates_internal_minimums(tmp_path: Path) -> Non """ [project] dependencies = [ - "agent-control-evaluators>=7.6.0", + "agent-control-rules>=7.6.0", "agent-control-models>=7.6.0,<8.0.0", "httpx>=0.28.0", ] [project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo>=7.6.0"] +galileo = ["agent-control-rule-galileo>=7.6.0"] """.strip() ) @@ -59,21 +59,21 @@ def test_sync_dependency_floors_tolerates_whitespace_around_lower_bounds(tmp_pat """ [project] dependencies = [ - "agent-control-evaluators >= 7.5.0", + "agent-control-rules >= 7.5.0", "agent-control-models >= 7.5.0,<8.0.0", ] [project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo >= 7.5.0"] +galileo = ["agent-control-rule-galileo >= 7.5.0"] """.strip() ) build.sync_dependency_floors( pyproject_path, [ - "agent-control-evaluators", + "agent-control-rules", "agent-control-models", - "agent-control-evaluator-galileo", + "agent-control-rule-galileo", ], "7.6.0", ) @@ -82,18 +82,18 @@ def 
test_sync_dependency_floors_tolerates_whitespace_around_lower_bounds(tmp_pat """ [project] dependencies = [ - "agent-control-evaluators >= 7.6.0", + "agent-control-rules >= 7.6.0", "agent-control-models >= 7.6.0,<8.0.0", ] [project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo >= 7.6.0"] +galileo = ["agent-control-rule-galileo >= 7.6.0"] """.strip() ) -def test_builtin_evaluators_manifest_keeps_models_floor_rewritable() -> None: - builtin_pyproject = SCRIPTS_DIR.parent / "evaluators" / "builtin" / "pyproject.toml" +def test_builtin_rules_manifest_keeps_models_floor_rewritable() -> None: + builtin_pyproject = SCRIPTS_DIR.parent / "rules" / "builtin" / "pyproject.toml" with builtin_pyproject.open("rb") as handle: manifest = tomllib.load(handle) diff --git a/scripts/tests/test_contrib_packages.py b/scripts/tests/test_contrib_packages.py index 82ddb486..53ca6f85 100644 --- a/scripts/tests/test_contrib_packages.py +++ b/scripts/tests/test_contrib_packages.py @@ -44,17 +44,17 @@ def _write_fake_repo( """Create a minimal repo layout that exercises contrib package wiring.""" version_entry = ( - '"evaluators/contrib/example/pyproject.toml:project.version"' + '"rules/contrib/example/pyproject.toml:project.version"' if include_version_entry else "" ) extra_entry = ( - 'example = ["agent-control-evaluator-example>=1.0.0"]' + 'example = ["agent-control-rule-example>=1.0.0"]' if include_builtin_extra else "" ) source_entry = ( - 'agent-control-evaluator-example = { path = "../contrib/example", editable = true }' + 'agent-control-rule-example = { path = "../contrib/example", editable = true }' if include_builtin_source else "" ) @@ -74,10 +74,10 @@ def _write_fake_repo( """, ) _write_text( - root / "evaluators" / "builtin" / "pyproject.toml", + root / "rules" / "builtin" / "pyproject.toml", f""" [project] - name = "agent-control-evaluators" + name = "agent-control-rules" version = "1.0.0" [project.optional-dependencies] @@ -90,14 +90,14 @@ def 
_write_fake_repo( """, ) _write_text( - root / "evaluators" / "contrib" / "example" / "pyproject.toml", + root / "rules" / "contrib" / "example" / "pyproject.toml", """ [project] - name = "agent-control-evaluator-example" + name = "agent-control-rule-example" version = "1.0.0" - [project.entry-points."agent_control.evaluators"] - example = "agent_control_evaluator_example:ExampleEvaluator" + [project.entry-points."agent_control.rules"] + example = "agent_control_rule_example:ExampleRule" """, ) @@ -109,18 +109,18 @@ def test_discover_contrib_packages_skips_template_and_non_packages( module = _load_module() repo_root = tmp_path / "repo" _write_fake_repo(repo_root) - (repo_root / "evaluators" / "contrib" / "template").mkdir(parents=True) - (repo_root / "evaluators" / "contrib" / "notes").mkdir(parents=True) + (repo_root / "rules" / "contrib" / "template").mkdir(parents=True) + (repo_root / "rules" / "contrib" / "notes").mkdir(parents=True) monkeypatch.setattr(module, "REPO_ROOT", repo_root) - monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "rules" / "contrib") # When: discovering contrib packages packages = module.discover_contrib_packages() # Then: only the real package is returned assert [package.name for package in packages] == ["example"] - assert packages[0].directory == "evaluators/contrib/example" - assert packages[0].package == "agent-control-evaluator-example" + assert packages[0].directory == "rules/contrib/example" + assert packages[0].package == "agent-control-rule-example" def test_verify_contrib_packages_reports_missing_root_and_builtin_wiring( @@ -136,7 +136,7 @@ def test_verify_contrib_packages_reports_missing_root_and_builtin_wiring( include_builtin_source=False, ) monkeypatch.setattr(module, "REPO_ROOT", repo_root) - monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / 
"rules" / "contrib") # When: verifying the contrib package wiring packages = module.discover_contrib_packages() @@ -156,7 +156,7 @@ def test_verify_contrib_packages_accepts_complete_wiring( repo_root = tmp_path / "repo" _write_fake_repo(repo_root) monkeypatch.setattr(module, "REPO_ROOT", repo_root) - monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "evaluators" / "contrib") + monkeypatch.setattr(module, "CONTRIB_ROOT", repo_root / "rules" / "contrib") # When: verifying the contrib package wiring packages = module.discover_contrib_packages() diff --git a/sdks/python/ARCHITECTURE.md b/sdks/python/ARCHITECTURE.md index 4a48c09a..c22b4144 100644 --- a/sdks/python/ARCHITECTURE.md +++ b/sdks/python/ARCHITECTURE.md @@ -14,14 +14,14 @@ sdks/python/src/agent_control/ ├── policies.py # Policy management operations ├── controls.py # Control management operations ├── control_decorators.py # @control() decorator implementation -├── evaluation.py # Evaluation and evaluator operations +├── evaluation.py # Evaluation and rule operations ├── observability.py # Observability and telemetry ├── settings.py # SDK configuration and settings ├── tracing.py # Distributed tracing support ├── py.typed # PEP 561 type marker -└── evaluators/ # Evaluator base classes and discovery system - ├── __init__.py # Evaluator discovery, registration, and Luna integration - └── base.py # Base Evaluator and EvaluatorMetadata classes +└── rules/ # Rule base classes and discovery system + ├── __init__.py # Rule discovery, registration, and Luna integration + └── base.py # Base Rule and RuleMetadata classes ``` ## Module Responsibilities @@ -134,7 +134,7 @@ async with agent_control.AgentControlClient() as client: "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "\\\\d{3}-\\\\d{2}-\\\\d{4}", "flags": []}, }, @@ -175,7 +175,7 @@ async def chat(message: str) -> str: **Purpose**: 
Client-side and server-side evaluation management. **Key Components**: -- Evaluator registration and management +- Rule registration and management - Evaluation execution - Integration with control evaluation pipeline @@ -206,29 +206,29 @@ async def chat(message: str) -> str: - Span creation and management - Context propagation -### `evaluators/` - Evaluator System +### `rules/` - Rule System -**Purpose**: Evaluator base classes, discovery, and registration system. +**Purpose**: Rule base classes, discovery, and registration system. **Key Components**: -- Base evaluator classes (`Evaluator`, `EvaluatorMetadata`) -- Evaluator discovery via entry points -- Third-party evaluator integration (e.g., Luna, Guardrails AI) -- Registration functions for custom evaluators +- Base rule classes (`Rule`, `RuleMetadata`) +- Rule discovery via entry points +- Third-party rule integration (e.g., Luna, Guardrails AI) +- Registration functions for custom rules **Structure**: -- `__init__.py` - Evaluator discovery (`discover_evaluators()`, `list_evaluators()`), registration (`register_evaluator()`), and optional Luna integration -- `base.py` - Base `Evaluator` and `EvaluatorMetadata` classes (re-exported from `agent_control_models`) +- `__init__.py` - Rule discovery (`discover_rules()`, `list_rules()`), registration (`register_rule()`), and optional Luna integration +- `base.py` - Base `Rule` and `RuleMetadata` classes (re-exported from `agent_control_models`) **Usage**: ```python -from agent_control.evaluators import Evaluator, EvaluatorMetadata, discover_evaluators +from agent_control.rules import Rule, RuleMetadata, discover_rules -# Discover all available evaluators (built-in and third-party) -discover_evaluators() +# Discover all available rules (built-in and third-party) +discover_rules() -# Create custom evaluator by extending base class -class MyCustomEvaluator(Evaluator): +# Create custom rule by extending base class +class MyCustomRule(Rule): pass ``` @@ -373,5 +373,5 @@ 
The Agent Control SDK architecture provides: - ✅ **Testability** with independent modules - ✅ **Scalability** for future endpoint and feature additions - ✅ **Type safety** with full type annotations -- ✅ **Extensibility** through evaluator system +- ✅ **Extensibility** through rule system - ✅ **Observability** with built-in tracing and telemetry diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml index a38bc0a3..caef972a 100644 --- a/sdks/python/pyproject.toml +++ b/sdks/python/pyproject.toml @@ -5,7 +5,7 @@ description = "Python SDK for Agent Control - protect your AI agents with contro requires-python = ">=3.12" # Note: agent-control-models, agent-control-engine, and agent-control-telemetry # are bundled at build time -# Note: agent-control-evaluators is a runtime dependency (NOT vendored) to avoid +# Note: agent-control-rules is a runtime dependency (NOT vendored) to avoid # duplicate module conflict when galileo extras are installed dependencies = [ "httpx>=0.26.0", @@ -14,7 +14,7 @@ dependencies = [ "docstring-parser>=0.15", # For @tool decorator schema inference "google-re2>=1.1", # For engine (bundled) "jsonschema>=4.0.0", # For models/engine (bundled) - "agent-control-evaluators>=7.5.0", # NOT vendored - avoid conflict with galileo + "agent-control-rules>=7.5.0", # NOT vendored - avoid conflict with galileo ] authors = [ {name = "Agent Control Team"} @@ -43,7 +43,7 @@ otel = [ "opentelemetry-sdk>=1.24.0", "opentelemetry-exporter-otlp-proto-http>=1.24.0", ] -galileo = ["agent-control-evaluator-galileo>=7.5.0"] +galileo = ["agent-control-rule-galileo>=7.5.0"] [dependency-groups] dev = [ @@ -55,7 +55,7 @@ dev = [ "agent-control-models", "agent-control-engine", "agent-control-telemetry", - "agent-control-evaluators", + "agent-control-rules", "strands-agents>=1.26.0", # For strands integration tests ] @@ -94,6 +94,6 @@ known-first-party = ["agent_control"] agent-control-models = { workspace = true } agent-control-engine = { workspace = true } 
agent-control-telemetry = { workspace = true } -agent-control-evaluators = { workspace = true } +agent-control-rules = { workspace = true } # For local dev: use local galileo package instead of PyPI -agent-control-evaluator-galileo = { path = "../../evaluators/contrib/galileo", editable = true } +agent-control-rule-galileo = { path = "../../rules/contrib/galileo", editable = true } diff --git a/sdks/python/src/agent_control/__init__.py b/sdks/python/src/agent_control/__init__.py index f0d07520..2b790d09 100644 --- a/sdks/python/src/agent_control/__init__.py +++ b/sdks/python/src/agent_control/__init__.py @@ -63,9 +63,9 @@ async def handle_input(user_message: str) -> str: ControlSelector, EvaluationRequest, EvaluationResult, - EvaluatorResult, - EvaluatorSpec, JSONObject, + RuleResult, + RuleSpec, Step, StepSchema, TemplateControlInput, @@ -79,7 +79,7 @@ async def handle_input(user_message: str) -> str: set_trace_context_provider, ) -from . import agents, control_bindings, controls, evaluation, evaluators, policies +from . 
import agents, control_bindings, controls, evaluation, policies, rules from ._control_registry import ( StepSchemaDict, get_registered_steps, @@ -867,7 +867,7 @@ async def list_agents( Returns: Dictionary containing: - agents: List of agent summaries with agent_name, - policy_id, created_at, step_count, evaluator_count + policy_id, created_at, step_count, rule_count - pagination: Object with limit, total, next_cursor, has_more Raises: @@ -1137,7 +1137,7 @@ async def main(): "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\\d{3}-\\d{2}-\\d{4}"} } @@ -1578,7 +1578,7 @@ async def main(): "controls", "control_bindings", "evaluation", - "evaluators", + "rules", # Policy-Control management "add_control_to_policy", "remove_control_from_policy", @@ -1628,7 +1628,7 @@ async def main(): "ControlScope", "ControlAction", "ControlMatch", - "EvaluatorSpec", - "EvaluatorResult", + "RuleSpec", + "RuleResult", "TemplateValue", ] diff --git a/sdks/python/src/agent_control/agents.py b/sdks/python/src/agent_control/agents.py index 78c1c36a..52529f76 100644 --- a/sdks/python/src/agent_control/agents.py +++ b/sdks/python/src/agent_control/agents.py @@ -2,7 +2,7 @@ from typing import Any, Literal, cast -from agent_control_engine import ensure_evaluators_discovered +from agent_control_engine import ensure_rules_discovered from agent_control_models import Agent from agent_control_models.server import AgentControlsResponse @@ -45,7 +45,7 @@ async def register_agent( merges controls bound to that target into the returned set. The two fields must be supplied together. 
""" - ensure_evaluators_discovered() + ensure_rules_discovered() if (target_type is None) != (target_id is None): raise ValueError( diff --git a/sdks/python/src/agent_control/control_decorators.py b/sdks/python/src/agent_control/control_decorators.py index 6a6d3491..089fd1f6 100644 --- a/sdks/python/src/agent_control/control_decorators.py +++ b/sdks/python/src/agent_control/control_decorators.py @@ -23,7 +23,7 @@ async def chat(message: str) -> str: # Server-side controls define: # - stage: "pre" or "post" # - selector.path: "input" or "output" - # - evaluator: regex, list, Luna evaluator, etc. + # - rule: regex, list, Luna rule, etc. # - action: deny, steer, or observe """ @@ -607,7 +607,7 @@ def _handle_evaluation_result(result: dict[str, Any]) -> None: elif isinstance(steering_context_obj, str): steering_context = steering_context_obj else: - # No steering context provided, use evaluator message + # No steering context provided, use rule message steering_context = message raise ControlSteerError( diff --git a/sdks/python/src/agent_control/controls.py b/sdks/python/src/agent_control/controls.py index 8478c357..1d482715 100644 --- a/sdks/python/src/agent_control/controls.py +++ b/sdks/python/src/agent_control/controls.py @@ -232,7 +232,7 @@ async def create_control( "scope": {"step_types": ["llm"], "stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\\d{3}-\\d{2}-\\d{4}"} } diff --git a/sdks/python/src/agent_control/evaluation.py b/sdks/python/src/agent_control/evaluation.py index 767a3e02..543ed2b9 100644 --- a/sdks/python/src/agent_control/evaluation.py +++ b/sdks/python/src/agent_control/evaluation.py @@ -6,7 +6,7 @@ from typing import Any, Literal, cast import httpx -from agent_control_engine import list_evaluators +from agent_control_engine import list_rules from agent_control_engine.core import ControlEngine from agent_control_models import ( ControlDefinitionRuntime, @@ 
-14,7 +14,7 @@ EvaluationRequest, EvaluationResponse, EvaluationResult, - EvaluatorResult, + RuleResult, Step, ) @@ -349,7 +349,7 @@ async def check_evaluation_with_local( local_controls: list[_ControlAdapter] = [] parse_errors: list[ControlMatch] = [] - available_evaluators = list_evaluators() + available_rules = list_rules() server_control_payloads: list[dict[str, Any]] = [] for control in controls: @@ -372,20 +372,20 @@ async def check_evaluation_with_local( try: control_def = ControlDefinitionRuntime.model_validate(control_data) - for _, evaluator_spec in control_def.iter_condition_leaf_parts(): - evaluator_name = evaluator_spec.name + for _, rule_spec in control_def.iter_condition_leaf_parts(): + rule_name = rule_spec.name - if ":" in evaluator_name: + if ":" in rule_name: raise RuntimeError( f"Control '{control['name']}' is marked execution='sdk' but uses " - f"agent-scoped evaluator '{evaluator_name}' which is server-only. " - "Set execution='server' or use a built-in evaluator." + f"agent-scoped rule '{rule_name}' which is server-only. " + "Set execution='server' or use a built-in rule." ) - if evaluator_name not in available_evaluators: + if rule_name not in available_rules: raise RuntimeError( - f"Control '{control['name']}' is marked execution='sdk' but evaluator " - f"'{evaluator_name}' is not available in the SDK. " - "Install the evaluator or set execution='server'." + f"Control '{control['name']}' is marked execution='sdk' but rule " + f"'{rule_name}' is not available in the SDK. " + "Install the rule or set execution='server'." 
) local_controls.append( @@ -405,7 +405,7 @@ async def check_evaluation_with_local( control_id=control_id, control_name=control_name, action="observe", - result=EvaluatorResult( + result=RuleResult( matched=False, confidence=0.0, error=f"Failed to parse local control: {exc}", diff --git a/sdks/python/src/agent_control/evaluation_events.py b/sdks/python/src/agent_control/evaluation_events.py index 8db75f63..8bf102dc 100644 --- a/sdks/python/src/agent_control/evaluation_events.py +++ b/sdks/python/src/agent_control/evaluation_events.py @@ -51,12 +51,12 @@ def observability_metadata( identity = control_def.observability_identity() return ( identity.selector_path, - identity.evaluator_name, + identity.rule_name, { - "primary_evaluator": identity.evaluator_name, + "primary_rule": identity.rule_name, "primary_selector_path": identity.selector_path, "leaf_count": identity.leaf_count, - "all_evaluators": identity.all_evaluators, + "all_rules": identity.all_rules, "all_selector_paths": identity.all_selector_paths, }, ) @@ -110,10 +110,10 @@ def _build_events_for_matches( control_def = control_lookup.get(match.control_id) event_metadata = _safe_event_metadata(dict(match.result.metadata or {})) selector_path = None - evaluator_name = None + rule_name = None if control_def is not None: - selector_path, evaluator_name, identity_metadata = observability_metadata(control_def) + selector_path, rule_name, identity_metadata = observability_metadata(control_def) event_metadata.update(identity_metadata) events.append( @@ -130,7 +130,7 @@ def _build_events_for_matches( matched=matched, confidence=match.result.confidence, timestamp=now, - evaluator_name=evaluator_name, + rule_name=rule_name, selector_path=selector_path, error_message=match.result.error if include_error_message else None, metadata=event_metadata, diff --git a/sdks/python/src/agent_control/evaluators/__init__.py b/sdks/python/src/agent_control/evaluators/__init__.py deleted file mode 100644 index 73714717..00000000 --- 
a/sdks/python/src/agent_control/evaluators/__init__.py +++ /dev/null @@ -1,64 +0,0 @@ -"""Evaluator system for agent_control. - -This module provides an evaluator architecture for extending agent_control -with external evaluation systems like Galileo Luna, Guardrails AI, etc. - -Evaluator Discovery: - Call `discover_evaluators()` at startup to load evaluators. This loads: - - Built-in evaluators (regex, list, json, sql) from agent_control_evaluators - - Third-party evaluators via the 'agent_control.evaluators' entry point group - - Then use `list_evaluators()` to get available evaluators. - -Galileo evaluators: - When installed with galileo extras, the Galileo evaluator types are available: - ```python - from agent_control.evaluators import LunaEvaluator, LunaEvaluatorConfig # if galileo installed - ``` -""" - -from agent_control_engine import ( - discover_evaluators, - ensure_evaluators_discovered, - list_evaluators, -) -from agent_control_evaluators import register_evaluator - -from .base import Evaluator, EvaluatorMetadata - -__all__ = [ - "Evaluator", - "EvaluatorMetadata", - "discover_evaluators", - "ensure_evaluators_discovered", - "list_evaluators", - "register_evaluator", -] - -# Optionally export Luna types when available -try: - from agent_control_evaluator_galileo.luna import ( # type: ignore[import-not-found] # noqa: F401 - LUNA_AVAILABLE, - GalileoLunaClient, - LunaEvaluator, - LunaEvaluatorConfig, - LunaOperator, - ScorerInvokeInputs, - ScorerInvokeRequest, - ScorerInvokeResponse, - ) - - __all__.extend( - [ - "GalileoLunaClient", - "ScorerInvokeInputs", - "ScorerInvokeRequest", - "ScorerInvokeResponse", - "LunaEvaluator", - "LunaEvaluatorConfig", - "LunaOperator", - "LUNA_AVAILABLE", - ] - ) -except ImportError: - pass diff --git a/sdks/python/src/agent_control/evaluators/base.py b/sdks/python/src/agent_control/evaluators/base.py deleted file mode 100644 index 33c23f9f..00000000 --- a/sdks/python/src/agent_control/evaluators/base.py +++ /dev/null @@ 
-1,9 +0,0 @@ -"""Base classes for agent_control evaluators. - -Re-exports from agent_control_evaluators for convenience. -""" - -# Re-export from the evaluators package (where they're now defined) -from agent_control_evaluators import Evaluator, EvaluatorMetadata - -__all__ = ["Evaluator", "EvaluatorMetadata"] diff --git a/sdks/python/src/agent_control/integrations/google_adk/_extractors.py b/sdks/python/src/agent_control/integrations/google_adk/_extractors.py index 1552ce0b..af0960b5 100644 --- a/sdks/python/src/agent_control/integrations/google_adk/_extractors.py +++ b/sdks/python/src/agent_control/integrations/google_adk/_extractors.py @@ -79,7 +79,7 @@ def _to_jsonable(value: Any) -> Any: def _json_dumps(value: Any) -> str: - """Serialize structured content deterministically for evaluator input.""" + """Serialize structured content deterministically for rule input.""" return json.dumps(value, sort_keys=True) diff --git a/sdks/python/src/agent_control/otel_sink.py b/sdks/python/src/agent_control/otel_sink.py index e724f5af..3bcc147c 100644 --- a/sdks/python/src/agent_control/otel_sink.py +++ b/sdks/python/src/agent_control/otel_sink.py @@ -129,8 +129,8 @@ def control_event_to_otel_span(event: ControlExecutionEvent) -> OTELControlEvent if event.execution_duration_ms is not None: attributes["agent_control.execution_duration_ms"] = event.execution_duration_ms - if event.evaluator_name is not None: - attributes["agent_control.evaluator_name"] = event.evaluator_name + if event.rule_name is not None: + attributes["agent_control.rule_name"] = event.rule_name if event.selector_path is not None: attributes["agent_control.selector_path"] = event.selector_path if event.error_message is not None: diff --git a/sdks/python/src/agent_control/rules/__init__.py b/sdks/python/src/agent_control/rules/__init__.py new file mode 100644 index 00000000..7b366893 --- /dev/null +++ b/sdks/python/src/agent_control/rules/__init__.py @@ -0,0 +1,64 @@ +"""Rule system for agent_control. 
+ +This module provides a rule architecture for extending agent_control +with external evaluation systems like Galileo Luna, Guardrails AI, etc. + +Rule Discovery: + Call `discover_rules()` at startup to load rules. This loads: + - Built-in rules (regex, list, json, sql) from agent_control_rules + - Third-party rules via the 'agent_control.rules' entry point group + + Then use `list_rules()` to get available rules. + +Galileo rules: + When installed with galileo extras, the Galileo rule types are available: + ```python + from agent_control.rules import LunaRule, LunaRuleConfig # if galileo installed + ``` +""" + +from agent_control_engine import ( + discover_rules, + ensure_rules_discovered, + list_rules, +) +from agent_control_rules import register_rule + +from .base import Rule, RuleMetadata + +__all__ = [ + "Rule", + "RuleMetadata", + "discover_rules", + "ensure_rules_discovered", + "list_rules", + "register_rule", +] + +# Optionally export Luna types when available +try: + from agent_control_rule_galileo.luna import ( # type: ignore[import-not-found] # noqa: F401 + LUNA_AVAILABLE, + GalileoLunaClient, + LunaOperator, + LunaRule, + LunaRuleConfig, + ScorerInvokeInputs, + ScorerInvokeRequest, + ScorerInvokeResponse, + ) + + __all__.extend( + [ + "GalileoLunaClient", + "ScorerInvokeInputs", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + "LunaRule", + "LunaRuleConfig", + "LunaOperator", + "LUNA_AVAILABLE", + ] + ) +except ImportError: + pass diff --git a/sdks/python/src/agent_control/rules/base.py b/sdks/python/src/agent_control/rules/base.py new file mode 100644 index 00000000..82da8f18 --- /dev/null +++ b/sdks/python/src/agent_control/rules/base.py @@ -0,0 +1,9 @@ +"""Base classes for agent_control rules. + +Re-exports from agent_control_rules for convenience. 
+""" + +# Re-export from the rules package (where they're now defined) +from agent_control_rules import Rule, RuleMetadata + +__all__ = ["Rule", "RuleMetadata"] diff --git a/sdks/python/tests/conftest.py b/sdks/python/tests/conftest.py index 2110594c..c4cab2a8 100644 --- a/sdks/python/tests/conftest.py +++ b/sdks/python/tests/conftest.py @@ -194,7 +194,7 @@ async def test_control( "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "test", "flags": []}, }, diff --git a/sdks/python/tests/test_control_decorators.py b/sdks/python/tests/test_control_decorators.py index ca059c55..2a320aae 100644 --- a/sdks/python/tests/test_control_decorators.py +++ b/sdks/python/tests/test_control_decorators.py @@ -957,7 +957,7 @@ async def test_func(): @pytest.mark.asyncio async def test_steering_context_fallback_to_message(self, mock_agent): - """Test steering_context falls back to evaluator message when not provided.""" + """Test steering_context falls back to rule message when not provided.""" response_without_context = { "is_safe": False, "confidence": 0.85, @@ -969,7 +969,7 @@ async def test_steering_context_fallback_to_message(self, mock_agent): "steering_context": None, # No steering context provided "result": { "matched": True, - "message": "Default evaluator message", + "message": "Default rule message", "metadata": {} } } @@ -986,8 +986,8 @@ async def test_func(): with pytest.raises(ControlSteerError) as exc_info: await test_func() - # Should fall back to evaluator message - assert exc_info.value.steering_context == "Default evaluator message" + # Should fall back to rule message + assert exc_info.value.steering_context == "Default rule message" # ============================================================================= diff --git a/sdks/python/tests/test_controls_api.py b/sdks/python/tests/test_controls_api.py index 78a01c4e..d4381574 100644 --- 
a/sdks/python/tests/test_controls_api.py +++ b/sdks/python/tests/test_controls_api.py @@ -96,7 +96,7 @@ async def test_create_control_accepts_template_control_input() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -295,7 +295,7 @@ async def test_render_control_template_calls_preview_endpoint() -> None: "scope": {}, "condition": { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "x"}}, + "rule": {"name": "regex", "config": {"pattern": "x"}}, }, "action": {"decision": "deny"}, }, @@ -314,7 +314,7 @@ async def test_render_control_template_calls_preview_endpoint() -> None: "scope": {}, "condition": { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "x"}}, + "rule": {"name": "regex", "config": {"pattern": "x"}}, }, "action": {"decision": "deny"}, }, @@ -345,7 +345,7 @@ async def test_validate_control_data_accepts_template_control_input() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -393,7 +393,7 @@ async def test_set_control_data_accepts_template_control_input() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -422,7 +422,7 @@ def test_to_template_control_input_reshapes_stored_control_data() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "hello"}, }, @@ -440,7 +440,7 @@ def test_to_template_control_input_reshapes_stored_control_data() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, 
"condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -468,7 +468,7 @@ def test_to_template_control_input_rejects_raw_control_data() -> None: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "hello"}, }, @@ -494,7 +494,7 @@ def test_to_template_control_input_accepts_unrendered_template_data() -> None: "execution": "server", "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, diff --git a/sdks/python/tests/test_evaluators.py b/sdks/python/tests/test_evaluators.py deleted file mode 100644 index 777a11d5..00000000 --- a/sdks/python/tests/test_evaluators.py +++ /dev/null @@ -1,260 +0,0 @@ -"""Unit tests for the evaluator system. - -Tests evaluator registration, discovery, and base functionality without -requiring actual evaluator implementations or external services. - -Evaluators take config at __init__, evaluate() only takes data. -Registry, base classes, and discovery are in agent_control_evaluators. -""" - -import pytest -from unittest.mock import MagicMock, patch - -from pydantic import BaseModel - -from agent_control.evaluators import ( - Evaluator, - EvaluatorMetadata, - discover_evaluators, - list_evaluators, - register_evaluator, -) -from agent_control_evaluators import clear_evaluators -from agent_control_engine import reset_evaluator_discovery -from agent_control_models.controls import EvaluatorResult - - -class MockConfig(BaseModel): - """Config model for MockEvaluator.""" - threshold: float = 0.5 - - -class MockEvaluator(Evaluator): - """Mock evaluator for testing. - - Config is passed at __init__, not at evaluate(). 
- """ - - metadata = EvaluatorMetadata( - name="test-mock-evaluator", - version="1.0.0", - description="Mock evaluator for testing", - requires_api_key=False, - timeout_ms=10, - ) - config_model = MockConfig - - def __init__(self, config: dict): - super().__init__(config) - self.threshold = config.get("threshold", 0.5) - - def evaluate(self, data) -> EvaluatorResult: - """Mock evaluation (synchronous).""" - matched = float(data) > self.threshold if isinstance(data, (int, float)) else False - return EvaluatorResult( - matched=matched, - confidence=1.0, - message=f"Mock evaluation: {matched}", - metadata={"threshold": self.threshold}, - ) - - -class TestEvaluatorMetadata: - """Tests for EvaluatorMetadata dataclass.""" - - def test_metadata_creation(self): - """Test creating evaluator metadata.""" - metadata = EvaluatorMetadata( - name="test-evaluator", - version="1.0.0", - description="Test evaluator", - ) - - assert metadata.name == "test-evaluator" - assert metadata.version == "1.0.0" - assert metadata.description == "Test evaluator" - assert metadata.requires_api_key is False - assert metadata.timeout_ms == 10000 # Default - - def test_metadata_with_all_fields(self): - """Test metadata with all fields populated.""" - metadata = EvaluatorMetadata( - name="full-evaluator", - version="2.0.0", - description="Full test", - requires_api_key=True, - timeout_ms=5000, - ) - - assert metadata.requires_api_key is True - assert metadata.timeout_ms == 5000 - - -class TestEvaluatorRegistry: - """Tests for evaluator registry functionality.""" - - def setup_method(self): - """Clear registry before each test.""" - # Clear all evaluators and reset discovery - clear_evaluators() - reset_evaluator_discovery() - # Run discovery to load built-in evaluators - discover_evaluators() - - def test_register_evaluator(self): - """Test registering an evaluator.""" - # Register mock evaluator - register_evaluator(MockEvaluator) - - # Verify it's registered - evaluator_class = 
list_evaluators().get("test-mock-evaluator") - assert evaluator_class is MockEvaluator - - def test_get_nonexistent_evaluator(self): - """Test getting an evaluator that doesn't exist.""" - evaluator_class = list_evaluators().get("nonexistent-evaluator-xyz") - assert evaluator_class is None - - def test_list_evaluators_includes_registered(self): - """Test listing evaluators includes registered evaluators.""" - # Register mock evaluator - register_evaluator(MockEvaluator) - - # List evaluators - now returns dict of evaluator classes - evaluators = list_evaluators() - - assert "test-mock-evaluator" in evaluators - assert evaluators["test-mock-evaluator"] is MockEvaluator - - def test_builtin_evaluators_available(self): - """Test that built-in evaluators are available after discovery.""" - evaluators = list_evaluators() - - assert "regex" in evaluators - assert "list" in evaluators - - def test_register_duplicate_evaluator_raises_error(self): - """Test that registering a different evaluator with same name raises ValueError.""" - # Register evaluator first - register_evaluator(MockEvaluator) - - # Create a different class with the same evaluator name - class DuplicateEvaluator(Evaluator): - metadata = EvaluatorMetadata( - name="test-mock-evaluator", # Same name as MockEvaluator - version="2.0.0", - description="Duplicate evaluator", - ) - config_model = MockConfig - - def evaluate(self, data) -> EvaluatorResult: - return EvaluatorResult(matched=False, confidence=1.0, message="duplicate") - - # Second registration with different class should fail - with pytest.raises(ValueError, match="already registered"): - register_evaluator(DuplicateEvaluator) - - def test_re_register_same_evaluator_allowed(self): - """Test that re-registering the same class is allowed (hot reload support).""" - register_evaluator(MockEvaluator) - # Should not raise - same class can be re-registered - result = register_evaluator(MockEvaluator) - assert result is MockEvaluator - - -class 
TestEvaluatorBase: - """Tests for Evaluator base class.""" - - def test_evaluator_evaluate(self): - """Test synchronous evaluation.""" - # Config is now passed at init - evaluator = MockEvaluator({"threshold": 0.5}) - result = evaluator.evaluate(data=0.8) - - assert isinstance(result, EvaluatorResult) - assert result.matched is True - assert result.confidence == 1.0 - assert "Mock evaluation" in result.message - - def test_evaluator_evaluate_no_match(self): - """Test evaluation when rule doesn't match.""" - evaluator = MockEvaluator({"threshold": 0.5}) - result = evaluator.evaluate(data=0.3) - - assert isinstance(result, EvaluatorResult) - assert result.matched is False - assert result.confidence == 1.0 - - def test_evaluator_with_different_configs(self): - """Test evaluator uses config correctly (set at init).""" - # Create two evaluators with different configs - evaluator_low = MockEvaluator({"threshold": 0.5}) - evaluator_high = MockEvaluator({"threshold": 0.7}) - - # Same data, different thresholds - assert evaluator_low.evaluate(data=0.6).matched is True - assert evaluator_high.evaluate(data=0.6).matched is False - - def test_evaluator_metadata_accessible(self): - """Test that evaluator metadata is accessible.""" - evaluator = MockEvaluator({"threshold": 0.5}) - - assert evaluator.metadata.name == "test-mock-evaluator" - assert evaluator.metadata.version == "1.0.0" - assert evaluator.metadata.timeout_ms == 10 - - def test_evaluator_config_stored(self): - """Test that evaluator stores config.""" - config = {"threshold": 0.75, "extra": "value"} - evaluator = MockEvaluator(config) - - assert evaluator.config == config - assert evaluator.threshold == 0.75 - - -class TestEvaluatorDiscovery: - """Tests for evaluator discovery mechanism.""" - - def setup_method(self): - """Reset discovery state before each test.""" - clear_evaluators() - reset_evaluator_discovery() - - def test_discover_evaluators_loads_builtins(self): - """Test that discover_evaluators loads 
built-in evaluators.""" - discover_evaluators() - - evaluators = list_evaluators() - assert "regex" in evaluators - assert "list" in evaluators - - def test_discover_evaluators_only_runs_once(self): - """Test that discovery only runs once.""" - count1 = discover_evaluators() - count2 = discover_evaluators() - - # Second call should return 0 (already discovered) - assert count2 == 0 - - @patch("agent_control_evaluators._discovery.entry_points") - def test_discover_evaluators_loads_entry_points(self, mock_entry_points): - """Test loading evaluators via entry points.""" - mock_ep = MagicMock() - mock_ep.name = "custom-evaluator" - mock_ep.load.return_value = MockEvaluator - - mock_entry_points.return_value = [mock_ep] - - discover_evaluators() - - mock_entry_points.assert_called_with(group="agent_control.evaluators") - - def test_ensure_evaluators_discovered_triggers_discovery(self): - """Test that ensure_evaluators_discovered triggers discovery.""" - from agent_control.evaluators import ensure_evaluators_discovered - - ensure_evaluators_discovered() - - evaluators = list_evaluators() - assert "regex" in evaluators - assert "list" in evaluators diff --git a/sdks/python/tests/test_init_validation.py b/sdks/python/tests/test_init_validation.py index c0ecea3d..bc8aeaeb 100644 --- a/sdks/python/tests/test_init_validation.py +++ b/sdks/python/tests/test_init_validation.py @@ -4,7 +4,7 @@ import pytest from agent_control_models import ControlMatch as ModelControlMatch from agent_control_models import ControlScope as ModelControlScope -from agent_control_models import EvaluatorResult as ModelEvaluatorResult +from agent_control_models import RuleResult as ModelRuleResult def test_init_rejects_invalid_agent_name() -> None: @@ -45,6 +45,6 @@ def test_init_exports_control_match() -> None: assert "ControlMatch" in agent_control.__all__ -def test_init_exports_evaluator_result() -> None: - assert agent_control.EvaluatorResult is ModelEvaluatorResult - assert "EvaluatorResult" in 
agent_control.__all__ +def test_init_exports_rule_result() -> None: + assert agent_control.RuleResult is ModelRuleResult + assert "RuleResult" in agent_control.__all__ diff --git a/sdks/python/tests/test_integration_agents.py b/sdks/python/tests/test_integration_agents.py index 271294fa..8f7eb6f6 100644 --- a/sdks/python/tests/test_integration_agents.py +++ b/sdks/python/tests/test_integration_agents.py @@ -216,7 +216,7 @@ async def test_convenience_agent_association_functions( "scope": {"step_types": ["tool"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": ".*"}, }, diff --git a/sdks/python/tests/test_local_evaluation.py b/sdks/python/tests/test_local_evaluation.py index 1763a136..f1840a5d 100644 --- a/sdks/python/tests/test_local_evaluation.py +++ b/sdks/python/tests/test_local_evaluation.py @@ -16,7 +16,7 @@ from agent_control_models import ( ControlMatch, EvaluationResponse, - EvaluatorResult, + RuleResult, Step, ) @@ -98,7 +98,7 @@ def make_control_dict( *, enabled: bool = True, execution: str = "server", - evaluator: str = "regex", + rule: str = "regex", pattern: str = r"test", action: str = "deny", step_type: str = "llm", @@ -128,8 +128,8 @@ def make_control_dict( "scope": scope, "condition": { "selector": {"path": path}, - "evaluator": { - "name": evaluator, + "rule": { + "name": rule, "config": {"pattern": pattern}, }, }, @@ -153,7 +153,7 @@ def add_template_metadata(control: dict[str, Any], *, pattern: str = "test") -> "scope": control["control"]["scope"], "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -182,7 +182,7 @@ def make_unrendered_template_control( "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -259,13 +259,13 @@ def 
test_merge_combines_matches(self): control_id=1, control_name="local_ctrl", action="deny", - result=EvaluatorResult(matched=True, confidence=1.0), + result=RuleResult(matched=True, confidence=1.0), ) server_match = ControlMatch( control_id=2, control_name="server_ctrl", action="deny", - result=EvaluatorResult(matched=True, confidence=1.0), + result=RuleResult(matched=True, confidence=1.0), ) local = EvaluationResponse(is_safe=False, confidence=1.0, matches=[local_match]) @@ -284,13 +284,13 @@ def test_merge_combines_errors(self): control_id=1, control_name="local_err", action="deny", - result=EvaluatorResult(matched=False, confidence=0.0, error="local error"), + result=RuleResult(matched=False, confidence=0.0, error="local error"), ) server_error = ControlMatch( control_id=2, control_name="server_err", action="deny", - result=EvaluatorResult(matched=False, confidence=0.0, error="server error"), + result=RuleResult(matched=False, confidence=0.0, error="server error"), ) local = EvaluationResponse(is_safe=False, confidence=0.0, errors=[local_error]) @@ -665,7 +665,7 @@ async def test_non_applicable_local_controls_skip_local_but_still_call_server( @pytest.mark.asyncio async def test_local_legacy_flat_control_executes_locally(self, agent_name, llm_payload): - """Legacy flat selector/evaluator controls should still execute in the SDK.""" + """Legacy flat selector/rule controls should still execute in the SDK.""" controls = [ { "id": 1, @@ -676,7 +676,7 @@ async def test_local_legacy_flat_control_executes_locally(self, agent_name, llm_ "execution": "sdk", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "action": {"decision": "deny"}, }, } @@ -1070,19 +1070,19 @@ async def test_tool_step_local_deny_skips_server(self, agent_name, tool_payload) assert result.matches[0].control_name == "local_deny_ctrl" 
@pytest.mark.asyncio - async def test_local_control_with_missing_evaluator_raises(self, agent_name, llm_payload): - """Test that local control with unavailable evaluator raises RuntimeError. + async def test_local_control_with_missing_rule_raises(self, agent_name, llm_payload): + """Test that local control with unavailable rule raises RuntimeError. - Given: A local control referencing an evaluator that doesn't exist + Given: A local control referencing a rule that doesn't exist When: check_evaluation_with_local is called Then: RuntimeError is raised with helpful message """ controls = [ make_control_dict( 1, - "local_missing_evaluator", + "local_missing_rule", execution="sdk", - evaluator="nonexistent-evaluator-xyz", + rule="nonexistent-rule-xyz", pattern=r"test", ), ] @@ -1099,24 +1099,24 @@ async def test_local_control_with_missing_evaluator_raises(self, agent_name, llm controls=controls, ) - assert "local_missing_evaluator" in str(exc_info.value) - assert "nonexistent-evaluator-xyz" in str(exc_info.value) + assert "local_missing_rule" in str(exc_info.value) + assert "nonexistent-rule-xyz" in str(exc_info.value) assert "not available" in str(exc_info.value) @pytest.mark.asyncio - async def test_local_control_with_agent_scoped_evaluator_raises(self, agent_name, llm_payload): - """Test that local control with agent-scoped evaluator raises RuntimeError. + async def test_local_control_with_agent_scoped_rule_raises(self, agent_name, llm_payload): + """Test that local control with agent-scoped rule raises RuntimeError. 
- Given: A local control referencing an agent-scoped evaluator (agent:evaluator) + Given: A local control referencing an agent-scoped rule (agent:rule) When: check_evaluation_with_local is called - Then: RuntimeError is raised explaining agent-scoped evaluators are server-only + Then: RuntimeError is raised explaining agent-scoped rules are server-only """ controls = [ make_control_dict( 1, "local_agent_scoped", execution="sdk", - evaluator="my-agent:custom-evaluator", + rule="my-agent:custom-rule", pattern=r"test", ), ] @@ -1134,14 +1134,14 @@ async def test_local_control_with_agent_scoped_evaluator_raises(self, agent_name ) assert "local_agent_scoped" in str(exc_info.value) - assert "my-agent:custom-evaluator" in str(exc_info.value) + assert "my-agent:custom-rule" in str(exc_info.value) assert "server-only" in str(exc_info.value) @pytest.mark.asyncio - async def test_server_control_with_missing_evaluator_allowed(self, agent_name, llm_payload): - """Test that server control with unavailable evaluator is allowed (server handles it). + async def test_server_control_with_missing_rule_allowed(self, agent_name, llm_payload): + """Test that server control with unavailable rule is allowed (server handles it). 
- Given: A server control (execution="server") referencing an evaluator that + Given: A server control (execution="server") referencing a rule that doesn't exist locally When: check_evaluation_with_local is called Then: No error, server is called to handle it @@ -1149,9 +1149,9 @@ async def test_server_control_with_missing_evaluator_allowed(self, agent_name, l controls = [ make_control_dict( 1, - "server_custom_evaluator", + "server_custom_rule", execution="server", - evaluator="server-only-evaluator", + rule="server-only-rule", pattern=r"test", ), ] @@ -1164,7 +1164,7 @@ async def test_server_control_with_missing_evaluator_allowed(self, agent_name, l client.http_client = AsyncMock() client.http_client.post = AsyncMock(return_value=mock_response) - # Should not raise - server handles unavailable evaluators + # Should not raise - server handles unavailable rules result = await check_evaluation_with_local( client=client, agent_name=agent_name, @@ -1186,7 +1186,7 @@ async def test_invalid_local_control_populates_errors(self, agent_name, llm_payl Then: The parse error should appear in result.errors """ controls = [ - # Invalid control (missing required evaluator field) + # Invalid control (missing required rule field) {"id": 999, "name": "bad_control", "control": {"execution": "sdk"}}, ] @@ -1274,7 +1274,7 @@ async def test_local_evaluation_includes_steering_context(self, agent_name, llm_ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, }, "action": { "decision": "steer", diff --git a/sdks/python/tests/test_observability_updates.py b/sdks/python/tests/test_observability_updates.py index f9bdf6a1..2d3535db 100644 --- a/sdks/python/tests/test_observability_updates.py +++ b/sdks/python/tests/test_observability_updates.py @@ -75,13 +75,13 @@ def _make_match( matched=True, metadata=None, ): - from 
agent_control_models import ControlMatch, EvaluatorResult + from agent_control_models import ControlMatch, RuleResult return ControlMatch( control_id=control_id, control_name=control_name, action=action, - result=EvaluatorResult(matched=matched, confidence=0.9, metadata=metadata), + result=RuleResult(matched=matched, confidence=0.9, metadata=metadata), ) def test_combines_matches_errors_and_non_matches(self): @@ -107,7 +107,7 @@ def test_build_server_control_lookup_skips_unparseable_controls(self): "name": "ctrl-1", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -187,13 +187,13 @@ def _make_match( matched=True, metadata=None, ): - from agent_control_models import ControlMatch, EvaluatorResult + from agent_control_models import ControlMatch, RuleResult return ControlMatch( control_id=control_id, control_name=control_name, action=action, - result=EvaluatorResult(matched=matched, confidence=0.9, metadata=metadata), + result=RuleResult(matched=matched, confidence=0.9, metadata=metadata), ) def _make_response(self, matches=None, errors=None, non_matches=None): @@ -215,7 +215,7 @@ def test_builds_events_with_trace_context(self): 1, "ctrl-1", { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, ).control @@ -235,7 +235,7 @@ def test_builds_events_with_trace_context(self): assert event.trace_id == "trace123" assert event.span_id == "span456" assert event.agent_name == "test-agent" - assert event.evaluator_name == "regex" + assert event.rule_name == "regex" assert event.selector_path == "input" def test_uses_safe_selected_data_preview_as_event_input(self): @@ -267,7 +267,7 @@ def test_uses_safe_selected_data_preview_as_event_input(self): 1, "ctrl-1", { - "evaluator": {"name": "regex", 
"config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, ).control @@ -300,11 +300,11 @@ def test_composite_control_uses_representative_observability_identity(self): "and": [ { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, }, { "selector": {"path": "output"}, - "evaluator": {"name": "regex", "config": {"pattern": "done"}}, + "rule": {"name": "regex", "config": {"pattern": "done"}}, }, ] }, @@ -322,16 +322,16 @@ def test_composite_control_uses_representative_observability_identity(self): assert len(events) == 1 event = events[0] - assert event.evaluator_name == "regex" + assert event.rule_name == "regex" assert event.selector_path == "input" - assert event.metadata["primary_evaluator"] == "regex" + assert event.metadata["primary_rule"] == "regex" assert event.metadata["primary_selector_path"] == "input" assert event.metadata["leaf_count"] == 2 - assert event.metadata["all_evaluators"] == ["regex"] + assert event.metadata["all_rules"] == ["regex"] assert event.metadata["all_selector_paths"] == ["input", "output"] def test_preserves_error_message_parity_by_result_category(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult request = self._make_request() control_lookup = { @@ -339,7 +339,7 @@ def test_preserves_error_message_parity_by_result_category(self): 1, "ctrl-1", { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, ).control @@ -352,7 +352,7 @@ def test_preserves_error_message_parity_by_result_category(self): control_id=1, control_name="ctrl-1", action="observe", - result=EvaluatorResult( + result=RuleResult( matched=True, confidence=0.9, 
metadata={"server_error_message": "match-error"}, @@ -364,7 +364,7 @@ def test_preserves_error_message_parity_by_result_category(self): control_id=1, control_name="ctrl-1", action="observe", - result=EvaluatorResult(matched=False, confidence=0.2, error="eval-error"), + result=RuleResult(matched=False, confidence=0.2, error="eval-error"), ) ], non_matches=[ @@ -372,7 +372,7 @@ def test_preserves_error_message_parity_by_result_category(self): control_id=1, control_name="ctrl-1", action="observe", - result=EvaluatorResult(matched=False, confidence=0.1, error="ignored-error"), + result=RuleResult(matched=False, confidence=0.1, error="ignored-error"), ) ], ) @@ -422,7 +422,7 @@ def teardown_method(self) -> None: @pytest.mark.asyncio async def test_delivers_local_events_in_oss_mode(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult, Step mock_response = EvaluationResponse( is_safe=True, @@ -432,7 +432,7 @@ async def test_delivers_local_events_in_oss_mode(self): control_id=1, control_name="test-ctrl", action="observe", - result=EvaluatorResult(matched=False, confidence=0.1), + result=RuleResult(matched=False, confidence=0.1), ) ], ) @@ -444,7 +444,7 @@ async def test_delivers_local_events_in_oss_mode(self): "name": "test-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -457,7 +457,7 @@ async def test_delivers_local_events_in_oss_mode(self): step = Step(type="llm", name="test-step", input="hello") with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ - patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.list_rules", return_value=["regex"]), \ 
patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( @@ -480,7 +480,7 @@ async def test_delivers_local_events_in_oss_mode(self): @pytest.mark.asyncio async def test_external_sink_receives_local_events(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult, Step sink = RecordingSink() register_control_event_sink(sink) @@ -495,7 +495,7 @@ async def test_external_sink_receives_local_events(self): control_id=1, control_name="test-ctrl", action="observe", - result=EvaluatorResult(matched=False, confidence=0.1), + result=RuleResult(matched=False, confidence=0.1), ) ], ) @@ -507,7 +507,7 @@ async def test_external_sink_receives_local_events(self): "name": "test-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -520,7 +520,7 @@ async def test_external_sink_receives_local_events(self): step = Step(type="llm", name="test-step", input="hello") with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), patch( - "agent_control.evaluation.list_evaluators", return_value=["regex"] + "agent_control.evaluation.list_rules", return_value=["regex"] ): await evaluation.check_evaluation_with_local( client=client, @@ -545,7 +545,7 @@ async def test_external_sink_receives_local_events(self): @pytest.mark.asyncio async def test_resolves_provider_trace_context_for_local_events(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult, Step mock_response = EvaluationResponse( is_safe=True, @@ 
-555,7 +555,7 @@ async def test_resolves_provider_trace_context_for_local_events(self): control_id=1, control_name="test-ctrl", action="allow", - result=EvaluatorResult(matched=False, confidence=0.1), + result=RuleResult(matched=False, confidence=0.1), ) ], ) @@ -566,7 +566,7 @@ async def test_resolves_provider_trace_context_for_local_events(self): "name": "test-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -580,7 +580,7 @@ async def test_resolves_provider_trace_context_for_local_events(self): set_trace_context_provider(lambda: {"trace_id": "a" * 32, "span_id": "b" * 16}) with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ - patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.list_rules", return_value=["regex"]), \ patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: await evaluation.check_evaluation_with_local( @@ -605,7 +605,7 @@ async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): "name": "server-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "deny"}, @@ -634,7 +634,7 @@ async def test_forwards_provider_trace_headers_to_server_when_ids_omitted(self): } ) - with patch("agent_control.evaluation.list_evaluators", return_value=["regex"]): + with patch("agent_control.evaluation.list_rules", return_value=["regex"]): await evaluation.check_evaluation_with_local( client=client, agent_name="agent-000000000001", @@ -758,7 +758,7 @@ async def 
test_skips_local_event_reconstruction_when_observability_disabled(self "name": "local-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -777,7 +777,7 @@ async def test_skips_local_event_reconstruction_when_observability_disabled(self with ( patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), - patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), + patch("agent_control.evaluation.list_rules", return_value=["regex"]), patch("agent_control.evaluation.is_observability_enabled", return_value=False), patch("agent_control.evaluation.build_control_execution_events") as mock_build, patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue, @@ -843,7 +843,7 @@ class TestMergedEventCreation: @pytest.mark.asyncio async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_once(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult, Step local_response = EvaluationResponse( is_safe=True, @@ -853,7 +853,7 @@ async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ control_id=1, control_name="local-ctrl", action="observe", - result=EvaluatorResult(matched=False, confidence=0.8), + result=RuleResult(matched=False, confidence=0.8), ) ], ) @@ -879,7 +879,7 @@ async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ "name": "local-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -891,7 +891,7 @@ async def 
test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ "name": "server-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -912,7 +912,7 @@ async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ step = Step(type="llm", name="test-step", input="hello") with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ - patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.list_rules", return_value=["regex"]), \ patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( @@ -934,7 +934,7 @@ async def test_merged_event_mode_enqueues_reconstructed_local_and_server_events_ @pytest.mark.asyncio async def test_external_sink_receives_merged_local_and_server_events(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult, Step sink = RecordingSink() register_control_event_sink(sink) @@ -949,7 +949,7 @@ async def test_external_sink_receives_merged_local_and_server_events(self): control_id=1, control_name="local-ctrl", action="observe", - result=EvaluatorResult(matched=False, confidence=0.8), + result=RuleResult(matched=False, confidence=0.8), ) ], ) @@ -974,7 +974,7 @@ async def test_external_sink_receives_merged_local_and_server_events(self): "name": "local-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -986,7 +986,7 @@ async def 
test_external_sink_receives_merged_local_and_server_events(self): "name": "server-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -1007,7 +1007,7 @@ async def test_external_sink_receives_merged_local_and_server_events(self): step = Step(type="llm", name="test-step", input="hello") with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), patch( - "agent_control.evaluation.list_evaluators", return_value=["regex"] + "agent_control.evaluation.list_rules", return_value=["regex"] ): await evaluation.check_evaluation_with_local( client=client, @@ -1030,7 +1030,7 @@ async def test_external_sink_receives_merged_local_and_server_events(self): @pytest.mark.asyncio async def test_merged_event_mode_enqueues_local_events_before_reraising_server_failure(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult, Step local_response = EvaluationResponse( is_safe=True, @@ -1040,7 +1040,7 @@ async def test_merged_event_mode_enqueues_local_events_before_reraising_server_f control_id=1, control_name="local-ctrl", action="observe", - result=EvaluatorResult(matched=False, confidence=0.8), + result=RuleResult(matched=False, confidence=0.8), ) ], ) @@ -1051,7 +1051,7 @@ async def test_merged_event_mode_enqueues_local_events_before_reraising_server_f "name": "local-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -1063,7 +1063,7 @@ async def test_merged_event_mode_enqueues_local_events_before_reraising_server_f "name": "server-ctrl", "control": { "condition": { - "evaluator": {"name": 
"regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -1082,7 +1082,7 @@ async def test_merged_event_mode_enqueues_local_events_before_reraising_server_f with ( patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), - patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), + patch("agent_control.evaluation.list_rules", return_value=["regex"]), patch("agent_control.evaluation.is_observability_enabled", return_value=True), patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue, ): @@ -1107,7 +1107,7 @@ async def test_merged_event_mode_enqueues_local_events_before_reraising_server_f @pytest.mark.asyncio async def test_merged_event_mode_enqueues_only_local_events_when_no_server_controls_apply(self): - from agent_control_models import ControlMatch, EvaluationResponse, EvaluatorResult, Step + from agent_control_models import ControlMatch, EvaluationResponse, RuleResult, Step local_response = EvaluationResponse( is_safe=True, @@ -1117,7 +1117,7 @@ async def test_merged_event_mode_enqueues_only_local_events_when_no_server_contr control_id=1, control_name="local-ctrl", action="observe", - result=EvaluatorResult(matched=True, confidence=0.8), + result=RuleResult(matched=True, confidence=0.8), ) ], ) @@ -1127,7 +1127,7 @@ async def test_merged_event_mode_enqueues_only_local_events_when_no_server_contr "name": "local-ctrl", "control": { "condition": { - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "selector": {"path": "input"}, }, "action": {"decision": "observe"}, @@ -1143,7 +1143,7 @@ async def test_merged_event_mode_enqueues_only_local_events_when_no_server_contr step = Step(type="llm", name="test-step", input="hello") with patch("agent_control.evaluation.ControlEngine", return_value=mock_engine), \ - 
patch("agent_control.evaluation.list_evaluators", return_value=["regex"]), \ + patch("agent_control.evaluation.list_rules", return_value=["regex"]), \ patch("agent_control.evaluation.is_observability_enabled", return_value=True), \ patch("agent_control.evaluation.enqueue_observability_events") as mock_enqueue: result = await evaluation.check_evaluation_with_local( diff --git a/sdks/python/tests/test_otel_sink.py b/sdks/python/tests/test_otel_sink.py index 4d4aa451..2eebc2f0 100644 --- a/sdks/python/tests/test_otel_sink.py +++ b/sdks/python/tests/test_otel_sink.py @@ -36,7 +36,7 @@ def _make_event(**overrides: object) -> ControlExecutionEvent: confidence=0.85, timestamp=datetime(2026, 4, 16, 12, 0, tzinfo=UTC), execution_duration_ms=12.5, - evaluator_name="regex", + rule_name="regex", selector_path="input", error_message=None, metadata={ diff --git a/sdks/python/tests/test_rules.py b/sdks/python/tests/test_rules.py new file mode 100644 index 00000000..2e63ebc8 --- /dev/null +++ b/sdks/python/tests/test_rules.py @@ -0,0 +1,260 @@ +"""Unit tests for the rule system. + +Tests rule registration, discovery, and base functionality without +requiring actual rule implementations or external services. + +Rules take config at __init__, evaluate() only takes data. +Registry, base classes, and discovery are in agent_control_rules. +""" + +import pytest +from unittest.mock import MagicMock, patch + +from pydantic import BaseModel + +from agent_control.rules import ( + Rule, + RuleMetadata, + discover_rules, + list_rules, + register_rule, +) +from agent_control_rules import clear_rules +from agent_control_engine import reset_rule_discovery +from agent_control_models.controls import RuleResult + + +class MockConfig(BaseModel): + """Config model for MockRule.""" + threshold: float = 0.5 + + +class MockRule(Rule): + """Mock rule for testing. + + Config is passed at __init__, not at evaluate(). 
+ """ + + metadata = RuleMetadata( + name="test-mock-rule", + version="1.0.0", + description="Mock rule for testing", + requires_api_key=False, + timeout_ms=10, + ) + config_model = MockConfig + + def __init__(self, config: dict): + super().__init__(config) + self.threshold = config.get("threshold", 0.5) + + def evaluate(self, data) -> RuleResult: + """Mock evaluation (synchronous).""" + matched = float(data) > self.threshold if isinstance(data, (int, float)) else False + return RuleResult( + matched=matched, + confidence=1.0, + message=f"Mock evaluation: {matched}", + metadata={"threshold": self.threshold}, + ) + + +class TestRuleMetadata: + """Tests for RuleMetadata dataclass.""" + + def test_metadata_creation(self): + """Test creating rule metadata.""" + metadata = RuleMetadata( + name="test-rule", + version="1.0.0", + description="Test rule", + ) + + assert metadata.name == "test-rule" + assert metadata.version == "1.0.0" + assert metadata.description == "Test rule" + assert metadata.requires_api_key is False + assert metadata.timeout_ms == 10000 # Default + + def test_metadata_with_all_fields(self): + """Test metadata with all fields populated.""" + metadata = RuleMetadata( + name="full-rule", + version="2.0.0", + description="Full test", + requires_api_key=True, + timeout_ms=5000, + ) + + assert metadata.requires_api_key is True + assert metadata.timeout_ms == 5000 + + +class TestRuleRegistry: + """Tests for rule registry functionality.""" + + def setup_method(self): + """Clear registry before each test.""" + # Clear all rules and reset discovery + clear_rules() + reset_rule_discovery() + # Run discovery to load built-in rules + discover_rules() + + def test_register_rule(self): + """Test registering a rule.""" + # Register mock rule + register_rule(MockRule) + + # Verify it's registered + rule_class = list_rules().get("test-mock-rule") + assert rule_class is MockRule + + def test_get_nonexistent_rule(self): + """Test getting a rule that doesn't exist.""" + 
rule_class = list_rules().get("nonexistent-rule-xyz") + assert rule_class is None + + def test_list_rules_includes_registered(self): + """Test listing rules includes registered rules.""" + # Register mock rule + register_rule(MockRule) + + # List rules - now returns dict of rule classes + rules = list_rules() + + assert "test-mock-rule" in rules + assert rules["test-mock-rule"] is MockRule + + def test_builtin_rules_available(self): + """Test that built-in rules are available after discovery.""" + rules = list_rules() + + assert "regex" in rules + assert "list" in rules + + def test_register_duplicate_rule_raises_error(self): + """Test that registering a different rule with same name raises ValueError.""" + # Register rule first + register_rule(MockRule) + + # Create a different class with the same rule name + class DuplicateRule(Rule): + metadata = RuleMetadata( + name="test-mock-rule", # Same name as MockRule + version="2.0.0", + description="Duplicate rule", + ) + config_model = MockConfig + + def evaluate(self, data) -> RuleResult: + return RuleResult(matched=False, confidence=1.0, message="duplicate") + + # Second registration with different class should fail + with pytest.raises(ValueError, match="already registered"): + register_rule(DuplicateRule) + + def test_re_register_same_rule_allowed(self): + """Test that re-registering the same class is allowed (hot reload support).""" + register_rule(MockRule) + # Should not raise - same class can be re-registered + result = register_rule(MockRule) + assert result is MockRule + + +class TestRuleBase: + """Tests for Rule base class.""" + + def test_rule_evaluate(self): + """Test synchronous evaluation.""" + # Config is now passed at init + rule = MockRule({"threshold": 0.5}) + result = rule.evaluate(data=0.8) + + assert isinstance(result, RuleResult) + assert result.matched is True + assert result.confidence == 1.0 + assert "Mock evaluation" in result.message + + def test_rule_evaluate_no_match(self): + """Test 
evaluation when rule doesn't match.""" + rule = MockRule({"threshold": 0.5}) + result = rule.evaluate(data=0.3) + + assert isinstance(result, RuleResult) + assert result.matched is False + assert result.confidence == 1.0 + + def test_rule_with_different_configs(self): + """Test rule uses config correctly (set at init).""" + # Create two rules with different configs + rule_low = MockRule({"threshold": 0.5}) + rule_high = MockRule({"threshold": 0.7}) + + # Same data, different thresholds + assert rule_low.evaluate(data=0.6).matched is True + assert rule_high.evaluate(data=0.6).matched is False + + def test_rule_metadata_accessible(self): + """Test that rule metadata is accessible.""" + rule = MockRule({"threshold": 0.5}) + + assert rule.metadata.name == "test-mock-rule" + assert rule.metadata.version == "1.0.0" + assert rule.metadata.timeout_ms == 10 + + def test_rule_config_stored(self): + """Test that rule stores config.""" + config = {"threshold": 0.75, "extra": "value"} + rule = MockRule(config) + + assert rule.config == config + assert rule.threshold == 0.75 + + +class TestRuleDiscovery: + """Tests for rule discovery mechanism.""" + + def setup_method(self): + """Reset discovery state before each test.""" + clear_rules() + reset_rule_discovery() + + def test_discover_rules_loads_builtins(self): + """Test that discover_rules loads built-in rules.""" + discover_rules() + + rules = list_rules() + assert "regex" in rules + assert "list" in rules + + def test_discover_rules_only_runs_once(self): + """Test that discovery only runs once.""" + count1 = discover_rules() + count2 = discover_rules() + + # Second call should return 0 (already discovered) + assert count2 == 0 + + @patch("agent_control_rules._discovery.entry_points") + def test_discover_rules_loads_entry_points(self, mock_entry_points): + """Test loading rules via entry points.""" + mock_ep = MagicMock() + mock_ep.name = "custom-rule" + mock_ep.load.return_value = MockRule + + mock_entry_points.return_value = 
[mock_ep] + + discover_rules() + + mock_entry_points.assert_called_with(group="agent_control.rules") + + def test_ensure_rules_discovered_triggers_discovery(self): + """Test that ensure_rules_discovered triggers discovery.""" + from agent_control.rules import ensure_rules_discovered + + ensure_rules_discovered() + + rules = list_rules() + assert "regex" in rules + assert "list" in rules diff --git a/sdks/python/tests/test_evaluators_optional_imports.py b/sdks/python/tests/test_rules_optional_imports.py similarity index 56% rename from sdks/python/tests/test_evaluators_optional_imports.py rename to sdks/python/tests/test_rules_optional_imports.py index b4560fc9..5c727c39 100644 --- a/sdks/python/tests/test_evaluators_optional_imports.py +++ b/sdks/python/tests/test_rules_optional_imports.py @@ -1,7 +1,7 @@ -"""Coverage for the optional galileo import fallbacks in agent_control.evaluators. +"""Coverage for the optional galileo import fallbacks in agent_control.rules. The galileo extras are normally installed in the dev environment, so the -``except ImportError`` branches in ``agent_control/evaluators/__init__.py`` +``except ImportError`` branches in ``agent_control/rules/__init__.py`` never fire under regular tests. This module forces those failures by hiding the relevant modules in ``sys.modules`` and reloading the package. """ @@ -12,6 +12,7 @@ import importlib import importlib.util import sys +import types import pytest @@ -27,11 +28,11 @@ def _module_available(name: str) -> bool: return False -_GALILEO_INSTALLED = _module_available("agent_control_evaluator_galileo.luna") +_GALILEO_INSTALLED = _module_available("agent_control_rule_galileo.luna") -def _reload_evaluators_with_blocked(prefix: str) -> object: - """Reload ``agent_control.evaluators`` while ``prefix.*`` imports fail. +def _reload_rules_with_blocked(prefix: str) -> object: + """Reload ``agent_control.rules`` while ``prefix.*`` imports fail. 
Returns the freshly loaded module so callers can inspect ``__all__``. Restores the original ``builtins.__import__`` and ``sys.modules`` entries @@ -47,11 +48,11 @@ def fail_for_prefix(name: str, *args: object, **kwargs: object) -> object: # Drop any cached entries so the patched import is consulted. blocked_modules = [m for m in list(sys.modules) if m == prefix or m.startswith(f"{prefix}.")] saved_modules = {m: sys.modules.pop(m) for m in blocked_modules} - saved_evaluators = sys.modules.pop("agent_control.evaluators", None) + saved_rules = sys.modules.pop("agent_control.rules", None) builtins.__import__ = fail_for_prefix try: - import agent_control.evaluators as reloaded + import agent_control.rules as reloaded reloaded = importlib.reload(reloaded) return reloaded @@ -60,51 +61,83 @@ def fail_for_prefix(name: str, *args: object, **kwargs: object) -> object: # Restore the cached modules so other tests keep their state. for name, module in saved_modules.items(): sys.modules[name] = module - if saved_evaluators is not None: - sys.modules["agent_control.evaluators"] = saved_evaluators + if saved_rules is not None: + sys.modules["agent_control.rules"] = saved_rules def test_module_loads_when_galileo_luna_is_unavailable(): - """Hiding ``agent_control_evaluator_galileo.luna`` exercises its except branch.""" - reloaded = _reload_evaluators_with_blocked("agent_control_evaluator_galileo.luna") + """Hiding ``agent_control_rule_galileo.luna`` exercises its except branch.""" + reloaded = _reload_rules_with_blocked("agent_control_rule_galileo.luna") # Core names are always present. - assert "Evaluator" in reloaded.__all__ + assert "Rule" in reloaded.__all__ # Luna1 names are NOT present because the import failed. 
- assert "LunaEvaluator" not in reloaded.__all__ + assert "LunaRule" not in reloaded.__all__ assert "GalileoLunaClient" not in reloaded.__all__ def test_module_loads_when_galileo_package_is_unavailable(): """Hiding the whole package exercises the ImportError fallback.""" - reloaded = _reload_evaluators_with_blocked("agent_control_evaluator_galileo") + reloaded = _reload_rules_with_blocked("agent_control_rule_galileo") - assert "Evaluator" in reloaded.__all__ + assert "Rule" in reloaded.__all__ # The optional luna names are absent. for absent in ( - "LunaEvaluator", + "LunaRule", "GalileoLunaClient", "LUNA_AVAILABLE", ): assert absent not in reloaded.__all__ +def test_module_exports_galileo_optional_imports_when_module_is_available( + monkeypatch: pytest.MonkeyPatch, +): + """A synthetic luna module exercises the optional import success branch.""" + fake_package = types.ModuleType("agent_control_rule_galileo") + fake_package.__path__ = [] # type: ignore[attr-defined] + fake_luna = types.ModuleType("agent_control_rule_galileo.luna") + + for name in ( + "GalileoLunaClient", + "LunaOperator", + "LunaRule", + "LunaRuleConfig", + "ScorerInvokeInputs", + "ScorerInvokeRequest", + "ScorerInvokeResponse", + ): + setattr(fake_luna, name, type(name, (), {})) + fake_luna.LUNA_AVAILABLE = True + + monkeypatch.setitem(sys.modules, "agent_control_rule_galileo", fake_package) + monkeypatch.setitem(sys.modules, "agent_control_rule_galileo.luna", fake_luna) + sys.modules.pop("agent_control.rules", None) + + import agent_control.rules as reloaded + + reloaded = importlib.reload(reloaded) + + assert "LunaRule" in reloaded.__all__ + assert "ScorerInvokeInputs" in reloaded.__all__ + + @pytest.mark.skipif( not _GALILEO_INSTALLED, - reason="agent-control-evaluator-galileo extras not installed in this environment", + reason="agent-control-rule-galileo extras not installed in this environment", ) def test_module_loads_galileo_optional_imports_when_available(): """Sanity check: with galileo 
installed, the optional names ARE exposed. Reloading without patching __import__ runs both success branches. """ - saved = sys.modules.pop("agent_control.evaluators", None) + saved = sys.modules.pop("agent_control.rules", None) try: - import agent_control.evaluators as reloaded + import agent_control.rules as reloaded reloaded = importlib.reload(reloaded) # Sanity: at least one luna name should reappear. - assert "LunaEvaluator" in reloaded.__all__ + assert "LunaRule" in reloaded.__all__ finally: if saved is not None: - sys.modules["agent_control.evaluators"] = saved + sys.modules["agent_control.rules"] = saved diff --git a/sdks/python/tests/test_shutdown.py b/sdks/python/tests/test_shutdown.py index 49e5b406..70580fdb 100644 --- a/sdks/python/tests/test_shutdown.py +++ b/sdks/python/tests/test_shutdown.py @@ -292,7 +292,7 @@ async def test_short_lived_process_flushes_sdk_events_on_exit( "scope": {"stages": ["post"]}, "condition": { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": r"\b123-45-6789\b"}, }, diff --git a/sdks/typescript/CHANGELOG.md b/sdks/typescript/CHANGELOG.md index 6e20c58d..0f3b45ec 100644 --- a/sdks/typescript/CHANGELOG.md +++ b/sdks/typescript/CHANGELOG.md @@ -7,25 +7,25 @@ ### Bug Fixes -* **evaluators:** configure Luna scorer API URL ([#237](https://github.com/agentcontrol/agent-control/issues/237)) ([fc516f0](https://github.com/agentcontrol/agent-control/commit/fc516f05dc8fbb71a35db2831e617e0a222e5f87)) +* **rules:** configure Luna scorer API URL ([#237](https://github.com/agentcontrol/agent-control/issues/237)) ([fc516f0](https://github.com/agentcontrol/agent-control/commit/fc516f05dc8fbb71a35db2831e617e0a222e5f87)) * **server:** use plain psycopg with OS libpq ([#241](https://github.com/agentcontrol/agent-control/issues/241)) ([45a33a5](https://github.com/agentcontrol/agent-control/commit/45a33a5528cdf480bb0510488fd9312f38760d5e)) ## 
[3.0.0](https://github.com/agentcontrol/agent-control/compare/ts-sdk-v2.6.0...ts-sdk-v3.0.0) (2026-06-05) ### ⚠ BREAKING CHANGES -* **evaluators:** remove Luna2 evaluator (#234) +* **rules:** remove Luna2 rule (#234) ### Bug Fixes -* **evaluators:** remove Luna2 evaluator ([#234](https://github.com/agentcontrol/agent-control/issues/234)) ([ccc4fc6](https://github.com/agentcontrol/agent-control/commit/ccc4fc6d4dc3228169d9db0178d14596349739f1)) +* **rules:** remove Luna2 rule ([#234](https://github.com/agentcontrol/agent-control/issues/234)) ([ccc4fc6](https://github.com/agentcontrol/agent-control/commit/ccc4fc6d4dc3228169d9db0178d14596349739f1)) * **server:** harden auth upstream connection handling ([#235](https://github.com/agentcontrol/agent-control/issues/235)) ([16d0115](https://github.com/agentcontrol/agent-control/commit/16d0115a53289d34b92952210abe7b704ac9b10b)) ## [2.6.0](https://github.com/agentcontrol/agent-control/compare/ts-sdk-v2.5.0...ts-sdk-v2.6.0) (2026-05-28) ### Features -* **evaluators:** add new lluna client ([#213](https://github.com/agentcontrol/agent-control/issues/213)) ([f65beb9](https://github.com/agentcontrol/agent-control/commit/f65beb9d14dca8248c35a620d47af3298f1fe5e7)) +* **rules:** add new lluna client ([#213](https://github.com/agentcontrol/agent-control/issues/213)) ([f65beb9](https://github.com/agentcontrol/agent-control/commit/f65beb9d14dca8248c35a620d47af3298f1fe5e7)) * **sdk:** add otel support ([#177](https://github.com/agentcontrol/agent-control/issues/177)) ([9530368](https://github.com/agentcontrol/agent-control/commit/9530368ff7d609106db600715724ba9ee1dbcaff)) * **sdk:** add runtime token auth ([#215](https://github.com/agentcontrol/agent-control/issues/215)) ([6cc0f38](https://github.com/agentcontrol/agent-control/commit/6cc0f38b89e3870f44aa46e508e29431f8713e0d)) * **server:** add control clone-and-bind endpoint ([#229](https://github.com/agentcontrol/agent-control/issues/229)) 
([1728bf9](https://github.com/agentcontrol/agent-control/commit/1728bf9a89d76cdb19732c3b87ce2c9a4382df4e)) @@ -61,7 +61,7 @@ ### Features -* **evaluators:** add built-in budget evaluator for per-agent cost tracking ([#144](https://github.com/agentcontrol/agent-control/issues/144)) ([d4ce113](https://github.com/agentcontrol/agent-control/commit/d4ce113488c42e47cbba9364f47a2905e4cc5b35)), closes [#130](https://github.com/agentcontrol/agent-control/issues/130) +* **rules:** add built-in budget rule for per-agent cost tracking ([#144](https://github.com/agentcontrol/agent-control/issues/144)) ([d4ce113](https://github.com/agentcontrol/agent-control/commit/d4ce113488c42e47cbba9364f47a2905e4cc5b35)), closes [#130](https://github.com/agentcontrol/agent-control/issues/130) * **sdk:** add external sink ([#175](https://github.com/agentcontrol/agent-control/issues/175)) ([45f3645](https://github.com/agentcontrol/agent-control/commit/45f3645ae3a7815d2c59cfa6629f7d65906d093b)) * **server:** Align condition and template depth limits ([#166](https://github.com/agentcontrol/agent-control/issues/166)) ([03f402e](https://github.com/agentcontrol/agent-control/commit/03f402e4ddf36697579f2e6a4d4a9d279cb39ce1)) * **server:** Phase 0: add control version history and soft-delete unusable legacy controls ([#172](https://github.com/agentcontrol/agent-control/issues/172)) ([e5b2b33](https://github.com/agentcontrol/agent-control/commit/e5b2b33ce86cdfbd395d5ee03652ba4baabe23a2)) @@ -84,7 +84,7 @@ ### Features -* **evaluators:** add starts_with/ends_with mode to list evaluator ([#154](https://github.com/agentcontrol/agent-control/issues/154)) ([bf1f7d7](https://github.com/agentcontrol/agent-control/commit/bf1f7d7cf271a231a165699d44f726467b49bd50)) +* **rules:** add starts_with/ends_with mode to list rule ([#154](https://github.com/agentcontrol/agent-control/issues/154)) ([bf1f7d7](https://github.com/agentcontrol/agent-control/commit/bf1f7d7cf271a231a165699d44f726467b49bd50)) * **sdk:** 
[Enterprise Integration]: Add provider agnostic traceing ([#145](https://github.com/agentcontrol/agent-control/issues/145)) ([f1ca27c](https://github.com/agentcontrol/agent-control/commit/f1ca27ccac5f6666f997c876598ef1bcbbae0b6d)) * **sdk:** Add telemetry package to support sinks ([#164](https://github.com/agentcontrol/agent-control/issues/164)) ([2186ba1](https://github.com/agentcontrol/agent-control/commit/2186ba18bfd7f61d96d2af455f34a29addc7b125)) * **sdk:** default merge events in SDK ([#155](https://github.com/agentcontrol/agent-control/issues/155)) ([5984a60](https://github.com/agentcontrol/agent-control/commit/5984a6065ebf63c638ab9249b15b20bdc43464cf)) @@ -99,7 +99,7 @@ * **docs:** add explicit shutdown to quickstart example ([#149](https://github.com/agentcontrol/agent-control/issues/149)) ([b76014f](https://github.com/agentcontrol/agent-control/commit/b76014f1132218448b93169b3c6350f62bac49ac)) * **sdk:** use sync shutdown flush fallback ([#150](https://github.com/agentcontrol/agent-control/issues/150)) ([90265ba](https://github.com/agentcontrol/agent-control/commit/90265ba79132d8a8669f92948cf73568eb38fd5a)) -* **server:** remove unused evaluator config store ([#152](https://github.com/agentcontrol/agent-control/issues/152)) ([dea2873](https://github.com/agentcontrol/agent-control/commit/dea2873241004c9398909609d4b665808270290e)) +* **server:** remove unused rule config store ([#152](https://github.com/agentcontrol/agent-control/issues/152)) ([dea2873](https://github.com/agentcontrol/agent-control/commit/dea2873241004c9398909609d4b665808270290e)) * **server:** Omit null fields in control JSON editor ([#157](https://github.com/agentcontrol/agent-control/issues/157)) ([0aa2f3c](https://github.com/agentcontrol/agent-control/commit/0aa2f3c83fa8b74947a516025995ba1c49855e4e)) * **server:** Update docker-compose.dev.yml to use different container name ([14d4c87](https://github.com/agentcontrol/agent-control/commit/14d4c87130bbfceac0576b383abbd67fd3b63ac2)) * 
**ui:** improve edit control ux, no layout shift, consistent spacing ([#122](https://github.com/agentcontrol/agent-control/issues/122)) ([76d67b9](https://github.com/agentcontrol/agent-control/commit/76d67b9c99ff34497ccd38ec3b7a78f9c09f41d4)) @@ -125,7 +125,7 @@ * **docs:** simplify main README quick start v2 ([#100](https://github.com/agentcontrol/agent-control/issues/100)) ([7701c65](https://github.com/agentcontrol/agent-control/commit/7701c65a49ad2a927b965122224e286784feb554)) * **docs:** Update README.md to accurately mention Strands Agents SDK ([#105](https://github.com/agentcontrol/agent-control/issues/105)) ([84dfa57](https://github.com/agentcontrol/agent-control/commit/84dfa57edd0020da147942e06bff065384bdcead)) -* **evaluators:** add Cisco AI Defense evaluator and examples ([#60](https://github.com/agentcontrol/agent-control/issues/60)) ([52e2416](https://github.com/agentcontrol/agent-control/commit/52e241657103f90ba4a6a54fefd68836db36fd16)) +* **rules:** add Cisco AI Defense rule and examples ([#60](https://github.com/agentcontrol/agent-control/issues/60)) ([52e2416](https://github.com/agentcontrol/agent-control/commit/52e241657103f90ba4a6a54fefd68836db36fd16)) * **sdk:** add Google ADK framework integration ([#83](https://github.com/agentcontrol/agent-control/issues/83)) ([f2b53ee](https://github.com/agentcontrol/agent-control/commit/f2b53eeb115bd9f178b00728604ec9f90211e283)) * **server:** implement recursive control condition trees ([#115](https://github.com/agentcontrol/agent-control/issues/115)) ([2ff4c12](https://github.com/agentcontrol/agent-control/commit/2ff4c121723c1a304fecfd2e179e4b6f4c71af2b)) @@ -134,12 +134,12 @@ * **docs:** clarify README quickstart flow ([#119](https://github.com/agentcontrol/agent-control/issues/119)) ([317c626](https://github.com/agentcontrol/agent-control/commit/317c6267812f4887bad8ca8742365c235809f60d)) * **docs:** cleanup readme ([#66](https://github.com/agentcontrol/agent-control/issues/66)) 
([eca0d6b](https://github.com/agentcontrol/agent-control/commit/eca0d6b621f29f61cb6a87d04aae95f6e51ec454)) * **docs:** promote Google ADK plugin in README ([#99](https://github.com/agentcontrol/agent-control/issues/99)) ([f038aa4](https://github.com/agentcontrol/agent-control/commit/f038aa4cc048e331f59fa4bcc6df0c266f14953e)) -* **evaluators:** address Cisco AI Defense review issues ([#103](https://github.com/agentcontrol/agent-control/issues/103)) ([81da81b](https://github.com/agentcontrol/agent-control/commit/81da81bdab5b1a8d3ca4904b8cd9e4ed4843a5ba)) +* **rules:** address Cisco AI Defense review issues ([#103](https://github.com/agentcontrol/agent-control/issues/103)) ([81da81b](https://github.com/agentcontrol/agent-control/commit/81da81bdab5b1a8d3ca4904b8cd9e4ed4843a5ba)) * **examples:** shut down SDK in short-lived demos ([#85](https://github.com/agentcontrol/agent-control/issues/85)) ([df43264](https://github.com/agentcontrol/agent-control/commit/df432644ac14d27d10442779532ed98a0263ea88)) * **sdk:** fail closed on post-hook evaluation errors ([#114](https://github.com/agentcontrol/agent-control/issues/114)) ([67f8fc1](https://github.com/agentcontrol/agent-control/commit/67f8fc1a3dd81d7516f8961a2af7372bfd137681)), closes [#112](https://github.com/agentcontrol/agent-control/issues/112) * **sdk:** skip evaluation when no controls apply ([#124](https://github.com/agentcontrol/agent-control/issues/124)) ([d2cd9f9](https://github.com/agentcontrol/agent-control/commit/d2cd9f9aaa8a20df79e6b2bdd9a20263f510946e)) -* **server:** escape ILIKE pattern in evaluator-configs list endpoint ([#108](https://github.com/agentcontrol/agent-control/issues/108)) ([cb6e89f](https://github.com/agentcontrol/agent-control/commit/cb6e89f134bd8cea944bbb99419b3d4a26f98d50)) -* **server:** reject empty string list evaluator values ([#121](https://github.com/agentcontrol/agent-control/issues/121)) 
([2c55d27](https://github.com/agentcontrol/agent-control/commit/2c55d27fc4e1b2b3235a2b672226e77f3365c1bf)) +* **server:** escape ILIKE pattern in rule-configs list endpoint ([#108](https://github.com/agentcontrol/agent-control/issues/108)) ([cb6e89f](https://github.com/agentcontrol/agent-control/commit/cb6e89f134bd8cea944bbb99419b3d4a26f98d50)) +* **server:** reject empty string list rule values ([#121](https://github.com/agentcontrol/agent-control/issues/121)) ([2c55d27](https://github.com/agentcontrol/agent-control/commit/2c55d27fc4e1b2b3235a2b672226e77f3365c1bf)) * **server:** revert unwanted changes in server config ([#117](https://github.com/agentcontrol/agent-control/issues/117)) ([d92e168](https://github.com/agentcontrol/agent-control/commit/d92e168fe1c771f7f9824dc29d8d4b1a71501d8e)) * **ui:** create flow - name trimming issue ([#93](https://github.com/agentcontrol/agent-control/issues/93)) ([1a9759d](https://github.com/agentcontrol/agent-control/commit/1a9759d0b187cf5ac3e846a61cb5b82ec9d5e2e0)) @@ -159,11 +159,11 @@ * **docs:** add centered logo, header, and badges to README ([#92](https://github.com/agentcontrol/agent-control/issues/92)) ([39c3cbf](https://github.com/agentcontrol/agent-control/commit/39c3cbf02dc84770284463d3e8c755e2b511306c)) * **docs:** Test all examples ([#16](https://github.com/agentcontrol/agent-control/issues/16)) ([39e95c2](https://github.com/agentcontrol/agent-control/commit/39e95c20dff4275fae9a44f305b0d1b354b3f24e)) -* **evaluators:** migrate sqlglot rs extra to sqlglot c ([#86](https://github.com/agentcontrol/agent-control/issues/86)) ([5e3e48c](https://github.com/agentcontrol/agent-control/commit/5e3e48cb67b80e6035c074fba6e42cc652194813)) +* **rules:** migrate sqlglot rs extra to sqlglot c ([#86](https://github.com/agentcontrol/agent-control/issues/86)) ([5e3e48c](https://github.com/agentcontrol/agent-control/commit/5e3e48cb67b80e6035c074fba6e42cc652194813)) * **infra:** fix docker compose to make ui work 
([#82](https://github.com/agentcontrol/agent-control/issues/82)) ([5edbb6b](https://github.com/agentcontrol/agent-control/commit/5edbb6b89801a8d4a9c333f5de93081a9ce0c593)) * **infra:** Remove UI service from docker-compose.yml ([#91](https://github.com/agentcontrol/agent-control/issues/91)) ([330ef55](https://github.com/agentcontrol/agent-control/commit/330ef55b5e369a647476ead4456b1331c6e683c9)) -* **sdk): Revert "fix(sdk:** bundle evaluators in sdk wheel" ([#90](https://github.com/agentcontrol/agent-control/issues/90)) ([b516ea6](https://github.com/agentcontrol/agent-control/commit/b516ea6375257b2116dc68f5974ffd833fd0f783)), closes [agentcontrol/agent-control#89](https://github.com/agentcontrol/agent-control/issues/89) -* **sdk:** bundle evaluators in sdk wheel ([#89](https://github.com/agentcontrol/agent-control/issues/89)) ([ea5889a](https://github.com/agentcontrol/agent-control/commit/ea5889a1de5cd79b7b4fd59b2a914ffb7294c158)) +* **sdk): Revert "fix(sdk:** bundle rules in sdk wheel" ([#90](https://github.com/agentcontrol/agent-control/issues/90)) ([b516ea6](https://github.com/agentcontrol/agent-control/commit/b516ea6375257b2116dc68f5974ffd833fd0f783)), closes [agentcontrol/agent-control#89](https://github.com/agentcontrol/agent-control/issues/89) +* **sdk:** bundle rules in sdk wheel ([#89](https://github.com/agentcontrol/agent-control/issues/89)) ([ea5889a](https://github.com/agentcontrol/agent-control/commit/ea5889a1de5cd79b7b4fd59b2a914ffb7294c158)) * **sdk:** fix observability events not reaching the server ([#63](https://github.com/agentcontrol/agent-control/issues/63)) ([70016db](https://github.com/agentcontrol/agent-control/commit/70016db3089b65a970d81f16b2ef01556857ce2e)) * **ui:** name update being saved now ([#87](https://github.com/agentcontrol/agent-control/issues/87)) ([919672d](https://github.com/agentcontrol/agent-control/commit/919672dc882df4f68cc54bbea08094ee0bf0d7dd)) * **ui:** Step name not getting saved 
([#68](https://github.com/agentcontrol/agent-control/issues/68)) ([13abef9](https://github.com/agentcontrol/agent-control/commit/13abef990a6cff0b6cd4c705651a9d12f87de517)) @@ -181,43 +181,43 @@ * **server:** Feature/56688 fix image bug (#48) * **sdk:** a bug in docker file (#46) * **server:** Feature/56688 fix docker and create bash (#45) -* **evaluators:** Evaluator reorganization with new package structure +* **rules:** Rule reorganization with new package structure Package Structure: -- agent-control-evaluators (v3.0.0): core + regex, list, json, sql -- agent-control-evaluator-galileo (v3.0.0): Luna2 evaluator +- agent-control-rules (v3.0.0): core + regex, list, json, sql +- agent-control-rule-galileo (v3.0.0): Luna2 rule Key Changes: -- Entry points for evaluator discovery (agent_control.evaluators) -- Dot notation for external evaluators (galileo.luna2 not galileo/luna2) +- Entry points for rule discovery (agent_control.rules) +- Dot notation for external rules (galileo.luna2 not galileo/luna2) - Dynamic __version__ via importlib.metadata -- Server uses evaluators as runtime dep (no longer vendored) +- Server uses rules as runtime dep (no longer vendored) - Release workflow publishes both packages to PyPI Bug Fixes: -- JSON evaluator: field_constraints/field_patterns in extra-fields allow-list -- SQL evaluator: LIMIT/OFFSET bypass fix +- JSON rule: field_constraints/field_patterns in extra-fields allow-list +- SQL rule: LIMIT/OFFSET bypass fix Migration: -- Import: agent_control_evaluator_galileo.luna2 (not agent_control_evaluators.galileo_luna2) -- DB: UPDATE controls SET evaluator.name replace('/', '.') +- Import: agent_control_rule_galileo.luna2 (not agent_control_rules.galileo_luna2) +- DB: UPDATE controls SET rule.name replace('/', '.') * **server:** add time-series stats and split API endpoints (#6) -* **evaluators:** rename plugin to evaluator throughout (#81) +* **rules:** rename plugin to rule throughout (#81) * **models:** simplify step model and 
schema (#70) ### Features * Add plugin auto-discovery via Python entry points ([#49](https://github.com/agentcontrol/agent-control/issues/49)) ([1521182](https://github.com/agentcontrol/agent-control/commit/1521182c3adbad9cd160c465bdc8fc7ebb6e14ae)) * **docs:** add GitHub badges and CI coverage reporting ([#90](https://github.com/agentcontrol/agent-control/issues/90)) ([be1fa14](https://github.com/agentcontrol/agent-control/commit/be1fa140e4208993886f0afaef29b4a45fd27253)) -* **evaluators:** add required_column_values for multi-tenant SQL validation ([#30](https://github.com/agentcontrol/agent-control/issues/30)) ([532386c](https://github.com/agentcontrol/agent-control/commit/532386cb933e5d3f07c0939f41701500ef4f4007)) +* **rules:** add required_column_values for multi-tenant SQL validation ([#30](https://github.com/agentcontrol/agent-control/issues/30)) ([532386c](https://github.com/agentcontrol/agent-control/commit/532386cb933e5d3f07c0939f41701500ef4f4007)) * **sdk-ts:** automate semantic-release for npm publishing ([#52](https://github.com/agentcontrol/agent-control/issues/52)) ([2b43958](https://github.com/agentcontrol/agent-control/commit/2b43958b6b2e850a044869f6f417194e1cb79d43)) * **sdk:** Add PyPI packaging with semantic release ([#52](https://github.com/agentcontrol/agent-control/issues/52)) ([7c24f7f](https://github.com/agentcontrol/agent-control/commit/7c24f7f1ceee94b31098b3e3dd5ada1c16aa0f68)) * **sdk:** Auto-populate init() steps from [@control](https://github.com/control)() decorators ([#23](https://github.com/agentcontrol/agent-control/issues/23)) ([dc0f2a4](https://github.com/agentcontrol/agent-control/commit/dc0f2a4061143a8e6de6a4a35677d229f14d03f9)) -* **sdk:** export ControlScope, ControlMatch, and EvaluatorResult models ([#18](https://github.com/agentcontrol/agent-control/issues/18)) ([0d49cad](https://github.com/agentcontrol/agent-control/commit/0d49cad9da5c9e76c32d652fadbc69cec698a611)) +* **sdk:** export ControlScope, ControlMatch, and 
RuleResult models ([#18](https://github.com/agentcontrol/agent-control/issues/18)) ([0d49cad](https://github.com/agentcontrol/agent-control/commit/0d49cad9da5c9e76c32d652fadbc69cec698a611)) * **sdk:** Get Agent Controls from SDK Init ([#15](https://github.com/agentcontrol/agent-control/issues/15)) ([a485f93](https://github.com/agentcontrol/agent-control/commit/a485f93e15e49ddb975dee4bf4e24dfa3310d976)) * **sdk:** Refresh controls in a background loop ([#43](https://github.com/agentcontrol/agent-control/issues/43)) ([03f826d](https://github.com/agentcontrol/agent-control/commit/03f826d3e02f844d81c1a70ae4ea621a1788f9c7)) * **sdk:** ship TypeScript SDK with deterministic method naming ([#32](https://github.com/agentcontrol/agent-control/issues/32)) ([a76e9b0](https://github.com/agentcontrol/agent-control/commit/a76e9b005a99b3d23eca9d77e498c3c6f0476559)) -* **server:** add evaluator config store ([#78](https://github.com/agentcontrol/agent-control/issues/78)) ([cc14aa6](https://github.com/agentcontrol/agent-control/commit/cc14aa68391fd7fd4a187364a0a9a9fe712129fe)) +* **server:** add rule config store ([#78](https://github.com/agentcontrol/agent-control/issues/78)) ([cc14aa6](https://github.com/agentcontrol/agent-control/commit/cc14aa68391fd7fd4a187364a0a9a9fe712129fe)) * **server:** add initAgent conflict_mode overwrite mode with SDK defaults ([#40](https://github.com/agentcontrol/agent-control/issues/40)) ([f3ed2b8](https://github.com/agentcontrol/agent-control/commit/f3ed2b8a3207ff8fca324e0d01bdc33d768ce137)) * **server:** Add observability system for control execution tracking ([#44](https://github.com/agentcontrol/agent-control/issues/44)) ([fd0bddc](https://github.com/agentcontrol/agent-control/commit/fd0bddce3a9aa53472edb13e1c8fee6305571e98)) * **server:** add prometheus metrics for endpoints ([#68](https://github.com/agentcontrol/agent-control/issues/68)) ([775612c](https://github.com/agentcontrol/agent-control/commit/775612c2ebe4895760c326bb8e23ee29a5101247)) 
@@ -239,7 +239,7 @@ Migration: * **ci:** Enable pr title on prs ([#56](https://github.com/agentcontrol/agent-control/issues/56)) ([3d8b5fe](https://github.com/agentcontrol/agent-control/commit/3d8b5fe8c21f1ffd7fd6f6bf4d4f4eaebbad9106)) * **ci:** Fix release ([#11](https://github.com/agentcontrol/agent-control/issues/11)) ([9dd3dd7](https://github.com/agentcontrol/agent-control/commit/9dd3dd795227f1d1db62e5ce2e8b336fe1c909f1)) * **ci:** Use galileo-automation bot for releases ([#57](https://github.com/agentcontrol/agent-control/issues/57)) ([bc8eea0](https://github.com/agentcontrol/agent-control/commit/bc8eea088c7e347daf8bc34eee528129ddb61d7f)) -* **docs:** Add Example for Evaluator Extension ([#3](https://github.com/agentcontrol/agent-control/issues/3)) ([c2a70b3](https://github.com/agentcontrol/agent-control/commit/c2a70b335fb55481ad63b367ca87ba46de085343)) +* **docs:** Add Example for Rule Extension ([#3](https://github.com/agentcontrol/agent-control/issues/3)) ([c2a70b3](https://github.com/agentcontrol/agent-control/commit/c2a70b335fb55481ad63b367ca87ba46de085343)) * **docs:** add setup script ([#49](https://github.com/agentcontrol/agent-control/issues/49)) ([7a212c3](https://github.com/agentcontrol/agent-control/commit/7a212c3d182ba3e07e56b0131957315b788f553a)) * **docs:** Clean up Protect ([#76](https://github.com/agentcontrol/agent-control/issues/76)) ([99c16fd](https://github.com/agentcontrol/agent-control/commit/99c16fd8ed6620363f919818ebe4083f1489ba1c)) * **docs:** Fix Examples for LangGraph ([#64](https://github.com/agentcontrol/agent-control/issues/64)) ([23b30ae](https://github.com/agentcontrol/agent-control/commit/23b30ae1ddc5b878d8375b4f39a6617e7a0ae604)) @@ -249,9 +249,9 @@ Migration: * **docs:** Update readme ([#37](https://github.com/agentcontrol/agent-control/issues/37)) ([7531d83](https://github.com/agentcontrol/agent-control/commit/7531d8309d4755d9ceddf4a2d840c147b36a6e14)) * **docs:** Update README 
([#2](https://github.com/agentcontrol/agent-control/issues/2)) ([379bb15](https://github.com/agentcontrol/agent-control/commit/379bb158700b93aa549ef00eda57ccc2f88cb71f)) * **examples:** Control sets cleanup with signed ([#65](https://github.com/agentcontrol/agent-control/issues/65)) ([af7b5fb](https://github.com/agentcontrol/agent-control/commit/af7b5fb44fe800a98c617ee70ae258576e146115)) -* **examples:** Update crew ai example to use evaluator ([#93](https://github.com/agentcontrol/agent-control/issues/93)) ([1c65084](https://github.com/agentcontrol/agent-control/commit/1c6508434860ed5bb56c622a721197c5a8f7ad4e)) +* **examples:** Update crew ai example to use rule ([#93](https://github.com/agentcontrol/agent-control/issues/93)) ([1c65084](https://github.com/agentcontrol/agent-control/commit/1c6508434860ed5bb56c622a721197c5a8f7ad4e)) * **infra:** Add plugins directory to Dockerfile ([#58](https://github.com/agentcontrol/agent-control/issues/58)) ([171d459](https://github.com/agentcontrol/agent-control/commit/171d459377aa294087f0af1561345a5e010120cb)) -* **infra:** install engine/evaluators in server image ([#14](https://github.com/agentcontrol/agent-control/issues/14)) ([d5ae157](https://github.com/agentcontrol/agent-control/commit/d5ae1571071afd34a00b376e650d9e1ce02f0b2d)) +* **infra:** install engine/rules in server image ([#14](https://github.com/agentcontrol/agent-control/issues/14)) ([d5ae157](https://github.com/agentcontrol/agent-control/commit/d5ae1571071afd34a00b376e650d9e1ce02f0b2d)) * **models:** use StrEnum for error enums ([#12](https://github.com/agentcontrol/agent-control/issues/12)) ([3f41c9f](https://github.com/agentcontrol/agent-control/commit/3f41c9f974ba5afe873f30197aba7f2ff142f616)) * **sdk-ts:** add conventional commits preset dependency ([#55](https://github.com/agentcontrol/agent-control/issues/55)) ([540fe9d](https://github.com/agentcontrol/agent-control/commit/540fe9df8fc69f223218ef2defbcc662ab3a902a)) * **sdk-ts:** export npm token for 
semantic-release npm auth ([#54](https://github.com/agentcontrol/agent-control/issues/54)) ([1b6b993](https://github.com/agentcontrol/agent-control/commit/1b6b993eaed8c63ea4e61b21bc4fb27accaa2a49)) @@ -261,7 +261,7 @@ Migration: * **sdk:** emit observability events for SDK-evaluated controls and fix non_matches propagation ([#24](https://github.com/agentcontrol/agent-control/issues/24)) ([6a9da69](https://github.com/agentcontrol/agent-control/commit/6a9da69aeb89a0a0a207f05fcbb03f1260faa40e)) * **sdk:** enforce UUID agent IDs ([#9](https://github.com/agentcontrol/agent-control/issues/9)) ([5ccdbd0](https://github.com/agentcontrol/agent-control/commit/5ccdbd04c73b50e86362ca3e4204cb0e84fa1b5c)) * **sdk:** Fix logging ([#77](https://github.com/agentcontrol/agent-control/issues/77)) ([b1f078c](https://github.com/agentcontrol/agent-control/commit/b1f078c52c29ac048a9bcbea09252786e842acbd)) -* **sdk:** plugin to evaluator.. agent_protect to agent_control ([#88](https://github.com/agentcontrol/agent-control/issues/88)) ([fc9b088](https://github.com/agentcontrol/agent-control/commit/fc9b088fcd091132a1e38deae372b73fc2834beb)) +* **sdk:** plugin to rule.. 
agent_protect to agent_control ([#88](https://github.com/agentcontrol/agent-control/issues/88)) ([fc9b088](https://github.com/agentcontrol/agent-control/commit/fc9b088fcd091132a1e38deae372b73fc2834beb)) * **server:** enforce public-safe API error responses ([#20](https://github.com/agentcontrol/agent-control/issues/20)) ([e50d817](https://github.com/agentcontrol/agent-control/commit/e50d817e2b81347c9143dcae8d461e7e3e581fd9)) * **server:** Feature/56688 fix docker and create bash ([#45](https://github.com/agentcontrol/agent-control/issues/45)) ([7277e27](https://github.com/agentcontrol/agent-control/commit/7277e2730c16e717ad5ba8248b946252b22e69cb)) * **server:** Feature/56688 fix image bug ([#48](https://github.com/agentcontrol/agent-control/issues/48)) ([71e6b44](https://github.com/agentcontrol/agent-control/commit/71e6b44f3cdee26c7a97a2cfc0ef6223e52bdb3a)) @@ -275,8 +275,8 @@ Migration: ### Code Refactoring -* **evaluators:** rename plugin to evaluator throughout ([#81](https://github.com/agentcontrol/agent-control/issues/81)) ([0134682](https://github.com/agentcontrol/agent-control/commit/0134682c1d0f167528d7267507dbcf3a1e7b3192)) -* **evaluators:** split into builtin + extra packages for PyPI ([#5](https://github.com/agentcontrol/agent-control/issues/5)) ([0e0a78a](https://github.com/agentcontrol/agent-control/commit/0e0a78a9fa9c39a5709299c2e3d77c0609110280)) +* **rules:** rename plugin to rule throughout ([#81](https://github.com/agentcontrol/agent-control/issues/81)) ([0134682](https://github.com/agentcontrol/agent-control/commit/0134682c1d0f167528d7267507dbcf3a1e7b3192)) +* **rules:** split into builtin + extra packages for PyPI ([#5](https://github.com/agentcontrol/agent-control/issues/5)) ([0e0a78a](https://github.com/agentcontrol/agent-control/commit/0e0a78a9fa9c39a5709299c2e3d77c0609110280)) * **models:** simplify step model and schema ([#70](https://github.com/agentcontrol/agent-control/issues/70)) 
([4c1d637](https://github.com/agentcontrol/agent-control/commit/4c1d6378a4a05edc44f02fa78c1698b9203da81b)) # Changelog diff --git a/sdks/typescript/overlays/method-names.overlay.yaml b/sdks/typescript/overlays/method-names.overlay.yaml index 967847c6..51ab025e 100644 --- a/sdks/typescript/overlays/method-names.overlay.yaml +++ b/sdks/typescript/overlays/method-names.overlay.yaml @@ -55,16 +55,6 @@ actions: x-speakeasy-group: agents x-speakeasy-name-override: removeControl - - target: $["paths"]["/api/v1/agents/{agent_name}/evaluators"]["get"] - update: - x-speakeasy-group: agents - x-speakeasy-name-override: listEvaluators - - - target: $["paths"]["/api/v1/agents/{agent_name}/evaluators/{evaluator_name}"]["get"] - update: - x-speakeasy-group: agents - x-speakeasy-name-override: getEvaluator - - target: $["paths"]["/api/v1/agents/{agent_name}/policies"]["get"] update: x-speakeasy-group: agents @@ -100,6 +90,16 @@ actions: x-speakeasy-group: agents x-speakeasy-name-override: updatePolicy + - target: $["paths"]["/api/v1/agents/{agent_name}/rules"]["get"] + update: + x-speakeasy-group: agents + x-speakeasy-name-override: listRules + + - target: $["paths"]["/api/v1/agents/{agent_name}/rules/{rule_name}"]["get"] + update: + x-speakeasy-group: agents + x-speakeasy-name-override: getRule + - target: $["paths"]["/api/v1/auth/runtime-token-exchange"]["post"] update: x-speakeasy-group: auth @@ -215,11 +215,6 @@ actions: x-speakeasy-group: evaluation x-speakeasy-name-override: evaluate - - target: $["paths"]["/api/v1/evaluators"]["get"] - update: - x-speakeasy-group: evaluators - x-speakeasy-name-override: list - - target: $["paths"]["/api/v1/observability/events"]["post"] update: x-speakeasy-group: observability @@ -265,6 +260,11 @@ actions: x-speakeasy-group: policies x-speakeasy-name-override: removeControl + - target: $["paths"]["/api/v1/rules"]["get"] + update: + x-speakeasy-group: rules + x-speakeasy-name-override: list + - target: $["paths"]["/health"]["get"] update: 
x-speakeasy-group: system diff --git a/sdks/typescript/src/client.ts b/sdks/typescript/src/client.ts index c6978c11..c3ae5f77 100644 --- a/sdks/typescript/src/client.ts +++ b/sdks/typescript/src/client.ts @@ -24,7 +24,7 @@ export type AuthApi = AgentControlSDK["auth"]; export type ControlBindingsApi = AgentControlSDK["controlBindings"]; export type ControlsApi = AgentControlSDK["controls"]; export type EvaluationApi = AgentControlSDK["evaluation"]; -export type EvaluatorsApi = AgentControlSDK["evaluators"]; +export type RulesApi = AgentControlSDK["rules"]; export type ObservabilityApi = AgentControlSDK["observability"]; export type PoliciesApi = AgentControlSDK["policies"]; export type SystemApi = AgentControlSDK["system"]; @@ -72,8 +72,8 @@ export class AgentControlClient { return this.requireSDK().evaluation; } - get evaluators(): EvaluatorsApi { - return this.requireSDK().evaluators; + get rules(): RulesApi { + return this.requireSDK().rules; } get observability(): ObservabilityApi { diff --git a/sdks/typescript/src/generated/funcs/agents-get-evaluator.ts b/sdks/typescript/src/generated/funcs/agents-get-rule.ts similarity index 82% rename from sdks/typescript/src/generated/funcs/agents-get-evaluator.ts rename to sdks/typescript/src/generated/funcs/agents-get-rule.ts index ceca1ec0..db14185f 100644 --- a/sdks/typescript/src/generated/funcs/agents-get-evaluator.ts +++ b/sdks/typescript/src/generated/funcs/agents-get-rule.ts @@ -28,31 +28,30 @@ import { APICall, APIPromise } from "../types/async.js"; import { Result } from "../types/fp.js"; /** - * Get specific evaluator schema + * Get specific rule schema * * @remarks - * Get a specific evaluator schema registered with an agent. + * Get a specific rule schema registered with an agent. 
* * Args: * agent_name: Agent identifier - * evaluator_name: Name of the evaluator + * rule_name: Name of the rule * db: Database session (injected) * principal: Authorized request principal * * Returns: - * EvaluatorSchemaItem with schema details + * RuleSchemaItem with schema details * * Raises: - * HTTPException 404: Agent or evaluator not found + * HTTPException 404: Agent or rule not found */ -export function agentsGetEvaluator( +export function agentsGetRule( client: AgentControlSDKCore, - request: - operations.GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest, + request: operations.GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest, options?: RequestOptions, ): APIPromise< Result< - models.EvaluatorSchemaItem, + models.RuleSchemaItem, | errors.HTTPValidationError | AgentControlSDKError | ResponseValidationError @@ -73,13 +72,12 @@ export function agentsGetEvaluator( async function $do( client: AgentControlSDKCore, - request: - operations.GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest, + request: operations.GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest, options?: RequestOptions, ): Promise< [ Result< - models.EvaluatorSchemaItem, + models.RuleSchemaItem, | errors.HTTPValidationError | AgentControlSDKError | ResponseValidationError @@ -98,7 +96,7 @@ async function $do( (value) => z.parse( operations - .GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest$outboundSchema, + .GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest$outboundSchema, value, ), "Input validation failed", @@ -114,15 +112,15 @@ async function $do( explode: false, charEncoding: "percent", }), - evaluator_name: encodeSimple("evaluator_name", payload.evaluator_name, { + rule_name: encodeSimple("rule_name", payload.rule_name, { explode: false, charEncoding: "percent", }), }; - const path = pathToFunc( - "/api/v1/agents/{agent_name}/evaluators/{evaluator_name}", - )(pathParams); + const path = 
pathToFunc("/api/v1/agents/{agent_name}/rules/{rule_name}")( + pathParams, + ); const headers = new Headers(compactMap({ Accept: "application/json", @@ -136,7 +134,7 @@ async function $do( options: client._options, baseURL: options?.serverURL ?? client._baseURL ?? "", operationID: - "get_agent_evaluator_api_v1_agents__agent_name__evaluators__evaluator_name__get", + "get_agent_rule_api_v1_agents__agent_name__rules__rule_name__get", oAuth2Scopes: null, resolvedSecurity: requestSecurity, @@ -179,7 +177,7 @@ async function $do( }; const [result] = await M.match< - models.EvaluatorSchemaItem, + models.RuleSchemaItem, | errors.HTTPValidationError | AgentControlSDKError | ResponseValidationError @@ -190,7 +188,7 @@ async function $do( | UnexpectedClientError | SDKValidationError >( - M.json(200, models.EvaluatorSchemaItem$inboundSchema), + M.json(200, models.RuleSchemaItem$inboundSchema), M.jsonErr(422, errors.HTTPValidationError$inboundSchema), M.fail("4XX"), M.fail("5XX"), diff --git a/sdks/typescript/src/generated/funcs/agents-init.ts b/sdks/typescript/src/generated/funcs/agents-init.ts index d1136c2f..9268da59 100644 --- a/sdks/typescript/src/generated/funcs/agents-init.ts +++ b/sdks/typescript/src/generated/funcs/agents-init.ts @@ -38,7 +38,7 @@ import { Result } from "../types/fp.js"; * * conflict_mode controls registration conflict handling: * - strict (default): preserve compatibility checks and conflict errors - * - overwrite: latest init payload replaces steps/evaluators and returns change summary + * - overwrite: latest init payload replaces steps/rules and returns change summary * * The returned ``controls`` list is the de-duplicated union of the agent's * direct controls, policy-derived controls, and (when ``target_type`` and diff --git a/sdks/typescript/src/generated/funcs/agents-list-evaluators.ts b/sdks/typescript/src/generated/funcs/agents-list-rules.ts similarity index 84% rename from sdks/typescript/src/generated/funcs/agents-list-evaluators.ts rename 
to sdks/typescript/src/generated/funcs/agents-list-rules.ts index 4217e752..8fc5d594 100644 --- a/sdks/typescript/src/generated/funcs/agents-list-evaluators.ts +++ b/sdks/typescript/src/generated/funcs/agents-list-rules.ts @@ -28,36 +28,35 @@ import { APICall, APIPromise } from "../types/async.js"; import { Result } from "../types/fp.js"; /** - * List agent's registered evaluator schemas + * List agent's registered rule schemas * * @remarks - * List all evaluator schemas registered with an agent. + * List all rule schemas registered with an agent. * - * Evaluator schemas are registered via initAgent and used for: + * Rule schemas are registered via initAgent and used for: * - Config validation when creating Controls * - UI to display available config options * * Args: * agent_name: Agent identifier - * cursor: Optional cursor for pagination (name of last evaluator from previous page) + * cursor: Optional cursor for pagination (name of last rule from previous page) * limit: Pagination limit (default 20, max 100) * db: Database session (injected) * principal: Authorized request principal * * Returns: - * ListEvaluatorsResponse with evaluator schemas and pagination + * ListRulesResponse with rule schemas and pagination * * Raises: * HTTPException 404: Agent not found */ -export function agentsListEvaluators( +export function agentsListRules( client: AgentControlSDKCore, - request: - operations.ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest, + request: operations.ListAgentRulesApiV1AgentsAgentNameRulesGetRequest, options?: RequestOptions, ): APIPromise< Result< - models.ListEvaluatorsResponse, + models.ListRulesResponse, | errors.HTTPValidationError | AgentControlSDKError | ResponseValidationError @@ -78,13 +77,12 @@ export function agentsListEvaluators( async function $do( client: AgentControlSDKCore, - request: - operations.ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest, + request: 
operations.ListAgentRulesApiV1AgentsAgentNameRulesGetRequest, options?: RequestOptions, ): Promise< [ Result< - models.ListEvaluatorsResponse, + models.ListRulesResponse, | errors.HTTPValidationError | AgentControlSDKError | ResponseValidationError @@ -103,7 +101,7 @@ async function $do( (value) => z.parse( operations - .ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest$outboundSchema, + .ListAgentRulesApiV1AgentsAgentNameRulesGetRequest$outboundSchema, value, ), "Input validation failed", @@ -121,7 +119,7 @@ async function $do( }), }; - const path = pathToFunc("/api/v1/agents/{agent_name}/evaluators")(pathParams); + const path = pathToFunc("/api/v1/agents/{agent_name}/rules")(pathParams); const query = encodeFormQuery({ "cursor": payload.cursor, @@ -139,8 +137,7 @@ async function $do( const context = { options: client._options, baseURL: options?.serverURL ?? client._baseURL ?? "", - operationID: - "list_agent_evaluators_api_v1_agents__agent_name__evaluators_get", + operationID: "list_agent_rules_api_v1_agents__agent_name__rules_get", oAuth2Scopes: null, resolvedSecurity: requestSecurity, @@ -184,7 +181,7 @@ async function $do( }; const [result] = await M.match< - models.ListEvaluatorsResponse, + models.ListRulesResponse, | errors.HTTPValidationError | AgentControlSDKError | ResponseValidationError @@ -195,7 +192,7 @@ async function $do( | UnexpectedClientError | SDKValidationError >( - M.json(200, models.ListEvaluatorsResponse$inboundSchema), + M.json(200, models.ListRulesResponse$inboundSchema), M.jsonErr(422, errors.HTTPValidationError$inboundSchema), M.fail("4XX"), M.fail("5XX"), diff --git a/sdks/typescript/src/generated/funcs/agents-list.ts b/sdks/typescript/src/generated/funcs/agents-list.ts index f887d0b5..40d31848 100644 --- a/sdks/typescript/src/generated/funcs/agents-list.ts +++ b/sdks/typescript/src/generated/funcs/agents-list.ts @@ -34,7 +34,7 @@ import { Result } from "../types/fp.js"; * List all registered agents with cursor-based 
pagination. * * Returns a summary of each agent including identifier, policy associations, - * and counts of registered steps and evaluators. Results are scoped to + * and counts of registered steps and rules. Results are scoped to * the request's namespace; agents in other namespaces are not visible. * * Args: diff --git a/sdks/typescript/src/generated/funcs/agents-update.ts b/sdks/typescript/src/generated/funcs/agents-update.ts index aff9d827..914d2d31 100644 --- a/sdks/typescript/src/generated/funcs/agents-update.ts +++ b/sdks/typescript/src/generated/funcs/agents-update.ts @@ -28,17 +28,17 @@ import { APICall, APIPromise } from "../types/async.js"; import { Result } from "../types/fp.js"; /** - * Modify agent (remove steps/evaluators) + * Modify agent (remove steps/rules) * * @remarks - * Remove steps and/or evaluators from an agent. + * Remove steps and/or rules from an agent. * * This is the complement to initAgent which only adds items. * Removals are idempotent - attempting to remove non-existent items is not an error. * * Args: * agent_name: Agent identifier - * request: Lists of step/evaluator identifiers to remove + * request: Lists of step/rule identifiers to remove * db: Database session (injected) * principal: Authorized request principal * diff --git a/sdks/typescript/src/generated/funcs/evaluators-list.ts b/sdks/typescript/src/generated/funcs/rules-list.ts similarity index 85% rename from sdks/typescript/src/generated/funcs/evaluators-list.ts rename to sdks/typescript/src/generated/funcs/rules-list.ts index 2f378a5f..3d9c04a4 100644 --- a/sdks/typescript/src/generated/funcs/evaluators-list.ts +++ b/sdks/typescript/src/generated/funcs/rules-list.ts @@ -24,28 +24,28 @@ import { APICall, APIPromise } from "../types/async.js"; import { Result } from "../types/fp.js"; /** - * List available evaluators + * List available rules * * @remarks - * List all available evaluators. + * List all available rules. 
* - * Returns metadata and JSON Schema for each built-in evaluator. + * Returns metadata and JSON Schema for each built-in rule. * - * Built-in evaluators: + * Built-in rules: * - **regex**: Regular expression pattern matching * - **list**: List-based value matching with flexible logic * - **json**: JSON validation with schema, types, constraints * - **sql**: SQL query validation * - * Custom evaluators are registered per-agent via initAgent. - * Use GET /agents/{agent_name}/evaluators to list agent-specific schemas. + * Custom rules are registered per-agent via initAgent. + * Use GET /agents/{agent_name}/rules to list agent-specific schemas. */ -export function evaluatorsList( +export function rulesList( client: AgentControlSDKCore, options?: RequestOptions, ): APIPromise< Result< - { [k: string]: models.EvaluatorInfo }, + { [k: string]: models.RuleInfo }, | AgentControlSDKError | ResponseValidationError | ConnectionError @@ -68,7 +68,7 @@ async function $do( ): Promise< [ Result< - { [k: string]: models.EvaluatorInfo }, + { [k: string]: models.RuleInfo }, | AgentControlSDKError | ResponseValidationError | ConnectionError @@ -81,7 +81,7 @@ async function $do( APICall, ] > { - const path = pathToFunc("/api/v1/evaluators")(); + const path = pathToFunc("/api/v1/rules")(); const headers = new Headers(compactMap({ Accept: "application/json", @@ -94,7 +94,7 @@ async function $do( const context = { options: client._options, baseURL: options?.serverURL ?? client._baseURL ?? 
"", - operationID: "get_evaluators_api_v1_evaluators_get", + operationID: "get_rules_api_v1_rules_get", oAuth2Scopes: null, resolvedSecurity: requestSecurity, @@ -132,7 +132,7 @@ async function $do( const response = doResult.value; const [result] = await M.match< - { [k: string]: models.EvaluatorInfo }, + { [k: string]: models.RuleInfo }, | AgentControlSDKError | ResponseValidationError | ConnectionError @@ -142,7 +142,7 @@ async function $do( | UnexpectedClientError | SDKValidationError >( - M.json(200, z.record(z.string(), models.EvaluatorInfo$inboundSchema)), + M.json(200, z.record(z.string(), models.RuleInfo$inboundSchema)), M.fail("4XX"), M.fail("5XX"), )(response, req); diff --git a/sdks/typescript/src/generated/models/agent-summary.ts b/sdks/typescript/src/generated/models/agent-summary.ts index 8f9732c4..15a101e6 100644 --- a/sdks/typescript/src/generated/models/agent-summary.ts +++ b/sdks/typescript/src/generated/models/agent-summary.ts @@ -25,14 +25,14 @@ export type AgentSummary = { * ISO 8601 timestamp when agent was created */ createdAt?: string | null | undefined; - /** - * Number of evaluators registered with the agent - */ - evaluatorCount: number; /** * IDs of policies associated with the agent */ policyIds?: Array | undefined; + /** + * Number of rules registered with the agent + */ + ruleCount: number; /** * Number of steps registered with the agent */ @@ -46,8 +46,8 @@ export const AgentSummary$inboundSchema: z.ZodMiniType = active_controls_count: z._default(types.number(), 0), agent_name: types.string(), created_at: z.optional(z.nullable(types.string())), - evaluator_count: z._default(types.number(), 0), policy_ids: types.optional(z.array(types.number())), + rule_count: z._default(types.number(), 0), step_count: z._default(types.number(), 0), }), z.transform((v) => { @@ -55,8 +55,8 @@ export const AgentSummary$inboundSchema: z.ZodMiniType = "active_controls_count": "activeControlsCount", "agent_name": "agentName", "created_at": "createdAt", - 
"evaluator_count": "evaluatorCount", "policy_ids": "policyIds", + "rule_count": "ruleCount", "step_count": "stepCount", }); }), diff --git a/sdks/typescript/src/generated/models/condition-node-input.ts b/sdks/typescript/src/generated/models/condition-node-input.ts index a156596b..f4d82350 100644 --- a/sdks/typescript/src/generated/models/condition-node-input.ts +++ b/sdks/typescript/src/generated/models/condition-node-input.ts @@ -9,10 +9,10 @@ import { ControlSelector$outboundSchema, } from "./control-selector.js"; import { - EvaluatorSpec, - EvaluatorSpec$Outbound, - EvaluatorSpec$outboundSchema, -} from "./evaluator-spec.js"; + RuleSpec, + RuleSpec$Outbound, + RuleSpec$outboundSchema, +} from "./rule-spec.js"; /** * Recursive boolean condition tree for control evaluation. @@ -22,10 +22,6 @@ export type ConditionNodeInput = { * Logical AND over child conditions. */ and?: Array | null | undefined; - /** - * Leaf evaluator. Must be provided together with selector. - */ - evaluator?: EvaluatorSpec | null | undefined; /** * Logical NOT over a single child condition. */ @@ -35,7 +31,11 @@ export type ConditionNodeInput = { */ or?: Array | null | undefined; /** - * Leaf selector. Must be provided together with evaluator. + * Leaf rule. Must be provided together with selector. + */ + rule?: RuleSpec | null | undefined; + /** + * Leaf selector. Must be provided together with rule. 
*/ selector?: ControlSelector | null | undefined; }; @@ -43,9 +43,9 @@ export type ConditionNodeInput = { /** @internal */ export type ConditionNodeInput$Outbound = { and?: Array | null | undefined; - evaluator?: EvaluatorSpec$Outbound | null | undefined; not?: ConditionNodeInput$Outbound | null | undefined; or?: Array | null | undefined; + rule?: RuleSpec$Outbound | null | undefined; selector?: ControlSelector$Outbound | null | undefined; }; @@ -57,11 +57,11 @@ export const ConditionNodeInput$outboundSchema: z.ZodMiniType< and: z.optional( z.nullable(z.array(z.lazy(() => ConditionNodeInput$outboundSchema))), ), - evaluator: z.optional(z.nullable(EvaluatorSpec$outboundSchema)), not: z.optional(z.nullable(z.lazy(() => ConditionNodeInput$outboundSchema))), or: z.optional( z.nullable(z.array(z.lazy(() => ConditionNodeInput$outboundSchema))), ), + rule: z.optional(z.nullable(RuleSpec$outboundSchema)), selector: z.optional(z.nullable(ControlSelector$outboundSchema)), }); diff --git a/sdks/typescript/src/generated/models/condition-node-output.ts b/sdks/typescript/src/generated/models/condition-node-output.ts index d0320504..7b593495 100644 --- a/sdks/typescript/src/generated/models/condition-node-output.ts +++ b/sdks/typescript/src/generated/models/condition-node-output.ts @@ -10,10 +10,7 @@ import { ControlSelector$inboundSchema, } from "./control-selector.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; -import { - EvaluatorSpec, - EvaluatorSpec$inboundSchema, -} from "./evaluator-spec.js"; +import { RuleSpec, RuleSpec$inboundSchema } from "./rule-spec.js"; /** * Recursive boolean condition tree for control evaluation. @@ -23,10 +20,6 @@ export type ConditionNodeOutput = { * Logical AND over child conditions. */ and?: Array | null | undefined; - /** - * Leaf evaluator. Must be provided together with selector. - */ - evaluator?: EvaluatorSpec | null | undefined; /** * Logical NOT over a single child condition. 
*/ @@ -36,7 +29,11 @@ export type ConditionNodeOutput = { */ or?: Array | null | undefined; /** - * Leaf selector. Must be provided together with evaluator. + * Leaf rule. Must be provided together with selector. + */ + rule?: RuleSpec | null | undefined; + /** + * Leaf selector. Must be provided together with rule. */ selector?: ControlSelector | null | undefined; }; @@ -49,11 +46,11 @@ export const ConditionNodeOutput$inboundSchema: z.ZodMiniType< and: z.optional( z.nullable(z.array(z.lazy(() => ConditionNodeOutput$inboundSchema))), ), - evaluator: z.optional(z.nullable(EvaluatorSpec$inboundSchema)), not: z.optional(z.nullable(z.lazy(() => ConditionNodeOutput$inboundSchema))), or: z.optional( z.nullable(z.array(z.lazy(() => ConditionNodeOutput$inboundSchema))), ), + rule: z.optional(z.nullable(RuleSpec$inboundSchema)), selector: z.optional(z.nullable(ControlSelector$inboundSchema)), }); diff --git a/sdks/typescript/src/generated/models/conflict-mode.ts b/sdks/typescript/src/generated/models/conflict-mode.ts index 4df68c65..ed10059b 100644 --- a/sdks/typescript/src/generated/models/conflict-mode.ts +++ b/sdks/typescript/src/generated/models/conflict-mode.ts @@ -11,7 +11,7 @@ import { ClosedEnum } from "../types/enums.js"; * @remarks * * STRICT preserves compatibility checks and raises conflicts on incompatible changes. - * OVERWRITE applies latest-init-wins replacement for steps and evaluators. + * OVERWRITE applies latest-init-wins replacement for steps and rules. */ export const ConflictMode = { Strict: "strict", @@ -23,7 +23,7 @@ export const ConflictMode = { * @remarks * * STRICT preserves compatibility checks and raises conflicts on incompatible changes. - * OVERWRITE applies latest-init-wins replacement for steps and evaluators. + * OVERWRITE applies latest-init-wins replacement for steps and rules. 
*/ export type ConflictMode = ClosedEnum; diff --git a/sdks/typescript/src/generated/models/control-action.ts b/sdks/typescript/src/generated/models/control-action.ts index ca5af786..d8da2e76 100644 --- a/sdks/typescript/src/generated/models/control-action.ts +++ b/sdks/typescript/src/generated/models/control-action.ts @@ -25,7 +25,7 @@ import { export type ControlAction = { decision: ActionDecision; /** - * Steering context object for steer actions. Strongly recommended when decision='steer' to provide correction suggestions. If not provided, the evaluator result message will be used as fallback. + * Steering context object for steer actions. Strongly recommended when decision='steer' to provide correction suggestions. If not provided, the rule result message will be used as fallback. */ steeringContext?: SteeringContext | null | undefined; }; diff --git a/sdks/typescript/src/generated/models/control-execution-event.ts b/sdks/typescript/src/generated/models/control-execution-event.ts index b9f4dd68..f1f96f50 100644 --- a/sdks/typescript/src/generated/models/control-execution-event.ts +++ b/sdks/typescript/src/generated/models/control-execution-event.ts @@ -52,7 +52,7 @@ export type CheckStage = OpenEnum; * - Context: agent, control, check stage, applies to * - Result: action taken, whether matched, confidence score * - Timing: when it happened, how long it took - * - Optional details: evaluator name, selector path, errors, metadata + * - Optional details: rule name, selector path, errors, metadata * * Attributes: * control_execution_id: Unique ID for this specific control execution @@ -64,11 +64,11 @@ export type CheckStage = OpenEnum; * check_stage: "pre" (before execution) or "post" (after execution) * applies_to: "llm_call" or "tool_call" * action: The action taken (deny, steer, observe) - * matched: Whether the control evaluator matched - * confidence: Confidence score from the evaluator (0.0-1.0) + * matched: Whether the control rule matched + * confidence: 
Confidence score from the rule (0.0-1.0) * timestamp: When the control was executed (UTC) * execution_duration_ms: How long the control evaluation took - * evaluator_name: Name of the evaluator used + * rule_name: Name of the rule used * selector_path: The selector path used to extract data * error_message: Error message if evaluation failed * metadata: Additional metadata for extensibility @@ -107,22 +107,22 @@ export type ControlExecutionEvent = { * Error message if evaluation failed */ errorMessage?: string | null | undefined; - /** - * Name of the evaluator used - */ - evaluatorName?: string | null | undefined; /** * Execution duration in milliseconds */ executionDurationMs?: number | null | undefined; /** - * Whether the evaluator matched (True) or not (False) + * Whether the rule matched (True) or not (False) */ matched: boolean; /** * Additional metadata */ metadata?: { [k: string]: any } | undefined; + /** + * Name of the rule used + */ + ruleName?: string | null | undefined; /** * Selector path used to extract data */ @@ -174,10 +174,10 @@ export const ControlExecutionEvent$inboundSchema: z.ZodMiniType< control_id: types.number(), control_name: types.string(), error_message: z.optional(z.nullable(types.string())), - evaluator_name: z.optional(z.nullable(types.string())), execution_duration_ms: z.optional(z.nullable(types.number())), matched: types.boolean(), metadata: types.optional(z.record(z.string(), z.any())), + rule_name: z.optional(z.nullable(types.string())), selector_path: z.optional(z.nullable(types.string())), span_id: types.string(), timestamp: types.optional(types.date()), @@ -192,8 +192,8 @@ export const ControlExecutionEvent$inboundSchema: z.ZodMiniType< "control_id": "controlId", "control_name": "controlName", "error_message": "errorMessage", - "evaluator_name": "evaluatorName", "execution_duration_ms": "executionDurationMs", + "rule_name": "ruleName", "selector_path": "selectorPath", "span_id": "spanId", "trace_id": "traceId", @@ -211,10 
+211,10 @@ export type ControlExecutionEvent$Outbound = { control_id: number; control_name: string; error_message?: string | null | undefined; - evaluator_name?: string | null | undefined; execution_duration_ms?: number | null | undefined; matched: boolean; metadata?: { [k: string]: any } | undefined; + rule_name?: string | null | undefined; selector_path?: string | null | undefined; span_id: string; timestamp?: string | undefined; @@ -236,10 +236,10 @@ export const ControlExecutionEvent$outboundSchema: z.ZodMiniType< controlId: z.int(), controlName: z.string(), errorMessage: z.optional(z.nullable(z.string())), - evaluatorName: z.optional(z.nullable(z.string())), executionDurationMs: z.optional(z.nullable(z.number())), matched: z.boolean(), metadata: z.optional(z.record(z.string(), z.any())), + ruleName: z.optional(z.nullable(z.string())), selectorPath: z.optional(z.nullable(z.string())), spanId: z.string(), timestamp: z.optional(z.pipe(z.date(), z.transform(v => v.toISOString()))), @@ -254,8 +254,8 @@ export const ControlExecutionEvent$outboundSchema: z.ZodMiniType< controlId: "control_id", controlName: "control_name", errorMessage: "error_message", - evaluatorName: "evaluator_name", executionDurationMs: "execution_duration_ms", + ruleName: "rule_name", selectorPath: "selector_path", spanId: "span_id", traceId: "trace_id", diff --git a/sdks/typescript/src/generated/models/control-match.ts b/sdks/typescript/src/generated/models/control-match.ts index 6a7caa1f..6e57769e 100644 --- a/sdks/typescript/src/generated/models/control-match.ts +++ b/sdks/typescript/src/generated/models/control-match.ts @@ -12,10 +12,7 @@ import { ActionDecision$inboundSchema, } from "./action-decision.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; -import { - EvaluatorResult, - EvaluatorResult$inboundSchema, -} from "./evaluator-result.js"; +import { RuleResult, RuleResult$inboundSchema } from "./rule-result.js"; import { SteeringContext, 
SteeringContext$inboundSchema, @@ -39,22 +36,22 @@ export type ControlMatch = { */ controlName: string; /** - * Result from a control evaluator. + * Result from a control rule. * * @remarks * - * The `error` field indicates evaluator failures, NOT validation failures: - * - Set `error` for: evaluator crashes, timeouts, missing dependencies, external service errors + * The `error` field indicates rule failures, NOT validation failures: + * - Set `error` for: rule crashes, timeouts, missing dependencies, external service errors * - Do NOT set `error` for: invalid input, syntax errors, schema violations, constraint failures * - * When `error` is set, `matched` must be False (fail-open on evaluator errors). + * When `error` is set, `matched` must be False (fail-open on rule errors). * When `error` is None, `matched` reflects the actual validation result. * * This distinction allows: - * - Clients to distinguish "data violated rules" from "evaluator is broken" - * - Observability systems to monitor evaluator health separately from validation outcomes + * - Clients to distinguish "data violated rules" from "rule is broken" + * - Observability systems to monitor rule health separately from validation outcomes */ - result: EvaluatorResult; + result: RuleResult; /** * Steering context for steer actions if configured */ @@ -69,7 +66,7 @@ export const ControlMatch$inboundSchema: z.ZodMiniType = control_execution_id: types.optional(types.string()), control_id: types.number(), control_name: types.string(), - result: EvaluatorResult$inboundSchema, + result: RuleResult$inboundSchema, steering_context: z.optional(z.nullable(SteeringContext$inboundSchema)), }), z.transform((v) => { diff --git a/sdks/typescript/src/generated/models/control-selector.ts b/sdks/typescript/src/generated/models/control-selector.ts index 8144bb20..c4cab8ed 100644 --- a/sdks/typescript/src/generated/models/control-selector.ts +++ b/sdks/typescript/src/generated/models/control-selector.ts @@ -13,7 +13,7 @@ 
import { SDKValidationError } from "./errors/sdk-validation-error.js"; * * @remarks * - * - path: which slice of the Step to feed into the evaluator. Optional, defaults to "*" + * - path: which slice of the Step to feed into the rule. Optional, defaults to "*" * meaning the entire Step object. */ export type ControlSelector = { diff --git a/sdks/typescript/src/generated/models/control-stats-response.ts b/sdks/typescript/src/generated/models/control-stats-response.ts index 285796ac..ff970230 100644 --- a/sdks/typescript/src/generated/models/control-stats-response.ts +++ b/sdks/typescript/src/generated/models/control-stats-response.ts @@ -49,8 +49,8 @@ export type ControlStatsResponse = { * * Attributes: * execution_count: Total executions across all controls - * match_count: Total matches across all controls (evaluator matched) - * non_match_count: Total non-matches across all controls (evaluator didn't match) + * match_count: Total matches across all controls (rule matched) + * non_match_count: Total non-matches across all controls (rule didn't match) * error_count: Total errors across all controls (evaluation failed) * action_counts: Breakdown of actions for matched executions * timeseries: Time-series data points (only when include_timeseries=true) diff --git a/sdks/typescript/src/generated/models/evaluator-spec.ts b/sdks/typescript/src/generated/models/evaluator-spec.ts deleted file mode 100644 index f821429a..00000000 --- a/sdks/typescript/src/generated/models/evaluator-spec.ts +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. - */ - -import * as z from "zod/v4-mini"; -import { safeParse } from "../lib/schemas.js"; -import { Result as SafeParseResult } from "../types/fp.js"; -import * as types from "../types/primitives.js"; -import { SDKValidationError } from "./errors/sdk-validation-error.js"; - -/** - * Evaluator specification. See GET /evaluators for available evaluators and schemas. 
- * - * @remarks - * - * Evaluator reference formats: - * - Built-in: "regex", "list", "json", "sql" - * - External: "galileo.luna" (requires agent-control-evaluators[galileo]) - * - Agent-scoped: "my-agent:my-evaluator" (validated in endpoint, not here) - */ -export type EvaluatorSpec = { - /** - * Evaluator-specific configuration - */ - config: { [k: string]: any }; - /** - * Evaluator name or agent-scoped reference (agent:evaluator) - */ - name: string; -}; - -/** @internal */ -export const EvaluatorSpec$inboundSchema: z.ZodMiniType< - EvaluatorSpec, - unknown -> = z.object({ - config: z.record(z.string(), z.any()), - name: types.string(), -}); -/** @internal */ -export type EvaluatorSpec$Outbound = { - config: { [k: string]: any }; - name: string; -}; - -/** @internal */ -export const EvaluatorSpec$outboundSchema: z.ZodMiniType< - EvaluatorSpec$Outbound, - EvaluatorSpec -> = z.object({ - config: z.record(z.string(), z.any()), - name: z.string(), -}); - -export function evaluatorSpecToJSON(evaluatorSpec: EvaluatorSpec): string { - return JSON.stringify(EvaluatorSpec$outboundSchema.parse(evaluatorSpec)); -} -export function evaluatorSpecFromJSON( - jsonString: string, -): SafeParseResult { - return safeParse( - jsonString, - (x) => EvaluatorSpec$inboundSchema.parse(JSON.parse(x)), - `Failed to parse 'EvaluatorSpec' from JSON`, - ); -} diff --git a/sdks/typescript/src/generated/models/get-agent-response.ts b/sdks/typescript/src/generated/models/get-agent-response.ts index 10eacfb3..af21e996 100644 --- a/sdks/typescript/src/generated/models/get-agent-response.ts +++ b/sdks/typescript/src/generated/models/get-agent-response.ts @@ -8,10 +8,7 @@ import { Result as SafeParseResult } from "../types/fp.js"; import * as types from "../types/primitives.js"; import { Agent, Agent$inboundSchema } from "./agent.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; -import { - EvaluatorSchema, - EvaluatorSchema$inboundSchema, -} from 
"./evaluator-schema.js"; +import { RuleSchema, RuleSchema$inboundSchema } from "./rule-schema.js"; import { StepSchema, StepSchema$inboundSchema } from "./step-schema.js"; /** @@ -28,9 +25,9 @@ export type GetAgentResponse = { */ agent: Agent; /** - * Custom evaluators registered with this agent + * Custom rules registered with this agent */ - evaluators?: Array | undefined; + rules?: Array | undefined; /** * Steps registered with this agent */ @@ -43,7 +40,7 @@ export const GetAgentResponse$inboundSchema: z.ZodMiniType< unknown > = z.object({ agent: Agent$inboundSchema, - evaluators: types.optional(z.array(EvaluatorSchema$inboundSchema)), + rules: types.optional(z.array(RuleSchema$inboundSchema)), steps: z.array(StepSchema$inboundSchema), }); diff --git a/sdks/typescript/src/generated/models/index.ts b/sdks/typescript/src/generated/models/index.ts index 8043233b..a474764f 100644 --- a/sdks/typescript/src/generated/models/index.ts +++ b/sdks/typescript/src/generated/models/index.ts @@ -46,11 +46,6 @@ export * from "./delete-policy-response.js"; export * from "./enum-template-parameter.js"; export * from "./evaluation-request.js"; export * from "./evaluation-response.js"; -export * from "./evaluator-info.js"; -export * from "./evaluator-result.js"; -export * from "./evaluator-schema-item.js"; -export * from "./evaluator-schema.js"; -export * from "./evaluator-spec.js"; export * from "./event-query-request.js"; export * from "./event-query-response.js"; export * from "./get-agent-policies-response.js"; @@ -63,15 +58,15 @@ export * from "./get-control-version-response.js"; export * from "./get-policy-controls-response.js"; export * from "./get-policy-response.js"; export * from "./health-response.js"; -export * from "./init-agent-evaluator-removal.js"; export * from "./init-agent-overwrite-changes.js"; export * from "./init-agent-request.js"; export * from "./init-agent-response.js"; +export * from "./init-agent-rule-removal.js"; export * from 
"./list-agents-response.js"; export * from "./list-control-bindings-response.js"; export * from "./list-control-versions-response.js"; export * from "./list-controls-response.js"; -export * from "./list-evaluators-response.js"; +export * from "./list-rules-response.js"; export * from "./login-request.js"; export * from "./login-response.js"; export * from "./pagination-info.js"; @@ -87,6 +82,11 @@ export * from "./regex-template-parameter.js"; export * from "./remove-agent-control-response.js"; export * from "./render-control-template-request.js"; export * from "./render-control-template-response.js"; +export * from "./rule-info.js"; +export * from "./rule-result.js"; +export * from "./rule-schema-item.js"; +export * from "./rule-schema.js"; +export * from "./rule-spec.js"; export * from "./runtime-token-exchange-request.js"; export * from "./runtime-token-exchange-response.js"; export * from "./security.js"; diff --git a/sdks/typescript/src/generated/models/init-agent-overwrite-changes.ts b/sdks/typescript/src/generated/models/init-agent-overwrite-changes.ts index d406b7e7..8710bbb4 100644 --- a/sdks/typescript/src/generated/models/init-agent-overwrite-changes.ts +++ b/sdks/typescript/src/generated/models/init-agent-overwrite-changes.ts @@ -9,9 +9,9 @@ import { Result as SafeParseResult } from "../types/fp.js"; import * as types from "../types/primitives.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; import { - InitAgentEvaluatorRemoval, - InitAgentEvaluatorRemoval$inboundSchema, -} from "./init-agent-evaluator-removal.js"; + InitAgentRuleRemoval, + InitAgentRuleRemoval$inboundSchema, +} from "./init-agent-rule-removal.js"; import { StepKey, StepKey$inboundSchema } from "./step-key.js"; /** @@ -19,25 +19,25 @@ import { StepKey, StepKey$inboundSchema } from "./step-key.js"; */ export type InitAgentOverwriteChanges = { /** - * Per-evaluator removal details, including active control references + * Whether agent metadata changed */ - 
evaluatorRemovals?: Array | undefined; + metadataChanged: boolean; /** - * Evaluator names added by overwrite + * Per-rule removal details, including active control references */ - evaluatorsAdded?: Array | undefined; + ruleRemovals?: Array | undefined; /** - * Evaluator names removed by overwrite + * Rule names added by overwrite */ - evaluatorsRemoved?: Array | undefined; + rulesAdded?: Array | undefined; /** - * Existing evaluator names updated by overwrite + * Rule names removed by overwrite */ - evaluatorsUpdated?: Array | undefined; + rulesRemoved?: Array | undefined; /** - * Whether agent metadata changed + * Existing rule names updated by overwrite */ - metadataChanged: boolean; + rulesUpdated?: Array | undefined; /** * Steps added by overwrite */ @@ -58,24 +58,22 @@ export const InitAgentOverwriteChanges$inboundSchema: z.ZodMiniType< unknown > = z.pipe( z.object({ - evaluator_removals: types.optional( - z.array(InitAgentEvaluatorRemoval$inboundSchema), - ), - evaluators_added: types.optional(z.array(types.string())), - evaluators_removed: types.optional(z.array(types.string())), - evaluators_updated: types.optional(z.array(types.string())), metadata_changed: z._default(types.boolean(), false), + rule_removals: types.optional(z.array(InitAgentRuleRemoval$inboundSchema)), + rules_added: types.optional(z.array(types.string())), + rules_removed: types.optional(z.array(types.string())), + rules_updated: types.optional(z.array(types.string())), steps_added: types.optional(z.array(StepKey$inboundSchema)), steps_removed: types.optional(z.array(StepKey$inboundSchema)), steps_updated: types.optional(z.array(StepKey$inboundSchema)), }), z.transform((v) => { return remap$(v, { - "evaluator_removals": "evaluatorRemovals", - "evaluators_added": "evaluatorsAdded", - "evaluators_removed": "evaluatorsRemoved", - "evaluators_updated": "evaluatorsUpdated", "metadata_changed": "metadataChanged", + "rule_removals": "ruleRemovals", + "rules_added": "rulesAdded", + 
"rules_removed": "rulesRemoved", + "rules_updated": "rulesUpdated", "steps_added": "stepsAdded", "steps_removed": "stepsRemoved", "steps_updated": "stepsUpdated", diff --git a/sdks/typescript/src/generated/models/init-agent-request.ts b/sdks/typescript/src/generated/models/init-agent-request.ts index 90d1393a..00743fcb 100644 --- a/sdks/typescript/src/generated/models/init-agent-request.ts +++ b/sdks/typescript/src/generated/models/init-agent-request.ts @@ -7,10 +7,10 @@ import { remap as remap$ } from "../lib/primitives.js"; import { Agent, Agent$Outbound, Agent$outboundSchema } from "./agent.js"; import { ConflictMode, ConflictMode$outboundSchema } from "./conflict-mode.js"; import { - EvaluatorSchema, - EvaluatorSchema$Outbound, - EvaluatorSchema$outboundSchema, -} from "./evaluator-schema.js"; + RuleSchema, + RuleSchema$Outbound, + RuleSchema$outboundSchema, +} from "./rule-schema.js"; import { StepSchema, StepSchema$Outbound, @@ -36,17 +36,17 @@ export type InitAgentRequest = { * @remarks * * STRICT preserves compatibility checks and raises conflicts on incompatible changes. - * OVERWRITE applies latest-init-wins replacement for steps and evaluators. + * OVERWRITE applies latest-init-wins replacement for steps and rules. */ conflictMode?: ConflictMode | undefined; - /** - * Custom evaluator schemas for config validation - */ - evaluators?: Array | undefined; /** * If true, replace corrupted agent data instead of failing. Use only when agent data is corrupted and cannot be parsed. 
*/ forceReplace?: boolean | undefined; + /** + * Custom rule schemas for config validation + */ + rules?: Array | undefined; /** * List of steps available to the agent */ @@ -65,8 +65,8 @@ export type InitAgentRequest = { export type InitAgentRequest$Outbound = { agent: Agent$Outbound; conflict_mode?: string | undefined; - evaluators?: Array | undefined; force_replace: boolean; + rules?: Array | undefined; steps?: Array | undefined; target_id?: string | null | undefined; target_type?: string | null | undefined; @@ -80,8 +80,8 @@ export const InitAgentRequest$outboundSchema: z.ZodMiniType< z.object({ agent: Agent$outboundSchema, conflictMode: z._default(z.optional(ConflictMode$outboundSchema), "overwrite"), - evaluators: z.optional(z.array(EvaluatorSchema$outboundSchema)), forceReplace: z._default(z.boolean(), false), + rules: z.optional(z.array(RuleSchema$outboundSchema)), steps: z.optional(z.array(StepSchema$outboundSchema)), targetId: z.optional(z.nullable(z.string())), targetType: z.optional(z.nullable(z.string())), diff --git a/sdks/typescript/src/generated/models/init-agent-evaluator-removal.ts b/sdks/typescript/src/generated/models/init-agent-rule-removal.ts similarity index 62% rename from sdks/typescript/src/generated/models/init-agent-evaluator-removal.ts rename to sdks/typescript/src/generated/models/init-agent-rule-removal.ts index 9eb84b0d..b992fa25 100644 --- a/sdks/typescript/src/generated/models/init-agent-evaluator-removal.ts +++ b/sdks/typescript/src/generated/models/init-agent-rule-removal.ts @@ -10,30 +10,30 @@ import * as types from "../types/primitives.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; /** - * Details for an evaluator removed during overwrite mode. + * Details for a rule removed during overwrite mode. 
*/ -export type InitAgentEvaluatorRemoval = { +export type InitAgentRuleRemoval = { /** - * IDs of active controls referencing this evaluator + * IDs of active controls referencing this rule */ controlIds?: Array | undefined; /** - * Names of active controls referencing this evaluator + * Names of active controls referencing this rule */ controlNames?: Array | undefined; /** - * Evaluator name removed by overwrite + * Rule name removed by overwrite */ name: string; /** - * Whether this evaluator is still referenced by active controls + * Whether this rule is still referenced by active controls */ referencedByActiveControls: boolean; }; /** @internal */ -export const InitAgentEvaluatorRemoval$inboundSchema: z.ZodMiniType< - InitAgentEvaluatorRemoval, +export const InitAgentRuleRemoval$inboundSchema: z.ZodMiniType< + InitAgentRuleRemoval, unknown > = z.pipe( z.object({ @@ -51,12 +51,12 @@ export const InitAgentEvaluatorRemoval$inboundSchema: z.ZodMiniType< }), ); -export function initAgentEvaluatorRemovalFromJSON( +export function initAgentRuleRemovalFromJSON( jsonString: string, -): SafeParseResult { +): SafeParseResult { return safeParse( jsonString, - (x) => InitAgentEvaluatorRemoval$inboundSchema.parse(JSON.parse(x)), - `Failed to parse 'InitAgentEvaluatorRemoval' from JSON`, + (x) => InitAgentRuleRemoval$inboundSchema.parse(JSON.parse(x)), + `Failed to parse 'InitAgentRuleRemoval' from JSON`, ); } diff --git a/sdks/typescript/src/generated/models/list-evaluators-response.ts b/sdks/typescript/src/generated/models/list-rules-response.ts similarity index 51% rename from sdks/typescript/src/generated/models/list-evaluators-response.ts rename to sdks/typescript/src/generated/models/list-rules-response.ts index 309ee4d8..2eb1c4ce 100644 --- a/sdks/typescript/src/generated/models/list-evaluators-response.ts +++ b/sdks/typescript/src/generated/models/list-rules-response.ts @@ -6,41 +6,41 @@ import * as z from "zod/v4-mini"; import { safeParse } from "../lib/schemas.js"; 
import { Result as SafeParseResult } from "../types/fp.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; -import { - EvaluatorSchemaItem, - EvaluatorSchemaItem$inboundSchema, -} from "./evaluator-schema-item.js"; import { PaginationInfo, PaginationInfo$inboundSchema, } from "./pagination-info.js"; +import { + RuleSchemaItem, + RuleSchemaItem$inboundSchema, +} from "./rule-schema-item.js"; /** - * Response for listing agent's evaluator schemas. + * Response for listing agent's rule schemas. */ -export type ListEvaluatorsResponse = { - evaluators: Array; +export type ListRulesResponse = { /** * Pagination metadata for cursor-based pagination. */ pagination: PaginationInfo; + rules: Array; }; /** @internal */ -export const ListEvaluatorsResponse$inboundSchema: z.ZodMiniType< - ListEvaluatorsResponse, +export const ListRulesResponse$inboundSchema: z.ZodMiniType< + ListRulesResponse, unknown > = z.object({ - evaluators: z.array(EvaluatorSchemaItem$inboundSchema), pagination: PaginationInfo$inboundSchema, + rules: z.array(RuleSchemaItem$inboundSchema), }); -export function listEvaluatorsResponseFromJSON( +export function listRulesResponseFromJSON( jsonString: string, -): SafeParseResult { +): SafeParseResult { return safeParse( jsonString, - (x) => ListEvaluatorsResponse$inboundSchema.parse(JSON.parse(x)), - `Failed to parse 'ListEvaluatorsResponse' from JSON`, + (x) => ListRulesResponse$inboundSchema.parse(JSON.parse(x)), + `Failed to parse 'ListRulesResponse' from JSON`, ); } diff --git a/sdks/typescript/src/generated/models/operations/get-agent-evaluator-api-v1-agents-agent-name-evaluators-evaluator-name-get.ts b/sdks/typescript/src/generated/models/operations/get-agent-evaluator-api-v1-agents-agent-name-evaluators-evaluator-name-get.ts deleted file mode 100644 index 39eda5c3..00000000 --- a/sdks/typescript/src/generated/models/operations/get-agent-evaluator-api-v1-agents-agent-name-evaluators-evaluator-name-get.ts +++ /dev/null @@ -1,49 +0,0 
@@ -/* - * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. - */ - -import * as z from "zod/v4-mini"; -import { remap as remap$ } from "../../lib/primitives.js"; - -export type GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest = - { - agentName: string; - evaluatorName: string; - }; - -/** @internal */ -export type GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest$Outbound = - { - agent_name: string; - evaluator_name: string; - }; - -/** @internal */ -export const GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest$outboundSchema: - z.ZodMiniType< - GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest$Outbound, - GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest - > = z.pipe( - z.object({ - agentName: z.string(), - evaluatorName: z.string(), - }), - z.transform((v) => { - return remap$(v, { - agentName: "agent_name", - evaluatorName: "evaluator_name", - }); - }), - ); - -export function getAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequestToJSON( - getAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest: - GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest, -): string { - return JSON.stringify( - GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest$outboundSchema - .parse( - getAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest, - ), - ); -} diff --git a/sdks/typescript/src/generated/models/operations/get-agent-rule-api-v1-agents-agent-name-rules-rule-name-get.ts b/sdks/typescript/src/generated/models/operations/get-agent-rule-api-v1-agents-agent-name-rules-rule-name-get.ts new file mode 100644 index 00000000..073b7bf2 --- /dev/null +++ b/sdks/typescript/src/generated/models/operations/get-agent-rule-api-v1-agents-agent-name-rules-rule-name-get.ts @@ -0,0 +1,45 @@ +/* + * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. 
+ */ + +import * as z from "zod/v4-mini"; +import { remap as remap$ } from "../../lib/primitives.js"; + +export type GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest = { + agentName: string; + ruleName: string; +}; + +/** @internal */ +export type GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest$Outbound = { + agent_name: string; + rule_name: string; +}; + +/** @internal */ +export const GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest$outboundSchema: + z.ZodMiniType< + GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest$Outbound, + GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest + > = z.pipe( + z.object({ + agentName: z.string(), + ruleName: z.string(), + }), + z.transform((v) => { + return remap$(v, { + agentName: "agent_name", + ruleName: "rule_name", + }); + }), + ); + +export function getAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequestToJSON( + getAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest: + GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest, +): string { + return JSON.stringify( + GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest$outboundSchema + .parse(getAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest), + ); +} diff --git a/sdks/typescript/src/generated/models/operations/index.ts b/sdks/typescript/src/generated/models/operations/index.ts index b031df32..20c889d6 100644 --- a/sdks/typescript/src/generated/models/operations/index.ts +++ b/sdks/typescript/src/generated/models/operations/index.ts @@ -10,9 +10,9 @@ export * from "./delete-agent-policy-api-v1-agents-agent-name-policy-delete.js"; export * from "./delete-control-api-v1-controls-control-id-delete.js"; export * from "./delete-control-binding-api-v1-control-bindings-binding-id-delete.js"; export * from "./get-agent-api-v1-agents-agent-name-get.js"; -export * from "./get-agent-evaluator-api-v1-agents-agent-name-evaluators-evaluator-name-get.js"; export * from "./get-agent-policies-api-v1-agents-agent-name-policies-get.js"; export * from 
"./get-agent-policy-api-v1-agents-agent-name-policy-get.js"; +export * from "./get-agent-rule-api-v1-agents-agent-name-rules-rule-name-get.js"; export * from "./get-control-api-v1-controls-control-id-get.js"; export * from "./get-control-binding-api-v1-control-bindings-binding-id-get.js"; export * from "./get-control-data-api-v1-controls-control-id-data-get.js"; @@ -20,7 +20,7 @@ export * from "./get-control-stats-api-v1-observability-stats-controls-control-i export * from "./get-control-version-api-v1-controls-control-id-versions-version-num-get.js"; export * from "./get-stats-api-v1-observability-stats-get.js"; export * from "./list-agent-controls-api-v1-agents-agent-name-controls-get.js"; -export * from "./list-agent-evaluators-api-v1-agents-agent-name-evaluators-get.js"; +export * from "./list-agent-rules-api-v1-agents-agent-name-rules-get.js"; export * from "./list-agents-api-v1-agents-get.js"; export * from "./list-control-bindings-api-v1-control-bindings-get.js"; export * from "./list-control-versions-api-v1-controls-control-id-versions-get.js"; diff --git a/sdks/typescript/src/generated/models/operations/list-agent-evaluators-api-v1-agents-agent-name-evaluators-get.ts b/sdks/typescript/src/generated/models/operations/list-agent-evaluators-api-v1-agents-agent-name-evaluators-get.ts deleted file mode 100644 index 2aaf3560..00000000 --- a/sdks/typescript/src/generated/models/operations/list-agent-evaluators-api-v1-agents-agent-name-evaluators-get.ts +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. 
- */ - -import * as z from "zod/v4-mini"; -import { remap as remap$ } from "../../lib/primitives.js"; - -export type ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest = { - agentName: string; - cursor?: string | null | undefined; - limit?: number | undefined; -}; - -/** @internal */ -export type ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest$Outbound = - { - agent_name: string; - cursor?: string | null | undefined; - limit: number; - }; - -/** @internal */ -export const ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest$outboundSchema: - z.ZodMiniType< - ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest$Outbound, - ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest - > = z.pipe( - z.object({ - agentName: z.string(), - cursor: z.optional(z.nullable(z.string())), - limit: z._default(z.int(), 20), - }), - z.transform((v) => { - return remap$(v, { - agentName: "agent_name", - }); - }), - ); - -export function listAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequestToJSON( - listAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest: - ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest, -): string { - return JSON.stringify( - ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest$outboundSchema - .parse(listAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest), - ); -} diff --git a/sdks/typescript/src/generated/models/operations/list-agent-rules-api-v1-agents-agent-name-rules-get.ts b/sdks/typescript/src/generated/models/operations/list-agent-rules-api-v1-agents-agent-name-rules-get.ts new file mode 100644 index 00000000..c7be2561 --- /dev/null +++ b/sdks/typescript/src/generated/models/operations/list-agent-rules-api-v1-agents-agent-name-rules-get.ts @@ -0,0 +1,48 @@ +/* + * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. 
+ */ + +import * as z from "zod/v4-mini"; +import { remap as remap$ } from "../../lib/primitives.js"; + +export type ListAgentRulesApiV1AgentsAgentNameRulesGetRequest = { + agentName: string; + cursor?: string | null | undefined; + limit?: number | undefined; +}; + +/** @internal */ +export type ListAgentRulesApiV1AgentsAgentNameRulesGetRequest$Outbound = { + agent_name: string; + cursor?: string | null | undefined; + limit: number; +}; + +/** @internal */ +export const ListAgentRulesApiV1AgentsAgentNameRulesGetRequest$outboundSchema: + z.ZodMiniType< + ListAgentRulesApiV1AgentsAgentNameRulesGetRequest$Outbound, + ListAgentRulesApiV1AgentsAgentNameRulesGetRequest + > = z.pipe( + z.object({ + agentName: z.string(), + cursor: z.optional(z.nullable(z.string())), + limit: z._default(z.int(), 20), + }), + z.transform((v) => { + return remap$(v, { + agentName: "agent_name", + }); + }), + ); + +export function listAgentRulesApiV1AgentsAgentNameRulesGetRequestToJSON( + listAgentRulesApiV1AgentsAgentNameRulesGetRequest: + ListAgentRulesApiV1AgentsAgentNameRulesGetRequest, +): string { + return JSON.stringify( + ListAgentRulesApiV1AgentsAgentNameRulesGetRequest$outboundSchema.parse( + listAgentRulesApiV1AgentsAgentNameRulesGetRequest, + ), + ); +} diff --git a/sdks/typescript/src/generated/models/patch-agent-request.ts b/sdks/typescript/src/generated/models/patch-agent-request.ts index 6a2a631f..e98b590f 100644 --- a/sdks/typescript/src/generated/models/patch-agent-request.ts +++ b/sdks/typescript/src/generated/models/patch-agent-request.ts @@ -11,13 +11,13 @@ import { } from "./step-key.js"; /** - * Request to modify an agent (remove steps/evaluators). + * Request to modify an agent (remove steps/rules). 
*/ export type PatchAgentRequest = { /** - * Evaluator names to remove from the agent + * Rule names to remove from the agent */ - removeEvaluators?: Array | undefined; + removeRules?: Array | undefined; /** * Step identifiers to remove from the agent */ @@ -26,7 +26,7 @@ export type PatchAgentRequest = { /** @internal */ export type PatchAgentRequest$Outbound = { - remove_evaluators?: Array | undefined; + remove_rules?: Array | undefined; remove_steps?: Array | undefined; }; @@ -36,12 +36,12 @@ export const PatchAgentRequest$outboundSchema: z.ZodMiniType< PatchAgentRequest > = z.pipe( z.object({ - removeEvaluators: z.optional(z.array(z.string())), + removeRules: z.optional(z.array(z.string())), removeSteps: z.optional(z.array(StepKey$outboundSchema)), }), z.transform((v) => { return remap$(v, { - removeEvaluators: "remove_evaluators", + removeRules: "remove_rules", removeSteps: "remove_steps", }); }), diff --git a/sdks/typescript/src/generated/models/patch-agent-response.ts b/sdks/typescript/src/generated/models/patch-agent-response.ts index 6677bd8d..e3e3eb82 100644 --- a/sdks/typescript/src/generated/models/patch-agent-response.ts +++ b/sdks/typescript/src/generated/models/patch-agent-response.ts @@ -15,9 +15,9 @@ import { StepKey, StepKey$inboundSchema } from "./step-key.js"; */ export type PatchAgentResponse = { /** - * Evaluator names that were removed + * Rule names that were removed */ - evaluatorsRemoved?: Array | undefined; + rulesRemoved?: Array | undefined; /** * Step identifiers that were removed */ @@ -30,12 +30,12 @@ export const PatchAgentResponse$inboundSchema: z.ZodMiniType< unknown > = z.pipe( z.object({ - evaluators_removed: types.optional(z.array(types.string())), + rules_removed: types.optional(z.array(types.string())), steps_removed: types.optional(z.array(StepKey$inboundSchema)), }), z.transform((v) => { return remap$(v, { - "evaluators_removed": "evaluatorsRemoved", + "rules_removed": "rulesRemoved", "steps_removed": "stepsRemoved", }); }), 
diff --git a/sdks/typescript/src/generated/models/evaluator-info.ts b/sdks/typescript/src/generated/models/rule-info.ts similarity index 70% rename from sdks/typescript/src/generated/models/evaluator-info.ts rename to sdks/typescript/src/generated/models/rule-info.ts index 1f2a0dd6..30214add 100644 --- a/sdks/typescript/src/generated/models/evaluator-info.ts +++ b/sdks/typescript/src/generated/models/rule-info.ts @@ -10,23 +10,23 @@ import * as types from "../types/primitives.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; /** - * Information about a registered evaluator. + * Information about a registered rule. */ -export type EvaluatorInfo = { +export type RuleInfo = { /** * JSON Schema for config */ configSchema: { [k: string]: any }; /** - * Evaluator description + * Rule description */ description: string; /** - * Evaluator name + * Rule name */ name: string; /** - * Whether evaluator requires API key + * Whether rule requires API key */ requiresApiKey: boolean; /** @@ -34,16 +34,13 @@ export type EvaluatorInfo = { */ timeoutMs: number; /** - * Evaluator version + * Rule version */ version: string; }; /** @internal */ -export const EvaluatorInfo$inboundSchema: z.ZodMiniType< - EvaluatorInfo, - unknown -> = z.pipe( +export const RuleInfo$inboundSchema: z.ZodMiniType = z.pipe( z.object({ config_schema: z.record(z.string(), z.any()), description: types.string(), @@ -61,12 +58,12 @@ export const EvaluatorInfo$inboundSchema: z.ZodMiniType< }), ); -export function evaluatorInfoFromJSON( +export function ruleInfoFromJSON( jsonString: string, -): SafeParseResult { +): SafeParseResult { return safeParse( jsonString, - (x) => EvaluatorInfo$inboundSchema.parse(JSON.parse(x)), - `Failed to parse 'EvaluatorInfo' from JSON`, + (x) => RuleInfo$inboundSchema.parse(JSON.parse(x)), + `Failed to parse 'RuleInfo' from JSON`, ); } diff --git a/sdks/typescript/src/generated/models/evaluator-result.ts b/sdks/typescript/src/generated/models/rule-result.ts 
similarity index 52% rename from sdks/typescript/src/generated/models/evaluator-result.ts rename to sdks/typescript/src/generated/models/rule-result.ts index 4e39d10b..1a77918b 100644 --- a/sdks/typescript/src/generated/models/evaluator-result.ts +++ b/sdks/typescript/src/generated/models/rule-result.ts @@ -9,22 +9,22 @@ import * as types from "../types/primitives.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; /** - * Result from a control evaluator. + * Result from a control rule. * * @remarks * - * The `error` field indicates evaluator failures, NOT validation failures: - * - Set `error` for: evaluator crashes, timeouts, missing dependencies, external service errors + * The `error` field indicates rule failures, NOT validation failures: + * - Set `error` for: rule crashes, timeouts, missing dependencies, external service errors * - Do NOT set `error` for: invalid input, syntax errors, schema violations, constraint failures * - * When `error` is set, `matched` must be False (fail-open on evaluator errors). + * When `error` is set, `matched` must be False (fail-open on rule errors). * When `error` is None, `matched` reflects the actual validation result. 
* * This distinction allows: - * - Clients to distinguish "data violated rules" from "evaluator is broken" - * - Observability systems to monitor evaluator health separately from validation outcomes + * - Clients to distinguish "data violated rules" from "rule is broken" + * - Observability systems to monitor rule health separately from validation outcomes */ -export type EvaluatorResult = { +export type RuleResult = { /** * Confidence in the evaluation */ @@ -48,23 +48,21 @@ export type EvaluatorResult = { }; /** @internal */ -export const EvaluatorResult$inboundSchema: z.ZodMiniType< - EvaluatorResult, - unknown -> = z.object({ - confidence: types.number(), - error: z.optional(z.nullable(types.string())), - matched: types.boolean(), - message: z.optional(z.nullable(types.string())), - metadata: z.optional(z.nullable(z.record(z.string(), z.any()))), -}); +export const RuleResult$inboundSchema: z.ZodMiniType = z + .object({ + confidence: types.number(), + error: z.optional(z.nullable(types.string())), + matched: types.boolean(), + message: z.optional(z.nullable(types.string())), + metadata: z.optional(z.nullable(z.record(z.string(), z.any()))), + }); -export function evaluatorResultFromJSON( +export function ruleResultFromJSON( jsonString: string, -): SafeParseResult { +): SafeParseResult { return safeParse( jsonString, - (x) => EvaluatorResult$inboundSchema.parse(JSON.parse(x)), - `Failed to parse 'EvaluatorResult' from JSON`, + (x) => RuleResult$inboundSchema.parse(JSON.parse(x)), + `Failed to parse 'RuleResult' from JSON`, ); } diff --git a/sdks/typescript/src/generated/models/evaluator-schema-item.ts b/sdks/typescript/src/generated/models/rule-schema-item.ts similarity index 67% rename from sdks/typescript/src/generated/models/evaluator-schema-item.ts rename to sdks/typescript/src/generated/models/rule-schema-item.ts index ebb0150e..2b313565 100644 --- a/sdks/typescript/src/generated/models/evaluator-schema-item.ts +++ 
b/sdks/typescript/src/generated/models/rule-schema-item.ts @@ -10,17 +10,17 @@ import * as types from "../types/primitives.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; /** - * Evaluator schema summary for list response. + * Rule schema summary for list response. */ -export type EvaluatorSchemaItem = { +export type RuleSchemaItem = { configSchema: { [k: string]: any }; description: string | null; name: string; }; /** @internal */ -export const EvaluatorSchemaItem$inboundSchema: z.ZodMiniType< - EvaluatorSchemaItem, +export const RuleSchemaItem$inboundSchema: z.ZodMiniType< + RuleSchemaItem, unknown > = z.pipe( z.object({ @@ -35,12 +35,12 @@ export const EvaluatorSchemaItem$inboundSchema: z.ZodMiniType< }), ); -export function evaluatorSchemaItemFromJSON( +export function ruleSchemaItemFromJSON( jsonString: string, -): SafeParseResult { +): SafeParseResult { return safeParse( jsonString, - (x) => EvaluatorSchemaItem$inboundSchema.parse(JSON.parse(x)), - `Failed to parse 'EvaluatorSchemaItem' from JSON`, + (x) => RuleSchemaItem$inboundSchema.parse(JSON.parse(x)), + `Failed to parse 'RuleSchemaItem' from JSON`, ); } diff --git a/sdks/typescript/src/generated/models/evaluator-schema.ts b/sdks/typescript/src/generated/models/rule-schema.ts similarity index 50% rename from sdks/typescript/src/generated/models/evaluator-schema.ts rename to sdks/typescript/src/generated/models/rule-schema.ts index 941c27f2..cadc5c64 100644 --- a/sdks/typescript/src/generated/models/evaluator-schema.ts +++ b/sdks/typescript/src/generated/models/rule-schema.ts @@ -10,16 +10,16 @@ import * as types from "../types/primitives.js"; import { SDKValidationError } from "./errors/sdk-validation-error.js"; /** - * Schema for a custom evaluator registered with an agent. + * Schema for a custom rule registered with an agent. * * @remarks * - * Custom evaluators are Evaluator classes deployed with the engine. + * Custom rules are Rule classes deployed with the engine. 
* This schema is registered via initAgent for validation and UI purposes. */ -export type EvaluatorSchema = { +export type RuleSchema = { /** - * JSON Schema for evaluator config validation + * JSON Schema for rule config validation */ configSchema?: { [k: string]: any } | undefined; /** @@ -27,38 +27,36 @@ export type EvaluatorSchema = { */ description?: string | null | undefined; /** - * Unique evaluator name + * Unique rule name */ name: string; }; /** @internal */ -export const EvaluatorSchema$inboundSchema: z.ZodMiniType< - EvaluatorSchema, - unknown -> = z.pipe( - z.object({ - config_schema: types.optional(z.record(z.string(), z.any())), - description: z.optional(z.nullable(types.string())), - name: types.string(), - }), - z.transform((v) => { - return remap$(v, { - "config_schema": "configSchema", - }); - }), -); +export const RuleSchema$inboundSchema: z.ZodMiniType = z + .pipe( + z.object({ + config_schema: types.optional(z.record(z.string(), z.any())), + description: z.optional(z.nullable(types.string())), + name: types.string(), + }), + z.transform((v) => { + return remap$(v, { + "config_schema": "configSchema", + }); + }), + ); /** @internal */ -export type EvaluatorSchema$Outbound = { +export type RuleSchema$Outbound = { config_schema?: { [k: string]: any } | undefined; description?: string | null | undefined; name: string; }; /** @internal */ -export const EvaluatorSchema$outboundSchema: z.ZodMiniType< - EvaluatorSchema$Outbound, - EvaluatorSchema +export const RuleSchema$outboundSchema: z.ZodMiniType< + RuleSchema$Outbound, + RuleSchema > = z.pipe( z.object({ configSchema: z.optional(z.record(z.string(), z.any())), @@ -72,17 +70,15 @@ export const EvaluatorSchema$outboundSchema: z.ZodMiniType< }), ); -export function evaluatorSchemaToJSON( - evaluatorSchema: EvaluatorSchema, -): string { - return JSON.stringify(EvaluatorSchema$outboundSchema.parse(evaluatorSchema)); +export function ruleSchemaToJSON(ruleSchema: RuleSchema): string { + return 
JSON.stringify(RuleSchema$outboundSchema.parse(ruleSchema)); } -export function evaluatorSchemaFromJSON( +export function ruleSchemaFromJSON( jsonString: string, -): SafeParseResult { +): SafeParseResult { return safeParse( jsonString, - (x) => EvaluatorSchema$inboundSchema.parse(JSON.parse(x)), - `Failed to parse 'EvaluatorSchema' from JSON`, + (x) => RuleSchema$inboundSchema.parse(JSON.parse(x)), + `Failed to parse 'RuleSchema' from JSON`, ); } diff --git a/sdks/typescript/src/generated/models/rule-spec.ts b/sdks/typescript/src/generated/models/rule-spec.ts new file mode 100644 index 00000000..850ccad7 --- /dev/null +++ b/sdks/typescript/src/generated/models/rule-spec.ts @@ -0,0 +1,64 @@ +/* + * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. + */ + +import * as z from "zod/v4-mini"; +import { safeParse } from "../lib/schemas.js"; +import { Result as SafeParseResult } from "../types/fp.js"; +import * as types from "../types/primitives.js"; +import { SDKValidationError } from "./errors/sdk-validation-error.js"; + +/** + * Rule specification. See GET /rules for available rules and schemas. 
+ * + * @remarks + * + * Rule reference formats: + * - Built-in: "regex", "list", "json", "sql" + * - External: "galileo.luna" (requires agent-control-rules[galileo]) + * - Agent-scoped: "my-agent:my-rule" (validated in endpoint, not here) + */ +export type RuleSpec = { + /** + * Rule-specific configuration + */ + config: { [k: string]: any }; + /** + * Rule name or agent-scoped reference (agent:rule) + */ + name: string; +}; + +/** @internal */ +export const RuleSpec$inboundSchema: z.ZodMiniType = z + .object({ + config: z.record(z.string(), z.any()), + name: types.string(), + }); +/** @internal */ +export type RuleSpec$Outbound = { + config: { [k: string]: any }; + name: string; +}; + +/** @internal */ +export const RuleSpec$outboundSchema: z.ZodMiniType< + RuleSpec$Outbound, + RuleSpec +> = z.object({ + config: z.record(z.string(), z.any()), + name: z.string(), +}); + +export function ruleSpecToJSON(ruleSpec: RuleSpec): string { + return JSON.stringify(RuleSpec$outboundSchema.parse(ruleSpec)); +} +export function ruleSpecFromJSON( + jsonString: string, +): SafeParseResult { + return safeParse( + jsonString, + (x) => RuleSpec$inboundSchema.parse(JSON.parse(x)), + `Failed to parse 'RuleSpec' from JSON`, + ); +} diff --git a/sdks/typescript/src/generated/models/stats-response.ts b/sdks/typescript/src/generated/models/stats-response.ts index 003c0364..eba46a45 100644 --- a/sdks/typescript/src/generated/models/stats-response.ts +++ b/sdks/typescript/src/generated/models/stats-response.ts @@ -49,8 +49,8 @@ export type StatsResponse = { * * Attributes: * execution_count: Total executions across all controls - * match_count: Total matches across all controls (evaluator matched) - * non_match_count: Total non-matches across all controls (evaluator didn't match) + * match_count: Total matches across all controls (rule matched) + * non_match_count: Total non-matches across all controls (rule didn't match) * error_count: Total errors across all controls (evaluation failed) 
* action_counts: Breakdown of actions for matched executions * timeseries: Time-series data points (only when include_timeseries=true) diff --git a/sdks/typescript/src/generated/models/stats-totals.ts b/sdks/typescript/src/generated/models/stats-totals.ts index 11ab1098..aa10224c 100644 --- a/sdks/typescript/src/generated/models/stats-totals.ts +++ b/sdks/typescript/src/generated/models/stats-totals.ts @@ -25,8 +25,8 @@ import { * * Attributes: * execution_count: Total executions across all controls - * match_count: Total matches across all controls (evaluator matched) - * non_match_count: Total non-matches across all controls (evaluator didn't match) + * match_count: Total matches across all controls (rule matched) + * non_match_count: Total non-matches across all controls (rule didn't match) * error_count: Total errors across all controls (evaluation failed) * action_counts: Breakdown of actions for matched executions * timeseries: Time-series data points (only when include_timeseries=true) diff --git a/sdks/typescript/src/generated/sdk/agents.ts b/sdks/typescript/src/generated/sdk/agents.ts index bed5b41f..9ccc2246 100644 --- a/sdks/typescript/src/generated/sdk/agents.ts +++ b/sdks/typescript/src/generated/sdk/agents.ts @@ -5,13 +5,13 @@ import { agentsAddControl } from "../funcs/agents-add-control.js"; import { agentsAddPolicy } from "../funcs/agents-add-policy.js"; import { agentsDeletePolicy } from "../funcs/agents-delete-policy.js"; -import { agentsGetEvaluator } from "../funcs/agents-get-evaluator.js"; import { agentsGetPolicies } from "../funcs/agents-get-policies.js"; import { agentsGetPolicy } from "../funcs/agents-get-policy.js"; +import { agentsGetRule } from "../funcs/agents-get-rule.js"; import { agentsGet } from "../funcs/agents-get.js"; import { agentsInit } from "../funcs/agents-init.js"; import { agentsListControls } from "../funcs/agents-list-controls.js"; -import { agentsListEvaluators } from "../funcs/agents-list-evaluators.js"; +import { 
agentsListRules } from "../funcs/agents-list-rules.js"; import { agentsList } from "../funcs/agents-list.js"; import { agentsRemoveAllAgentPolicies } from "../funcs/agents-remove-all-agent-policies.js"; import { agentsRemoveControl } from "../funcs/agents-remove-control.js"; @@ -31,7 +31,7 @@ export class Agents extends ClientSDK { * List all registered agents with cursor-based pagination. * * Returns a summary of each agent including identifier, policy associations, - * and counts of registered steps and evaluators. Results are scoped to + * and counts of registered steps and rules. Results are scoped to * the request's namespace; agents in other namespaces are not visible. * * Args: @@ -67,7 +67,7 @@ export class Agents extends ClientSDK { * * conflict_mode controls registration conflict handling: * - strict (default): preserve compatibility checks and conflict errors - * - overwrite: latest init payload replaces steps/evaluators and returns change summary + * - overwrite: latest init payload replaces steps/rules and returns change summary * * The returned ``controls`` list is the de-duplicated union of the agent's * direct controls, policy-derived controls, and (when ``target_type`` and @@ -129,17 +129,17 @@ export class Agents extends ClientSDK { } /** - * Modify agent (remove steps/evaluators) + * Modify agent (remove steps/rules) * * @remarks - * Remove steps and/or evaluators from an agent. + * Remove steps and/or rules from an agent. * * This is the complement to initAgent which only adds items. * Removals are idempotent - attempting to remove non-existent items is not an error. 
* * Args: * agent_name: Agent identifier - * request: Lists of step/evaluator identifiers to remove + * request: Lists of step/rule identifiers to remove * db: Database session (injected) * principal: Authorized request principal * @@ -244,71 +244,6 @@ export class Agents extends ClientSDK { )); } - /** - * List agent's registered evaluator schemas - * - * @remarks - * List all evaluator schemas registered with an agent. - * - * Evaluator schemas are registered via initAgent and used for: - * - Config validation when creating Controls - * - UI to display available config options - * - * Args: - * agent_name: Agent identifier - * cursor: Optional cursor for pagination (name of last evaluator from previous page) - * limit: Pagination limit (default 20, max 100) - * db: Database session (injected) - * principal: Authorized request principal - * - * Returns: - * ListEvaluatorsResponse with evaluator schemas and pagination - * - * Raises: - * HTTPException 404: Agent not found - */ - async listEvaluators( - request: - operations.ListAgentEvaluatorsApiV1AgentsAgentNameEvaluatorsGetRequest, - options?: RequestOptions, - ): Promise { - return unwrapAsync(agentsListEvaluators( - this, - request, - options, - )); - } - - /** - * Get specific evaluator schema - * - * @remarks - * Get a specific evaluator schema registered with an agent. 
- * - * Args: - * agent_name: Agent identifier - * evaluator_name: Name of the evaluator - * db: Database session (injected) - * principal: Authorized request principal - * - * Returns: - * EvaluatorSchemaItem with schema details - * - * Raises: - * HTTPException 404: Agent or evaluator not found - */ - async getEvaluator( - request: - operations.GetAgentEvaluatorApiV1AgentsAgentNameEvaluatorsEvaluatorNameGetRequest, - options?: RequestOptions, - ): Promise { - return unwrapAsync(agentsGetEvaluator( - this, - request, - options, - )); - } - /** * Remove all policy associations from agent * @@ -435,4 +370,67 @@ export class Agents extends ClientSDK { options, )); } + + /** + * List agent's registered rule schemas + * + * @remarks + * List all rule schemas registered with an agent. + * + * Rule schemas are registered via initAgent and used for: + * - Config validation when creating Controls + * - UI to display available config options + * + * Args: + * agent_name: Agent identifier + * cursor: Optional cursor for pagination (name of last rule from previous page) + * limit: Pagination limit (default 20, max 100) + * db: Database session (injected) + * principal: Authorized request principal + * + * Returns: + * ListRulesResponse with rule schemas and pagination + * + * Raises: + * HTTPException 404: Agent not found + */ + async listRules( + request: operations.ListAgentRulesApiV1AgentsAgentNameRulesGetRequest, + options?: RequestOptions, + ): Promise { + return unwrapAsync(agentsListRules( + this, + request, + options, + )); + } + + /** + * Get specific rule schema + * + * @remarks + * Get a specific rule schema registered with an agent. 
+ * + * Args: + * agent_name: Agent identifier + * rule_name: Name of the rule + * db: Database session (injected) + * principal: Authorized request principal + * + * Returns: + * RuleSchemaItem with schema details + * + * Raises: + * HTTPException 404: Agent or rule not found + */ + async getRule( + request: operations.GetAgentRuleApiV1AgentsAgentNameRulesRuleNameGetRequest, + options?: RequestOptions, + ): Promise { + return unwrapAsync(agentsGetRule( + this, + request, + options, + )); + } } diff --git a/sdks/typescript/src/generated/sdk/evaluators.ts b/sdks/typescript/src/generated/sdk/rules.ts similarity index 53% rename from sdks/typescript/src/generated/sdk/evaluators.ts rename to sdks/typescript/src/generated/sdk/rules.ts index b5bab688..dd6a8bde 100644 --- a/sdks/typescript/src/generated/sdk/evaluators.ts +++ b/sdks/typescript/src/generated/sdk/rules.ts @@ -2,33 +2,33 @@ * Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT. */ -import { evaluatorsList } from "../funcs/evaluators-list.js"; +import { rulesList } from "../funcs/rules-list.js"; import { ClientSDK, RequestOptions } from "../lib/sdks.js"; import * as models from "../models/index.js"; import { unwrapAsync } from "../types/fp.js"; -export class Evaluators extends ClientSDK { +export class Rules extends ClientSDK { /** - * List available evaluators + * List available rules * * @remarks - * List all available evaluators. + * List all available rules. * - * Returns metadata and JSON Schema for each built-in evaluator. + * Returns metadata and JSON Schema for each built-in rule. * - * Built-in evaluators: + * Built-in rules: * - **regex**: Regular expression pattern matching * - **list**: List-based value matching with flexible logic * - **json**: JSON validation with schema, types, constraints * - **sql**: SQL query validation * - * Custom evaluators are registered per-agent via initAgent. - * Use GET /agents/{agent_name}/evaluators to list agent-specific schemas. 
+ * Custom rules are registered per-agent via initAgent. + * Use GET /agents/{agent_name}/rules to list agent-specific schemas. */ async list( options?: RequestOptions, - ): Promise<{ [k: string]: models.EvaluatorInfo }> { - return unwrapAsync(evaluatorsList( + ): Promise<{ [k: string]: models.RuleInfo }> { + return unwrapAsync(rulesList( this, options, )); diff --git a/sdks/typescript/src/generated/sdk/sdk.ts b/sdks/typescript/src/generated/sdk/sdk.ts index 7a66b2af..42ba31fd 100644 --- a/sdks/typescript/src/generated/sdk/sdk.ts +++ b/sdks/typescript/src/generated/sdk/sdk.ts @@ -8,9 +8,9 @@ import { Auth } from "./auth.js"; import { ControlBindings } from "./control-bindings.js"; import { Controls } from "./controls.js"; import { Evaluation } from "./evaluation.js"; -import { Evaluators } from "./evaluators.js"; import { Observability } from "./observability.js"; import { Policies } from "./policies.js"; +import { Rules } from "./rules.js"; import { System } from "./system.js"; export class AgentControlSDK extends ClientSDK { @@ -44,11 +44,6 @@ export class AgentControlSDK extends ClientSDK { return (this._evaluation ??= new Evaluation(this._options)); } - private _evaluators?: Evaluators; - get evaluators(): Evaluators { - return (this._evaluators ??= new Evaluators(this._options)); - } - private _observability?: Observability; get observability(): Observability { return (this._observability ??= new Observability(this._options)); @@ -58,4 +53,9 @@ export class AgentControlSDK extends ClientSDK { get policies(): Policies { return (this._policies ??= new Policies(this._options)); } + + private _rules?: Rules; + get rules(): Rules { + return (this._rules ??= new Rules(this._options)); + } } diff --git a/sdks/typescript/src/index.ts b/sdks/typescript/src/index.ts index ec230d8c..10ea0c04 100644 --- a/sdks/typescript/src/index.ts +++ b/sdks/typescript/src/index.ts @@ -9,7 +9,7 @@ export type { AgentsApi, ControlsApi, EvaluationApi, - EvaluatorsApi, + RulesApi, 
ObservabilityApi, PoliciesApi, StepSchema, diff --git a/sdks/typescript/tests/client-api.test.ts b/sdks/typescript/tests/client-api.test.ts index b0fa91cb..67d10f69 100644 --- a/sdks/typescript/tests/client-api.test.ts +++ b/sdks/typescript/tests/client-api.test.ts @@ -120,7 +120,7 @@ describe("AgentControlClient API wiring", () => { decision: "deny", }, condition: { - evaluator: { + rule: { name: "regex", config: { pattern: "pii" }, }, @@ -149,7 +149,7 @@ describe("AgentControlClient API wiring", () => { decision: "deny", }, condition: { - evaluator: { + rule: { name: "regex", config: { pattern: "pii" }, }, diff --git a/server/Dockerfile b/server/Dockerfile index 7d21f115..4ce3f4cd 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -47,7 +47,7 @@ COPY server/alembic/ alembic/ COPY models/ models/ COPY engine/ engine/ COPY telemetry/ telemetry/ -COPY evaluators/ evaluators/ +COPY rules/ rules/ # Copy server application COPY server/ server/ @@ -55,19 +55,19 @@ COPY server/ server/ # Copy the exported UI bundle that FastAPI serves for hosted deployments COPY --from=ui-builder /ui/out ./server/ui-dist -# Install server + bundled runtime packages (engine/evaluators/models/telemetry) +# Install server + bundled runtime packages (engine/rules/models/telemetry) # We ignore the lockfile if it's missing or out of sync to ensure build succeeds during dev RUN uv sync \ --package agent-control-server \ --package agent-control-engine \ - --package agent-control-evaluators \ + --package agent-control-rules \ --package agent-control-models \ --package agent-control-telemetry \ --no-dev \ || uv sync \ --package agent-control-server \ --package agent-control-engine \ - --package agent-control-evaluators \ + --package agent-control-rules \ --package agent-control-models \ --package agent-control-telemetry \ --no-dev \ diff --git a/server/README.md b/server/README.md index 7c56e446..0c554d57 100644 --- a/server/README.md +++ b/server/README.md @@ -5,7 +5,7 @@ FastAPI server 
that powers Agent Control. It manages agents and controls, evalua ## What it provides - Agent registration and control association -- Control CRUD and evaluator configuration +- Control CRUD and rule configuration - Runtime evaluation (`/api/v1/evaluation`) with pre/post stages - Observability endpoints for events and stats - API key authentication for production deployments @@ -54,6 +54,6 @@ psycopg's bundled binary package. ## Configuration -Server configuration is driven by environment variables (database, auth, observability, evaluators). For the full list and examples, see the docs. +Server configuration is driven by environment variables (database, auth, observability, rules). For the full list and examples, see the docs. Full guide: https://docs.agentcontrol.dev/components/server diff --git a/server/alembic/versions/c1e9f9c4a1d2_control_versions_and_soft_delete_legacy_controls.py b/server/alembic/versions/c1e9f9c4a1d2_control_versions_and_soft_delete_legacy_controls.py index cb9a09ed..3d828aca 100644 --- a/server/alembic/versions/c1e9f9c4a1d2_control_versions_and_soft_delete_legacy_controls.py +++ b/server/alembic/versions/c1e9f9c4a1d2_control_versions_and_soft_delete_legacy_controls.py @@ -31,26 +31,73 @@ _BACKFILL_NOTE = "Backfilled from existing control" -def _classify_control_payload(data: Any) -> tuple[bool, str | None]: - """Return whether a legacy control payload is still usable.""" - if data == {}: - return False, "empty payload" +def _condition_with_current_leaf_key(value: Any) -> Any: + """Return a validation-only copy with historical evaluator leaves as rules.""" + if not isinstance(value, dict): + return value + + result = dict(value) + if "evaluator" in result and "rule" not in result: + result["rule"] = result.pop("evaluator") + + if isinstance(result.get("and"), list): + result["and"] = [_condition_with_current_leaf_key(child) for child in result["and"]] + if isinstance(result.get("or"), list): + result["or"] = 
[_condition_with_current_leaf_key(child) for child in result["or"]] + if isinstance(result.get("not"), dict): + result["not"] = _condition_with_current_leaf_key(result["not"]) + + return result + + +def _control_with_current_leaf_key(data: Any) -> Any: + """Return a validation-only copy compatible with the current control model.""" if not isinstance(data, dict): - return False, "invalid control payload" + return data + + result = dict(data) + + if "evaluator" in result and "rule" not in result: + result["rule"] = result.pop("evaluator") + + if isinstance(result.get("condition"), dict): + result["condition"] = _condition_with_current_leaf_key(result["condition"]) + + return result + +def _is_control_payload_usable(data: Any) -> bool: try: UnrenderedTemplateControl.model_validate(data) except ValidationError: pass else: - return True, None + return True try: ControlDefinition.model_validate(data) except ValidationError: + return False + + return True + + +def _classify_control_payload(data: Any) -> tuple[bool, str | None]: + """Return whether a legacy control payload is still usable.""" + if data == {}: + return False, "empty payload" + if not isinstance(data, dict): return False, "invalid control payload" - return True, None + if _is_control_payload_usable(data): + return True, None + + # This migration runs before the later evaluator->rule payload rename. + # Keep classification stable after the model rename without writing rows here. 
+ if _is_control_payload_usable(_control_with_current_leaf_key(data)): + return True, None + + return False, "invalid control payload" def _snapshot_payload( diff --git a/server/alembic/versions/d4f0b2e1c9a8_rename_evaluator_payload_keys_to_rules.py b/server/alembic/versions/d4f0b2e1c9a8_rename_evaluator_payload_keys_to_rules.py new file mode 100644 index 00000000..e913748e --- /dev/null +++ b/server/alembic/versions/d4f0b2e1c9a8_rename_evaluator_payload_keys_to_rules.py @@ -0,0 +1,335 @@ +"""rename evaluator payload keys to rules + +Revision ID: d4f0b2e1c9a8 +Revises: e2b7f4a9c6d1 +Create Date: 2026-06-22 13:45:00.000000 + +""" + +from __future__ import annotations + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "d4f0b2e1c9a8" +down_revision = "e2b7f4a9c6d1" +branch_labels = None +depends_on = None + + +_HELPER_FUNCTIONS = """ +CREATE OR REPLACE FUNCTION _ac_rename_object_key( + input_value jsonb, + old_key text, + new_key text +) RETURNS jsonb AS $$ +BEGIN + IF ( + input_value IS NULL + OR jsonb_typeof(input_value) <> 'object' + OR NOT input_value ? old_key + ) THEN + RETURN input_value; + END IF; + + IF input_value ? 
new_key THEN + RETURN input_value - old_key; + END IF; + + RETURN (input_value - old_key) || jsonb_build_object(new_key, input_value->old_key); +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION _ac_rename_condition_node( + input_value jsonb, + old_key text, + new_key text +) RETURNS jsonb AS $$ +DECLARE + result jsonb; + rewritten_children jsonb; +BEGIN + IF input_value IS NULL OR jsonb_typeof(input_value) <> 'object' THEN + RETURN input_value; + END IF; + + result := _ac_rename_object_key(input_value, old_key, new_key); + + IF jsonb_typeof(result->'and') = 'array' THEN + SELECT COALESCE( + jsonb_agg(_ac_rename_condition_node(child.value, old_key, new_key)), + '[]'::jsonb + ) + INTO rewritten_children + FROM jsonb_array_elements(result->'and') AS child(value); + result := jsonb_set(result, '{and}', rewritten_children, false); + END IF; + + IF jsonb_typeof(result->'or') = 'array' THEN + SELECT COALESCE( + jsonb_agg(_ac_rename_condition_node(child.value, old_key, new_key)), + '[]'::jsonb + ) + INTO rewritten_children + FROM jsonb_array_elements(result->'or') AS child(value); + result := jsonb_set(result, '{or}', rewritten_children, false); + END IF; + + IF jsonb_typeof(result->'not') = 'object' THEN + result := jsonb_set( + result, + '{not}', + _ac_rename_condition_node(result->'not', old_key, new_key), + false + ); + END IF; + + RETURN result; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION _ac_rename_control_data( + input_value jsonb, + old_key text, + new_key text +) RETURNS jsonb AS $$ +DECLARE + result jsonb; + template_value jsonb; +BEGIN + IF input_value IS NULL OR jsonb_typeof(input_value) <> 'object' THEN + RETURN input_value; + END IF; + + -- Legacy flat controls used top-level selector + evaluator/rule. 
+ result := _ac_rename_object_key(input_value, old_key, new_key); + + IF jsonb_typeof(result->'condition') = 'object' THEN + result := jsonb_set( + result, + '{condition}', + _ac_rename_condition_node(result->'condition', old_key, new_key), + false + ); + END IF; + + IF ( + jsonb_typeof(result->'template') = 'object' + AND result->'template' ? 'definition_template' + ) THEN + template_value := jsonb_set( + result->'template', + '{definition_template}', + _ac_rename_control_data( + result->'template'->'definition_template', + old_key, + new_key + ), + false + ); + result := jsonb_set(result, '{template}', template_value, false); + END IF; + + RETURN result; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION _ac_rename_condition_trace( + input_value jsonb, + old_key text, + new_key text +) RETURNS jsonb AS $$ +DECLARE + result jsonb; + rewritten_children jsonb; +BEGIN + IF input_value IS NULL OR jsonb_typeof(input_value) <> 'object' THEN + RETURN input_value; + END IF; + + result := _ac_rename_object_key(input_value, old_key, new_key); + + IF jsonb_typeof(result->'children') = 'array' THEN + SELECT COALESCE( + jsonb_agg(_ac_rename_condition_trace(child.value, old_key, new_key)), + '[]'::jsonb + ) + INTO rewritten_children + FROM jsonb_array_elements(result->'children') AS child(value); + result := jsonb_set(result, '{children}', rewritten_children, false); + END IF; + + RETURN result; +END; +$$ LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION _ac_rename_event_data( + input_value jsonb, + old_rule_name_key text, + new_rule_name_key text, + old_primary_key text, + new_primary_key text, + old_all_key text, + new_all_key text +) RETURNS jsonb AS $$ +DECLARE + result jsonb; + metadata_value jsonb; +BEGIN + IF input_value IS NULL OR jsonb_typeof(input_value) <> 'object' THEN + RETURN input_value; + END IF; + + result := _ac_rename_object_key(input_value, old_rule_name_key, new_rule_name_key); + + IF jsonb_typeof(result->'metadata') = 'object' THEN + metadata_value := 
result->'metadata'; + metadata_value := _ac_rename_object_key( + metadata_value, + old_primary_key, + new_primary_key + ); + metadata_value := _ac_rename_object_key(metadata_value, old_all_key, new_all_key); + + IF jsonb_typeof(metadata_value->'condition_trace') = 'object' THEN + metadata_value := jsonb_set( + metadata_value, + '{condition_trace}', + _ac_rename_condition_trace( + metadata_value->'condition_trace', + old_rule_name_key, + new_rule_name_key + ), + false + ); + END IF; + + result := jsonb_set(result, '{metadata}', metadata_value, false); + END IF; + + RETURN result; +END; +$$ LANGUAGE plpgsql; +""" + +_DROP_HELPER_FUNCTIONS = """ +DROP FUNCTION IF EXISTS _ac_rename_event_data( + jsonb, + text, + text, + text, + text, + text, + text +); +DROP FUNCTION IF EXISTS _ac_rename_condition_trace(jsonb, text, text); +DROP FUNCTION IF EXISTS _ac_rename_control_data(jsonb, text, text); +DROP FUNCTION IF EXISTS _ac_rename_condition_node(jsonb, text, text); +DROP FUNCTION IF EXISTS _ac_rename_object_key(jsonb, text, text); +""" + + +def _rename_payloads( + *, + old_leaf_key: str, + new_leaf_key: str, + old_agent_rules_key: str, + new_agent_rules_key: str, + old_rule_name_key: str, + new_rule_name_key: str, + old_primary_key: str, + new_primary_key: str, + old_all_key: str, + new_all_key: str, +) -> None: + op.execute( + f""" + UPDATE agents + SET data = _ac_rename_object_key( + data, + '{old_agent_rules_key}', + '{new_agent_rules_key}' + ) + WHERE jsonb_typeof(data) = 'object' + AND data ? '{old_agent_rules_key}' + """ + ) + + op.execute( + f""" + UPDATE controls + SET data = _ac_rename_control_data(data, '{old_leaf_key}', '{new_leaf_key}') + WHERE data::text LIKE '%"{old_leaf_key}"%' + """ + ) + + op.execute( + f""" + UPDATE control_versions + SET snapshot = jsonb_set( + snapshot, + '{{data}}', + _ac_rename_control_data(snapshot->'data', '{old_leaf_key}', '{new_leaf_key}'), + false + ) + WHERE jsonb_typeof(snapshot) = 'object' + AND snapshot ? 
'data' + AND (snapshot->'data')::text LIKE '%"{old_leaf_key}"%' + """ + ) + + op.execute( + f""" + UPDATE control_execution_events + SET data = _ac_rename_event_data( + data, + '{old_rule_name_key}', + '{new_rule_name_key}', + '{old_primary_key}', + '{new_primary_key}', + '{old_all_key}', + '{new_all_key}' + ) + WHERE data::text LIKE '%"{old_rule_name_key}"%' + OR data::text LIKE '%"{old_primary_key}"%' + OR data::text LIKE '%"{old_all_key}"%' + """ + ) + + +def upgrade() -> None: + op.execute(_HELPER_FUNCTIONS) + try: + _rename_payloads( + old_leaf_key="evaluator", + new_leaf_key="rule", + old_agent_rules_key="evaluators", + new_agent_rules_key="rules", + old_rule_name_key="evaluator_name", + new_rule_name_key="rule_name", + old_primary_key="primary_evaluator", + new_primary_key="primary_rule", + old_all_key="all_evaluators", + new_all_key="all_rules", + ) + finally: + op.execute(_DROP_HELPER_FUNCTIONS) + + +def downgrade() -> None: + op.execute(_HELPER_FUNCTIONS) + try: + _rename_payloads( + old_leaf_key="rule", + new_leaf_key="evaluator", + old_agent_rules_key="rules", + new_agent_rules_key="evaluators", + old_rule_name_key="rule_name", + new_rule_name_key="evaluator_name", + old_primary_key="primary_rule", + new_primary_key="primary_evaluator", + old_all_key="all_rules", + new_all_key="all_evaluators", + ) + finally: + op.execute(_DROP_HELPER_FUNCTIONS) diff --git a/server/pyproject.toml b/server/pyproject.toml index 8e77a210..d754903e 100644 --- a/server/pyproject.toml +++ b/server/pyproject.toml @@ -5,7 +5,7 @@ description = "Server for Agent Control - manage and evaluate controls for AI ag requires-python = ">=3.12" # Note: agent-control-models, agent-control-engine, and agent-control-telemetry # are bundled at build time -# Note: agent-control-evaluators is a runtime dependency (NOT vendored) to avoid +# Note: agent-control-rules is a runtime dependency (NOT vendored) to avoid # duplicate module conflict when galileo extras are installed dependencies = [ 
"fastapi>=0.109.0", @@ -24,7 +24,7 @@ dependencies = [ "jsonschema-rs>=0.22.0", "PyJWT>=2.8.0", "google-re2>=1.1", # For engine (bundled) - "agent-control-evaluators>=7.5.0", # NOT vendored - avoid conflict with galileo + "agent-control-rules>=7.5.0", # NOT vendored - avoid conflict with galileo ] authors = [ {name = "Agent Control Team"} @@ -33,7 +33,7 @@ readme = "README.md" license = {text = "Apache-2.0"} [project.optional-dependencies] -galileo = ["agent-control-evaluator-galileo>=7.5.0"] +galileo = ["agent-control-rule-galileo>=7.5.0"] binary = ["psycopg[binary]>=3.1"] c = ["psycopg[c]>=3.1"] @@ -49,7 +49,7 @@ dev = [ "agent-control-models", "agent-control-engine", "agent-control-telemetry", - "agent-control-evaluators", + "agent-control-rules", ] [project.scripts] @@ -65,7 +65,7 @@ build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] # Note: agent_control_models, agent_control_engine, and agent_control_telemetry # are copied by scripts/build.py -# Note: agent_control_evaluators is a runtime dep, not vendored +# Note: agent_control_rules is a runtime dep, not vendored packages = [ "src/agent_control_server", "src/agent_control_models", @@ -108,6 +108,6 @@ known-first-party = ["agent_control_server"] agent-control-models = { workspace = true } agent-control-engine = { workspace = true } agent-control-telemetry = { workspace = true } -agent-control-evaluators = { workspace = true } +agent-control-rules = { workspace = true } # For local dev: use local galileo package instead of PyPI -agent-control-evaluator-galileo = { path = "../evaluators/contrib/galileo", editable = true } +agent-control-rule-galileo = { path = "../rules/contrib/galileo", editable = true } diff --git a/server/src/agent_control_server/auth.py b/server/src/agent_control_server/auth.py index 9c1a3da9..850ac692 100644 --- a/server/src/agent_control_server/auth.py +++ b/server/src/agent_control_server/auth.py @@ -243,7 +243,7 @@ async def require_admin_key( """ Dependency that 
requires an admin API key or admin session cookie. - Use for sensitive operations like evaluator management or configuration: + Use for sensitive operations like rule management or configuration: @router.delete("/dangerous", dependencies=[Depends(require_admin_key)]) async def dangerous_op(): diff --git a/server/src/agent_control_server/auth_framework/core.py b/server/src/agent_control_server/auth_framework/core.py index 011c62de..a495555a 100644 --- a/server/src/agent_control_server/auth_framework/core.py +++ b/server/src/agent_control_server/auth_framework/core.py @@ -55,7 +55,7 @@ class Operation(StrEnum): AGENTS_READ = "agents.read" AGENTS_CREATE = "agents.create" AGENTS_UPDATE = "agents.update" - EVALUATORS_READ = "evaluators.read" + RULES_READ = "rules.read" OBSERVABILITY_READ = "observability.read" OBSERVABILITY_WRITE = "observability.write" RUNTIME_USE = "runtime.use" diff --git a/server/src/agent_control_server/auth_framework/providers/header.py b/server/src/agent_control_server/auth_framework/providers/header.py index 2d917d91..fe6aff35 100644 --- a/server/src/agent_control_server/auth_framework/providers/header.py +++ b/server/src/agent_control_server/auth_framework/providers/header.py @@ -48,7 +48,7 @@ class AccessLevel(Enum): Operation.AGENTS_READ: AccessLevel.AUTHENTICATED, Operation.AGENTS_CREATE: AccessLevel.AUTHENTICATED, Operation.AGENTS_UPDATE: AccessLevel.ADMIN, - Operation.EVALUATORS_READ: AccessLevel.AUTHENTICATED, + Operation.RULES_READ: AccessLevel.AUTHENTICATED, Operation.OBSERVABILITY_READ: AccessLevel.AUTHENTICATED, Operation.OBSERVABILITY_WRITE: AccessLevel.AUTHENTICATED, Operation.RUNTIME_TOKEN_EXCHANGE: AccessLevel.AUTHENTICATED, diff --git a/server/src/agent_control_server/endpoints/agents.py b/server/src/agent_control_server/endpoints/agents.py index 1d8efe4b..12f56e79 100644 --- a/server/src/agent_control_server/endpoints/agents.py +++ b/server/src/agent_control_server/endpoints/agents.py @@ -1,7 +1,7 @@ from collections.abc import 
Sequence from typing import Any -from agent_control_engine import list_evaluators +from agent_control_engine import list_rules from agent_control_models.agent import Agent as APIAgent from agent_control_models.agent import StepSchema from agent_control_models.controls import ControlDefinition, ControlDefinitionRuntime @@ -13,19 +13,19 @@ AssocResponse, ConflictMode, DeletePolicyResponse, - EvaluatorSchema, GetAgentPoliciesResponse, GetAgentResponse, GetPolicyResponse, - InitAgentEvaluatorRemoval, InitAgentOverwriteChanges, InitAgentRequest, InitAgentResponse, + InitAgentRuleRemoval, ListAgentsResponse, PaginationInfo, PatchAgentRequest, PatchAgentResponse, RemoveAgentControlResponse, + RuleSchema, SetPolicyResponse, StepKey, ) @@ -60,11 +60,11 @@ AgentControlRenderedState, ControlService, ) -from ..services.evaluator_utils import ( - parse_evaluator_ref_full, +from ..services.query_utils import escape_like_pattern +from ..services.rule_utils import ( + parse_rule_ref_full, validate_config_against_schema, ) -from ..services.query_utils import escape_like_pattern from ..services.schema_compat import ( check_schema_compatibility, format_compatibility_error, @@ -74,8 +74,8 @@ _logger = get_logger(__name__) -# Cache for built-in evaluator names (populated on first use) -_BUILTIN_EVALUATOR_NAMES: set[str] | None = None +# Cache for built-in rule names (populated on first use) +_BUILTIN_RULE_NAMES: set[str] | None = None # Pagination constants _DEFAULT_PAGINATION_OFFSET = 0 @@ -196,28 +196,28 @@ async def _authorize_existing_agent_overwrite( # ============================================================================= -def _get_builtin_evaluator_names() -> set[str]: - """Get built-in evaluator names (cached).""" - global _BUILTIN_EVALUATOR_NAMES - if _BUILTIN_EVALUATOR_NAMES is None: - _BUILTIN_EVALUATOR_NAMES = set(list_evaluators().keys()) - return _BUILTIN_EVALUATOR_NAMES +def _get_builtin_rule_names() -> set[str]: + """Get built-in rule names (cached).""" + global 
_BUILTIN_RULE_NAMES + if _BUILTIN_RULE_NAMES is None: + _BUILTIN_RULE_NAMES = set(list_rules().keys()) + return _BUILTIN_RULE_NAMES def _validate_controls_for_agent(agent: Agent, controls: list[Control]) -> list[str]: """Validate controls can run on this agent.""" errors: list[str] = [] - # Parse agent's registered evaluators + # Parse agent's registered rules try: agent_data = AgentData.model_validate(agent.data) except ValidationError: return [f"Agent '{agent.name}' has corrupted data"] - agent_evaluators = {e.name: e for e in (agent_data.evaluators or [])} + agent_rules = {e.name: e for e in (agent_data.rules or [])} for control in controls: - # Skip unrendered template controls - they have no evaluators to validate. + # Skip unrendered template controls - they have no rules to validate. if ( isinstance(control.data, dict) and control.data.get("template") is not None @@ -231,36 +231,36 @@ def _validate_controls_for_agent(agent: Agent, controls: list[Control]) -> list[ errors.append(f"Control '{control.name}' has corrupted data") continue - for _, evaluator_cfg in control_definition.iter_condition_leaf_parts(): - evaluator_name = evaluator_cfg.name - parsed = parse_evaluator_ref_full(evaluator_name) + for _, rule_cfg in control_definition.iter_condition_leaf_parts(): + rule_name = rule_cfg.name + parsed = parse_rule_ref_full(rule_name) if parsed.type != "agent": - continue # Built-in/external evaluator, already validated at control creation + continue # Built-in/external rule, already validated at control creation - # Agent-scoped evaluator - check if target matches this agent + # Agent-scoped rule - check if target matches this agent if parsed.namespace != agent.name: errors.append( - f"Control '{control.name}' references evaluator '{evaluator_name}' " + f"Control '{control.name}' references rule '{rule_name}' " f"which belongs to agent '{parsed.namespace}', not '{agent.name}'" ) continue - # Check if evaluator exists on this agent - if parsed.local_name not in 
agent_evaluators: + # Check if rule exists on this agent + if parsed.local_name not in agent_rules: errors.append( - f"Control '{control.name}' references evaluator '{parsed.local_name}' " + f"Control '{control.name}' references rule '{parsed.local_name}' " f"which is not registered with agent '{agent.name}'. " - f"Register it via initAgent or use a different evaluator." + f"Register it via initAgent or use a different rule." ) continue # Validate config against schema - registered_ev = agent_evaluators[parsed.local_name] - if registered_ev.config_schema: + registered_rule = agent_rules[parsed.local_name] + if registered_rule.config_schema: try: validate_config_against_schema( - evaluator_cfg.config, - registered_ev.config_schema, + rule_cfg.config, + registered_rule.config_schema, ) except JSONSchemaValidationError as e: errors.append( @@ -271,23 +271,23 @@ def _validate_controls_for_agent(agent: Agent, controls: list[Control]) -> list[ return errors -def _find_referencing_controls_for_removed_evaluators( +def _find_referencing_controls_for_removed_rules( controls: Sequence[APIControl], agent_name: str, - remove_evaluator_set: set[str], + remove_rule_set: set[str], ) -> list[tuple[str, str]]: - """Return sorted unique control/evaluator pairs blocking evaluator removal.""" + """Return sorted unique control/rule pairs blocking rule removal.""" referencing_control_set: set[tuple[str, str]] = set() for ctrl in controls: if not isinstance(ctrl.control, ControlDefinition): continue # Skip unrendered template controls - for _, evaluator_spec in ctrl.control.iter_condition_leaf_parts(): - evaluator_ref = evaluator_spec.name - if ":" not in evaluator_ref: + for _, rule_spec in ctrl.control.iter_condition_leaf_parts(): + rule_ref = rule_spec.name + if ":" not in rule_ref: continue - ref_agent, ref_eval = evaluator_ref.split(":", 1) - if ref_agent == agent_name and ref_eval in remove_evaluator_set: + ref_agent, ref_eval = rule_ref.split(":", 1) + if ref_agent == agent_name 
and ref_eval in remove_rule_set: referencing_control_set.add((ctrl.name, ref_eval)) return sorted(referencing_control_set, key=lambda item: (item[0], item[1])) @@ -319,15 +319,15 @@ def _step_key_model(step_key: StepKeyTuple) -> StepKey: return StepKey(type=step_type, name=step_name) -async def _build_overwrite_evaluator_removals( +async def _build_overwrite_rule_removals( agent: Agent, - removed_evaluators: set[str], + removed_rules: set[str], db: AsyncSession, *, namespace_key: str, -) -> list[InitAgentEvaluatorRemoval]: - """Build evaluator removal details, including active-control references.""" - if not removed_evaluators: +) -> list[InitAgentRuleRemoval]: + """Build rule removal details, including active-control references.""" + if not removed_rules: return [] try: @@ -338,40 +338,40 @@ async def _build_overwrite_evaluator_removals( ) except APIValidationError: _logger.warning( - "Skipping evaluator removal reference checks for agent '%s' " + "Skipping rule removal reference checks for agent '%s' " "due to invalid control data", agent.name, exc_info=True, ) - return [InitAgentEvaluatorRemoval(name=name) for name in sorted(removed_evaluators)] + return [InitAgentRuleRemoval(name=name) for name in sorted(removed_rules)] - references_by_evaluator: dict[str, set[tuple[int, str]]] = {} + references_by_rule: dict[str, set[tuple[int, str]]] = {} for control in controls: if not isinstance(control.control, ControlDefinition): continue # Skip unrendered template controls - for _, evaluator_spec in control.control.iter_condition_leaf_parts(): - evaluator_ref = evaluator_spec.name - parsed = parse_evaluator_ref_full(evaluator_ref) + for _, rule_spec in control.control.iter_condition_leaf_parts(): + rule_ref = rule_spec.name + parsed = parse_rule_ref_full(rule_ref) if parsed.type != "agent": continue if parsed.namespace != agent.name: continue - if parsed.local_name not in removed_evaluators: + if parsed.local_name not in removed_rules: continue - 
references_by_evaluator.setdefault(parsed.local_name, set()).add( + references_by_rule.setdefault(parsed.local_name, set()).add( (control.id, control.name) ) - removals: list[InitAgentEvaluatorRemoval] = [] - for evaluator_name in sorted(removed_evaluators): - references = references_by_evaluator.get(evaluator_name) + removals: list[InitAgentRuleRemoval] = [] + for rule_name in sorted(removed_rules): + references = references_by_rule.get(rule_name) if references is None: - removals.append(InitAgentEvaluatorRemoval(name=evaluator_name)) + removals.append(InitAgentRuleRemoval(name=rule_name)) continue sorted_references = sorted(references, key=lambda item: (item[1], item[0])) removals.append( - InitAgentEvaluatorRemoval( - name=evaluator_name, + InitAgentRuleRemoval( + name=rule_name, referenced_by_active_controls=True, control_ids=[control_id for control_id, _ in sorted_references], control_names=[control_name for _, control_name in sorted_references], @@ -397,7 +397,7 @@ async def list_agents( List all registered agents with cursor-based pagination. Returns a summary of each agent including identifier, policy associations, - and counts of registered steps and evaluators. Results are scoped to + and counts of registered steps and rules. Results are scoped to the request's namespace; agents in other namespaces are not visible. 
Args: @@ -503,13 +503,13 @@ async def list_agents( summaries: list[AgentSummary] = [] for agent in agents: step_count = 0 - evaluator_count = 0 + rule_count = 0 # Parse agent data to get counts try: data_model = AgentData.model_validate(agent.data) step_count = len(data_model.steps or []) - evaluator_count = len(data_model.evaluators or []) + rule_count = len(data_model.rules or []) except ValidationError: # If data is corrupted, log and use zero counts _logger.warning("Agent '%s' has invalid data, using zero counts", agent.name) @@ -525,7 +525,7 @@ async def list_agents( policy_ids=policy_ids, created_at=agent.created_at.isoformat() if agent.created_at else None, step_count=step_count, - evaluator_count=evaluator_count, + rule_count=rule_count, active_controls_count=active_controls, ) ) @@ -563,7 +563,7 @@ async def init_agent( conflict_mode controls registration conflict handling: - strict (default): preserve compatibility checks and conflict errors - - overwrite: latest init payload replaces steps/evaluators and returns change summary + - overwrite: latest init payload replaces steps/rules and returns change summary The returned ``controls`` list is the de-duplicated union of the agent's direct controls, policy-derived controls, and (when ``target_type`` and @@ -585,23 +585,23 @@ async def init_agent( namespace_key = principal.namespace_key _ensure_target_principal_matches_namespace(principal, target_principal) - # Check for evaluator name collisions with built-in evaluators - builtin_names = _get_builtin_evaluator_names() - for ev in request.evaluators: - if ev.name in builtin_names: + # Check for rule name collisions with built-in rules + builtin_names = _get_builtin_rule_names() + for rule in request.rules: + if rule.name in builtin_names: raise ConflictError( - error_code=ErrorCode.EVALUATOR_NAME_CONFLICT, - detail=f"Evaluator name '{ev.name}' conflicts with built-in evaluator.", - resource="Evaluator", - resource_id=ev.name, - hint="Choose a different name 
that does not conflict with built-in evaluators.", + error_code=ErrorCode.RULE_NAME_CONFLICT, + detail=f"Rule name '{rule.name}' conflicts with built-in rule.", + resource="Rule", + resource_id=rule.name, + hint="Choose a different name that does not conflict with built-in rules.", errors=[ ValidationErrorItem( - resource="Evaluator", + resource="Rule", field="name", code="name_conflict", - message=f"Name '{ev.name}' conflicts with a built-in evaluator", - value=ev.name, + message=f"Name '{rule.name}' conflicts with a built-in rule", + value=rule.name, ) ], ) @@ -645,7 +645,7 @@ async def init_agent( data_model = AgentData( agent_metadata=request.agent.model_dump(mode="json"), steps=list(request.steps), - evaluators=list(request.evaluators), + rules=list(request.rules), ) new_agent = Agent( @@ -658,7 +658,7 @@ async def init_agent( await db.commit() _logger.info( f"Created agent '{request.agent.agent_name}' with {len(request.steps)} steps, " - f"{len(request.evaluators)} evaluators" + f"{len(request.rules)} rules" ) except Exception: await db.rollback() @@ -716,10 +716,10 @@ async def init_agent( "due to force_replace=true.", exc_info=True, ) - data_model = AgentData(agent_metadata={}, steps=[], evaluators=[]) + data_model = AgentData(agent_metadata={}, steps=[], rules=[]) steps_changed = False - evaluators_changed = False + rules_changed = False force_write = request.force_replace # Always persist when force_replace=true overwrite_applied = False overwrite_changes = InitAgentOverwriteChanges() @@ -731,10 +731,10 @@ async def init_agent( data_model.agent_metadata = new_metadata new_steps: list[StepSchema] - new_evaluators: list[EvaluatorSchema] + new_rules: list[RuleSchema] if request.conflict_mode == ConflictMode.OVERWRITE: - # Latest-init-wins: overwrite steps/evaluators exactly with incoming payload. + # Latest-init-wins: overwrite steps/rules exactly with incoming payload. 
existing_steps = list(data_model.steps or []) existing_steps_by_key = {(step.type, step.name): step for step in existing_steps} existing_step_keys = set(existing_steps_by_key) @@ -748,34 +748,34 @@ async def init_agent( if _step_registration_changed(existing_steps_by_key[key], incoming_steps_by_key[key]) ) - existing_evaluators = list(data_model.evaluators or []) - existing_evals_by_name: dict[str, EvaluatorSchema] = { - evaluator.name: evaluator for evaluator in existing_evaluators + existing_rules = list(data_model.rules or []) + existing_rules_by_name: dict[str, RuleSchema] = { + rule.name: rule for rule in existing_rules } - incoming_evals_by_name: dict[str, EvaluatorSchema] = { - evaluator.name: evaluator for evaluator in request.evaluators + incoming_rules_by_name: dict[str, RuleSchema] = { + rule.name: rule for rule in request.rules } - existing_eval_names = set(existing_evals_by_name) - incoming_eval_names = set(incoming_evals_by_name) + existing_rule_names = set(existing_rules_by_name) + incoming_rule_names = set(incoming_rules_by_name) - evaluators_added_names = sorted(incoming_eval_names - existing_eval_names) - evaluators_removed_names = sorted(existing_eval_names - incoming_eval_names) - evaluators_updated_names = sorted( + rules_added_names = sorted(incoming_rule_names - existing_rule_names) + rules_removed_names = sorted(existing_rule_names - incoming_rule_names) + rules_updated_names = sorted( name - for name in (existing_eval_names & incoming_eval_names) + for name in (existing_rule_names & incoming_rule_names) if ( - existing_evals_by_name[name].config_schema - != incoming_evals_by_name[name].config_schema - or existing_evals_by_name[name].description - != incoming_evals_by_name[name].description + existing_rules_by_name[name].config_schema + != incoming_rules_by_name[name].config_schema + or existing_rules_by_name[name].description + != incoming_rules_by_name[name].description ) ) - evaluator_removals: list[InitAgentEvaluatorRemoval] = [] - if 
evaluators_removed_names: - evaluator_removals = await _build_overwrite_evaluator_removals( + rule_removals: list[InitAgentRuleRemoval] = [] + if rules_removed_names: + rule_removals = await _build_overwrite_rule_removals( existing, - set(evaluators_removed_names), + set(rules_removed_names), db, namespace_key=namespace_key, ) @@ -785,22 +785,22 @@ async def init_agent( steps_added=[_step_key_model(step_key) for step_key in steps_added_keys], steps_updated=[_step_key_model(step_key) for step_key in steps_updated_keys], steps_removed=[_step_key_model(step_key) for step_key in steps_removed_keys], - evaluators_added=evaluators_added_names, - evaluators_updated=evaluators_updated_names, - evaluators_removed=evaluators_removed_names, - evaluator_removals=evaluator_removals, + rules_added=rules_added_names, + rules_updated=rules_updated_names, + rules_removed=rules_removed_names, + rule_removals=rule_removals, ) steps_changed = bool(steps_added_keys or steps_updated_keys or steps_removed_keys) - evaluators_changed = bool( - evaluators_added_names or evaluators_updated_names or evaluators_removed_names + rules_changed = bool( + rules_added_names or rules_updated_names or rules_removed_names ) - overwrite_applied = bool(metadata_changed or steps_changed or evaluators_changed) + overwrite_applied = bool(metadata_changed or steps_changed or rules_changed) new_steps = list(request.steps) - new_evaluators = list(request.evaluators) + new_rules = list(request.rules) data_model.steps = new_steps - data_model.evaluators = new_evaluators + data_model.rules = new_rules else: # --- Process steps --- # Note: incoming_steps_by_key already built during validation above @@ -856,19 +856,19 @@ async def init_agent( data_model.steps = new_steps - # --- Process evaluators with schema compatibility check --- - incoming_evals_by_name = {evaluator.name: evaluator for evaluator in request.evaluators} - existing_evals_by_name = { - evaluator.name: evaluator for evaluator in 
(data_model.evaluators or []) + # --- Process rules with schema compatibility check --- + incoming_rules_by_name = {rule.name: rule for rule in request.rules} + existing_rules_by_name = { + rule.name: rule for rule in (data_model.rules or []) } - new_evaluators = [] + new_rules = [] - # Check existing evaluators for compatibility - for name, existing_ev in existing_evals_by_name.items(): - if name in incoming_evals_by_name: - incoming_ev = incoming_evals_by_name[name] - old_schema = existing_ev.config_schema - new_schema = incoming_ev.config_schema + # Check existing rules for compatibility + for name, existing_rule in existing_rules_by_name.items(): + if name in incoming_rules_by_name: + incoming_rule = incoming_rules_by_name[name] + old_schema = existing_rule.config_schema + new_schema = incoming_rule.config_schema # Short-circuit: only check compatibility if schemas differ if old_schema != new_schema: @@ -879,12 +879,12 @@ async def init_agent( raise ConflictError( error_code=ErrorCode.SCHEMA_INCOMPATIBLE, detail=format_compatibility_error(name, compat_errors), - resource="Evaluator", + resource="Rule", resource_id=name, - hint="Ensure backward compatibility or use a new evaluator name.", + hint="Ensure backward compatibility or use a new rule name.", errors=[ ValidationErrorItem( - resource="Evaluator", + resource="Rule", field="config_schema", code="schema_incompatible", message=err, @@ -893,40 +893,40 @@ async def init_agent( ], ) - # Check if evaluator changed (compare fields directly, avoid model_dump()) + # Check if rule changed (compare fields directly, avoid model_dump()) if ( - existing_ev.config_schema != incoming_ev.config_schema - or existing_ev.description != incoming_ev.description + existing_rule.config_schema != incoming_rule.config_schema + or existing_rule.description != incoming_rule.description ): - evaluators_changed = True - new_evaluators.append(incoming_ev) + rules_changed = True + new_rules.append(incoming_rule) else: - # Keep existing 
evaluator not in incoming request - new_evaluators.append(existing_ev) + # Keep existing rule not in incoming request + new_rules.append(existing_rule) - # Add new evaluators - for name, evaluator in incoming_evals_by_name.items(): - if name not in existing_evals_by_name: - new_evaluators.append(evaluator) - evaluators_changed = True + # Add new rules + for name, rule in incoming_rules_by_name.items(): + if name not in existing_rules_by_name: + new_rules.append(rule) + rules_changed = True - data_model.evaluators = new_evaluators + data_model.rules = new_rules if ( not request.force_replace and request.conflict_mode != ConflictMode.OVERWRITE - and (steps_changed or evaluators_changed or metadata_changed) + and (steps_changed or rules_changed or metadata_changed) ): await _authorize_existing_agent_overwrite(http_request, principal) - if steps_changed or evaluators_changed or metadata_changed or force_write: + if steps_changed or rules_changed or metadata_changed or force_write: existing.data = data_model.model_dump(mode="json") try: await db.commit() _logger.info( f"Updated agent '{request.agent.agent_name}' with {len(new_steps)} steps, " - f"{len(new_evaluators)} evaluators" + f"{len(new_rules)} rules" ) except Exception: await db.rollback() @@ -1033,7 +1033,7 @@ async def get_agent( hint="The agent's metadata is invalid. 
Re-register the agent with initAgent.", ) - return GetAgentResponse(agent=agent_meta, steps=latest_steps, evaluators=data_model.evaluators) + return GetAgentResponse(agent=agent_meta, steps=latest_steps, rules=data_model.rules) async def _get_agent_or_404( @@ -1103,11 +1103,11 @@ async def add_agent_policy( raise BadRequestError( error_code=ErrorCode.POLICY_CONTROL_INCOMPATIBLE, detail="Policy contains controls incompatible with this agent", - hint="Ensure all controls in the policy are compatible with this agent's evaluators.", + hint="Ensure all controls in the policy are compatible with this agent's rules.", errors=[ ValidationErrorItem( resource="Control", - field="evaluator", + field="rule", code="incompatible", message=err, ) @@ -1180,11 +1180,11 @@ async def set_agent_policy( raise BadRequestError( error_code=ErrorCode.POLICY_CONTROL_INCOMPATIBLE, detail="Policy contains controls incompatible with this agent", - hint="Ensure all controls in the policy are compatible with this agent's evaluators.", + hint="Ensure all controls in the policy are compatible with this agent's rules.", errors=[ ValidationErrorItem( resource="Control", - field="evaluator", + field="rule", code="incompatible", message=err, ) @@ -1482,11 +1482,11 @@ async def add_agent_control( raise BadRequestError( error_code=ErrorCode.POLICY_CONTROL_INCOMPATIBLE, detail="Control is incompatible with this agent", - hint="Ensure the control is compatible with this agent's evaluators.", + hint="Ensure the control is compatible with this agent's rules.", errors=[ ValidationErrorItem( resource="Control", - field="evaluator", + field="rule", code="incompatible", message=err, ) @@ -1678,54 +1678,54 @@ async def list_agent_controls( # ============================================================================= -# Evaluator Schema Endpoints +# Rule Schema Endpoints # ============================================================================= -class EvaluatorSchemaItem(BaseModel): - """Evaluator schema 
summary for list response.""" +class RuleSchemaItem(BaseModel): + """Rule schema summary for list response.""" name: str description: str | None config_schema: dict[str, Any] -class ListEvaluatorsResponse(BaseModel): - """Response for listing agent's evaluator schemas.""" +class ListRulesResponse(BaseModel): + """Response for listing agent's rule schemas.""" - evaluators: list[EvaluatorSchemaItem] + rules: list[RuleSchemaItem] pagination: PaginationInfo @router.get( - "/{agent_name}/evaluators", - response_model=ListEvaluatorsResponse, - summary="List agent's registered evaluator schemas", - response_description="Evaluator schemas registered with this agent", + "/{agent_name}/rules", + response_model=ListRulesResponse, + summary="List agent's registered rule schemas", + response_description="Rule schemas registered with this agent", ) -async def list_agent_evaluators( +async def list_agent_rules( agent_name: str, cursor: str | None = None, limit: int = _DEFAULT_PAGINATION_LIMIT, db: AsyncSession = Depends(get_async_db), principal: Principal = Depends(require_operation(Operation.AGENTS_READ)), -) -> ListEvaluatorsResponse: +) -> ListRulesResponse: """ - List all evaluator schemas registered with an agent. + List all rule schemas registered with an agent. 
- Evaluator schemas are registered via initAgent and used for: + Rule schemas are registered via initAgent and used for: - Config validation when creating Controls - UI to display available config options Args: agent_name: Agent identifier - cursor: Optional cursor for pagination (name of last evaluator from previous page) + cursor: Optional cursor for pagination (name of last rule from previous page) limit: Pagination limit (default 20, max 100) db: Database session (injected) principal: Authorized request principal Returns: - ListEvaluatorsResponse with evaluator schemas and pagination + ListRulesResponse with rule schemas and pagination Raises: HTTPException 404: Agent not found @@ -1751,43 +1751,43 @@ async def list_agent_evaluators( try: data_model = AgentData.model_validate(agent.data) except ValidationError: - data_model = AgentData(agent_metadata={}, steps=[], evaluators=[]) + data_model = AgentData(agent_metadata={}, steps=[], rules=[]) - all_evaluators = data_model.evaluators or [] - total = len(all_evaluators) + all_rules = data_model.rules or [] + total = len(all_rules) # Apply cursor-based pagination - # For evaluators, we use name as cursor (simple string comparison) + # For rules, we use name as cursor (simple string comparison) start_idx = 0 if cursor: - # Find the index of the cursor evaluator - for idx, ev in enumerate(all_evaluators): - if ev.name == cursor: + # Find the index of the cursor rule + for idx, rule in enumerate(all_rules): + if rule.name == cursor: start_idx = idx + 1 break # Fetch limit + 1 to check if there are more pages end_idx = start_idx + limit + 1 - paginated = all_evaluators[start_idx:end_idx] + paginated = all_rules[start_idx:end_idx] # Check if there are more pages has_more = len(paginated) > limit if has_more: paginated = paginated[:-1] # Remove the extra item - # Determine next cursor (name of last evaluator in this page) + # Determine next cursor (name of last rule in this page) next_cursor: str | None = None if 
has_more and paginated: next_cursor = paginated[-1].name - return ListEvaluatorsResponse( - evaluators=[ - EvaluatorSchemaItem( - name=ev.name, - description=ev.description, - config_schema=ev.config_schema, + return ListRulesResponse( + rules=[ + RuleSchemaItem( + name=rule.name, + description=rule.description, + config_schema=rule.config_schema, ) - for ev in paginated + for rule in paginated ], pagination=PaginationInfo( limit=limit, @@ -1799,31 +1799,31 @@ async def list_agent_evaluators( @router.get( - "/{agent_name}/evaluators/{evaluator_name}", - response_model=EvaluatorSchemaItem, - summary="Get specific evaluator schema", - response_description="Evaluator schema details", + "/{agent_name}/rules/{rule_name}", + response_model=RuleSchemaItem, + summary="Get specific rule schema", + response_description="Rule schema details", ) -async def get_agent_evaluator( +async def get_agent_rule( agent_name: str, - evaluator_name: str, + rule_name: str, db: AsyncSession = Depends(get_async_db), principal: Principal = Depends(require_operation(Operation.AGENTS_READ)), -) -> EvaluatorSchemaItem: +) -> RuleSchemaItem: """ - Get a specific evaluator schema registered with an agent. + Get a specific rule schema registered with an agent. 
Args: agent_name: Agent identifier - evaluator_name: Name of the evaluator + rule_name: Name of the rule db: Database session (injected) principal: Authorized request principal Returns: - EvaluatorSchemaItem with schema details + RuleSchemaItem with schema details Raises: - HTTPException 404: Agent or evaluator not found + HTTPException 404: Agent or rule not found """ namespace_key = principal.namespace_key agent_name = normalize_agent_name_or_422(agent_name) @@ -1844,34 +1844,34 @@ async def get_agent_evaluator( data_model = AgentData.model_validate(agent.data) except ValidationError: raise NotFoundError( - error_code=ErrorCode.EVALUATOR_NOT_FOUND, - detail=f"Evaluator '{evaluator_name}' not found", - resource="Evaluator", - resource_id=evaluator_name, + error_code=ErrorCode.RULE_NOT_FOUND, + detail=f"Rule '{rule_name}' not found", + resource="Rule", + resource_id=rule_name, hint="The agent's data may be corrupted. Re-register the agent with initAgent.", ) - for ev in data_model.evaluators or []: - if ev.name == evaluator_name: - return EvaluatorSchemaItem( - name=ev.name, - description=ev.description, - config_schema=ev.config_schema, + for rule in data_model.rules or []: + if rule.name == rule_name: + return RuleSchemaItem( + name=rule.name, + description=rule.description, + config_schema=rule.config_schema, ) raise NotFoundError( - error_code=ErrorCode.EVALUATOR_NOT_FOUND, - detail=f"Evaluator '{evaluator_name}' not found on agent '{agent.name}'", - resource="Evaluator", - resource_id=evaluator_name, - hint="Register the evaluator with this agent via initAgent.", + error_code=ErrorCode.RULE_NOT_FOUND, + detail=f"Rule '{rule_name}' not found on agent '{agent.name}'", + resource="Rule", + resource_id=rule_name, + hint="Register the rule with this agent via initAgent.", ) @router.patch( "/{agent_name}", response_model=PatchAgentResponse, - summary="Modify agent (remove steps/evaluators)", + summary="Modify agent (remove steps/rules)", response_description="Lists 
of removed items", ) async def patch_agent( @@ -1881,14 +1881,14 @@ async def patch_agent( principal: Principal = Depends(require_operation(Operation.AGENTS_UPDATE)), ) -> PatchAgentResponse: """ - Remove steps and/or evaluators from an agent. + Remove steps and/or rules from an agent. This is the complement to initAgent which only adds items. Removals are idempotent - attempting to remove non-existent items is not an error. Args: agent_name: Agent identifier - request: Lists of step/evaluator identifiers to remove + request: Lists of step/rule identifiers to remove db: Database session (injected) principal: Authorized request principal @@ -1928,7 +1928,7 @@ async def patch_agent( ) steps_removed: list[StepKey] = [] - evaluators_removed: list[str] = [] + rules_removed: list[str] = [] # Remove steps if request.remove_steps: @@ -1942,52 +1942,52 @@ async def patch_agent( new_steps.append(step) data_model.steps = new_steps - # Remove evaluators (with dependency check) - if request.remove_evaluators: - remove_evaluator_set = set(request.remove_evaluators) + # Remove rules (with dependency check) + if request.remove_rules: + remove_rule_set = set(request.remove_rules) - # Check if any active controls reference evaluators being removed. + # Check if any active controls reference rules being removed. 
controls = await ControlService(db).list_controls_for_agent( agent.name, namespace_key=namespace_key, ) - referencing_controls = _find_referencing_controls_for_removed_evaluators( - controls, agent.name, remove_evaluator_set + referencing_controls = _find_referencing_controls_for_removed_rules( + controls, agent.name, remove_rule_set ) if referencing_controls: raise ConflictError( - error_code=ErrorCode.EVALUATOR_IN_USE, - detail="Cannot remove evaluators: active controls reference them", - resource="Evaluator", - hint="Remove or update the controls that reference these evaluators first.", + error_code=ErrorCode.RULE_IN_USE, + detail="Cannot remove rules: active controls reference them", + resource="Rule", + hint="Remove or update the controls that reference these rules first.", errors=[ ValidationErrorItem( resource="Control", - field="evaluator.name", + field="rule.name", code="in_use", - message=f"Control '{ctrl}' uses evaluator '{ev}'", + message=f"Control '{ctrl}' uses rule '{rule}'", ) - for ctrl, ev in referencing_controls + for ctrl, rule in referencing_controls ], ) - new_evaluators = [] - for ev in data_model.evaluators or []: - if ev.name in remove_evaluator_set: - evaluators_removed.append(ev.name) + new_rules = [] + for rule in data_model.rules or []: + if rule.name in remove_rule_set: + rules_removed.append(rule.name) else: - new_evaluators.append(ev) - data_model.evaluators = new_evaluators + new_rules.append(rule) + data_model.rules = new_rules # Only update if something changed - if steps_removed or evaluators_removed: + if steps_removed or rules_removed: agent.data = data_model.model_dump(mode="json") try: await db.commit() _logger.info( f"Patched agent '{agent.name}': removed {len(steps_removed)} steps, " - f"{len(evaluators_removed)} evaluators" + f"{len(rules_removed)} rules" ) except Exception: await db.rollback() @@ -2003,5 +2003,5 @@ async def patch_agent( return PatchAgentResponse( steps_removed=steps_removed, - 
evaluators_removed=evaluators_removed, + rules_removed=rules_removed, ) diff --git a/server/src/agent_control_server/endpoints/controls.py b/server/src/agent_control_server/endpoints/controls.py index d328c7f9..b400ec2f 100644 --- a/server/src/agent_control_server/endpoints/controls.py +++ b/server/src/agent_control_server/endpoints/controls.py @@ -3,7 +3,7 @@ from copy import deepcopy from typing import Any -from agent_control_engine import list_evaluators +from agent_control_engine import list_rules from agent_control_models import ControlDefinition, TemplateControlInput, UnrenderedTemplateControl from agent_control_models.errors import ErrorCode, ValidationErrorItem from agent_control_models.server import ( @@ -66,8 +66,8 @@ validate_template_structure, ) from ..services.controls import ControlService -from ..services.evaluator_utils import ( - parse_evaluator_ref_full, +from ..services.rule_utils import ( + parse_rule_ref_full, validate_config_against_schema, ) from ..services.validation_paths import format_field_path @@ -75,9 +75,9 @@ # Pagination constants _DEFAULT_PAGINATION_LIMIT = 20 _MAX_PAGINATION_LIMIT = 100 -_INVALID_PARAMETERS_MESSAGE = "Invalid config parameters for evaluator." +_INVALID_PARAMETERS_MESSAGE = "Invalid config parameters for rule." _CORRUPTED_CONTROL_DATA_MESSAGE = "Stored control data is corrupted and cannot be parsed." -_SCHEMA_VALIDATION_FAILED_MESSAGE = "Config does not satisfy the evaluator schema." +_SCHEMA_VALIDATION_FAILED_MESSAGE = "Config does not satisfy the rule schema." 
router = APIRouter(prefix="/controls", tags=["controls"]) template_router = APIRouter(prefix="/control-templates", tags=["controls"]) @@ -376,7 +376,7 @@ async def _render_and_validate_template_input( namespace_key: str, enabled: bool = True, ) -> ControlDefinition: - """Render a template-backed input and validate evaluator config.""" + """Render a template-backed input and validate rule config.""" rendered = render_template_control_input(template_input, enabled=enabled) try: await _validate_control_definition( @@ -462,12 +462,12 @@ async def _validate_control_definition( *, namespace_key: str, ) -> None: - """Validate evaluator config for definitions referencing known global evaluators. + """Validate rule config for definitions referencing known global rules. - Agent-scoped evaluators must exist on the referenced agent. Builtin and external + Agent-scoped rules must exist on the referenced agent. Builtin and external names that are not loaded in this process are accepted without config checks. """ - available_evaluators = list_evaluators() + available_rules = list_rules() agent_data_by_name: dict[str, AgentData] = {} for field_prefix, leaf in iter_condition_leaves_with_paths( control_def.condition, @@ -476,10 +476,10 @@ async def _validate_control_definition( leaf_parts = leaf.leaf_parts() if leaf_parts is None: continue - _, evaluator_spec = leaf_parts + _, rule_spec = leaf_parts - evaluator_ref = evaluator_spec.name - parsed = parse_evaluator_ref_full(evaluator_ref) + rule_ref = rule_spec.name + parsed = parse_rule_ref_full(rule_ref) if parsed.type == "agent": agent_namespace = parsed.namespace @@ -503,7 +503,7 @@ async def _validate_control_definition( resource_id=agent_namespace, hint=( "Ensure the agent exists before creating controls " - "that reference its evaluators." + "that reference its rules." 
), ) @@ -526,55 +526,55 @@ async def _validate_control_definition( ) from e agent_data_by_name[agent_namespace] = agent_data - evaluator = next( - (e for e in (agent_data.evaluators or []) if e.name == parsed.local_name), + rule = next( + (e for e in (agent_data.rules or []) if e.name == parsed.local_name), None, ) - if evaluator is None: - available = [e.name for e in (agent_data.evaluators or [])] + if rule is None: + available = [e.name for e in (agent_data.rules or [])] raise APIValidationError( - error_code=ErrorCode.EVALUATOR_NOT_FOUND, + error_code=ErrorCode.RULE_NOT_FOUND, detail=( - f"Evaluator '{parsed.local_name}' is not registered " + f"Rule '{parsed.local_name}' is not registered " f"with agent '{agent_namespace}'" ), - resource="Evaluator", + resource="Rule", hint=( f"Register it via initAgent first. " - f"Available evaluators: {available or 'none'}." + f"Available rules: {available or 'none'}." ), errors=[ ValidationErrorItem( resource="Control", - field=f"{field_prefix}.evaluator.name", - code="evaluator_not_found", + field=f"{field_prefix}.rule.name", + code="rule_not_found", message=( - f"Evaluator '{parsed.local_name}' not found " + f"Rule '{parsed.local_name}' not found " f"on agent '{agent_namespace}'" ), - value=evaluator_ref, + value=rule_ref, ) ], ) - if evaluator.config_schema: + if rule.config_schema: try: validate_config_against_schema( - evaluator_spec.config, - evaluator.config_schema, + rule_spec.config, + rule.config_schema, ) except JSONSchemaValidationError: raise APIValidationError( error_code=ErrorCode.INVALID_CONFIG, - detail=f"Config validation failed for evaluator '{evaluator_ref}'", + detail=f"Config validation failed for rule '{rule_ref}'", resource="Control", hint=( - "Check the evaluator's config schema for required fields and types." + "Check the rule's config schema for required fields and types." 
), errors=[ ValidationErrorItem( resource="Control", - field=f"{field_prefix}.evaluator.config", + field=f"{field_prefix}.rule.config", code="schema_validation_error", message=_SCHEMA_VALIDATION_FAILED_MESSAGE, ) @@ -582,26 +582,26 @@ async def _validate_control_definition( ) continue - evaluator_cls = available_evaluators.get(parsed.name) - if evaluator_cls is None: - # Global (builtin / external) evaluators may be absent from this runtime + rule_cls = available_rules.get(parsed.name) + if rule_cls is None: + # Global (builtin / external) rules may be absent from this runtime # (optional packages, forward compatibility). Store the definition without - # config validation; evaluation will fail later if the evaluator is missing. + # config validation; evaluation will fail later if the rule is missing. continue try: - evaluator_cls.config_model(**evaluator_spec.config) + rule_cls.config_model(**rule_spec.config) except ValidationError as e: raise APIValidationError( error_code=ErrorCode.INVALID_CONFIG, - detail=f"Config validation failed for evaluator '{parsed.name}'", + detail=f"Config validation failed for rule '{parsed.name}'", resource="Control", - hint="Check the evaluator's config schema for required fields and types.", + hint="Check the rule's config schema for required fields and types.", errors=[ ValidationErrorItem( resource="Control", field=( - f"{field_prefix}.evaluator.config." + f"{field_prefix}.rule.config." 
f"{format_field_path(err.get('loc', ())) or ''}" ).rstrip("."), code=err.get("type", "validation_error"), @@ -612,19 +612,19 @@ async def _validate_control_definition( ) except TypeError: _logger.warning( - "Config validation raised TypeError for evaluator '%s'", + "Config validation raised TypeError for rule '%s'", parsed.name, exc_info=True, ) raise APIValidationError( error_code=ErrorCode.INVALID_CONFIG, - detail=f"Invalid config parameters for evaluator '{parsed.name}'", + detail=f"Invalid config parameters for rule '{parsed.name}'", resource="Control", - hint="Check the evaluator's config schema for valid parameter names.", + hint="Check the rule's config schema for valid parameter names.", errors=[ ValidationErrorItem( resource="Control", - field=f"{field_prefix}.evaluator.config", + field=f"{field_prefix}.rule.config", code="invalid_parameters", message=_INVALID_PARAMETERS_MESSAGE, ) diff --git a/server/src/agent_control_server/endpoints/evaluation.py b/server/src/agent_control_server/endpoints/evaluation.py index a31d757d..5481aa64 100644 --- a/server/src/agent_control_server/endpoints/evaluation.py +++ b/server/src/agent_control_server/endpoints/evaluation.py @@ -25,8 +25,8 @@ _logger = get_logger(__name__) -SAFE_EVALUATOR_ERROR = "Evaluation failed due to an internal evaluator error." -SAFE_EVALUATOR_TIMEOUT_ERROR = "Evaluation timed out before completion." +SAFE_RULE_ERROR = "Evaluation failed due to an internal rule error." +SAFE_RULE_TIMEOUT_ERROR = "Evaluation timed out before completion." SAFE_INVALID_STEP_REGEX_ERROR = "Control configuration error: invalid step name regex." SAFE_ENGINE_VALIDATION_MESSAGE = "Invalid evaluation request or control configuration." 
@@ -40,17 +40,17 @@ class ControlAdapter: control: ControlDefinitionRuntime -def _sanitize_evaluator_error(error_message: str) -> str: - """Convert evaluator runtime errors into safe client-facing text.""" +def _sanitize_rule_error(error_message: str) -> str: + """Convert rule runtime errors into safe client-facing text.""" if "invalid step_name_regex" in error_message.lower(): return SAFE_INVALID_STEP_REGEX_ERROR if "timeout" in error_message.lower(): - return SAFE_EVALUATOR_TIMEOUT_ERROR - return SAFE_EVALUATOR_ERROR + return SAFE_RULE_TIMEOUT_ERROR + return SAFE_RULE_ERROR def _sanitize_condition_trace(trace: object) -> object: - """Recursively redact internal evaluator errors from condition traces.""" + """Recursively redact internal rule errors from condition traces.""" if isinstance(trace, list): return [_sanitize_condition_trace(item) for item in trace] @@ -64,7 +64,7 @@ def _sanitize_condition_trace(trace: object) -> object: raw_error = sanitized.get("error") if isinstance(raw_error, str) and raw_error: - safe_error = _sanitize_evaluator_error(raw_error) + safe_error = _sanitize_rule_error(raw_error) sanitized["error"] = safe_error raw_message = sanitized.get("message") if raw_message is None or isinstance(raw_message, str): @@ -74,11 +74,11 @@ def _sanitize_condition_trace(trace: object) -> object: def _sanitize_control_match(match: ControlMatch) -> ControlMatch: - """Redact internal evaluator error strings from a control match.""" + """Redact internal rule error strings from a control match.""" if match.result.error is None: return match - safe_error = _sanitize_evaluator_error(match.result.error) + safe_error = _sanitize_rule_error(match.result.error) safe_message = safe_error metadata = dict(match.result.metadata or {}) condition_trace = metadata.get("condition_trace") @@ -139,7 +139,7 @@ async def _load_engine_controls( request: EvaluationRequest, principal: Principal, ) -> list[ControlAdapter]: - """Load and materialize controls before evaluator 
execution starts.""" + """Load and materialize controls before rule execution starts.""" namespace_key = principal.namespace_key async with AsyncSessionLocal() as db: diff --git a/server/src/agent_control_server/endpoints/evaluators.py b/server/src/agent_control_server/endpoints/evaluators.py deleted file mode 100644 index 6bbeddfc..00000000 --- a/server/src/agent_control_server/endpoints/evaluators.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Evaluator discovery endpoints.""" - -from typing import Any - -from agent_control_engine import list_evaluators -from fastapi import APIRouter, Depends -from pydantic import BaseModel, Field - -from ..auth_framework import Operation, require_operation - -router = APIRouter(prefix="/evaluators", tags=["evaluators"]) - - -class EvaluatorInfo(BaseModel): - """Information about a registered evaluator.""" - - name: str = Field(..., description="Evaluator name") - version: str = Field(..., description="Evaluator version") - description: str = Field(..., description="Evaluator description") - requires_api_key: bool = Field(..., description="Whether evaluator requires API key") - timeout_ms: int = Field(..., description="Default timeout in milliseconds") - config_schema: dict[str, Any] = Field(..., description="JSON Schema for config") - - -@router.get( - "", - response_model=dict[str, EvaluatorInfo], - summary="List available evaluators", - response_description="Dictionary of evaluator name to evaluator info", - dependencies=[Depends(require_operation(Operation.EVALUATORS_READ))], -) -async def get_evaluators() -> dict[str, EvaluatorInfo]: - """List all available evaluators. - - Returns metadata and JSON Schema for each built-in evaluator. - - Built-in evaluators: - - **regex**: Regular expression pattern matching - - **list**: List-based value matching with flexible logic - - **json**: JSON validation with schema, types, constraints - - **sql**: SQL query validation - - Custom evaluators are registered per-agent via initAgent. 
- Use GET /agents/{agent_name}/evaluators to list agent-specific schemas. - """ - evaluators = list_evaluators() - - return { - name: EvaluatorInfo( - name=evaluator_cls.metadata.name, - version=evaluator_cls.metadata.version, - description=evaluator_cls.metadata.description, - requires_api_key=evaluator_cls.metadata.requires_api_key, - timeout_ms=evaluator_cls.metadata.timeout_ms, - config_schema=evaluator_cls.config_model.model_json_schema(), - ) - for name, evaluator_cls in evaluators.items() - } diff --git a/server/src/agent_control_server/endpoints/rules.py b/server/src/agent_control_server/endpoints/rules.py new file mode 100644 index 00000000..8fc2c01f --- /dev/null +++ b/server/src/agent_control_server/endpoints/rules.py @@ -0,0 +1,58 @@ +"""Rule discovery endpoints.""" + +from typing import Any + +from agent_control_engine import list_rules +from fastapi import APIRouter, Depends +from pydantic import BaseModel, Field + +from ..auth_framework import Operation, require_operation + +router = APIRouter(prefix="/rules", tags=["rules"]) + + +class RuleInfo(BaseModel): + """Information about a registered rule.""" + + name: str = Field(..., description="Rule name") + version: str = Field(..., description="Rule version") + description: str = Field(..., description="Rule description") + requires_api_key: bool = Field(..., description="Whether rule requires API key") + timeout_ms: int = Field(..., description="Default timeout in milliseconds") + config_schema: dict[str, Any] = Field(..., description="JSON Schema for config") + + +@router.get( + "", + response_model=dict[str, RuleInfo], + summary="List available rules", + response_description="Dictionary of rule name to rule info", + dependencies=[Depends(require_operation(Operation.RULES_READ))], +) +async def get_rules() -> dict[str, RuleInfo]: + """List all available rules. + + Returns metadata and JSON Schema for each built-in rule. 
+ + Built-in rules: + - **regex**: Regular expression pattern matching + - **list**: List-based value matching with flexible logic + - **json**: JSON validation with schema, types, constraints + - **sql**: SQL query validation + + Custom rules are registered per-agent via initAgent. + Use GET /agents/{agent_name}/rules to list agent-specific schemas. + """ + rules = list_rules() + + return { + name: RuleInfo( + name=rule_cls.metadata.name, + version=rule_cls.metadata.version, + description=rule_cls.metadata.description, + requires_api_key=rule_cls.metadata.requires_api_key, + timeout_ms=rule_cls.metadata.timeout_ms, + config_schema=rule_cls.config_model.model_json_schema(), + ) + for name, rule_cls in rules.items() + } diff --git a/server/src/agent_control_server/errors.py b/server/src/agent_control_server/errors.py index 1066a7cb..1790ee41 100644 --- a/server/src/agent_control_server/errors.py +++ b/server/src/agent_control_server/errors.py @@ -22,7 +22,7 @@ errors=[ ValidationErrorItem( resource="Control", - field="data.evaluator.config", + field="data.rule.config", code="invalid_format", message="Config must be an object", ) @@ -587,7 +587,7 @@ async def validation_exception_handler( prefix_map = { "agent": "Agent", "steps": "Step", - "evaluators": "Evaluator", + "rules": "Rule", "data": "Control", "policy": "Policy", } diff --git a/server/src/agent_control_server/main.py b/server/src/agent_control_server/main.py index 16152824..609489ba 100644 --- a/server/src/agent_control_server/main.py +++ b/server/src/agent_control_server/main.py @@ -7,7 +7,7 @@ from typing import Any import uvicorn -from agent_control_engine import discover_evaluators, list_evaluators +from agent_control_engine import discover_rules, list_rules from agent_control_models import HealthResponse from agent_control_telemetry import DEFAULT_CONTROL_EVENT_SINK_NAME, ControlEventSinkSelection from fastapi import Depends, FastAPI, HTTPException @@ -26,9 +26,9 @@ from .endpoints.controls import 
router as control_router from .endpoints.controls import template_router as control_template_router from .endpoints.evaluation import router as evaluation_router -from .endpoints.evaluators import router as evaluator_router from .endpoints.observability import router as observability_router from .endpoints.policies import router as policy_router +from .endpoints.rules import router as rule_router from .endpoints.system import router as system_router from .errors import ( APIError, @@ -137,10 +137,10 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: configure_auth_from_env() - # Discover evaluators at startup - discover_evaluators() - available = list(list_evaluators().keys()) - logger.info(f"Evaluator discovery complete. Available evaluators: {available}") + # Discover rules at startup + discover_rules() + available = list(list_rules().keys()) + logger.info(f"Rule discovery complete. Available rules: {available}") # Initialize observability components (stored on app.state) if observability_settings.enabled: @@ -324,7 +324,7 @@ async def attach_version_header(request, call_next): # type: ignore[no-untyped- ) app.include_router( - evaluator_router, + rule_router, prefix=api_v1_prefix, dependencies=[Depends(get_api_key_from_header)], ) diff --git a/server/src/agent_control_server/models.py b/server/src/agent_control_server/models.py index c31ccddf..e67dc027 100644 --- a/server/src/agent_control_server/models.py +++ b/server/src/agent_control_server/models.py @@ -3,7 +3,7 @@ from agent_control_models.agent import StepSchema, normalize_agent_name from agent_control_models.base import BaseModel -from agent_control_models.server import EvaluatorSchema +from agent_control_models.server import RuleSchema from pydantic import Field from sqlalchemy import ( Boolean, @@ -35,7 +35,7 @@ class AgentData(BaseModel): agent_metadata: dict[str, Any] steps: list[StepSchema] = Field(default_factory=list) - evaluators: list[EvaluatorSchema] = Field(default_factory=list) 
+ rules: list[RuleSchema] = Field(default_factory=list) # Association table for Policy <> Control many-to-many relationship. diff --git a/server/src/agent_control_server/observability/store/base.py b/server/src/agent_control_server/observability/store/base.py index f7231f2d..29af9dac 100644 --- a/server/src/agent_control_server/observability/store/base.py +++ b/server/src/agent_control_server/observability/store/base.py @@ -43,8 +43,8 @@ class StatsResult(BaseModel): Attributes: stats: List of per-control statistics total_executions: Total executions across all controls - total_matches: Total matches across all controls (evaluator matched) - total_non_matches: Total non-matches across all controls (evaluator didn't match) + total_matches: Total matches across all controls (rule matched) + total_non_matches: Total non-matches across all controls (rule didn't match) total_errors: Total errors across all controls (evaluation failed) action_counts: Breakdown of actions for matched executions timeseries: Optional time-series data points diff --git a/server/src/agent_control_server/services/control_bindings.py b/server/src/agent_control_server/services/control_bindings.py index f6b04d44..ee719627 100644 --- a/server/src/agent_control_server/services/control_bindings.py +++ b/server/src/agent_control_server/services/control_bindings.py @@ -20,7 +20,7 @@ from ..errors import BadRequestError, ConflictError, NotFoundError from ..models import Control, ControlBinding -from .evaluator_utils import parse_evaluator_ref_full +from .rule_utils import parse_rule_ref_full @dataclass(frozen=True) @@ -345,9 +345,9 @@ async def _require_control(self, *, namespace_key: str, control_id: int) -> None Bindings attach a control to a target ``(target_type, target_id)``, so the control must be runnable against any agent that later - evaluates against that target. 
Agent-scoped evaluators - (``agent_name:evaluator_name``) are tied to a specific agent's - registered evaluator set, so a control referencing one cannot be + evaluates against that target. Agent-scoped rules + (``agent_name:rule_name``) are tied to a specific agent's + registered rule set, so a control referencing one cannot be validated at binding time without choosing an agent. Reject those controls here so the misuse surfaces as a clear 400 instead of a runtime evaluation failure. @@ -372,25 +372,25 @@ async def _require_control(self, *, namespace_key: str, control_id: int) -> None ) _, control_name, control_data = row - agent_scoped_refs = _agent_scoped_evaluators(control_data) + agent_scoped_refs = _agent_scoped_rules(control_data) if agent_scoped_refs: raise BadRequestError( error_code=ErrorCode.CONTROL_BINDING_INCOMPATIBLE, detail=( f"Control '{control_name}' references agent-scoped " - f"evaluator(s) {sorted(agent_scoped_refs)!r} and cannot " + f"rule(s) {sorted(agent_scoped_refs)!r} and cannot " f"be attached to a target binding." ), hint=( - "Use a control whose evaluators are all global (built-in " + "Use a control whose rules are all global (built-in " "or external), or attach this control directly to the " - "specific agent that registered the evaluator." + "specific agent that registered the rule." ), ) -def _agent_scoped_evaluators(control_data: object) -> set[str]: - """Return the set of agent-scoped evaluator references in a control. +def _agent_scoped_rules(control_data: object) -> set[str]: + """Return the set of agent-scoped rule references in a control. 
Returns an empty set for unrendered template controls (no condition tree yet) and for any control whose stored data fails to parse — @@ -406,7 +406,7 @@ def _agent_scoped_evaluators(control_data: object) -> set[str]: except ValidationError: return set() refs: set[str] = set() - for _, evaluator_cfg in definition.iter_condition_leaf_parts(): - if parse_evaluator_ref_full(evaluator_cfg.name).type == "agent": - refs.add(evaluator_cfg.name) + for _, rule_cfg in definition.iter_condition_leaf_parts(): + if parse_rule_ref_full(rule_cfg.name).type == "agent": + refs.add(rule_cfg.name) return refs diff --git a/server/src/agent_control_server/services/control_templates.py b/server/src/agent_control_server/services/control_templates.py index bc800312..d422e834 100644 --- a/server/src/agent_control_server/services/control_templates.py +++ b/server/src/agent_control_server/services/control_templates.py @@ -483,13 +483,13 @@ def remap_template_api_error( ) -def _reject_agent_scoped_evaluators( +def _reject_agent_scoped_rules( control: ControlDefinition, *, reverse_path_map: Mapping[str, str], template: TemplateDefinition, ) -> None: - """Reject agent-scoped evaluator references in v1 templates.""" + """Reject agent-scoped rule references in v1 templates.""" for field_prefix, leaf in iter_condition_leaves_with_paths( control.condition, path="condition", @@ -497,16 +497,16 @@ def _reject_agent_scoped_evaluators( leaf_parts = leaf.leaf_parts() if leaf_parts is None: continue - _, evaluator_spec = leaf_parts - if ":" not in evaluator_spec.name: + _, rule_spec = leaf_parts + if ":" not in rule_spec.name: continue item = ValidationErrorItem( resource="Control", - field=f"{field_prefix}.evaluator.name", - code="agent_scoped_evaluator_not_supported", + field=f"{field_prefix}.rule.name", + code="agent_scoped_rule_not_supported", message=( - "Agent-scoped evaluators are not supported in control templates." + "Agent-scoped rules are not supported in control templates." 
), ) remapped_error, mapped = _map_rendered_error_item( @@ -518,9 +518,9 @@ def _reject_agent_scoped_evaluators( error_code=( ErrorCode.TEMPLATE_PARAMETER_INVALID if mapped else ErrorCode.TEMPLATE_RENDER_ERROR ), - detail="Agent-scoped evaluators are not supported in control templates", + detail="Agent-scoped rules are not supported in control templates", resource="Control", - hint="Use a built-in or package-scoped evaluator in template-backed controls.", + hint="Use a built-in or package-scoped rule in template-backed controls.", errors=[remapped_error], ) @@ -594,7 +594,7 @@ def validate_template_structure(template: TemplateDefinition) -> None: Performs all structural checks that don't require parameter values: forbidden top-level keys, legacy format, $param reference validity, - unused parameter detection, and agent-scoped evaluator rejection. + unused parameter detection, and agent-scoped rule rejection. """ definition_template = template.definition_template if not isinstance(definition_template, dict): @@ -618,7 +618,7 @@ def validate_template_structure(template: TemplateDefinition) -> None: ) if "condition" not in definition_template and ( - "selector" in definition_template or "evaluator" in definition_template + "selector" in definition_template or "rule" in definition_template ): raise _render_error( detail="Templates must use the canonical 'condition' format", @@ -626,7 +626,7 @@ def validate_template_structure(template: TemplateDefinition) -> None: code="legacy_condition_format_not_supported", message=( "Templates must use the canonical 'condition' wrapper instead of " - "top-level selector/evaluator fields." + "top-level selector/rule fields." ), ) @@ -660,8 +660,8 @@ def validate_template_structure(template: TemplateDefinition) -> None: ], ) - # Reject agent-scoped evaluator names baked into the template (not via $param). 
- _reject_hardcoded_agent_scoped_evaluators(definition_template) + # Reject agent-scoped rule names baked into the template (not via $param). + _reject_hardcoded_agent_scoped_rules(definition_template) def validate_partial_template_values( @@ -698,10 +698,10 @@ def validate_partial_template_values( _coerce_parameter_value(name, template.parameters[name], value) -def _reject_hardcoded_agent_scoped_evaluators( +def _reject_hardcoded_agent_scoped_rules( definition_template: dict[str, JsonValue], ) -> None: - """Reject agent-scoped evaluator names that are hardcoded in the template.""" + """Reject agent-scoped rule names that are hardcoded in the template.""" condition = definition_template.get("condition") if not isinstance(condition, dict): return @@ -710,15 +710,15 @@ def _reject_hardcoded_agent_scoped_evaluators( stack: list[tuple[dict[str, JsonValue], str]] = [(condition, "condition")] while stack: node, path = stack.pop() - evaluator = node.get("evaluator") - if isinstance(evaluator, dict): - name = evaluator.get("name") + rule = node.get("rule") + if isinstance(rule, dict): + name = rule.get("name") if isinstance(name, str) and ":" in name: raise _render_error( - detail="Agent-scoped evaluators are not supported in control templates", - field=f"{path}.evaluator.name", - code="agent_scoped_evaluator_not_supported", - message="Agent-scoped evaluators are not supported in control templates.", + detail="Agent-scoped rules are not supported in control templates", + field=f"{path}.rule.name", + code="agent_scoped_rule_not_supported", + message="Agent-scoped rules are not supported in control templates.", ) for key in ("and", "or"): @@ -743,7 +743,7 @@ def render_template_control_input( definition_template = template.definition_template # Reuse structural validation (dict type, forbidden keys, legacy format, - # $param references, unused params, agent-scoped evaluators). + # $param references, unused params, agent-scoped rules). 
validate_template_structure(template) assert isinstance(definition_template, dict) # guaranteed by validate_template_structure @@ -795,7 +795,7 @@ def render_template_control_input( errors=mapped_items, ) from exc - _reject_agent_scoped_evaluators( + _reject_agent_scoped_rules( rendered_control, reverse_path_map=reverse_path_map, template=template, diff --git a/server/src/agent_control_server/services/evaluator_utils.py b/server/src/agent_control_server/services/rule_utils.py similarity index 51% rename from server/src/agent_control_server/services/evaluator_utils.py rename to server/src/agent_control_server/services/rule_utils.py index 12ce7e9b..c90e9b42 100644 --- a/server/src/agent_control_server/services/evaluator_utils.py +++ b/server/src/agent_control_server/services/rule_utils.py @@ -1,13 +1,13 @@ -"""Utilities for working with evaluator references. +"""Utilities for working with rule references. -Evaluator Type Name Formats: +Rule Type Name Formats: - Built-in: "regex", "list", "json", "sql" - External: "galileo.luna", "nvidia.nemo" (dot separator) - Agent-scoped: "my-agent:pii-detector" (colon separator) The key distinction is: - - Built-in and external evaluators are global (available to all agents) - - Agent-scoped evaluators are custom implementations deployed with a specific agent + - Built-in and external rules are global (available to all agents) + - Agent-scoped rules are custom implementations deployed with a specific agent """ import json @@ -19,14 +19,14 @@ @dataclass -class ParsedEvaluatorRef: - """Parsed evaluator reference with type information. +class ParsedRuleRef: + """Parsed rule reference with type information. 
Attributes: - type: The evaluator category ("builtin", "external", or "agent") - name: The full evaluator name (e.g., "regex", "galileo.luna", "my-agent:pii") - namespace: For external evaluators, the provider name; for agent-scoped, the agent name - local_name: The evaluator name without namespace prefix + type: The rule category ("builtin", "external", or "agent") + name: The full rule name (e.g., "regex", "galileo.luna", "my-agent:pii") + namespace: For external rules, the provider name; for agent-scoped, the agent name + local_name: The rule name without namespace prefix """ type: Literal["builtin", "external", "agent"] @@ -35,71 +35,71 @@ class ParsedEvaluatorRef: local_name: str -def parse_evaluator_ref_full(evaluator_ref: str) -> ParsedEvaluatorRef: - """Parse evaluator reference into structured form with type detection. +def parse_rule_ref_full(rule_ref: str) -> ParsedRuleRef: + """Parse rule reference into structured form with type detection. - Determines the evaluator type based on the name format: + Determines the rule type based on the name format: - Contains ":" → agent-scoped (split on first ":") - Contains "." → external (split on first ".") - Otherwise → built-in Args: - evaluator_ref: Evaluator reference string + rule_ref: Rule reference string Returns: - ParsedEvaluatorRef with type, namespace, and local name + ParsedRuleRef with type, namespace, and local name Examples: - >>> parse_evaluator_ref_full("regex") - ParsedEvaluatorRef(type="builtin", name="regex", ...) + >>> parse_rule_ref_full("regex") + ParsedRuleRef(type="builtin", name="regex", ...) - >>> parse_evaluator_ref_full("galileo.luna") - ParsedEvaluatorRef(type="external", namespace="galileo", ...) + >>> parse_rule_ref_full("galileo.luna") + ParsedRuleRef(type="external", namespace="galileo", ...) - >>> parse_evaluator_ref_full("my-agent:pii-detector") - ParsedEvaluatorRef(type="agent", namespace="my-agent", ...) 
+ >>> parse_rule_ref_full("my-agent:pii-detector") + ParsedRuleRef(type="agent", namespace="my-agent", ...) """ - if ":" in evaluator_ref: + if ":" in rule_ref: # Agent-scoped: "my-agent:pii-detector" - agent, local_name = evaluator_ref.split(":", 1) - return ParsedEvaluatorRef( + agent, local_name = rule_ref.split(":", 1) + return ParsedRuleRef( type="agent", - name=evaluator_ref, + name=rule_ref, namespace=agent, local_name=local_name, ) - elif "." in evaluator_ref: + elif "." in rule_ref: # External: "galileo.luna" - provider, local_name = evaluator_ref.split(".", 1) - return ParsedEvaluatorRef( + provider, local_name = rule_ref.split(".", 1) + return ParsedRuleRef( type="external", - name=evaluator_ref, + name=rule_ref, namespace=provider, local_name=local_name, ) else: # Built-in: "regex" - return ParsedEvaluatorRef( + return ParsedRuleRef( type="builtin", - name=evaluator_ref, + name=rule_ref, namespace=None, - local_name=evaluator_ref, + local_name=rule_ref, ) -def is_agent_scoped(evaluator_ref: str) -> bool: - """Check if an evaluator reference is agent-scoped. +def is_agent_scoped(rule_ref: str) -> bool: + """Check if a rule reference is agent-scoped. - Agent-scoped evaluators use the "agent:name" format and reference + Agent-scoped rules use the "agent:name" format and reference custom implementations deployed with a specific agent. 
Args: - evaluator_ref: Evaluator reference string + rule_ref: Rule reference string Returns: - True if agent-scoped, False for built-in or external evaluators + True if agent-scoped, False for built-in or external rules """ - return ":" in evaluator_ref + return ":" in rule_ref def _canonicalize_schema(schema: dict[str, Any]) -> str: diff --git a/server/src/agent_control_server/services/schema_compat.py b/server/src/agent_control_server/services/schema_compat.py index 99755bc9..d5950e9e 100644 --- a/server/src/agent_control_server/services/schema_compat.py +++ b/server/src/agent_control_server/services/schema_compat.py @@ -1,4 +1,4 @@ -"""JSON Schema compatibility checking for evaluator schemas. +"""JSON Schema compatibility checking for rule schemas. Determines if a new schema is backward-compatible with an existing schema. Used during initAgent to reject breaking changes. @@ -116,11 +116,11 @@ def _get_type(prop_schema: dict[str, Any]) -> str: return "unknown" -def format_compatibility_error(evaluator_name: str, errors: list[str]) -> str: +def format_compatibility_error(rule_name: str, errors: list[str]) -> str: """Format a user-friendly error message for incompatible schema change. Args: - evaluator_name: Name of the evaluator with incompatible change + rule_name: Name of the rule with incompatible change errors: List of specific compatibility errors Returns: @@ -128,7 +128,7 @@ def format_compatibility_error(evaluator_name: str, errors: list[str]) -> str: """ error_list = "; ".join(errors) return ( - f"Evaluator '{evaluator_name}' schema change is not backward compatible. " + f"Rule '{rule_name}' schema change is not backward compatible. " f"Changes detected: {error_list}. " "To make breaking changes, create a new agent with a different name." 
) diff --git a/server/tests/conftest.py b/server/tests/conftest.py index d7dda97a..a85a03f1 100644 --- a/server/tests/conftest.py +++ b/server/tests/conftest.py @@ -3,15 +3,15 @@ from sqlalchemy import MetaData, create_engine, inspect, text from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker -from agent_control_engine import discover_evaluators +from agent_control_engine import discover_rules from agent_control_server.config import auth_settings, db_config from agent_control_server.db import Base from agent_control_server.main import app as fastapi_app import agent_control_server.models # ensure models are imported so tables are registered -# Discover evaluators at test session start -discover_evaluators() +# Discover rules at test session start +discover_rules() # Test API keys TEST_API_KEY = "test-api-key-12345" diff --git a/server/tests/test_agents_additional.py b/server/tests/test_agents_additional.py index 1f0d9cc4..bd5c9e96 100644 --- a/server/tests/test_agents_additional.py +++ b/server/tests/test_agents_additional.py @@ -19,7 +19,7 @@ def _init_agent( *, agent_name: str | None = None, steps: list[dict] | None = None, - evaluators: list[dict] | None = None, + rules: list[dict] | None = None, ) -> tuple[str, str]: name = (agent_name or f"agent-{uuid.uuid4().hex[:12]}").lower() if len(name) < 10: @@ -31,7 +31,7 @@ def _init_agent( "agent_version": "1.0", }, "steps": steps or [], - "evaluators": evaluators or [], + "rules": rules or [], } resp = client.post("/api/v1/agents/initAgent", json=payload) assert resp.status_code == 200 @@ -78,7 +78,7 @@ def _unrendered_template_payload() -> dict: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -105,67 +105,67 @@ def _create_policy(client: TestClient) -> int: return resp.json()["policy_id"] -def 
test_list_agent_evaluators_pagination_and_get(client: TestClient) -> None: - # Given: an agent with multiple evaluators - evaluators = [ +def test_list_agent_rules_pagination_and_get(client: TestClient) -> None: + # Given: an agent with multiple rules + rules = [ {"name": "eval-a", "description": "a", "config_schema": {}}, {"name": "eval-b", "description": "b", "config_schema": {"type": "object"}}, {"name": "eval-c", "description": "c", "config_schema": {}}, ] - agent_name, _ = _init_agent(client, evaluators=evaluators) + agent_name, _ = _init_agent(client, rules=rules) # When: listing with pagination - resp = client.get(f"/api/v1/agents/{agent_name}/evaluators", params={"limit": 2}) + resp = client.get(f"/api/v1/agents/{agent_name}/rules", params={"limit": 2}) assert resp.status_code == 200 body = resp.json() # Then: first page returns two items and a next cursor - assert len(body["evaluators"]) == 2 + assert len(body["rules"]) == 2 assert body["pagination"]["has_more"] is True assert body["pagination"]["next_cursor"] == "eval-b" # When: fetching next page using cursor resp2 = client.get( - f"/api/v1/agents/{agent_name}/evaluators", + f"/api/v1/agents/{agent_name}/rules", params={"limit": 2, "cursor": body["pagination"]["next_cursor"]}, ) assert resp2.status_code == 200 body2 = resp2.json() # Then: second page has remaining item and no next cursor assert body2["pagination"]["has_more"] is False - assert [e["name"] for e in body2["evaluators"]] == ["eval-c"] + assert [e["name"] for e in body2["rules"]] == ["eval-c"] - # When: getting a specific evaluator - get_resp = client.get(f"/api/v1/agents/{agent_name}/evaluators/eval-b") + # When: getting a specific rule + get_resp = client.get(f"/api/v1/agents/{agent_name}/rules/eval-b") assert get_resp.status_code == 200 - evaluator = get_resp.json() - # Then: evaluator details are returned - assert evaluator["name"] == "eval-b" - assert evaluator["description"] == "b" + rule = get_resp.json() + # Then: rule details are 
returned + assert rule["name"] == "eval-b" + assert rule["description"] == "b" -def test_list_agent_evaluators_invalid_cursor_returns_first_page(client: TestClient) -> None: - # Given: an agent with multiple evaluators - evaluators = [ +def test_list_agent_rules_invalid_cursor_returns_first_page(client: TestClient) -> None: + # Given: an agent with multiple rules + rules = [ {"name": "eval-a", "description": "a", "config_schema": {}}, {"name": "eval-b", "description": "b", "config_schema": {}}, ] - agent_name, _ = _init_agent(client, evaluators=evaluators) + agent_name, _ = _init_agent(client, rules=rules) # When: listing without cursor - resp = client.get(f"/api/v1/agents/{agent_name}/evaluators", params={"limit": 1}) + resp = client.get(f"/api/v1/agents/{agent_name}/rules", params={"limit": 1}) assert resp.status_code == 200 base = resp.json() # When: listing with an invalid cursor resp2 = client.get( - f"/api/v1/agents/{agent_name}/evaluators", + f"/api/v1/agents/{agent_name}/rules", params={"limit": 1, "cursor": "does-not-exist"}, ) assert resp2.status_code == 200 with_cursor = resp2.json() # Then: results match the first page - assert with_cursor["evaluators"] == base["evaluators"] + assert with_cursor["rules"] == base["rules"] assert with_cursor["pagination"]["total"] == base["pagination"]["total"] @@ -188,7 +188,7 @@ def test_init_agent_preserves_existing_steps_when_missing_from_payload( "agent_version": "1.0", }, "steps": [steps[0]], - "evaluators": [], + "rules": [], } resp = client.post("/api/v1/agents/initAgent", json=payload) assert resp.status_code == 200 @@ -200,48 +200,48 @@ def test_init_agent_preserves_existing_steps_when_missing_from_payload( assert step_names == {"tool-a", "tool-b"} -def test_get_agent_evaluator_not_found(client: TestClient) -> None: - # Given: an existing agent with no matching evaluator +def test_get_agent_rule_not_found(client: TestClient) -> None: + # Given: an existing agent with no matching rule agent_name, _ = 
_init_agent(client) - # When: requesting a missing evaluator - resp = client.get(f"/api/v1/agents/{agent_name}/evaluators/missing") + # When: requesting a missing rule + resp = client.get(f"/api/v1/agents/{agent_name}/rules/missing") # Then: 404 not found assert resp.status_code == 404 - assert resp.json()["error_code"] == "EVALUATOR_NOT_FOUND" + assert resp.json()["error_code"] == "RULE_NOT_FOUND" -def test_get_agent_evaluator_missing_agent_returns_404(client: TestClient) -> None: +def test_get_agent_rule_missing_agent_returns_404(client: TestClient) -> None: # Given: a missing agent id missing_agent = str(uuid.uuid4()) - # When: fetching evaluator for missing agent - resp = client.get(f"/api/v1/agents/{missing_agent}/evaluators/anything") + # When: fetching rule for missing agent + resp = client.get(f"/api/v1/agents/{missing_agent}/rules/anything") # Then: agent not found error is returned assert resp.status_code == 404 assert resp.json()["error_code"] == "AGENT_NOT_FOUND" -def test_patch_agent_remove_steps_and_evaluators(client: TestClient) -> None: - # Given: an agent with steps and evaluators +def test_patch_agent_remove_steps_and_rules(client: TestClient) -> None: + # Given: an agent with steps and rules steps = [ {"type": "tool", "name": "tool-a", "input_schema": {}, "output_schema": {}}, {"type": "tool", "name": "tool-b", "input_schema": {}, "output_schema": {}}, ] - evaluators = [ + rules = [ {"name": "eval-a", "description": "a", "config_schema": {}}, {"name": "eval-b", "description": "b", "config_schema": {}}, ] - agent_name, _ = _init_agent(client, steps=steps, evaluators=evaluators) + agent_name, _ = _init_agent(client, steps=steps, rules=rules) - # When: removing one step and one evaluator + # When: removing one step and one rule resp = client.patch( f"/api/v1/agents/{agent_name}", json={ "remove_steps": [{"type": "tool", "name": "tool-a"}], - "remove_evaluators": ["eval-b"], + "remove_rules": ["eval-b"], }, ) @@ -249,19 +249,19 @@ def 
test_patch_agent_remove_steps_and_evaluators(client: TestClient) -> None: assert resp.status_code == 200 body = resp.json() assert body["steps_removed"] == [{"type": "tool", "name": "tool-a"}] - assert body["evaluators_removed"] == ["eval-b"] + assert body["rules_removed"] == ["eval-b"] # Then: agent data reflects removal get_resp = client.get(f"/api/v1/agents/{agent_name}") assert get_resp.status_code == 200 data = get_resp.json() assert {s["name"] for s in data["steps"]} == {"tool-b"} - assert {e["name"] for e in data["evaluators"]} == {"eval-a"} + assert {e["name"] for e in data["rules"]} == {"eval-a"} -def test_patch_agent_remove_evaluator_in_use_conflict(client: TestClient) -> None: - # Given: agent with evaluator and a policy containing a control that references it - evaluators = [ +def test_patch_agent_remove_rule_in_use_conflict(client: TestClient) -> None: + # Given: agent with rule and a policy containing a control that references it + rules = [ { "name": "custom", "description": "custom", @@ -272,10 +272,10 @@ def test_patch_agent_remove_evaluator_in_use_conflict(client: TestClient) -> Non }, } ] - agent_name, agent_name = _init_agent(client, evaluators=evaluators) + agent_name, agent_name = _init_agent(client, rules=rules) control_payload = deepcopy(VALID_CONTROL_PAYLOAD) - control_payload["condition"]["evaluator"] = { + control_payload["condition"]["rule"] = { "name": f"{agent_name}:custom", "config": {"pattern": "x"}, } @@ -287,30 +287,30 @@ def test_patch_agent_remove_evaluator_in_use_conflict(client: TestClient) -> Non assign = client.post(f"/api/v1/agents/{agent_name}/policy/{policy_id}") assert assign.status_code == 200 - # When: attempting to remove evaluator in use + # When: attempting to remove rule in use resp = client.patch( f"/api/v1/agents/{agent_name}", - json={"remove_evaluators": ["custom"]}, + json={"remove_rules": ["custom"]}, ) # Then: conflict assert resp.status_code == 409 - assert resp.json()["error_code"] == "EVALUATOR_IN_USE" + 
assert resp.json()["error_code"] == "RULE_IN_USE" def test_set_agent_policy_incompatible_controls(client: TestClient) -> None: - # Given: a policy with a control referencing an evaluator from Agent A - evaluators = [ + # Given: a policy with a control referencing a rule from Agent A + rules = [ { "name": "custom", "description": "custom", "config_schema": {"type": "object", "properties": {}, "additionalProperties": True}, } ] - agent_a_id, agent_a_name = _init_agent(client, evaluators=evaluators) + agent_a_id, agent_a_name = _init_agent(client, rules=rules) control_payload = deepcopy(VALID_CONTROL_PAYLOAD) - control_payload["condition"]["evaluator"] = { + control_payload["condition"]["rule"] = { "name": f"{agent_a_name}:custom", "config": {}, } @@ -320,7 +320,7 @@ def test_set_agent_policy_incompatible_controls(client: TestClient) -> None: assoc = client.post(f"/api/v1/policies/{policy_id}/controls/{control_id}") assert assoc.status_code == 200 - # Given: a different agent B without that evaluator + # Given: a different agent B without that rule agent_b_id, _ = _init_agent(client) # When: assigning policy to agent B @@ -331,8 +331,8 @@ def test_set_agent_policy_incompatible_controls(client: TestClient) -> None: assert resp.json()["error_code"] == "POLICY_CONTROL_INCOMPATIBLE" -def test_init_agent_rejects_builtin_evaluator_name(client: TestClient) -> None: - # Given: a payload that registers an evaluator matching a built-in name +def test_init_agent_rejects_builtin_rule_name(client: TestClient) -> None: + # Given: a payload that registers a rule matching a built-in name payload = { "agent": { "agent_name": str(uuid.uuid4()), @@ -341,7 +341,7 @@ def test_init_agent_rejects_builtin_evaluator_name(client: TestClient) -> None: "agent_version": "1.0", }, "steps": [], - "evaluators": [ + "rules": [ {"name": "regex", "description": "conflict", "config_schema": {}}, ], } @@ -351,7 +351,7 @@ def test_init_agent_rejects_builtin_evaluator_name(client: TestClient) -> None: # 
Then: conflict is returned assert resp.status_code == 409 - assert resp.json()["error_code"] == "EVALUATOR_NAME_CONFLICT" + assert resp.json()["error_code"] == "RULE_NAME_CONFLICT" def test_init_agent_same_name_is_idempotent(client: TestClient) -> None: @@ -402,7 +402,7 @@ def test_list_agent_controls_corrupted_control_data_returns_422( # Given: an agent with a policy that includes a control agent_name, _ = _init_agent(client) control_payload = deepcopy(VALID_CONTROL_PAYLOAD) - control_payload["condition"]["evaluator"] = {"name": "regex", "config": {"pattern": "x"}} + control_payload["condition"]["rule"] = {"name": "regex", "config": {"pattern": "x"}} control_id = _create_control_with_data(client, control_payload) policy_id = _create_policy(client) assoc = client.post(f"/api/v1/policies/{policy_id}/controls/{control_id}") @@ -469,22 +469,22 @@ def test_list_agents_invalid_cursor_returns_first_page(client: TestClient) -> No assert with_cursor["pagination"]["total"] == base["pagination"]["total"] -def test_list_agent_evaluators_corrupted_data_returns_empty(client: TestClient) -> None: +def test_list_agent_rules_corrupted_data_returns_empty(client: TestClient) -> None: # Given: an agent with corrupted stored data - agent_name, _ = _init_agent(client, evaluators=[{"name": "eval-a", "config_schema": {}}]) + agent_name, _ = _init_agent(client, rules=[{"name": "eval-a", "config_schema": {}}]) with engine.begin() as conn: conn.execute( text("UPDATE agents SET data = CAST(:data AS JSONB) WHERE name = :id"), {"data": "{\"bad\": \"data\"}", "id": agent_name}, ) - # When: listing evaluator schemas - resp = client.get(f"/api/v1/agents/{agent_name}/evaluators") + # When: listing rule schemas + resp = client.get(f"/api/v1/agents/{agent_name}/rules") # Then: empty list is returned assert resp.status_code == 200 body = resp.json() - assert body["evaluators"] == [] + assert body["rules"] == [] assert body["pagination"]["total"] == 0 @@ -512,8 +512,8 @@ def 
test_set_agent_policy_rejects_corrupted_agent_data(client: TestClient) -> No assert any("corrupted data" in err.get("message", "").lower() for err in body.get("errors", [])) -def test_set_agent_policy_rejects_missing_agent_evaluator(client: TestClient) -> None: - # Given: an agent with no evaluators and a control referencing a missing evaluator +def test_set_agent_policy_rejects_missing_agent_rule(client: TestClient) -> None: + # Given: an agent with no rules and a control referencing a missing rule agent_name, agent_name = _init_agent(client) policy_id = _create_policy(client) control_id = _create_control_with_data(client, VALID_CONTROL_PAYLOAD) @@ -522,7 +522,7 @@ def test_set_agent_policy_rejects_missing_agent_evaluator(client: TestClient) -> with engine.begin() as conn: corrupted_payload = deepcopy(VALID_CONTROL_PAYLOAD) - corrupted_payload["condition"]["evaluator"] = { + corrupted_payload["condition"]["rule"] = { "name": f"{agent_name}:missing", "config": {}, } @@ -544,11 +544,11 @@ def test_set_agent_policy_rejects_missing_agent_evaluator(client: TestClient) -> assert any("not registered" in err.get("message", "").lower() for err in body.get("errors", [])) -def test_set_agent_policy_rejects_invalid_agent_evaluator_config(client: TestClient) -> None: - # Given: an agent with an evaluator schema requiring \"pattern\" +def test_set_agent_policy_rejects_invalid_agent_rule_config(client: TestClient) -> None: + # Given: an agent with a rule schema requiring \"pattern\" agent_name, agent_name = _init_agent( client, - evaluators=[ + rules=[ { "name": "custom", "description": "custom", @@ -567,7 +567,7 @@ def test_set_agent_policy_rejects_invalid_agent_evaluator_config(client: TestCli with engine.begin() as conn: corrupted_payload = deepcopy(VALID_CONTROL_PAYLOAD) - corrupted_payload["condition"]["evaluator"] = { + corrupted_payload["condition"]["rule"] = { "name": f"{agent_name}:custom", "config": {}, } @@ -630,7 +630,7 @@ def 
test_list_agents_corrupted_data_sets_zero_counts(client: TestClient) -> None agent_name, _ = _init_agent( client, steps=[{"type": "tool", "name": "tool-a", "input_schema": {}, "output_schema": {}}], - evaluators=[{"name": "eval-a", "config_schema": {}}], + rules=[{"name": "eval-a", "config_schema": {}}], ) with engine.begin() as conn: conn.execute( @@ -641,12 +641,12 @@ def test_list_agents_corrupted_data_sets_zero_counts(client: TestClient) -> None # When: listing agents resp = client.get("/api/v1/agents") - # Then: step/evaluator counts are zeroed for corrupted data + # Then: step/rule counts are zeroed for corrupted data assert resp.status_code == 200 agents = {a["agent_name"]: a for a in resp.json()["agents"]} agent = agents[agent_name] assert agent["step_count"] == 0 - assert agent["evaluator_count"] == 0 + assert agent["rule_count"] == 0 def test_get_agent_corrupted_data_returns_422(client: TestClient) -> None: @@ -669,7 +669,7 @@ def test_get_agent_corrupted_data_returns_422(client: TestClient) -> None: def test_get_agent_corrupted_metadata_returns_422(client: TestClient) -> None: # Given: an agent with invalid agent_metadata payload agent_name, _ = _init_agent(client) - corrupted = {"agent_metadata": {}, "steps": [], "evaluators": []} + corrupted = {"agent_metadata": {}, "steps": [], "rules": []} with engine.begin() as conn: conn.execute( text("UPDATE agents SET data = CAST(:data AS JSONB) WHERE name = :id"), @@ -751,8 +751,8 @@ def test_set_agent_policy_rejects_controls_without_data(client: TestClient) -> N assert any("corrupted data" in err.get("message", "").lower() for err in body["errors"]) -def test_set_agent_policy_rejects_controls_without_evaluator_name(client: TestClient) -> None: - # Given: an agent and a policy with a stored control whose leaf is missing evaluator name +def test_set_agent_policy_rejects_controls_without_rule_name(client: TestClient) -> None: + # Given: an agent and a policy with a stored control whose leaf is missing rule name 
agent_name, _ = _init_agent(client) policy_id = _create_policy(client) control_id = _create_control_with_data(client, VALID_CONTROL_PAYLOAD) @@ -761,7 +761,7 @@ def test_set_agent_policy_rejects_controls_without_evaluator_name(client: TestCl with engine.begin() as conn: corrupted_payload = deepcopy(VALID_CONTROL_PAYLOAD) - corrupted_payload["condition"]["evaluator"] = {"config": {}} + corrupted_payload["condition"]["rule"] = {"config": {}} conn.execute( text("UPDATE controls SET data = CAST(:data AS JSONB) WHERE id = :id"), {"data": json.dumps(corrupted_payload), "id": control_id}, @@ -820,8 +820,8 @@ def test_list_agents_valid_cursor_not_found_returns_first_page(client: TestClien assert with_cursor["pagination"]["total"] == base["pagination"]["total"] -def test_init_agent_adds_new_evaluator(client: TestClient) -> None: - # Given: an existing agent with one evaluator +def test_init_agent_adds_new_rule(client: TestClient) -> None: + # Given: an existing agent with one rule agent_name = f"agent-{uuid.uuid4().hex[:12]}" payload = { "agent": { @@ -831,12 +831,12 @@ def test_init_agent_adds_new_evaluator(client: TestClient) -> None: "agent_version": "1.0", }, "steps": [], - "evaluators": [{"name": "eval-a", "config_schema": {}}], + "rules": [{"name": "eval-a", "config_schema": {}}], } resp = client.post("/api/v1/agents/initAgent", json=payload) assert resp.status_code == 200 - # When: re-registering with an additional evaluator + # When: re-registering with an additional rule resp2 = client.post( "/api/v1/agents/initAgent", json={ @@ -847,14 +847,14 @@ def test_init_agent_adds_new_evaluator(client: TestClient) -> None: "agent_version": "1.0", }, "steps": [], - "evaluators": [{"name": "eval-b", "config_schema": {}}], + "rules": [{"name": "eval-b", "config_schema": {}}], }, ) - # Then: both evaluators are present + # Then: both rules are present assert resp2.status_code == 200 get_resp = client.get(f"/api/v1/agents/{agent_name}") - names = {e["name"] for e in 
get_resp.json()["evaluators"]} + names = {e["name"] for e in get_resp.json()["rules"]} assert names == {"eval-a", "eval-b"} @@ -871,7 +871,7 @@ def test_init_agent_returns_controls_when_policy_assigned(client: TestClient) -> "agent_version": "1.0", }, "steps": [], - "evaluators": [], + "rules": [], }, ) assert init_resp.status_code == 200 @@ -894,7 +894,7 @@ def test_init_agent_returns_controls_when_policy_assigned(client: TestClient) -> "agent_version": "1.0", }, "steps": [], - "evaluators": [], + "rules": [], }, ) @@ -918,7 +918,7 @@ def test_init_agent_returns_only_active_controls_by_default(client: TestClient) "agent_version": "1.0", }, "steps": [], - "evaluators": [], + "rules": [], }, ) assert init_resp.status_code == 200 @@ -956,7 +956,7 @@ def test_init_agent_returns_only_active_controls_by_default(client: TestClient) "agent_version": "1.0", }, "steps": [], - "evaluators": [], + "rules": [], }, ) @@ -985,21 +985,21 @@ def test_patch_agent_corrupted_data_returns_422(client: TestClient) -> None: assert resp.json()["error_code"] == "CORRUPTED_DATA" -def test_get_agent_evaluator_corrupted_data_returns_404(client: TestClient) -> None: - # Given: an agent with evaluator data that becomes corrupted - agent_name, _ = _init_agent(client, evaluators=[{"name": "eval-a", "config_schema": {}}]) +def test_get_agent_rule_corrupted_data_returns_404(client: TestClient) -> None: + # Given: an agent with rule data that becomes corrupted + agent_name, _ = _init_agent(client, rules=[{"name": "eval-a", "config_schema": {}}]) with engine.begin() as conn: conn.execute( text("UPDATE agents SET data = CAST(:data AS JSONB) WHERE name = :id"), {"data": json.dumps({"bad": "data"}), "id": agent_name}, ) - # When: fetching a specific evaluator - resp = client.get(f"/api/v1/agents/{agent_name}/evaluators/eval-a") + # When: fetching a specific rule + resp = client.get(f"/api/v1/agents/{agent_name}/rules/eval-a") - # Then: evaluator not found is returned due to corrupted data + # Then: rule 
not found is returned due to corrupted data assert resp.status_code == 404 - assert resp.json()["error_code"] == "EVALUATOR_NOT_FOUND" + assert resp.json()["error_code"] == "RULE_NOT_FOUND" def test_init_agent_rejects_duplicate_step_names_in_single_request( @@ -1017,7 +1017,7 @@ def test_init_agent_rejects_duplicate_step_names_in_single_request( {"type": "tool", "name": "duplicate", "input_schema": {}, "output_schema": {}}, {"type": "tool", "name": "duplicate", "input_schema": {}, "output_schema": {}}, ], - "evaluators": [], + "rules": [], } # When: initializing the agent @@ -1053,7 +1053,7 @@ def test_init_agent_rejects_step_schema_conflict_across_registrations( "output_schema": {"type": "array"}, } ], - "evaluators": [], + "rules": [], } resp = client.post("/api/v1/agents/initAgent", json=original_payload) assert resp.status_code == 200 @@ -1074,7 +1074,7 @@ def test_init_agent_rejects_step_schema_conflict_across_registrations( "output_schema": {"type": "object"}, # Different schema } ], - "evaluators": [], + "rules": [], } resp = client.post("/api/v1/agents/initAgent", json=conflicting_payload) @@ -1106,7 +1106,7 @@ def test_init_agent_accepts_identical_step_schema_across_registrations( "output_schema": {"type": "array"}, } ], - "evaluators": [], + "rules": [], } resp = client.post("/api/v1/agents/initAgent", json=payload) assert resp.status_code == 200 diff --git a/server/tests/test_auth.py b/server/tests/test_auth.py index fba5088c..8f0967b3 100644 --- a/server/tests/test_auth.py +++ b/server/tests/test_auth.py @@ -100,48 +100,48 @@ def test_admin_key_works_on_protected_endpoints(self, admin_client: TestClient) assert response.status_code == 404 -class TestEvaluatorsEndpoint: - """Evaluators endpoint requires valid API key (regular or admin).""" +class TestRulesEndpoint: + """Rules endpoint requires valid API key (regular or admin).""" - def test_regular_key_works_on_evaluators(self, non_admin_client: TestClient) -> None: - """Given regular API key, when 
listing evaluators, then returns 200.""" + def test_regular_key_works_on_rules(self, non_admin_client: TestClient) -> None: + """Given regular API key, when listing rules, then returns 200.""" # When: - response = non_admin_client.get("/api/v1/evaluators") + response = non_admin_client.get("/api/v1/rules") # Then: assert response.status_code == 200 - def test_admin_key_works_on_evaluators(self, admin_client: TestClient) -> None: - """Given admin API key, when listing evaluators, then returns 200.""" + def test_admin_key_works_on_rules(self, admin_client: TestClient) -> None: + """Given admin API key, when listing rules, then returns 200.""" # When: - response = admin_client.get("/api/v1/evaluators") + response = admin_client.get("/api/v1/rules") # Then: assert response.status_code == 200 - def test_missing_key_returns_401_on_evaluators( + def test_missing_key_returns_401_on_rules( self, unauthenticated_client: TestClient ) -> None: - """Given no API key, when listing evaluators, then returns 401.""" + """Given no API key, when listing rules, then returns 401.""" # When: - response = unauthenticated_client.get("/api/v1/evaluators") + response = unauthenticated_client.get("/api/v1/rules") # Then: assert response.status_code == 401 - def test_evaluators_use_auth_framework_provider(self, app: object) -> None: - """Given a custom authorizer, when listing evaluators, then route uses it.""" + def test_rules_use_auth_framework_provider(self, app: object) -> None: + """Given a custom authorizer, when listing rules, then route uses it.""" # Given: authorizer = _RecordingAuthorizer() set_authorizer(authorizer) client = TestClient(app, raise_server_exceptions=True) # When: - response = client.get("/api/v1/evaluators") + response = client.get("/api/v1/rules") # Then: assert response.status_code == 200 - assert authorizer.calls == [(Operation.EVALUATORS_READ, None)] + assert authorizer.calls == [(Operation.RULES_READ, None)] class TestAuthDisabled: @@ -160,10 +160,10 @@ def 
test_no_key_allowed_when_disabled(self, unauthenticated_client: TestClient) # Then: (404 for non-existent resource, but NOT 401) assert response.status_code == 404 - def test_evaluators_accessible_when_disabled(self, unauthenticated_client: TestClient) -> None: - """Given auth disabled, when listing evaluators without API key, then returns 200.""" + def test_rules_accessible_when_disabled(self, unauthenticated_client: TestClient) -> None: + """Given auth disabled, when listing rules without API key, then returns 200.""" # When: - response = unauthenticated_client.get("/api/v1/evaluators") + response = unauthenticated_client.get("/api/v1/rules") # Then: assert response.status_code == 200 @@ -176,7 +176,7 @@ def test_evaluators_accessible_when_disabled(self, unauthenticated_client: TestC "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "test", "flags": []}, }, @@ -213,7 +213,7 @@ class TestAdminWriteEndpointAuthorization: ( "PATCH", "/api/v1/agents/agent-authz-test01", - {"remove_steps": [], "remove_evaluators": []}, + {"remove_steps": [], "remove_rules": []}, ), ], ) @@ -248,7 +248,7 @@ def test_non_admin_key_can_init_agent_and_fetch_controls( "output_schema": {"type": "object"}, } ], - "evaluators": [], + "rules": [], } init_response = non_admin_client.post("/api/v1/agents/initAgent", json=init_payload) @@ -285,7 +285,7 @@ def test_admin_key_allowed_on_representative_mutations(self, admin_client: TestC "agent_version": "1.0", }, "steps": [], - "evaluators": [], + "rules": [], } init_response = admin_client.post("/api/v1/agents/initAgent", json=init_payload) assert init_response.status_code == 200 diff --git a/server/tests/test_control_compatibility.py b/server/tests/test_control_compatibility.py index 0c528df6..68cb0487 100644 --- a/server/tests/test_control_compatibility.py +++ b/server/tests/test_control_compatibility.py @@ -26,7 +26,7 @@ def 
_init_agent(client: TestClient, *, agent_name: str | None = None) -> str: "agent_version": "1.0", }, "steps": [], - "evaluators": [], + "rules": [], }, ) assert resp.status_code == 200 @@ -42,7 +42,7 @@ def _create_policy(client: TestClient) -> int: def _legacy_control_payload() -> dict[str, object]: payload = deepcopy(VALID_CONTROL_PAYLOAD) payload["selector"] = payload["condition"]["selector"] - payload["evaluator"] = payload["condition"]["evaluator"] + payload["rule"] = payload["condition"]["rule"] payload.pop("condition") return payload @@ -100,9 +100,9 @@ def test_get_control_data_returns_canonical_shape_for_legacy_stored_payload( assert resp.status_code == 200 data = resp.json()["data"] assert "selector" not in data - assert "evaluator" not in data + assert "rule" not in data assert data["condition"]["selector"]["path"] == "input" - assert data["condition"]["evaluator"]["name"] == "regex" + assert data["condition"]["rule"]["name"] == "regex" def test_list_agent_controls_returns_canonical_shape_for_legacy_stored_payload( @@ -139,9 +139,9 @@ def test_list_agent_controls_returns_canonical_shape_for_legacy_stored_payload( assert len(controls) == 1 control = controls[0]["control"] assert "selector" not in control - assert "evaluator" not in control + assert "rule" not in control assert control["condition"]["selector"]["path"] == "input" - assert control["condition"]["evaluator"]["name"] == "regex" + assert control["condition"]["rule"]["name"] == "regex" def test_list_agent_controls_omits_null_condition_fields_in_response( @@ -157,7 +157,7 @@ def test_list_agent_controls_omits_null_condition_fields_in_response( { "not": { "selector": {"path": "context.channel.scope"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["slack:direct:U123"], @@ -171,7 +171,7 @@ def test_list_agent_controls_omits_null_condition_fields_in_response( "or": [ { "selector": {"path": "name"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": 
r"(^|\.)(read|memory_search|memory_get)$"}, }, @@ -207,13 +207,13 @@ def test_list_agent_controls_omits_null_condition_fields_in_response( first_child = condition["and"][0] assert set(first_child.keys()) == {"not"} assert first_child["not"]["selector"]["path"] == "context.channel.scope" - assert first_child["not"]["evaluator"]["name"] == "list" + assert first_child["not"]["rule"]["name"] == "list" second_child = condition["and"][1] assert set(second_child.keys()) == {"or"} assert len(second_child["or"]) == 1 assert second_child["or"][0]["selector"]["path"] == "name" - assert second_child["or"][0]["evaluator"]["name"] == "regex" + assert second_child["or"][0]["rule"]["name"] == "regex" def test_get_control_data_rejects_partial_legacy_stored_payload( @@ -228,7 +228,7 @@ def test_get_control_data_rejects_partial_legacy_stored_payload( control_id = control_resp.json()["control_id"] invalid_payload = _legacy_control_payload() - invalid_payload.pop("evaluator") + invalid_payload.pop("rule") with engine.begin() as conn: conn.execute( text("UPDATE controls SET data = CAST(:data AS JSONB) WHERE id = :id"), @@ -243,7 +243,7 @@ def test_get_control_data_rejects_partial_legacy_stored_payload( body = resp.json() assert body["error_code"] == "CORRUPTED_DATA" assert any( - "Legacy control definition must include both selector and evaluator." + "Legacy control definition must include both selector and rule." 
in error.get("message", "") for error in body.get("errors", []) ) diff --git a/server/tests/test_control_condition_alembic_migration.py b/server/tests/test_control_condition_alembic_migration.py index 7227a8d8..1e8660db 100644 --- a/server/tests/test_control_condition_alembic_migration.py +++ b/server/tests/test_control_condition_alembic_migration.py @@ -38,13 +38,14 @@ def _legacy_control_payload() -> dict[str, Any]: payload = deepcopy(VALID_CONTROL_PAYLOAD) payload["selector"] = payload["condition"]["selector"] - payload["evaluator"] = payload["condition"]["evaluator"] + payload["evaluator"] = payload["condition"]["rule"] payload.pop("condition") return payload def _composite_control_payload() -> dict[str, Any]: first_leaf = deepcopy(VALID_CONTROL_PAYLOAD["condition"]) + first_leaf["evaluator"] = first_leaf.pop("rule") second_leaf = { "selector": {"path": "output"}, "evaluator": {"name": "regex", "config": {"pattern": "blocked"}}, @@ -379,7 +380,7 @@ async def _override_get_async_db(): assert "selector" not in data assert "evaluator" not in data assert data["condition"]["selector"]["path"] == "input" - assert data["condition"]["evaluator"]["name"] == "regex" + assert data["condition"]["rule"]["name"] == "regex" finally: app.dependency_overrides.pop(get_async_db, None) asyncio.run(async_engine.dispose()) diff --git a/server/tests/test_control_phase0_alembic_migration.py b/server/tests/test_control_phase0_alembic_migration.py index 3c5553cb..49584314 100644 --- a/server/tests/test_control_phase0_alembic_migration.py +++ b/server/tests/test_control_phase0_alembic_migration.py @@ -45,7 +45,7 @@ def _unrendered_template_payload() -> dict[str, Any]: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, diff --git a/server/tests/test_control_templates.py b/server/tests/test_control_templates.py index ab669c55..11722b08 100644 --- 
a/server/tests/test_control_templates.py +++ b/server/tests/test_control_templates.py @@ -38,7 +38,7 @@ def _template_payload() -> dict[str, object]: }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -54,7 +54,7 @@ def _template_payload() -> dict[str, object]: def _defaults_only_template_payload() -> dict[str, object]: return { "template": { - "description": "List evaluator template", + "description": "List rule template", "parameters": { "values": { "type": "string_list", @@ -79,7 +79,7 @@ def _defaults_only_template_payload() -> dict[str, object]: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": {"$param": "values"}, @@ -132,7 +132,7 @@ def _case_sensitive_template_payload( }, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": {"$param": "values"}, @@ -156,7 +156,7 @@ def _raw_control_payload(pattern: str = "raw", *, action: str = "deny") -> dict[ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": pattern}, }, @@ -275,7 +275,7 @@ def test_render_control_template_preview_returns_rendered_control(client: TestCl "pattern": "hello", "step_name": "templated-step", } - assert control["condition"]["evaluator"]["config"]["pattern"] == "hello" + assert control["condition"]["rule"]["config"]["pattern"] == "hello" assert control["scope"]["step_names"] == ["templated-step"] @@ -297,8 +297,8 @@ def test_render_control_template_preview_uses_defaults_when_values_omitted( "logic": "any", "case_sensitive": False, } - assert control["condition"]["evaluator"]["name"] == "list" - assert control["condition"]["evaluator"]["config"] == { + assert control["condition"]["rule"]["name"] == "list" + assert 
control["condition"]["rule"]["config"] == { "values": ["secret", "blocked"], "logic": "any", "case_sensitive": False, @@ -354,7 +354,7 @@ def test_create_template_backed_control_persists_template_metadata(client: TestC "pattern": "hello", "step_name": "templated-step", } - assert data["condition"]["evaluator"]["config"]["pattern"] == "hello" + assert data["condition"]["rule"]["config"]["pattern"] == "hello" assert data["scope"]["step_names"] == ["templated-step"] @@ -400,7 +400,7 @@ def test_create_template_backed_control_persists_resolved_defaults_when_values_o "logic": "any", "case_sensitive": False, } - assert data["condition"]["evaluator"]["config"] == { + assert data["condition"]["rule"]["config"] == { "values": ["secret", "blocked"], "logic": "any", "case_sensitive": False, @@ -785,7 +785,7 @@ def test_raw_control_can_be_replaced_with_template_backed_control(client: TestCl data = get_response.json()["data"] assert data["template"]["description"] == "Regex denial template" assert data["template_values"]["pattern"] == "hello" - assert data["condition"]["evaluator"]["config"]["pattern"] == "hello" + assert data["condition"]["rule"]["config"]["pattern"] == "hello" def test_raw_control_failed_template_replacement_does_not_mutate_raw_control( @@ -849,7 +849,7 @@ def test_template_update_preserves_enabled_value(client: TestClient) -> None: "pattern": "updated", "step_name": "updated-step", } - assert data["condition"]["evaluator"]["config"]["pattern"] == "updated" + assert data["condition"]["rule"]["config"]["pattern"] == "updated" assert data["scope"]["step_names"] == ["updated-step"] @@ -933,14 +933,14 @@ def test_template_update_accepts_different_template_structure(client: TestClient get_response = client.get(f"/api/v1/controls/{control_id}/data") assert get_response.status_code == 200, get_response.text data = get_response.json()["data"] - assert data["template"]["description"] == "List evaluator template" + assert data["template"]["description"] == "List rule 
template" assert data["template_values"] == { "values": ["secret", "blocked"], "logic": "any", "case_sensitive": False, } - assert data["condition"]["evaluator"]["name"] == "list" - assert data["condition"]["evaluator"]["config"] == { + assert data["condition"]["rule"]["name"] == "list" + assert data["condition"]["rule"]["config"] == { "values": ["secret", "blocked"], "logic": "any", "case_sensitive": False, @@ -1027,7 +1027,7 @@ def test_template_validate_rejects_structurally_invalid_unrendered( # Given: a template with an undefined $param reference and empty values payload = _template_payload() payload["template_values"] = {} - payload["template"]["definition_template"]["condition"]["evaluator"]["config"]["extra"] = { # type: ignore[index] + payload["template"]["definition_template"]["condition"]["rule"]["config"]["extra"] = { # type: ignore[index] "$param": "nonexistent", } @@ -1076,7 +1076,7 @@ def test_render_control_template_rejects_undefined_param_reference(client: TestC # Given: a template definition that references an undeclared parameter payload = _template_payload() payload["template"] = deepcopy(payload["template"]) - payload["template"]["definition_template"]["condition"]["evaluator"]["config"]["pattern"] = { # type: ignore[index] + payload["template"]["definition_template"]["condition"]["rule"]["config"]["pattern"] = { # type: ignore[index] "$param": "undefined_pattern", } @@ -1089,7 +1089,7 @@ def test_render_control_template_rejects_undefined_param_reference(client: TestC assert body["error_code"] == "TEMPLATE_RENDER_ERROR" assert any( err.get("code") == "undefined_parameter_reference" - and err.get("field") == "condition.evaluator.config.pattern" + and err.get("field") == "condition.rule.config.pattern" for err in body.get("errors", []) ) @@ -1127,7 +1127,7 @@ def test_template_backed_control_rejects_raw_put_update(client: TestClient) -> N "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - 
"evaluator": { + "rule": { "name": "regex", "config": {"pattern": "raw"}, }, @@ -1284,7 +1284,7 @@ def test_render_control_template_rejects_malformed_param_binding(client: TestCli # Given: a malformed $param binding object with extra keys payload = _template_payload() payload["template"] = deepcopy(payload["template"]) - payload["template"]["definition_template"]["condition"]["evaluator"]["config"]["pattern"] = { # type: ignore[index] + payload["template"]["definition_template"]["condition"]["rule"]["config"]["pattern"] = { # type: ignore[index] "$param": "pattern", "extra": True, } @@ -1303,7 +1303,7 @@ def test_render_control_template_rejects_non_string_param_reference(client: Test # Given: a malformed $param binding whose reference is not a string payload = _template_payload() payload["template"] = deepcopy(payload["template"]) - payload["template"]["definition_template"]["condition"]["evaluator"]["config"]["pattern"] = { # type: ignore[index] + payload["template"]["definition_template"]["condition"]["rule"]["config"]["pattern"] = { # type: ignore[index] "$param": 123, } @@ -1341,51 +1341,51 @@ def test_render_control_template_rejects_unused_parameter(client: TestClient) -> ) -def test_render_control_template_rejects_agent_scoped_evaluator(client: TestClient) -> None: - # Given: a template definition that uses an agent-scoped evaluator directly +def test_render_control_template_rejects_agent_scoped_rule(client: TestClient) -> None: + # Given: a template definition that uses an agent-scoped rule directly payload = _template_payload() payload["template"] = deepcopy(payload["template"]) - payload["template"]["definition_template"]["condition"]["evaluator"]["name"] = "agent-x:custom" # type: ignore[index] + payload["template"]["definition_template"]["condition"]["rule"]["name"] = "agent-x:custom" # type: ignore[index] # When: rendering a template preview response = client.post("/api/v1/control-templates/render", json=payload) - # Then: the API rejects agent-scoped 
evaluators for template-backed controls + # Then: the API rejects agent-scoped rules for template-backed controls assert response.status_code == 422 body = response.json() assert body["error_code"] == "TEMPLATE_RENDER_ERROR" assert any( - err.get("code") == "agent_scoped_evaluator_not_supported" + err.get("code") == "agent_scoped_rule_not_supported" for err in body.get("errors", []) ) -def test_render_control_template_remaps_param_bound_agent_scoped_evaluator_error( +def test_render_control_template_remaps_param_bound_agent_scoped_rule_error( client: TestClient, ) -> None: - # Given: a template whose evaluator name comes from a bound parameter + # Given: a template whose rule name comes from a bound parameter payload = _template_payload() payload["template"] = deepcopy(payload["template"]) - payload["template"]["parameters"]["evaluator_name"] = { # type: ignore[index] + payload["template"]["parameters"]["rule_name"] = { # type: ignore[index] "type": "string", - "label": "Evaluator Name", + "label": "Rule Name", } - payload["template"]["definition_template"]["condition"]["evaluator"]["name"] = { # type: ignore[index] - "$param": "evaluator_name", + payload["template"]["definition_template"]["condition"]["rule"]["name"] = { # type: ignore[index] + "$param": "rule_name", } - payload["template_values"]["evaluator_name"] = "agent-x:custom" # type: ignore[index] + payload["template_values"]["rule_name"] = "agent-x:custom" # type: ignore[index] # When: rendering a template preview response = client.post("/api/v1/control-templates/render", json=payload) - # Then: the agent-scoped evaluator error is remapped to the bound parameter + # Then: the agent-scoped rule error is remapped to the bound parameter assert response.status_code == 422 body = response.json() assert body["error_code"] == "TEMPLATE_PARAMETER_INVALID" assert any( - err.get("field") == "template_values.evaluator_name" - and err.get("parameter") == "evaluator_name" - and err.get("rendered_field") == 
"condition.evaluator.name" + err.get("field") == "template_values.rule_name" + and err.get("parameter") == "rule_name" + and err.get("rendered_field") == "condition.rule.name" and err.get("code") == "template_parameter_invalid" for err in body.get("errors", []) ) @@ -1414,14 +1414,14 @@ def test_render_control_template_rejects_forbidden_top_level_template_fields( def test_render_control_template_rejects_legacy_flat_format(client: TestClient) -> None: - # Given: a template that uses the legacy flat selector/evaluator format + # Given: a template that uses the legacy flat selector/rule format payload = _template_payload() payload["template"] = deepcopy(payload["template"]) payload["template"]["definition_template"] = { # type: ignore[index] "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -1506,7 +1506,7 @@ def _unrendered_template_payload() -> dict[str, object]: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, @@ -1580,7 +1580,7 @@ def test_update_unrendered_template_with_complete_values_renders(client: TestCli get_response = client.get(f"/api/v1/controls/{control_id}/data") assert get_response.status_code == 200, get_response.text data = get_response.json()["data"] - assert data["condition"]["evaluator"]["config"]["pattern"] == "hello" + assert data["condition"]["rule"]["config"]["pattern"] == "hello" assert data["template_values"]["pattern"] == "hello" # And: enabled remains false (preserved from unrendered state) assert data["enabled"] is False @@ -1914,7 +1914,7 @@ def test_create_unrendered_template_rejects_undefined_param_reference( ) -> None: # Given: a template whose definition_template references an undefined parameter payload = _unrendered_template_payload() - 
payload["template"]["definition_template"]["condition"]["evaluator"]["config"]["extra"] = { # type: ignore[index] + payload["template"]["definition_template"]["condition"]["rule"]["config"]["extra"] = { # type: ignore[index] "$param": "nonexistent", } @@ -1961,12 +1961,12 @@ def test_create_unrendered_template_rejects_unused_parameter( ) -def test_create_unrendered_template_rejects_agent_scoped_evaluator( +def test_create_unrendered_template_rejects_agent_scoped_rule( client: TestClient, ) -> None: - # Given: a template with a hardcoded agent-scoped evaluator name + # Given: a template with a hardcoded agent-scoped rule name payload = _unrendered_template_payload() - payload["template"]["definition_template"]["condition"]["evaluator"]["name"] = "agent-x:custom" # type: ignore[index] + payload["template"]["definition_template"]["condition"]["rule"]["name"] = "agent-x:custom" # type: ignore[index] # When: creating the unrendered control response = client.put( @@ -1979,7 +1979,7 @@ def test_create_unrendered_template_rejects_agent_scoped_evaluator( body = response.json() assert body["error_code"] == "TEMPLATE_RENDER_ERROR" assert any( - err.get("code") == "agent_scoped_evaluator_not_supported" + err.get("code") == "agent_scoped_rule_not_supported" for err in body.get("errors", []) ) @@ -2005,4 +2005,4 @@ def test_rendered_template_rejects_update_with_incomplete_values( get_response = client.get(f"/api/v1/controls/{control_id}/data") data = get_response.json()["data"] assert "condition" in data - assert data["condition"]["evaluator"]["config"]["pattern"] == "hello" + assert data["condition"]["rule"]["config"]["pattern"] == "hello" diff --git a/server/tests/test_controls.py b/server/tests/test_controls.py index eac24890..a2db2d4a 100644 --- a/server/tests/test_controls.py +++ b/server/tests/test_controls.py @@ -59,14 +59,14 @@ def test_create_control_with_data_stores_configured_payload(client: TestClient) data = data_resp.json()["data"] assert data["description"] == 
VALID_CONTROL_DATA["description"] assert data["execution"] == VALID_CONTROL_DATA["execution"] - assert data["condition"]["evaluator"] == VALID_CONTROL_DATA["condition"]["evaluator"] + assert data["condition"]["rule"] == VALID_CONTROL_DATA["condition"]["rule"] def test_create_control_invalid_data_returns_422_without_persisting(client: TestClient) -> None: - # Given: a create request whose control data fails evaluator validation + # Given: a create request whose control data fails rule validation name = f"control-{uuid.uuid4()}" invalid_data = deepcopy(VALID_CONTROL_DATA) - invalid_data["condition"]["evaluator"] = { + invalid_data["condition"]["rule"] = { "name": "list", "config": { "values": ["a", "b"], @@ -127,7 +127,7 @@ def test_get_control_schema_returns_control_definition_schema(client: TestClient "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "test", "flags": []} }, @@ -156,17 +156,17 @@ def test_set_control_data_replaces_existing(client: TestClient) -> None: assert data["enabled"] == payload["enabled"] assert data["execution"] == payload["execution"] assert data["scope"] == payload["scope"] - assert data["condition"]["evaluator"] == payload["condition"]["evaluator"] + assert data["condition"]["rule"] == payload["condition"]["rule"] assert data["action"] == payload["action"] assert data["condition"]["selector"]["path"] == payload["condition"]["selector"]["path"] def test_set_control_data_accepts_legacy_leaf_payload(client: TestClient) -> None: - # Given: a legacy flat selector/evaluator payload + # Given: a legacy flat selector/rule payload control_id = create_control(client) payload = deepcopy(VALID_CONTROL_DATA) payload["selector"] = payload["condition"]["selector"] - payload["evaluator"] = payload["condition"]["evaluator"] + payload["rule"] = payload["condition"]["rule"] payload.pop("condition") # When: saving and reading back the control 
data @@ -178,9 +178,9 @@ def test_set_control_data_accepts_legacy_leaf_payload(client: TestClient) -> Non assert resp_get.status_code == 200 data = resp_get.json()["data"] assert "selector" not in data - assert "evaluator" not in data + assert "rule" not in data assert data["condition"]["selector"]["path"] == "input" - assert data["condition"]["evaluator"]["name"] == "regex" + assert data["condition"]["rule"]["name"] == "regex" def test_set_control_data_with_empty_dict_fails(client: TestClient) -> None: diff --git a/server/tests/test_controls_additional.py b/server/tests/test_controls_additional.py index cf7aa4b0..80818a0a 100644 --- a/server/tests/test_controls_additional.py +++ b/server/tests/test_controls_additional.py @@ -9,7 +9,7 @@ from unittest.mock import AsyncMock, MagicMock import pytest -from agent_control_evaluators import RegexEvaluatorConfig +from agent_control_rules import RegexRuleConfig from agent_control_models import ConditionNode from agent_control_models.errors import ErrorCode, ErrorReason from agent_control_server.auth_framework import Operation, Principal, set_authorizer @@ -1719,9 +1719,9 @@ def test_set_control_data_agent_scoped_agent_not_found(client: TestClient) -> No # Given: a control control_id, _ = _create_control(client) - # When: setting data with a missing agent in evaluator ref + # When: setting data with a missing agent in rule ref payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": "missing-agent:custom", "config": {"pattern": "x"}} + payload["condition"]["rule"] = {"name": "missing-agent:custom", "config": {"pattern": "x"}} resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) # Then: not found @@ -1729,8 +1729,8 @@ def test_set_control_data_agent_scoped_agent_not_found(client: TestClient) -> No assert resp.json()["error_code"] == "AGENT_NOT_FOUND" -def test_set_control_data_agent_scoped_evaluator_missing(client: TestClient) -> None: - # Given: an agent without 
the referenced evaluator +def test_set_control_data_agent_scoped_rule_missing(client: TestClient) -> None: + # Given: an agent without the referenced rule agent_name = f"agent-{uuid.uuid4().hex[:12]}" agent_name = agent_name resp = client.post( @@ -1738,27 +1738,27 @@ def test_set_control_data_agent_scoped_evaluator_missing(client: TestClient) -> json={ "agent": {"agent_name": agent_name, "agent_name": agent_name}, "steps": [], - "evaluators": [], + "rules": [], }, ) assert resp.status_code == 200 control_id, _ = _create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": f"{agent_name}:missing", "config": {"pattern": "x"}} + payload["condition"]["rule"] = {"name": f"{agent_name}:missing", "config": {"pattern": "x"}} - # When: setting data with evaluator not registered on agent + # When: setting data with rule not registered on agent resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) # Then: validation error assert resp.status_code == 422 body = resp.json() - assert body["error_code"] == "EVALUATOR_NOT_FOUND" - assert any(err.get("field") == "data.condition.evaluator.name" for err in body.get("errors", [])) + assert body["error_code"] == "RULE_NOT_FOUND" + assert any(err.get("field") == "data.condition.rule.name" for err in body.get("errors", [])) def test_set_control_data_agent_scoped_invalid_schema(client: TestClient) -> None: - # Given: an agent with evaluator schema requiring "pattern" + # Given: an agent with rule schema requiring "pattern" agent_name = f"agent-{uuid.uuid4().hex[:12]}" agent_name = agent_name resp = client.post( @@ -1766,7 +1766,7 @@ def test_set_control_data_agent_scoped_invalid_schema(client: TestClient) -> Non json={ "agent": {"agent_name": agent_name, "agent_name": agent_name}, "steps": [], - "evaluators": [ + "rules": [ { "name": "custom", "description": "custom", @@ -1783,7 +1783,7 @@ def test_set_control_data_agent_scoped_invalid_schema(client: 
TestClient) -> Non control_id, _ = _create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": f"{agent_name}:custom", "config": {}} + payload["condition"]["rule"] = {"name": f"{agent_name}:custom", "config": {}} # When: setting data with config missing required fields resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) @@ -1792,7 +1792,7 @@ def test_set_control_data_agent_scoped_invalid_schema(client: TestClient) -> Non assert resp.status_code == 422 body = resp.json() assert body["error_code"] == "INVALID_CONFIG" - assert any(err.get("field") == "data.condition.evaluator.config" for err in body.get("errors", [])) + assert any(err.get("field") == "data.condition.rule.config" for err in body.get("errors", [])) def test_patch_control_updates_name_and_enabled(client: TestClient) -> None: @@ -1856,7 +1856,7 @@ def test_set_control_data_agent_scoped_corrupted_agent_data_returns_422( json={ "agent": {"agent_name": agent_name, "agent_name": agent_name}, "steps": [], - "evaluators": [{"name": "custom", "config_schema": {"type": "object"}}], + "rules": [{"name": "custom", "config_schema": {"type": "object"}}], }, ) assert resp.status_code == 200 @@ -1869,9 +1869,9 @@ def test_set_control_data_agent_scoped_corrupted_agent_data_returns_422( control_id, _ = _create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": f"{agent_name}:custom", "config": {}} + payload["condition"]["rule"] = {"name": f"{agent_name}:custom", "config": {}} - # When: setting control data referencing the corrupted agent's evaluator + # When: setting control data referencing the corrupted agent's rule resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) # Then: corrupted agent data error is returned @@ -1879,37 +1879,37 @@ def test_set_control_data_agent_scoped_corrupted_agent_data_returns_422( assert resp.json()["error_code"] == 
"CORRUPTED_DATA" -def test_set_control_data_unknown_evaluator_allowed(client: TestClient) -> None: - # Given: a control with a non-registered evaluator name +def test_set_control_data_unknown_rule_allowed(client: TestClient) -> None: + # Given: a control with a non-registered rule name control_id, _ = _create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": "unknown-eval", "config": {}} + payload["condition"]["rule"] = {"name": "unknown-eval", "config": {}} # When: setting the control data resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) - # Then: update succeeds (unknown evaluators are allowed) + # Then: update succeeds (unknown rules are allowed) assert resp.status_code == 200 assert resp.json()["success"] is True -def test_set_control_data_builtin_evaluator_validation_error( +def test_set_control_data_builtin_rule_validation_error( client: TestClient, monkeypatch ) -> None: - # Given: a control and a server-side evaluator that enforces a schema + # Given: a control and a server-side rule that enforces a schema control_id, _ = _create_control(client) - class DummyEvaluator: - config_model = RegexEvaluatorConfig + class DummyRule: + config_model = RegexRuleConfig monkeypatch.setattr( controls_module, - "list_evaluators", - lambda: {"dummy": DummyEvaluator}, + "list_rules", + lambda: {"dummy": DummyRule}, ) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": "dummy", "config": {}} + payload["condition"]["rule"] = {"name": "dummy", "config": {}} # When: setting control data with invalid config resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) @@ -1919,30 +1919,30 @@ class DummyEvaluator: body = resp.json() assert body["error_code"] == "INVALID_CONFIG" assert any( - "data.condition.evaluator.config" in err.get("field", "") + "data.condition.rule.config" in err.get("field", "") for err in body.get("errors", []) ) 
-def test_set_control_data_builtin_evaluator_invalid_parameters( +def test_set_control_data_builtin_rule_invalid_parameters( client: TestClient, monkeypatch ) -> None: - # Given: a control and a server-side evaluator that raises TypeError + # Given: a control and a server-side rule that raises TypeError control_id, _ = _create_control(client) - class DummyEvaluator: + class DummyRule: @staticmethod def config_model(**_kwargs): # type: ignore[no-untyped-def] raise TypeError("unexpected parameter") monkeypatch.setattr( controls_module, - "list_evaluators", - lambda: {"dummy": DummyEvaluator}, + "list_rules", + lambda: {"dummy": DummyRule}, ) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": "dummy", "config": {"unexpected": "value"}} + payload["condition"]["rule"] = {"name": "dummy", "config": {"unexpected": "value"}} # When: setting control data with invalid parameters resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) @@ -1953,7 +1953,7 @@ def config_model(**_kwargs): # type: ignore[no-untyped-def] assert body["error_code"] == "INVALID_CONFIG" assert any(err.get("code") == "invalid_parameters" for err in body.get("errors", [])) assert any( - err.get("message") == "Invalid config parameters for evaluator." + err.get("message") == "Invalid config parameters for rule." 
for err in body.get("errors", []) ) assert "unexpected parameter" not in resp.text diff --git a/server/tests/test_controls_auth.py b/server/tests/test_controls_auth.py index 7975dad9..4ad6cd34 100644 --- a/server/tests/test_controls_auth.py +++ b/server/tests/test_controls_auth.py @@ -30,7 +30,7 @@ def _valid_template_render_payload() -> dict[str, object]: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, diff --git a/server/tests/test_controls_validation.py b/server/tests/test_controls_validation.py index 1d78287a..db50eb58 100644 --- a/server/tests/test_controls_validation.py +++ b/server/tests/test_controls_validation.py @@ -22,7 +22,7 @@ def test_validation_invalid_logic_enum(client: TestClient): # Given: a control and a payload with invalid 'logic' value control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = { + payload["condition"]["rule"] = { "name": "list", "config": { "values": ["a", "b"], @@ -45,14 +45,14 @@ def test_validation_invalid_logic_enum(client: TestClient): def test_validation_discriminator_mismatch(client: TestClient): - """Test that config must match the evaluator type.""" - # Given: a control and type='list' but config has 'pattern' (RegexEvaluatorConfig) + """Test that config must match the rule type.""" + # Given: a control and type='list' but config has 'pattern' (RegexRuleConfig) control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = { + payload["condition"]["rule"] = { "name": "list", "config": { - "pattern": "some_regex", # Invalid for ListEvaluatorConfig + "pattern": "some_regex", # Invalid for ListRuleConfig # Missing 'values' } } @@ -63,7 +63,7 @@ def test_validation_discriminator_mismatch(client: TestClient): # Then: 422 Unprocessable Entity assert resp.status_code == 422 
- # Then: error mentions missing required field for ListEvaluatorConfig (RFC 7807 format) + # Then: error mentions missing required field for ListRuleConfig (RFC 7807 format) response_data = resp.json() errors = response_data.get("errors", []) # Expecting 'values' field missing @@ -76,7 +76,7 @@ def test_validation_regex_flags_list(client: TestClient): # Given: a control and regex config with invalid flags type (string instead of list) control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = { + payload["condition"]["rule"] = { "name": "regex", "config": { "pattern": "abc", @@ -95,11 +95,11 @@ def test_validation_regex_flags_list(client: TestClient): def test_validation_list_values_reject_blank_strings(client: TestClient): - """Test that list evaluator config rejects empty and whitespace-only entries.""" - # Given: a control and a list evaluator payload with a whitespace-only value + """Test that list rule config rejects empty and whitespace-only entries.""" + # Given: a control and a list rule payload with a whitespace-only value control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = { + payload["condition"]["rule"] = { "name": "list", "config": { "values": [" "], @@ -125,7 +125,7 @@ def test_validation_invalid_regex_pattern(client: TestClient): # Given: a control and regex config with invalid pattern (unclosed bracket) control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = { + payload["condition"]["rule"] = { "name": "regex", "config": { "pattern": "[", # Invalid regex @@ -166,11 +166,11 @@ def test_validation_empty_string_path_rejected(client: TestClient): assert any("empty string" in e.get("message", "") for e in errors) -def test_validation_empty_evaluator_name_rejected(client: TestClient): - """Test that empty evaluator names are rejected at the request boundary.""" +def 
test_validation_empty_rule_name_rejected(client: TestClient): + """Test that empty rule names are rejected at the request boundary.""" control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": "", "config": {"pattern": "x"}} + payload["condition"]["rule"] = {"name": "", "config": {"pattern": "x"}} resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) @@ -178,7 +178,7 @@ def test_validation_empty_evaluator_name_rejected(client: TestClient): body = resp.json() assert body["error_code"] == "VALIDATION_ERROR" assert any( - "evaluator.name" in str(err.get("field", "")) + "rule.name" in str(err.get("field", "")) for err in body.get("errors", []) ) assert any( @@ -187,10 +187,10 @@ def test_validation_empty_evaluator_name_rejected(client: TestClient): ) -def test_validate_endpoint_whitespace_evaluator_name_rejected(client: TestClient): - """Whitespace-only evaluator names are rejected during validate-without-save too.""" +def test_validate_endpoint_whitespace_rule_name_rejected(client: TestClient): + """Whitespace-only rule names are rejected during validate-without-save too.""" payload = deepcopy(VALID_CONTROL_PAYLOAD) - payload["condition"]["evaluator"] = {"name": " ", "config": {"pattern": "x"}} + payload["condition"]["rule"] = {"name": " ", "config": {"pattern": "x"}} resp = client.post("/api/v1/controls/validate", json={"data": payload}) @@ -198,7 +198,7 @@ def test_validate_endpoint_whitespace_evaluator_name_rejected(client: TestClient body = resp.json() assert body["error_code"] == "VALIDATION_ERROR" assert any( - "evaluator.name" in str(err.get("field", "")) + "rule.name" in str(err.get("field", "")) for err in body.get("errors", []) ) assert any( @@ -207,7 +207,7 @@ def test_validate_endpoint_whitespace_evaluator_name_rejected(client: TestClient ) -def test_get_control_data_rejects_stored_blank_nested_evaluator_name(client: TestClient): +def 
test_get_control_data_rejects_stored_blank_nested_rule_name(client: TestClient): """Stored rows are revalidated on read using the same ControlDefinition model.""" control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) @@ -216,7 +216,7 @@ def test_get_control_data_rejects_stored_blank_nested_evaluator_name(client: Tes deepcopy(VALID_CONTROL_PAYLOAD["condition"]), { "selector": {"path": "input"}, - "evaluator": {"name": " ", "config": {"pattern": "x"}}, + "rule": {"name": " ", "config": {"pattern": "x"}}, }, ] } @@ -233,7 +233,7 @@ def test_get_control_data_rejects_stored_blank_nested_evaluator_name(client: Tes body = resp.json() assert body["error_code"] == "CORRUPTED_DATA" assert any( - err.get("field") == "condition.or[1].evaluator.name" + err.get("field") == "condition.or[1].rule.name" for err in body.get("errors", []) ) assert any( @@ -242,15 +242,15 @@ def test_get_control_data_rejects_stored_blank_nested_evaluator_name(client: Tes ) -def test_list_agent_controls_rejects_stored_blank_nested_evaluator_name(client: TestClient): - """Agent control listing rejects persisted blank evaluator names with the same validator.""" +def test_list_agent_controls_rejects_stored_blank_nested_rule_name(client: TestClient): + """Agent control listing rejects persisted blank rule names with the same validator.""" agent_name = f"agent-{uuid.uuid4().hex[:12]}" init_resp = client.post( "/api/v1/agents/initAgent", json={ "agent": {"agent_name": agent_name}, "steps": [], - "evaluators": [], + "rules": [], }, ) assert init_resp.status_code == 200 @@ -265,7 +265,7 @@ def test_list_agent_controls_rejects_stored_blank_nested_evaluator_name(client: deepcopy(VALID_CONTROL_PAYLOAD["condition"]), { "selector": {"path": "input"}, - "evaluator": {"name": "", "config": {"pattern": "x"}}, + "rule": {"name": "", "config": {"pattern": "x"}}, }, ] } @@ -282,7 +282,7 @@ def test_list_agent_controls_rejects_stored_blank_nested_evaluator_name(client: body = resp.json() assert 
body["error_code"] == "CORRUPTED_DATA" assert any( - err.get("field") == "data.condition.or[1].evaluator.name" + err.get("field") == "data.condition.or[1].rule.name" for err in body.get("errors", []) ) assert any( @@ -366,14 +366,14 @@ def test_validation_nested_condition_error_uses_bracketed_field_path( client: TestClient, ): """Nested condition leaf errors should report full dot/bracket paths.""" - # Given: a nested condition whose first leaf has invalid evaluator config + # Given: a nested condition whose first leaf has invalid rule config control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) payload["condition"] = { "and": [ { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["a", "b"], @@ -384,7 +384,7 @@ def test_validation_nested_condition_error_uses_bracketed_field_path( }, { "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "ok"}, }, @@ -399,23 +399,23 @@ def test_validation_nested_condition_error_uses_bracketed_field_path( assert resp.status_code == 422 errors = resp.json().get("errors", []) assert any( - err.get("field") == "data.condition.and[0].evaluator.logic" + err.get("field") == "data.condition.and[0].rule.logic" for err in errors ) -def test_validation_nested_agent_scoped_evaluator_error_uses_bracketed_field_path( +def test_validation_nested_agent_scoped_rule_error_uses_bracketed_field_path( client: TestClient, ): - """Nested agent-scoped evaluator failures should identify the exact leaf path.""" - # Given: an agent and a nested condition that references a missing agent evaluator + """Nested agent-scoped rule failures should identify the exact leaf path.""" + # Given: an agent and a nested condition that references a missing agent rule agent_name = f"agent-{uuid.uuid4().hex[:12]}" init_resp = client.post( "/api/v1/agents/initAgent", json={ "agent": {"agent_name": agent_name}, "steps": [], - "evaluators": [], + "rules": [], }, 
) assert init_resp.status_code == 200 @@ -426,8 +426,8 @@ def test_validation_nested_agent_scoped_evaluator_error_uses_bracketed_field_pat "or": [ { "selector": {"path": "input"}, - "evaluator": { - "name": f"{agent_name}:missing-evaluator", + "rule": { + "name": f"{agent_name}:missing-rule", "config": {}, }, } @@ -437,28 +437,28 @@ def test_validation_nested_agent_scoped_evaluator_error_uses_bracketed_field_pat # When: validating the nested control definition through the API resp = client.put(f"/api/v1/controls/{control_id}/data", json={"data": payload}) - # Then: the error points at the exact nested evaluator name field + # Then: the error points at the exact nested rule name field assert resp.status_code == 422 body = resp.json() - assert body["error_code"] == "EVALUATOR_NOT_FOUND" + assert body["error_code"] == "RULE_NOT_FOUND" assert any( - err.get("field") == "data.condition.or[0].evaluator.name" - and err.get("code") == "evaluator_not_found" + err.get("field") == "data.condition.or[0].rule.name" + and err.get("code") == "rule_not_found" for err in body.get("errors", []) ) -def test_validation_standalone_evaluator_error_uses_bracketed_field_path( +def test_validation_standalone_rule_error_uses_bracketed_field_path( client: TestClient, ): - """Nested standalone (global) evaluator config errors use bracketed leaf paths.""" + """Nested standalone (global) rule config errors use bracketed leaf paths.""" control_id = create_control(client) payload = deepcopy(VALID_CONTROL_PAYLOAD) payload["condition"] = { "or": [ { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {}, }, @@ -472,6 +472,6 @@ def test_validation_standalone_evaluator_error_uses_bracketed_field_path( body = resp.json() assert body["error_code"] == "VALIDATION_ERROR" assert any( - err.get("field", "").startswith("data.condition.or[0].evaluator") + err.get("field", "").startswith("data.condition.or[0].rule") for err in body.get("errors", []) ) diff --git 
a/server/tests/test_error_handling.py b/server/tests/test_error_handling.py index 5cc4e18f..29d2a343 100644 --- a/server/tests/test_error_handling.py +++ b/server/tests/test_error_handling.py @@ -490,7 +490,7 @@ async def mock_db_returns_control() -> AsyncGenerator[AsyncSession, None]: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "x"}}, + "rule": {"name": "regex", "config": {"pattern": "x"}}, }, "action": {"decision": "deny"} } diff --git a/server/tests/test_evaluation_e2e.py b/server/tests/test_evaluation_e2e.py index fbe9a483..fb6b0a0e 100644 --- a/server/tests/test_evaluation_e2e.py +++ b/server/tests/test_evaluation_e2e.py @@ -15,7 +15,7 @@ def test_evaluation_flow_deny(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "secret"} }, @@ -94,7 +94,7 @@ def test_evaluation_empty_policy(client: TestClient): def test_evaluation_path_failure(client: TestClient): - """Test that if path selection fails (returns None), the evaluator handles it gracefully.""" + """Test that if path selection fails (returns None), the rule handles it gracefully.""" # Given: A control selecting a non-existent path control_data = { "description": "Check non-existent field", @@ -102,7 +102,7 @@ def test_evaluation_path_failure(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input.non_existent_field"}, # Invalid for string input - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": ".*"} # Match anything if found }, @@ -119,7 +119,7 @@ def test_evaluation_path_failure(client: TestClient): ) resp = client.post("/api/v1/evaluation", json=req.model_dump(mode="json")) - # Then: It should remain safe because selector returns None, and RegexEvaluator(None) -> False 
+ # Then: It should remain safe because selector returns None, and RegexRule(None) -> False assert resp.status_code == 200 data = resp.json() assert data["is_safe"] is True @@ -127,14 +127,14 @@ def test_evaluation_path_failure(client: TestClient): def test_evaluation_selector_star_uses_full_step_json(client: TestClient): - # Given: a control with selector "*" and JSON evaluator + # Given: a control with selector "*" and JSON rule control_data = { "description": "Validate full step JSON", "enabled": True, "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "*"}, - "evaluator": {"name": "json", "config": {"required_fields": ["type"]}}, + "rule": {"name": "json", "config": {"required_fields": ["type"]}}, "action": {"decision": "deny"}, } agent_name, _ = create_and_assign_policy(client, control_data, agent_name="JsonStarAgent") @@ -144,7 +144,7 @@ def test_evaluation_selector_star_uses_full_step_json(client: TestClient): req = EvaluationRequest(agent_name=agent_name, step=payload, stage="pre") resp = client.post("/api/v1/evaluation", json=req.model_dump(mode="json")) - # Then: evaluation is safe (JSON evaluator accepts the full payload) + # Then: evaluation is safe (JSON rule accepts the full payload) assert resp.status_code == 200 assert resp.json()["is_safe"] is True assert resp.json()["matches"] is None @@ -159,7 +159,7 @@ def test_evaluation_tool_step_nested(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.config.risk_level"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "^critical$"} }, @@ -215,7 +215,7 @@ def test_evaluation_deny_precedence(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "keyword"}}, + "rule": {"name": "regex", "config": {"pattern": "keyword"}}, "action": {"decision": 
"observe"} } # Use helper to setup agent with first control @@ -233,7 +233,7 @@ def test_evaluation_deny_precedence(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "keyword"}}, + "rule": {"name": "regex", "config": {"pattern": "keyword"}}, "action": {"decision": "deny"} } resp = client.put( @@ -275,7 +275,7 @@ def test_evaluation_stage_filtering(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["post"]}, "selector": {"path": "output"}, - "evaluator": {"name": "regex", "config": {"pattern": "bad_output"}}, + "rule": {"name": "regex", "config": {"pattern": "bad_output"}}, "action": {"decision": "deny"} } agent_name, _ = create_and_assign_policy(client, control_data, agent_name="StageAgent") @@ -314,7 +314,7 @@ def test_evaluation_step_type_filtering(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "name"}, - "evaluator": {"name": "regex", "config": {"pattern": "rm_rf"}}, + "rule": {"name": "regex", "config": {"pattern": "rm_rf"}}, "action": {"decision": "deny"} } agent_name, _ = create_and_assign_policy(client, control_data, agent_name="AppliesToAgent") @@ -350,7 +350,7 @@ def test_evaluation_denylist_step_name(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "name"}, - "evaluator": { + "rule": { "name": "list", # Matches if value is IN list (exact match) "config": {"values": ["dangerous_tool", "rm_rf"], "match_on": "match"} }, diff --git a/server/tests/test_evaluation_e2e_list_evaluator.py b/server/tests/test_evaluation_e2e_list_rule.py similarity index 95% rename from server/tests/test_evaluation_e2e_list_evaluator.py rename to server/tests/test_evaluation_e2e_list_rule.py index 4c73c863..b6529b8a 100644 --- 
a/server/tests/test_evaluation_e2e_list_evaluator.py +++ b/server/tests/test_evaluation_e2e_list_rule.py @@ -1,10 +1,10 @@ -"""End-to-end tests for AllowList/DenyList logic using the new ListEvaluator.""" +"""End-to-end tests for AllowList/DenyList logic using the new ListRule.""" import uuid from fastapi.testclient import TestClient from agent_control_models import EvaluationRequest, Step from .utils import create_and_assign_policy -def test_list_evaluator_denylist_behavior(client: TestClient): +def test_list_rule_denylist_behavior(client: TestClient): """Test DenyList behavior: Block if ANY value matches.""" # Given: A registered agent with a DenyList control blocking "rm" and "shutdown" control_data = { @@ -13,7 +13,7 @@ def test_list_evaluator_denylist_behavior(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.cmd"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["rm", "shutdown"], @@ -51,7 +51,7 @@ def test_list_evaluator_denylist_behavior(client: TestClient): assert resp.json()["matches"][0]["control_name"] == control_name -def test_list_evaluator_allowlist_behavior(client: TestClient): +def test_list_rule_allowlist_behavior(client: TestClient): """Test AllowList behavior: Block if value is NOT in list.""" # Given: A registered agent with an AllowList control allowing ONLY "safe_tool" # We use match_on="no_match" to trigger the control (Deny) if the value is NOT found in the list @@ -61,7 +61,7 @@ def test_list_evaluator_allowlist_behavior(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "name"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["safe_tool"], @@ -99,7 +99,7 @@ def test_list_evaluator_allowlist_behavior(client: TestClient): assert resp.json()["matches"][0]["control_name"] == control_name -def test_list_evaluator_case_insensitive(client: TestClient): +def 
test_list_rule_case_insensitive(client: TestClient): """Test case-insensitive matching.""" # Given: A control blocking "BlockMe" with case_sensitive=False control_data = { @@ -108,7 +108,7 @@ def test_list_evaluator_case_insensitive(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["BlockMe"], @@ -132,7 +132,7 @@ def test_list_evaluator_case_insensitive(client: TestClient): assert resp.json()["is_safe"] is False -def test_list_evaluator_list_input_any_match(client: TestClient): +def test_list_rule_list_input_any_match(client: TestClient): """Test matching against a list input (e.g. tags) with logic='any'.""" # Given: A control blocking request if ANY tag is "restricted" control_data = { @@ -141,7 +141,7 @@ def test_list_evaluator_list_input_any_match(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.tags"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["restricted"], @@ -178,7 +178,7 @@ def test_list_evaluator_list_input_any_match(client: TestClient): assert resp.json()["is_safe"] is True -def test_list_evaluator_list_input_all_match(client: TestClient): +def test_list_rule_list_input_all_match(client: TestClient): """Test matching against a list input with logic='all'.""" # Given: A control requiring ALL tags to be "safe_tag" or "audit_approved" # We trigger Deny if the condition (ALL items match) is NOT met (match_on="no_match") @@ -188,7 +188,7 @@ def test_list_evaluator_list_input_all_match(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.tags"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["safe_tag", "audit_approved"], @@ -225,7 +225,7 @@ def test_list_evaluator_list_input_all_match(client: TestClient): assert 
resp.json()["is_safe"] is False -def test_list_evaluator_disallow_name(client: TestClient): +def test_list_rule_disallow_name(client: TestClient): """Test Disallowing specific tool names (DenyList on name).""" # Given: A control blocking "delete_user" and "drop_db" tools control_data = { @@ -234,7 +234,7 @@ def test_list_evaluator_disallow_name(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "name"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["delete_user", "drop_db"], @@ -270,7 +270,7 @@ def test_list_evaluator_disallow_name(client: TestClient): assert resp.json()["matches"][0]["control_name"] == control_name -def test_list_evaluator_allow_only_argument_values(client: TestClient): +def test_list_rule_allow_only_argument_values(client: TestClient): """Test Allowing ONLY specific values for an argument (AllowList on argument).""" # Given: A control allowing only "us-east-1" or "us-west-2" for "region" argument control_data = { @@ -279,7 +279,7 @@ def test_list_evaluator_allow_only_argument_values(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.region"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["us-east-1", "us-west-2"], @@ -315,7 +315,7 @@ def test_list_evaluator_allow_only_argument_values(client: TestClient): assert resp.json()["matches"][0]["control_name"] == control_name -def test_list_evaluator_edge_cases(client: TestClient): +def test_list_rule_edge_cases(client: TestClient): """Test edge cases: Empty inputs, Empty controls, Type coercion, Special chars.""" # 1. 
Empty Control Values # Given: Control with empty values list @@ -325,7 +325,7 @@ def test_list_evaluator_edge_cases(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "name"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": [], @@ -355,7 +355,7 @@ def test_list_evaluator_edge_cases(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.count"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": [10, 20], # Integers in control @@ -395,7 +395,7 @@ def test_list_evaluator_edge_cases(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["(test)", "a.b*c"], # Literal parens and dot/star @@ -436,7 +436,7 @@ def test_list_evaluator_edge_cases(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.missing_arg"}, # Will be None - "evaluator": { + "rule": { "name": "list", "config": { "values": ["something"], @@ -459,7 +459,7 @@ def test_list_evaluator_edge_cases(client: TestClient): assert resp.json()["is_safe"] is True -def test_list_evaluator_re2_corner_cases(client: TestClient): +def test_list_rule_re2_corner_cases(client: TestClient): """Test re2 specific corner cases: Large lists, Null bytes, Newlines.""" # 1. 
Large List (Performance/Limits) # Given: A control with 1000 values @@ -472,7 +472,7 @@ def test_list_evaluator_re2_corner_cases(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.item"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": large_list, @@ -495,7 +495,7 @@ def test_list_evaluator_re2_corner_cases(client: TestClient): assert resp.json()["is_safe"] is False -def test_list_evaluator_newline_strictness(client: TestClient): +def test_list_rule_newline_strictness(client: TestClient): """Test that list matching is strict about newlines.""" # Given: Control matching "exact" control_strict = { @@ -504,7 +504,7 @@ def test_list_evaluator_newline_strictness(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.val"}, - "evaluator": { + "rule": { "name": "list", "config": { "values": ["exact"], diff --git a/server/tests/test_evaluation_e2e_sql_evaluator.py b/server/tests/test_evaluation_e2e_sql_rule.py similarity index 98% rename from server/tests/test_evaluation_e2e_sql_evaluator.py rename to server/tests/test_evaluation_e2e_sql_rule.py index e82bdb7b..6545643d 100644 --- a/server/tests/test_evaluation_e2e_sql_evaluator.py +++ b/server/tests/test_evaluation_e2e_sql_rule.py @@ -1,4 +1,4 @@ -"""End-to-end tests for SQL evaluator.""" +"""End-to-end tests for SQL rule.""" from agent_control_models import EvaluationRequest, Step from fastapi.testclient import TestClient @@ -20,7 +20,7 @@ def test_sql_read_only_agent(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "allowed_operations": ["SELECT"], @@ -105,7 +105,7 @@ def test_sql_multi_tenant_security(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": 
"input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "required_columns": ["tenant_id"], @@ -174,7 +174,7 @@ def test_sql_block_destructive_operations(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "blocked_operations": ["DROP", "TRUNCATE", "DELETE"] @@ -272,7 +272,7 @@ def test_sql_table_restrictions(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "allowed_tables": ["users", "orders"] @@ -355,7 +355,7 @@ def test_sql_multi_statement_blocking(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "allow_multi_statements": False @@ -408,7 +408,7 @@ def test_sql_limit_enforcement(client: TestClient): "execution": "server", "scope": {"step_types": ["tool"], "stages": ["pre"]}, "selector": {"path": "input.query"}, - "evaluator": { + "rule": { "name": "sql", "config": { "require_limit": True, @@ -513,7 +513,7 @@ def test_sql_llm_output_validation_read_only(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["post"]}, "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "sql", "config": { "allowed_operations": ["SELECT"], @@ -579,7 +579,7 @@ def test_sql_llm_output_multi_statement_blocking(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["post"]}, "selector": {"path": "output"}, - "evaluator": { + "rule": { "name": "sql", "config": { "allow_multi_statements": False @@ -630,7 +630,7 @@ def test_sql_llm_output_table_restrictions(client: TestClient): "execution": "server", "scope": {"step_types": ["llm"], "stages": ["post"]}, "selector": {"path": 
"output"}, - "evaluator": { + "rule": { "name": "sql", "config": { "allowed_tables": ["analytics", "reports"] diff --git a/server/tests/test_evaluation_error_handling.py b/server/tests/test_evaluation_error_handling.py index 9e577dff..e770274a 100644 --- a/server/tests/test_evaluation_error_handling.py +++ b/server/tests/test_evaluation_error_handling.py @@ -1,4 +1,4 @@ -"""End-to-end tests for evaluator error handling.""" +"""End-to-end tests for rule error handling.""" import uuid from unittest.mock import AsyncMock, MagicMock @@ -6,13 +6,13 @@ ControlMatch, EvaluationRequest, EvaluationResponse, - EvaluatorResult, + RuleResult, Step, ) from agent_control_server.db import async_engine from agent_control_server.endpoints.evaluation import ( - SAFE_EVALUATOR_ERROR, - SAFE_EVALUATOR_TIMEOUT_ERROR, + SAFE_RULE_ERROR, + SAFE_RULE_TIMEOUT_ERROR, _sanitize_control_match, ) from fastapi.testclient import TestClient @@ -20,52 +20,52 @@ from .utils import create_and_assign_policy -def test_evaluation_with_agent_scoped_evaluator_missing(client: TestClient): - """Test that referencing a missing agent evaluator fails during control creation. +def test_evaluation_with_agent_scoped_rule_missing(client: TestClient): + """Test that referencing a missing agent rule fails during control creation. 
- Given: A control referencing agent:evaluator that doesn't exist + Given: A control referencing agent:rule that doesn't exist When: Creating the control - Then: Returns 422 EVALUATOR_NOT_FOUND + Then: Returns 422 RULE_NOT_FOUND """ - # Given: an agent without evaluators + # Given: an agent without rules agent_name = f"testagent-{uuid.uuid4().hex[:12]}" client.post("/api/v1/agents/initAgent", json={ "agent": { "agent_name": agent_name }, "steps": [], - "evaluators": [] + "rules": [] }) - # And: a control referencing a non-existent agent evaluator + # And: a control referencing a non-existent agent rule control_data = { "description": "Test control", "enabled": True, "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": { - "name": f"{agent_name}:missing-evaluator", + "rule": { + "name": f"{agent_name}:missing-rule", "config": {} }, "action": {"decision": "deny"} } - # When: creating the control with a missing agent-scoped evaluator + # When: creating the control with a missing agent-scoped rule set_resp = client.put( "/api/v1/controls", json={"name": f"control-{uuid.uuid4().hex[:8]}", "data": control_data}, ) - # Then: the missing evaluator is surfaced deterministically + # Then: the missing rule is surfaced deterministically assert set_resp.status_code == 422 - assert set_resp.json()["error_code"] == "EVALUATOR_NOT_FOUND" + assert set_resp.json()["error_code"] == "RULE_NOT_FOUND" def test_evaluation_control_with_invalid_config_caught_early(client: TestClient): - """Test that invalid evaluator config is caught at control creation. + """Test that invalid rule config is caught at control creation. 
- Given: A control with invalid config for an evaluator + Given: A control with invalid config for a rule When: Setting control data Then: Returns 422 with validation error """ @@ -76,7 +76,7 @@ def test_evaluation_control_with_invalid_config_caught_early(client: TestClient) "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {} # Missing required 'pattern' field }, @@ -93,12 +93,12 @@ def test_evaluation_control_with_invalid_config_caught_early(client: TestClient) assert "pattern" in set_resp.text.lower() or "required" in set_resp.text.lower() -def test_evaluation_errors_field_populated_on_evaluator_failure( +def test_evaluation_errors_field_populated_on_rule_failure( client: TestClient, monkeypatch ): - """Test that errors field is populated when evaluator fails at runtime. + """Test that errors field is populated when rule fails at runtime. - Given: A valid control with an evaluator that crashes during evaluation + Given: A valid control with a rule that crashes during evaluation When: Evaluation is requested Then: Response has errors field populated and is_safe=False (for deny) """ @@ -109,7 +109,7 @@ def test_evaluation_errors_field_populated_on_evaluator_failure( "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": "test"} }, @@ -117,18 +117,18 @@ def test_evaluation_errors_field_populated_on_evaluator_failure( } agent_name, control_name = create_and_assign_policy(client, control_data) - # And: an evaluator instance that throws during evaluation - mock_evaluator = MagicMock() - mock_evaluator.evaluate = AsyncMock(side_effect=RuntimeError("Simulated evaluator crash")) - mock_evaluator.get_timeout_seconds = MagicMock(return_value=30.0) + # And: a rule instance that throws during evaluation + mock_rule = MagicMock() + 
mock_rule.evaluate = AsyncMock(side_effect=RuntimeError("Simulated rule crash")) + mock_rule.get_timeout_seconds = MagicMock(return_value=30.0) # Patch where it's used (in core module), not where it's defined import agent_control_engine.core as core_module - def mock_get_evaluator_instance(config): - return mock_evaluator + def mock_get_rule_instance(config): + return mock_rule - monkeypatch.setattr(core_module, "get_evaluator_instance", mock_get_evaluator_instance) + monkeypatch.setattr(core_module, "get_rule_instance", mock_get_rule_instance) # When: sending an evaluation request payload = Step(type="llm", name="test-step", input="test content", output=None) @@ -155,15 +155,15 @@ def mock_get_evaluator_instance(config): assert data["errors"][0]["control_name"] == control_name assert ( data["errors"][0]["result"]["error"] - == "Evaluation failed due to an internal evaluator error." + == "Evaluation failed due to an internal rule error." ) assert "RuntimeError" not in data["errors"][0]["result"]["error"] - assert "Simulated evaluator crash" not in data["errors"][0]["result"]["error"] + assert "Simulated rule crash" not in data["errors"][0]["result"]["error"] condition_trace = data["errors"][0]["result"]["metadata"]["condition_trace"] - assert condition_trace["error"] == SAFE_EVALUATOR_ERROR - assert condition_trace["message"] == SAFE_EVALUATOR_ERROR + assert condition_trace["error"] == SAFE_RULE_ERROR + assert condition_trace["message"] == SAFE_RULE_ERROR assert "RuntimeError" not in condition_trace["error"] - assert "Simulated evaluator crash" not in condition_trace["message"] + assert "Simulated rule crash" not in condition_trace["message"] # And: no matches are returned because evaluation failed assert data["matches"] is None or len(data["matches"]) == 0 @@ -180,7 +180,7 @@ def test_evaluation_response_is_sanitized_without_server_side_observability( "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - 
"evaluator": { + "rule": { "name": "regex", "config": {"pattern": "test"} }, @@ -188,16 +188,16 @@ def test_evaluation_response_is_sanitized_without_server_side_observability( } agent_name, control_name = create_and_assign_policy(client, control_data) - mock_evaluator = MagicMock() - mock_evaluator.evaluate = AsyncMock(side_effect=RuntimeError("Simulated evaluator crash")) - mock_evaluator.get_timeout_seconds = MagicMock(return_value=30.0) + mock_rule = MagicMock() + mock_rule.evaluate = AsyncMock(side_effect=RuntimeError("Simulated rule crash")) + mock_rule.get_timeout_seconds = MagicMock(return_value=30.0) import agent_control_engine.core as core_module monkeypatch.setattr( core_module, - "get_evaluator_instance", - lambda _config: mock_evaluator, + "get_rule_instance", + lambda _config: mock_rule, ) payload = Step(type="llm", name="test-step", input="test content", output=None) @@ -213,16 +213,16 @@ def test_evaluation_response_is_sanitized_without_server_side_observability( assert data["errors"] is not None assert len(data["errors"]) == 1 assert data["errors"][0]["control_name"] == control_name - assert data["errors"][0]["result"]["error"] == SAFE_EVALUATOR_ERROR + assert data["errors"][0]["result"]["error"] == SAFE_RULE_ERROR def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: - # Given: a control match whose nested condition trace contains raw evaluator errors + # Given: a control match whose nested condition trace contains raw rule errors match = ControlMatch( control_id=1, control_name="nested-trace", action="deny", - result=EvaluatorResult( + result=RuleResult( matched=False, confidence=0.0, error="RuntimeError: nested boom", @@ -247,10 +247,10 @@ def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: child_trace = sanitized.result.metadata["condition_trace"]["children"][0] # Then: both the top-level result and nested trace are redacted - assert sanitized.result.error == SAFE_EVALUATOR_ERROR - assert 
sanitized.result.message == SAFE_EVALUATOR_ERROR - assert child_trace["error"] == SAFE_EVALUATOR_ERROR - assert child_trace["message"] == SAFE_EVALUATOR_ERROR + assert sanitized.result.error == SAFE_RULE_ERROR + assert sanitized.result.message == SAFE_RULE_ERROR + assert child_trace["error"] == SAFE_RULE_ERROR + assert child_trace["message"] == SAFE_RULE_ERROR def test_sanitize_control_match_redacts_nested_condition_trace_timeouts() -> None: @@ -259,21 +259,21 @@ def test_sanitize_control_match_redacts_nested_condition_trace_timeouts() -> Non control_id=1, control_name="nested-timeout", action="deny", - result=EvaluatorResult( + result=RuleResult( matched=False, confidence=0.0, - error="TimeoutError: Evaluator exceeded 30s timeout", - message="Condition evaluation failed: TimeoutError: Evaluator exceeded 30s timeout", + error="TimeoutError: Rule exceeded 30s timeout", + message="Condition evaluation failed: TimeoutError: Rule exceeded 30s timeout", metadata={ "condition_trace": { "type": "or", "children": [ { "type": "leaf", - "error": "TimeoutError: Evaluator exceeded 30s timeout", + "error": "TimeoutError: Rule exceeded 30s timeout", "message": ( "Evaluation failed: TimeoutError: " - "Evaluator exceeded 30s timeout" + "Rule exceeded 30s timeout" ), } ], @@ -287,10 +287,10 @@ def test_sanitize_control_match_redacts_nested_condition_trace_timeouts() -> Non child_trace = sanitized.result.metadata["condition_trace"]["children"][0] # Then: both the top-level result and nested trace use the safe timeout text - assert sanitized.result.error == SAFE_EVALUATOR_TIMEOUT_ERROR - assert sanitized.result.message == SAFE_EVALUATOR_TIMEOUT_ERROR - assert child_trace["error"] == SAFE_EVALUATOR_TIMEOUT_ERROR - assert child_trace["message"] == SAFE_EVALUATOR_TIMEOUT_ERROR + assert sanitized.result.error == SAFE_RULE_TIMEOUT_ERROR + assert sanitized.result.message == SAFE_RULE_TIMEOUT_ERROR + assert child_trace["error"] == SAFE_RULE_TIMEOUT_ERROR + assert child_trace["message"] == 
SAFE_RULE_TIMEOUT_ERROR def test_evaluation_engine_value_error_returns_422(client: TestClient, monkeypatch) -> None: @@ -302,7 +302,7 @@ def test_evaluation_engine_value_error_returns_422(client: TestClient, monkeypat "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, "action": {"decision": "deny"}, } agent_name, _ = create_and_assign_policy(client, control_data) @@ -332,7 +332,7 @@ def test_evaluation_releases_db_connection_before_engine_processing( client: TestClient, monkeypatch, ) -> None: - """Evaluation should not hold a DB connection while evaluator work runs.""" + """Evaluation should not hold a DB connection while rule work runs.""" agent_name, _ = create_and_assign_policy(client) checked_out_counts: list[int] = [] diff --git a/server/tests/test_init_agent.py b/server/tests/test_init_agent.py index fe88ce30..e3259020 100644 --- a/server/tests/test_init_agent.py +++ b/server/tests/test_init_agent.py @@ -141,12 +141,12 @@ def test_agent_endpoints_normalize_mixed_case_agent_name(client: TestClient) -> controls_resp = client.get(f"/api/v1/agents/{mixed_case_name}/controls") assert controls_resp.status_code == 200 - evaluators_resp = client.get(f"/api/v1/agents/{mixed_case_name}/evaluators") - assert evaluators_resp.status_code == 200 + rules_resp = client.get(f"/api/v1/agents/{mixed_case_name}/rules") + assert rules_resp.status_code == 200 patch_resp = client.patch( f"/api/v1/agents/{mixed_case_name}", - json={"remove_steps": [], "remove_evaluators": []}, + json={"remove_steps": [], "remove_rules": []}, ) assert patch_resp.status_code == 200 @@ -276,12 +276,12 @@ def test_init_agent_overwrites_step_on_signature_change(client: TestClient) -> N assert "schema conflict" in body["detail"].lower() -def test_get_agent_returns_evaluators(client: TestClient) -> None: - """Test that GET 
/agents/{id} returns evaluators.""" - # Given: an agent with evaluators +def test_get_agent_returns_rules(client: TestClient) -> None: + """Test that GET /agents/{id} returns rules.""" + # Given: an agent with rules agent_name = str(uuid.uuid4()) payload = make_agent_payload(agent_name=agent_name) - payload["evaluators"] = [ + payload["rules"] = [ {"name": "eval-a", "description": "First", "config_schema": {}}, {"name": "eval-b", "description": "Second", "config_schema": {"type": "object"}}, ] @@ -290,12 +290,12 @@ def test_get_agent_returns_evaluators(client: TestClient) -> None: # When: fetching the agent get_resp = client.get(f"/api/v1/agents/{agent_name}") - # Then: evaluators are included in the response + # Then: rules are included in the response assert get_resp.status_code == 200 data = get_resp.json() - assert "evaluators" in data - assert len(data["evaluators"]) == 2 - names = {e["name"] for e in data["evaluators"]} + assert "rules" in data + assert len(data["rules"]) == 2 + names = {e["name"] for e in data["rules"]} assert names == {"eval-a", "eval-b"} @@ -537,9 +537,9 @@ def test_list_agents_empty(client: TestClient) -> None: def test_list_agents_returns_created_agents(client: TestClient) -> None: """Test listing agents returns created agents with correct summaries.""" - # Given: two agents with different steps/evaluators + # Given: two agents with different steps/rules payload1 = make_agent_payload(name="agent-one-01") - payload1["evaluators"] = [ + payload1["rules"] = [ {"name": "eval-1", "description": "Test", "config_schema": {}}, ] r1 = client.post("/api/v1/agents/initAgent", json=payload1) @@ -568,14 +568,14 @@ def test_list_agents_returns_created_agents(client: TestClient) -> None: agent1 = agent_map["agent-one-01"] assert agent1["agent_name"] == "agent-one-01" assert agent1["step_count"] == 1 # from make_agent_payload - assert agent1["evaluator_count"] == 1 + assert agent1["rule_count"] == 1 assert agent1["policy_ids"] == [] assert 
"agent-two-02" in agent_map agent2 = agent_map["agent-two-02"] assert agent2["agent_name"] == "agent-two-02" assert agent2["step_count"] == 2 - assert agent2["evaluator_count"] == 0 + assert agent2["rule_count"] == 0 assert agent2["policy_ids"] == [] diff --git a/server/tests/test_init_agent_conflict_mode.py b/server/tests/test_init_agent_conflict_mode.py index 0397ce94..d126bdb0 100644 --- a/server/tests/test_init_agent_conflict_mode.py +++ b/server/tests/test_init_agent_conflict_mode.py @@ -38,7 +38,7 @@ def _init_payload( agent_description: str = "desc", agent_version: str = "1.0", steps: list[dict[str, Any]] | None = None, - evaluators: list[dict[str, Any]] | None = None, + rules: list[dict[str, Any]] | None = None, conflict_mode: str | None = None, ) -> dict[str, Any]: canonical_name = agent_name.lower() @@ -49,23 +49,23 @@ def _init_payload( "agent_version": agent_version, }, "steps": steps or [], - "evaluators": evaluators or [], + "rules": rules or [], } if conflict_mode is not None: payload["conflict_mode"] = conflict_mode return payload -def _create_policy_with_agent_evaluator_control( +def _create_policy_with_agent_rule_control( client: TestClient, *, agent_name: str, - evaluator_name: str, + rule_name: str, ) -> tuple[int, int, str]: control_data = deepcopy(VALID_CONTROL_PAYLOAD) control_name = f"control-{uuid.uuid4().hex[:8]}" - control_data["condition"]["evaluator"] = { - "name": f"{agent_name}:{evaluator_name}", + control_data["condition"]["rule"] = { + "name": f"{agent_name}:{rule_name}", "config": {}, } create_control_resp = client.put( @@ -85,8 +85,8 @@ def _create_policy_with_agent_evaluator_control( return policy_id, control_id, control_name -def test_init_agent_overwrite_replaces_steps_and_evaluators(client: TestClient) -> None: - # Given: an existing agent registration with baseline steps and evaluators. 
+def test_init_agent_overwrite_replaces_steps_and_rules(client: TestClient) -> None: + # Given: an existing agent registration with baseline steps and rules. agent_name = f"agent-{uuid.uuid4().hex[:12]}" create_payload = _init_payload( @@ -106,7 +106,7 @@ def test_init_agent_overwrite_replaces_steps_and_evaluators(client: TestClient) "output_schema": {"type": "boolean"}, }, ], - evaluators=[ + rules=[ {"name": "eval-a", "description": "v1", "config_schema": {"type": "object"}}, {"name": "eval-b", "description": "v1", "config_schema": {"type": "object"}}, ], @@ -135,7 +135,7 @@ def test_init_agent_overwrite_replaces_steps_and_evaluators(client: TestClient) "output_schema": {"type": "string"}, }, ], - evaluators=[ + rules=[ {"name": "eval-a", "description": "v2", "config_schema": {"type": "string"}}, {"name": "eval-c", "description": "new", "config_schema": {"type": "object"}}, ], @@ -154,10 +154,10 @@ def test_init_agent_overwrite_replaces_steps_and_evaluators(client: TestClient) assert changes["steps_added"] == [{"type": "tool", "name": "tool-c"}] assert changes["steps_updated"] == [{"type": "tool", "name": "tool-a"}] assert changes["steps_removed"] == [{"type": "tool", "name": "tool-b"}] - assert changes["evaluators_added"] == ["eval-c"] - assert changes["evaluators_updated"] == ["eval-a"] - assert changes["evaluators_removed"] == ["eval-b"] - assert changes["evaluator_removals"] == [ + assert changes["rules_added"] == ["eval-c"] + assert changes["rules_updated"] == ["eval-a"] + assert changes["rules_removed"] == ["eval-b"] + assert changes["rule_removals"] == [ { "name": "eval-b", "referenced_by_active_controls": False, @@ -171,7 +171,7 @@ def test_init_agent_overwrite_replaces_steps_and_evaluators(client: TestClient) get_data = get_resp.json() assert get_data["agent"]["agent_description"] == "updated desc" assert {step["name"] for step in get_data["steps"]} == {"tool-a", "tool-c"} - assert {evaluator["name"] for evaluator in get_data["evaluators"]} == {"eval-a", 
"eval-c"} + assert {rule["name"] for rule in get_data["rules"]} == {"eval-a", "eval-c"} def test_init_agent_overwrite_existing_agent_requires_update_auth( @@ -241,74 +241,74 @@ def test_init_agent_strict_existing_agent_mutation_requires_update_auth( assert strict_resp.status_code == 403 -def test_init_agent_overwrite_warns_on_removed_referenced_evaluator(client: TestClient) -> None: - # Given: an agent whose assigned policy contains a control referencing an agent evaluator. +def test_init_agent_overwrite_warns_on_removed_referenced_rule(client: TestClient) -> None: + # Given: an agent whose assigned policy contains a control referencing an agent rule. agent_name = f"agent-{uuid.uuid4().hex[:12]}" - evaluator_name = "custom-eval" + rule_name = "custom-eval" init_resp = client.post( "/api/v1/agents/initAgent", json=_init_payload( agent_name=agent_name, - evaluators=[{"name": evaluator_name, "config_schema": {"type": "object"}}], + rules=[{"name": rule_name, "config_schema": {"type": "object"}}], ), ) assert init_resp.status_code == 200 - policy_id, control_id, control_name = _create_policy_with_agent_evaluator_control( - client, agent_name=agent_name, evaluator_name=evaluator_name + policy_id, control_id, control_name = _create_policy_with_agent_rule_control( + client, agent_name=agent_name, rule_name=rule_name ) assign_resp = client.post(f"/api/v1/agents/{agent_name}/policy/{policy_id}") assert assign_resp.status_code == 200 - # When: overwrite mode removes the evaluator from the incoming registration payload. + # When: overwrite mode removes the rule from the incoming registration payload. overwrite_resp = client.post( "/api/v1/agents/initAgent", json=_init_payload( agent_name=agent_name, - evaluators=[], + rules=[], conflict_mode="overwrite", ), ) assert overwrite_resp.status_code == 200 body = overwrite_resp.json() - # Then: the response includes active-control reference warnings and evaluator removal. 
+ # Then: the response includes active-control reference warnings and rule removal. assert body["overwrite_applied"] is True - assert body["overwrite_changes"]["evaluators_removed"] == [evaluator_name] - assert body["overwrite_changes"]["evaluator_removals"] == [ + assert body["overwrite_changes"]["rules_removed"] == [rule_name] + assert body["overwrite_changes"]["rule_removals"] == [ { - "name": evaluator_name, + "name": rule_name, "referenced_by_active_controls": True, "control_ids": [control_id], "control_names": [control_name], } ] - get_resp = client.get(f"/api/v1/agents/{agent_name}/evaluators") + get_resp = client.get(f"/api/v1/agents/{agent_name}/rules") assert get_resp.status_code == 200 - assert get_resp.json()["evaluators"] == [] + assert get_resp.json()["rules"] == [] -def test_init_agent_overwrite_dedupes_composite_references_for_removed_evaluator( +def test_init_agent_overwrite_dedupes_composite_references_for_removed_rule( client: TestClient, ) -> None: # Given: an agent whose assigned policy contains one composite control with - # multiple leaves referencing the same agent evaluator. + # multiple leaves referencing the same agent rule. 
agent_name = f"agent-{uuid.uuid4().hex[:12]}" - evaluator_name = "custom-eval" + rule_name = "custom-eval" init_resp = client.post( "/api/v1/agents/initAgent", json=_init_payload( agent_name=agent_name, - evaluators=[{"name": evaluator_name, "config_schema": {"type": "object"}}], + rules=[{"name": rule_name, "config_schema": {"type": "object"}}], ), ) assert init_resp.status_code == 200 - policy_id, control_id, control_name = _create_policy_with_agent_evaluator_control( - client, agent_name=agent_name, evaluator_name=evaluator_name + policy_id, control_id, control_name = _create_policy_with_agent_rule_control( + client, agent_name=agent_name, rule_name=rule_name ) control_data = deepcopy(VALID_CONTROL_PAYLOAD) @@ -316,11 +316,11 @@ def test_init_agent_overwrite_dedupes_composite_references_for_removed_evaluator "and": [ { "selector": {"path": "input"}, - "evaluator": {"name": f"{agent_name}:{evaluator_name}", "config": {}}, + "rule": {"name": f"{agent_name}:{rule_name}", "config": {}}, }, { "selector": {"path": "output"}, - "evaluator": {"name": f"{agent_name}:{evaluator_name}", "config": {}}, + "rule": {"name": f"{agent_name}:{rule_name}", "config": {}}, }, ] } @@ -333,12 +333,12 @@ def test_init_agent_overwrite_dedupes_composite_references_for_removed_evaluator assign_resp = client.post(f"/api/v1/agents/{agent_name}/policy/{policy_id}") assert assign_resp.status_code == 200 - # When: overwrite mode removes the referenced evaluator. + # When: overwrite mode removes the referenced rule. overwrite_resp = client.post( "/api/v1/agents/initAgent", json=_init_payload( agent_name=agent_name, - evaluators=[], + rules=[], conflict_mode="overwrite", ), ) @@ -347,9 +347,9 @@ def test_init_agent_overwrite_dedupes_composite_references_for_removed_evaluator # Then: the response dedupes the control reference even though two leaves match. 
assert body["overwrite_applied"] is True - assert body["overwrite_changes"]["evaluator_removals"] == [ + assert body["overwrite_changes"]["rule_removals"] == [ { - "name": evaluator_name, + "name": rule_name, "referenced_by_active_controls": True, "control_ids": [control_id], "control_names": [control_name], @@ -363,7 +363,7 @@ def test_init_agent_overwrite_noop_reports_not_applied(client: TestClient) -> No payload = _init_payload( agent_name=agent_name, steps=[{"type": "tool", "name": "tool-a", "input_schema": {}, "output_schema": {}}], - evaluators=[{"name": "eval-a", "description": "x", "config_schema": {"type": "object"}}], + rules=[{"name": "eval-a", "description": "x", "config_schema": {"type": "object"}}], ) first_resp = client.post("/api/v1/agents/initAgent", json=payload) assert first_resp.status_code == 200 @@ -382,8 +382,8 @@ def test_init_agent_overwrite_noop_reports_not_applied(client: TestClient) -> No "steps_added": [], "steps_updated": [], "steps_removed": [], - "evaluators_added": [], - "evaluators_updated": [], - "evaluators_removed": [], - "evaluator_removals": [], + "rules_added": [], + "rules_updated": [], + "rules_removed": [], + "rule_removals": [], } diff --git a/server/tests/test_new_features.py b/server/tests/test_new_features.py index 435273a7..dbdb4ebe 100644 --- a/server/tests/test_new_features.py +++ b/server/tests/test_new_features.py @@ -1,4 +1,4 @@ -"""Tests for new features: evaluators endpoint, policy validation, PATCH agents.""" +"""Tests for new features: rules endpoint, policy validation, PATCH agents.""" import uuid @@ -11,7 +11,7 @@ def make_agent_payload( agent_name: str | None = None, name: str | None = None, steps: list | None = None, - evaluators: list | None = None, + rules: list | None = None, ): """Helper to create agent payload.""" if agent_name is not None: @@ -29,29 +29,29 @@ def make_agent_payload( "agent_version": "1.0", }, "steps": steps or [], - "evaluators": evaluators or [], + "rules": rules or [], } # 
============================================================================= -# GET /evaluators endpoint +# GET /rules endpoint # ============================================================================= -def test_get_evaluators(client: TestClient) -> None: - """Given built-in evaluators are registered, when listing evaluators, then returns all with schemas.""" - # Given: built-in evaluators are registered - # When: listing evaluators - resp = client.get("/api/v1/evaluators") +def test_get_rules(client: TestClient) -> None: + """Given built-in rules are registered, when listing rules, then returns all with schemas.""" + # Given: built-in rules are registered + # When: listing rules + resp = client.get("/api/v1/rules") - # Then: response includes built-in evaluators with schemas + # Then: response includes built-in rules with schemas assert resp.status_code == 200 - evaluators = resp.json() - assert isinstance(evaluators, dict) - assert "regex" in evaluators - assert "list" in evaluators + rules = resp.json() + assert isinstance(rules, dict) + assert "regex" in rules + assert "list" in rules - regex = evaluators["regex"] + regex = rules["regex"] assert regex["name"] == "regex" assert "version" in regex assert "description" in regex @@ -59,16 +59,16 @@ def test_get_evaluators(client: TestClient) -> None: assert isinstance(regex["config_schema"], dict) -def test_get_evaluators_schema_has_properties(client: TestClient) -> None: - """Given the regex evaluator is registered, when listing evaluators, then schema has pattern property.""" - # Given: the regex evaluator is registered - # When: listing evaluators - resp = client.get("/api/v1/evaluators") +def test_get_rules_schema_has_properties(client: TestClient) -> None: + """Given the regex rule is registered, when listing rules, then schema has pattern property.""" + # Given: the regex rule is registered + # When: listing rules + resp = client.get("/api/v1/rules") # Then: regex schema includes expected properties 
assert resp.status_code == 200 - evaluators = resp.json() - regex_schema = evaluators["regex"]["config_schema"] + rules = resp.json() + regex_schema = rules["regex"]["config_schema"] assert "properties" in regex_schema assert "pattern" in regex_schema["properties"] @@ -103,7 +103,7 @@ def test_patch_agent_remove_step(client: TestClient) -> None: assert patch_resp.status_code == 200 data = patch_resp.json() assert data["steps_removed"] == [{"type": "tool", "name": "tool1"}] - assert data["evaluators_removed"] == [] + assert data["rules_removed"] == [] get_resp = client.get(f"/api/v1/agents/{agent_name}") steps = [s["name"] for s in get_resp.json()["steps"]] @@ -111,15 +111,15 @@ def test_patch_agent_remove_step(client: TestClient) -> None: assert "tool2" in steps -def test_patch_agent_remove_evaluator(client: TestClient) -> None: - """Given an agent with multiple evaluators, when removing one, then only that evaluator is removed.""" +def test_patch_agent_remove_rule(client: TestClient) -> None: + """Given an agent with multiple rules, when removing one, then only that rule is removed.""" # Given: agent_name = str(uuid.uuid4()) name = f"Test Agent {uuid.uuid4().hex[:8]}" payload = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ {"name": "eval1", "config_schema": {}}, {"name": "eval2", "config_schema": {}}, ], @@ -129,16 +129,16 @@ def test_patch_agent_remove_evaluator(client: TestClient) -> None: # When: patch_resp = client.patch( f"/api/v1/agents/{agent_name}", - json={"remove_evaluators": ["eval1"]}, + json={"remove_rules": ["eval1"]}, ) # Then: assert patch_resp.status_code == 200 data = patch_resp.json() - assert data["evaluators_removed"] == ["eval1"] + assert data["rules_removed"] == ["eval1"] - get_resp = client.get(f"/api/v1/agents/{agent_name}/evaluators") - evals = [e["name"] for e in get_resp.json()["evaluators"]] + get_resp = client.get(f"/api/v1/agents/{agent_name}/rules") + evals = [e["name"] for e in 
get_resp.json()["rules"]] assert "eval1" not in evals assert "eval2" in evals @@ -156,7 +156,7 @@ def test_patch_agent_remove_nonexistent_is_idempotent(client: TestClient) -> Non f"/api/v1/agents/{agent_name}", json={ "remove_steps": [{"type": "tool", "name": "nonexistent"}], - "remove_evaluators": ["also_nonexistent"], + "remove_rules": ["also_nonexistent"], }, ) @@ -164,7 +164,7 @@ def test_patch_agent_remove_nonexistent_is_idempotent(client: TestClient) -> Non assert patch_resp.status_code == 200 data = patch_resp.json() assert data["steps_removed"] == [] - assert data["evaluators_removed"] == [] + assert data["rules_removed"] == [] def test_patch_agent_not_found(client: TestClient) -> None: @@ -183,7 +183,7 @@ def test_patch_agent_not_found(client: TestClient) -> None: def test_patch_agent_remove_both(client: TestClient) -> None: - """Given an agent with steps and evaluators, when removing both, then both are removed.""" + """Given an agent with steps and rules, when removing both, then both are removed.""" # Given: agent_name = str(uuid.uuid4()) name = f"Test Agent {uuid.uuid4().hex[:8]}" @@ -191,7 +191,7 @@ def test_patch_agent_remove_both(client: TestClient) -> None: agent_name=agent_name, name=name, steps=[{"type": "tool", "name": "my_tool", "input_schema": {}, "output_schema": {}}], - evaluators=[{"name": "my_eval", "config_schema": {}}], + rules=[{"name": "my_eval", "config_schema": {}}], ) client.post("/api/v1/agents/initAgent", json=payload) @@ -200,7 +200,7 @@ def test_patch_agent_remove_both(client: TestClient) -> None: f"/api/v1/agents/{agent_name}", json={ "remove_steps": [{"type": "tool", "name": "my_tool"}], - "remove_evaluators": ["my_eval"], + "remove_rules": ["my_eval"], }, ) @@ -208,7 +208,7 @@ def test_patch_agent_remove_both(client: TestClient) -> None: assert patch_resp.status_code == 200 data = patch_resp.json() assert data["steps_removed"] == [{"type": "tool", "name": "my_tool"}] - assert data["evaluators_removed"] == ["my_eval"] + assert 
data["rules_removed"] == ["my_eval"] def test_patch_agent_empty_request_is_noop(client: TestClient) -> None: @@ -220,29 +220,29 @@ def test_patch_agent_empty_request_is_noop(client: TestClient) -> None: agent_name=agent_name, name=name, steps=[{"type": "tool", "name": "keep_me", "input_schema": {}, "output_schema": {}}], - evaluators=[{"name": "keep_me_too", "config_schema": {}}], + rules=[{"name": "keep_me_too", "config_schema": {}}], ) client.post("/api/v1/agents/initAgent", json=payload) # When: patch_resp = client.patch( f"/api/v1/agents/{agent_name}", - json={"remove_steps": [], "remove_evaluators": []}, + json={"remove_steps": [], "remove_rules": []}, ) # Then: assert patch_resp.status_code == 200 data = patch_resp.json() assert data["steps_removed"] == [] - assert data["evaluators_removed"] == [] + assert data["rules_removed"] == [] # Verify nothing was removed get_resp = client.get(f"/api/v1/agents/{agent_name}") steps = [s["name"] for s in get_resp.json()["steps"]] assert "keep_me" in steps - get_evals = client.get(f"/api/v1/agents/{agent_name}/evaluators") - evals = [e["name"] for e in get_evals.json()["evaluators"]] + get_evals = client.get(f"/api/v1/agents/{agent_name}/rules") + evals = [e["name"] for e in get_evals.json()["rules"]] assert "keep_me_too" in evals @@ -277,8 +277,8 @@ def _create_policy_with_control( return policy_id, control_id -def test_policy_assignment_with_builtin_evaluator(client: TestClient) -> None: - """Given an agent and a policy with built-in evaluator control, when assigning policy, then succeeds.""" +def test_policy_assignment_with_builtin_rule(client: TestClient) -> None: + """Given an agent and a policy with built-in rule control, when assigning policy, then succeeds.""" # Given: agent_name = str(uuid.uuid4()) name = f"Test Agent {uuid.uuid4().hex[:8]}" @@ -293,7 +293,7 @@ def test_policy_assignment_with_builtin_evaluator(client: TestClient) -> None: "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, 
"selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "test.*"}}, + "rule": {"name": "regex", "config": {"pattern": "test.*"}}, "action": {"decision": "deny"}, }, ) @@ -305,15 +305,15 @@ def test_policy_assignment_with_builtin_evaluator(client: TestClient) -> None: assert resp.status_code == 200 -def test_policy_assignment_with_registered_agent_evaluator(client: TestClient) -> None: - """Given an agent with custom evaluator and matching policy, when assigning policy, then succeeds.""" +def test_policy_assignment_with_registered_agent_rule(client: TestClient) -> None: + """Given an agent with custom rule and matching policy, when assigning policy, then succeeds.""" # Given: agent_name = f"agent-{uuid.uuid4().hex[:12]}" agent_name = agent_name payload = make_agent_payload( agent_name=agent_name, name=agent_name, - evaluators=[{"name": "custom-eval", "config_schema": {"type": "object"}}], + rules=[{"name": "custom-eval", "config_schema": {"type": "object"}}], ) client.post("/api/v1/agents/initAgent", json=payload) @@ -325,7 +325,7 @@ def test_policy_assignment_with_registered_agent_evaluator(client: TestClient) - "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": f"{agent_name}:custom-eval", "config": {}}, + "rule": {"name": f"{agent_name}:custom-eval", "config": {}}, "action": {"decision": "deny"}, }, ) @@ -337,8 +337,8 @@ def test_policy_assignment_with_registered_agent_evaluator(client: TestClient) - assert resp.status_code == 200 -def test_control_creation_with_unregistered_evaluator_fails(client: TestClient) -> None: - """Given an agent without evaluator, when creating a control that uses it, then fails.""" +def test_control_creation_with_unregistered_rule_fails(client: TestClient) -> None: + """Given an agent without rule, when creating a control that uses it, then fails.""" # Given: agent_name = f"agent-{uuid.uuid4().hex[:12]}" agent_name = agent_name 
@@ -355,7 +355,7 @@ def test_control_creation_with_unregistered_evaluator_fails(client: TestClient) "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": {"name": f"{agent_name}:nonexistent-eval", "config": {}}, + "rule": {"name": f"{agent_name}:nonexistent-eval", "config": {}}, }, "action": {"decision": "deny"}, } @@ -369,15 +369,15 @@ def test_control_creation_with_unregistered_evaluator_fails(client: TestClient) assert "not registered" in response_data.get("detail", "") -def test_policy_assignment_cross_agent_evaluator_fails(client: TestClient) -> None: - """Given policy with Agent A's evaluator, when assigning to Agent B, then fails.""" - # Given: Agent A has evaluator, Agent B does not +def test_policy_assignment_cross_agent_rule_fails(client: TestClient) -> None: + """Given policy with Agent A's rule, when assigning to Agent B, then fails.""" + # Given: Agent A has rule, Agent B does not agent_a_id = f"agent-a-{uuid.uuid4().hex[:12]}" agent_a_name = agent_a_id payload_a = make_agent_payload( agent_name=agent_a_id, name=agent_a_name, - evaluators=[{"name": "shared-eval", "config_schema": {"type": "object"}}], + rules=[{"name": "shared-eval", "config_schema": {"type": "object"}}], ) client.post("/api/v1/agents/initAgent", json=payload_a) @@ -394,7 +394,7 @@ def test_policy_assignment_cross_agent_evaluator_fails(client: TestClient) -> No "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": f"{agent_a_name}:shared-eval", "config": {}}, + "rule": {"name": f"{agent_a_name}:shared-eval", "config": {}}, "action": {"decision": "deny"}, }, ) @@ -429,7 +429,7 @@ def test_schema_compat_nested_additional_properties_compatible(client: TestClien payload1 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "nested-eval", "config_schema": { @@ -450,7 +450,7 @@ def 
test_schema_compat_nested_additional_properties_compatible(client: TestClien payload2 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "nested-eval", "config_schema": { @@ -482,7 +482,7 @@ def test_schema_compat_nested_type_change_incompatible(client: TestClient) -> No payload1 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "nested-eval", "config_schema": { @@ -503,7 +503,7 @@ def test_schema_compat_nested_type_change_incompatible(client: TestClient) -> No payload2 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "nested-eval", "config_schema": { @@ -526,28 +526,28 @@ def test_schema_compat_nested_type_change_incompatible(client: TestClient) -> No # ============================================================================= -# Evaluator removal protection +# Rule removal protection # ============================================================================= -def test_patch_agent_remove_evaluator_blocked_by_control(client: TestClient) -> None: - """Given an agent with evaluator used by a control, when removing evaluator, then rejected. +def test_patch_agent_remove_rule_blocked_by_control(client: TestClient) -> None: + """Given an agent with rule used by a control, when removing rule, then rejected. 
- Given: An agent with evaluator "my-eval" and a control using that evaluator + Given: An agent with rule "my-eval" and a control using that rule When: Trying to remove "my-eval" via PATCH Then: Returns 409 with error message about referencing control """ - # Given: Agent with custom evaluator + # Given: Agent with custom rule agent_name = f"agent-{uuid.uuid4().hex[:12]}" agent_name = agent_name payload = make_agent_payload( agent_name=agent_name, name=agent_name, - evaluators=[{"name": "my-eval", "config_schema": {"type": "object"}}], + rules=[{"name": "my-eval", "config_schema": {"type": "object"}}], ) client.post("/api/v1/agents/initAgent", json=payload) - # And: A control set up to use that evaluator + # And: A control set up to use that rule policy_id, _ = _create_policy_with_control( client, f"policy-{uuid.uuid4().hex[:8]}", @@ -556,7 +556,7 @@ def test_patch_agent_remove_evaluator_blocked_by_control(client: TestClient) -> "execution": "server", "scope": {"step_types": ["llm"], "stages": ["pre"]}, "selector": {"path": "input"}, - "evaluator": {"name": f"{agent_name}:my-eval", "config": {}}, + "rule": {"name": f"{agent_name}:my-eval", "config": {}}, "action": {"decision": "deny"}, }, ) @@ -565,10 +565,10 @@ def test_patch_agent_remove_evaluator_blocked_by_control(client: TestClient) -> assign_resp = client.post(f"/api/v1/agents/{agent_name}/policy/{policy_id}") assert assign_resp.status_code == 200 - # When: Trying to remove the evaluator + # When: Trying to remove the rule patch_resp = client.patch( f"/api/v1/agents/{agent_name}", - json={"remove_evaluators": ["my-eval"]}, + json={"remove_rules": ["my-eval"]}, ) # Then: Should be rejected with 409 (RFC 7807 format) @@ -576,34 +576,34 @@ def test_patch_agent_remove_evaluator_blocked_by_control(client: TestClient) -> response_data = patch_resp.json() detail = response_data.get("detail", "") errors = response_data.get("errors", []) - assert "Cannot remove evaluators" in detail - # Check errors array contains 
reference to the evaluator + assert "Cannot remove rules" in detail + # Check errors array contains reference to the rule assert any("my-eval" in e.get("message", "") for e in errors) -def test_patch_agent_remove_evaluator_allowed_without_policy(client: TestClient) -> None: - """Given an agent with evaluator but no policy, when removing evaluator, then succeeds. +def test_patch_agent_remove_rule_allowed_without_policy(client: TestClient) -> None: + """Given an agent with rule but no policy, when removing rule, then succeeds. - Given: An agent with evaluator "my-eval" but no policy assigned + Given: An agent with rule "my-eval" but no policy assigned When: Trying to remove "my-eval" via PATCH Then: Succeeds since no controls can reference it """ - # Given: Agent with custom evaluator but no policy + # Given: Agent with custom rule but no policy agent_name = f"agent-{uuid.uuid4().hex[:12]}" agent_name = agent_name payload = make_agent_payload( agent_name=agent_name, name=agent_name, - evaluators=[{"name": "my-eval", "config_schema": {"type": "object"}}], + rules=[{"name": "my-eval", "config_schema": {"type": "object"}}], ) client.post("/api/v1/agents/initAgent", json=payload) - # When: Removing the evaluator (no policy = no controls can reference it) + # When: Removing the rule (no policy = no controls can reference it) patch_resp = client.patch( f"/api/v1/agents/{agent_name}", - json={"remove_evaluators": ["my-eval"]}, + json={"remove_rules": ["my-eval"]}, ) # Then: Should succeed assert patch_resp.status_code == 200 - assert patch_resp.json()["evaluators_removed"] == ["my-eval"] + assert patch_resp.json()["rules_removed"] == ["my-eval"] diff --git a/server/tests/test_observability_endpoints.py b/server/tests/test_observability_endpoints.py index c6b722e8..254d01c1 100644 --- a/server/tests/test_observability_endpoints.py +++ b/server/tests/test_observability_endpoints.py @@ -275,7 +275,7 @@ def test_event_with_all_fields(self): confidence=0.99, 
timestamp=datetime.now(UTC), execution_duration_ms=15.5, - evaluator_name="regex", + rule_name="regex", selector_path="input", error_message=None, metadata={"key": "value"}, diff --git a/server/tests/test_observability_models.py b/server/tests/test_observability_models.py index 4a7d9252..329dac41 100644 --- a/server/tests/test_observability_models.py +++ b/server/tests/test_observability_models.py @@ -183,13 +183,13 @@ def test_optional_fields(self): matched=False, confidence=0.5, execution_duration_ms=15.3, - evaluator_name="regex", + rule_name="regex", selector_path="input", error_message=None, metadata={"key": "value"}, ) assert event.execution_duration_ms == 15.3 - assert event.evaluator_name == "regex" + assert event.rule_name == "regex" assert event.selector_path == "input" assert event.metadata == {"key": "value"} diff --git a/server/tests/test_policy_integration.py b/server/tests/test_policy_integration.py index efdb5526..086221b8 100644 --- a/server/tests/test_policy_integration.py +++ b/server/tests/test_policy_integration.py @@ -43,7 +43,7 @@ def _create_control(client: TestClient, name: str | None = None, data: dict | No payload = deepcopy(VALID_CONTROL_PAYLOAD) marker = json.dumps(data, sort_keys=True) if data is not None else control_name payload["description"] = f"Name: {control_name}, Marker: {marker}" - payload["condition"]["evaluator"]["config"]["pattern"] = marker + payload["condition"]["rule"]["config"]["pattern"] = marker resp = client.put("/api/v1/controls", json={"name": control_name, "data": payload}) assert resp.status_code == 200 return resp.json()["control_id"] diff --git a/server/tests/test_principal_namespace_flow.py b/server/tests/test_principal_namespace_flow.py index 8f16a795..29fd359a 100644 --- a/server/tests/test_principal_namespace_flow.py +++ b/server/tests/test_principal_namespace_flow.py @@ -227,7 +227,7 @@ def test_duplicate_control_names_allowed_across_principal_namespaces(app: FastAP assert ns_b.put("/api/v1/controls", 
json=payload).status_code == 200 -def test_agent_scoped_evaluator_validation_uses_principal_namespace(app: FastAPI) -> None: +def test_agent_scoped_rule_validation_uses_principal_namespace(app: FastAPI) -> None: set_authorizer(HeaderNamespaceAuthorizer()) ns_a = _client(app, "ns-a") @@ -238,13 +238,13 @@ def test_agent_scoped_evaluator_validation_uses_principal_namespace(app: FastAPI "/api/v1/agents/initAgent", json={ **_agent_payload(agent_name), - "evaluators": [{"name": "custom", "config_schema": {"type": "object"}}], + "rules": [{"name": "custom", "config_schema": {"type": "object"}}], }, ) assert register_b.status_code == 200, register_b.text control_data = deepcopy(VALID_CONTROL_PAYLOAD) - control_data["condition"]["evaluator"] = { + control_data["condition"]["rule"] = { "name": f"{agent_name}:custom", "config": {}, } diff --git a/server/tests/test_evaluator_schemas.py b/server/tests/test_rule_schemas.py similarity index 82% rename from server/tests/test_evaluator_schemas.py rename to server/tests/test_rule_schemas.py index bb952c06..d477b113 100644 --- a/server/tests/test_evaluator_schemas.py +++ b/server/tests/test_rule_schemas.py @@ -1,4 +1,4 @@ -"""Tests for evaluator schema functionality.""" +"""Tests for rule schema functionality.""" import uuid @@ -11,9 +11,9 @@ def make_agent_payload( agent_name: str | None = None, name: str | None = None, - evaluators: list | None = None, + rules: list | None = None, ): - """Helper to create agent payload with evaluators.""" + """Helper to create agent payload with rules.""" if agent_name is not None: name = agent_name elif name is None: @@ -29,23 +29,23 @@ def make_agent_payload( "agent_version": "1.0", }, "steps": [], - "evaluators": evaluators or [], + "rules": rules or [], } # ============================================================================= -# initAgent with evaluators +# initAgent with rules # ============================================================================= -def 
test_init_agent_with_evaluators(client: TestClient) -> None: - """Test creating agent with evaluator schemas.""" - # Given: A payload with custom evaluator +def test_init_agent_with_rules(client: TestClient) -> None: + """Test creating agent with rule schemas.""" + # Given: A payload with custom rule payload = make_agent_payload( - evaluators=[ + rules=[ { "name": "my-custom-eval", - "description": "A custom evaluator", + "description": "A custom rule", "config_schema": { "type": "object", "properties": {"threshold": {"type": "number"}}, @@ -60,11 +60,11 @@ def test_init_agent_with_evaluators(client: TestClient) -> None: assert resp.json()["created"] is True -def test_init_agent_evaluator_name_collision_rejected(client: TestClient) -> None: - """Test that evaluator names conflicting with built-in evaluators are rejected.""" - # Given: Evaluator name conflicting with built-in +def test_init_agent_rule_name_collision_rejected(client: TestClient) -> None: + """Test that rule names conflicting with built-in rules are rejected.""" + # Given: Rule name conflicting with built-in payload = make_agent_payload( - evaluators=[ + rules=[ { "name": "regex", # Conflicts with built-in "description": "Trying to override regex", @@ -77,14 +77,14 @@ def test_init_agent_evaluator_name_collision_rejected(client: TestClient) -> Non # Then: Should be rejected (RFC 7807 format) assert resp.status_code == 409 response_data = resp.json() - assert "conflicts with built-in evaluator" in response_data.get("detail", "") + assert "conflicts with built-in rule" in response_data.get("detail", "") -def test_init_agent_evaluator_name_collision_list(client: TestClient) -> None: +def test_init_agent_rule_name_collision_list(client: TestClient) -> None: """Test that 'list' built-in name is also rejected.""" - # Given: Evaluator named 'list' + # Given: Rule named 'list' payload = make_agent_payload( - evaluators=[{"name": "list", "config_schema": {}}] + rules=[{"name": "list", "config_schema": {}}] ) # 
When: Initializing agent resp = client.post("/api/v1/agents/initAgent", json=payload) @@ -92,15 +92,15 @@ def test_init_agent_evaluator_name_collision_list(client: TestClient) -> None: assert resp.status_code == 409 -def test_init_agent_update_evaluator_compatible_schema(client: TestClient) -> None: - """Test updating evaluator with compatible schema change (add optional field).""" - # Given: Agent with evaluator +def test_init_agent_update_rule_compatible_schema(client: TestClient) -> None: + """Test updating rule with compatible schema change (add optional field).""" + # Given: Agent with rule agent_name = str(uuid.uuid4()) name = f"Test Agent {uuid.uuid4().hex[:8]}" payload1 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -118,7 +118,7 @@ def test_init_agent_update_evaluator_compatible_schema(client: TestClient) -> No payload2 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -137,17 +137,17 @@ def test_init_agent_update_evaluator_compatible_schema(client: TestClient) -> No assert resp2.status_code == 200 -def test_init_agent_update_evaluator_incompatible_schema_rejected( +def test_init_agent_update_rule_incompatible_schema_rejected( client: TestClient, ) -> None: """Test that incompatible schema change is rejected.""" - # Given: Agent with evaluator + # Given: Agent with rule agent_name = str(uuid.uuid4()) name = f"Test Agent {uuid.uuid4().hex[:8]}" payload1 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -164,7 +164,7 @@ def test_init_agent_update_evaluator_incompatible_schema_rejected( payload2 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -180,15 +180,15 @@ def test_init_agent_update_evaluator_incompatible_schema_rejected( assert "not backward compatible" in 
resp2.json()["detail"] -def test_init_agent_update_evaluator_type_change_rejected(client: TestClient) -> None: +def test_init_agent_update_rule_type_change_rejected(client: TestClient) -> None: """Test that changing property type is rejected.""" - # Given: Agent with evaluator + # Given: Agent with rule agent_name = str(uuid.uuid4()) name = f"Test Agent {uuid.uuid4().hex[:8]}" payload1 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -204,7 +204,7 @@ def test_init_agent_update_evaluator_type_change_rejected(client: TestClient) -> payload2 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -222,13 +222,13 @@ def test_init_agent_update_evaluator_type_change_rejected(client: TestClient) -> def test_init_agent_add_required_property_rejected(client: TestClient) -> None: """Test that adding a new required property is rejected.""" - # Given: Agent with evaluator + # Given: Agent with rule agent_name = str(uuid.uuid4()) name = f"Test Agent {uuid.uuid4().hex[:8]}" payload1 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -244,7 +244,7 @@ def test_init_agent_add_required_property_rejected(client: TestClient) -> None: payload2 = make_agent_payload( agent_name=agent_name, name=name, - evaluators=[ + rules=[ { "name": "my-eval", "config_schema": { @@ -265,15 +265,15 @@ def test_init_agent_add_required_property_rejected(client: TestClient) -> None: # ============================================================================= -# Evaluator listing endpoints +# Rule listing endpoints # ============================================================================= -def test_list_agent_evaluators(client: TestClient) -> None: - """Test listing agent's evaluator schemas.""" - # Given: Agent with two evaluators +def test_list_agent_rules(client: TestClient) -> None: 
+ """Test listing agent's rule schemas.""" + # Given: Agent with two rules payload = make_agent_payload( - evaluators=[ + rules=[ {"name": "eval-a", "description": "First", "config_schema": {}}, {"name": "eval-b", "description": "Second", "config_schema": {}}, ] @@ -282,51 +282,51 @@ def test_list_agent_evaluators(client: TestClient) -> None: assert resp.status_code == 200 agent_name = payload["agent"]["agent_name"] - # When: Listing evaluators - list_resp = client.get(f"/api/v1/agents/{agent_name}/evaluators") - # Then: Should return both evaluators + # When: Listing rules + list_resp = client.get(f"/api/v1/agents/{agent_name}/rules") + # Then: Should return both rules assert list_resp.status_code == 200 data = list_resp.json() - assert len(data["evaluators"]) == 2 - names = {e["name"] for e in data["evaluators"]} + assert len(data["rules"]) == 2 + names = {e["name"] for e in data["rules"]} assert names == {"eval-a", "eval-b"} assert data["pagination"]["total"] == 2 -def test_list_agent_evaluators_pagination(client: TestClient) -> None: - """Test pagination of evaluator list.""" - # Given: Agent with 5 evaluators +def test_list_agent_rules_pagination(client: TestClient) -> None: + """Test pagination of rule list.""" + # Given: Agent with 5 rules payload = make_agent_payload( - evaluators=[{"name": f"eval-{i}", "config_schema": {}} for i in range(5)] + rules=[{"name": f"eval-{i}", "config_schema": {}} for i in range(5)] ) resp = client.post("/api/v1/agents/initAgent", json=payload) assert resp.status_code == 200 agent_name = payload["agent"]["agent_name"] # When: Fetching first page - resp1 = client.get(f"/api/v1/agents/{agent_name}/evaluators?offset=0&limit=2") + resp1 = client.get(f"/api/v1/agents/{agent_name}/rules?offset=0&limit=2") # Then: Should return 2 items with total=5 assert resp1.status_code == 200 data1 = resp1.json() - assert len(data1["evaluators"]) == 2 + assert len(data1["rules"]) == 2 assert data1["pagination"]["total"] == 5 # When: Fetching 
second page - resp2 = client.get(f"/api/v1/agents/{agent_name}/evaluators?offset=2&limit=2") + resp2 = client.get(f"/api/v1/agents/{agent_name}/rules?offset=2&limit=2") # Then: Should return 2 more items assert resp2.status_code == 200 data2 = resp2.json() - assert len(data2["evaluators"]) == 2 + assert len(data2["rules"]) == 2 -def test_get_agent_evaluator_by_name(client: TestClient) -> None: - """Test getting specific evaluator by name.""" - # Given: Agent with evaluator +def test_get_agent_rule_by_name(client: TestClient) -> None: + """Test getting specific rule by name.""" + # Given: Agent with rule payload = make_agent_payload( - evaluators=[ + rules=[ { "name": "my-eval", - "description": "Test evaluator", + "description": "Test rule", "config_schema": {"type": "object"}, } ] @@ -335,35 +335,35 @@ def test_get_agent_evaluator_by_name(client: TestClient) -> None: assert resp.status_code == 200 agent_name = payload["agent"]["agent_name"] - # When: Getting evaluator by name - get_resp = client.get(f"/api/v1/agents/{agent_name}/evaluators/my-eval") - # Then: Should return evaluator details + # When: Getting rule by name + get_resp = client.get(f"/api/v1/agents/{agent_name}/rules/my-eval") + # Then: Should return rule details assert get_resp.status_code == 200 data = get_resp.json() assert data["name"] == "my-eval" - assert data["description"] == "Test evaluator" + assert data["description"] == "Test rule" -def test_get_agent_evaluator_not_found(client: TestClient) -> None: - """Test 404 for non-existent evaluator name.""" - # Given: Agent with no evaluators - payload = make_agent_payload(evaluators=[]) +def test_get_agent_rule_not_found(client: TestClient) -> None: + """Test 404 for non-existent rule name.""" + # Given: Agent with no rules + payload = make_agent_payload(rules=[]) resp = client.post("/api/v1/agents/initAgent", json=payload) assert resp.status_code == 200 agent_name = payload["agent"]["agent_name"] - # When: Getting nonexistent evaluator - get_resp 
= client.get(f"/api/v1/agents/{agent_name}/evaluators/nonexistent") + # When: Getting nonexistent rule + get_resp = client.get(f"/api/v1/agents/{agent_name}/rules/nonexistent") # Then: Should return 404 assert get_resp.status_code == 404 -def test_list_evaluators_agent_not_found(client: TestClient) -> None: +def test_list_rules_agent_not_found(client: TestClient) -> None: """Test 404 for non-existent agent.""" # Given: Nonexistent agent ID fake_id = str(uuid.uuid4()) - # When: Listing evaluators - resp = client.get(f"/api/v1/agents/{fake_id}/evaluators") + # When: Listing rules + resp = client.get(f"/api/v1/agents/{fake_id}/rules") # Then: Should return 404 assert resp.status_code == 404 diff --git a/server/tests/test_rule_terminology_alembic_migration.py b/server/tests/test_rule_terminology_alembic_migration.py new file mode 100644 index 00000000..4a77e711 --- /dev/null +++ b/server/tests/test_rule_terminology_alembic_migration.py @@ -0,0 +1,297 @@ +"""Alembic coverage for evaluator-to-rule payload migration.""" + +from __future__ import annotations + +import json +import uuid +from pathlib import Path +from typing import Any + +import pytest +from agent_control_server.config import db_config +from alembic import command +from alembic.config import Config +from sqlalchemy import create_engine, text +from sqlalchemy.engine import Engine, make_url + +SERVER_DIR = Path(__file__).resolve().parents[1] +PRE_MIGRATION_REVISION = "e2b7f4a9c6d1" +MIGRATION_REVISION = "d4f0b2e1c9a8" +_BASE_DB_URL = make_url(db_config.get_url()) + +pytestmark = pytest.mark.skipif( + _BASE_DB_URL.get_backend_name() != "postgresql", + reason="Rule terminology Alembic migration tests require PostgreSQL.", +) + + +@pytest.fixture +def temp_db_url() -> str: + temp_db_name = f"agent_control_rules_{uuid.uuid4().hex[:12]}" + admin_url = _BASE_DB_URL.set(database="postgres").render_as_string( + hide_password=False + ) + target_url = _BASE_DB_URL.set(database=temp_db_name).render_as_string( + 
hide_password=False + ) + + admin_engine = create_engine(admin_url, isolation_level="AUTOCOMMIT") + with admin_engine.connect() as conn: + conn.execute(text(f'CREATE DATABASE "{temp_db_name}"')) + admin_engine.dispose() + + try: + yield target_url + finally: + cleanup_engine = create_engine(admin_url, isolation_level="AUTOCOMMIT") + with cleanup_engine.connect() as conn: + conn.execute( + text( + """ + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE datname = :db_name AND pid <> pg_backend_pid() + """ + ), + {"db_name": temp_db_name}, + ) + conn.execute(text(f'DROP DATABASE IF EXISTS "{temp_db_name}"')) + cleanup_engine.dispose() + + +@pytest.fixture +def alembic_config(temp_db_url: str) -> Config: + cfg = Config(str(SERVER_DIR / "alembic.ini")) + cfg.set_main_option("script_location", str(SERVER_DIR / "alembic")) + cfg.set_main_option("sqlalchemy.url", temp_db_url) + return cfg + + +@pytest.fixture +def temp_engine(temp_db_url: str) -> Engine: + engine = create_engine(temp_db_url, future=True) + try: + yield engine + finally: + engine.dispose() + + +def _insert_json_payloads(engine: Engine) -> dict[str, int | str]: + agent_data = { + "agent_metadata": {"evaluator": "user metadata should stay untouched"}, + "steps": [], + "evaluators": [ + { + "name": "custom", + "description": "custom rule", + "config_schema": { + "type": "object", + "properties": { + "evaluator": {"type": "string"}, + }, + }, + } + ], + } + control_data = { + "description": "legacy control", + "enabled": True, + "execution": "server", + "scope": {"step_types": ["llm"], "stages": ["pre"]}, + "condition": { + "and": [ + { + "selector": {"path": "input"}, + "evaluator": { + "name": "regex", + "config": {"pattern": "secret", "evaluator": "keep"}, + }, + }, + { + "not": { + "selector": {"path": "output"}, + "evaluator": { + "name": "list", + "config": {"values": ["ok"]}, + }, + } + }, + ] + }, + "template": { + "metadata": {"name": "templated", "evaluator": "keep"}, + "parameters": 
{}, + "definition_template": { + "selector": {"path": "input"}, + "evaluator": { + "name": "regex", + "config": {"pattern": "$param", "evaluator": "keep"}, + }, + }, + }, + "action": {"decision": "deny"}, + } + event_data = { + "evaluator_name": "regex", + "metadata": { + "primary_evaluator": "regex", + "all_evaluators": ["regex", "list"], + "evaluator": "user metadata should stay untouched", + "condition_trace": { + "type": "and", + "children": [ + {"type": "leaf", "evaluator_name": "regex"}, + {"type": "leaf", "evaluator_name": "list"}, + ], + }, + }, + } + + with engine.begin() as conn: + conn.execute( + text( + """ + INSERT INTO agents (name, data) + VALUES (:name, CAST(:data AS JSONB)) + """ + ), + {"name": "agent-rules", "data": json.dumps(agent_data)}, + ) + control_id = int( + conn.execute( + text( + """ + INSERT INTO controls (name, data) + VALUES (:name, CAST(:data AS JSONB)) + RETURNING id + """ + ), + {"name": "legacy-control", "data": json.dumps(control_data)}, + ).scalar_one() + ) + conn.execute( + text( + """ + INSERT INTO control_versions (control_id, version_num, event_type, snapshot) + VALUES (:control_id, 1, 'migration_backfill', CAST(:snapshot AS JSONB)) + """ + ), + { + "control_id": control_id, + "snapshot": json.dumps({"name": "legacy-control", "data": control_data}), + }, + ) + conn.execute( + text( + """ + INSERT INTO control_execution_events ( + control_execution_id, agent_name, data + ) + VALUES (:execution_id, :agent_name, CAST(:data AS JSONB)) + """ + ), + { + "execution_id": "terminology-migration-event", + "agent_name": "agent-rules", + "data": json.dumps(event_data), + }, + ) + + return { + "control_id": control_id, + "execution_id": "terminology-migration-event", + } + + +def _fetch_payloads(engine: Engine, ids: dict[str, int | str]) -> dict[str, Any]: + with engine.begin() as conn: + return { + "agent": conn.execute( + text("SELECT data FROM agents WHERE name = 'agent-rules'") + ).scalar_one(), + "control": conn.execute( + 
text("SELECT data FROM controls WHERE id = :id"), + {"id": ids["control_id"]}, + ).scalar_one(), + "snapshot": conn.execute( + text("SELECT snapshot FROM control_versions WHERE control_id = :id"), + {"id": ids["control_id"]}, + ).scalar_one(), + "event": conn.execute( + text( + """ + SELECT data + FROM control_execution_events + WHERE control_execution_id = :execution_id + """ + ), + {"execution_id": ids["execution_id"]}, + ).scalar_one(), + } + + +def test_upgrade_rewrites_evaluator_payload_keys_without_touching_user_config( + alembic_config: Config, + temp_engine: Engine, +) -> None: + command.upgrade(alembic_config, PRE_MIGRATION_REVISION) + ids = _insert_json_payloads(temp_engine) + + command.upgrade(alembic_config, MIGRATION_REVISION) + + payloads = _fetch_payloads(temp_engine, ids) + agent = payloads["agent"] + assert "rules" in agent + assert "evaluators" not in agent + assert "evaluator" in agent["agent_metadata"] + assert "evaluator" in agent["rules"][0]["config_schema"]["properties"] + + control = payloads["control"] + first_leaf = control["condition"]["and"][0] + second_leaf = control["condition"]["and"][1]["not"] + assert "rule" in first_leaf + assert "evaluator" not in first_leaf + assert first_leaf["rule"]["config"]["evaluator"] == "keep" + assert "rule" in second_leaf + assert "evaluator" not in second_leaf + template_definition = control["template"]["definition_template"] + assert "rule" in template_definition + assert "evaluator" not in template_definition + assert template_definition["rule"]["config"]["evaluator"] == "keep" + assert control["template"]["metadata"]["evaluator"] == "keep" + + snapshot_data = payloads["snapshot"]["data"] + assert "rule" in snapshot_data["condition"]["and"][0] + assert "evaluator" not in snapshot_data["condition"]["and"][0] + + event = payloads["event"] + assert event["rule_name"] == "regex" + assert "evaluator_name" not in event + assert event["metadata"]["primary_rule"] == "regex" + assert 
event["metadata"]["all_rules"] == ["regex", "list"] + assert event["metadata"]["evaluator"] == "user metadata should stay untouched" + trace_children = event["metadata"]["condition_trace"]["children"] + assert trace_children[0]["rule_name"] == "regex" + assert trace_children[1]["rule_name"] == "list" + + +def test_downgrade_restores_evaluator_payload_keys( + alembic_config: Config, + temp_engine: Engine, +) -> None: + command.upgrade(alembic_config, PRE_MIGRATION_REVISION) + ids = _insert_json_payloads(temp_engine) + command.upgrade(alembic_config, MIGRATION_REVISION) + + command.downgrade(alembic_config, PRE_MIGRATION_REVISION) + + payloads = _fetch_payloads(temp_engine, ids) + agent = payloads["agent"] + assert "evaluators" in agent + assert "rules" not in agent + control = payloads["control"] + assert "evaluator" in control["condition"]["and"][0] + assert "rule" not in control["condition"]["and"][0] + event = payloads["event"] + assert event["evaluator_name"] == "regex" + assert "rule_name" not in event diff --git a/server/tests/test_evaluator_utils.py b/server/tests/test_rule_utils.py similarity index 77% rename from server/tests/test_evaluator_utils.py rename to server/tests/test_rule_utils.py index 326906ff..1003af05 100644 --- a/server/tests/test_evaluator_utils.py +++ b/server/tests/test_rule_utils.py @@ -1,21 +1,21 @@ -"""Unit tests for evaluator_utils module.""" +"""Unit tests for rule_utils module.""" import pytest -from agent_control_server.services.evaluator_utils import ( +from agent_control_server.services.rule_utils import ( is_agent_scoped, - parse_evaluator_ref_full, + parse_rule_ref_full, validate_config_against_schema, ) -class TestParseEvaluatorRefFull: - """Tests for parse_evaluator_ref_full function (full three-way parsing).""" +class TestParseRuleRefFull: + """Tests for parse_rule_ref_full function (full three-way parsing).""" - def test_builtin_evaluator(self) -> None: - """Given a built-in evaluator, when parsing full, then type is 
builtin.""" + def test_builtin_rule(self) -> None: + """Given a built-in rule, when parsing full, then type is builtin.""" # When - result = parse_evaluator_ref_full("regex") + result = parse_rule_ref_full("regex") # Then assert result.type == "builtin" @@ -23,10 +23,10 @@ def test_builtin_evaluator(self) -> None: assert result.namespace is None assert result.local_name == "regex" - def test_external_evaluator(self) -> None: - """Given an external evaluator, when parsing full, then type is external.""" + def test_external_rule(self) -> None: + """Given an external rule, when parsing full, then type is external.""" # When - result = parse_evaluator_ref_full("galileo.luna") + result = parse_rule_ref_full("galileo.luna") # Then assert result.type == "external" @@ -34,10 +34,10 @@ def test_external_evaluator(self) -> None: assert result.namespace == "galileo" assert result.local_name == "luna" - def test_agent_scoped_evaluator(self) -> None: - """Given an agent-scoped evaluator, when parsing full, then type is agent.""" + def test_agent_scoped_rule(self) -> None: + """Given an agent-scoped rule, when parsing full, then type is agent.""" # When - result = parse_evaluator_ref_full("my-agent:pii-detector") + result = parse_rule_ref_full("my-agent:pii-detector") # Then assert result.type == "agent" @@ -46,9 +46,9 @@ def test_agent_scoped_evaluator(self) -> None: assert result.local_name == "pii-detector" def test_external_with_nested_path(self) -> None: - """Given an external evaluator with nested path, when parsing, splits on first dot.""" + """Given an external rule with nested path, when parsing, splits on first dot.""" # When - result = parse_evaluator_ref_full("acme.safety.toxicity") + result = parse_rule_ref_full("acme.safety.toxicity") # Then assert result.type == "external" @@ -58,7 +58,7 @@ def test_external_with_nested_path(self) -> None: def test_agent_scoped_with_dot_in_name(self) -> None: """Given agent-scoped with dot in name, when parsing, then colon takes 
precedence.""" # When - colon should be detected before dot - result = parse_evaluator_ref_full("my-agent:vendor.eval") + result = parse_rule_ref_full("my-agent:vendor.eval") # Then assert result.type == "agent" @@ -70,15 +70,15 @@ class TestIsAgentScoped: """Tests for is_agent_scoped helper function.""" def test_builtin_not_agent_scoped(self) -> None: - """Given a built-in evaluator, when checking, then returns False.""" + """Given a built-in rule, when checking, then returns False.""" assert is_agent_scoped("regex") is False def test_external_not_agent_scoped(self) -> None: - """Given an external evaluator, when checking, then returns False.""" + """Given an external rule, when checking, then returns False.""" assert is_agent_scoped("galileo.luna") is False def test_agent_scoped_returns_true(self) -> None: - """Given an agent-scoped evaluator, when checking, then returns True.""" + """Given an agent-scoped rule, when checking, then returns True.""" assert is_agent_scoped("my-agent:pii-detector") is True diff --git a/server/tests/test_services_controls.py b/server/tests/test_services_controls.py index 3815f26b..9293ec3e 100644 --- a/server/tests/test_services_controls.py +++ b/server/tests/test_services_controls.py @@ -47,7 +47,7 @@ def _unrendered_template_payload() -> dict[str, object]: "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": { + "rule": { "name": "regex", "config": {"pattern": {"$param": "pattern"}}, }, diff --git a/server/tests/test_validation_paths.py b/server/tests/test_validation_paths.py index a1f5c736..f5e4707e 100644 --- a/server/tests/test_validation_paths.py +++ b/server/tests/test_validation_paths.py @@ -8,9 +8,9 @@ def test_format_field_path_renders_dot_and_bracket_notation() -> None: # When: formatting the field path assert ( format_field_path( - ("data", "condition", "and", 0, "evaluator", "config", "logic") + ("data", "condition", "and", 0, "rule", "config", "logic") ) - == 
"data.condition.and[0].evaluator.config.logic" + == "data.condition.and[0].rule.config.logic" ) # Then: indices use brackets and object keys use dots diff --git a/server/tests/utils.py b/server/tests/utils.py index 92cf6c4d..b96ca314 100644 --- a/server/tests/utils.py +++ b/server/tests/utils.py @@ -13,7 +13,7 @@ "scope": {"step_types": ["llm"], "stages": ["pre"]}, "condition": { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "x"}}, + "rule": {"name": "regex", "config": {"pattern": "x"}}, }, "action": {"decision": "deny"} } diff --git a/server/unit_tests/test_endpoint_helpers.py b/server/unit_tests/test_endpoint_helpers.py index 4d245206..9208b66d 100644 --- a/server/unit_tests/test_endpoint_helpers.py +++ b/server/unit_tests/test_endpoint_helpers.py @@ -2,9 +2,9 @@ from types import SimpleNamespace -from agent_control_models import ControlDefinition, ControlMatch, EvaluatorResult +from agent_control_models import ControlDefinition, ControlMatch, RuleResult from agent_control_server.endpoints.agents import ( - _find_referencing_controls_for_removed_evaluators, + _find_referencing_controls_for_removed_rules, ) from agent_control_server.endpoints.evaluation import ( ControlAdapter, @@ -13,7 +13,7 @@ def test_find_referencing_controls_dedupes_composite_matches() -> None: - # Given: two leaves in the same control reference the same evaluator + # Given: two leaves in the same control reference the same rule controls = [ SimpleNamespace( name="composite-ctrl", @@ -23,11 +23,11 @@ def test_find_referencing_controls_dedupes_composite_matches() -> None: "and": [ { "selector": {"path": "input"}, - "evaluator": {"name": "agent-123456:custom", "config": {}}, + "rule": {"name": "agent-123456:custom", "config": {}}, }, { "selector": {"path": "output"}, - "evaluator": {"name": "agent-123456:custom", "config": {}}, + "rule": {"name": "agent-123456:custom", "config": {}}, }, ] }, @@ -36,19 +36,19 @@ def 
test_find_referencing_controls_dedupes_composite_matches() -> None: ) ] - # When: scanning for references to the evaluator being removed - referencing_controls = _find_referencing_controls_for_removed_evaluators( + # When: scanning for references to the rule being removed + referencing_controls = _find_referencing_controls_for_removed_rules( controls, "agent-123456", {"custom"}, ) - # Then: the same control/evaluator pair is reported only once + # Then: the same control/rule pair is reported only once assert referencing_controls == [("composite-ctrl", "custom")] def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: - # Given: a composite control whose condition trace includes a raw evaluator error + # Given: a composite control whose condition trace includes a raw rule error _ = ControlAdapter( id=1, name="composite-ctrl", @@ -58,11 +58,11 @@ def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: "and": [ { "selector": {"path": "input"}, - "evaluator": {"name": "regex", "config": {"pattern": "test"}}, + "rule": {"name": "regex", "config": {"pattern": "test"}}, }, { "selector": {"path": "output"}, - "evaluator": {"name": "list", "config": {"values": ["done"]}}, + "rule": {"name": "list", "config": {"values": ["done"]}}, }, ] }, @@ -73,18 +73,18 @@ def test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: control_id=1, control_name="composite-ctrl", action="observe", - result=EvaluatorResult( + result=RuleResult( matched=False, confidence=0.9, - error="RuntimeError: secret evaluator failure", + error="RuntimeError: secret rule failure", metadata={ "condition_trace": { "type": "and", "children": [ { "type": "leaf", - "error": "RuntimeError: secret evaluator failure", - "message": "Evaluation failed: RuntimeError: secret evaluator failure", + "error": "RuntimeError: secret rule failure", + "message": "Evaluation failed: RuntimeError: secret rule failure", } ], } @@ -97,7 +97,7 @@ def 
test_sanitize_control_match_redacts_nested_condition_trace_errors() -> None: # Then: top-level and nested errors are redacted to the safe public message assert sanitized.result.error is not None - assert "secret evaluator failure" not in sanitized.result.error + assert "secret rule failure" not in sanitized.result.error trace = sanitized.result.metadata["condition_trace"] child = trace["children"][0] assert child["error"] == sanitized.result.error diff --git a/ui/AGENTS.md b/ui/AGENTS.md index fae98b14..820b261c 100644 --- a/ui/AGENTS.md +++ b/ui/AGENTS.md @@ -57,18 +57,18 @@ pnpm fetch-api-types # regenerate API types from server (must be running on :80 - `core/page-components/` — actual page UI logic lives here - `core/layouts/` — app shell, sidebar navigation -### Evaluator forms (`core/evaluators/`) +### Rule forms (`core/rules/`) -- Each evaluator type has its own folder: `json/`, `sql/`, `regex/`, `list/`, `luna/` +- Each rule type has its own folder: `json/`, `sql/`, `regex/`, `list/`, `luna/` - Each folder exports: `form.tsx` (React component), `types.ts` (form types), `index.ts` (re-exports) -- Registry in `evaluators/index.ts` maps evaluator names to form components +- Registry in `rules/index.ts` maps rule names to form components -### Form guidelines (control definition + evaluator forms) +### Form guidelines (control definition + rule forms) - **Always use the input's `label` prop** — never render a separate `` above the input as the label. Use Mantine's built-in `label` so required asterisks and layout are consistent. - **Label with tooltip**: Use `LabelWithTooltip` from `@/core/components/label-with-tooltip` when a field needs an (i) icon that shows help text on hover. Pass `label={}` and, for inputs that support it, `labelProps={labelPropsInline}` so the label renders inline. - **Required fields**: Use the input's `required` prop (e.g. Select, TextInput) so Mantine renders the red asterisk. 
Use `labelPropsInline` from the same module when you need the label inline. -- Applies to: control definition form (`edit-control/control-definition-form.tsx`) and all evaluator forms (`core/evaluators/*/form.tsx`). +- Applies to: control definition form (`edit-control/control-definition-form.tsx`) and all rule forms (`core/rules/*/form.tsx`). ### Reusable components (`core/components/`) @@ -114,13 +114,13 @@ export function SearchInput({ ## Common changes -### Add a new evaluator form +### Add a new rule form -1. Create folder in `core/evaluators//` +1. Create folder in `core/rules//` 2. Add `types.ts` with form field types 3. Add `form.tsx` with the form component — use Mantine form components with `label` prop and `LabelWithTooltip` from `@/core/components/label-with-tooltip` for fields that need a tooltip (see Form guidelines above) 4. Add `index.ts` re-exporting form and types -5. Register in `evaluators/index.ts` +5. Register in `rules/index.ts` ### Add a new API endpoint integration diff --git a/ui/README.md b/ui/README.md index 3f21cff6..8df1b1a8 100644 --- a/ui/README.md +++ b/ui/README.md @@ -7,7 +7,7 @@ The Agent Control UI is a Next.js dashboard for managing agents, controls, and m - Create, enable, disable, and edit controls - Associate controls with agents and review active configurations - Monitor evaluations, denials, and recent activity in real time -- Inspect evaluator types and configuration details in one place +- Inspect rule types and configuration details in one place ## Quick start diff --git a/ui/src/components/json-editor-codemirror/harness-schema.ts b/ui/src/components/json-editor-codemirror/harness-schema.ts index dab4b662..ba8fec98 100644 --- a/ui/src/components/json-editor-codemirror/harness-schema.ts +++ b/ui/src/components/json-editor-codemirror/harness-schema.ts @@ -16,7 +16,7 @@ export const HARNESS_CONTROL_SCHEMA: JsonSchema = { }, }, }, - EvaluatorSpec: { + RuleSpec: { type: 'object', required: ['name', 'config'], properties: { 
@@ -36,8 +36,8 @@ export const HARNESS_CONTROL_SCHEMA: JsonSchema = { selector: { anyOf: [{ $ref: '#/$defs/ControlSelector' }, { type: 'null' }], }, - evaluator: { - anyOf: [{ $ref: '#/$defs/EvaluatorSpec' }, { type: 'null' }], + rule: { + anyOf: [{ $ref: '#/$defs/RuleSpec' }, { type: 'null' }], }, and: { anyOf: [ diff --git a/ui/src/components/json-editor-codemirror/json-editor-codemirror-language.ts b/ui/src/components/json-editor-codemirror/json-editor-codemirror-language.ts index d0d74e64..bb420f61 100644 --- a/ui/src/components/json-editor-codemirror/json-editor-codemirror-language.ts +++ b/ui/src/components/json-editor-codemirror/json-editor-codemirror-language.ts @@ -7,12 +7,12 @@ export { canRenderInlineServerValidationError, caretAfterPrettyJsonReplace, computeAutoEdit, - extractEvaluatorNames, + extractRuleNames, fixJsonCommas, getCodeMirrorCompletionItems, normalizeOnBlur, setInlineServerValidationErrorsEffect, - shouldTriggerEvaluatorNameCompletion, + shouldTriggerRuleNameCompletion, triggerRefactorActionsDropdown, tryFormat, } from './language'; diff --git a/ui/src/components/json-editor-codemirror/json-editor-codemirror.playwright-story.tsx b/ui/src/components/json-editor-codemirror/json-editor-codemirror.playwright-story.tsx index 48ae896e..59b387e2 100644 --- a/ui/src/components/json-editor-codemirror/json-editor-codemirror.playwright-story.tsx +++ b/ui/src/components/json-editor-codemirror/json-editor-codemirror.playwright-story.tsx @@ -2,8 +2,8 @@ import { Box, Button, Group } from '@mantine/core'; import { useCallback, useEffect, useState } from 'react'; import type { - JsonEditorEvaluatorOption, JsonEditorMode, + JsonEditorRuleOption, } from '@/core/page-components/agent-detail/modals/edit-control/types'; import { HARNESS_CONTROL_SCHEMA } from './harness-schema'; @@ -15,7 +15,7 @@ export const CT_JSON_EDITOR_TEST_ID = 'codemirror-json-editor-ct'; const DEFAULT_CONTROL_JSON = '{"execution":"server","condition":{},"action":{"decision":"allow"}}'; 
-const CT_EVALUATORS: JsonEditorEvaluatorOption[] = [ +const CT_RULES: JsonEditorRuleOption[] = [ { id: 'regex', label: 'Regex', @@ -96,7 +96,7 @@ export function JsonEditorCodeMirrorCtHost({ mode }: { mode: JsonEditorMode }) { setJsonError={setJsonError} editorMode={mode} schema={mode === 'control' ? HARNESS_CONTROL_SCHEMA : null} - evaluators={mode === 'control' ? CT_EVALUATORS : undefined} + rules={mode === 'control' ? CT_RULES : undefined} testId={CT_JSON_EDITOR_TEST_ID} label="JSON editor (component test)" helperText="Playwright CT mounts this host without a Next.js page." diff --git a/ui/src/components/json-editor-codemirror/json-editor-codemirror.tsx b/ui/src/components/json-editor-codemirror/json-editor-codemirror.tsx index f6f156fb..7700ed49 100644 --- a/ui/src/components/json-editor-codemirror/json-editor-codemirror.tsx +++ b/ui/src/components/json-editor-codemirror/json-editor-codemirror.tsx @@ -26,8 +26,8 @@ import type { ProblemDetail, StepSchema } from '@/core/api/types'; import { LabelWithTooltip } from '@/core/components/label-with-tooltip'; import { ApiErrorAlert } from '@/core/page-components/agent-detail/modals/edit-control/api-error-alert'; import type { - JsonEditorEvaluatorOption, JsonEditorMode, + JsonEditorRuleOption, JsonSchema, } from '@/core/page-components/agent-detail/modals/edit-control/types'; @@ -48,7 +48,7 @@ import { canRenderInlineServerValidationError, caretAfterPrettyJsonReplace, computeAutoEdit, - extractEvaluatorNames, + extractRuleNames, fixJsonCommas, getCodeMirrorCompletionItems, setInlineServerValidationErrorsEffect, @@ -120,8 +120,8 @@ export type JsonEditorCodeMirrorProps = { testId?: string; editorMode?: JsonEditorMode; schema?: JsonSchema | null; - evaluators?: JsonEditorEvaluatorOption[]; - activeEvaluatorId?: string | null; + rules?: JsonEditorRuleOption[]; + activeRuleId?: string | null; steps?: StepSchema[]; debugFlags?: { enableBasicSetupExtension?: boolean; @@ -147,10 +147,10 @@ export function 
JsonEditorCodeMirror({ tooltip = DEFAULT_TOOLTIP, helperText, testId = DEFAULT_TEST_ID, - editorMode = 'evaluator-config', + editorMode = 'rule-config', schema, - evaluators, - activeEvaluatorId, + rules, + activeRuleId, steps, debugFlags, }: JsonEditorCodeMirrorProps) { @@ -167,7 +167,7 @@ export function JsonEditorCodeMirror({ const editorRootRef = useRef(null); const internalChangeRef = useRef(false); const autoEditInProgressRef = useRef(false); - const previousEvaluatorNamesRef = useRef>(new Map()); + const previousRuleNamesRef = useRef>(new Map()); const previousDecisionRef = useRef(null); const clipboard = useClipboard({ timeout: 1500 }); @@ -225,15 +225,15 @@ export function JsonEditorCodeMirror({ return buildCodeMirrorJsonExtensions({ mode: editorMode, schema, - evaluators, - activeEvaluatorId, + rules, + activeRuleId, steps, }); }, [ - activeEvaluatorId, + activeRuleId, editorMode, effectiveDebugFlags.useStandaloneCompletionSource, - evaluators, + rules, schema, steps, ]); @@ -246,7 +246,7 @@ export function JsonEditorCodeMirror({ }, []); useEffect(() => { - previousEvaluatorNamesRef.current = extractEvaluatorNames(jsonText); + previousRuleNamesRef.current = extractRuleNames(jsonText); previousDecisionRef.current = parseDecision(jsonText); }, [jsonText, parseDecision]); @@ -260,15 +260,15 @@ export function JsonEditorCodeMirror({ const view = update.view; const text = view.state.doc.toString(); - const { edit, nextEvaluatorNames, nextDecision } = computeAutoEdit( + const { edit, nextRuleNames, nextDecision } = computeAutoEdit( text, - previousEvaluatorNamesRef.current, + previousRuleNamesRef.current, previousDecisionRef.current, editorMode, - evaluators + rules ); - previousEvaluatorNamesRef.current = nextEvaluatorNames; + previousRuleNamesRef.current = nextRuleNames; previousDecisionRef.current = nextDecision; if (!edit) return; @@ -309,7 +309,7 @@ export function JsonEditorCodeMirror({ nextText = view.state.doc.toString(); } - 
previousEvaluatorNamesRef.current = extractEvaluatorNames(nextText); + previousRuleNamesRef.current = extractRuleNames(nextText); previousDecisionRef.current = parseDecision(nextText); internalChangeRef.current = true; handleJsonChange(nextText); @@ -319,7 +319,7 @@ export function JsonEditorCodeMirror({ }, [ editorMode, - evaluators, + rules, handleJsonChange, parseDecision, effectiveDebugFlags.enableAutoEdits, @@ -355,11 +355,11 @@ export function JsonEditorCodeMirror({ () => ({ mode: editorMode, schema, - evaluators, - activeEvaluatorId, + rules, + activeRuleId, steps, }), - [activeEvaluatorId, editorMode, evaluators, schema, steps] + [activeRuleId, editorMode, rules, schema, steps] ); useEffect(() => { diff --git a/ui/src/components/json-editor-codemirror/language/auto-edits.ts b/ui/src/components/json-editor-codemirror/language/auto-edits.ts index 5b72ac76..1e4c9cdb 100644 --- a/ui/src/components/json-editor-codemirror/language/auto-edits.ts +++ b/ui/src/components/json-editor-codemirror/language/auto-edits.ts @@ -5,8 +5,8 @@ import { } from 'jsonc-parser'; import type { - JsonEditorEvaluatorOption, JsonEditorMode, + JsonEditorRuleOption, } from '@/core/page-components/agent-detail/modals/edit-control/types'; import { @@ -20,22 +20,22 @@ import { } from './schema'; import type { JsonEditorTextEdit } from './types'; -type EvaluatorNodeInfo = { +type RuleNodeInfo = { name: string; nameNode: JsonNode; configNode: JsonNode | undefined; }; -function collectEvaluatorNames( +function collectRuleNames( node: JsonNode | undefined, - result: Map + result: Map ) { if (!node || node.type !== 'object' || !node.children) return; - const evaluatorNode = findNodeAtLocation(node, ['evaluator']); - if (evaluatorNode?.type === 'object') { - const nameNode = findNodeAtLocation(evaluatorNode, ['name']); - const configNode = findNodeAtLocation(evaluatorNode, ['config']); + const ruleNode = findNodeAtLocation(node, ['rule']); + if (ruleNode?.type === 'object') { + const nameNode = 
findNodeAtLocation(ruleNode, ['name']); + const configNode = findNodeAtLocation(ruleNode, ['config']); if (nameNode && typeof nameNode.value === 'string') { result.set(`${nameNode.offset}`, { name: nameNode.value, @@ -48,21 +48,20 @@ function collectEvaluatorNames( for (const key of ['and', 'or'] as const) { const arrayNode = findNodeAtLocation(node, [key]); if (arrayNode?.type === 'array' && arrayNode.children) { - for (const child of arrayNode.children) - collectEvaluatorNames(child, result); + for (const child of arrayNode.children) collectRuleNames(child, result); } } const notNode = findNodeAtLocation(node, ['not']); - if (notNode?.type === 'object') collectEvaluatorNames(notNode, result); + if (notNode?.type === 'object') collectRuleNames(notNode, result); } -export function extractEvaluatorNames(text: string): Map { +export function extractRuleNames(text: string): Map { const tree = parseTree(text); if (!tree) return new Map(); const conditionNode = findNodeAtLocation(tree, ['condition']); - const result = new Map(); - collectEvaluatorNames(conditionNode, result); + const result = new Map(); + collectRuleNames(conditionNode, result); const names = new Map(); for (const [key, info] of result) names.set(key, info.name); return names; @@ -111,24 +110,24 @@ function buildDefaultConfig(configSchema: unknown): Record { return config; } -function findEvaluatorConfigEdit( +function findRuleConfigEdit( text: string, previousNames: Map, - evaluators: JsonEditorEvaluatorOption[] | undefined + rules: JsonEditorRuleOption[] | undefined ): JsonEditorTextEdit | null { const tree = parseTree(text); if (!tree) return null; const conditionNode = findNodeAtLocation(tree, ['condition']); - const result = new Map(); - collectEvaluatorNames(conditionNode, result); + const result = new Map(); + collectRuleNames(conditionNode, result); for (const [key, { name, configNode, nameNode }] of result) { const prevName = previousNames.get(key); if (prevName === undefined || prevName === 
name) continue; - const evaluator = evaluators?.find((item) => item.id === name); - if (!evaluator) continue; + const rule = rules?.find((item) => item.id === name); + if (!rule) continue; const configJson = JSON.stringify( - buildDefaultConfig(evaluator.configSchema), + buildDefaultConfig(rule.configSchema), null, 2 ); @@ -196,17 +195,17 @@ function findSteeringContextEdit( export function computeAutoEdit( text: string, - previousEvaluatorNames: Map, + previousRuleNames: Map, previousDecision: string | null, mode: JsonEditorMode, - evaluators: JsonEditorEvaluatorOption[] | undefined + rules: JsonEditorRuleOption[] | undefined ): { edit: JsonEditorTextEdit | null; - editKind: 'evaluator-config' | 'steering-context' | null; - nextEvaluatorNames: Map; + editKind: 'rule-config' | 'steering-context' | null; + nextRuleNames: Map; nextDecision: string | null; } { - const nextEvaluatorNames = extractEvaluatorNames(text); + const nextRuleNames = extractRuleNames(text); let nextDecision: string | null = previousDecision; try { const tree = parseTree(text); @@ -219,19 +218,15 @@ export function computeAutoEdit( } if (mode !== 'control') { - return { edit: null, editKind: null, nextEvaluatorNames, nextDecision }; + return { edit: null, editKind: null, nextRuleNames, nextDecision }; } - const evaluatorEdit = findEvaluatorConfigEdit( - text, - previousEvaluatorNames, - evaluators - ); - if (evaluatorEdit) { + const ruleEdit = findRuleConfigEdit(text, previousRuleNames, rules); + if (ruleEdit) { return { - edit: evaluatorEdit, - editKind: 'evaluator-config', - nextEvaluatorNames, + edit: ruleEdit, + editKind: 'rule-config', + nextRuleNames, nextDecision, }; } @@ -241,12 +236,12 @@ export function computeAutoEdit( return { edit: steeringEdit, editKind: 'steering-context', - nextEvaluatorNames, + nextRuleNames, nextDecision, }; } - return { edit: null, editKind: null, nextEvaluatorNames, nextDecision }; + return { edit: null, editKind: null, nextRuleNames, nextDecision }; } export 
function applyTextEdit(text: string, edit: JsonEditorTextEdit): string { diff --git a/ui/src/components/json-editor-codemirror/language/context.ts b/ui/src/components/json-editor-codemirror/language/context.ts index 295c611f..b393017b 100644 --- a/ui/src/components/json-editor-codemirror/language/context.ts +++ b/ui/src/components/json-editor-codemirror/language/context.ts @@ -4,7 +4,7 @@ import { parseTree, } from 'jsonc-parser'; -import type { JsonEditorEvaluatorOption } from '@/core/page-components/agent-detail/modals/edit-control/types'; +import type { JsonEditorRuleOption } from '@/core/page-components/agent-detail/modals/edit-control/types'; import { asSchema, @@ -18,11 +18,11 @@ import type { SchemaCursor, } from './types'; -export function isEvaluatorNameLocation(path: JsonPath): boolean { +export function isRuleNameLocation(path: JsonPath): boolean { return ( path.length >= 2 && path[path.length - 1] === 'name' && - path[path.length - 2] === 'evaluator' + path[path.length - 2] === 'rule' ); } @@ -55,51 +55,49 @@ export function getScopeFilters(tree: JsonNode | undefined): { }; } -export function resolveActiveEvaluator( +export function resolveActiveRule( context: JsonEditorCodeMirrorContext, tree: JsonNode | undefined, path: JsonPath -): JsonEditorEvaluatorOption | null { - if (context.mode === 'evaluator-config') { +): JsonEditorRuleOption | null { + if (context.mode === 'rule-config') { return ( - context.evaluators?.find( - (item) => item.id === context.activeEvaluatorId - ) ?? null + context.rules?.find((item) => item.id === context.activeRuleId) ?? 
null ); } - const evaluatorIndex = path.lastIndexOf('evaluator'); - if (evaluatorIndex === -1 || !tree) return null; - const evaluatorPath = path.slice(0, evaluatorIndex + 1); - const nameNode = findNodeAtLocation(tree, [...evaluatorPath, 'name']); + const ruleIndex = path.lastIndexOf('rule'); + if (ruleIndex === -1 || !tree) return null; + const rulePath = path.slice(0, ruleIndex + 1); + const nameNode = findNodeAtLocation(tree, [...rulePath, 'name']); const value = typeof nameNode?.value === 'string' ? nameNode.value : null; if (!value) return null; - return context.evaluators?.find((item) => item.id === value) ?? null; + return context.rules?.find((item) => item.id === value) ?? null; } /** - * True when `path[index]` is the `config` property of an `evaluator` object. - * Matches Monaco `isEvaluatorConfigSegment` — used to swap the schema root to - * the active evaluator's configSchema while editing control JSON. + * True when `path[index]` is the `config` property of a `rule` object. + * Matches Monaco `isRuleConfigSegment` — used to swap the schema root to + * the active rule's configSchema while editing control JSON. */ -function isEvaluatorConfigSegment(path: JsonPath, index: number): boolean { +function isRuleConfigSegment(path: JsonPath, index: number): boolean { return ( typeof path[index] === 'string' && path[index] === 'config' && index > 0 && - path[index - 1] === 'evaluator' + path[index - 1] === 'rule' ); } export function resolveSchemaAtJsonPath( context: JsonEditorCodeMirrorContext, - activeEvaluator: JsonEditorEvaluatorOption | null, + activeRule: JsonEditorRuleOption | null, path: JsonPath ): SchemaCursor { const controlRoot = asSchema(context.schema) ?? null; let rootSchema = controlRoot; - if (context.mode === 'evaluator-config' && activeEvaluator?.configSchema) { - rootSchema = asSchema(activeEvaluator.configSchema) ??
rootSchema; + if (context.mode === 'rule-config' && activeRule?.configSchema) { + rootSchema = asSchema(activeRule.configSchema) ?? rootSchema; } if (!rootSchema) return { schema: null, rootSchema: null }; @@ -109,8 +107,8 @@ export function resolveSchemaAtJsonPath( const segment = path[index]; if (cursor === null) break; - if (context.mode === 'control' && isEvaluatorConfigSegment(path, index)) { - const configRoot = asSchema(activeEvaluator?.configSchema ?? null); + if (context.mode === 'control' && isRuleConfigSegment(path, index)) { + const configRoot = asSchema(activeRule?.configSchema ?? null); if (configRoot) { rootSchema = configRoot; cursor = normalizeSchema(rootSchema, rootSchema); diff --git a/ui/src/components/json-editor-codemirror/language/extensions.ts b/ui/src/components/json-editor-codemirror/language/extensions.ts index 20482f62..814caba8 100644 --- a/ui/src/components/json-editor-codemirror/language/extensions.ts +++ b/ui/src/components/json-editor-codemirror/language/extensions.ts @@ -44,10 +44,10 @@ export { import { getEnumValues, getScopeFilters, - isEvaluatorNameLocation, + isRuleNameLocation, isSelectorPathLocation, parseJsonTree, - resolveActiveEvaluator, + resolveActiveRule, resolveSchemaAtJsonPath, } from './context'; import { @@ -203,13 +203,9 @@ function getPropertySuggestions( isAtPropertyKey: boolean ): Completion[] { const tree = parseJsonTree(text); - const activeEvaluator = resolveActiveEvaluator(context, tree, path); + const activeRule = resolveActiveRule(context, tree, path); const objectPath = path.slice(0, -1); - const schemaCursor = resolveSchemaAtJsonPath( - context, - activeEvaluator, - objectPath - ); + const schemaCursor = resolveSchemaAtJsonPath(context, activeRule, objectPath); if (!schemaCursor.schema) return []; const objectNode = tree ? 
findNodeAtLocation(tree, objectPath) : undefined; @@ -274,8 +270,8 @@ function getValueSuggestions( isStringValueContext: boolean ): Completion[] { const tree = parseJsonTree(text); - if (isEvaluatorNameLocation(path) && context.evaluators?.length) { - return context.evaluators.map((item) => ({ + if (isRuleNameLocation(path) && context.rules?.length) { + return context.rules.map((item) => ({ label: item.id, type: 'constant', detail: item.description ?? undefined, @@ -343,8 +339,8 @@ function getValueSuggestions( ); } - const activeEvaluator = resolveActiveEvaluator(context, tree, path); - const cursor = resolveSchemaAtJsonPath(context, activeEvaluator, path); + const activeRule = resolveActiveRule(context, tree, path); + const cursor = resolveSchemaAtJsonPath(context, activeRule, path); const enumValues = getEnumValues(cursor.schema); if (enumValues.length === 0) return []; return enumValues.map((value) => ({ @@ -404,11 +400,11 @@ function findConditionAtOffset( } const hasSelector = !!findNodeAtLocation(node, ['selector']); - const hasEvaluator = !!findNodeAtLocation(node, ['evaluator']); + const hasRule = !!findNodeAtLocation(node, ['rule']); const hasAnd = !!findNodeAtLocation(node, ['and']); const hasOr = !!findNodeAtLocation(node, ['or']); const hasNot = !!findNodeAtLocation(node, ['not']); - const isLeaf = (hasSelector || hasEvaluator) && !hasAnd && !hasOr; + const isLeaf = (hasSelector || hasRule) && !hasAnd && !hasOr; return { node, isLeaf, @@ -473,7 +469,7 @@ function buildConditionRefactorActions( const next = applyNodeTransform((p) => ({ and: [ p as Record, - { selector: { path: '*' }, evaluator: { name: '', config: {} } }, + { selector: { path: '*' }, rule: { name: '', config: {} } }, ], })); if (!next) return; @@ -488,7 +484,7 @@ function buildConditionRefactorActions( const next = applyNodeTransform((p) => ({ or: [ p as Record, - { selector: { path: '*' }, evaluator: { name: '', config: {} } }, + { selector: { path: '*' }, rule: { name: '', config: {} 
} }, ], })); if (!next) return; @@ -526,7 +522,7 @@ function buildConditionRefactorActions( ...arr, { selector: { path: '*' }, - evaluator: { name: '', config: {} }, + rule: { name: '', config: {} }, }, ], }; @@ -643,7 +639,7 @@ function getHintForPath( context: JsonEditorCodeMirrorContext ): string | null { // Avoid showing hint widgets for fields that already have a good dropdown UX. - if (isEvaluatorNameLocation(path)) { + if (isRuleNameLocation(path)) { return null; } @@ -660,19 +656,19 @@ function getHintForPath( } const tree = parseJsonTree(text); - if (isEvaluatorNameLocation(path) && context.evaluators?.length) { - const display = context.evaluators + if (isRuleNameLocation(path) && context.rules?.length) { + const display = context.rules .map((item) => item.id) .slice(0, MAX_HINT_VALUES); - return ` ${display.join(' | ')}${context.evaluators.length > MAX_HINT_VALUES ? ' | ...' : ''}`; + return ` ${display.join(' | ')}${context.rules.length > MAX_HINT_VALUES ? ' | ...' : ''}`; } if (isSelectorPathLocation(path)) { return ' * | input | output | context | ...'; } - const activeEvaluator = resolveActiveEvaluator(context, tree, path); - const cursor = resolveSchemaAtJsonPath(context, activeEvaluator, path); + const activeRule = resolveActiveRule(context, tree, path); + const cursor = resolveSchemaAtJsonPath(context, activeRule, path); const enumValues = getEnumValues(cursor.schema); if (enumValues.length > 0 && enumValues.length <= MAX_HINT_VALUES) { return ` ${enumValues.map(String).join(' | ')}`; @@ -683,11 +679,11 @@ function getHintForPath( /** * `activateOnTyping` often does not reopen completions after Backspace. * Also reopen when the user edits inside a JSON string that has value - * suggestions (enums, evaluator name, selector path), including partial text + * suggestions (enums, rule name, selector path), including partial text * like `"s"` after deleting `"sdk"`. 
* * Only runs for direct typing/paste/delete — not programmatic doc updates - * (for example default `config` injection after an evaluator rename). + * (for example default `config` injection after a rule rename). */ function _createAutocompleteOpenWhenValueSuggestionsAfterEditExtension( context: JsonEditorCodeMirrorContext @@ -703,7 +699,7 @@ function _createAutocompleteOpenWhenValueSuggestionsAfterEditExtension( ) { return; } - // Ignore programmatic doc changes (e.g. evaluator `config` auto-fill); those + // Ignore programmatic doc changes (e.g. rule `config` auto-fill); those // must not queue another completion — the dropdown would pop right back. if ( !update.transactions.some( @@ -818,15 +814,11 @@ function _createHoverExtension( const location = getLocation(text, pos); if (!location.path.length) return null; - const activeEvaluator = resolveActiveEvaluator( - context, - tree, - location.path - ); + const activeRule = resolveActiveRule(context, tree, location.path); const path = location.isAtPropertyKey ? 
location.path.slice(0, -1) : location.path; - const cursor = resolveSchemaAtJsonPath(context, activeEvaluator, path); + const cursor = resolveSchemaAtJsonPath(context, activeRule, path); let title: string | null = null; let description: string | null = null; @@ -1149,12 +1141,12 @@ export function getCodeMirrorCompletionItems( })); } -export function shouldTriggerEvaluatorNameCompletion( +export function shouldTriggerRuleNameCompletion( text: string, offset: number ): boolean { const location = getLocation(text, offset); - if (!isEvaluatorNameLocation(location.path)) { + if (!isRuleNameLocation(location.path)) { return false; } diff --git a/ui/src/components/json-editor-codemirror/language/index.ts b/ui/src/components/json-editor-codemirror/language/index.ts index d52749df..d241ec77 100644 --- a/ui/src/components/json-editor-codemirror/language/index.ts +++ b/ui/src/components/json-editor-codemirror/language/index.ts @@ -1,8 +1,4 @@ -export { - applyTextEdit, - computeAutoEdit, - extractEvaluatorNames, -} from './auto-edits'; +export { applyTextEdit, computeAutoEdit, extractRuleNames } from './auto-edits'; export { buildCodeMirrorInlineServerValidationErrorsExtension, buildCodeMirrorJsonExtensions, @@ -11,7 +7,7 @@ export { canRenderInlineServerValidationError, getCodeMirrorCompletionItems, setInlineServerValidationErrorsEffect, - shouldTriggerEvaluatorNameCompletion, + shouldTriggerRuleNameCompletion, triggerRefactorActionsDropdown, } from './extensions'; export { diff --git a/ui/src/components/json-editor-codemirror/language/schema.ts b/ui/src/components/json-editor-codemirror/language/schema.ts index 6627c739..63712b4f 100644 --- a/ui/src/components/json-editor-codemirror/language/schema.ts +++ b/ui/src/components/json-editor-codemirror/language/schema.ts @@ -210,7 +210,7 @@ function jsonStringifyForInsert(value: unknown): string { /** * JSON text inserted when completing a property key. 
Mirrors Monaco - * `buildSchemaValueSnippet` so control scaffolding (selector, evaluator, + * `buildSchemaValueSnippet` so control scaffolding (selector, rule, * action, scope) matches the original editor. */ export function getJsonInsertTextForSchemaPropertyValue( @@ -241,7 +241,7 @@ export function getJsonInsertTextForSchemaPropertyValue( } if ( - schemaTitle === 'EvaluatorSpec' || + schemaTitle === 'RuleSpec' || isSchemaWithProperties(normalized, ['name', 'config']) ) { return '{\n "name": "",\n "config": {}\n}'; @@ -263,13 +263,7 @@ export function getJsonInsertTextForSchemaPropertyValue( if ( schemaTitle === 'ConditionNode' || - isSchemaWithProperties(normalized, [ - 'selector', - 'evaluator', - 'and', - 'or', - 'not', - ]) + isSchemaWithProperties(normalized, ['selector', 'rule', 'and', 'or', 'not']) ) { return '{}'; } diff --git a/ui/src/components/json-editor-codemirror/language/types.ts b/ui/src/components/json-editor-codemirror/language/types.ts index 35e9b73e..db36a4c6 100644 --- a/ui/src/components/json-editor-codemirror/language/types.ts +++ b/ui/src/components/json-editor-codemirror/language/types.ts @@ -1,7 +1,7 @@ import type { StepSchema } from '@/core/api/types'; import type { - JsonEditorEvaluatorOption, JsonEditorMode, + JsonEditorRuleOption, JsonSchema, } from '@/core/page-components/agent-detail/modals/edit-control/types'; @@ -10,8 +10,8 @@ export type JsonPath = Array; export type JsonEditorCodeMirrorContext = { mode: JsonEditorMode; schema?: JsonSchema | null; - evaluators?: JsonEditorEvaluatorOption[]; - activeEvaluatorId?: string | null; + rules?: JsonEditorRuleOption[]; + activeRuleId?: string | null; steps?: StepSchema[]; }; diff --git a/ui/src/components/json-editor-monaco/json-editor-monaco-language.ts b/ui/src/components/json-editor-monaco/json-editor-monaco-language.ts index 8617db1c..a4f6118d 100644 --- a/ui/src/components/json-editor-monaco/json-editor-monaco-language.ts +++ 
b/ui/src/components/json-editor-monaco/json-editor-monaco-language.ts @@ -10,8 +10,8 @@ import { import { removeTrailingCommasOutsideStrings } from '@/components/json-editor-shared/fix-json-commas'; import type { StepSchema } from '@/core/api/types'; import type { - JsonEditorEvaluatorOption, JsonEditorMode, + JsonEditorRuleOption, JsonSchema, } from '@/core/page-components/agent-detail/modals/edit-control/types'; @@ -22,8 +22,8 @@ type JsonEditorAutocompleteContext = { mode: JsonEditorMode; modelUri: string; schema?: JsonSchema | null; - evaluators?: JsonEditorEvaluatorOption[]; - activeEvaluatorId?: string | null; + rules?: JsonEditorRuleOption[]; + activeRuleId?: string | null; steps?: StepSchema[]; /** For template mode: path prefix under which the ControlDefinition lives */ definitionPrefix?: JsonPath; @@ -544,11 +544,11 @@ function isSelectorPathLocation(path: JsonPath): boolean { ); } -function isEvaluatorNameLocation(path: JsonPath): boolean { +function isRuleNameLocation(path: JsonPath): boolean { return ( path.length >= 2 && path[path.length - 1] === 'name' && - path[path.length - 2] === 'evaluator' + path[path.length - 2] === 'rule' ); } @@ -634,7 +634,7 @@ function buildSchemaValueSnippet( } if ( - schemaTitle === 'EvaluatorSpec' || + schemaTitle === 'RuleSpec' || isSchemaWithProperties(normalized, ['name', 'config']) ) { return '{\n "name": "",\n "config": {}\n}'; @@ -656,13 +656,7 @@ function buildSchemaValueSnippet( if ( schemaTitle === 'ConditionNode' || - isSchemaWithProperties(normalized, [ - 'selector', - 'evaluator', - 'and', - 'or', - 'not', - ]) + isSchemaWithProperties(normalized, ['selector', 'rule', 'and', 'or', 'not']) ) { return '{}'; } @@ -857,27 +851,27 @@ function buildSelectorPathSuggestions( }); } -function findEvaluatorById( - evaluators: JsonEditorEvaluatorOption[] | undefined, +function findRuleById( + rules: JsonEditorRuleOption[] | undefined, id: string | null | undefined -): JsonEditorEvaluatorOption | null { - if (!evaluators 
|| !id) { +): JsonEditorRuleOption | null { + if (!rules || !id) { return null; } - return evaluators.find((candidate) => candidate.id === id) ?? null; + return rules.find((candidate) => candidate.id === id) ?? null; } -function resolveActiveEvaluator( +function resolveActiveRule( context: JsonEditorAutocompleteContext, tree: JsonNode | undefined, path: JsonPath -): JsonEditorEvaluatorOption | null { - if (context.mode === 'evaluator-config') { - return findEvaluatorById(context.evaluators, context.activeEvaluatorId); +): JsonEditorRuleOption | null { + if (context.mode === 'rule-config') { + return findRuleById(context.rules, context.activeRuleId); } - // For template mode, strip the definition prefix so that the evaluator + // For template mode, strip the definition prefix so that the rule // index lookup works on relative paths, then reconstruct the absolute // tree path for the node lookup. const relativePath = @@ -886,33 +880,31 @@ function resolveActiveEvaluator( : path; if (!relativePath) return null; - const evaluatorIndex = getJsonPathFieldIndex(relativePath, 'evaluator'); - if (!tree || evaluatorIndex < 0) { + const ruleIndex = getJsonPathFieldIndex(relativePath, 'rule'); + if (!tree || ruleIndex < 0) { return null; } const prefix = context.definitionPrefix ?? []; - const evaluatorNamePath = [ + const ruleNamePath = [ ...prefix, - ...relativePath.slice(0, evaluatorIndex), - 'evaluator', + ...relativePath.slice(0, ruleIndex), + 'rule', 'name', ]; - const evaluatorNameNode = findNodeAtLocation(tree, evaluatorNamePath); - const evaluatorName = - typeof evaluatorNameNode?.value === 'string' - ? evaluatorNameNode.value - : null; + const ruleNameNode = findNodeAtLocation(tree, ruleNamePath); + const ruleName = + typeof ruleNameNode?.value === 'string' ? 
ruleNameNode.value : null; - return findEvaluatorById(context.evaluators, evaluatorName); + return findRuleById(context.rules, ruleName); } function getInitialSchemaCursor( context: JsonEditorAutocompleteContext, - activeEvaluator: JsonEditorEvaluatorOption | null + activeRule: JsonEditorRuleOption | null ): SchemaCursor { - if (context.mode === 'evaluator-config') { - const rootSchema = asSchema(activeEvaluator?.configSchema ?? null); + if (context.mode === 'rule-config') { + const rootSchema = asSchema(activeRule?.configSchema ?? null); return { schema: normalizeSchema(rootSchema, rootSchema), rootSchema, @@ -926,18 +918,18 @@ function getInitialSchemaCursor( }; } -function isEvaluatorConfigSegment(path: JsonPath, index: number): boolean { +function isRuleConfigSegment(path: JsonPath, index: number): boolean { return ( typeof path[index] === 'string' && path[index] === 'config' && index > 0 && - path[index - 1] === 'evaluator' + path[index - 1] === 'rule' ); } function resolveSchemaAtJsonPath( context: JsonEditorAutocompleteContext, - activeEvaluator: JsonEditorEvaluatorOption | null, + activeRule: JsonEditorRuleOption | null, path: JsonPath ): SchemaCursor { // For template mode, only provide schema resolution inside definition_template. @@ -951,7 +943,7 @@ function resolveSchemaAtJsonPath( effectivePath = relative; } - let cursor = getInitialSchemaCursor(context, activeEvaluator); + let cursor = getInitialSchemaCursor(context, activeRule); for (let index = 0; index < effectivePath.length; index += 1) { const segment = effectivePath[index]; @@ -961,9 +953,9 @@ function resolveSchemaAtJsonPath( if ( (context.mode === 'control' || context.mode === 'template') && - isEvaluatorConfigSegment(effectivePath, index) + isRuleConfigSegment(effectivePath, index) ) { - const rootSchema = asSchema(activeEvaluator?.configSchema ?? null); + const rootSchema = asSchema(activeRule?.configSchema ?? 
null); cursor = { schema: normalizeSchema(rootSchema, rootSchema), rootSchema, @@ -988,21 +980,18 @@ function resolveSchemaAtJsonPath( return cursor; } -function buildEvaluatorNameSuggestions( +function buildRuleNameSuggestions( monaco: MonacoModule, range: import('monaco-editor').IRange, - evaluators: JsonEditorEvaluatorOption[] | undefined, + rules: JsonEditorRuleOption[] | undefined, isStringValueContext: boolean ) { - return (evaluators ?? []).map((evaluator, index) => ({ - label: evaluator.id, + return (rules ?? []).map((rule, index) => ({ + label: rule.id, kind: monaco.languages.CompletionItemKind.Value, - detail: - evaluator.source === 'agent' - ? `${evaluator.label} (agent evaluator)` - : evaluator.label, - documentation: evaluator.description ?? undefined, - insertText: buildValueInsertText(evaluator.id, isStringValueContext), + detail: rule.source === 'agent' ? `${rule.label} (agent rule)` : rule.label, + documentation: rule.description ?? undefined, + insertText: buildValueInsertText(rule.id, isStringValueContext), range, sortText: `!0${index.toString().padStart(3, '0')}`, })); @@ -1308,7 +1297,7 @@ function buildCompletionSuggestions( ? (relativePath ?? 
location.path) : location.path; - const activeEvaluator = resolveActiveEvaluator(context, tree, location.path); + const activeRule = resolveActiveRule(context, tree, location.path); // --- $param name completions (inside {"$param": ""}) --- if (isTemplate && isParamRefValueLocation(effectivePath)) { @@ -1352,12 +1341,12 @@ function buildCompletionSuggestions( // --- Inside definition_template (or non-template mode): control-level completions --- if (insideDefinition) { - if (isEvaluatorNameLocation(effectivePath)) { + if (isRuleNameLocation(effectivePath)) { suggestions.push( - ...buildEvaluatorNameSuggestions( + ...buildRuleNameSuggestions( monaco, valueRange, - context.evaluators, + context.rules, isStringValueContext ) ); @@ -1402,7 +1391,7 @@ function buildCompletionSuggestions( : propertyKeyContext.objectPath; const schemaCursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, schemaObjectPath ); const currentPropertyName = @@ -1437,7 +1426,7 @@ function buildCompletionSuggestions( if (isValuePosition) { const valueSchemaCursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, location.path ); @@ -1501,29 +1490,29 @@ export function getJsonEditorCompletionItems( return buildCompletionSuggestions(monaco, model, position, context); } -type EvaluatorNodeInfo = { +type RuleNodeInfo = { name: string; nameNode: JsonNode; configNode: JsonNode | undefined; - evaluatorNode: JsonNode; + ruleNode: JsonNode; }; -function collectEvaluatorNames( +function collectRuleNames( node: JsonNode | undefined, - result: Map + result: Map ) { if (!node || node.type !== 'object' || !node.children) return; - const evaluatorNode = findNodeAtLocation(node, ['evaluator']); - if (evaluatorNode?.type === 'object') { - const nameNode = findNodeAtLocation(evaluatorNode, ['name']); - const configNode = findNodeAtLocation(evaluatorNode, ['config']); + const ruleNode = findNodeAtLocation(node, ['rule']); + if (ruleNode?.type === 'object') { + const nameNode 
= findNodeAtLocation(ruleNode, ['name']); + const configNode = findNodeAtLocation(ruleNode, ['config']); if (nameNode && typeof nameNode.value === 'string') { result.set(`${nameNode.offset}`, { name: nameNode.value, nameNode, configNode, - evaluatorNode, + ruleNode, }); } } @@ -1532,18 +1521,18 @@ function collectEvaluatorNames( const arrayNode = findNodeAtLocation(node, [key]); if (arrayNode?.type === 'array' && arrayNode.children) { for (const child of arrayNode.children) { - collectEvaluatorNames(child, result); + collectRuleNames(child, result); } } } const notNode = findNodeAtLocation(node, ['not']); if (notNode?.type === 'object') { - collectEvaluatorNames(notNode, result); + collectRuleNames(notNode, result); } } -export function extractEvaluatorNames( +export function extractRuleNames( text: string, definitionPrefix?: JsonPath ): Map { @@ -1554,8 +1543,8 @@ export function extractEvaluatorNames( const conditionNode = subtree ? findNodeAtLocation(subtree, ['condition']) : undefined; - const result = new Map(); - collectEvaluatorNames(conditionNode, result); + const result = new Map(); + collectRuleNames(conditionNode, result); const names = new Map(); for (const [key, info] of result) { @@ -1616,10 +1605,10 @@ export function buildDefaultConfig( return config; } -export function findEvaluatorConfigEdit( +export function findRuleConfigEdit( text: string, previousNames: Map, - evaluators: JsonEditorEvaluatorOption[] | undefined, + rules: JsonEditorRuleOption[] | undefined, definitionPrefix?: JsonPath ): { offset: number; length: number; newText: string } | null { const tree = parseTree(text); @@ -1629,17 +1618,17 @@ export function findEvaluatorConfigEdit( const conditionNode = subtree ? 
findNodeAtLocation(subtree, ['condition']) : undefined; - const result = new Map(); - collectEvaluatorNames(conditionNode, result); + const result = new Map(); + collectRuleNames(conditionNode, result); for (const [key, { name, configNode, nameNode }] of result) { const prevName = previousNames.get(key); if (prevName === undefined || prevName === name) continue; - const evaluator = evaluators?.find((e) => e.id === name); - if (!evaluator) continue; + const rule = rules?.find((e) => e.id === name); + if (!rule) continue; - const defaultConfig = buildDefaultConfig(evaluator.configSchema); + const defaultConfig = buildDefaultConfig(rule.configSchema); const configJson = JSON.stringify(defaultConfig, null, 2); if (configNode) { @@ -1772,14 +1761,10 @@ export function getEmptyValueHints( continue; } - const activeEvaluator = resolveActiveEvaluator( - context, - tree, - location.path - ); + const activeRule = resolveActiveRule(context, tree, location.path); - if (isEvaluatorNameLocation(effectivePath) && context.evaluators?.length) { - const names = context.evaluators.map((e) => e.id); + if (isRuleNameLocation(effectivePath) && context.rules?.length) { + const names = context.rules.map((e) => e.id); const display = names.slice(0, MAX_HINT_VALUES); const hint = display.join(' | ') + @@ -1798,7 +1783,7 @@ export function getEmptyValueHints( const schemaCursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, location.path ); if (!schemaCursor.schema) continue; @@ -1903,14 +1888,10 @@ export function setupJsonEditorLanguageSupport( } } - const activeEvaluator = resolveActiveEvaluator( - context, - tree, - location.path - ); + const activeRule = resolveActiveRule(context, tree, location.path); const cursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, location.isAtPropertyKey ? 
location.path.slice(0, -1) : location.path ); @@ -2013,7 +1994,7 @@ export function setupJsonEditorLanguageSupport( const LEAF_CONDITION_TEMPLATE = { selector: { path: '*' }, - evaluator: { name: '', config: {} }, + rule: { name: '', config: {} }, }; function findConditionNodeAtOffset( @@ -2076,11 +2057,11 @@ function findConditionAtOffset( // We're on this object node itself const hasSelector = !!findNodeAtLocation(node, ['selector']); - const hasEvaluator = !!findNodeAtLocation(node, ['evaluator']); + const hasRule = !!findNodeAtLocation(node, ['rule']); const hasAnd = !!findNodeAtLocation(node, ['and']); const hasOr = !!findNodeAtLocation(node, ['or']); const hasNot = !!findNodeAtLocation(node, ['not']); - const isLeaf = (hasSelector || hasEvaluator) && !hasAnd && !hasOr; + const isLeaf = (hasSelector || hasRule) && !hasAnd && !hasOr; return { node, diff --git a/ui/src/core/api/client.ts b/ui/src/core/api/client.ts index c9ad04bd..5cd60d23 100644 --- a/ui/src/core/api/client.ts +++ b/ui/src/core/api/client.ts @@ -152,8 +152,8 @@ export const api = { params: { path: { agent_name: agentName, control_id: controlId } }, }), }, - evaluators: { - list: () => apiClient.GET('/api/v1/evaluators'), + rules: { + list: () => apiClient.GET('/api/v1/rules'), }, controls: { list: (params?: { diff --git a/ui/src/core/api/generated/api-types.ts b/ui/src/core/api/generated/api-types.ts index fe859d89..3f71cde6 100644 --- a/ui/src/core/api/generated/api-types.ts +++ b/ui/src/core/api/generated/api-types.ts @@ -83,7 +83,7 @@ export interface paths { * @description List all registered agents with cursor-based pagination. * * Returns a summary of each agent including identifier, policy associations, - * and counts of registered steps and evaluators. + * and counts of registered steps and rules. 
* * Args: * cursor: Optional cursor for pagination (last agent name from previous page) @@ -122,7 +122,7 @@ export interface paths { * * conflict_mode controls registration conflict handling: * - strict (default): preserve compatibility checks and conflict errors - * - overwrite: latest init payload replaces steps/evaluators and returns change summary + * - overwrite: latest init payload replaces steps/rules and returns change summary * * Args: * request: Agent metadata and step schemas @@ -169,15 +169,15 @@ export interface paths { options?: never; head?: never; /** - * Modify agent (remove steps/evaluators) - * @description Remove steps and/or evaluators from an agent. + * Modify agent (remove steps/rules) + * @description Remove steps and/or rules from an agent. * * This is the complement to initAgent which only adds items. * Removals are idempotent - attempting to remove non-existent items is not an error. * * Args: * agent_name: Agent identifier - * request: Lists of step/evaluator identifiers to remove + * request: Lists of step/rule identifiers to remove * db: Database session (injected) * * Returns: @@ -246,7 +246,7 @@ export interface paths { patch?: never; trace?: never; }; - '/api/v1/agents/{agent_name}/evaluators': { + '/api/v1/agents/{agent_name}/rules': { parameters: { query?: never; header?: never; @@ -254,26 +254,26 @@ export interface paths { cookie?: never; }; /** - * List agent's registered evaluator schemas - * @description List all evaluator schemas registered with an agent. + * List agent's registered rule schemas + * @description List all rule schemas registered with an agent. 
* - * Evaluator schemas are registered via initAgent and used for: + * Rule schemas are registered via initAgent and used for: * - Config validation when creating Controls * - UI to display available config options * * Args: * agent_name: Agent identifier - * cursor: Optional cursor for pagination (name of last evaluator from previous page) + * cursor: Optional cursor for pagination (name of last rule from previous page) * limit: Pagination limit (default 20, max 100) * db: Database session (injected) * * Returns: - * ListEvaluatorsResponse with evaluator schemas and pagination + * ListRulesResponse with rule schemas and pagination * * Raises: * HTTPException 404: Agent not found */ - get: operations['list_agent_evaluators_api_v1_agents__agent_name__evaluators_get']; + get: operations['list_agent_rules_api_v1_agents__agent_name__rules_get']; put?: never; post?: never; delete?: never; @@ -282,7 +282,7 @@ export interface paths { patch?: never; trace?: never; }; - '/api/v1/agents/{agent_name}/evaluators/{evaluator_name}': { + '/api/v1/agents/{agent_name}/rules/{rule_name}': { parameters: { query?: never; header?: never; @@ -290,21 +290,21 @@ export interface paths { cookie?: never; }; /** - * Get specific evaluator schema - * @description Get a specific evaluator schema registered with an agent. + * Get specific rule schema + * @description Get a specific rule schema registered with an agent. 
* * Args: * agent_name: Agent identifier - * evaluator_name: Name of the evaluator + * rule_name: Name of the rule * db: Database session (injected) * * Returns: - * EvaluatorSchemaItem with schema details + * RuleSchemaItem with schema details * * Raises: - * HTTPException 404: Agent or evaluator not found + * HTTPException 404: Agent or rule not found */ - get: operations['get_agent_evaluator_api_v1_agents__agent_name__evaluators__evaluator_name__get']; + get: operations['get_agent_rule_api_v1_agents__agent_name__rules__rule_name__get']; put?: never; post?: never; delete?: never; @@ -653,7 +653,7 @@ export interface paths { * evaluation engine. Controls are evaluated in parallel with * cancel-on-deny for efficiency. * - * Custom evaluators must be deployed as Evaluator classes + * Custom rules must be deployed as Rule classes * with the engine. Their schemas are registered via initAgent. * * Optionally accepts X-Trace-Id and X-Span-Id headers for @@ -666,7 +666,7 @@ export interface paths { patch?: never; trace?: never; }; - '/api/v1/evaluators': { + '/api/v1/rules': { parameters: { query?: never; header?: never; @@ -674,21 +674,21 @@ export interface paths { cookie?: never; }; /** - * List available evaluators - * @description List all available evaluators. + * List available rules + * @description List all available rules. * - * Returns metadata and JSON Schema for each built-in evaluator. + * Returns metadata and JSON Schema for each built-in rule. * - * Built-in evaluators: + * Built-in rules: * - **regex**: Regular expression pattern matching * - **list**: List-based value matching with flexible logic * - **json**: JSON validation with schema, types, constraints * - **sql**: SQL query validation * - * Custom evaluators are registered per-agent via initAgent. - * Use GET /agents/{agent_name}/evaluators to list agent-specific schemas. + * Custom rules are registered per-agent via initAgent. + * Use GET /agents/{agent_name}/rules to list agent-specific schemas. 
*/ - get: operations['get_evaluators_api_v1_evaluators_get']; + get: operations['get_rules_api_v1_rules_get']; put?: never; post?: never; delete?: never; @@ -1093,11 +1093,11 @@ export interface components { */ created_at?: string | null; /** - * Evaluator Count - * @description Number of evaluators registered with the agent + * Rule Count + * @description Number of rules registered with the agent * @default 0 */ - evaluator_count: number; + rule_count: number; /** * Policy Ids * @description IDs of policies associated with the agent @@ -1194,7 +1194,7 @@ export interface components { * ConditionNode * @description Recursive boolean condition tree for control evaluation. * @example { - * "evaluator": { + * "rule": { * "config": { * "pattern": "\\d{3}-\\d{2}-\\d{4}" * }, @@ -1207,7 +1207,7 @@ export interface components { * @example { * "and": [ * { - * "evaluator": { + * "rule": { * "config": { * "values": [ * "high", @@ -1222,7 +1222,7 @@ export interface components { * }, * { * "not": { - * "evaluator": { + * "rule": { * "config": { * "values": [ * "admin", @@ -1245,8 +1245,8 @@ export interface components { * @description Logical AND over child conditions. */ and?: components['schemas']['ConditionNode-Input'][] | null; - /** @description Leaf evaluator. Must be provided together with selector. */ - evaluator?: components['schemas']['EvaluatorSpec'] | null; + /** @description Leaf rule. Must be provided together with selector. */ + rule?: components['schemas']['RuleSpec'] | null; /** @description Logical NOT over a single child condition. */ not?: components['schemas']['ConditionNode-Input'] | null; /** @@ -1254,14 +1254,14 @@ export interface components { * @description Logical OR over child conditions. */ or?: components['schemas']['ConditionNode-Input'][] | null; - /** @description Leaf selector. Must be provided together with evaluator. */ + /** @description Leaf selector. Must be provided together with rule. 
*/ selector?: components['schemas']['ControlSelector'] | null; }; /** * ConditionNode * @description Recursive boolean condition tree for control evaluation. * @example { - * "evaluator": { + * "rule": { * "config": { * "pattern": "\\d{3}-\\d{2}-\\d{4}" * }, @@ -1274,7 +1274,7 @@ export interface components { * @example { * "and": [ * { - * "evaluator": { + * "rule": { * "config": { * "values": [ * "high", @@ -1289,7 +1289,7 @@ export interface components { * }, * { * "not": { - * "evaluator": { + * "rule": { * "config": { * "values": [ * "admin", @@ -1312,8 +1312,8 @@ export interface components { * @description Logical AND over child conditions. */ and?: components['schemas']['ConditionNode-Output'][] | null; - /** @description Leaf evaluator. Must be provided together with selector. */ - evaluator?: components['schemas']['EvaluatorSpec'] | null; + /** @description Leaf rule. Must be provided together with selector. */ + rule?: components['schemas']['RuleSpec'] | null; /** @description Logical NOT over a single child condition. */ not?: components['schemas']['ConditionNode-Output'] | null; /** @@ -1321,7 +1321,7 @@ export interface components { * @description Logical OR over child conditions. */ or?: components['schemas']['ConditionNode-Output'][] | null; - /** @description Leaf selector. Must be provided together with evaluator. */ + /** @description Leaf selector. Must be provided together with rule. */ selector?: components['schemas']['ControlSelector'] | null; }; /** @@ -1343,7 +1343,7 @@ export interface components { * @description Conflict handling mode for initAgent registration updates. * * STRICT preserves compatibility checks and raises conflicts on incompatible changes. - * OVERWRITE applies latest-init-wins replacement for steps and evaluators. + * OVERWRITE applies latest-init-wins replacement for steps and rules. 
* @enum {string} */ ConflictMode: 'strict' | 'overwrite'; @@ -1368,7 +1368,7 @@ export interface components { ControlAction: { /** @description Action to take when control is triggered */ decision: components['schemas']['ActionDecision']; - /** @description Steering context object for steer actions. Strongly recommended when decision='steer' to provide correction suggestions. If not provided, the evaluator result message will be used as fallback. */ + /** @description Steering context object for steer actions. Strongly recommended when decision='steer' to provide correction suggestions. If not provided, the rule result message will be used as fallback. */ steering_context?: components['schemas']['SteeringContext'] | null; }; /** @@ -1382,7 +1382,7 @@ export interface components { * "decision": "deny" * }, * "condition": { - * "evaluator": { + * "rule": { * "config": { * "pattern": "\\b\\d{3}-\\d{2}-\\d{4}\\b" * }, @@ -1412,7 +1412,7 @@ export interface components { 'ControlDefinition-Input': { /** @description What action to take when control matches */ action: components['schemas']['ControlAction']; - /** @description Recursive boolean condition tree. Leaf nodes contain selector + evaluator; composite nodes contain and/or/not. */ + /** @description Recursive boolean condition tree. Leaf nodes contain selector + rule; composite nodes contain and/or/not. */ condition: components['schemas']['ConditionNode-Input']; /** * Description @@ -1450,7 +1450,7 @@ export interface components { * "decision": "deny" * }, * "condition": { - * "evaluator": { + * "rule": { * "config": { * "pattern": "\\b\\d{3}-\\d{2}-\\d{4}\\b" * }, @@ -1480,7 +1480,7 @@ export interface components { 'ControlDefinition-Output': { /** @description What action to take when control matches */ action: components['schemas']['ControlAction']; - /** @description Recursive boolean condition tree. Leaf nodes contain selector + evaluator; composite nodes contain and/or/not. 
*/ + /** @description Recursive boolean condition tree. Leaf nodes contain selector + rule; composite nodes contain and/or/not. */ condition: components['schemas']['ConditionNode-Output']; /** * Description @@ -1516,7 +1516,7 @@ export interface components { * - Context: agent, control, check stage, applies to * - Result: action taken, whether matched, confidence score * - Timing: when it happened, how long it took - * - Optional details: evaluator name, selector path, errors, metadata + * - Optional details: rule name, selector path, errors, metadata * * Attributes: * control_execution_id: Unique ID for this specific control execution @@ -1528,11 +1528,11 @@ export interface components { * check_stage: "pre" (before execution) or "post" (after execution) * applies_to: "llm_call" or "tool_call" * action: The action taken (deny, steer, observe) - * matched: Whether the control evaluator matched - * confidence: Confidence score from the evaluator (0.0-1.0) + * matched: Whether the control rule matched + * confidence: Confidence score from the rule (0.0-1.0) * timestamp: When the control was executed (UTC) * execution_duration_ms: How long the control evaluation took - * evaluator_name: Name of the evaluator used + * rule_name: Name of the rule used * selector_path: The selector path used to extract data * error_message: Error message if evaluation failed * metadata: Additional metadata for extensibility @@ -1545,7 +1545,7 @@ export interface components { * "control_execution_id": "550e8400-e29b-41d4-a716-446655440000", * "control_id": 123, * "control_name": "sql-injection-check", - * "evaluator_name": "regex", + * "rule_name": "regex", * "execution_duration_ms": 15.3, * "matched": true, * "selector_path": "input", @@ -1600,10 +1600,10 @@ export interface components { */ error_message?: string | null; /** - * Evaluator Name - * @description Name of the evaluator used + * Rule Name + * @description Name of the rule used */ - evaluator_name?: string | null; + 
rule_name?: string | null; /** * Execution Duration Ms * @description Execution duration in milliseconds @@ -1611,7 +1611,7 @@ export interface components { execution_duration_ms?: number | null; /** * Matched - * @description Whether the evaluator matched (True) or not (False) + * @description Whether the rule matched (True) or not (False) */ matched: boolean; /** @@ -1665,8 +1665,8 @@ export interface components { * @description Name of the control */ control_name: string; - /** @description Evaluator result (confidence, message, metadata) */ - result: components['schemas']['EvaluatorResult']; + /** @description Rule result (confidence, message, metadata) */ + result: components['schemas']['RuleResult']; /** @description Steering context for steer actions if configured */ steering_context?: components['schemas']['SteeringContext'] | null; }; @@ -1725,7 +1725,7 @@ export interface components { * ControlSelector * @description Selects data from a Step payload. * - * - path: which slice of the Step to feed into the evaluator. Optional, defaults to "*" + * - path: which slice of the Step to feed into the rule. Optional, defaults to "*" * meaning the entire Step object. * @example { * "path": "output" @@ -2124,10 +2124,10 @@ export interface components { reason?: string | null; }; /** - * EvaluatorInfo - * @description Information about a registered evaluator. + * RuleInfo + * @description Information about a registered rule. 
*/ - EvaluatorInfo: { + RuleInfo: { /** * Config Schema * @description JSON Schema for config @@ -2137,17 +2137,17 @@ export interface components { }; /** * Description - * @description Evaluator description + * @description Rule description */ description: string; /** * Name - * @description Evaluator name + * @description Rule name */ name: string; /** * Requires Api Key - * @description Whether evaluator requires API key + * @description Whether rule requires API key */ requires_api_key: boolean; /** @@ -2157,26 +2157,26 @@ export interface components { timeout_ms: number; /** * Version - * @description Evaluator version + * @description Rule version */ version: string; }; /** - * EvaluatorResult - * @description Result from a control evaluator. + * RuleResult + * @description Result from a control rule. * - * The `error` field indicates evaluator failures, NOT validation failures: - * - Set `error` for: evaluator crashes, timeouts, missing dependencies, external service errors + * The `error` field indicates rule failures, NOT validation failures: + * - Set `error` for: rule crashes, timeouts, missing dependencies, external service errors * - Do NOT set `error` for: invalid input, syntax errors, schema violations, constraint failures * - * When `error` is set, `matched` must be False (fail-open on evaluator errors). + * When `error` is set, `matched` must be False (fail-open on rule errors). * When `error` is None, `matched` reflects the actual validation result. 
* * This distinction allows: - * - Clients to distinguish "data violated rules" from "evaluator is broken" - * - Observability systems to monitor evaluator health separately from validation outcomes + * - Clients to distinguish "data violated rules" from "rule is broken" + * - Observability systems to monitor rule health separately from validation outcomes */ - EvaluatorResult: { + RuleResult: { /** * Confidence * @description Confidence in the evaluation @@ -2206,16 +2206,16 @@ export interface components { } | null; }; /** - * EvaluatorSchema - * @description Schema for a custom evaluator registered with an agent. + * RuleSchema + * @description Schema for a custom rule registered with an agent. * - * Custom evaluators are Evaluator classes deployed with the engine. + * Custom rules are Rule classes deployed with the engine. * This schema is registered via initAgent for validation and UI purposes. */ - EvaluatorSchema: { + RuleSchema: { /** * Config Schema - * @description JSON Schema for evaluator config validation + * @description JSON Schema for rule config validation */ config_schema?: { [key: string]: unknown; @@ -2227,15 +2227,15 @@ export interface components { description?: string | null; /** * Name - * @description Unique evaluator name + * @description Unique rule name */ name: string; }; /** - * EvaluatorSchemaItem - * @description Evaluator schema summary for list response. + * RuleSchemaItem + * @description Rule schema summary for list response. */ - EvaluatorSchemaItem: { + RuleSchemaItem: { /** Config Schema */ config_schema: { [key: string]: unknown; @@ -2246,18 +2246,18 @@ export interface components { name: string; }; /** - * EvaluatorSpec - * @description Evaluator specification. See GET /evaluators for available evaluators and schemas. + * RuleSpec + * @description Rule specification. See GET /rules for available rules and schemas. 
* - * Evaluator reference formats: + * Rule reference formats: * - Built-in: "regex", "list", "json", "sql" - * - External: "galileo.luna" (requires agent-control-evaluators[galileo]) - * - Agent-scoped: "my-agent:my-evaluator" (validated in endpoint, not here) + * - External: "galileo.luna" (requires agent-control-rules[galileo]) + * - Agent-scoped: "my-agent:my-rule" (validated in endpoint, not here) */ - EvaluatorSpec: { + RuleSpec: { /** * Config - * @description Evaluator-specific configuration + * @description Rule-specific configuration * @example { * "pattern": "\\d{3}-\\d{2}-\\d{4}" * } @@ -2273,7 +2273,7 @@ export interface components { }; /** * Name - * @description Evaluator name or agent-scoped reference (agent:evaluator) + * @description Rule name or agent-scoped reference (agent:rule) * @example regex * @example list * @example my-agent:pii-detector @@ -2430,10 +2430,10 @@ export interface components { /** @description Agent metadata */ agent: components['schemas']['Agent']; /** - * Evaluators - * @description Custom evaluators registered with this agent + * Rules + * @description Custom rules registered with this agent */ - evaluators?: components['schemas']['EvaluatorSchema'][]; + rules?: components['schemas']['RuleSchema'][]; /** * Steps * @description Steps registered with this agent @@ -2515,28 +2515,28 @@ export interface components { version: string; }; /** - * InitAgentEvaluatorRemoval - * @description Details for an evaluator removed during overwrite mode. + * InitAgentRuleRemoval + * @description Details for a rule removed during overwrite mode. 
*/ - InitAgentEvaluatorRemoval: { + InitAgentRuleRemoval: { /** * Control Ids - * @description IDs of active controls referencing this evaluator + * @description IDs of active controls referencing this rule */ control_ids?: number[]; /** * Control Names - * @description Names of active controls referencing this evaluator + * @description Names of active controls referencing this rule */ control_names?: string[]; /** * Name - * @description Evaluator name removed by overwrite + * @description Rule name removed by overwrite */ name: string; /** * Referenced By Active Controls - * @description Whether this evaluator is still referenced by active controls + * @description Whether this rule is still referenced by active controls * @default false */ referenced_by_active_controls: boolean; @@ -2547,25 +2547,25 @@ export interface components { */ InitAgentOverwriteChanges: { /** - * Evaluator Removals - * @description Per-evaluator removal details, including active control references + * Rule Removals + * @description Per-rule removal details, including active control references */ - evaluator_removals?: components['schemas']['InitAgentEvaluatorRemoval'][]; + rule_removals?: components['schemas']['InitAgentRuleRemoval'][]; /** - * Evaluators Added - * @description Evaluator names added by overwrite + * Rules Added + * @description Rule names added by overwrite */ - evaluators_added?: string[]; + rules_added?: string[]; /** - * Evaluators Removed - * @description Evaluator names removed by overwrite + * Rules Removed + * @description Rule names removed by overwrite */ - evaluators_removed?: string[]; + rules_removed?: string[]; /** - * Evaluators Updated - * @description Existing evaluator names updated by overwrite + * Rules Updated + * @description Existing rule names updated by overwrite */ - evaluators_updated?: string[]; + rules_updated?: string[]; /** * Metadata Changed * @description Whether agent metadata changed @@ -2597,7 +2597,7 @@ export interface components { * 
"agent_name": "customer-service-bot", * "agent_version": "1.0.0" * }, - * "evaluators": [ + * "rules": [ * { * "config_schema": { * "properties": { @@ -2633,15 +2633,15 @@ export interface components { /** @description Agent metadata including ID, name, and version */ agent: components['schemas']['Agent']; /** - * @description Conflict handling mode for init registration updates. 'strict' preserves existing compatibility checks. 'overwrite' applies latest-init-wins replacement for steps and evaluators. + * @description Conflict handling mode for init registration updates. 'strict' preserves existing compatibility checks. 'overwrite' applies latest-init-wins replacement for steps and rules. * @default strict */ conflict_mode: components['schemas']['ConflictMode']; /** - * Evaluators - * @description Custom evaluator schemas for config validation + * Rules + * @description Custom rule schemas for config validation */ - evaluators?: components['schemas']['EvaluatorSchema'][]; + rules?: components['schemas']['RuleSchema'][]; /** * Force Replace * @description If true, replace corrupted agent data instead of failing. Use only when agent data is corrupted and cannot be parsed. @@ -2710,12 +2710,12 @@ export interface components { pagination: components['schemas']['PaginationInfo']; }; /** - * ListEvaluatorsResponse - * @description Response for listing agent's evaluator schemas. + * ListRulesResponse + * @description Response for listing agent's rule schemas. */ - ListEvaluatorsResponse: { - /** Evaluators */ - evaluators: components['schemas']['EvaluatorSchemaItem'][]; + ListRulesResponse: { + /** Rules */ + rules: components['schemas']['RuleSchemaItem'][]; pagination: components['schemas']['PaginationInfo']; }; /** @@ -2764,14 +2764,14 @@ export interface components { }; /** * PatchAgentRequest - * @description Request to modify an agent (remove steps/evaluators). + * @description Request to modify an agent (remove steps/rules). 
*/ PatchAgentRequest: { /** - * Remove Evaluators - * @description Evaluator names to remove from the agent + * Remove Rules + * @description Rule names to remove from the agent */ - remove_evaluators?: string[]; + remove_rules?: string[]; /** * Remove Steps * @description Step identifiers to remove from the agent @@ -2784,10 +2784,10 @@ export interface components { */ PatchAgentResponse: { /** - * Evaluators Removed - * @description Evaluator names that were removed + * Rules Removed + * @description Rule names that were removed */ - evaluators_removed?: string[]; + rules_removed?: string[]; /** * Steps Removed * @description Step identifiers that were removed @@ -2926,8 +2926,8 @@ export interface components { * * Attributes: * execution_count: Total executions across all controls - * match_count: Total matches across all controls (evaluator matched) - * non_match_count: Total non-matches across all controls (evaluator didn't match) + * match_count: Total matches across all controls (rule matched) + * non_match_count: Total non-matches across all controls (rule didn't match) * error_count: Total errors across all controls (evaluation failed) * action_counts: Breakdown of actions for matched executions * timeseries: Time-series data points (only when include_timeseries=true) @@ -3503,7 +3503,7 @@ export interface operations { }; }; }; - list_agent_evaluators_api_v1_agents__agent_name__evaluators_get: { + list_agent_rules_api_v1_agents__agent_name__rules_get: { parameters: { query?: { cursor?: string | null; @@ -3517,13 +3517,13 @@ export interface operations { }; requestBody?: never; responses: { - /** @description Evaluator schemas registered with this agent */ + /** @description Rule schemas registered with this agent */ 200: { headers: { [name: string]: unknown; }; content: { - 'application/json': components['schemas']['ListEvaluatorsResponse']; + 'application/json': components['schemas']['ListRulesResponse']; }; }; /** @description Validation Error */ @@ 
-3537,25 +3537,25 @@ export interface operations { }; }; }; - get_agent_evaluator_api_v1_agents__agent_name__evaluators__evaluator_name__get: { + get_agent_rule_api_v1_agents__agent_name__rules__rule_name__get: { parameters: { query?: never; header?: never; path: { agent_name: string; - evaluator_name: string; + rule_name: string; }; cookie?: never; }; requestBody?: never; responses: { - /** @description Evaluator schema details */ + /** @description Rule schema details */ 200: { headers: { [name: string]: unknown; }; content: { - 'application/json': components['schemas']['EvaluatorSchemaItem']; + 'application/json': components['schemas']['RuleSchemaItem']; }; }; /** @description Validation Error */ @@ -4122,7 +4122,7 @@ export interface operations { }; }; }; - get_evaluators_api_v1_evaluators_get: { + get_rules_api_v1_rules_get: { parameters: { query?: never; header?: never; @@ -4131,14 +4131,14 @@ export interface operations { }; requestBody?: never; responses: { - /** @description Dictionary of evaluator name to evaluator info */ + /** @description Dictionary of rule name to rule info */ 200: { headers: { [name: string]: unknown; }; content: { 'application/json': { - [key: string]: components['schemas']['EvaluatorInfo']; + [key: string]: components['schemas']['RuleInfo']; }; }; }; diff --git a/ui/src/core/api/types.ts b/ui/src/core/api/types.ts index 0f515271..982d1f82 100644 --- a/ui/src/core/api/types.ts +++ b/ui/src/core/api/types.ts @@ -14,7 +14,7 @@ import type { components, operations } from './generated/api-types'; export type ValidationErrorItem = { /** Resource type where error occurred (e.g., 'Control') */ resource: string; - /** Field path that caused the error (e.g., 'data.evaluator.config.pattern') */ + /** Field path that caused the error (e.g., 'data.rule.config.pattern') */ field: string | null; /** Machine-readable error code (e.g., 'required', 'invalid_format') */ code: string; @@ -50,14 +50,14 @@ export type ProblemDetail = { // Agent types 
export type Agent = components['schemas']['Agent']; -export type EvaluatorSchema = components['schemas']['EvaluatorSchema']; +export type RuleSchema = components['schemas']['RuleSchema']; export type StepSchema = components['schemas']['StepSchema']; export type AgentSummary = components['schemas']['AgentSummary']; export type ListAgentsResponse = components['schemas']['ListAgentsResponse']; -// Evaluator types -export type EvaluatorInfo = components['schemas']['EvaluatorInfo']; -export type EvaluatorsResponse = Record; +// Rule types +export type RuleInfo = components['schemas']['RuleInfo']; +export type RulesResponse = Record; // Request/Response types export type InitAgentRequest = components['schemas']['InitAgentRequest']; export type InitAgentResponse = components['schemas']['InitAgentResponse']; diff --git a/ui/src/core/evaluators/index.ts b/ui/src/core/evaluators/index.ts deleted file mode 100644 index ed862eb8..00000000 --- a/ui/src/core/evaluators/index.ts +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Evaluator Registry - * - * This module exports all available evaluators and provides - * utilities for working with them. - * - * ## Adding a New Evaluator - * - * 1. Create a new folder under `evaluators/` (e.g., `evaluators/my-evaluator/`) - * 2. Create the following files: - * - `types.ts` - Form value types - * - `form.tsx` - React form component - * - `index.ts` - Evaluator definition (implements EvaluatorDefinition interface) - * 3. Import and add the evaluator to the `evaluators` array below - * 4. That's it! The main edit-control component will automatically pick it up. - * - * @example - * ```typescript - * // evaluators/my-evaluator/index.ts - * import type { EvaluatorDefinition } from "../types"; - * import { MyForm } from "./form"; - * import type { MyFormValues } from "./types"; - * - * export const myEvaluator: EvaluatorDefinition = { - * id: "my-evaluator", - * displayName: "My Evaluator", - * initialValues: { ... }, - * validate: { ... 
}, - * toConfig: (values) => ({ ... }), - * fromConfig: (config) => ({ ... }), - * FormComponent: MyForm, - * }; - * ``` - */ - -import { jsonEvaluator } from './json'; -import { listEvaluator } from './list'; -import { lunaEvaluator } from './luna'; -import { regexEvaluator } from './regex'; -import { sqlEvaluator } from './sql'; -import type { AnyEvaluatorDefinition } from './types'; - -/** - * All registered evaluators. - * Add new evaluators here to make them available in the UI. - */ -export const evaluators: AnyEvaluatorDefinition[] = [ - regexEvaluator, - listEvaluator, - jsonEvaluator, - sqlEvaluator, - lunaEvaluator, -]; - -/** - * Map of evaluator ID to evaluator for quick lookup. - */ -export const evaluatorRegistry = new Map( - evaluators.map((evaluator) => [evaluator.id, evaluator]) -); - -/** - * Get an evaluator by ID. - * Returns undefined if the evaluator is not found. - */ -export const getEvaluator = (id: string): AnyEvaluatorDefinition | undefined => - evaluatorRegistry.get(id); - -/** - * Check if an evaluator exists. - */ -export const hasEvaluator = (id: string): boolean => evaluatorRegistry.has(id); - -// Re-export types and individual evaluators for direct imports -export { jsonEvaluator } from './json'; -export { listEvaluator } from './list'; -export { lunaEvaluator } from './luna'; -export { regexEvaluator } from './regex'; -export { sqlEvaluator } from './sql'; -export type { - AnyEvaluatorDefinition, - EvaluatorDefinition, - EvaluatorFormProps, -} from './types'; diff --git a/ui/src/core/hooks/query-hooks/use-evaluators.ts b/ui/src/core/hooks/query-hooks/use-evaluators.ts deleted file mode 100644 index 44d56a10..00000000 --- a/ui/src/core/hooks/query-hooks/use-evaluators.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { useQuery } from '@tanstack/react-query'; - -import { api } from '@/core/api/client'; -import type { EvaluatorsResponse } from '@/core/api/types'; - -/** - * Query hook to fetch available evaluators. 
- * Returns a dictionary of evaluator name to evaluator info. - */ -export function useEvaluators() { - return useQuery({ - queryKey: ['evaluators'], - queryFn: async () => { - const { data, error } = await api.evaluators.list(); - if (error) throw error; - return data!; - }, - }); -} diff --git a/ui/src/core/hooks/query-hooks/use-rules.ts b/ui/src/core/hooks/query-hooks/use-rules.ts new file mode 100644 index 00000000..148f6f5f --- /dev/null +++ b/ui/src/core/hooks/query-hooks/use-rules.ts @@ -0,0 +1,19 @@ +import { useQuery } from '@tanstack/react-query'; + +import { api } from '@/core/api/client'; +import type { RulesResponse } from '@/core/api/types'; + +/** + * Query hook to fetch available rules. + * Returns a dictionary of rule name to rule info. + */ +export function useRules() { + return useQuery({ + queryKey: ['rules'], + queryFn: async () => { + const { data, error } = await api.rules.list(); + if (error) throw error; + return data!; + }, + }); +} diff --git a/ui/src/core/hooks/use-modal-route.ts b/ui/src/core/hooks/use-modal-route.ts index 74f2f5f3..ee482282 100644 --- a/ui/src/core/hooks/use-modal-route.ts +++ b/ui/src/core/hooks/use-modal-route.ts @@ -9,31 +9,31 @@ import { SUBMODAL_NAMES } from '@/core/constants/modal-routes'; * URL structure: * - ?modal=control-store - Opens Control Store modal * - ?modal=control-store&submodal=add-new - Opens Control Store with Add New Control modal - * - ?modal=control-store&submodal=create&evaluator=regex - Opens Control Store with Create Control modal + * - ?modal=control-store&submodal=create&rule=regex - Opens Control Store with Create Control modal * - ?modal=control-store&submodal=edit&controlId=123 - Opens Control Store with Edit Control modal * - ?modal=edit&controlId=123 - Opens Edit Control modal directly (from agent detail page) */ export function useModalRoute() { const router = useRouter(); - const { modal, submodal, evaluator, controlId } = router.query; + const { modal, submodal, rule, controlId } = 
router.query; const modalState = useMemo(() => { return { modal: typeof modal === 'string' ? modal : null, submodal: typeof submodal === 'string' ? submodal : null, - evaluator: typeof evaluator === 'string' ? evaluator : null, + rule: typeof rule === 'string' ? rule : null, controlId: typeof controlId === 'string' ? controlId : null, }; - }, [modal, submodal, evaluator, controlId]); + }, [modal, submodal, rule, controlId]); const openModal = useCallback( ( modalName: string, - params?: { submodal?: string; evaluator?: string; controlId?: string } + params?: { submodal?: string; rule?: string; controlId?: string } ) => { const query: Record = { modal: modalName }; if (params?.submodal) query.submodal = params.submodal; - if (params?.evaluator) query.evaluator = params.evaluator; + if (params?.rule) query.rule = params.rule; if (params?.controlId) query.controlId = params.controlId; router.push( @@ -53,7 +53,7 @@ export function useModalRoute() { const query = { ...router.query }; delete query.modal; delete query.submodal; - delete query.evaluator; + delete query.rule; delete query.controlId; router.push( @@ -70,16 +70,16 @@ export function useModalRoute() { // Extract and discard submodal-related params, keep the rest const { submodal: currentSubmodal, - evaluator, + rule, controlId, ...rest } = router.query; // Silence unused vars - we're destructuring to remove them - void evaluator; + void rule; void controlId; // If closing from "create", go back to "add-new" instead of closing everything - // This allows the user to select a different evaluator + // This allows the user to select a different rule if (currentSubmodal === SUBMODAL_NAMES.CREATE) { router.push( { diff --git a/ui/src/core/page-components/agent-detail/modals/add-new-control/index.tsx b/ui/src/core/page-components/agent-detail/modals/add-new-control/index.tsx index 3a21e7ed..6b70822c 100644 --- a/ui/src/core/page-components/agent-detail/modals/add-new-control/index.tsx +++ 
b/ui/src/core/page-components/agent-detail/modals/add-new-control/index.tsx @@ -17,21 +17,21 @@ import { type ColumnDef } from '@tanstack/react-table'; import { useMemo, useState } from 'react'; import { ErrorBoundary } from '@/components/error-boundary'; -import type { EvaluatorInfo } from '@/core/api/types'; +import type { RuleInfo } from '@/core/api/types'; import { MODAL_NAMES, SUBMODAL_NAMES } from '@/core/constants/modal-routes'; -import { useEvaluators } from '@/core/hooks/query-hooks/use-evaluators'; +import { useRules } from '@/core/hooks/query-hooks/use-rules'; import { useModalRoute } from '@/core/hooks/use-modal-route'; import { EditControlContent } from '../edit-control/edit-control-content'; import { sanitizeControlNamePart } from '../edit-control/utils'; -type EvaluatorWithId = EvaluatorInfo & { id: string }; +type RuleWithId = RuleInfo & { id: string }; /** - * Default evaluator configs for each evaluator type + * Default rule configs for each rule type * Based on backend models in agent_control_models/controls.py */ -const DEFAULT_EVALUATOR_CONFIGS: Record> = { +const DEFAULT_RULE_CONFIGS: Record> = { regex: { pattern: '^.*$', }, @@ -54,10 +54,8 @@ const DEFAULT_EVALUATOR_CONFIGS: Record> = { }, }; -function getDefaultConfigForEvaluator( - evaluatorId: string -): Record { - return DEFAULT_EVALUATOR_CONFIGS[evaluatorId] ?? {}; +function getDefaultConfigForRule(ruleId: string): Record { + return DEFAULT_RULE_CONFIGS[ruleId] ?? 
{}; } function buildJsonDraftControl() { @@ -76,9 +74,9 @@ function buildJsonDraftControl() { selector: { path: '*', }, - evaluator: { + rule: { name: 'regex', - config: getDefaultConfigForEvaluator('regex'), + config: getDefaultConfigForRule('regex'), }, }, action: { decision: 'deny' as const }, @@ -99,28 +97,28 @@ export function AddNewControlModal({ agentId, }: AddNewControlModalProps) { const [searchQuery, setSearchQuery] = useState(''); - const { submodal, evaluator, openModal, closeSubmodal, closeModal } = + const { submodal, rule, openModal, closeSubmodal, closeModal } = useModalRoute(); - const { data: evaluatorsData, isLoading, error } = useEvaluators(); + const { data: rulesData, isLoading, error } = useRules(); // Derive submodal open state from URL const editModalOpened = submodal === SUBMODAL_NAMES.CREATE; - // Find selected evaluator from URL or state - const selectedEvaluator = useMemo(() => { - if (evaluator && evaluatorsData) { - const evaluatorData = evaluatorsData[evaluator]; - if (evaluatorData) { - return { ...evaluatorData, id: evaluator }; + // Find selected rule from URL or state + const selectedRule = useMemo(() => { + if (rule && rulesData) { + const ruleData = rulesData[rule]; + if (ruleData) { + return { ...ruleData, id: rule }; } } return null; - }, [evaluator, evaluatorsData]); + }, [rule, rulesData]); - const handleAddClick = (evaluator: EvaluatorWithId) => { + const handleAddClick = (rule: RuleWithId) => { openModal(MODAL_NAMES.CONTROL_STORE, { submodal: SUBMODAL_NAMES.CREATE, - evaluator: evaluator.id, + rule: rule.id, }); }; @@ -140,23 +138,23 @@ export function AddNewControlModal({ closeModal(); }; - // Transform evaluators record to array for table display - const evaluators = useMemo(() => { - if (!evaluatorsData) return []; - return Object.entries(evaluatorsData).map(([key, evaluator]) => ({ - ...evaluator, + // Transform rules record to array for table display + const rules = useMemo(() => { + if (!rulesData) return []; + 
return Object.entries(rulesData).map(([key, rule]) => ({ + ...rule, id: key, })); - }, [evaluatorsData]); + }, [rulesData]); const draftControl = useMemo(() => { - if (selectedEvaluator) { - const name = `new-${sanitizeControlNamePart(selectedEvaluator.name)}-control`; + if (selectedRule) { + const name = `new-${sanitizeControlNamePart(selectedRule.name)}-control`; return { id: 0, name, control: { - description: selectedEvaluator.description, + description: selectedRule.description, enabled: true, execution: 'server' as const, scope: { @@ -167,9 +165,9 @@ export function AddNewControlModal({ selector: { path: '*', }, - evaluator: { - name: selectedEvaluator.id, - config: getDefaultConfigForEvaluator(selectedEvaluator.id), + rule: { + name: selectedRule.id, + config: getDefaultConfigForRule(selectedRule.id), }, }, action: { decision: 'deny' as const }, @@ -178,9 +176,9 @@ export function AddNewControlModal({ } return buildJsonDraftControl(); - }, [selectedEvaluator]); + }, [selectedRule]); - const columns: ColumnDef[] = [ + const columns: ColumnDef[] = [ { id: 'name', header: 'Name', @@ -228,8 +226,8 @@ export function AddNewControlModal({ }, ]; - const filteredEvaluators = evaluators.filter((evaluator) => - evaluator.name.toLowerCase().includes(searchQuery.toLowerCase()) + const filteredRules = rules.filter((rule) => + rule.name.toLowerCase().includes(searchQuery.toLowerCase()) ); return ( @@ -263,7 +261,7 @@ export function AddNewControlModal({ - Select an evaluator to create a new control or start from JSON + Select a rule to create a new control or start from JSON @@ -277,7 +275,7 @@ export function AddNewControlModal({ {/* Search and Docs Link */} } flex={1} maw={250} @@ -294,7 +292,7 @@ export function AddNewControlModal({ Write your own - Learn here on how to add new type of evaluator.{' '} + Learn here on how to add new type of rule.{' '} - Failed to load evaluators + Failed to load rules - ) : filteredEvaluators.length > 0 ? 
( + ) : filteredRules.length > 0 ? ( ) : ( - No evaluators found + No rules found )} @@ -362,7 +360,7 @@ export function AddNewControlModal({ control={draftControl} agentId={agentId} mode="create" - initialEditorMode={selectedEvaluator ? 'form' : 'json'} + initialEditorMode={selectedRule ? 'form' : 'json'} onClose={handleEditModalClose} onSuccess={handleEditModalSuccess} /> diff --git a/ui/src/core/page-components/agent-detail/modals/control-store/index.tsx b/ui/src/core/page-components/agent-detail/modals/control-store/index.tsx index d968e692..e1a0fa20 100644 --- a/ui/src/core/page-components/agent-detail/modals/control-store/index.tsx +++ b/ui/src/core/page-components/agent-detail/modals/control-store/index.tsx @@ -50,7 +50,7 @@ export function ControlStoreModal({ const [debouncedSearch] = useDebouncedValue(searchQuery, 300); const { submodal, - evaluator: _evaluator, + rule: _rule, controlId, openModal, closeSubmodal, @@ -194,7 +194,7 @@ export function ControlStoreModal({ closeModal(); }; - // Build a draft control for the edit modal with full evaluator config (clone: append -copy to name) + // Build a draft control for the edit modal with full rule config (clone: append -copy to name) const draftControl = useMemo(() => { if (!selectedControl) return null; const { summary, definition } = selectedControl; diff --git a/ui/src/core/page-components/agent-detail/modals/create-from-template/index.tsx b/ui/src/core/page-components/agent-detail/modals/create-from-template/index.tsx index 3f4681fd..d60ab4ce 100644 --- a/ui/src/core/page-components/agent-detail/modals/create-from-template/index.tsx +++ b/ui/src/core/page-components/agent-detail/modals/create-from-template/index.tsx @@ -232,7 +232,7 @@ export function CreateFromTemplate({ "scope": { "stages": ["pre"] }, "condition": { "selector": { "path": "input" }, - "evaluator": { + "rule": { "name": "regex", "config": { "pattern": { "$param": "pattern" } } } diff --git 
a/ui/src/core/page-components/agent-detail/modals/edit-control/api-error-alert.tsx b/ui/src/core/page-components/agent-detail/modals/edit-control/api-error-alert.tsx index b216b58d..5b698d44 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/api-error-alert.tsx +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/api-error-alert.tsx @@ -3,7 +3,7 @@ import { IconAlertCircle } from '@tabler/icons-react'; import type { ProblemDetail } from '@/core/api/types'; -/** Convert API field path (e.g. data.evaluator.match_on) to user-friendly label (e.g. Match on) */ +/** Convert API field path (e.g. data.rule.match_on) to user-friendly label (e.g. Match on) */ function formatFieldForDisplay(apiField: string | null): string { if (!apiField) return ''; const lastSegment = apiField.split('.').pop() ?? apiField; diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/control-condition.ts b/ui/src/core/page-components/agent-detail/modals/edit-control/control-condition.ts index cfb30571..683933cf 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/control-condition.ts +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/control-condition.ts @@ -1,17 +1,17 @@ import type { ControlDefinition } from '@/core/api/types'; -import type { AnyEvaluatorDefinition } from '@/core/evaluators'; -import { getEvaluator } from '@/core/evaluators'; +import type { AnyRuleDefinition } from '@/core/rules'; +import { getRule } from '@/core/rules'; export type LeafConditionDetails = { selectorPath: string; - evaluatorName: string; - evaluatorConfig: Record; + ruleName: string; + ruleConfig: Record; }; export type ControlConditionState = { leafCondition: LeafConditionDetails | null; - evaluatorId: string; - evaluator: AnyEvaluatorDefinition | undefined; + ruleId: string; + rule: AnyRuleDefinition | undefined; canEditLeafCondition: boolean; }; @@ -19,14 +19,14 @@ function getLeafConditionDetails( definition: 
ControlDefinition ): LeafConditionDetails | null { const condition = definition.condition; - if (!condition.selector || !condition.evaluator) { + if (!condition.selector || !condition.rule) { return null; } return { selectorPath: condition.selector.path ?? '*', - evaluatorName: condition.evaluator.name, - evaluatorConfig: condition.evaluator.config, + ruleName: condition.rule.name, + ruleConfig: condition.rule.config, }; } @@ -34,13 +34,13 @@ export function getControlConditionState( definition: ControlDefinition ): ControlConditionState { const leafCondition = getLeafConditionDetails(definition); - const evaluatorId = leafCondition?.evaluatorName ?? ''; - const evaluator = getEvaluator(evaluatorId); + const ruleId = leafCondition?.ruleName ?? ''; + const rule = getRule(ruleId); return { leafCondition, - evaluatorId, - evaluator, + ruleId, + rule, canEditLeafCondition: Boolean(leafCondition), }; } @@ -59,8 +59,8 @@ export function buildEditableCondition( selector: { path: selectorPath, }, - evaluator: { - name: leafCondition.evaluatorName, + rule: { + name: leafCondition.ruleName, config: finalConfig, }, }; diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/control-definition-form.tsx b/ui/src/core/page-components/agent-detail/modals/edit-control/control-definition-form.tsx index 77aeb98d..4adc0492 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/control-definition-form.tsx +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/control-definition-form.tsx @@ -119,7 +119,7 @@ export const ControlDefinitionForm = ({ label={ } labelProps={labelPropsInline} diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/edit-control-content.tsx b/ui/src/core/page-components/agent-detail/modals/edit-control/edit-control-content.tsx index 96fc60c5..f255e4e2 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/edit-control-content.tsx +++ 
b/ui/src/core/page-components/agent-detail/modals/edit-control/edit-control-content.tsx @@ -27,7 +27,7 @@ import type { import { useAddControlToAgent } from '@/core/hooks/query-hooks/use-add-control-to-agent'; import { useAgent } from '@/core/hooks/query-hooks/use-agent'; import { useControlSchema } from '@/core/hooks/query-hooks/use-control-schema'; -import { useEvaluators } from '@/core/hooks/query-hooks/use-evaluators'; +import { useRules } from '@/core/hooks/query-hooks/use-rules'; import { useUpdateControl } from '@/core/hooks/query-hooks/use-update-control'; import { useUpdateControlMetadata } from '@/core/hooks/query-hooks/use-update-control-metadata'; import { useValidateControlData } from '@/core/hooks/query-hooks/use-validate-control-data'; @@ -42,15 +42,15 @@ import { getControlConditionState, } from './control-condition'; import { ControlDefinitionForm } from './control-definition-form'; -import { EvaluatorConfigSection } from './evaluator-config-section'; +import { RuleConfigSection } from './rule-config-section'; import { TemplateEditContent } from './template-edit-content'; import type { ControlDefinitionFormValues, ControlEditorMode, EditControlMode, - JsonEditorEvaluatorOption, + JsonEditorRuleOption, } from './types'; -import { useEvaluatorConfigState } from './use-evaluator-config-state'; +import { useRuleConfigState } from './use-rule-config-state'; import { applyApiErrorsToForms } from './utils'; function isTemplateBacked(control: Control): boolean { @@ -58,7 +58,7 @@ function isTemplateBacked(control: Control): boolean { return def?.template != null; } -const EVALUATOR_CONFIG_HEIGHT = 450; +const RULE_CONFIG_HEIGHT = 450; const JSON_EDITOR_HEIGHT = 520; type ValidationStatus = 'idle' | 'validating' | 'valid' | 'invalid'; @@ -73,7 +73,7 @@ const DEFAULT_CONTROL_TEMPLATE = JSON.stringify( }, condition: { selector: { path: 'output' }, - evaluator: { + rule: { name: 'regex', config: { pattern: '\\b\\d{3}-\\d{2}-\\d{4}\\b' }, }, @@ -134,7 +134,7 @@ 
const RawEditControlContent = ({ }: EditControlContentProps) => { const { data: agentResponse } = useAgent(agentId); const { data: controlSchemaResponse } = useControlSchema(); - const { data: globalEvaluators } = useEvaluators(); + const { data: globalRules } = useRules(); const steps = agentResponse?.steps ?? []; const agentName = agentResponse?.agent?.agent_name ?? agentId; @@ -167,43 +167,40 @@ const RawEditControlContent = ({ : updateControl.isPending || updateControlMetadata.isPending; const formRef = useRef(null); - const formInitializedForEvaluator = useRef(''); - const { leafCondition, evaluatorId, evaluator, canEditLeafCondition } = - useMemo( - () => getControlConditionState(workingDefinition), - [workingDefinition] - ); - const availableEvaluators = useMemo(() => { - const merged = new Map(); + const formInitializedForRule = useRef(''); + const { leafCondition, ruleId, rule, canEditLeafCondition } = useMemo( + () => getControlConditionState(workingDefinition), + [workingDefinition] + ); + const availableRules = useMemo(() => { + const merged = new Map(); - for (const [id, evaluatorInfo] of Object.entries(globalEvaluators ?? {})) { + for (const [id, ruleInfo] of Object.entries(globalRules ?? {})) { merged.set(id, { id, - label: evaluatorInfo.name, - description: evaluatorInfo.description, + label: ruleInfo.name, + description: ruleInfo.description, source: 'global', - configSchema: evaluatorInfo.config_schema, + configSchema: ruleInfo.config_schema, }); } - for (const evaluatorSchema of agentResponse?.evaluators ?? []) { - const id = `${agentName}:${evaluatorSchema.name}`; + for (const ruleSchema of agentResponse?.rules ?? 
[]) { + const id = `${agentName}:${ruleSchema.name}`; merged.set(id, { id, - label: evaluatorSchema.name, - description: evaluatorSchema.description, + label: ruleSchema.name, + description: ruleSchema.description, source: 'agent', - configSchema: evaluatorSchema.config_schema, + configSchema: ruleSchema.config_schema, }); } return [...merged.values()]; - }, [agentName, agentResponse?.evaluators, globalEvaluators]); - const activeEvaluatorOption = useMemo( - () => - availableEvaluators.find((candidate) => candidate.id === evaluatorId) ?? - null, - [availableEvaluators, evaluatorId] + }, [agentName, agentResponse?.rules, globalRules]); + const activeRuleOption = useMemo( + () => availableRules.find((candidate) => candidate.id === ruleId) ?? null, + [availableRules, ruleId] ); const definitionForm = useForm({ @@ -240,31 +237,31 @@ const RawEditControlContent = ({ }, }); - const evaluatorForm = useForm({ - initialValues: evaluator?.initialValues ?? {}, - validate: evaluator?.validate, + const ruleForm = useForm({ + initialValues: rule?.initialValues ?? 
{}, + validate: rule?.validate, }); - const getEvaluatorConfig = useCallback(() => { + const getRuleConfig = useCallback(() => { if (!leafCondition) { return {}; } - if (!evaluator) { - return leafCondition.evaluatorConfig; + if (!rule) { + return leafCondition.ruleConfig; } - if (formInitializedForEvaluator.current !== evaluatorId) { - return evaluator.toConfig(evaluator.initialValues); + if (formInitializedForRule.current !== ruleId) { + return rule.toConfig(rule.initialValues); } - return evaluator.toConfig(evaluatorForm.values); - }, [evaluator, evaluatorForm.values, evaluatorId, leafCondition]); + return rule.toConfig(ruleForm.values); + }, [rule, ruleForm.values, ruleId, leafCondition]); const syncJsonToForm = useCallback( (config: Record) => { - if (evaluator) { - evaluatorForm.setValues(evaluator.fromConfig(config)); + if (rule) { + ruleForm.setValues(rule.fromConfig(config)); } }, - [evaluator, evaluatorForm] + [rule, ruleForm] ); const buildLeafCondition = useCallback( @@ -326,7 +323,7 @@ const RawEditControlContent = ({ [workingDefinition.tags] ); - const validateEvaluatorConfig = useCallback( + const validateRuleConfig = useCallback( async ( config: Record, options?: { signal?: AbortSignal } @@ -345,13 +342,13 @@ const RawEditControlContent = ({ ] ); - const evaluatorConfig = useEvaluatorConfigState({ - getConfigFromForm: getEvaluatorConfig, + const ruleConfig = useRuleConfigState({ + getConfigFromForm: getRuleConfig, onConfigChange: syncJsonToForm, - onValidateConfig: validateEvaluatorConfig, + onValidateConfig: validateRuleConfig, }); - const { reset } = evaluatorConfig; + const { reset } = ruleConfig; const getDefinitionFromFormState = useCallback((): ControlDefinition | null => { @@ -359,14 +356,14 @@ const RawEditControlContent = ({ if (canEditLeafCondition) { let finalConfig: Record = - leafCondition?.evaluatorConfig ?? {}; + leafCondition?.ruleConfig ?? 
{}; - if (evaluatorConfig.configViewMode === 'json') { - const jsonConfig = evaluatorConfig.getJsonConfig(); + if (ruleConfig.configViewMode === 'json') { + const jsonConfig = ruleConfig.getJsonConfig(); if (!jsonConfig) return null; finalConfig = jsonConfig; } else { - finalConfig = getEvaluatorConfig(); + finalConfig = getRuleConfig(); } condition = buildLeafCondition(definitionForm.values, finalConfig); @@ -380,9 +377,9 @@ const RawEditControlContent = ({ buildLeafCondition, canEditLeafCondition, definitionForm.values, - evaluatorConfig, - getEvaluatorConfig, - leafCondition?.evaluatorConfig, + ruleConfig, + getRuleConfig, + leafCondition?.ruleConfig, workingDefinition.condition, ]); @@ -519,8 +516,8 @@ const RawEditControlContent = ({ reset(); setApiError(null); setUnmappedErrors([]); - formInitializedForEvaluator.current = ''; - }, [reset, evaluatorId, control.id, workingDefinition]); + formInitializedForRule.current = ''; + }, [reset, ruleId, control.id, workingDefinition]); useEffect(() => { const scope = workingDefinition.scope ?? {}; @@ -555,20 +552,18 @@ const RawEditControlContent = ({ definitionForm.setValues(syncedValues); definitionForm.resetDirty(syncedValues); - if (leafCondition && evaluator) { - evaluatorForm.setValues( - evaluator.fromConfig(leafCondition.evaluatorConfig) - ); - formInitializedForEvaluator.current = evaluatorId; + if (leafCondition && rule) { + ruleForm.setValues(rule.fromConfig(leafCondition.ruleConfig)); + formInitializedForRule.current = ruleId; } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [control.name, evaluator, evaluatorId, leafCondition, workingDefinition]); + }, [control.name, rule, ruleId, leafCondition, workingDefinition]); const handleSubmit = async (values: ControlDefinitionFormValues) => { setApiError(null); setUnmappedErrors([]); definitionForm.clearErrors(); - evaluatorForm.clearErrors(); + ruleForm.clearErrors(); // Bug fix #1: Explicitly validate the name before opening the confirm // dialog. 
The HTML5 `required` attribute may silently block submission @@ -599,8 +594,8 @@ const RawEditControlContent = ({ return; } } else { - if (canEditLeafCondition && evaluatorConfig.configViewMode === 'form') { - const validation = evaluatorForm.validate(); + if (canEditLeafCondition && ruleConfig.configViewMode === 'form') { + const validation = ruleForm.validate(); if (validation.hasErrors) return; } @@ -656,7 +651,7 @@ const RawEditControlContent = ({ const unmapped = applyApiErrorsToForms( problemDetail.errors, definitionForm, - canEditLeafCondition ? evaluatorForm : null + canEditLeafCondition ? ruleForm : null ); setUnmappedErrors( unmapped.map((e) => ({ @@ -733,11 +728,11 @@ const RawEditControlContent = ({ message: e.message, })) ); - } else if (evaluatorConfig.configViewMode === 'form') { + } else if (ruleConfig.configViewMode === 'form') { const unmapped = applyApiErrorsToForms( problemDetail.errors, definitionForm, - canEditLeafCondition ? evaluatorForm : null + canEditLeafCondition ? ruleForm : null ); setUnmappedErrors( unmapped.map((e) => ({ @@ -831,7 +826,7 @@ const RawEditControlContent = ({ }; }, [handleClose, onCloseRef]); - const formComponent = evaluator?.FormComponent; + const formComponent = rule?.FormComponent; const definitionStatusLabel = (() => { if (editorMode !== 'json') return null; if (definitionValidationStatus === 'validating') @@ -933,7 +928,7 @@ const RawEditControlContent = ({ testId="control-json-textarea" editorMode="control" schema={controlSchemaResponse?.schema ?? null} - evaluators={availableEvaluators} + rules={availableRules} steps={steps} /> @@ -949,15 +944,15 @@ const RawEditControlContent = ({ {canEditLeafCondition ? 
( - ) : ( diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-language.ts b/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-language.ts index 1f88b0cc..af5f8013 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-language.ts +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-language.ts @@ -9,11 +9,7 @@ import { import type { StepSchema } from '@/core/api/types'; -import type { - JsonEditorEvaluatorOption, - JsonEditorMode, - JsonSchema, -} from './types'; +import type { JsonEditorMode, JsonEditorRuleOption, JsonSchema } from './types'; type MonacoModule = typeof import('monaco-editor'); type JsonPath = Array; @@ -22,8 +18,8 @@ type JsonEditorAutocompleteContext = { mode: JsonEditorMode; modelUri: string; schema?: JsonSchema | null; - evaluators?: JsonEditorEvaluatorOption[]; - activeEvaluatorId?: string | null; + rules?: JsonEditorRuleOption[]; + activeRuleId?: string | null; steps?: StepSchema[]; }; @@ -485,11 +481,11 @@ function isSelectorPathLocation(path: JsonPath): boolean { ); } -function isEvaluatorNameLocation(path: JsonPath): boolean { +function isRuleNameLocation(path: JsonPath): boolean { return ( path.length >= 2 && path[path.length - 1] === 'name' && - path[path.length - 2] === 'evaluator' + path[path.length - 2] === 'rule' ); } @@ -575,7 +571,7 @@ function buildSchemaValueSnippet( } if ( - schemaTitle === 'EvaluatorSpec' || + schemaTitle === 'RuleSpec' || isSchemaWithProperties(normalized, ['name', 'config']) ) { return '{\n "name": "",\n "config": {}\n}'; @@ -597,13 +593,7 @@ function buildSchemaValueSnippet( if ( schemaTitle === 'ConditionNode' || - isSchemaWithProperties(normalized, [ - 'selector', - 'evaluator', - 'and', - 'or', - 'not', - ]) + isSchemaWithProperties(normalized, ['selector', 'rule', 'and', 'or', 'not']) ) { return '{}'; } @@ -797,51 +787,45 @@ function buildSelectorPathSuggestions( }); } -function 
findEvaluatorById( - evaluators: JsonEditorEvaluatorOption[] | undefined, +function findRuleById( + rules: JsonEditorRuleOption[] | undefined, id: string | null | undefined -): JsonEditorEvaluatorOption | null { - if (!evaluators || !id) { +): JsonEditorRuleOption | null { + if (!rules || !id) { return null; } - return evaluators.find((candidate) => candidate.id === id) ?? null; + return rules.find((candidate) => candidate.id === id) ?? null; } -function resolveActiveEvaluator( +function resolveActiveRule( context: JsonEditorAutocompleteContext, tree: JsonNode | undefined, path: JsonPath -): JsonEditorEvaluatorOption | null { - if (context.mode === 'evaluator-config') { - return findEvaluatorById(context.evaluators, context.activeEvaluatorId); +): JsonEditorRuleOption | null { + if (context.mode === 'rule-config') { + return findRuleById(context.rules, context.activeRuleId); } - const evaluatorIndex = getJsonPathFieldIndex(path, 'evaluator'); - if (!tree || evaluatorIndex < 0) { + const ruleIndex = getJsonPathFieldIndex(path, 'rule'); + if (!tree || ruleIndex < 0) { return null; } - const evaluatorNamePath = [ - ...path.slice(0, evaluatorIndex), - 'evaluator', - 'name', - ]; - const evaluatorNameNode = findNodeAtLocation(tree, evaluatorNamePath); - const evaluatorName = - typeof evaluatorNameNode?.value === 'string' - ? evaluatorNameNode.value - : null; + const ruleNamePath = [...path.slice(0, ruleIndex), 'rule', 'name']; + const ruleNameNode = findNodeAtLocation(tree, ruleNamePath); + const ruleName = + typeof ruleNameNode?.value === 'string' ? 
ruleNameNode.value : null; - return findEvaluatorById(context.evaluators, evaluatorName); + return findRuleById(context.rules, ruleName); } function getInitialSchemaCursor( context: JsonEditorAutocompleteContext, - activeEvaluator: JsonEditorEvaluatorOption | null + activeRule: JsonEditorRuleOption | null ): SchemaCursor { - if (context.mode === 'evaluator-config') { - const rootSchema = asSchema(activeEvaluator?.configSchema ?? null); + if (context.mode === 'rule-config') { + const rootSchema = asSchema(activeRule?.configSchema ?? null); return { schema: normalizeSchema(rootSchema, rootSchema), rootSchema, @@ -855,21 +839,21 @@ function getInitialSchemaCursor( }; } -function isEvaluatorConfigSegment(path: JsonPath, index: number): boolean { +function isRuleConfigSegment(path: JsonPath, index: number): boolean { return ( typeof path[index] === 'string' && path[index] === 'config' && index > 0 && - path[index - 1] === 'evaluator' + path[index - 1] === 'rule' ); } function resolveSchemaAtJsonPath( context: JsonEditorAutocompleteContext, - activeEvaluator: JsonEditorEvaluatorOption | null, + activeRule: JsonEditorRuleOption | null, path: JsonPath ): SchemaCursor { - let cursor = getInitialSchemaCursor(context, activeEvaluator); + let cursor = getInitialSchemaCursor(context, activeRule); for (let index = 0; index < path.length; index += 1) { const segment = path[index]; @@ -877,8 +861,8 @@ function resolveSchemaAtJsonPath( return cursor; } - if (context.mode === 'control' && isEvaluatorConfigSegment(path, index)) { - const rootSchema = asSchema(activeEvaluator?.configSchema ?? null); + if (context.mode === 'control' && isRuleConfigSegment(path, index)) { + const rootSchema = asSchema(activeRule?.configSchema ?? 
null); cursor = { schema: normalizeSchema(rootSchema, rootSchema), rootSchema, @@ -903,21 +887,18 @@ function resolveSchemaAtJsonPath( return cursor; } -function buildEvaluatorNameSuggestions( +function buildRuleNameSuggestions( monaco: MonacoModule, range: import('monaco-editor').IRange, - evaluators: JsonEditorEvaluatorOption[] | undefined, + rules: JsonEditorRuleOption[] | undefined, isStringValueContext: boolean ) { - return (evaluators ?? []).map((evaluator, index) => ({ - label: evaluator.id, + return (rules ?? []).map((rule, index) => ({ + label: rule.id, kind: monaco.languages.CompletionItemKind.Value, - detail: - evaluator.source === 'agent' - ? `${evaluator.label} (agent evaluator)` - : evaluator.label, - documentation: evaluator.description ?? undefined, - insertText: buildValueInsertText(evaluator.id, isStringValueContext), + detail: rule.source === 'agent' ? `${rule.label} (agent rule)` : rule.label, + documentation: rule.description ?? undefined, + insertText: buildValueInsertText(rule.id, isStringValueContext), range, sortText: `!0${index.toString().padStart(3, '0')}`, })); @@ -1120,14 +1101,14 @@ function buildCompletionSuggestions( node?.type === 'string' && !location.isAtPropertyKey; const suggestions: import('monaco-editor').languages.CompletionItem[] = []; - const activeEvaluator = resolveActiveEvaluator(context, tree, location.path); + const activeRule = resolveActiveRule(context, tree, location.path); - if (isEvaluatorNameLocation(location.path)) { + if (isRuleNameLocation(location.path)) { suggestions.push( - ...buildEvaluatorNameSuggestions( + ...buildRuleNameSuggestions( monaco, valueRange, - context.evaluators, + context.rules, isStringValueContext ) ); @@ -1162,7 +1143,7 @@ function buildCompletionSuggestions( : null) ?? 
getDefaultRange(monaco, model, position); const schemaCursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, propertyKeyContext.objectPath ); const currentPropertyName = @@ -1190,7 +1171,7 @@ function buildCompletionSuggestions( if (isValuePosition) { const valueSchemaCursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, location.path ); @@ -1241,29 +1222,29 @@ export function getJsonEditorCompletionItems( return buildCompletionSuggestions(monaco, model, position, context); } -type EvaluatorNodeInfo = { +type RuleNodeInfo = { name: string; nameNode: JsonNode; configNode: JsonNode | undefined; - evaluatorNode: JsonNode; + ruleNode: JsonNode; }; -function collectEvaluatorNames( +function collectRuleNames( node: JsonNode | undefined, - result: Map + result: Map ) { if (!node || node.type !== 'object' || !node.children) return; - const evaluatorNode = findNodeAtLocation(node, ['evaluator']); - if (evaluatorNode?.type === 'object') { - const nameNode = findNodeAtLocation(evaluatorNode, ['name']); - const configNode = findNodeAtLocation(evaluatorNode, ['config']); + const ruleNode = findNodeAtLocation(node, ['rule']); + if (ruleNode?.type === 'object') { + const nameNode = findNodeAtLocation(ruleNode, ['name']); + const configNode = findNodeAtLocation(ruleNode, ['config']); if (nameNode && typeof nameNode.value === 'string') { result.set(`${nameNode.offset}`, { name: nameNode.value, nameNode, configNode, - evaluatorNode, + ruleNode, }); } } @@ -1272,24 +1253,24 @@ function collectEvaluatorNames( const arrayNode = findNodeAtLocation(node, [key]); if (arrayNode?.type === 'array' && arrayNode.children) { for (const child of arrayNode.children) { - collectEvaluatorNames(child, result); + collectRuleNames(child, result); } } } const notNode = findNodeAtLocation(node, ['not']); if (notNode?.type === 'object') { - collectEvaluatorNames(notNode, result); + collectRuleNames(notNode, result); } } -export function 
extractEvaluatorNames(text: string): Map { +export function extractRuleNames(text: string): Map { const tree = parseTree(text); if (!tree) return new Map(); const conditionNode = findNodeAtLocation(tree, ['condition']); - const result = new Map(); - collectEvaluatorNames(conditionNode, result); + const result = new Map(); + collectRuleNames(conditionNode, result); const names = new Map(); for (const [key, info] of result) { @@ -1350,26 +1331,26 @@ export function buildDefaultConfig( return config; } -export function findEvaluatorConfigEdit( +export function findRuleConfigEdit( text: string, previousNames: Map, - evaluators: JsonEditorEvaluatorOption[] | undefined + rules: JsonEditorRuleOption[] | undefined ): { offset: number; length: number; newText: string } | null { const tree = parseTree(text); if (!tree) return null; const conditionNode = findNodeAtLocation(tree, ['condition']); - const result = new Map(); - collectEvaluatorNames(conditionNode, result); + const result = new Map(); + collectRuleNames(conditionNode, result); for (const [key, { name, configNode, nameNode }] of result) { const prevName = previousNames.get(key); if (prevName === undefined || prevName === name) continue; - const evaluator = evaluators?.find((e) => e.id === name); - if (!evaluator) continue; + const rule = rules?.find((e) => e.id === name); + if (!rule) continue; - const defaultConfig = buildDefaultConfig(evaluator.configSchema); + const defaultConfig = buildDefaultConfig(rule.configSchema); const configJson = JSON.stringify(defaultConfig, null, 2); if (configNode) { @@ -1477,14 +1458,10 @@ export function getEmptyValueHints( pos.column ); - const activeEvaluator = resolveActiveEvaluator( - context, - tree, - location.path - ); + const activeRule = resolveActiveRule(context, tree, location.path); - if (isEvaluatorNameLocation(location.path) && context.evaluators?.length) { - const names = context.evaluators.map((e) => e.id); + if (isRuleNameLocation(location.path) && 
context.rules?.length) { + const names = context.rules.map((e) => e.id); const display = names.slice(0, MAX_HINT_VALUES); const hint = display.join(' | ') + @@ -1503,7 +1480,7 @@ export function getEmptyValueHints( const schemaCursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, location.path ); if (!schemaCursor.schema) continue; @@ -1580,14 +1557,10 @@ export function setupJsonEditorLanguageSupport( const location = getLocation(text, offset); if (!location.path.length) return null; - const activeEvaluator = resolveActiveEvaluator( - context, - tree, - location.path - ); + const activeRule = resolveActiveRule(context, tree, location.path); const cursor = resolveSchemaAtJsonPath( context, - activeEvaluator, + activeRule, location.isAtPropertyKey ? location.path.slice(0, -1) : location.path ); @@ -1690,7 +1663,7 @@ export function setupJsonEditorLanguageSupport( const LEAF_CONDITION_TEMPLATE = { selector: { path: '*' }, - evaluator: { name: '', config: {} }, + rule: { name: '', config: {} }, }; function findConditionNodeAtOffset( @@ -1752,11 +1725,11 @@ function findConditionAtOffset( // We're on this object node itself const hasSelector = !!findNodeAtLocation(node, ['selector']); - const hasEvaluator = !!findNodeAtLocation(node, ['evaluator']); + const hasRule = !!findNodeAtLocation(node, ['rule']); const hasAnd = !!findNodeAtLocation(node, ['and']); const hasOr = !!findNodeAtLocation(node, ['or']); const hasNot = !!findNodeAtLocation(node, ['not']); - const isLeaf = (hasSelector || hasEvaluator) && !hasAnd && !hasOr; + const isLeaf = (hasSelector || hasRule) && !hasAnd && !hasOr; return { node, diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-view.tsx b/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-view.tsx index 7be5ed1a..16e48e34 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-view.tsx +++ 
b/ui/src/core/page-components/agent-detail/modals/edit-control/json-editor-view.tsx @@ -10,8 +10,8 @@ import dynamic from 'next/dynamic'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { - extractEvaluatorNames, - findEvaluatorConfigEdit, + extractRuleNames, + findRuleConfigEdit, findSteeringContextEdit, fixJsonCommas, getEmptyValueHints, @@ -56,7 +56,7 @@ const DEFAULT_VALIDATE_DEBOUNCE_MS = 500; const DEFAULT_LABEL = 'Configuration (JSON)'; const DEFAULT_TOOLTIP = 'Raw JSON configuration'; const DEFAULT_TEST_ID = 'raw-json-textarea'; -const DEFAULT_EDITOR_MODE = 'evaluator-config'; +const DEFAULT_EDITOR_MODE = 'rule-config'; const HINT_DEBOUNCE_MS = 300; const COMMA_FIX_DEBOUNCE_MS = 800; const CURSOR_TRIGGER_DEBOUNCE_MS = 50; @@ -211,8 +211,8 @@ function shouldAutoTriggerSuggest( if (contentLen <= 2) return true; // Longer strings: trigger if our provider has suggestions (enum values, - // evaluator names, selector paths). This covers all domain fields - // including evaluator config enums (logic, match_on, mode, etc.) + // rule names, selector paths). This covers all domain fields + // including rule config enums (logic, match_on, mode, etc.) // without hardcoding field names. 
return hasSuggestions(); } @@ -238,8 +238,8 @@ export const JsonEditorView = ({ testId = DEFAULT_TEST_ID, editorMode = DEFAULT_EDITOR_MODE, schema, - evaluators, - activeEvaluatorId, + rules, + activeRuleId, steps, templateParameterNames, }: JsonEditorViewProps) => { @@ -264,17 +264,17 @@ export const JsonEditorView = ({ mode: editorMode, modelUri, schema, - evaluators, - activeEvaluatorId, + rules, + activeRuleId, steps, definitionPrefix, templateParameterNames, }), [ - activeEvaluatorId, + activeRuleId, definitionPrefix, editorMode, - evaluators, + rules, modelUri, schema, steps, @@ -369,10 +369,7 @@ export const JsonEditorView = ({ }; updateHints(); - let prevEvalNames = extractEvaluatorNames( - editor.getValue(), - definitionPrefix - ); + let prevEvalNames = extractRuleNames(editor.getValue(), definitionPrefix); let prevDecision: string | null = null; try { const initTree = parseTree(editor.getValue()); @@ -402,7 +399,7 @@ export const JsonEditorView = ({ const end = model.getPositionAt(edit.offset + edit.length); // NOTE: This async boundary (queueMicrotask) means the auto-edit // creates a separate undo group from the user's keystroke. This breaks - // redo after undo when auto-edits fire (e.g. evaluator config fill). + // redo after undo when auto-edits fire (e.g. rule config fill). // Synchronous alternatives (model.applyEdits) crash Monaco's worker. // This is a known Monaco limitation — undo still works correctly. 
queueMicrotask(() => { @@ -459,15 +456,15 @@ export const JsonEditorView = ({ // Immediate: dependent field updates (control & template modes) if (editorMode === 'control' || editorMode === 'template') { - const evalEdit = findEvaluatorConfigEdit( + const evalEdit = findRuleConfigEdit( text, prevEvalNames, - evaluators, + rules, definitionPrefix ); - prevEvalNames = extractEvaluatorNames(text, definitionPrefix); + prevEvalNames = extractRuleNames(text, definitionPrefix); if (evalEdit) { - applyEdit(evalEdit, 'evaluator-config-update'); + applyEdit(evalEdit, 'rule-config-update'); return; } @@ -507,7 +504,7 @@ export const JsonEditorView = ({ autocompleteContext, definitionPrefix, editorMode, - evaluators, + rules, handleJsonChange, ]); diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/evaluator-config-section.tsx b/ui/src/core/page-components/agent-detail/modals/edit-control/rule-config-section.tsx similarity index 84% rename from ui/src/core/page-components/agent-detail/modals/edit-control/evaluator-config-section.tsx rename to ui/src/core/page-components/agent-detail/modals/edit-control/rule-config-section.tsx index b3a42731..bf0b8184 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/evaluator-config-section.tsx +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/rule-config-section.tsx @@ -21,7 +21,7 @@ const DEFAULT_HEIGHT = 450; const CONTENT_MIN_HEIGHT_EXTRA = 60; type ValidationStatus = 'idle' | 'validating' | 'valid' | 'invalid'; -type EvaluatorConfigSectionProps = { +type RuleConfigSectionProps = { config: { configViewMode: ConfigViewMode; jsonText: string; @@ -37,22 +37,22 @@ type EvaluatorConfigSectionProps = { options?: { signal?: AbortSignal } ) => Promise; onConfigChange: (config: Record) => void; - evaluatorForm: UseFormReturnType; + ruleForm: UseFormReturnType; formComponent?: React.ComponentType<{ form: UseFormReturnType }>; height?: number; - activeEvaluatorId?: string; - 
activeEvaluatorSchema?: JsonSchema | null; + activeRuleId?: string; + activeRuleSchema?: JsonSchema | null; }; -export function EvaluatorConfigSection({ +export function RuleConfigSection({ config, onValidateConfig, - evaluatorForm, + ruleForm, formComponent: FormComponent, height = DEFAULT_HEIGHT, - activeEvaluatorId, - activeEvaluatorSchema, -}: EvaluatorConfigSectionProps) { + activeRuleId, + activeRuleSchema, +}: RuleConfigSectionProps) { const [validationStatus, setValidationStatus] = useState('idle'); @@ -85,7 +85,7 @@ export function EvaluatorConfigSection({ - Evaluator configuration + Rule configuration {configViewMode === 'form' ? ( FormComponent ? ( - + ) : ( - No form available for this evaluator. Use JSON view to - configure. + No form available for this rule. Use JSON view to configure. ) ) : ( @@ -134,11 +133,11 @@ export function EvaluatorConfigSection({ onValidationStatusChange={setValidationStatus} height={height} label="Configuration (JSON)" - tooltip="Raw evaluator configuration in JSON format" + tooltip="Raw rule configuration in JSON format" testId="raw-json-textarea" - editorMode="evaluator-config" - activeEvaluatorId={activeEvaluatorId} - schema={activeEvaluatorSchema} + editorMode="rule-config" + activeRuleId={activeRuleId} + schema={activeRuleSchema} {...jsonViewProps} /> )} diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/template-edit-content.tsx b/ui/src/core/page-components/agent-detail/modals/edit-control/template-edit-content.tsx index aeeb3e7e..ad2ef03f 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/template-edit-content.tsx +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/template-edit-content.tsx @@ -28,13 +28,13 @@ import { TemplateParamForm } from '@/core/components/template-param-form'; import { TemplatePreview } from '@/core/components/template-preview'; import { useAgent } from '@/core/hooks/query-hooks/use-agent'; import { useControlSchema } from 
'@/core/hooks/query-hooks/use-control-schema'; -import { useEvaluators } from '@/core/hooks/query-hooks/use-evaluators'; +import { useRules } from '@/core/hooks/query-hooks/use-rules'; import { useUpdateControl } from '@/core/hooks/query-hooks/use-update-control'; import { useUpdateControlMetadata } from '@/core/hooks/query-hooks/use-update-control-metadata'; import { openActionConfirmModal } from '@/core/utils/modals'; import { ApiErrorAlert } from './api-error-alert'; -import type { JsonEditorEvaluatorOption } from './types'; +import type { JsonEditorRuleOption } from './types'; type TemplateEditContentProps = { control: Control; @@ -99,34 +99,34 @@ export function TemplateEditContent({ // Hooks for smart JSON editor features const { data: controlSchemaResponse } = useControlSchema(); - const { data: globalEvaluators } = useEvaluators(); + const { data: globalRules } = useRules(); const { data: agentResponse } = useAgent(agentId); const steps = agentResponse?.steps ?? []; const agentName = agentResponse?.agent?.agent_name ?? agentId; - const availableEvaluators = useMemo(() => { - const merged = new Map(); - for (const [id, evaluatorInfo] of Object.entries(globalEvaluators ?? {})) { + const availableRules = useMemo(() => { + const merged = new Map(); + for (const [id, ruleInfo] of Object.entries(globalRules ?? {})) { merged.set(id, { id, - label: evaluatorInfo.name, - description: evaluatorInfo.description, + label: ruleInfo.name, + description: ruleInfo.description, source: 'global', - configSchema: evaluatorInfo.config_schema, + configSchema: ruleInfo.config_schema, }); } - for (const evaluatorSchema of agentResponse?.evaluators ?? []) { - const id = `${agentName}:${evaluatorSchema.name}`; + for (const ruleSchema of agentResponse?.rules ?? 
[]) { + const id = `${agentName}:${ruleSchema.name}`; merged.set(id, { id, - label: evaluatorSchema.name, - description: evaluatorSchema.description, + label: ruleSchema.name, + description: ruleSchema.description, source: 'agent', - configSchema: evaluatorSchema.config_schema, + configSchema: ruleSchema.config_schema, }); } return [...merged.values()]; - }, [agentName, agentResponse?.evaluators, globalEvaluators]); + }, [agentName, agentResponse?.rules, globalRules]); // Dynamically extract parameter names from the current JSON text so // completions update as the user edits the parameters block. @@ -383,7 +383,7 @@ export function TemplateEditContent({ testId="template-json-textarea" editorMode="template" schema={controlSchemaResponse?.schema ?? null} - evaluators={availableEvaluators} + rules={availableRules} steps={steps} templateParameterNames={templateParameterNames} /> diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/types.ts b/ui/src/core/page-components/agent-detail/modals/edit-control/types.ts index 1dbacf8b..53585314 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/types.ts +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/types.ts @@ -6,34 +6,34 @@ import type { ControlActionDecision, ControlExecution, ControlStage, - EvaluatorSchema, ProblemDetail, + RuleSchema, StepSchema, } from '@/core/api/types'; -// Re-export evaluator form types for convenience -export type { JsonFormValues } from '@/core/evaluators/json/types'; -export type { ListFormValues } from '@/core/evaluators/list/types'; +// Re-export rule form types for convenience +export type { JsonFormValues } from '@/core/rules/json/types'; +export type { ListFormValues } from '@/core/rules/list/types'; export type { LunaFormValues, LunaOperator, LunaPayloadField, -} from '@/core/evaluators/luna/types'; -export type { RegexFormValues } from '@/core/evaluators/regex/types'; -export type { SqlFormValues } from 
'@/core/evaluators/sql/types'; +} from '@/core/rules/luna/types'; +export type { RegexFormValues } from '@/core/rules/regex/types'; +export type { SqlFormValues } from '@/core/rules/sql/types'; export type ConfigViewMode = 'form' | 'json'; export type ControlEditorMode = 'form' | 'json'; -export type JsonEditorMode = 'control' | 'evaluator-config' | 'template'; +export type JsonEditorMode = 'control' | 'rule-config' | 'template'; export type JsonSchema = Record; -export type JsonEditorEvaluatorOption = { +export type JsonEditorRuleOption = { id: string; label: string; description?: string | null; source: 'global' | 'agent'; - configSchema?: EvaluatorSchema['config_schema'] | null; + configSchema?: RuleSchema['config_schema'] | null; }; // Form values type for control definition @@ -103,10 +103,10 @@ export type JsonEditorViewProps = { editorMode?: JsonEditorMode; /** Optional JSON schema used for Monaco diagnostics */ schema?: JsonSchema | null; - /** Available evaluators for name/config autocomplete */ - evaluators?: JsonEditorEvaluatorOption[]; - /** Active evaluator id for evaluator-config mode */ - activeEvaluatorId?: string | null; + /** Available rules for name/config autocomplete */ + rules?: JsonEditorRuleOption[]; + /** Active rule id for rule-config mode */ + activeRuleId?: string | null; /** Agent step schemas used for selector path suggestions */ steps?: StepSchema[]; /** Parameter names from the template (for template mode $param completions) */ diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/use-evaluator-config-state.ts b/ui/src/core/page-components/agent-detail/modals/edit-control/use-rule-config-state.ts similarity index 95% rename from ui/src/core/page-components/agent-detail/modals/edit-control/use-evaluator-config-state.ts rename to ui/src/core/page-components/agent-detail/modals/edit-control/use-rule-config-state.ts index 63a70866..9cb1bfc3 100644 --- 
a/ui/src/core/page-components/agent-detail/modals/edit-control/use-evaluator-config-state.ts +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/use-rule-config-state.ts @@ -4,7 +4,7 @@ import type { ProblemDetail } from '@/core/api/types'; import type { ConfigViewMode } from './types'; -export type UseEvaluatorConfigStateArgs = { +export type UseRuleConfigStateArgs = { getConfigFromForm: () => Record; onConfigChange: (config: Record) => void; onValidateConfig: ( @@ -13,7 +13,7 @@ export type UseEvaluatorConfigStateArgs = { ) => Promise; }; -export type EvaluatorConfigState = { +export type RuleConfigState = { getConfigFromForm: () => Record; configViewMode: ConfigViewMode; jsonText: string; @@ -30,11 +30,11 @@ export type EvaluatorConfigState = { reset: () => void; }; -export function useEvaluatorConfigState({ +export function useRuleConfigState({ getConfigFromForm, onConfigChange, onValidateConfig, -}: UseEvaluatorConfigStateArgs): EvaluatorConfigState { +}: UseRuleConfigStateArgs): RuleConfigState { const [configViewMode, setConfigViewMode] = useState('form'); const [jsonText, setJsonText] = useState(''); const [jsonError, setJsonError] = useState(null); diff --git a/ui/src/core/page-components/agent-detail/modals/edit-control/utils.ts b/ui/src/core/page-components/agent-detail/modals/edit-control/utils.ts index b962de8a..465e22a5 100644 --- a/ui/src/core/page-components/agent-detail/modals/edit-control/utils.ts +++ b/ui/src/core/page-components/agent-detail/modals/edit-control/utils.ts @@ -13,7 +13,7 @@ import type { ValidationErrorItem } from '@/core/api/types'; * Mapping result indicating which form and field an API error belongs to */ type FieldMapping = { - form: 'definition' | 'evaluator'; + form: 'definition' | 'rule'; field: string; }; @@ -24,7 +24,7 @@ type FieldMapping = { * - "name" (control name) * - "data.scope.step_types" (definition field) * - "data.condition.selector.path" → selector_path (definition field) - * - 
"data.condition.evaluator.config.pattern" (evaluator config field) + * - "data.condition.rule.config.pattern" (rule config field) * * Since forms use snake_case, we can directly use the API field names. * @@ -57,7 +57,7 @@ export function mapApiFieldToFormField( return { form: 'definition', field: 'selector_path' }; } - const evalPrefix = 'evaluator.'; + const evalPrefix = 'rule.'; if (conditionField.startsWith(evalPrefix)) { let configField = conditionField.slice(evalPrefix.length); if (configField.startsWith('config.')) { @@ -68,7 +68,7 @@ export function mapApiFieldToFormField( const field = firstDotIndex > 0 ? configField.slice(0, firstDotIndex) : configField; - return { form: 'evaluator', field }; + return { form: 'rule', field }; } return null; @@ -110,13 +110,13 @@ export function mapApiFieldToFormField( * * @param errors Array of validation errors from API * @param definitionForm The control definition form - * @param evaluatorForm The evaluator config form + * @param ruleForm The rule config form * @returns Array of errors that couldn't be mapped to form fields */ export function applyApiErrorsToForms( errors: ValidationErrorItem[] | undefined, definitionForm: UseFormReturnType, - evaluatorForm?: UseFormReturnType | null + ruleForm?: UseFormReturnType | null ): ValidationErrorItem[] { if (!errors || errors.length === 0) { return []; @@ -130,9 +130,9 @@ export function applyApiErrorsToForms( if (mapping) { if (mapping.form === 'definition') { definitionForm.setFieldError(mapping.field, error.message); - } else if (mapping.form === 'evaluator' && evaluatorForm) { - evaluatorForm.setFieldError(mapping.field, error.message); - } else if (mapping.form === 'evaluator') { + } else if (mapping.form === 'rule' && ruleForm) { + ruleForm.setFieldError(mapping.field, error.message); + } else if (mapping.form === 'rule') { unmappedErrors.push(error); } } else { diff --git a/ui/src/core/rules/index.ts b/ui/src/core/rules/index.ts new file mode 100644 index 
00000000..e0e45332 --- /dev/null +++ b/ui/src/core/rules/index.ts @@ -0,0 +1,80 @@ +/** + * Rule Registry + * + * This module exports all available rules and provides + * utilities for working with them. + * + * ## Adding a New Rule + * + * 1. Create a new folder under `rules/` (e.g., `rules/my-rule/`) + * 2. Create the following files: + * - `types.ts` - Form value types + * - `form.tsx` - React form component + * - `index.ts` - Rule definition (implements RuleDefinition interface) + * 3. Import and add the rule to the `rules` array below + * 4. That's it! The main edit-control component will automatically pick it up. + * + * @example + * ```typescript + * // rules/my-rule/index.ts + * import type { RuleDefinition } from "../types"; + * import { MyForm } from "./form"; + * import type { MyFormValues } from "./types"; + * + * export const myRule: RuleDefinition = { + * id: "my-rule", + * displayName: "My Rule", + * initialValues: { ... }, + * validate: { ... }, + * toConfig: (values) => ({ ... }), + * fromConfig: (config) => ({ ... }), + * FormComponent: MyForm, + * }; + * ``` + */ + +import { jsonRule } from './json'; +import { listRule } from './list'; +import { lunaRule } from './luna'; +import { regexRule } from './regex'; +import { sqlRule } from './sql'; +import type { AnyRuleDefinition } from './types'; + +/** + * All registered rules. + * Add new rules here to make them available in the UI. + */ +export const rules: AnyRuleDefinition[] = [ + regexRule, + listRule, + jsonRule, + sqlRule, + lunaRule, +]; + +/** + * Map of rule ID to rule for quick lookup. + */ +export const ruleRegistry = new Map( + rules.map((rule) => [rule.id, rule]) +); + +/** + * Get a rule by ID. + * Returns undefined if the rule is not found. + */ +export const getRule = (id: string): AnyRuleDefinition | undefined => + ruleRegistry.get(id); + +/** + * Check if a rule exists. 
+ */ +export const hasRule = (id: string): boolean => ruleRegistry.has(id); + +// Re-export types and individual rules for direct imports +export { jsonRule } from './json'; +export { listRule } from './list'; +export { lunaRule } from './luna'; +export { regexRule } from './regex'; +export { sqlRule } from './sql'; +export type { AnyRuleDefinition, RuleDefinition, RuleFormProps } from './types'; diff --git a/ui/src/core/evaluators/json/form.tsx b/ui/src/core/rules/json/form.tsx similarity index 97% rename from ui/src/core/evaluators/json/form.tsx rename to ui/src/core/rules/json/form.tsx index ad43ccf3..d311f504 100644 --- a/ui/src/core/evaluators/json/form.tsx +++ b/ui/src/core/rules/json/form.tsx @@ -12,10 +12,10 @@ import { LabelWithTooltip, } from '@/core/components/label-with-tooltip'; -import type { EvaluatorFormProps } from '../types'; +import type { RuleFormProps } from '../types'; import type { JsonFormValues } from './types'; -export const JsonForm = ({ form }: EvaluatorFormProps) => { +export const JsonForm = ({ form }: RuleFormProps) => { return ( diff --git a/ui/src/core/evaluators/json/index.ts b/ui/src/core/rules/json/index.ts similarity index 96% rename from ui/src/core/evaluators/json/index.ts rename to ui/src/core/rules/json/index.ts index 1a4b2393..4bd5c80e 100644 --- a/ui/src/core/evaluators/json/index.ts +++ b/ui/src/core/rules/json/index.ts @@ -1,4 +1,4 @@ -import type { EvaluatorDefinition } from '../types'; +import type { RuleDefinition } from '../types'; import { JsonForm } from './form'; import type { JsonFormValues } from './types'; @@ -19,11 +19,11 @@ const stringifyOrEmpty = (value: unknown): string => { }; /** - * JSON evaluator definition. + * JSON rule definition. * * Validates JSON structure, types, constraints, and patterns. 
*/ -export const jsonEvaluator: EvaluatorDefinition = { +export const jsonRule: RuleDefinition = { id: 'json', displayName: 'JSON', diff --git a/ui/src/core/evaluators/json/types.ts b/ui/src/core/rules/json/types.ts similarity index 95% rename from ui/src/core/evaluators/json/types.ts rename to ui/src/core/rules/json/types.ts index e9f28608..d1f95641 100644 --- a/ui/src/core/evaluators/json/types.ts +++ b/ui/src/core/rules/json/types.ts @@ -1,5 +1,5 @@ /** - * Form values for the JSON evaluator. + * Form values for the JSON rule. * Uses snake_case to match API field names directly. */ export type JsonFormValues = { diff --git a/ui/src/core/evaluators/list/form.tsx b/ui/src/core/rules/list/form.tsx similarity index 95% rename from ui/src/core/evaluators/list/form.tsx rename to ui/src/core/rules/list/form.tsx index 8aee4bc8..89c0e91f 100644 --- a/ui/src/core/evaluators/list/form.tsx +++ b/ui/src/core/rules/list/form.tsx @@ -5,10 +5,10 @@ import { LabelWithTooltip, } from '@/core/components/label-with-tooltip'; -import type { EvaluatorFormProps } from '../types'; +import type { RuleFormProps } from '../types'; import type { ListFormValues } from './types'; -export const ListForm = ({ form }: EvaluatorFormProps) => { +export const ListForm = ({ form }: RuleFormProps) => { return (