Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions evaluators/builtin/src/agent_control_evaluators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- list: List-based value matching
- json: JSON validation
- sql: SQL query validation
- budget: Cumulative LLM token and cost tracking

Naming convention:
- Built-in: "regex", "list", "json", "sql", "budget"
Expand Down Expand Up @@ -47,6 +48,13 @@
from agent_control_evaluators.json import JSONEvaluator, JSONEvaluatorConfig
from agent_control_evaluators.list import ListEvaluator, ListEvaluatorConfig
from agent_control_evaluators.regex import RegexEvaluator, RegexEvaluatorConfig
from agent_control_evaluators.budget import (
BudgetEvaluator,
BudgetEvaluatorConfig,
BudgetSnapshot,
BudgetStore,
InMemoryBudgetStore,
)
from agent_control_evaluators.sql import SQLEvaluator, SQLEvaluatorConfig

__all__ = [
Expand All @@ -73,4 +81,9 @@
"JSONEvaluatorConfig",
"SQLEvaluator",
"SQLEvaluatorConfig",
"BudgetEvaluator",
"BudgetEvaluatorConfig",
"BudgetSnapshot",
"BudgetStore",
"InMemoryBudgetStore",
]
17 changes: 17 additions & 0 deletions evaluators/builtin/src/agent_control_evaluators/budget/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
"""Budget evaluator for per-agent LLM cost and token tracking."""

from agent_control_evaluators.budget.config import BudgetEvaluatorConfig
from agent_control_evaluators.budget.evaluator import BudgetEvaluator
from agent_control_evaluators.budget.store import (
BudgetSnapshot,
BudgetStore,
InMemoryBudgetStore,
)

# Public API of the budget package: the names imported above, re-exported
# so callers can import them from the package itself.
__all__ = [
"BudgetEvaluator",
"BudgetEvaluatorConfig",
"BudgetSnapshot",
"BudgetStore",
"InMemoryBudgetStore",
]
81 changes: 81 additions & 0 deletions evaluators/builtin/src/agent_control_evaluators/budget/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""Configuration for the budget evaluator."""

from __future__ import annotations

import math
from typing import Any, Literal

from pydantic import Field, field_validator, model_validator

from agent_control_evaluators._base import EvaluatorConfig


class BudgetLimitRule(EvaluatorConfig):
    """One budget ceiling, optionally narrowed by scope and time window.

    A rule caps USD spend, token usage, or both. Several rules may apply
    to the same step; the evaluator checks every one of them and triggers
    on the first breach.

    Attributes:
        scope: Static scope dimensions that must all match for the rule
            to apply. An empty dict makes the rule global.
        per: Name of a metadata field. When set, the limit is tracked
            independently for each distinct value of that field (e.g.
            "user_id" yields one budget per user within the scope).
        window: Accumulation window. None means cumulative, never reset.
        limit_usd: USD ceiling for the window. None means uncapped.
        limit_tokens: Token ceiling for the window. None means uncapped.
    """

    scope: dict[str, str] = Field(default_factory=dict)
    per: str | None = None
    window: Literal["daily", "weekly", "monthly"] | None = None
    limit_usd: float | None = None
    limit_tokens: int | None = None

    @field_validator("limit_usd")
    @classmethod
    def validate_limit_usd(cls, v: float | None) -> float | None:
        """Accept None or a finite, strictly positive USD amount."""
        if v is None:
            return v
        # The comparison comes first so NaN (never > 0) is rejected too.
        if v > 0 and math.isfinite(v):
            return v
        raise ValueError("limit_usd must be a finite positive number")

    @field_validator("limit_tokens")
    @classmethod
    def validate_limit_tokens(cls, v: int | None) -> int | None:
        """Accept None or a strictly positive token count."""
        if v is None or v > 0:
            return v
        raise ValueError("limit_tokens must be positive")

    @model_validator(mode="after")
    def at_least_one_limit(self) -> "BudgetLimitRule":
        """Reject a rule that caps neither USD nor tokens."""
        if self.limit_usd is not None or self.limit_tokens is not None:
            return self
        raise ValueError("At least one of limit_usd or limit_tokens must be set")


class BudgetEvaluatorConfig(EvaluatorConfig):
    """Configuration for the budget evaluator.

    Attributes:
        limits: List of budget limit rules (at least one). Each is
            checked independently.
        pricing: Optional model pricing table. Maps model name to per-1K
            token rates. Used to derive cost_usd from token counts when
            cost is not provided in step data. Every rate must be a
            finite, non-negative number.
        token_path: Dot-notation path to extract token usage from step
            data (e.g. "usage.total_tokens"). If None, looks for standard
            fields (input_tokens, output_tokens, total_tokens, usage).
        cost_path: Dot-notation path to extract cost from step data.
        model_path: Dot-notation path to extract model name (for pricing
            lookup).
        metadata_paths: Mapping of metadata field name to dot-notation
            path in step data. Used to extract scope dimensions
            (channel, user_id, etc).
    """

    limits: list[BudgetLimitRule] = Field(min_length=1)
    pricing: dict[str, dict[str, float]] | None = None
    token_path: str | None = None
    cost_path: str | None = None
    model_path: str | None = None
    metadata_paths: dict[str, str] = Field(default_factory=dict)

    @field_validator("pricing")
    @classmethod
    def validate_pricing(
        cls, v: dict[str, dict[str, float]] | None
    ) -> dict[str, dict[str, float]] | None:
        """Reject pricing tables containing NaN, infinite or negative rates.

        Float fields accept NaN and infinity by default. A NaN rate would
        yield a NaN derived cost, and comparisons against NaN are always
        false. Zero is allowed so free models can be priced explicitly.

        Raises:
            ValueError: If any rate is not a finite, non-negative number.
        """
        if v is None:
            return v
        for model_name, rates in v.items():
            for rate_name, rate in rates.items():
                if not math.isfinite(rate) or rate < 0:
                    raise ValueError(
                        f"pricing[{model_name!r}][{rate_name!r}] must be a "
                        "finite non-negative number"
                    )
        return v
Loading