From 3f71a85061f7d03cbd1d6da53741483493b065ed Mon Sep 17 00:00:00 2001 From: Bashir Partovi Date: Thu, 12 Feb 2026 13:49:24 -0500 Subject: [PATCH 1/2] A new alternative design for Identifiers --- pyrit/identifiers/__init__.py | 3 + pyrit/identifiers/component_config.py | 469 ++++++++++++++++++ pyrit/prompt_target/common/prompt_target.py | 53 +- .../openai/openai_chat_target.py | 33 +- pyrit/registry/base.py | 38 ++ .../class_registries/base_class_registry.py | 10 +- .../class_registries/initializer_registry.py | 16 +- .../class_registries/scenario_registry.py | 23 +- .../float_scale/self_ask_scale_scorer.py | 23 +- pyrit/score/scorer.py | 40 +- .../true_false/true_false_composite_scorer.py | 17 +- 11 files changed, 699 insertions(+), 26 deletions(-) create mode 100644 pyrit/identifiers/component_config.py diff --git a/pyrit/identifiers/__init__.py b/pyrit/identifiers/__init__.py index 30c501894..150117ceb 100644 --- a/pyrit/identifiers/__init__.py +++ b/pyrit/identifiers/__init__.py @@ -15,6 +15,7 @@ ) from pyrit.identifiers.scorer_identifier import ScorerIdentifier from pyrit.identifiers.target_identifier import TargetIdentifier +from pyrit.identifiers.component_config import ComponentConfig, Configurable __all__ = [ "class_name_to_snake_case", @@ -27,4 +28,6 @@ "ScorerIdentifier", "snake_case_to_class_name", "TargetIdentifier", + "ComponentConfig", + "Configurable", ] diff --git a/pyrit/identifiers/component_config.py b/pyrit/identifiers/component_config.py new file mode 100644 index 000000000..6692600d3 --- /dev/null +++ b/pyrit/identifiers/component_config.py @@ -0,0 +1,469 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Component configuration and identity system for PyRIT. + +A ComponentConfig is an immutable snapshot of a component's behavioral configuration, +serving as both its identity and its storable representation. + +Design principles: + 1. The config dict IS the identity — no wrapper hierarchy needed. + 2. Hash is content-addressed from behavioral params only. + 3. Children carry their own hashes — compositional by default. + 4. Adding optional params with None default is backward-compatible (None values excluded). + +ComponentConfig also satisfies the registry metadata contract (has class_name, class_module, +snake_class_name), so it can be used directly as metadata in instance registries like +ScorerRegistry without a separate wrapper. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from typing import Any, ClassVar, Dict, List, Optional, Union + +import pyrit +from pyrit.identifiers.class_name_utils import class_name_to_snake_case + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Pure utility functions +# --------------------------------------------------------------------------- + + +def config_hash(config_dict: Dict[str, Any]) -> str: + """ + Compute a deterministic SHA256 hash from a config dictionary. + + This is the single source of truth for identity hashing across the entire + system. The dict is serialized with sorted keys and compact separators to + ensure determinism. + + Args: + config_dict (Dict[str, Any]): A JSON-serializable dictionary. + + Returns: + str: Hex-encoded SHA256 hash string. + + Raises: + TypeError: If config_dict contains values that are not JSON-serializable. + """ + canonical = json.dumps(config_dict, sort_keys=True, separators=(",", ":")) + return hashlib.sha256(canonical.encode("utf-8")).hexdigest() + + +def _build_hash_dict( + *, + class_name: str, + class_module: str, + params: Dict[str, Any], + children: Dict[str, Any], +) -> Dict[str, Any]: + """ + Build the canonical dictionary used for hash computation. + + Children are represented by their hashes, not their full config. + A parent's hash changes when a child's behavioral config changes, + but the parent doesn't need to understand the child's internal structure. + + Args: + class_name (str): The component's class name. + class_module (str): The component's module path. + params (Dict[str, Any]): Behavioral parameters (non-None values only). + children (Dict[str, Any]): Child name to ComponentConfig or list of ComponentConfig. + + Returns: + Dict[str, Any]: The canonical dictionary for hashing. + """ + hash_dict: Dict[str, Any] = { + ComponentConfig.KEY_CLASS_NAME: class_name, + ComponentConfig.KEY_CLASS_MODULE: class_module, + } + + # Only include non-None params — adding an optional param with None default + # won't change existing hashes, making the schema backward-compatible. + for key, value in sorted(params.items()): + if value is not None: + hash_dict[key] = value + + # Children contribute their hashes, not their full structure. + if children: + children_hashes: Dict[str, Any] = {} + for name, child in sorted(children.items()): + if isinstance(child, ComponentConfig): + children_hashes[name] = child.hash + elif isinstance(child, list): + children_hashes[name] = [c.hash for c in child if isinstance(c, ComponentConfig)] + if children_hashes: + hash_dict[ComponentConfig.KEY_CHILDREN] = children_hashes + + return hash_dict + + +# --------------------------------------------------------------------------- +# ComponentConfig — the frozen identity snapshot +# --------------------------------------------------------------------------- + + +@dataclass(frozen=True) +class ComponentConfig: + """ + Immutable snapshot of a component's behavioral configuration. + + A single type for all component identity — scorers, targets, converters, and + any future component types all produce a ComponentConfig with their relevant + params and children. + + The hash is content-addressed: two ComponentConfigs with the same class, params, + and children produce the same hash. This enables deterministic metrics lookup, + DB deduplication, and registry keying. + + Also usable as registry metadata for instance registries (e.g., ScorerRegistry) + because it exposes ``class_name``, ``snake_class_name``, and ``unique_name``. + + Attributes: + class_name (str): Python class name (e.g., "SelfAskScaleScorer"). + class_module (str): Full module path (e.g., "pyrit.score.self_ask_scale_scorer"). + params (Dict[str, Any]): Behavioral parameters that affect output. + children (Dict[str, Union[ComponentConfig, List[ComponentConfig]]]): Named + child configs for compositional identity (e.g., a scorer's target). + hash (str): Content-addressed SHA256 hash computed from class, params, and children. + pyrit_version (str): Version tag for storage. Not included in hash. + """ + + # ------------------------------------------------------------------- + # Serialization key constants + # ------------------------------------------------------------------- + + KEY_CLASS_NAME: ClassVar[str] = "class_name" + KEY_CLASS_MODULE: ClassVar[str] = "class_module" + KEY_HASH: ClassVar[str] = "hash" + KEY_PYRIT_VERSION: ClassVar[str] = "pyrit_version" + KEY_CHILDREN: ClassVar[str] = "children" + LEGACY_KEY_TYPE: ClassVar[str] = "__type__" + LEGACY_KEY_MODULE: ClassVar[str] = "__module__" + + # ------------------------------------------------------------------- + # Fields + # ------------------------------------------------------------------- + + class_name: str + class_module: str + params: Dict[str, Any] = field(default_factory=dict) + children: Dict[str, Union[ComponentConfig, List[ComponentConfig]]] = field(default_factory=dict) + hash: str = field(init=False, compare=False) + pyrit_version: str = field(default_factory=lambda: pyrit.__version__, compare=False) + + def __post_init__(self) -> None: + """Compute the content-addressed hash at creation time.""" + hash_dict = _build_hash_dict( + class_name=self.class_name, + class_module=self.class_module, + params=self.params, + children=self.children, + ) + object.__setattr__(self, "hash", config_hash(hash_dict)) + + # ------------------------------------------------------------------- + # Computed properties + # ------------------------------------------------------------------- + + @property + def short_hash(self) -> str: + """ + Return the first 8 characters of the hash for display and logging. + + This truncated hash provides sufficient uniqueness for human-readable + identification while keeping output concise. Used in string representations, + log messages, and registry keys. + + Returns: + str: First 8 hex characters of the SHA256 hash. + """ + return self.hash[:8] + + @property + def snake_class_name(self) -> str: + """ + Snake_case version of class_name (e.g., "self_ask_scale_scorer"). + + Used by registries for key derivation and CLI formatting. + """ + return class_name_to_snake_case(self.class_name) + + @property + def unique_name(self) -> str: + """ + Globally unique display name: ``snake_class_name::short_hash``. + + Used as the default registration key in instance registries + (e.g., "self_ask_scale_scorer::a1b2c3d4"). + """ + return f"{self.snake_class_name}::{self.short_hash}" + + # ------------------------------------------------------------------- + # Factory + # ------------------------------------------------------------------- + + @classmethod + def of( + cls, + obj: object, + *, + params: Optional[Dict[str, Any]] = None, + children: Optional[Dict[str, Union[ComponentConfig, List[ComponentConfig]]]] = None, + ) -> ComponentConfig: + """ + Build a ComponentConfig from a live object instance. + + This factory method extracts class_name and class_module from the object's + type automatically, making it the preferred way to create configs in + component implementations. None-valued params and children are filtered out + to ensure backward-compatible hashing. + + Args: + obj (object): The live component instance whose type info will be captured. + params (Optional[Dict[str, Any]]): Behavioral parameters that affect the + component's output. Only include params that change behavior — exclude + operational settings like rate limits, retry counts, or logging config. + children (Optional[Dict[str, Union[ComponentConfig, List[ComponentConfig]]]]): + Named child component configs. Use for compositional components like + scorers that wrap other scorers or targets that chain converters. + + Returns: + ComponentConfig: The frozen config snapshot with computed hash. + """ + clean_params = {k: v for k, v in (params or {}).items() if v is not None} + clean_children = {k: v for k, v in (children or {}).items() if v is not None} + + return cls( + class_name=obj.__class__.__name__, + class_module=obj.__class__.__module__, + params=clean_params, + children=clean_children, + ) + + # ------------------------------------------------------------------- + # Normalization + # ------------------------------------------------------------------- + + @classmethod + def normalize(cls, value: Union[ComponentConfig, Dict[str, Any]]) -> ComponentConfig: + """ + Normalize a value to a ComponentConfig instance. + + Accepts either an existing ComponentConfig (returned as-is) or a dict + (reconstructed via from_dict). This supports code paths that may receive + either typed configs or raw dicts from database storage. + + Args: + value (Union[ComponentConfig, Dict[str, Any]]): A ComponentConfig or + a dictionary representation. + + Returns: + ComponentConfig: The normalized config instance. + + Raises: + TypeError: If value is neither a ComponentConfig nor a dict. + """ + if isinstance(value, cls): + return value + if isinstance(value, dict): + return cls.from_dict(value) + raise TypeError(f"Expected ComponentConfig or dict, got {type(value).__name__}") + + # ------------------------------------------------------------------- + # Serialization + # ------------------------------------------------------------------- + + def to_dict(self) -> Dict[str, Any]: + """ + Serialize to a JSON-compatible dictionary for DB/JSONL storage. + + Produces a flat structure where params are inlined at the top level alongside + class_name, class_module, hash, and pyrit_version. This maintains backward + compatibility with existing DB queries that access params directly + (e.g., ``scorer_class_identifier->>'scorer_type'``). + + Children are recursively serialized into a nested "children" key. + + Returns: + Dict[str, Any]: JSON-serializable dictionary suitable for database storage + or JSONL export. + """ + result: Dict[str, Any] = { + self.KEY_CLASS_NAME: self.class_name, + self.KEY_CLASS_MODULE: self.class_module, + self.KEY_HASH: self.hash, + self.KEY_PYRIT_VERSION: self.pyrit_version, + } + + for key, value in self.params.items(): + result[key] = value + + if self.children: + serialized_children: Dict[str, Any] = {} + for name, child in self.children.items(): + if isinstance(child, ComponentConfig): + serialized_children[name] = child.to_dict() + elif isinstance(child, list): + serialized_children[name] = [c.to_dict() for c in child] + result[self.KEY_CHILDREN] = serialized_children + + return result + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> ComponentConfig: + """ + Deserialize from a stored dictionary. + + Reconstructs a ComponentConfig from data previously saved via to_dict(). + Handles both the current format (``class_name``/``class_module``) and legacy + format (``__type__``/``__module__``) for backward compatibility with + older database records. + + The hash is always recomputed from the reconstructed params and children. + If the stored hash differs from the computed hash, a warning is logged + indicating possible schema drift. + + Args: + data (Dict[str, Any]): Dictionary from DB/JSONL storage. The original + dict is not mutated; a copy is made internally. + + Returns: + ComponentConfig: Reconstructed config with freshly computed hash. + """ + data = dict(data) # Don't mutate the input + + # Handle legacy key mappings + class_name = ( + data.pop(cls.KEY_CLASS_NAME, None) or data.pop(cls.LEGACY_KEY_TYPE, None) or "Unknown" + ) + class_module = ( + data.pop(cls.KEY_CLASS_MODULE, None) or data.pop(cls.LEGACY_KEY_MODULE, None) or "unknown" + ) + + stored_hash = data.pop(cls.KEY_HASH, None) + pyrit_version = data.pop(cls.KEY_PYRIT_VERSION, pyrit.__version__) + + # Reconstruct children + children: Dict[str, Union[ComponentConfig, List[ComponentConfig]]] = {} + raw_children = data.pop(cls.KEY_CHILDREN, None) + if raw_children and isinstance(raw_children, dict): + for name, child_data in raw_children.items(): + if isinstance(child_data, dict): + children[name] = cls.from_dict(child_data) + elif isinstance(child_data, list): + children[name] = [cls.from_dict(c) for c in child_data if isinstance(c, dict)] + + # Everything remaining is a param + params = data + + config = cls( + class_name=class_name, + class_module=class_module, + params=params, + children=children, + pyrit_version=pyrit_version, + ) + + if stored_hash and config.hash != stored_hash: + logger.warning( + f"Hash mismatch for {class_name}: stored={stored_hash[:8]}, " + f"computed={config.short_hash}. Schema may have changed." + ) + + return config + + # ------------------------------------------------------------------- + # Display + # ------------------------------------------------------------------- + + def __str__(self) -> str: + """ + Return a human-readable string representation. + + Format: ``ClassName::abcd1234`` (class name followed by short hash). + Suitable for log messages and user-facing output. + + Returns: + str: Human-readable identifier string. + """ + return f"{self.class_name}::{self.short_hash}" + + def __repr__(self) -> str: + """ + Return a detailed representation for debugging. + + Includes class name, all params, children references, and the short hash. + Useful for inspecting config contents in debuggers or REPL sessions. + + Returns: + str: Detailed debug string showing all config components. + """ + params_str = ", ".join(f"{k}={v!r}" for k, v in sorted(self.params.items())) + children_str = ", ".join(f"{k}={v}" for k, v in sorted(self.children.items())) + parts = [f"class={self.class_name}"] + if params_str: + parts.append(f"params=({params_str})") + if children_str: + parts.append(f"children=({children_str})") + parts.append(f"hash={self.short_hash}") + return f"ComponentConfig({', '.join(parts)})" + + +# --------------------------------------------------------------------------- +# Configurable — the ABC components implement +# --------------------------------------------------------------------------- + + +class Configurable(ABC): + """ + Abstract base class for components that describe their behavioral configuration. + + Components implement ``_build_config()`` to return a frozen ComponentConfig + snapshot. The config is built lazily on first access and cached for the + component's lifetime. + """ + + _config: Optional[ComponentConfig] = None + + @abstractmethod + def _build_config(self) -> ComponentConfig: + """ + Build the behavioral configuration for this component. + + Only include params that affect the component's behavior/output. + Exclude operational params (rate limits, retry config, logging settings). + + Returns: + ComponentConfig: The frozen configuration snapshot. + """ + ... + + def get_config(self) -> ComponentConfig: + """ + Get the component's configuration, building it lazily on first access. + + The config is computed once via _build_config() and then cached for + subsequent calls. This ensures consistent identity throughout the + component's lifetime while deferring computation until actually needed. + + Note: + Not thread-safe. If thread safety is required, subclasses should + implement appropriate synchronization. + + Returns: + ComponentConfig: The frozen configuration snapshot representing + this component's behavioral identity. + """ + if self._config is None: + self._config = self._build_config() + return self._config \ No newline at end of file diff --git a/pyrit/prompt_target/common/prompt_target.py b/pyrit/prompt_target/common/prompt_target.py index 8cd80f47d..c06186a7a 100644 --- a/pyrit/prompt_target/common/prompt_target.py +++ b/pyrit/prompt_target/common/prompt_target.py @@ -3,16 +3,17 @@ import abc import logging -from typing import Any, List, Optional +from typing import Any, ClassVar, Dict, List, Optional from pyrit.identifiers import Identifiable, TargetIdentifier +from pyrit.identifiers.component_config import Configurable, ComponentConfig from pyrit.memory import CentralMemory, MemoryInterface from pyrit.models import Message logger = logging.getLogger(__name__) -class PromptTarget(Identifiable[TargetIdentifier]): +class PromptTarget(Identifiable[TargetIdentifier], Configurable): """ Abstract base class for prompt targets. @@ -28,6 +29,11 @@ class PromptTarget(Identifiable[TargetIdentifier]): _identifier: Optional[TargetIdentifier] = None + # Configuration key constants + CONFIG_KEY_ENDPOINT: ClassVar[str] = "endpoint" + CONFIG_KEY_MODEL_NAME: ClassVar[str] = "model_name" + CONFIG_KEY_MAX_REQUESTS_PER_MINUTE: ClassVar[str] = "max_requests_per_minute" + def __init__( self, verbose: bool = False, @@ -149,3 +155,46 @@ def _build_identifier(self) -> TargetIdentifier: TargetIdentifier: The identifier for this prompt target. """ return self._create_identifier() + + def _create_config( + self, + *, + params: Optional[Dict[str, Any]] = None, + ) -> ComponentConfig: + """ + Build a ComponentConfig for this target. + + Automatically injects standard target fields. Subclasses pass their + specific params (temperature, top_p, etc.); the base class handles + the rest. + + Parallel to ``_create_identifier`` — both coexist during migration. + + Args: + params (Optional[Dict[str, Any]]): Additional behavioral parameters. + + Returns: + ComponentConfig: Frozen config snapshot with computed hash. + """ + from pyrit.prompt_target.common.prompt_chat_target import PromptChatTarget + + model_name = self._underlying_model or self._model_name or "" + + all_params: Dict[str, Any] = { + self.CONFIG_KEY_ENDPOINT: self._endpoint, + self.CONFIG_KEY_MODEL_NAME: model_name, + self.CONFIG_KEY_MAX_REQUESTS_PER_MINUTE: self._max_requests_per_minute, + } + if params: + all_params.update(params) + + return ComponentConfig.of(self, params=all_params) + + def _build_config(self) -> ComponentConfig: + """ + Build the configuration for this target. + + Base implementation includes standard fields only. + Subclasses override to add temperature, top_p, etc. + """ + return self._create_config() diff --git a/pyrit/prompt_target/openai/openai_chat_target.py b/pyrit/prompt_target/openai/openai_chat_target.py index 87ffa26f4..5ccccb014 100644 --- a/pyrit/prompt_target/openai/openai_chat_target.py +++ b/pyrit/prompt_target/openai/openai_chat_target.py @@ -4,7 +4,7 @@ import base64 import json import logging -from typing import Any, Dict, MutableSequence, Optional +from typing import Any, ClassVar, Dict, MutableSequence, Optional from pyrit.common import convert_local_image_to_data_url from pyrit.exceptions import ( @@ -13,6 +13,7 @@ pyrit_target_retry, ) from pyrit.identifiers import TargetIdentifier +from pyrit.identifiers.component_config import ComponentConfig from pyrit.models import ( ChatMessage, DataTypeSerializer, @@ -62,6 +63,16 @@ class OpenAIChatTarget(OpenAITarget, PromptChatTarget): """ + # Configuration key constants + CONFIG_KEY_TEMPERATURE: ClassVar[str] = "temperature" + CONFIG_KEY_TOP_P: ClassVar[str] = "top_p" + CONFIG_KEY_MAX_COMPLETION_TOKENS: ClassVar[str] = "max_completion_tokens" + CONFIG_KEY_MAX_TOKENS: ClassVar[str] = "max_tokens" + CONFIG_KEY_FREQUENCY_PENALTY: ClassVar[str] = "frequency_penalty" + CONFIG_KEY_PRESENCE_PENALTY: ClassVar[str] = "presence_penalty" + CONFIG_KEY_SEED: ClassVar[str] = "seed" + CONFIG_KEY_N: ClassVar[str] = "n" + def __init__( self, *, @@ -183,6 +194,26 @@ def _build_identifier(self) -> TargetIdentifier: "n": self._n, }, ) + + def _build_config(self) -> ComponentConfig: + """ + Build the behavioral configuration for this target. + + Returns: + ComponentConfig: The frozen configuration snapshot. + """ + return self._create_config( + params={ + self.CONFIG_KEY_TEMPERATURE: self._temperature, + self.CONFIG_KEY_TOP_P: self._top_p, + self.CONFIG_KEY_MAX_COMPLETION_TOKENS: self._max_completion_tokens, + self.CONFIG_KEY_MAX_TOKENS: self._max_tokens, + self.CONFIG_KEY_FREQUENCY_PENALTY: self._frequency_penalty, + self.CONFIG_KEY_PRESENCE_PENALTY: self._presence_penalty, + self.CONFIG_KEY_SEED: self._seed, + self.CONFIG_KEY_N: self._n, + }, + ) def _set_openai_env_configuration_vars(self) -> None: """ diff --git a/pyrit/registry/base.py b/pyrit/registry/base.py index 5f5e37400..59a7ed753 100644 --- a/pyrit/registry/base.py +++ b/pyrit/registry/base.py @@ -8,12 +8,50 @@ and instance registries (which store T instances). """ +from dataclasses import dataclass from typing import Any, Dict, Iterator, List, Optional, Protocol, TypeVar, runtime_checkable +from pyrit.identifiers.class_name_utils import class_name_to_snake_case + # Type variable for metadata (invariant for Protocol compatibility) MetadataT = TypeVar("MetadataT") +@dataclass(frozen=True) +class RegistryEntry: + """ + Minimal base for class-level registry metadata. + + Provides the common fields every registry metadata type needs for display, + lookup, and filtering in class registries (ScenarioRegistry, InitializerRegistry). + + This is NOT for component instance identity — use ComponentConfig for that. + RegistryEntry describes a *class* for discovery and CLI listing; + ComponentConfig describes a *configured instance* for DB storage and hash-based lookup. + + Subclasses (ScenarioMetadata, InitializerMetadata) add domain-specific fields + as frozen dataclass fields. + + Attributes: + class_name (str): Python class name (e.g., "ContentHarmsScenario"). + class_module (str): Full module path (e.g., "pyrit.scenario.scenarios.content_harms"). + class_description (str): Human-readable description, typically from the class docstring. + """ + + class_name: str + class_module: str + class_description: str = "" + + @property + def snake_class_name(self) -> str: + """ + Snake_case version of class_name (e.g., "content_harms_scenario"). + + Used by CLI formatting and as registry display keys. + """ + return class_name_to_snake_case(self.class_name) + + @runtime_checkable class RegistryProtocol(Protocol[MetadataT]): """ diff --git a/pyrit/registry/class_registries/base_class_registry.py b/pyrit/registry/class_registries/base_class_registry.py index e7df37c78..9c37777d5 100644 --- a/pyrit/registry/class_registries/base_class_registry.py +++ b/pyrit/registry/class_registries/base_class_registry.py @@ -19,9 +19,8 @@ from abc import ABC, abstractmethod from typing import Callable, Dict, Generic, Iterator, List, Optional, Type, TypeVar -from pyrit.identifiers import Identifier from pyrit.identifiers.class_name_utils import class_name_to_snake_case -from pyrit.registry.base import RegistryProtocol +from pyrit.registry.base import RegistryEntry, RegistryProtocol # Type variable for the registered class type T = TypeVar("T") @@ -183,7 +182,7 @@ def _build_metadata(self, name: str, entry: ClassEntry[T]) -> MetadataT: """ pass - def _build_base_metadata(self, name: str, entry: ClassEntry[T]) -> Identifier: + def _build_base_metadata(self, name: str, entry: ClassEntry[T]) -> RegistryEntry: """ Build the common base metadata for a registered class. @@ -195,7 +194,7 @@ def _build_base_metadata(self, name: str, entry: ClassEntry[T]) -> Identifier: entry: The ClassEntry containing the registered class. Returns: - An Identifier dataclass with common fields. + A RegistryEntry dataclass with common fields. """ registered_class = entry.registered_class @@ -206,8 +205,7 @@ def _build_base_metadata(self, name: str, entry: ClassEntry[T]) -> Identifier: else: description = entry.description or "No description available" - return Identifier( - identifier_type="class", + return RegistryEntry( class_name=registered_class.__name__, class_module=registered_class.__module__, class_description=description, diff --git a/pyrit/registry/class_registries/initializer_registry.py b/pyrit/registry/class_registries/initializer_registry.py index a334e87e7..10c301c54 100644 --- a/pyrit/registry/class_registries/initializer_registry.py +++ b/pyrit/registry/class_registries/initializer_registry.py @@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Dict, Optional from pyrit.identifiers import Identifier +from pyrit.registry.base import RegistryEntry from pyrit.registry.class_registries.base_class_registry import ( BaseClassRegistry, ClassEntry, @@ -34,16 +35,21 @@ @dataclass(frozen=True) -class InitializerMetadata(Identifier): +class InitializerMetadata(RegistryEntry): """ Metadata describing a registered PyRITInitializer class. Use get_class() to get the actual class. """ - display_name: str - required_env_vars: tuple[str, ...] - execution_order: int + # Human-readable display name (e.g., "Objective Target Setup"). + display_name: str = "" + + # Environment variables required by the initializer. + required_env_vars: tuple[str, ...] = () + + # Execution order priority (lower = earlier). + execution_order: int = 100 class InitializerRegistry(BaseClassRegistry["PyRITInitializer", InitializerMetadata]): @@ -208,7 +214,6 @@ def _build_metadata(self, name: str, entry: ClassEntry["PyRITInitializer"]) -> I try: instance = initializer_class() return InitializerMetadata( - identifier_type="class", class_name=initializer_class.__name__, class_module=initializer_class.__module__, class_description=instance.description, @@ -219,7 +224,6 @@ def _build_metadata(self, name: str, entry: ClassEntry["PyRITInitializer"]) -> I except Exception as e: logger.warning(f"Failed to get metadata for {name}: {e}") return InitializerMetadata( - identifier_type="class", class_name=initializer_class.__name__, class_module=initializer_class.__module__, class_description="Error loading initializer metadata", diff --git a/pyrit/registry/class_registries/scenario_registry.py b/pyrit/registry/class_registries/scenario_registry.py index f95ad9398..c17d25839 100644 --- a/pyrit/registry/class_registries/scenario_registry.py +++ b/pyrit/registry/class_registries/scenario_registry.py @@ -17,6 +17,7 @@ from pyrit.identifiers import Identifier from pyrit.identifiers.class_name_utils import class_name_to_snake_case +from pyrit.registry.base import RegistryEntry from pyrit.registry.class_registries.base_class_registry import ( BaseClassRegistry, ClassEntry, @@ -33,18 +34,27 @@ @dataclass(frozen=True) -class ScenarioMetadata(Identifier): +class ScenarioMetadata(RegistryEntry): """ Metadata describing a registered Scenario class. Use get_class() to get the actual class. """ - default_strategy: str - all_strategies: tuple[str, ...] - aggregate_strategies: tuple[str, ...] - default_datasets: tuple[str, ...] - max_dataset_size: Optional[int] + # The default strategy name (e.g., "single_turn"). + default_strategy: str = "" + + # All available strategy names for this scenario. + all_strategies: tuple[str, ...] = () + + # Aggregate strategies that combine multiple attack approaches. + aggregate_strategies: tuple[str, ...] = () + + # Default dataset names used by this scenario. + default_datasets: tuple[str, ...] = () + + # Maximum number of items per dataset. + max_dataset_size: Optional[int] = None class ScenarioRegistry(BaseClassRegistry["Scenario", ScenarioMetadata]): @@ -170,7 +180,6 @@ def _build_metadata(self, name: str, entry: ClassEntry["Scenario"]) -> ScenarioM max_dataset_size = dataset_config.max_dataset_size return ScenarioMetadata( - identifier_type="class", class_name=scenario_class.__name__, class_module=scenario_class.__module__, class_description=description, diff --git a/pyrit/score/float_scale/self_ask_scale_scorer.py b/pyrit/score/float_scale/self_ask_scale_scorer.py index 5e502681d..6ca3f56c7 100644 --- a/pyrit/score/float_scale/self_ask_scale_scorer.py +++ b/pyrit/score/float_scale/self_ask_scale_scorer.py @@ -3,13 +3,14 @@ import enum from pathlib import Path -from typing import Any, Optional, Union +from typing import Any, ClassVar, Optional, Union import yaml from pyrit.common import verify_and_resolve_path from pyrit.common.path import SCORER_SCALES_PATH from pyrit.identifiers import ScorerIdentifier +from pyrit.identifiers.component_config import ComponentConfig from pyrit.models import MessagePiece, Score, SeedPrompt, UnvalidatedScore from pyrit.prompt_target import PromptChatTarget from pyrit.score.float_scale.float_scale_scorer import FloatScaleScorer @@ -40,6 +41,11 @@ class SystemPaths(enum.Enum): is_objective_required=True, ) + # Configuration key constants + CONFIG_KEY_SYSTEM_PROMPT: ClassVar[str] = "system_prompt" + CONFIG_KEY_USER_PROMPT_TEMPLATE: ClassVar[str] = "user_prompt_template" + CONFIG_KEY_PROMPT_TARGET: ClassVar[str] = "prompt_target" + def __init__( self, *, @@ -96,6 +102,21 @@ def _build_identifier(self) -> ScorerIdentifier: user_prompt_template="objective: {objective}\nresponse: {response}", prompt_target=self._prompt_target, ) + + def _build_config(self) -> ComponentConfig: + """ + Build the behavioral configuration for this scorer. + + Returns: + ComponentConfig: The frozen configuration snapshot. + """ + return self._create_config( + params={ + self.CONFIG_KEY_SYSTEM_PROMPT: self._system_prompt, + self.CONFIG_KEY_USER_PROMPT_TEMPLATE: "objective: {objective}\nresponse: {response}", + }, + children={self.CONFIG_KEY_PROMPT_TARGET: self._prompt_target.get_config()}, + ) async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]: """ diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py index 83cd795ec..d30749f51 100644 --- a/pyrit/score/scorer.py +++ b/pyrit/score/scorer.py @@ -12,6 +12,7 @@ from typing import ( TYPE_CHECKING, Any, + ClassVar, Dict, List, Optional, @@ -26,7 +27,7 @@ pyrit_json_retry, remove_markdown_json, ) -from pyrit.identifiers import Identifiable, ScorerIdentifier +from pyrit.identifiers import Identifiable, ScorerIdentifier, ComponentConfig, Configurable from pyrit.memory import CentralMemory, MemoryInterface from pyrit.models import ( ChatMessageRole, @@ -54,11 +55,15 @@ from pyrit.score.scorer_evaluation.scorer_metrics import ScorerMetrics -class Scorer(Identifiable[ScorerIdentifier], abc.ABC): +class Scorer(Identifiable[ScorerIdentifier], Configurable, abc.ABC): """ Abstract base class for scorers. """ + # Configuration key constants + CONFIG_KEY_SCORER_TYPE: ClassVar[str] = "scorer_type" + + # Class attributes # Evaluation configuration - maps input dataset files to a result file # Specifies glob patterns for datasets and a result file name evaluation_file_mapping: Optional["ScorerEvalDatasetFiles"] = None @@ -155,6 +160,37 @@ def _create_identifier( scorer_specific_params=scorer_specific_params, ) + def _create_config( + self, + *, + params: Optional[Dict[str, Any]] = None, + children: Optional[Dict[str, Union[ComponentConfig, List[ComponentConfig]]]] = None, + ) -> ComponentConfig: + """ + Build a ComponentConfig for this scorer. + + Automatically injects ``scorer_type`` into params. Subclasses pass their + behavioral params and child component configs; the base class handles + the rest. + + This is the ComponentConfig equivalent of ``_create_identifier``. Both + coexist during the migration period. + + Args: + params (Optional[Dict[str, Any]]): Behavioral parameters specific to + this scorer (e.g., system_prompt, score_aggregator, min_value). + children (Optional[Dict[str, Union[ComponentConfig, List[ComponentConfig]]]]): Named + child configs (e.g., prompt_target, sub_scorers, image_scorer). + + Returns: + ComponentConfig: Frozen config snapshot with computed hash. + """ + all_params: Dict[str, Any] = {self.CONFIG_KEY_SCORER_TYPE: self.scorer_type} + if params: + all_params.update(params) + + return ComponentConfig.of(self, params=all_params, children=children) + async def score_async( self, message: Message, diff --git a/pyrit/score/true_false/true_false_composite_scorer.py b/pyrit/score/true_false/true_false_composite_scorer.py index 5dc2bb818..0da473bcc 100644 --- a/pyrit/score/true_false/true_false_composite_scorer.py +++ b/pyrit/score/true_false/true_false_composite_scorer.py @@ -2,9 +2,10 @@ # Licensed under the MIT license. import asyncio -from typing import List, Optional +from typing import ClassVar, List, Optional from pyrit.identifiers import ScorerIdentifier +from pyrit.identifiers.component_config import ComponentConfig from pyrit.models import ChatMessageRole, Message, MessagePiece, Score from pyrit.score.scorer_prompt_validator import ScorerPromptValidator from pyrit.score.true_false.true_false_score_aggregator import TrueFalseAggregatorFunc @@ -21,6 +22,10 @@ class TrueFalseCompositeScorer(TrueFalseScorer): ``TrueFalseScoreAggregator.MAJORITY``). """ + # Configuration key constants + CONFIG_KEY_SCORE_AGGREGATOR: ClassVar[str] = "score_aggregator" + CONFIG_KEY_SUB_SCORERS: ClassVar[str] = "sub_scorers" + def __init__( self, *, @@ -64,6 +69,16 @@ def _build_identifier(self) -> ScorerIdentifier: sub_scorers=self._scorers, score_aggregator=self._score_aggregator.__name__, ) + + def _build_config(self) -> ComponentConfig: + """Build the behavioral configuration for this scorer.""" + + return self._create_config( + params={self.CONFIG_KEY_SCORE_AGGREGATOR: self._score_aggregator.__name__}, + # NOTE: Named children list. Each child carries its own hash. + # The composite's hash includes all children's hashes. + children={self.CONFIG_KEY_SUB_SCORERS: [s.get_config() for s in self._scorers]}, + ) async def _score_async( self, From aa723b41046ea82a0709d8546ee592d4a6924ac1 Mon Sep 17 00:00:00 2001 From: Bashir Partovi Date: Thu, 12 Feb 2026 14:21:24 -0500 Subject: [PATCH 2/2] updated component config --- pyrit/identifiers/component_config.py | 43 --------------------------- 1 file changed, 43 deletions(-) diff --git a/pyrit/identifiers/component_config.py b/pyrit/identifiers/component_config.py index 6692600d3..b17963244 100644 --- a/pyrit/identifiers/component_config.py +++ b/pyrit/identifiers/component_config.py @@ -33,11 +33,6 @@ logger = logging.getLogger(__name__) -# --------------------------------------------------------------------------- -# Pure utility functions -# --------------------------------------------------------------------------- - - def config_hash(config_dict: Dict[str, Any]) -> str: """ Compute a deterministic SHA256 hash from a config dictionary. @@ -107,11 +102,6 @@ def _build_hash_dict( return hash_dict -# --------------------------------------------------------------------------- -# ComponentConfig — the frozen identity snapshot -# --------------------------------------------------------------------------- - - @dataclass(frozen=True) class ComponentConfig: """ @@ -138,10 +128,6 @@ class ComponentConfig: pyrit_version (str): Version tag for storage. Not included in hash. """ - # ------------------------------------------------------------------- - # Serialization key constants - # ------------------------------------------------------------------- - KEY_CLASS_NAME: ClassVar[str] = "class_name" KEY_CLASS_MODULE: ClassVar[str] = "class_module" KEY_HASH: ClassVar[str] = "hash" @@ -150,10 +136,6 @@ class ComponentConfig: LEGACY_KEY_TYPE: ClassVar[str] = "__type__" LEGACY_KEY_MODULE: ClassVar[str] = "__module__" - # ------------------------------------------------------------------- - # Fields - # ------------------------------------------------------------------- - class_name: str class_module: str params: Dict[str, Any] = field(default_factory=dict) @@ -171,10 +153,6 @@ def __post_init__(self) -> None: ) object.__setattr__(self, "hash", config_hash(hash_dict)) - # ------------------------------------------------------------------- - # Computed properties - # ------------------------------------------------------------------- - @property def short_hash(self) -> str: """ @@ -208,10 +186,6 @@ def unique_name(self) -> str: """ return f"{self.snake_class_name}::{self.short_hash}" - # ------------------------------------------------------------------- - # Factory - # ------------------------------------------------------------------- - @classmethod def of( cls, @@ -250,10 +224,6 @@ def of( children=clean_children, ) - # ------------------------------------------------------------------- - # Normalization - # ------------------------------------------------------------------- - @classmethod def normalize(cls, value: Union[ComponentConfig, Dict[str, Any]]) -> ComponentConfig: """ @@ -279,10 +249,6 @@ def normalize(cls, value: Union[ComponentConfig, Dict[str, Any]]) -> ComponentCo return cls.from_dict(value) raise TypeError(f"Expected ComponentConfig or dict, got {type(value).__name__}") - # ------------------------------------------------------------------- - # Serialization - # ------------------------------------------------------------------- - def to_dict(self) -> Dict[str, Any]: """ Serialize to a JSON-compatible dictionary for DB/JSONL storage. @@ -382,10 +348,6 @@ def from_dict(cls, data: Dict[str, Any]) -> ComponentConfig: return config - # ------------------------------------------------------------------- - # Display - # ------------------------------------------------------------------- - def __str__(self) -> str: """ Return a human-readable string representation. @@ -419,11 +381,6 @@ def __repr__(self) -> str: return f"ComponentConfig({', '.join(parts)})" -# --------------------------------------------------------------------------- -# Configurable — the ABC components implement -# --------------------------------------------------------------------------- - - class Configurable(ABC): """ Abstract base class for components that describe their behavioral configuration.