-
Notifications
You must be signed in to change notification settings - Fork 750
MAINT simplify how default adversarial and scorer targets are set in scenarios #1695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,6 +16,7 @@ | |
| import uuid | ||
| from abc import ABC, abstractmethod | ||
| from collections.abc import Sequence | ||
| from pathlib import Path | ||
| from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast, get_origin | ||
|
|
||
| from tqdm.auto import tqdm | ||
|
|
@@ -27,14 +28,20 @@ | |
| from pyrit.memory.memory_models import ScenarioResultEntry | ||
| from pyrit.models import AttackResult, SeedAttackGroup | ||
| from pyrit.models.scenario_result import ScenarioIdentifier, ScenarioResult | ||
| from pyrit.prompt_target import OpenAIChatTarget, PromptTarget | ||
| from pyrit.prompt_target import PromptTarget | ||
| from pyrit.prompt_target.common.target_requirements import TargetRequirements | ||
| from pyrit.registry import ScorerRegistry | ||
| from pyrit.registry.object_registries.scorer_registry import ScorerRegistry | ||
| from pyrit.scenario.core.atomic_attack import AtomicAttack | ||
| from pyrit.scenario.core.attack_technique import AttackTechnique | ||
| from pyrit.scenario.core.dataset_configuration import DatasetConfiguration | ||
| from pyrit.scenario.core.scenario_strategy import ScenarioStrategy | ||
| from pyrit.score import Scorer, SelfAskRefusalScorer, TrueFalseInverterScorer, TrueFalseScorer | ||
| from pyrit.scenario.core.scenario_target_defaults import get_default_scorer_target | ||
| from pyrit.score import Scorer, TrueFalseScorer | ||
| from pyrit.score.true_false.self_ask_refusal_scorer import SelfAskRefusalScorer | ||
| from pyrit.score.true_false.self_ask_true_false_scorer import SelfAskTrueFalseScorer | ||
| from pyrit.score.true_false.true_false_composite_scorer import TrueFalseCompositeScorer | ||
| from pyrit.score.true_false.true_false_inverter_scorer import TrueFalseInverterScorer | ||
| from pyrit.score.true_false.true_false_score_aggregator import TrueFalseScoreAggregator | ||
|
|
||
| if TYPE_CHECKING: | ||
| from pyrit.executor.attack.core.attack_config import AttackScoringConfig | ||
|
|
@@ -107,6 +114,11 @@ class Scenario(ABC): | |
| #: what the scenario needs. Validated in ``initialize_async`` once the target is supplied. | ||
| TARGET_REQUIREMENTS: ClassVar[TargetRequirements] = TargetRequirements() | ||
|
|
||
| #: Optional true/false question prompt path for objective scoring. | ||
| #: When set, the default objective scorer becomes | ||
| #: ``SelfAskTrueFalseScorer(path) AND NOT(SelfAskRefusalScorer)``. | ||
| COMPOSITE_SCORER_QUESTIONS_PATH: ClassVar[Path | None] = None | ||
|
|
||
| def __init__( | ||
| self, | ||
| *, | ||
|
|
@@ -310,16 +322,34 @@ def _build_display_group(self, *, technique_name: str, seed_group_name: str) -> | |
| return technique_name | ||
|
|
||
| def _get_default_objective_scorer(self) -> TrueFalseScorer: | ||
| # Deferred import to avoid circular dependency: | ||
| composite_scorer_questions_path = type(self).COMPOSITE_SCORER_QUESTIONS_PATH | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is cool, but I wonder if it's hidden? Should we do a method override? We could also potentially make it more flexible so that individual scenarios can add more scorers to the composite.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. IDK, maybe it's good as is; I do like how it saves a bunch of code. |
||
|
|
||
| if composite_scorer_questions_path is not None: | ||
| chat_target = get_default_scorer_target() | ||
| objective_scorer = SelfAskTrueFalseScorer( | ||
| chat_target=chat_target, | ||
| true_false_question_path=composite_scorer_questions_path, | ||
| ) | ||
| backstop_scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target)) | ||
| scorer = TrueFalseCompositeScorer( | ||
| aggregator=TrueFalseScoreAggregator.AND, | ||
| scorers=[objective_scorer, backstop_scorer], | ||
| ) | ||
| logger.info(f"Using composite default objective scorer: {type(scorer).__name__}") | ||
| return scorer | ||
|
|
||
| # Deferred import to avoid circular dependency. | ||
| from pyrit.setup.initializers.components.scorers import ScorerInitializerTags | ||
|
|
||
| entries = ScorerRegistry.get_registry_singleton().get_by_tag(tag=ScorerInitializerTags.DEFAULT_OBJECTIVE_SCORER) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We potentially want to make more use of this ^^^, because this has the metrics to determine the "best" objective scorer, including composite scorers. |
||
| if entries and isinstance(entries[0].instance, TrueFalseScorer): | ||
| scorer = entries[0].instance | ||
| logger.info(f"Using registered default objective scorer: {type(scorer).__name__}") | ||
| return scorer | ||
| scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=OpenAIChatTarget())) | ||
| logger.info(f"No registered default objective scorer found, using fallback: {type(scorer).__name__}") | ||
|
|
||
| chat_target = get_default_scorer_target() | ||
| scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target)) | ||
| logger.info(f"Using fallback default objective scorer: {type(scorer).__name__}") | ||
| return scorer | ||
|
|
||
| def set_params_from_args(self, *, args: dict[str, Any]) -> None: | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,91 @@ | ||
| # Copyright (c) Microsoft Corporation. | ||
| # Licensed under the MIT license. | ||
|
|
||
| from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget | ||
| from pyrit.prompt_target.common.target_capabilities import CapabilityName | ||
| from pyrit.registry import TargetRegistry | ||
|
|
||
|
|
||
| def get_default_scorer_target() -> PromptChatTarget: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Love this, I had a similar thought. |
||
| """ | ||
| Resolve the default objective scorer chat target. | ||
|
|
||
| First checks the ``TargetRegistry`` for an ``"objective_scorer_chat"`` entry | ||
| (populated by ``TargetInitializer`` from ``OBJECTIVE_SCORER_CHAT_*`` env vars). | ||
| Falls back to a plain ``OpenAIChatTarget`` when no such entry is registered. | ||
|
|
||
| Returns: | ||
| PromptChatTarget: The resolved objective scorer chat target. | ||
|
|
||
| Raises: | ||
| ValueError: If the registered target is not a ``PromptChatTarget``. | ||
| """ | ||
| return _get_default_chat_target(preferred_target_key="objective_scorer_chat") | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you think we should move |
||
|
|
||
| def get_default_adversarial_target() -> PromptChatTarget: | ||
| """ | ||
| Resolve the default adversarial chat target. | ||
|
|
||
| First checks the ``TargetRegistry`` for an ``"adversarial_chat"`` entry | ||
| (populated by ``TargetInitializer`` from ``ADVERSARIAL_CHAT_*`` env vars). | ||
| Falls back to an ``OpenAIChatTarget`` with ``temperature=1.2`` when no such entry is registered. | ||
|
|
||
| Returns: | ||
| PromptChatTarget: The resolved adversarial chat target. | ||
|
|
||
| Raises: | ||
| ValueError: If the registered target does not support multi-turn or is not a ``PromptChatTarget``. | ||
| """ | ||
| return _get_default_chat_target( | ||
| preferred_target_key="adversarial_chat", | ||
| required_capabilities={CapabilityName.MULTI_TURN}, | ||
| fallback_temperature=1.2, | ||
| ) | ||
|
|
||
|
|
||
| def _get_default_chat_target( | ||
| *, | ||
| preferred_target_key: str, | ||
| required_capabilities: set[CapabilityName] | None = None, | ||
| fallback_temperature: float | None = None, | ||
| ) -> PromptChatTarget: | ||
| """ | ||
| Resolve a chat target from TargetRegistry with configurable fallback behavior. | ||
|
|
||
| Resolution order: | ||
| 1. ``preferred_target_key`` entry from ``TargetRegistry`` | ||
| 2. ``OpenAIChatTarget(...)`` with optional temperature | ||
|
|
||
| Args: | ||
| preferred_target_key (str): TargetRegistry key to resolve first. | ||
| required_capabilities (set[CapabilityName] | None): Optional capabilities | ||
| that a resolved target must support. | ||
| fallback_temperature (float | None): Optional temperature for fallback | ||
| ``OpenAIChatTarget`` construction. | ||
|
|
||
| Returns: | ||
| PromptChatTarget: The resolved chat target. | ||
|
|
||
| Raises: | ||
| ValueError: If the resolved target does not satisfy required capabilities. | ||
| ValueError: If the registry entry exists but is not a PromptChatTarget. | ||
| """ | ||
| registry = TargetRegistry.get_registry_singleton() | ||
| target = registry.get(preferred_target_key) | ||
| if target is not None: | ||
| # Check required capabilities first (fail fast) | ||
| if required_capabilities: | ||
| for capability in required_capabilities: | ||
| if not target.capabilities.includes(capability=capability): | ||
| raise ValueError(f"Registry entry '{preferred_target_key}' must support {capability.value}.") | ||
|
|
||
| # Then check type | ||
| if not isinstance(target, PromptChatTarget): | ||
| raise ValueError( | ||
| f"Registry entry '{preferred_target_key}' must be a PromptChatTarget, but got {type(target).__name__}" | ||
| ) | ||
|
|
||
| return target | ||
|
|
||
| return OpenAIChatTarget(temperature=fallback_temperature) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We may want to look at the scorer initializer, so we can make sure to do metrics for whatever the default is