Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env_example
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ ADVERSARIAL_CHAT_ENDPOINT="https://xxxxx.openai.azure.com/openai/v1"
ADVERSARIAL_CHAT_KEY="xxxxx"
ADVERSARIAL_CHAT_MODEL="deployment-name"

# Objective Scorer chat target (used in scorers in scenarios)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may want to look at the scorer initializer, so we can make sure we compute metrics for whatever the default is.

OBJECTIVE_SCORER_CHAT_ENDPOINT="https://xxxxx.openai.azure.com/openai/v1"
OBJECTIVE_SCORER_CHAT_KEY="xxxxx"
OBJECTIVE_SCORER_CHAT_MODEL="deployment-name"

AZURE_FOUNDRY_DEEPSEEK_ENDPOINT="https://xxxxx.eastus2.models.ai.azure.com"
AZURE_FOUNDRY_DEEPSEEK_KEY="xxxxx"
AZURE_FOUNDRY_DEEPSEEK_MODEL=""
Expand Down
5 changes: 3 additions & 2 deletions pyrit/scenario/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
from pyrit.scenario.core.dataset_configuration import EXPLICIT_SEED_GROUPS_KEY, DatasetConfiguration
from pyrit.scenario.core.scenario import Scenario
from pyrit.scenario.core.scenario_strategy import ScenarioCompositeStrategy, ScenarioStrategy
from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target, get_default_scorer_target
from pyrit.scenario.core.scenario_techniques import (
SCENARIO_TECHNIQUES,
get_default_adversarial_target,
register_scenario_techniques,
)

Expand All @@ -28,6 +28,7 @@
"ScenarioCompositeStrategy",
"ScenarioStrategy",
"ScorerOverridePolicy",
"get_default_adversarial_target",
"register_scenario_techniques",
"get_default_scorer_target",
"get_default_adversarial_target",
]
42 changes: 36 additions & 6 deletions pyrit/scenario/core/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import uuid
from abc import ABC, abstractmethod
from collections.abc import Sequence
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union, cast, get_origin

from tqdm.auto import tqdm
Expand All @@ -27,14 +28,20 @@
from pyrit.memory.memory_models import ScenarioResultEntry
from pyrit.models import AttackResult, SeedAttackGroup
from pyrit.models.scenario_result import ScenarioIdentifier, ScenarioResult
from pyrit.prompt_target import OpenAIChatTarget, PromptTarget
from pyrit.prompt_target import PromptTarget
from pyrit.prompt_target.common.target_requirements import TargetRequirements
from pyrit.registry import ScorerRegistry
from pyrit.registry.object_registries.scorer_registry import ScorerRegistry
from pyrit.scenario.core.atomic_attack import AtomicAttack
from pyrit.scenario.core.attack_technique import AttackTechnique
from pyrit.scenario.core.dataset_configuration import DatasetConfiguration
from pyrit.scenario.core.scenario_strategy import ScenarioStrategy
from pyrit.score import Scorer, SelfAskRefusalScorer, TrueFalseInverterScorer, TrueFalseScorer
from pyrit.scenario.core.scenario_target_defaults import get_default_scorer_target
from pyrit.score import Scorer, TrueFalseScorer
from pyrit.score.true_false.self_ask_refusal_scorer import SelfAskRefusalScorer
from pyrit.score.true_false.self_ask_true_false_scorer import SelfAskTrueFalseScorer
from pyrit.score.true_false.true_false_composite_scorer import TrueFalseCompositeScorer
from pyrit.score.true_false.true_false_inverter_scorer import TrueFalseInverterScorer
from pyrit.score.true_false.true_false_score_aggregator import TrueFalseScoreAggregator

if TYPE_CHECKING:
from pyrit.executor.attack.core.attack_config import AttackScoringConfig
Expand Down Expand Up @@ -107,6 +114,11 @@ class Scenario(ABC):
#: what the scenario needs. Validated in ``initialize_async`` once the target is supplied.
TARGET_REQUIREMENTS: ClassVar[TargetRequirements] = TargetRequirements()

#: Optional true/false question prompt path for objective scoring.
#: When set, the default objective scorer becomes
#: ``SelfAskTrueFalseScorer(path) AND NOT(SelfAskRefusalScorer)``.
COMPOSITE_SCORER_QUESTIONS_PATH: ClassVar[Path | None] = None

def __init__(
self,
*,
Expand Down Expand Up @@ -310,16 +322,34 @@ def _build_display_group(self, *, technique_name: str, seed_group_name: str) ->
return technique_name

def _get_default_objective_scorer(self) -> TrueFalseScorer:
# Deferred import to avoid circular dependency:
composite_scorer_questions_path = type(self).COMPOSITE_SCORER_QUESTIONS_PATH
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is cool, but I wonder if it's hidden? Should we do a method override?

And we could potentially make it more flexible so that individual scenarios can add more scorers to the composite.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

idk. maybe it's good as is, I do like how it saves a bunch of code.


if composite_scorer_questions_path is not None:
chat_target = get_default_scorer_target()
objective_scorer = SelfAskTrueFalseScorer(
chat_target=chat_target,
true_false_question_path=composite_scorer_questions_path,
)
backstop_scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target))
scorer = TrueFalseCompositeScorer(
aggregator=TrueFalseScoreAggregator.AND,
scorers=[objective_scorer, backstop_scorer],
)
logger.info(f"Using composite default objective scorer: {type(scorer).__name__}")
return scorer

# Deferred import to avoid circular dependency.
from pyrit.setup.initializers.components.scorers import ScorerInitializerTags

entries = ScorerRegistry.get_registry_singleton().get_by_tag(tag=ScorerInitializerTags.DEFAULT_OBJECTIVE_SCORER)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We potentially want to make more use of this ^^^

Because this has metrics to be the "best" objective scorer including composite scorers.

if entries and isinstance(entries[0].instance, TrueFalseScorer):
scorer = entries[0].instance
logger.info(f"Using registered default objective scorer: {type(scorer).__name__}")
return scorer
scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=OpenAIChatTarget()))
logger.info(f"No registered default objective scorer found, using fallback: {type(scorer).__name__}")

chat_target = get_default_scorer_target()
scorer = TrueFalseInverterScorer(scorer=SelfAskRefusalScorer(chat_target=chat_target))
logger.info(f"Using fallback default objective scorer: {type(scorer).__name__}")
return scorer

def set_params_from_args(self, *, args: dict[str, Any]) -> None:
Expand Down
91 changes: 91 additions & 0 deletions pyrit/scenario/core/scenario_target_defaults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget
from pyrit.prompt_target.common.target_capabilities import CapabilityName
from pyrit.registry import TargetRegistry


def get_default_scorer_target() -> PromptChatTarget:
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Love this, I had a similar thought

"""
Resolve the default objective scorer chat target.

First checks the ``TargetRegistry`` for an ``"objective_scorer_chat"`` entry
(populated by ``TargetInitializer`` from ``OBJECTIVE_SCORER_CHAT_*`` env vars).
Falls back to a plain ``OpenAIChatTarget`` when no such entry is registered.

Returns:
PromptChatTarget: The resolved objective scorer chat target.

Raises:
ValueError: If the registered target is not a ``PromptChatTarget``.
"""
return _get_default_chat_target(preferred_target_key="objective_scorer_chat")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we should move get_default_objective_scorer here also?


def get_default_adversarial_target() -> PromptChatTarget:
    """
    Return the chat target used by default for adversarial conversations.

    The ``TargetRegistry`` entry named ``"adversarial_chat"`` (populated by
    ``TargetInitializer`` from ``ADVERSARIAL_CHAT_*`` env vars) is preferred.
    When no such entry is registered, an ``OpenAIChatTarget`` with
    ``temperature=1.2`` is created instead.

    Returns:
        PromptChatTarget: The resolved adversarial chat target.

    Raises:
        ValueError: If the registered target does not support multi-turn, or
            is not a ``PromptChatTarget``.
    """
    # Adversarial chat needs a target that can hold a multi-turn conversation.
    multi_turn_only = {CapabilityName.MULTI_TURN}
    return _get_default_chat_target(
        preferred_target_key="adversarial_chat",
        required_capabilities=multi_turn_only,
        fallback_temperature=1.2,
    )


def _get_default_chat_target(
    *,
    preferred_target_key: str,
    required_capabilities: set[CapabilityName] | None = None,
    fallback_temperature: float | None = None,
) -> PromptChatTarget:
    """
    Look up a chat target in the ``TargetRegistry``, or build a fallback one.

    The entry stored under ``preferred_target_key`` is returned when present,
    after it has been validated. When the registry has no such entry, a new
    ``OpenAIChatTarget`` is created with ``fallback_temperature``.

    Args:
        preferred_target_key (str): TargetRegistry key to look up.
        required_capabilities (set[CapabilityName] | None): Capabilities the
            registered target must support. ``None`` or an empty set skips
            the capability check.
        fallback_temperature (float | None): Temperature passed to the
            fallback ``OpenAIChatTarget``.

    Returns:
        PromptChatTarget: The registered target, or the fallback target.

    Raises:
        ValueError: If the registry entry lacks one of ``required_capabilities``.
        ValueError: If the registry entry is not a ``PromptChatTarget``.
    """
    target = TargetRegistry.get_registry_singleton().get(preferred_target_key)

    # Nothing registered under this key: build the fallback target.
    if target is None:
        return OpenAIChatTarget(temperature=fallback_temperature)

    # Capabilities are validated before the type check, so a missing
    # capability is the first error reported.
    for capability in required_capabilities or ():
        if not target.capabilities.includes(capability=capability):
            raise ValueError(f"Registry entry '{preferred_target_key}' must support {capability.value}.")

    if isinstance(target, PromptChatTarget):
        return target

    raise ValueError(
        f"Registry entry '{preferred_target_key}' must be a PromptChatTarget, but got {type(target).__name__}"
    )
41 changes: 5 additions & 36 deletions pyrit/scenario/core/scenario_techniques.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import inspect
import logging
from pathlib import Path
from typing import TYPE_CHECKING

from pyrit.common.path import EXECUTOR_SEED_PROMPT_PATH
from pyrit.executor.attack import (
Expand All @@ -34,13 +35,15 @@
)
from pyrit.models import SeedAttackTechniqueGroup, SeedSimulatedConversation
from pyrit.models.seeds.seed_simulated_conversation import NextMessageSystemPromptPaths
from pyrit.prompt_target import OpenAIChatTarget, PromptChatTarget
from pyrit.prompt_target.common.target_capabilities import CapabilityName
from pyrit.registry import TargetRegistry
from pyrit.registry.object_registries.attack_technique_registry import (
AttackTechniqueRegistry,
AttackTechniqueSpec,
)
from pyrit.scenario.core.scenario_target_defaults import get_default_adversarial_target

if TYPE_CHECKING:
from pyrit.prompt_target import PromptChatTarget

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -98,40 +101,6 @@
]


# ---------------------------------------------------------------------------
# Default adversarial target
# ---------------------------------------------------------------------------


def get_default_adversarial_target() -> PromptChatTarget:
    """
    Return the chat target used by default for adversarial conversations.

    The ``TargetRegistry`` entry named ``"adversarial_chat"`` (populated by
    ``TargetInitializer`` from ``ADVERSARIAL_CHAT_*`` env vars) is preferred.
    When the key is absent, or its entry is falsy, a plain
    ``OpenAIChatTarget(temperature=1.2)`` is returned instead.

    Returns:
        PromptChatTarget: The resolved adversarial chat target.

    Raises:
        ValueError: If the registered target does not support multi-turn.
    """
    registry = TargetRegistry.get_registry_singleton()
    target = registry.get("adversarial_chat") if "adversarial_chat" in registry else None

    # No usable registry entry: fall back to a fresh OpenAI chat target.
    if not target:
        return OpenAIChatTarget(temperature=1.2)

    if target.capabilities.includes(capability=CapabilityName.MULTI_TURN):
        return target

    raise ValueError(
        f"Registry entry 'adversarial_chat' must support multi-turn conversations, "
        f"but {type(target).__name__} does not."
    )


# ---------------------------------------------------------------------------
# Runtime spec builder
# ---------------------------------------------------------------------------
Expand Down
40 changes: 2 additions & 38 deletions pyrit/scenario/scenarios/airt/cyber.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,12 @@

from pyrit.common import apply_defaults
from pyrit.common.path import SCORER_SEED_PROMPT_PATH
from pyrit.prompt_target import OpenAIChatTarget
from pyrit.scenario.core.dataset_configuration import DatasetConfiguration
from pyrit.scenario.core.scenario import Scenario
from pyrit.score import (
SelfAskRefusalScorer,
SelfAskTrueFalseScorer,
TrueFalseCompositeScorer,
TrueFalseInverterScorer,
TrueFalseScoreAggregator,
TrueFalseScorer,
)

if TYPE_CHECKING:
from pyrit.scenario.core.scenario_strategy import ScenarioStrategy
from pyrit.score import TrueFalseScorer

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -64,6 +56,7 @@ class Cyber(Scenario):
"""

VERSION: int = 2
COMPOSITE_SCORER_QUESTIONS_PATH = SCORER_SEED_PROMPT_PATH / "true_false_question" / "malware.yaml"
_cached_strategy_class: ClassVar[type[ScenarioStrategy] | None] = None

@classmethod
Expand Down Expand Up @@ -128,32 +121,3 @@ def __init__(
include_default_baseline=include_baseline,
scenario_result_id=scenario_result_id,
)

def _get_default_objective_scorer(self) -> TrueFalseCompositeScorer:
    """
    Build the default objective scorer for this scenario.

    Two scorers are combined with the ``AND`` aggregator:
        1. A SelfAskTrueFalseScorer driven by the malware true/false question,
           which detects the presence of malware.
        2. An inverted SelfAskRefusalScorer acting as a NON-refusal backstop, so
           that a refusal to produce malware registers as a negative score.

    Both scorers share one chat target: the one exposed by the base class
    default scorer through ``get_chat_target()``, or ``OpenAIChatTarget()``
    when that returns a falsy value.

    Returns:
        TrueFalseCompositeScorer: Default objective scorer with backstop and malware evaluation.
    """
    chat_target = super()._get_default_objective_scorer().get_chat_target() or OpenAIChatTarget()
    malware_question_path = SCORER_SEED_PROMPT_PATH / "true_false_question" / "malware.yaml"

    malware_detector = SelfAskTrueFalseScorer(
        chat_target=chat_target,
        true_false_question_path=malware_question_path,
    )
    refusal_scorer = SelfAskRefusalScorer(chat_target=chat_target)
    non_refusal = TrueFalseInverterScorer(scorer=refusal_scorer)

    return TrueFalseCompositeScorer(
        aggregator=TrueFalseScoreAggregator.AND,
        scorers=[malware_detector, non_refusal],
    )
Loading
Loading