From 668512dbe7975e7e8712ffedd9d8a0713ca0f0a0 Mon Sep 17 00:00:00 2001
From: jsong468 <songjustin@microsoft.com>
Date: Mon, 4 May 2026 15:24:04 -0700
Subject: [PATCH 1/5] score blocked content

---
 pyrit/executor/attack/core/attack_config.py   |   6 +
 pyrit/executor/attack/multi_turn/crescendo.py |   7 +-
 .../attack/multi_turn/multi_prompt_sending.py |   2 +
 .../executor/attack/multi_turn/red_teaming.py |   2 +
 .../attack/multi_turn/tree_of_attacks.py      |  13 +
 .../attack/single_turn/prompt_sending.py      |   2 +
 .../openai/openai_chat_target.py              |  20 +
 .../openai/openai_response_target.py          |  28 ++
 pyrit/prompt_target/openai/openai_target.py   |  30 +-
 pyrit/score/conversation_scorer.py            |   6 +-
 pyrit/score/scorer.py                         |  87 ++++-
 .../float_scale_threshold_scorer.py           |   4 +
 pyrit/score/true_false/true_false_scorer.py   |  10 +-
 .../attack/core/test_attack_config.py         |  17 +
 .../target/test_openai_chat_target.py         |  56 +++
 .../target/test_openai_response_target.py     |  42 +++
 tests/unit/score/test_scorer.py               | 345 +++++++++++++++++-
 17 files changed, 667 insertions(+), 10 deletions(-)

diff --git a/pyrit/executor/attack/core/attack_config.py b/pyrit/executor/attack/core/attack_config.py
index 7d128ffd79..b11b91aeff 100644
--- a/pyrit/executor/attack/core/attack_config.py
+++ b/pyrit/executor/attack/core/attack_config.py
@@ -53,6 +53,12 @@ class AttackScoringConfig:
     # Whether to use scoring results as feedback for iterative attacks
     use_score_as_feedback: bool = True
 
+    # Whether to score blocked responses using partial content from prompt_metadata["partial_content"].
+    # When True, blocked responses that contain partial model output (e.g., from Azure Content Safety
+    # triggering mid-generation) will be evaluated by scorers instead of being skipped or
+    # auto-classified as failures/refusals.
+    score_blocked_content: bool = False
+
     def __post_init__(self) -> None:
         """
         Validate configuration values.
diff --git a/pyrit/executor/attack/multi_turn/crescendo.py b/pyrit/executor/attack/multi_turn/crescendo.py
index fec9e9c856..9f04ab0df0 100644
--- a/pyrit/executor/attack/multi_turn/crescendo.py
+++ b/pyrit/executor/attack/multi_turn/crescendo.py
@@ -203,6 +203,7 @@ def __init__(
         self._objective_scorer = objective_scorer
         self._use_score_as_feedback = attack_scoring_config.use_score_as_feedback
         self._auxiliary_scorers = attack_scoring_config.auxiliary_scorers
+        self._score_blocked_content = attack_scoring_config.score_blocked_content
 
         # Initialize refusal scorer - use the one from config if provided, otherwise create default
         self._refusal_scorer = attack_scoring_config.refusal_scorer or SelfAskRefusalScorer(
@@ -675,7 +676,10 @@ async def _check_refusal_async(self, context: CrescendoAttackContext, objective:
             objective=context.objective,
         ):
             scores = await self._refusal_scorer.score_async(
-                message=context.last_response, objective=objective, skip_on_error_result=False
+                message=context.last_response,
+                objective=objective,
+                skip_on_error_result=False,
+                score_blocked_content=self._score_blocked_content,
             )
         return scores[0]
 
@@ -711,6 +715,7 @@ async def _score_response_async(self, *, context: CrescendoAttackContext) -> Sco
                 role_filter="assistant",
                 objective=context.objective,
                 skip_on_error_result=False,
+                score_blocked_content=self._score_blocked_content,
             )
 
         objective_score = scoring_results["objective_scores"]
diff --git a/pyrit/executor/attack/multi_turn/multi_prompt_sending.py b/pyrit/executor/attack/multi_turn/multi_prompt_sending.py
index 546b6c970b..d429813a51 100644
--- a/pyrit/executor/attack/multi_turn/multi_prompt_sending.py
+++ b/pyrit/executor/attack/multi_turn/multi_prompt_sending.py
@@ -173,6 +173,7 @@ def __init__(
 
         self._auxiliary_scorers = attack_scoring_config.auxiliary_scorers
         self._objective_scorer = attack_scoring_config.objective_scorer
+        self._score_blocked_content = attack_scoring_config.score_blocked_content
 
         # Initialize prompt normalizer and conversation manager
         self._prompt_normalizer = prompt_normalizer or PromptNormalizer()
@@ -401,6 +402,7 @@ async def _evaluate_response_async(self, *, response: Message, objective: str) -
                 role_filter="assistant",
                 objective=objective,
                 skip_on_error_result=True,
+                score_blocked_content=self._score_blocked_content,
             )
 
         objective_scores = scoring_results["objective_scores"]
diff --git a/pyrit/executor/attack/multi_turn/red_teaming.py b/pyrit/executor/attack/multi_turn/red_teaming.py
index 60a05e388b..e41967067c 100644
--- a/pyrit/executor/attack/multi_turn/red_teaming.py
+++ b/pyrit/executor/attack/multi_turn/red_teaming.py
@@ -143,6 +143,7 @@ def __init__(
 
         self._objective_scorer = attack_scoring_config.objective_scorer
         self._use_score_as_feedback = attack_scoring_config.use_score_as_feedback
+        self._score_blocked_content = attack_scoring_config.score_blocked_content
 
         # Initialize adversarial configuration
         self._adversarial_chat = attack_adversarial_config.target
@@ -604,6 +605,7 @@ async def _score_response_async(self, *, context: MultiTurnAttackContext[Any]) -
                 message=context.last_response,
                 role_filter="assistant",
                 objective=context.objective,
+                score_blocked_content=self._score_blocked_content,
             )
 
         objective_scores = scoring_results
diff --git a/pyrit/executor/attack/multi_turn/tree_of_attacks.py b/pyrit/executor/attack/multi_turn/tree_of_attacks.py
index 7f0470bba4..ad689829fa 100644
--- a/pyrit/executor/attack/multi_turn/tree_of_attacks.py
+++ b/pyrit/executor/attack/multi_turn/tree_of_attacks.py
@@ -96,6 +96,7 @@ def __init__(
         refusal_scorer: Optional[TrueFalseScorer] = None,
         auxiliary_scorers: Optional[list[Scorer]] = None,
         use_score_as_feedback: bool = True,
+        score_blocked_content: bool = False,
     ) -> None:
         """
         Initialize TAP scoring configuration.
@@ -107,6 +108,8 @@ def __init__(
             refusal_scorer (Optional[TrueFalseScorer]): Optional scorer for detecting refusals.
             auxiliary_scorers (Optional[List[Scorer]]): Additional scorers for auxiliary metrics.
             use_score_as_feedback (bool): Whether to use scoring results as feedback. Defaults to True.
+            score_blocked_content (bool): Whether to score blocked responses using partial content.
+                Defaults to False.
 
         Raises:
             ValueError: If objective_scorer is not a FloatScaleThresholdScorer or
@@ -128,6 +131,7 @@ def __init__(
         self.refusal_scorer = refusal_scorer
         self.auxiliary_scorers = auxiliary_scorers or []
         self.use_score_as_feedback = use_score_as_feedback
+        self.score_blocked_content = score_blocked_content
 
     @property
     def threshold(self) -> float:
@@ -283,6 +287,7 @@ def __init__(
         parent_id: Optional[str] = None,
         prompt_normalizer: Optional[PromptNormalizer] = None,
         initial_prompt: Optional[Message] = None,
+        score_blocked_content: bool = False,
     ) -> None:
         """
         Initialize a tree node.
@@ -306,6 +311,8 @@ def __init__(
             prompt_normalizer (Optional[PromptNormalizer]): Normalizer for handling prompts and responses.
             initial_prompt (Optional[Message]): Initial message to send for the first turn,
                 bypassing adversarial chat generation. Supports multimodal messages.
+            score_blocked_content (bool): If True, blocked responses with partial content will be
+                scored using that content. Defaults to False.
         """
         # Store configuration
         self._objective_target = objective_target
@@ -322,6 +329,7 @@ def __init__(
         self._attack_id = attack_id
         self._attack_strategy_name = attack_strategy_name
         self._memory_labels = memory_labels or {}
+        self._score_blocked_content = score_blocked_content
 
         # Initialize utilities
         self._memory = CentralMemory.get_memory_instance()
@@ -660,6 +668,7 @@ async def _score_response_async(self, *, response: Message, objective: str) -> N
                 role_filter="assistant",
                 objective=objective,
                 skip_on_error_result=False,
+                score_blocked_content=self._score_blocked_content,
             )
 
         # Extract objective score
@@ -783,6 +792,7 @@ def duplicate(self) -> "_TreeOfAttacksNode":
             desired_response_prefix=self._desired_response_prefix,
             parent_id=self.node_id,
             prompt_normalizer=self._prompt_normalizer,
+            score_blocked_content=self._score_blocked_content,
         )
 
         # Duplicate the conversations to preserve history
@@ -1382,11 +1392,13 @@ def __init__(
                 refusal_scorer=attack_scoring_config.refusal_scorer,
                 auxiliary_scorers=attack_scoring_config.auxiliary_scorers or None,
                 use_score_as_feedback=attack_scoring_config.use_score_as_feedback,
+                score_blocked_content=attack_scoring_config.score_blocked_content,
             )
 
         self._attack_scoring_config = tap_scoring_config
         self._auxiliary_scorers = tap_scoring_config.auxiliary_scorers
         self._objective_scorer = tap_scoring_config.objective_scorer
+        self._score_blocked_content = tap_scoring_config.score_blocked_content
 
         # Use the adversarial chat target for scoring, as in CrescendoAttack
         self._scoring_target = self._adversarial_chat
@@ -1890,6 +1902,7 @@ def _create_attack_node(
             parent_id=parent_id,
             prompt_normalizer=self._prompt_normalizer,
             initial_prompt=initial_prompt,
+            score_blocked_content=self._score_blocked_content,
         )
 
         # Add the adversarial chat conversation ID to the context's tracking (ensuring uniqueness)
diff --git a/pyrit/executor/attack/single_turn/prompt_sending.py b/pyrit/executor/attack/single_turn/prompt_sending.py
index 650d86bd04..451d3b96b3 100644
--- a/pyrit/executor/attack/single_turn/prompt_sending.py
+++ b/pyrit/executor/attack/single_turn/prompt_sending.py
@@ -102,6 +102,7 @@ def __init__(
 
         self._auxiliary_scorers = attack_scoring_config.auxiliary_scorers
         self._objective_scorer = attack_scoring_config.objective_scorer
+        self._score_blocked_content = attack_scoring_config.score_blocked_content
 
         # Skip criteria could be set directly in the injected prompt normalizer
         self._prompt_normalizer = prompt_normalizer or PromptNormalizer()
@@ -363,6 +364,7 @@ async def _evaluate_response_async(
                 role_filter="assistant",
                 objective=objective,
                 skip_on_error_result=True,
+                score_blocked_content=self._score_blocked_content,
             )
 
         if not self._objective_scorer:
diff --git a/pyrit/prompt_target/openai/openai_chat_target.py b/pyrit/prompt_target/openai/openai_chat_target.py
index 6dfb5f391f..eb9061e586 100644
--- a/pyrit/prompt_target/openai/openai_chat_target.py
+++ b/pyrit/prompt_target/openai/openai_chat_target.py
@@ -288,6 +288,26 @@ def _check_content_filter(self, response: Any) -> bool:
             pass
         return False
 
+    def _extract_partial_content(self, response: Any) -> Optional[str]:
+        """
+        Extract partial content from a Chat Completions response with finish_reason=content_filter.
+
+        When Azure Content Safety triggers mid-generation, the model may have produced partial
+        text in ``response.choices[0].message.content`` before being cut off.
+
+        Args:
+            response: A ChatCompletion object from the OpenAI SDK.
+
+        Returns:
+            The first choice's message content, or None if it is empty/absent or the lookup fails.
+        """
+        try:
+            if response.choices and response.choices[0].message and response.choices[0].message.content:
+                return response.choices[0].message.content
+        except (AttributeError, IndexError):
+            pass  # best-effort: an unexpected response shape simply means no partial content
+        return None
+
     def _validate_response(self, response: Any, request: MessagePiece) -> Optional[Message]:
         """
         Validate a Chat Completions API response for errors.
diff --git a/pyrit/prompt_target/openai/openai_response_target.py b/pyrit/prompt_target/openai/openai_response_target.py
index dbe71e5406..b4f51dc9b3 100644
--- a/pyrit/prompt_target/openai/openai_response_target.py
+++ b/pyrit/prompt_target/openai/openai_response_target.py
@@ -454,6 +454,34 @@ def _check_content_filter(self, response: Any) -> bool:
             return _is_content_filter_error(response_dict)
         return False
 
+    def _extract_partial_content(self, response: Any) -> Optional[str]:
+        """
+        Extract partial content from a Response API response that was content-filtered.
+
+        The Response API may include partial text in ``response.output`` message sections
+        even when the response has a content filter error.
+
+        Args:
+            response: A Response object from the OpenAI SDK.
+
+        Returns:
+            The first text entry of each message section joined by newlines, or None if none is found.
+        """
+        try:
+            if not hasattr(response, "output") or not response.output:
+                return None
+            parts: list[str] = []
+            for section in response.output:
+                if getattr(section, "type", None) == MessagePieceType.MESSAGE:
+                    content = getattr(section, "content", None)
+                    if content and len(content) > 0:
+                        text = getattr(content[0], "text", None)  # only the first content entry is read
+                        if text:
+                            parts.append(text)
+            return "\n".join(parts) if parts else None
+        except (AttributeError, IndexError, TypeError):
+            return None  # unexpected response shape: treat as no partial content
+
     def _validate_response(self, response: Any, request: MessagePiece) -> Optional[Message]:
         """
         Validate a Response API response for errors.
diff --git a/pyrit/prompt_target/openai/openai_target.py b/pyrit/prompt_target/openai/openai_target.py
index 8058a2b7fd..885f650894 100644
--- a/pyrit/prompt_target/openai/openai_target.py
+++ b/pyrit/prompt_target/openai/openai_target.py
@@ -559,6 +559,10 @@ def _handle_content_filter_response(self, response: Any, request: MessagePiece)
         """
         Handle content filter errors by creating a proper error Message.
 
+        If the subclass provides partial content via ``_extract_partial_content``,
+        it is attached to each response piece as ``prompt_metadata["partial_content"]``
+        so that scorers with ``score_blocked_content=True`` can evaluate it.
+
         Args:
             response: The response object from OpenAI SDK.
             request: The original request message piece.
@@ -567,13 +571,37 @@ def _handle_content_filter_response(self, response: Any, request: MessagePiece)
             Message object with error type indicating content was filtered.
         """
         logger.warning("Output content filtered by content policy.")
-        return handle_bad_request_exception(
+
+        partial_content = self._extract_partial_content(response)
+
+        error_message = handle_bad_request_exception(
             response_text=response.model_dump_json(),
             request=request,
             error_code=200,
             is_content_filter=True,
         )
 
+        if partial_content:
+            for piece in error_message.message_pieces:
+                piece.prompt_metadata["partial_content"] = partial_content
+
+        return error_message
+
+    def _extract_partial_content(self, response: Any) -> Optional[str]:
+        """
+        Extract any partial content the model generated before the content filter triggered.
+
+        Override this in subclasses to extract partial content from API-specific response
+        structures. The base implementation returns None (no partial content).
+
+        Args:
+            response: The response object from OpenAI SDK.
+
+        Returns:
+            The partial text content, or None; a falsy result means no ``partial_content`` metadata is attached.
+        """
+        return None
+
     def _validate_response(self, response: Any, request: MessagePiece) -> Optional[Message]:
         """
         Validate the response and return error Message if needed.
diff --git a/pyrit/score/conversation_scorer.py b/pyrit/score/conversation_scorer.py
index c3bcbf4f87..7908d27404 100644
--- a/pyrit/score/conversation_scorer.py
+++ b/pyrit/score/conversation_scorer.py
@@ -33,7 +33,9 @@ class ConversationScorer(Scorer, ABC):
         enforce_all_pieces_valid=True,
     )
 
-    async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]:
+    async def _score_async(
+        self, message: Message, *, objective: Optional[str] = None, score_blocked_content: bool = False
+    ) -> list[Score]:
         """
         Scores the entire conversation history by concatenating all messages and passing to the wrapped scorer.
 
@@ -41,6 +43,8 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
             message (Message): A message from the conversation to be scored.
                 The conversation ID from the first message piece is used to retrieve the full conversation from memory.
             objective (Optional[str]): Optional objective to evaluate against.
+            score_blocked_content (bool): Accepted for parity with the base scorer's _score_async. Defaults to False.
+                NOTE(review): this change only adds the parameter; the conversation path does not use it yet — confirm.
 
         Returns:
             list[Score]: List of Score objects from the underlying scorer
diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py
index 1a011823fd..4288926c39 100644
--- a/pyrit/score/scorer.py
+++ b/pyrit/score/scorer.py
@@ -163,6 +163,7 @@ async def score_async(
         role_filter: Optional[ChatMessageRole] = None,
         skip_on_error_result: bool = False,
         infer_objective_from_request: bool = False,
+        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Score the message, add the results to the database, and return a list of Score objects.
@@ -177,6 +178,9 @@ async def score_async(
             skip_on_error_result (bool): If True, skip scoring if the message contains an error. Defaults to False.
             infer_objective_from_request (bool): If True, infer the objective from the message's previous request
                 when objective is not provided. Defaults to False.
+            score_blocked_content (bool): If True, blocked responses that contain partial content
+                (in prompt_metadata["partial_content"]) will be scored using that content instead
+                of being filtered out or short-circuited. Defaults to False.
 
         Returns:
             list[Score]: A list of Score objects representing the results.
@@ -192,8 +196,12 @@ async def score_async(
             return []
 
         if skip_on_error_result and message.is_error():
-            logger.debug("Skipping scoring due to error in message and skip_on_error=True.")
-            return []
+            # When score_blocked_content is enabled and the message has partial content,
+            # don't skip — let _score_async handle the substitution.
+            has_partial = any("partial_content" in p.prompt_metadata for p in message.message_pieces if p.is_blocked())
+            if not (score_blocked_content and has_partial):
+                logger.debug("Skipping scoring due to error in message and skip_on_error=True.")
+                return []
 
         if infer_objective_from_request and (not objective):
             objective = self._extract_objective_from_response(message)
@@ -202,6 +210,7 @@ async def score_async(
             scores = await self._score_async(
                 message,
                 objective=objective,
+                score_blocked_content=score_blocked_content,
             )
         except PyritException as e:
             # Re-raise PyRIT exceptions with enhanced context while preserving type for retry decorators
@@ -217,7 +226,9 @@ async def score_async(
 
         return scores
 
-    async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]:
+    async def _score_async(
+        self, message: Message, *, objective: Optional[str] = None, score_blocked_content: bool = False
+    ) -> list[Score]:
         """
         Score the given request response asynchronously.
 
@@ -225,9 +236,16 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
         and returns a flattened list of scores. Subclasses can override this method
         to implement custom scoring logic (e.g., aggregating scores).
 
+        When score_blocked_content is True, blocked pieces with partial content in
+        prompt_metadata["partial_content"] are substituted with text-type copies
+        (with response_error="none") so they pass the validator and are scored
+        by the LLM without triggering blocked short-circuits.
+
         Args:
             message (Message): The message to score.
             objective (Optional[str]): The objective to evaluate against. Defaults to None.
+            score_blocked_content (bool): If True, substitute blocked pieces that have
+                partial content with text-type copies. Defaults to False.
 
         Returns:
             list[Score]: A list of Score objects.
@@ -238,6 +256,20 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
         # Score only the supported pieces
         supported_pieces = self._get_supported_pieces(message)
 
+        # When score_blocked_content is enabled, substitute blocked pieces that have partial content.
+        # Substitutes replace the original blocked piece (if present) or are added if not.
+        if score_blocked_content:
+            already_supported_ids = {p.id for p in supported_pieces}
+            for piece in message.message_pieces:
+                if piece.is_blocked() and "partial_content" in piece.prompt_metadata:
+                    substitute = self._create_text_piece_from_blocked(piece)
+                    if substitute and self._validator.is_message_piece_supported(message_piece=substitute):
+                        # Replace original blocked piece if it was already in supported_pieces
+                        if piece.id in already_supported_ids:
+                            supported_pieces = [substitute if p.id == piece.id else p for p in supported_pieces]
+                        else:
+                            supported_pieces.append(substitute)
+
         tasks = [self._score_piece_async(message_piece=piece, objective=objective) for piece in supported_pieces]
 
         if not tasks:
@@ -253,6 +285,44 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
     async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
         raise NotImplementedError
 
+    @staticmethod
+    def _create_text_piece_from_blocked(piece: MessagePiece) -> Optional[MessagePiece]:
+        """
+        Create a text-typed copy of a blocked MessagePiece using its partial content.
+
+        The substitute preserves the original piece's id (so scores link back correctly),
+        sets converted_value to the partial content with converted_value_data_type="text",
+        and sets response_error="none" so scorer short-circuits (e.g., refusal scorer's
+        blocked check) do not fire. prompt_metadata is shallow-copied, never shared.
+
+        Args:
+            piece: A blocked MessagePiece with prompt_metadata["partial_content"].
+
+        Returns:
+            MessagePiece with text content, or None if partial content is empty.
+        """
+        partial_content = str(piece.prompt_metadata.get("partial_content", ""))
+        if not partial_content:
+            return None
+
+        return MessagePiece(
+            id=piece.id,
+            role=piece.api_role,
+            original_value=piece.original_value,
+            converted_value=partial_content,
+            original_value_data_type=piece.original_value_data_type,
+            converted_value_data_type="text",
+            conversation_id=piece.conversation_id,
+            sequence=piece.sequence,
+            labels=piece.labels,
+            prompt_metadata=dict(piece.prompt_metadata),  # copy: edits to the substitute must not reach the original
+            converter_identifiers=list(piece.converter_identifiers),  # type: ignore[arg-type]
+            prompt_target_identifier=piece.prompt_target_identifier,
+            attack_identifier=piece.attack_identifier,
+            response_error="none",
+            timestamp=piece.timestamp,
+        )
+
     def _get_supported_pieces(self, message: Message) -> list[MessagePiece]:
         """
         Get a list of supported message pieces for this scorer.
@@ -713,6 +783,7 @@ async def score_response_async(
         role_filter: ChatMessageRole = "assistant",
         objective: Optional[str] = None,
         skip_on_error_result: bool = True,
+        score_blocked_content: bool = False,
     ) -> dict[str, list[Score]]:
         """
         Score a response using an objective scorer and optional auxiliary scorers.
@@ -725,6 +796,8 @@ async def score_response_async(
                 Defaults to "assistant" (real responses only, not simulated).
             objective (Optional[str]): Task/objective for scoring context. Defaults to None.
             skip_on_error_result (bool): If True, skip scoring pieces that have errors. Defaults to True.
+            score_blocked_content (bool): If True, blocked responses with partial content will be
+                scored using that content. Defaults to False.
 
         Returns:
             Dict[str, List[Score]]: Dictionary with keys `auxiliary_scores` and `objective_scores`
@@ -747,6 +820,7 @@ async def score_response_async(
                     role_filter=role_filter,
                     objective=objective,
                     skip_on_error_result=skip_on_error_result,
+                    score_blocked_content=score_blocked_content,
                 )
                 result["auxiliary_scores"] = aux_scores
             # objective_scores remains empty
@@ -760,12 +834,14 @@ async def score_response_async(
                 role_filter=role_filter,
                 objective=objective,
                 skip_on_error_result=skip_on_error_result,
+                score_blocked_content=score_blocked_content,
             )
             obj_task = objective_scorer.score_async(
                 message=response,
                 objective=objective,
                 skip_on_error_result=skip_on_error_result,
                 role_filter=role_filter,
+                score_blocked_content=score_blocked_content,
             )
             aux_scores, obj_scores = await asyncio.gather(aux_task, obj_task)
             result["auxiliary_scores"] = aux_scores
@@ -776,6 +852,7 @@ async def score_response_async(
                 objective=objective,
                 skip_on_error_result=skip_on_error_result,
                 role_filter=role_filter,
+                score_blocked_content=score_blocked_content,
             )
             result["objective_scores"] = obj_scores
         return result
@@ -788,6 +865,7 @@ async def score_response_multiple_scorers_async(
         role_filter: ChatMessageRole = "assistant",
         objective: Optional[str] = None,
         skip_on_error_result: bool = True,
+        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Score a response using multiple scorers in parallel.
@@ -802,6 +880,8 @@ async def score_response_multiple_scorers_async(
                 Defaults to "assistant" (real responses only, not simulated).
             objective (Optional[str]): Optional objective description for scoring context.
             skip_on_error_result (bool): If True, skip scoring pieces that have errors (default: True).
+            score_blocked_content (bool): If True, blocked responses with partial content will be
+                scored using that content. Defaults to False.
 
         Returns:
             List[Score]: All scores from all scorers
@@ -816,6 +896,7 @@ async def score_response_multiple_scorers_async(
                 objective=objective,
                 role_filter=role_filter,
                 skip_on_error_result=skip_on_error_result,
+                score_blocked_content=score_blocked_content,
             )
             for scorer in scorers
         ]
diff --git a/pyrit/score/true_false/float_scale_threshold_scorer.py b/pyrit/score/true_false/float_scale_threshold_scorer.py
index 5d35d52d0d..0271c9ad3d 100644
--- a/pyrit/score/true_false/float_scale_threshold_scorer.py
+++ b/pyrit/score/true_false/float_scale_threshold_scorer.py
@@ -79,6 +79,7 @@ async def _score_async(
         *,
         objective: Optional[str] = None,
         role_filter: Optional[ChatMessageRole] = None,
+        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Scores the piece using the underlying float-scale scorer and thresholds the resulting score.
@@ -88,6 +89,8 @@ async def _score_async(
             objective (Optional[str]): The objective to evaluate against (the original attacker model's objective).
                 Defaults to None.
             role_filter (Optional[ChatMessageRole]): Optional filter for message roles. Defaults to None.
+            score_blocked_content (bool): If True, blocked pieces with partial content will be
+                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A list containing a single true/false Score object based on the threshold comparison.
@@ -96,6 +99,7 @@ async def _score_async(
             message,
             objective=objective,
             role_filter=role_filter,
+            score_blocked_content=score_blocked_content,
         )
 
         # Aggregator handles 0-many scores and returns exactly one result (or raises if configured)
diff --git a/pyrit/score/true_false/true_false_scorer.py b/pyrit/score/true_false/true_false_scorer.py
index 6b6e79815e..3017895660 100644
--- a/pyrit/score/true_false/true_false_scorer.py
+++ b/pyrit/score/true_false/true_false_scorer.py
@@ -104,7 +104,9 @@ def get_scorer_metrics(self) -> Optional["ObjectiveScorerMetrics"]:
 
         return find_objective_metrics_by_eval_hash(eval_hash=eval_hash, file_path=result_file)
 
-    async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]:
+    async def _score_async(
+        self, message: Message, *, objective: Optional[str] = None, score_blocked_content: bool = False
+    ) -> list[Score]:
         """
         Score the given request response asynchronously.
 
@@ -113,6 +115,8 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
         Args:
             message (Message): The message to score.
             objective (Optional[str]): The objective to evaluate against. Defaults to None.
+            score_blocked_content (bool): If True, blocked pieces with partial content will be
+                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A list containing a single true/false Score object.
@@ -121,7 +125,9 @@ async def _score_async(self, message: Message, *, objective: Optional[str] = Non
             ValueError: If no pieces are scored and cannot determine a piece ID for the return score.
         """
         # Get individual scores for all supported pieces using base implementation logic
-        score_list = await super()._score_async(message, objective=objective)
+        score_list = await super()._score_async(
+            message, objective=objective, score_blocked_content=score_blocked_content
+        )
 
         if not score_list:
             # If no pieces matched (e.g., due to role filter or if all pieces filtered), return False
diff --git a/tests/unit/executor/attack/core/test_attack_config.py b/tests/unit/executor/attack/core/test_attack_config.py
index bc3a822f67..8355b16cf6 100644
--- a/tests/unit/executor/attack/core/test_attack_config.py
+++ b/tests/unit/executor/attack/core/test_attack_config.py
@@ -76,3 +76,20 @@ def test_init_with_use_score_as_feedback_false(self):
         config = AttackScoringConfig(use_score_as_feedback=False)
 
         assert config.use_score_as_feedback is False
+
+    def test_score_blocked_content_default_is_false(self):
+        """Test that score_blocked_content defaults to False."""
+        config = AttackScoringConfig()
+        assert config.score_blocked_content is False
+
+    def test_score_blocked_content_can_set_to_true(self):
+        """Test that score_blocked_content can be set to True."""
+        config = AttackScoringConfig(score_blocked_content=True)
+        assert config.score_blocked_content is True
+
+    def test_score_blocked_content_with_valid_scorers(self):
+        """Test that score_blocked_content works with valid scorers."""
+        mock_scorer = MagicMock(spec=TrueFalseScorer)
+        config = AttackScoringConfig(objective_scorer=mock_scorer, score_blocked_content=True)
+        assert config.score_blocked_content is True
+        assert config.objective_scorer is mock_scorer
diff --git a/tests/unit/prompt_target/target/test_openai_chat_target.py b/tests/unit/prompt_target/target/test_openai_chat_target.py
index 59395a270f..a1796f5304 100644
--- a/tests/unit/prompt_target/target/test_openai_chat_target.py
+++ b/tests/unit/prompt_target/target/test_openai_chat_target.py
@@ -1596,6 +1596,62 @@ async def test_save_audio_response_async_pcm16_format(patch_central_database):
         assert result == "/path/to/saved/audio.wav"
 
 
+# ── _extract_partial_content tests ──────────────────────────────────────────
+
+
+class TestExtractPartialContentChatTarget:
+    def test_extracts_partial_content_from_content_filter_response(self, target: OpenAIChatTarget):
+        mock_response = create_mock_completion(
+            content="Partial harmful content before cutoff", finish_reason="content_filter"
+        )
+        result = target._extract_partial_content(mock_response)
+        assert result == "Partial harmful content before cutoff"
+
+    def test_returns_none_when_no_content(self, target: OpenAIChatTarget):
+        mock_response = create_mock_completion(content=None, finish_reason="content_filter")
+        result = target._extract_partial_content(mock_response)
+        assert result is None
+
+    def test_returns_none_when_empty_content(self, target: OpenAIChatTarget):
+        mock_response = create_mock_completion(content="", finish_reason="content_filter")
+        result = target._extract_partial_content(mock_response)
+        assert result is None
+
+    def test_returns_none_when_no_choices(self, target: OpenAIChatTarget):
+        mock_response = MagicMock(spec=ChatCompletion)
+        mock_response.choices = []
+        result = target._extract_partial_content(mock_response)
+        assert result is None
+
+
+class TestContentFilterPreservesPartialContent:
+    async def test_200_content_filter_attaches_partial_content_metadata(self, target: OpenAIChatTarget):
+        """Integration: 200 + content_filter response preserves partial content in metadata."""
+        message = Message(
+            message_pieces=[MessagePiece(role="user", conversation_id="test-convo", original_value="test prompt")]
+        )
+        mock_completion = create_mock_completion(content="Harmful partial content here", finish_reason="content_filter")
+        target._async_client.chat.completions.create = AsyncMock(return_value=mock_completion)  # type: ignore[method-assign]
+
+        response = await target.send_prompt_async(message=message)
+
+        assert response[0].message_pieces[0].response_error == "blocked"
+        assert response[0].message_pieces[0].prompt_metadata["partial_content"] == "Harmful partial content here"
+
+    async def test_200_content_filter_no_metadata_when_no_content(self, target: OpenAIChatTarget):
+        """200 + content_filter with no content doesn't attach metadata."""
+        message = Message(
+            message_pieces=[MessagePiece(role="user", conversation_id="test-convo", original_value="test prompt")]
+        )
+        mock_completion = create_mock_completion(content=None, finish_reason="content_filter")
+        target._async_client.chat.completions.create = AsyncMock(return_value=mock_completion)  # type: ignore[method-assign]
+
+        response = await target.send_prompt_async(message=message)
+
+        assert response[0].message_pieces[0].response_error == "blocked"
+        assert "partial_content" not in response[0].message_pieces[0].prompt_metadata
+
+
 async def test_save_audio_response_async_flac_format(patch_central_database):
     """Test saving audio response with flac format."""
     audio_config = OpenAIChatAudioConfig(voice="alloy", audio_format="flac")
diff --git a/tests/unit/prompt_target/target/test_openai_response_target.py b/tests/unit/prompt_target/target/test_openai_response_target.py
index 95f6e238f8..0dfb02cf18 100644
--- a/tests/unit/prompt_target/target/test_openai_response_target.py
+++ b/tests/unit/prompt_target/target/test_openai_response_target.py
@@ -1043,6 +1043,48 @@ def test_invalid_top_p_raises(patch_central_database):
 # Unit tests for override methods
 
 
+class TestExtractPartialContentResponseTarget:
+    def test_extracts_text_from_message_sections(self):
+        from pyrit.prompt_target.openai.openai_response_target import MessagePieceType
+
+        target = OpenAIResponseTarget(model_name="gpt-4", endpoint="https://test.com", api_key="test")
+
+        section = MagicMock()
+        section.type = MessagePieceType.MESSAGE
+        content_item = MagicMock()
+        content_item.text = "Partial response text"
+        section.content = [content_item]
+
+        mock_response = MagicMock()
+        mock_response.output = [section]
+
+        result = target._extract_partial_content(mock_response)
+        assert result == "Partial response text"
+
+    def test_returns_none_when_no_output(self):
+        target = OpenAIResponseTarget(model_name="gpt-4", endpoint="https://test.com", api_key="test")
+
+        mock_response = MagicMock()
+        mock_response.output = []
+
+        result = target._extract_partial_content(mock_response)
+        assert result is None
+
+    def test_ignores_non_message_sections(self):
+        from pyrit.prompt_target.openai.openai_response_target import MessagePieceType
+
+        target = OpenAIResponseTarget(model_name="gpt-4", endpoint="https://test.com", api_key="test")
+
+        section = MagicMock()
+        section.type = MessagePieceType.REASONING
+
+        mock_response = MagicMock()
+        mock_response.output = [section]
+
+        result = target._extract_partial_content(mock_response)
+        assert result is None
+
+
 def test_check_content_filter_detects_filtered_response(target: OpenAIResponseTarget):
     """Test _check_content_filter detects content_filter error code."""
     mock_response = MagicMock()
diff --git a/tests/unit/score/test_scorer.py b/tests/unit/score/test_scorer.py
index a35fbe3cb1..8e5335bd52 100644
--- a/tests/unit/score/test_scorer.py
+++ b/tests/unit/score/test_scorer.py
@@ -587,10 +587,18 @@ async def test_score_response_async_parallel_execution():
     assert score1_1 in result["auxiliary_scores"]
     assert score2_1 in result["auxiliary_scores"]
     scorer1.score_async.assert_any_call(
-        message=response, objective="test task", role_filter="assistant", skip_on_error_result=True
+        message=response,
+        objective="test task",
+        role_filter="assistant",
+        skip_on_error_result=True,
+        score_blocked_content=False,
     )
     scorer2.score_async.assert_any_call(
-        message=response, objective="test task", role_filter="assistant", skip_on_error_result=True
+        message=response,
+        objective="test task",
+        role_filter="assistant",
+        skip_on_error_result=True,
+        score_blocked_content=False,
     )
 
 
@@ -1465,3 +1473,336 @@ async def test_score_value_with_llm_skips_reasoning_piece(good_json):
 
     assert result.raw_score_value == "1"
     assert result.score_rationale == "Valid response"
+
+
+# ── Helpers for score_blocked_content tests ──────────────────────────────────
+
+
+class _AcceptAllValidator(ScorerPromptValidator):
+    """Validator that accepts all pieces (like SelfAskRefusalScorer's default)."""
+
+    def validate(self, message: Message, objective: Optional[str] = None) -> None:
+        pass
+
+    def is_message_piece_supported(self, message_piece: MessagePiece) -> bool:
+        return True
+
+
+class _TextOnlyValidator(ScorerPromptValidator):
+    """Validator that accepts only text and image_path pieces (like SelfAskTrueFalseScorer's default)."""
+
+    def __init__(self) -> None:
+        super().__init__(supported_data_types=["text", "image_path"])
+
+    def validate(self, message: Message, objective: Optional[str] = None) -> None:
+        pass
+
+
+class _BlockedContentScorer(TrueFalseScorer):
+    """A mock TrueFalseScorer that records what pieces it was asked to score."""
+
+    def __init__(self, *, validator: Optional[ScorerPromptValidator] = None) -> None:
+        super().__init__(validator=validator or _TextOnlyValidator())
+        self.scored_pieces: list[MessagePiece] = []
+
+    def _build_identifier(self) -> ComponentIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        self.scored_pieces.append(message_piece)
+        return [
+            Score(
+                score_value="true",
+                score_value_description="desc",
+                score_type="true_false",
+                score_category=None,
+                score_metadata=None,
+                score_rationale="rationale",
+                scorer_class_identifier=self.get_identifier(),
+                message_piece_id=str(message_piece.id),
+                objective=objective,
+            )
+        ]
+
+
+class _MockRefusalScorer(TrueFalseScorer):
+    """Mimics SelfAskRefusalScorer: accepts all types, short-circuits on blocked."""
+
+    def __init__(self) -> None:
+        super().__init__(validator=_AcceptAllValidator())
+        self.scored_pieces: list[MessagePiece] = []
+
+    def _build_identifier(self) -> ComponentIdentifier:
+        return self._create_identifier()
+
+    async def _score_piece_async(self, message_piece: MessagePiece, *, objective: Optional[str] = None) -> list[Score]:
+        self.scored_pieces.append(message_piece)
+        if message_piece.response_error == "blocked":
+            return [
+                Score(
+                    score_value="true",
+                    score_value_description="Refusal detected",
+                    score_type="true_false",
+                    score_category=None,
+                    score_metadata=None,
+                    score_rationale="Content was filtered, constituting a refusal.",
+                    scorer_class_identifier=self.get_identifier(),
+                    message_piece_id=str(message_piece.id),
+                    objective=objective,
+                )
+            ]
+        return [
+            Score(
+                score_value="false",
+                score_value_description="Not a refusal",
+                score_type="true_false",
+                score_category=None,
+                score_metadata=None,
+                score_rationale="The response contains substantive content.",
+                scorer_class_identifier=self.get_identifier(),
+                message_piece_id=str(message_piece.id),
+                objective=objective,
+            )
+        ]
+
+
+def _make_blocked_piece(*, partial_content: Optional[str] = None, conversation_id: str = "test-convo") -> MessagePiece:
+    """Create a blocked MessagePiece, optionally with partial content metadata."""
+    metadata: dict = {}
+    if partial_content is not None:
+        metadata["partial_content"] = partial_content
+    return MessagePiece(
+        role="assistant",
+        original_value='{"status_code": 200, "message": "content_filter"}',
+        converted_value='{"status_code": 200, "message": "content_filter"}',
+        original_value_data_type="error",
+        converted_value_data_type="error",
+        conversation_id=conversation_id,
+        response_error="blocked",
+        prompt_metadata=metadata,
+    )
+
+
+def _make_normal_piece(*, conversation_id: str = "test-convo") -> MessagePiece:
+    """Create a normal text MessagePiece."""
+    return MessagePiece(
+        role="assistant",
+        original_value="Hello, how can I help?",
+        conversation_id=conversation_id,
+    )
+
+
+# ── _create_text_piece_from_blocked tests ────────────────────────────────────
+
+
+class TestCreateTextPieceFromBlocked:
+    def test_returns_text_piece_with_partial_content(self):
+        piece = _make_blocked_piece(partial_content="Harmful partial text here")
+        substitute = Scorer._create_text_piece_from_blocked(piece)
+
+        assert substitute is not None
+        assert substitute.converted_value == "Harmful partial text here"
+        assert substitute.converted_value_data_type == "text"
+        assert substitute.response_error == "none"
+        assert substitute.id == piece.id
+
+    def test_preserves_original_value(self):
+        piece = _make_blocked_piece(partial_content="partial")
+        substitute = Scorer._create_text_piece_from_blocked(piece)
+
+        assert substitute is not None
+        assert substitute.original_value == piece.original_value
+        assert substitute.original_value_data_type == piece.original_value_data_type
+
+    def test_returns_none_when_no_partial_content(self):
+        piece = _make_blocked_piece()
+        assert Scorer._create_text_piece_from_blocked(piece) is None
+
+    def test_returns_none_when_empty_partial_content(self):
+        piece = _make_blocked_piece(partial_content="")
+        assert Scorer._create_text_piece_from_blocked(piece) is None
+
+    def test_preserves_conversation_id(self):
+        piece = _make_blocked_piece(partial_content="partial")
+        substitute = Scorer._create_text_piece_from_blocked(piece)
+        assert substitute is not None
+        assert substitute.conversation_id == piece.conversation_id
+
+    def test_response_error_is_none_not_blocked(self):
+        """Substitute must have response_error='none' so refusal short-circuits don't fire."""
+        piece = _make_blocked_piece(partial_content="partial text")
+        substitute = Scorer._create_text_piece_from_blocked(piece)
+        assert substitute is not None
+        assert substitute.response_error == "none"
+        assert not substitute.is_blocked()
+        assert not substitute.has_error()
+
+
+# ── score_async with score_blocked_content tests ─────────────────────────────
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestScoreAsyncWithBlockedContent:
+    async def test_default_false_skips_blocked_piece_text_only_scorer(self):
+        """Default behavior: text-only scorer filters out blocked error-type pieces."""
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        scores = await scorer.score_async(msg, score_blocked_content=False)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "false"
+        assert len(scorer.scored_pieces) == 0
+
+    async def test_true_substitutes_blocked_piece_for_text_only_scorer(self):
+        """With flag on, text-only scorer gets a text substitute and scores it."""
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        scores = await scorer.score_async(msg, score_blocked_content=True)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "true"
+        assert len(scorer.scored_pieces) == 1
+        assert scorer.scored_pieces[0].converted_value == "harmful text"
+        assert scorer.scored_pieces[0].converted_value_data_type == "text"
+
+    async def test_refusal_scorer_short_circuits_on_blocked_by_default(self):
+        """Refusal scorer (accepts all types) sees original blocked piece, returns True."""
+        scorer = _MockRefusalScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        scores = await scorer.score_async(msg, score_blocked_content=False)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "true"
+        assert scorer.scored_pieces[0].response_error == "blocked"
+
+    async def test_refusal_scorer_evaluates_partial_content_when_flag_on(self):
+        """With flag on, refusal scorer gets substitute (response_error=none), evaluates via LLM path."""
+        scorer = _MockRefusalScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        scores = await scorer.score_async(msg, score_blocked_content=True)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "false"
+        assert scorer.scored_pieces[0].response_error == "none"
+        assert scorer.scored_pieces[0].converted_value == "harmful text"
+
+    async def test_no_substitute_when_no_partial_content(self):
+        """400 full block with no partial content: no substitute, so the piece is still filtered out."""
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece()])
+
+        scores = await scorer.score_async(msg, score_blocked_content=True)
+
+        assert len(scores) == 1
+        assert scores[0].score_value == "false"
+        assert len(scorer.scored_pieces) == 0
+
+    async def test_normal_piece_unaffected_by_flag(self):
+        """Normal text pieces are scored the same regardless of flag."""
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_normal_piece()])
+
+        scores_off = await scorer.score_async(msg, score_blocked_content=False)
+        scorer.scored_pieces.clear()
+        scores_on = await scorer.score_async(msg, score_blocked_content=True)
+
+        assert scores_off[0].score_value == scores_on[0].score_value
+
+    async def test_mixed_pieces_only_blocked_substituted(self):
+        """In a multi-piece message, only blocked pieces get substituted."""
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_normal_piece(), _make_blocked_piece(partial_content="partial harmful")])
+
+        scores = await scorer.score_async(msg, score_blocked_content=True)
+
+        assert len(scores) == 1  # TrueFalseScorer aggregates
+        assert len(scorer.scored_pieces) == 2
+        assert scorer.scored_pieces[0].converted_value == "Hello, how can I help?"
+        assert scorer.scored_pieces[1].converted_value == "partial harmful"
+        assert scorer.scored_pieces[1].response_error == "none"
+
+
+# ── skip_on_error_result interaction tests ───────────────────────────────────
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestSkipOnErrorWithBlockedContent:
+    async def test_skip_on_error_true_without_flag_skips_blocked(self):
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        scores = await scorer.score_async(msg, skip_on_error_result=True, score_blocked_content=False)
+        assert scores == []
+
+    async def test_skip_on_error_true_with_flag_does_not_skip_when_partial_content(self):
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        scores = await scorer.score_async(msg, skip_on_error_result=True, score_blocked_content=True)
+        assert len(scores) == 1
+        assert scores[0].score_value == "true"
+
+    async def test_skip_on_error_true_with_flag_still_skips_when_no_partial_content(self):
+        scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece()])
+
+        scores = await scorer.score_async(msg, skip_on_error_result=True, score_blocked_content=True)
+        assert scores == []
+
+
+# ── score_response_async passthrough tests ───────────────────────────────────
+
+
+@pytest.mark.usefixtures("patch_central_database")
+class TestScoreResponseAsyncBlockedContent:
+    async def test_score_response_async_passes_flag_to_scorers(self):
+        obj_scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        result = await Scorer.score_response_async(
+            response=msg,
+            objective_scorer=obj_scorer,
+            objective="test",
+            skip_on_error_result=False,
+            score_blocked_content=True,
+        )
+
+        assert len(result["objective_scores"]) == 1
+        assert result["objective_scores"][0].score_value == "true"
+        assert obj_scorer.scored_pieces[0].converted_value == "harmful text"
+
+    async def test_score_response_async_default_does_not_substitute(self):
+        obj_scorer = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        result = await Scorer.score_response_async(
+            response=msg,
+            objective_scorer=obj_scorer,
+            objective="test",
+            skip_on_error_result=False,
+            score_blocked_content=False,
+        )
+
+        assert result["objective_scores"][0].score_value == "false"
+        assert len(obj_scorer.scored_pieces) == 0
+
+    async def test_score_response_multiple_scorers_passes_flag(self):
+        scorer1 = _BlockedContentScorer()
+        scorer2 = _BlockedContentScorer()
+        msg = Message(message_pieces=[_make_blocked_piece(partial_content="harmful text")])
+
+        scores = await Scorer.score_response_multiple_scorers_async(
+            response=msg,
+            scorers=[scorer1, scorer2],
+            objective="test",
+            skip_on_error_result=False,
+            score_blocked_content=True,
+        )
+
+        assert len(scores) == 2
+        assert len(scorer1.scored_pieces) == 1
+        assert len(scorer2.scored_pieces) == 1

From e6fae9269bc384ab035da3ac4553b8721105f9b6 Mon Sep 17 00:00:00 2001
From: jsong468 <songjustin@microsoft.com>
Date: Mon, 4 May 2026 15:55:45 -0700
Subject: [PATCH 2/5] docstring

---
 pyrit/score/scorer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py
index 4288926c39..17026949bc 100644
--- a/pyrit/score/scorer.py
+++ b/pyrit/score/scorer.py
@@ -175,7 +175,9 @@ async def score_async(
             role_filter (Optional[ChatMessageRole]): Only score messages with this exact stored role.
                 Use "assistant" to score only real assistant responses, or "simulated_assistant"
                 to score only simulated responses. Defaults to None (no filtering).
-            skip_on_error_result (bool): If True, skip scoring if the message contains an error. Defaults to False.
+            skip_on_error_result (bool): If True, skip scoring if the message contains an error. If True
+                but score_blocked_content is also True, blocked content will be scored in the case of a
+                content filter triggered error instead of skipping. Defaults to False.
             infer_objective_from_request (bool): If True, infer the objective from the message's previous request
                 when objective is not provided. Defaults to False.
             score_blocked_content (bool): If True, blocked responses that contain partial content

From 9a4505acbfac0784e7fef22746d0600bf11a8f86 Mon Sep 17 00:00:00 2001
From: jsong468 <songjustin@microsoft.com>
Date: Mon, 4 May 2026 16:53:30 -0700
Subject: [PATCH 3/5] fix unit tests

---
 pyrit/score/true_false/true_false_composite_scorer.py  | 10 +++++++++-
 pyrit/score/true_false/true_false_inverter_scorer.py   |  4 ++++
 .../executor/attack/single_turn/test_prompt_sending.py |  3 +++
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/pyrit/score/true_false/true_false_composite_scorer.py b/pyrit/score/true_false/true_false_composite_scorer.py
index d40d3874b3..d3d08238c1 100644
--- a/pyrit/score/true_false/true_false_composite_scorer.py
+++ b/pyrit/score/true_false/true_false_composite_scorer.py
@@ -75,6 +75,7 @@ async def _score_async(
         *,
         objective: Optional[str] = None,
         role_filter: Optional[ChatMessageRole] = None,
+        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Score a request/response by combining results from all constituent scorers.
@@ -83,6 +84,8 @@ async def _score_async(
             message (Message): The request/response to score.
             objective (Optional[str]): Scoring objective or context.
             role_filter (Optional[ChatMessageRole]): Optional filter for message roles. Defaults to None.
+            score_blocked_content (bool): If True, blocked pieces with partial content will be
+                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A single-element list with the aggregated true/false score.
@@ -92,7 +95,12 @@ async def _score_async(
             ValueError: If no scores are generated from the request response pieces.
         """
         tasks = [
-            scorer.score_async(message=message, objective=objective, role_filter=role_filter)
+            scorer.score_async(
+                message=message,
+                objective=objective,
+                role_filter=role_filter,
+                score_blocked_content=score_blocked_content,
+            )
             for scorer in self._scorers
         ]
 
diff --git a/pyrit/score/true_false/true_false_inverter_scorer.py b/pyrit/score/true_false/true_false_inverter_scorer.py
index 5b3a1404cd..e3d42f4427 100644
--- a/pyrit/score/true_false/true_false_inverter_scorer.py
+++ b/pyrit/score/true_false/true_false_inverter_scorer.py
@@ -53,6 +53,7 @@ async def _score_async(
         *,
         objective: Optional[str] = None,
         role_filter: Optional[ChatMessageRole] = None,
+        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Scores the piece using the underlying true-false scorer and returns the inverted score.
@@ -62,6 +63,8 @@ async def _score_async(
             objective (Optional[str]): The objective to evaluate against (the original attacker model's objective).
                 Defaults to None.
             role_filter (Optional[ChatMessageRole]): Optional filter for message roles. Defaults to None.
+            score_blocked_content (bool): If True, blocked pieces with partial content will be
+                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A list containing a single Score object with the inverted true/false value.
@@ -70,6 +73,7 @@ async def _score_async(
             message,
             objective=objective,
             role_filter=role_filter,
+            score_blocked_content=score_blocked_content,
         )
 
         # TrueFalseScorers only have a single score
diff --git a/tests/unit/executor/attack/single_turn/test_prompt_sending.py b/tests/unit/executor/attack/single_turn/test_prompt_sending.py
index 132e98b1a9..00b1d245a0 100644
--- a/tests/unit/executor/attack/single_turn/test_prompt_sending.py
+++ b/tests/unit/executor/attack/single_turn/test_prompt_sending.py
@@ -458,6 +458,7 @@ async def test_evaluate_response_with_objective_scorer_returns_score(
                 role_filter="assistant",
                 objective="Test objective",
                 skip_on_error_result=True,
+                score_blocked_content=False,
             )
 
     async def test_evaluate_response_without_objective_scorer_returns_none(self, mock_target, sample_response):
@@ -480,6 +481,7 @@ async def test_evaluate_response_without_objective_scorer_returns_none(self, moc
                 role_filter="assistant",
                 objective="Test objective",
                 skip_on_error_result=True,
+                score_blocked_content=False,
             )
 
     async def test_evaluate_response_with_auxiliary_scorers(
@@ -522,6 +524,7 @@ async def test_evaluate_response_with_auxiliary_scorers(
                 role_filter="assistant",
                 objective="Test objective",
                 skip_on_error_result=True,
+                score_blocked_content=False,
             )
 
 

From fc6c7e75f1e4b61825583abd8731dc46dc6547e5 Mon Sep 17 00:00:00 2001
From: jsong468 <songjustin@microsoft.com>
Date: Tue, 5 May 2026 16:53:20 -0700
Subject: [PATCH 4/5] fix conversation_scorer bug and score_async

---
 pyrit/score/conversation_scorer.py            | 22 ++++--
 pyrit/score/scorer.py                         | 67 ++++++++++++-------
 .../float_scale_threshold_scorer.py           |  4 --
 .../true_false/true_false_composite_scorer.py | 10 +--
 .../true_false/true_false_inverter_scorer.py  |  4 --
 pyrit/score/true_false/true_false_scorer.py   | 10 +--
 6 files changed, 60 insertions(+), 57 deletions(-)

diff --git a/pyrit/score/conversation_scorer.py b/pyrit/score/conversation_scorer.py
index 7908d27404..d1dad443bc 100644
--- a/pyrit/score/conversation_scorer.py
+++ b/pyrit/score/conversation_scorer.py
@@ -33,9 +33,7 @@ class ConversationScorer(Scorer, ABC):
         enforce_all_pieces_valid=True,
     )
 
-    async def _score_async(
-        self, message: Message, *, objective: Optional[str] = None, score_blocked_content: bool = False
-    ) -> list[Score]:
+    async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]:
         """
         Scores the entire conversation history by concatenating all messages and passing to the wrapped scorer.
 
@@ -43,8 +41,6 @@ async def _score_async(
             message (Message): A message from the conversation to be scored.
                 The conversation ID from the first message piece is used to retrieve the full conversation from memory.
             objective (Optional[str]): Optional objective to evaluate against.
-            score_blocked_content (bool): If True, blocked pieces with partial content will be
-                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: List of Score objects from the underlying scorer
@@ -67,6 +63,14 @@ async def _score_async(
         # Build the full conversation text
         conversation_text = ""
 
+        # Check if the caller requested scoring of blocked content by inspecting whether
+        # the incoming message was substituted by score_async._apply_blocked_content_substitution.
+        # A substituted piece has partial_content in metadata but response_error="none".
+        incoming_piece = message.message_pieces[0]
+        use_partial_content = (
+            "partial_content" in incoming_piece.prompt_metadata and incoming_piece.response_error == "none"
+        )
+
         # Goes through each message in the conversation and appends user/assistant messages only
         # Explicitly excludes system, tool, developer messages from being scored/included in conversation history
         # they are allowed in validation but not included in the scored conversation text
@@ -75,7 +79,13 @@ async def _score_async(
                 # Only include user and assistant messages in the conversation text
                 if piece.api_role in ["user", "assistant", "tool"]:
                     role_display = "Assistant (simulated)" if piece.is_simulated else piece.api_role.capitalize()
-                    conversation_text += f"{role_display}: {piece.converted_value}\n"
+                    # For blocked pieces with partial content, use the partial content
+                    # instead of the error JSON when score_blocked_content is enabled
+                    if use_partial_content and piece.is_blocked() and "partial_content" in piece.prompt_metadata:
+                        text = str(piece.prompt_metadata["partial_content"])
+                    else:
+                        text = piece.converted_value
+                    conversation_text += f"{role_display}: {text}\n"
 
         # Create a new message with the concatenated conversation text
         # Preserve the original message piece metadata
diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py
index 17026949bc..178556ceaf 100644
--- a/pyrit/score/scorer.py
+++ b/pyrit/score/scorer.py
@@ -208,11 +208,16 @@ async def score_async(
         if infer_objective_from_request and (not objective):
             objective = self._extract_objective_from_response(message)
 
+        # When score_blocked_content is enabled, create a modified message where blocked pieces
+        # with partial content are replaced with text-type substitutes (response_error="none").
+        # This is done here (not in _score_async) so that _score_async's signature remains
+        # (self, message, *, objective=None) — preserving backward compatibility for subclasses.
+        scoring_message = self._apply_blocked_content_substitution(message) if score_blocked_content else message
+
         try:
             scores = await self._score_async(
-                message,
+                scoring_message,
                 objective=objective,
-                score_blocked_content=score_blocked_content,
             )
         except PyritException as e:
             # Re-raise PyRIT exceptions with enhanced context while preserving type for retry decorators
@@ -228,9 +233,7 @@ async def score_async(
 
         return scores
 
-    async def _score_async(
-        self, message: Message, *, objective: Optional[str] = None, score_blocked_content: bool = False
-    ) -> list[Score]:
+    async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]:
         """
         Score the given request response asynchronously.
 
@@ -238,16 +241,9 @@ async def _score_async(
         and returns a flattened list of scores. Subclasses can override this method
         to implement custom scoring logic (e.g., aggregating scores).
 
-        When score_blocked_content is True, blocked pieces with partial content in
-        prompt_metadata["partial_content"] are substituted with text-type copies
-        (with response_error="none") so they pass the validator and are scored
-        by the LLM without triggering blocked short-circuits.
-
         Args:
             message (Message): The message to score.
             objective (Optional[str]): The objective to evaluate against. Defaults to None.
-            score_blocked_content (bool): If True, substitute blocked pieces that have
-                partial content with text-type copies. Defaults to False.
 
         Returns:
             list[Score]: A list of Score objects.
@@ -258,20 +254,6 @@ async def _score_async(
         # Score only the supported pieces
         supported_pieces = self._get_supported_pieces(message)
 
-        # When score_blocked_content is enabled, substitute blocked pieces that have partial content.
-        # Substitutes replace the original blocked piece (if present) or are added if not.
-        if score_blocked_content:
-            already_supported_ids = {p.id for p in supported_pieces}
-            for piece in message.message_pieces:
-                if piece.is_blocked() and "partial_content" in piece.prompt_metadata:
-                    substitute = self._create_text_piece_from_blocked(piece)
-                    if substitute and self._validator.is_message_piece_supported(message_piece=substitute):
-                        # Replace original blocked piece if it was already in supported_pieces
-                        if piece.id in already_supported_ids:
-                            supported_pieces = [substitute if p.id == piece.id else p for p in supported_pieces]
-                        else:
-                            supported_pieces.append(substitute)
-
         tasks = [self._score_piece_async(message_piece=piece, objective=objective) for piece in supported_pieces]
 
         if not tasks:
@@ -325,6 +307,39 @@ def _create_text_piece_from_blocked(piece: MessagePiece) -> Optional[MessagePiec
             timestamp=piece.timestamp,
         )
 
+    def _apply_blocked_content_substitution(self, message: Message) -> Message:
+        """
+        Create a copy of the message where blocked pieces with partial content are substituted.
+
+        Each blocked piece that has prompt_metadata["partial_content"] is replaced with a text-typed
+        copy (response_error="none", converted_value=partial_content). All other pieces, including
+        those for which _create_text_piece_from_blocked returns nothing, are kept as-is and in order.
+
+        This is called in score_async (not _score_async) so that subclass overrides of
+        _score_async do not need to accept the score_blocked_content parameter.
+
+        Args:
+            message (Message): The original message potentially containing blocked pieces.
+
+        Returns:
+            Message: A new Message with substituted pieces, or the same object if none were substituted.
+        """
+        substituted = False
+        new_pieces: list[MessagePiece] = []
+        for piece in message.message_pieces:
+            if piece.is_blocked() and "partial_content" in piece.prompt_metadata:
+                substitute = self._create_text_piece_from_blocked(piece)
+                if substitute:
+                    new_pieces.append(substitute)
+                    substituted = True
+                    continue  # the substitute takes the blocked piece's position
+            new_pieces.append(piece)  # piece was not eligible, or no substitute could be built
+
+        if not substituted:
+            return message  # nothing replaced; hand back the original object unchanged
+
+        return Message(message_pieces=new_pieces)
+
     def _get_supported_pieces(self, message: Message) -> list[MessagePiece]:
         """
         Get a list of supported message pieces for this scorer.
diff --git a/pyrit/score/true_false/float_scale_threshold_scorer.py b/pyrit/score/true_false/float_scale_threshold_scorer.py
index 0271c9ad3d..5d35d52d0d 100644
--- a/pyrit/score/true_false/float_scale_threshold_scorer.py
+++ b/pyrit/score/true_false/float_scale_threshold_scorer.py
@@ -79,7 +79,6 @@ async def _score_async(
         *,
         objective: Optional[str] = None,
         role_filter: Optional[ChatMessageRole] = None,
-        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Scores the piece using the underlying float-scale scorer and thresholds the resulting score.
@@ -89,8 +88,6 @@ async def _score_async(
             objective (Optional[str]): The objective to evaluate against (the original attacker model's objective).
                 Defaults to None.
             role_filter (Optional[ChatMessageRole]): Optional filter for message roles. Defaults to None.
-            score_blocked_content (bool): If True, blocked pieces with partial content will be
-                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A list containing a single true/false Score object based on the threshold comparison.
@@ -99,7 +96,6 @@ async def _score_async(
             message,
             objective=objective,
             role_filter=role_filter,
-            score_blocked_content=score_blocked_content,
         )
 
         # Aggregator handles 0-many scores and returns exactly one result (or raises if configured)
diff --git a/pyrit/score/true_false/true_false_composite_scorer.py b/pyrit/score/true_false/true_false_composite_scorer.py
index d3d08238c1..d40d3874b3 100644
--- a/pyrit/score/true_false/true_false_composite_scorer.py
+++ b/pyrit/score/true_false/true_false_composite_scorer.py
@@ -75,7 +75,6 @@ async def _score_async(
         *,
         objective: Optional[str] = None,
         role_filter: Optional[ChatMessageRole] = None,
-        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Score a request/response by combining results from all constituent scorers.
@@ -84,8 +83,6 @@ async def _score_async(
             message (Message): The request/response to score.
             objective (Optional[str]): Scoring objective or context.
             role_filter (Optional[ChatMessageRole]): Optional filter for message roles. Defaults to None.
-            score_blocked_content (bool): If True, blocked pieces with partial content will be
-                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A single-element list with the aggregated true/false score.
@@ -95,12 +92,7 @@ async def _score_async(
             ValueError: If no scores are generated from the request response pieces.
         """
         tasks = [
-            scorer.score_async(
-                message=message,
-                objective=objective,
-                role_filter=role_filter,
-                score_blocked_content=score_blocked_content,
-            )
+            scorer.score_async(message=message, objective=objective, role_filter=role_filter)
             for scorer in self._scorers
         ]
 
diff --git a/pyrit/score/true_false/true_false_inverter_scorer.py b/pyrit/score/true_false/true_false_inverter_scorer.py
index e3d42f4427..5b3a1404cd 100644
--- a/pyrit/score/true_false/true_false_inverter_scorer.py
+++ b/pyrit/score/true_false/true_false_inverter_scorer.py
@@ -53,7 +53,6 @@ async def _score_async(
         *,
         objective: Optional[str] = None,
         role_filter: Optional[ChatMessageRole] = None,
-        score_blocked_content: bool = False,
     ) -> list[Score]:
         """
         Scores the piece using the underlying true-false scorer and returns the inverted score.
@@ -63,8 +62,6 @@ async def _score_async(
             objective (Optional[str]): The objective to evaluate against (the original attacker model's objective).
                 Defaults to None.
             role_filter (Optional[ChatMessageRole]): Optional filter for message roles. Defaults to None.
-            score_blocked_content (bool): If True, blocked pieces with partial content will be
-                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A list containing a single Score object with the inverted true/false value.
@@ -73,7 +70,6 @@ async def _score_async(
             message,
             objective=objective,
             role_filter=role_filter,
-            score_blocked_content=score_blocked_content,
         )
 
         # TrueFalseScorers only have a single score
diff --git a/pyrit/score/true_false/true_false_scorer.py b/pyrit/score/true_false/true_false_scorer.py
index 3017895660..6b6e79815e 100644
--- a/pyrit/score/true_false/true_false_scorer.py
+++ b/pyrit/score/true_false/true_false_scorer.py
@@ -104,9 +104,7 @@ def get_scorer_metrics(self) -> Optional["ObjectiveScorerMetrics"]:
 
         return find_objective_metrics_by_eval_hash(eval_hash=eval_hash, file_path=result_file)
 
-    async def _score_async(
-        self, message: Message, *, objective: Optional[str] = None, score_blocked_content: bool = False
-    ) -> list[Score]:
+    async def _score_async(self, message: Message, *, objective: Optional[str] = None) -> list[Score]:
         """
         Score the given request response asynchronously.
 
@@ -115,8 +113,6 @@ async def _score_async(
         Args:
             message (Message): The message to score.
             objective (Optional[str]): The objective to evaluate against. Defaults to None.
-            score_blocked_content (bool): If True, blocked pieces with partial content will be
-                substituted with text copies for scoring. Defaults to False.
 
         Returns:
             list[Score]: A list containing a single true/false Score object.
@@ -125,9 +121,7 @@ async def _score_async(
             ValueError: If no pieces are scored and cannot determine a piece ID for the return score.
         """
         # Get individual scores for all supported pieces using base implementation logic
-        score_list = await super()._score_async(
-            message, objective=objective, score_blocked_content=score_blocked_content
-        )
+        score_list = await super()._score_async(message, objective=objective)
 
         if not score_list:
             # If no pieces matched (e.g., due to role filter or if all pieces filtered), return False

From c49debc6edcdf2643a35e52c5c7fc528a3516dce Mon Sep 17 00:00:00 2001
From: jsong468 <songjustin@microsoft.com>
Date: Tue, 5 May 2026 17:03:58 -0700
Subject: [PATCH 5/5] minor truthiness change

---
 pyrit/score/scorer.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyrit/score/scorer.py b/pyrit/score/scorer.py
index 178556ceaf..5be8fae7d0 100644
--- a/pyrit/score/scorer.py
+++ b/pyrit/score/scorer.py
@@ -200,7 +200,9 @@ async def score_async(
         if skip_on_error_result and message.is_error():
             # When score_blocked_content is enabled and the message has partial content,
             # don't skip — let _score_async handle the substitution.
-            has_partial = any("partial_content" in p.prompt_metadata for p in message.message_pieces if p.is_blocked())
+            has_partial = any(
+                p.prompt_metadata.get("partial_content") for p in message.message_pieces if p.is_blocked()
+            )
             if not (score_blocked_content and has_partial):
                 logger.debug("Skipping scoring due to error in message and skip_on_error=True.")
                 return []