From 4c068fbbf0ab22b64a8dced16f52a495b7a002d7 Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 11:30:27 -0500 Subject: [PATCH 1/9] fix: prevent pickle deserialization of untrusted HITL input Add strip_pickle_markers() to sanitize HTTP input before it reaches pickle.loads() via the checkpoint decoding path. Applied as a 3-layer defence-in-depth: 1. _app.py: sanitize req.get_json() at the HTTP boundary 2. _workflow.py: sanitize in _deserialize_hitl_response() before decode 3. _serialization.py: sanitize in reconstruct_to_type() as final guard Any dict containing __pickled__ or __type__ markers from untrusted sources is replaced with None, blocking arbitrary code execution via crafted payloads to POST /workflow/respond/{instanceId}/{requestId}. Includes 12 new unit tests covering the sanitizer and end-to-end attack prevention. --- .../agent_framework_azurefunctions/_app.py | 6 +- .../_serialization.py | 50 ++++++++++- .../_workflow.py | 9 +- .../azurefunctions/tests/test_func_utils.py | 84 ++++++++++++++++++- 4 files changed, 143 insertions(+), 6 deletions(-) diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_app.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_app.py index 01dcc102f4..c108f7739d 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_app.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_app.py @@ -44,7 +44,7 @@ from ._entities import create_agent_entity from ._errors import IncomingRequestError from ._orchestration import AgentOrchestrationContextType, AgentTask, AzureFunctionsAgentExecutor -from ._serialization import deserialize_value, serialize_value +from ._serialization import deserialize_value, serialize_value, strip_pickle_markers from ._workflow import ( SOURCE_HITL_RESPONSE, SOURCE_ORCHESTRATOR, @@ -515,6 +515,10 @@ async def send_hitl_response(req: func.HttpRequest, client: df.DurableOrchestrat except ValueError: return 
self._build_error_response("Request body must be valid JSON.") + # Sanitize untrusted HTTP input before it reaches pickle.loads(). + # See strip_pickle_markers() docstring for details on the attack vector. + response_data = strip_pickle_markers(response_data) + # Send the response as an external event # The request_id is used as the event name for correlation await client.raise_event( diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index f48e55f5d5..666ee24606 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -24,7 +24,12 @@ from dataclasses import is_dataclass from typing import Any -from agent_framework._workflows._checkpoint_encoding import decode_checkpoint_value, encode_checkpoint_value +from agent_framework._workflows._checkpoint_encoding import ( + _PICKLE_MARKER, + _TYPE_MARKER, + decode_checkpoint_value, + encode_checkpoint_value, +) from pydantic import BaseModel logger = logging.getLogger(__name__) @@ -48,6 +53,42 @@ def resolve_type(type_key: str) -> type | None: return None +# ============================================================================ +# Pickle marker sanitization (security) +# ============================================================================ + + +def strip_pickle_markers(data: Any) -> Any: + """Recursively strip pickle/type markers from untrusted data. + + The core checkpoint encoding uses ``__pickled__`` and ``__type__`` markers to + roundtrip arbitrary Python objects via *pickle*. If an attacker crafts an + HTTP payload that contains these markers, the data would flow into + ``pickle.loads()`` and enable **arbitrary code execution**. 
+ + This function walks the incoming data structure and replaces any ``dict`` + that contains either marker key with ``None``, neutralising the attack + vector while leaving all other data untouched. + + It **must** be called on every value that originates from an untrusted + source (e.g. ``req.get_json()``) *before* the value is passed to + ``deserialize_value`` / ``decode_checkpoint_value``. + """ + if isinstance(data, dict): + if _PICKLE_MARKER in data or _TYPE_MARKER in data: + logger.warning( + "Stripped pickle/type markers from untrusted input – " + "potential deserialization attack blocked." + ) + return None + return {k: strip_pickle_markers(v) for k, v in data.items()} + + if isinstance(data, list): + return [strip_pickle_markers(item) for item in data] + + return data + + # ============================================================================ # Serialize / Deserialize # ============================================================================ @@ -117,6 +158,13 @@ def reconstruct_to_type(value: Any, target_type: type) -> Any: if not isinstance(value, dict): return value + # Sanitize untrusted dicts before they reach pickle.loads() + value = strip_pickle_markers(value) + if value is None: + return None + if not isinstance(value, dict): + return value + # Try decoding if data has pickle markers (from checkpoint encoding) decoded = deserialize_value(value) if not isinstance(decoded, dict): diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py index 60c04ad66c..e2fa80fc32 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py @@ -50,7 +50,7 @@ from ._context import CapturingRunnerContext from ._orchestration import AzureFunctionsAgentExecutor -from ._serialization import deserialize_value, reconstruct_to_type, resolve_type, 
serialize_value +from ._serialization import deserialize_value, reconstruct_to_type, resolve_type, serialize_value, strip_pickle_markers logger = logging.getLogger(__name__) @@ -961,6 +961,13 @@ def _deserialize_hitl_response(response_data: Any, response_type_str: str | None type(response_data).__name__, ) + if response_data is None: + return None + + # Sanitize untrusted external input before deserialization. + # HITL response data originates from an HTTP POST and must not contain + # pickle/type markers that would reach pickle.loads(). + response_data = strip_pickle_markers(response_data) if response_data is None: return None diff --git a/python/packages/azurefunctions/tests/test_func_utils.py b/python/packages/azurefunctions/tests/test_func_utils.py index 240e2f0a2c..872c3e8117 100644 --- a/python/packages/azurefunctions/tests/test_func_utils.py +++ b/python/packages/azurefunctions/tests/test_func_utils.py @@ -21,6 +21,7 @@ deserialize_value, reconstruct_to_type, serialize_value, + strip_pickle_markers, ) @@ -353,13 +354,18 @@ class Feedback: assert result.comment == "Great" def test_reconstruct_from_checkpoint_markers(self) -> None: - """Test that data with checkpoint markers is decoded via deserialize_value.""" + """Test that data with checkpoint markers is stripped (security). + + reconstruct_to_type is used for untrusted HITL responses, so pickle + markers must be neutralised. Legitimate internal roundtrips use + deserialize_value directly instead. 
+ """ original = SampleData(value=99, name="marker-test") encoded = serialize_value(original) + # Pickle markers are stripped — returns None (attack blocked) result = reconstruct_to_type(encoded, SampleData) - assert isinstance(result, SampleData) - assert result.value == 99 + assert result is None def test_unrecognized_dict_returns_original(self) -> None: """Test that unrecognized dicts are returned as-is.""" @@ -372,3 +378,75 @@ class Unrelated: result = reconstruct_to_type(data, Unrelated) assert result == data + + def test_reconstruct_strips_injected_pickle_markers(self) -> None: + """Test that reconstruct_to_type neutralises injected pickle markers.""" + malicious = {"__pickled__": "gASVDgAAAAAAAACMBHRlc3SULg==", "__type__": "builtins:str"} + result = reconstruct_to_type(malicious, str) + assert result is None + + +class TestStripPickleMarkers: + """Security tests for strip_pickle_markers — the defence-in-depth layer + that prevents untrusted HTTP input from reaching pickle.loads().""" + + def test_strips_top_level_pickle_marker(self) -> None: + """A dict containing __pickled__ must be replaced with None.""" + data = {"__pickled__": "PAYLOAD", "__type__": "os:system"} + assert strip_pickle_markers(data) is None + + def test_strips_top_level_type_marker_only(self) -> None: + """Even __type__ alone (without __pickled__) must be neutralised.""" + data = {"__type__": "os:system", "other": "value"} + assert strip_pickle_markers(data) is None + + def test_strips_nested_pickle_marker(self) -> None: + """Pickle markers nested inside a dict must be neutralised.""" + data = {"safe": "value", "nested": {"__pickled__": "PAYLOAD", "__type__": "os:system"}} + result = strip_pickle_markers(data) + assert result == {"safe": "value", "nested": None} + + def test_strips_pickle_marker_in_list(self) -> None: + """Pickle markers inside a list element must be neutralised.""" + data = [{"__pickled__": "PAYLOAD"}, "safe"] + result = strip_pickle_markers(data) + assert result == 
[None, "safe"] + + def test_strips_deeply_nested_marker(self) -> None: + """Deeply nested pickle markers must be neutralised.""" + data = {"a": {"b": {"c": {"__pickled__": "deep"}}}} + result = strip_pickle_markers(data) + assert result == {"a": {"b": {"c": None}}} + + def test_preserves_safe_dict(self) -> None: + """Dicts without pickle markers must be left untouched.""" + data = {"approved": True, "reason": "Looks good"} + assert strip_pickle_markers(data) == data + + def test_preserves_primitives(self) -> None: + """Primitive values must pass through unchanged.""" + assert strip_pickle_markers("hello") == "hello" + assert strip_pickle_markers(42) == 42 + assert strip_pickle_markers(None) is None + assert strip_pickle_markers(True) is True + + def test_preserves_safe_list(self) -> None: + """Lists without pickle markers must be left untouched.""" + data = [1, "two", {"key": "value"}] + assert strip_pickle_markers(data) == data + + def test_mixed_safe_and_malicious(self) -> None: + """Only the malicious entries should be stripped; safe entries remain.""" + data = { + "user_input": "hello", + "evil": {"__pickled__": "PAYLOAD", "__type__": "os:system"}, + "count": 42, + } + result = strip_pickle_markers(data) + assert result == {"user_input": "hello", "evil": None, "count": 42} + + def test_reconstruct_blocks_injected_markers(self) -> None: + """End-to-end: reconstruct_to_type must not unpickle injected markers.""" + malicious = {"__pickled__": "gASVDgAAAAAAAACMBHRlc3SULg==", "__type__": "builtins:str"} + result = reconstruct_to_type(malicious, str) + assert result is None From 87a5a183cb3574af1ec830f48fe8c76a0eb33ecb Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 12:29:28 -0500 Subject: [PATCH 2/9] refactor: address review concerns for pickle fix 1. Remove deserialize_value() fallback in _deserialize_hitl_response: untrusted HITL data now returns as-is when no type hint is available, never flowing into pickle.loads(). 2.
Move strip_pickle_markers() out of reconstruct_to_type(): the function is general-purpose again; untrusted-data callers are responsible for sanitizing first (documented with NOTE comment). 3. Define _PICKLE_MARKER/_TYPE_MARKER as local constants with import-time assertions against core's values: decouples from private names while failing loudly if core ever changes them. 4. Update tests to reflect new responsibility boundaries. --- .../_serialization.py | 29 ++++++++++++------- .../_workflow.py | 10 ++++--- .../azurefunctions/tests/test_func_utils.py | 24 ++++++++------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index 666ee24606..4f85b5c54c 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -25,8 +25,8 @@ from typing import Any from agent_framework._workflows._checkpoint_encoding import ( - _PICKLE_MARKER, - _TYPE_MARKER, + _PICKLE_MARKER as _CORE_PICKLE_MARKER, + _TYPE_MARKER as _CORE_TYPE_MARKER, decode_checkpoint_value, encode_checkpoint_value, ) @@ -34,6 +34,19 @@ logger = logging.getLogger(__name__) +# Local copies of the checkpoint marker keys used by strip_pickle_markers(). +# Defined here to avoid tight coupling to core's private names. The import- +# time assertions below ensure we stay in sync if core ever changes them.
+_PICKLE_MARKER: str = "__pickled__" +_TYPE_MARKER: str = "__type__" + +assert _PICKLE_MARKER == _CORE_PICKLE_MARKER, ( + f"Pickle marker mismatch: local={_PICKLE_MARKER!r}, core={_CORE_PICKLE_MARKER!r}" +) +assert _TYPE_MARKER == _CORE_TYPE_MARKER, ( + f"Type marker mismatch: local={_TYPE_MARKER!r}, core={_CORE_TYPE_MARKER!r}" +) + def resolve_type(type_key: str) -> type | None: """Resolve a 'module:class' type key to its Python type. @@ -158,14 +171,10 @@ def reconstruct_to_type(value: Any, target_type: type) -> Any: if not isinstance(value, dict): return value - # Sanitize untrusted dicts before they reach pickle.loads() - value = strip_pickle_markers(value) - if value is None: - return None - if not isinstance(value, dict): - return value - - # Try decoding if data has pickle markers (from checkpoint encoding) + # Try decoding if data has pickle markers (from checkpoint encoding). + # NOTE: This function is general-purpose. Callers that handle untrusted + # data (e.g. HITL responses) MUST call strip_pickle_markers() before + # passing data here. See _deserialize_hitl_response in _workflow.py. 
decoded = deserialize_value(value) if not isinstance(decoded, dict): return decoded diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py index e2fa80fc32..59ed19e6f7 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py @@ -976,7 +976,7 @@ def _deserialize_hitl_response(response_data: Any, response_type_str: str | None logger.debug("Response data is not a dict, returning as-is: %s", type(response_data).__name__) return response_data - # Try to deserialize using the type hint + # Try to reconstruct using the type hint (Pydantic / dataclass) if response_type_str: response_type = resolve_type(response_type_str) if response_type: @@ -986,6 +986,8 @@ def _deserialize_hitl_response(response_data: Any, response_type_str: str | None return result logger.warning("Could not resolve response type: %s", response_type_str) - # Fall back to generic deserialization - logger.debug("Falling back to generic deserialization") - return deserialize_value(response_data) + # No type hint available — return the sanitized dict as-is. + # We intentionally do NOT call deserialize_value() here because HITL + # response data is untrusted and must never flow into pickle.loads(). + logger.debug("No type hint; returning sanitized data as-is") + return response_data diff --git a/python/packages/azurefunctions/tests/test_func_utils.py b/python/packages/azurefunctions/tests/test_func_utils.py index 872c3e8117..98c21cf946 100644 --- a/python/packages/azurefunctions/tests/test_func_utils.py +++ b/python/packages/azurefunctions/tests/test_func_utils.py @@ -354,18 +354,17 @@ class Feedback: assert result.comment == "Great" def test_reconstruct_from_checkpoint_markers(self) -> None: - """Test that data with checkpoint markers is stripped (security). 
+ """Test that data with checkpoint markers is decoded via deserialize_value. - reconstruct_to_type is used for untrusted HITL responses, so pickle - markers must be neutralised. Legitimate internal roundtrips use - deserialize_value directly instead. + reconstruct_to_type is general-purpose and handles trusted checkpoint + data. Untrusted HITL callers must call strip_pickle_markers() first. """ original = SampleData(value=99, name="marker-test") encoded = serialize_value(original) - # Pickle markers are stripped — returns None (attack blocked) result = reconstruct_to_type(encoded, SampleData) - assert result is None + assert isinstance(result, SampleData) + assert result.value == 99 def test_unrecognized_dict_returns_original(self) -> None: """Test that unrecognized dicts are returned as-is.""" @@ -380,9 +379,13 @@ class Unrelated: assert result == data def test_reconstruct_strips_injected_pickle_markers(self) -> None: - """Test that reconstruct_to_type neutralises injected pickle markers.""" + """End-to-end: strip_pickle_markers + reconstruct_to_type blocks attack. + + This mirrors the real HITL flow where callers sanitize before reconstruction. 
+ """ malicious = {"__pickled__": "gASVDgAAAAAAAACMBHRlc3SULg==", "__type__": "builtins:str"} - result = reconstruct_to_type(malicious, str) + sanitized = strip_pickle_markers(malicious) + result = reconstruct_to_type(sanitized, str) assert result is None @@ -446,7 +449,8 @@ def test_mixed_safe_and_malicious(self) -> None: assert result == {"user_input": "hello", "evil": None, "count": 42} def test_reconstruct_blocks_injected_markers(self) -> None: - """End-to-end: reconstruct_to_type must not unpickle injected markers.""" + """End-to-end: strip then reconstruct must not unpickle injected markers.""" malicious = {"__pickled__": "gASVDgAAAAAAAACMBHRlc3SULg==", "__type__": "builtins:str"} - result = reconstruct_to_type(malicious, str) + sanitized = strip_pickle_markers(malicious) + result = reconstruct_to_type(sanitized, str) assert result is None From ce4be42d66532f55f37a414bec7c41759ec95bd1 Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 14:32:45 -0500 Subject: [PATCH 3/9] fix: simplify warning message and fix ruff RUF001 lint --- .../agent_framework_azurefunctions/_serialization.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index 4f85b5c54c..1403f82081 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -89,10 +89,7 @@ def strip_pickle_markers(data: Any) -> Any: """ if isinstance(data, dict): if _PICKLE_MARKER in data or _TYPE_MARKER in data: - logger.warning( - "Stripped pickle/type markers from untrusted input – " - "potential deserialization attack blocked." 
- ) + logger.warning("Stripped pickle/type markers from untrusted input.") return None return {k: strip_pickle_markers(v) for k, v in data.items()} From 33cdb61636b32af0c9a8a70217826c547e587771 Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 14:40:35 -0500 Subject: [PATCH 4/9] fix: suppress pyright reportPrivateUsage on core marker imports --- .../agent_framework_azurefunctions/_serialization.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index 1403f82081..d93145a2f2 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -25,8 +25,8 @@ from typing import Any from agent_framework._workflows._checkpoint_encoding import ( - _PICKLE_MARKER as _CORE_PICKLE_MARKER, - _TYPE_MARKER as _CORE_TYPE_MARKER, + _PICKLE_MARKER as _CORE_PICKLE_MARKER, # pyright: ignore[reportPrivateUsage] + _TYPE_MARKER as _CORE_TYPE_MARKER, # pyright: ignore[reportPrivateUsage] # pyright: ignore[reportPrivateUsage] decode_checkpoint_value, encode_checkpoint_value, ) From 6f2078ec0ee1c31958810fdaeba004d381c086c6 Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 14:44:06 -0500 Subject: [PATCH 5/9] Lower marker-strip log from warning to debug to avoid log flooding --- .../agent_framework_azurefunctions/_serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index d93145a2f2..9704fac65e 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -89,7 
+89,7 @@ def strip_pickle_markers(data: Any) -> Any: """ if isinstance(data, dict): if _PICKLE_MARKER in data or _TYPE_MARKER in data: - logger.warning("Stripped pickle/type markers from untrusted input.") + logger.debug("Stripped pickle/type markers from untrusted input.") return None return {k: strip_pickle_markers(v) for k, v in data.items()} From bfd0ec8ecd009eacb9ffd55832226bd4db6f1c69 Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 15:17:01 -0500 Subject: [PATCH 6/9] Replace assert with RuntimeError for marker sync checks (ruff S101) --- .../agent_framework_azurefunctions/_serialization.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index 9704fac65e..0687fc5fcf 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -36,16 +36,14 @@ # Local copies of the checkpoint marker keys used by strip_pickle_markers(). # Defined here to avoid tight coupling to core's private names. The import- -# time assertions below ensure we stay in sync if core ever changes them. +# time checks below ensure we stay in sync if core ever changes them. 
_PICKLE_MARKER: str = "__pickled__" _TYPE_MARKER: str = "__type__" -assert _PICKLE_MARKER == _CORE_PICKLE_MARKER, ( - f"Pickle marker mismatch: local={_PICKLE_MARKER!r}, core={_CORE_PICKLE_MARKER!r}" -) -assert _TYPE_MARKER == _CORE_TYPE_MARKER, ( - f"Type marker mismatch: local={_TYPE_MARKER!r}, core={_CORE_TYPE_MARKER!r}" -) +if _PICKLE_MARKER != _CORE_PICKLE_MARKER: + raise RuntimeError(f"Pickle marker mismatch: local={_PICKLE_MARKER!r}, core={_CORE_PICKLE_MARKER!r}") +if _TYPE_MARKER != _CORE_TYPE_MARKER: + raise RuntimeError(f"Type marker mismatch: local={_TYPE_MARKER!r}, core={_CORE_TYPE_MARKER!r}") def resolve_type(type_key: str) -> type | None: From 2da23b63e59ace4f8e44c0b031b66818caa1320c Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 17:15:42 -0500 Subject: [PATCH 7/9] Fix pyright and ruff CI errors in security fix - Use cast() for dict/list comprehensions in strip_pickle_markers (pyright) - type: ignore for narrowed dict return in _workflow.py (pyright) - Simplify marker imports: use core constants directly, remove local copies - Remove duplicate pyright ignore comment --- .../_serialization.py | 23 ++++++------------- .../_workflow.py | 4 ++-- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index 0687fc5fcf..3550600f04 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -22,11 +22,11 @@ import logging from contextlib import suppress from dataclasses import is_dataclass -from typing import Any +from typing import Any, cast from agent_framework._workflows._checkpoint_encoding import ( - _PICKLE_MARKER as _CORE_PICKLE_MARKER, # pyright: ignore[reportPrivateUsage] - _TYPE_MARKER as _CORE_TYPE_MARKER, # pyright: ignore[reportPrivateUsage] 
# pyright: ignore[reportPrivateUsage] + _PICKLE_MARKER, # pyright: ignore[reportPrivateUsage] + _TYPE_MARKER, # pyright: ignore[reportPrivateUsage] decode_checkpoint_value, encode_checkpoint_value, ) @@ -34,17 +34,6 @@ logger = logging.getLogger(__name__) -# Local copies of the checkpoint marker keys used by strip_pickle_markers(). -# Defined here to avoid tight coupling to core's private names. The import- -# time checks below ensure we stay in sync if core ever changes them. -_PICKLE_MARKER: str = "__pickled__" -_TYPE_MARKER: str = "__type__" - -if _PICKLE_MARKER != _CORE_PICKLE_MARKER: - raise RuntimeError(f"Pickle marker mismatch: local={_PICKLE_MARKER!r}, core={_CORE_PICKLE_MARKER!r}") -if _TYPE_MARKER != _CORE_TYPE_MARKER: - raise RuntimeError(f"Type marker mismatch: local={_TYPE_MARKER!r}, core={_CORE_TYPE_MARKER!r}") - def resolve_type(type_key: str) -> type | None: """Resolve a 'module:class' type key to its Python type. @@ -89,10 +78,12 @@ def strip_pickle_markers(data: Any) -> Any: if _PICKLE_MARKER in data or _TYPE_MARKER in data: logger.debug("Stripped pickle/type markers from untrusted input.") return None - return {k: strip_pickle_markers(v) for k, v in data.items()} + typed_dict = cast(dict[str, Any], data) + return {k: strip_pickle_markers(v) for k, v in typed_dict.items()} if isinstance(data, list): - return [strip_pickle_markers(item) for item in data] + typed_list = cast(list[Any], data) + return [strip_pickle_markers(item) for item in typed_list] return data diff --git a/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py index 59ed19e6f7..a8774353ec 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_workflow.py @@ -986,8 +986,8 @@ def _deserialize_hitl_response(response_data: Any, response_type_str: str | None return result 
logger.warning("Could not resolve response type: %s", response_type_str) - # No type hint available — return the sanitized dict as-is. + # No type hint available - return the sanitized dict as-is. # We intentionally do NOT call deserialize_value() here because HITL # response data is untrusted and must never flow into pickle.loads(). logger.debug("No type hint; returning sanitized data as-is") - return response_data + return response_data # type: ignore[reportUnknownVariableType] From 1d099cae611ff36a4a3ae16858beea68ff5e1166 Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 17:24:37 -0500 Subject: [PATCH 8/9] Remove duplicate end-to-end test in TestStripPickleMarkers --- python/packages/azurefunctions/tests/test_func_utils.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/python/packages/azurefunctions/tests/test_func_utils.py b/python/packages/azurefunctions/tests/test_func_utils.py index 98c21cf946..63f0af0182 100644 --- a/python/packages/azurefunctions/tests/test_func_utils.py +++ b/python/packages/azurefunctions/tests/test_func_utils.py @@ -447,10 +447,3 @@ def test_mixed_safe_and_malicious(self) -> None: } result = strip_pickle_markers(data) assert result == {"user_input": "hello", "evil": None, "count": 42} - - def test_reconstruct_blocks_injected_markers(self) -> None: - """End-to-end: strip then reconstruct must not unpickle injected markers.""" - malicious = {"__pickled__": "gASVDgAAAAAAAACMBHRlc3SULg==", "__type__": "builtins:str"} - sanitized = strip_pickle_markers(malicious) - result = reconstruct_to_type(sanitized, str) - assert result is None From c71e0aebea3d8530964cae92f229a8b4e3514ac4 Mon Sep 17 00:00:00 2001 From: Ahmed Muhsin Date: Mon, 9 Mar 2026 17:27:41 -0500 Subject: [PATCH 9/9] Suppress mypy redundant-cast on list cast needed by pyright --- .../agent_framework_azurefunctions/_serialization.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py index 3550600f04..4ed080eceb 100644 --- a/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py +++ b/python/packages/azurefunctions/agent_framework_azurefunctions/_serialization.py @@ -82,7 +82,7 @@ def strip_pickle_markers(data: Any) -> Any: return {k: strip_pickle_markers(v) for k, v in typed_dict.items()} if isinstance(data, list): - typed_list = cast(list[Any], data) + typed_list = cast(list[Any], data) # type: ignore[redundant-cast] return [strip_pickle_markers(item) for item in typed_list] return data