From 8a3e8341f0c27f41febbc347d737ef66729f9735 Mon Sep 17 00:00:00 2001
From: jkottu
Date: Tue, 24 Mar 2026 10:25:22 -0700
Subject: [PATCH] fix: set allow_pickle=False in NPY decoder for CWE-502

Patch sagemaker-inference decoder.py at Docker build time to change
allow_pickle=True to allow_pickle=False, preventing pickle
deserialization RCE via malicious NPY payloads.

sim: https://t.corp.amazon.com/P398172182
---
 docker/1.4-2/final/Dockerfile.cpu         |  4 +
 docker/1.4-2/resources/patches/decoder.py | 96 +++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 docker/1.4-2/resources/patches/decoder.py

diff --git a/docker/1.4-2/final/Dockerfile.cpu b/docker/1.4-2/final/Dockerfile.cpu
index bd7148c9..5954fb5f 100644
--- a/docker/1.4-2/final/Dockerfile.cpu
+++ b/docker/1.4-2/final/Dockerfile.cpu
@@ -16,6 +16,10 @@ COPY dist/sagemaker_sklearn_container-2.0-py3-none-any.whl /sagemaker_sklearn_co
 RUN uv pip install --system --no-cache /sagemaker_sklearn_container-2.0-py3-none-any.whl && \
     rm /sagemaker_sklearn_container-2.0-py3-none-any.whl
 
+# CWE-502: Patch sagemaker-inference decoder to disable pickle deserialization
+COPY docker/$SAGEMAKER_SKLEARN_VERSION/resources/patches/decoder.py \
+    /usr/local/lib/python3.10/dist-packages/sagemaker_inference/decoder.py
+
 ENV SAGEMAKER_TRAINING_MODULE sagemaker_sklearn_container.training:main
 ENV SAGEMAKER_SERVING_MODULE sagemaker_sklearn_container.serving:main
 
diff --git a/docker/1.4-2/resources/patches/decoder.py b/docker/1.4-2/resources/patches/decoder.py
new file mode 100644
index 00000000..12330ec0
--- /dev/null
+++ b/docker/1.4-2/resources/patches/decoder.py
@@ -0,0 +1,96 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License'). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the 'license' file accompanying this file. This file is
+# distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""This module contains functionality for converting various types of
+files and objects to NumPy arrays."""
+from __future__ import absolute_import
+
+import json
+from typing import Iterable  # noqa ignore=F401 imported but unused
+
+import numpy as np
+from six import BytesIO, StringIO
+
+from sagemaker_inference import content_types, errors
+
+
+def _json_to_numpy(string_like, dtype=None):  # type: (str) -> np.array
+    """Convert a JSON object to a numpy array.
+
+    Args:
+        string_like (str): JSON string.
+        dtype (dtype, optional): Data type of the resulting array.
+            If None, the dtypes will be determined by the contents
+            of each column, individually. This argument can only be
+            used to 'upcast' the array. For downcasting, use the
+            .astype(t) method.
+
+    Returns:
+        (np.array): numpy array
+    """
+    data = json.loads(string_like)
+    return np.array(data, dtype=dtype)
+
+
+def _csv_to_numpy(string_like, dtype=None):  # type: (str) -> np.array
+    """Convert a CSV object to a numpy array.
+
+    Args:
+        string_like (str): CSV string.
+        dtype (dtype, optional): Data type of the resulting array. If None,
+            the dtypes will be determined by the contents of each column,
+            individually. This argument can only be used to 'upcast' the array.
+            For downcasting, use the .astype(t) method.
+
+    Returns:
+        (np.array): numpy array
+    """
+    stream = StringIO(string_like)
+    return np.genfromtxt(stream, dtype=dtype, delimiter=",")
+
+
+def _npy_to_numpy(npy_array):  # type: (object) -> np.array
+    """Convert a NPY array into numpy.
+
+    Args:
+        npy_array (npy array): to be converted to numpy array
+
+    Returns:
+        (np.array): converted numpy array.
+    """
+    stream = BytesIO(npy_array)
+    return np.load(stream, allow_pickle=False)
+
+
+_decoder_map = {
+    content_types.NPY: _npy_to_numpy,
+    content_types.CSV: _csv_to_numpy,
+    content_types.JSON: _json_to_numpy,
+}
+
+
+def decode(obj, content_type):
+    # type: (np.array or Iterable or int or float, str) -> np.array
+    """Decode an object to one of the default content types to a numpy array.
+
+    Args:
+        obj (object): to be decoded.
+        content_type (str): content type to be used.
+
+    Returns:
+        np.array: decoded object.
+    """
+    try:
+        decoder = _decoder_map[content_type]
+        return decoder(obj)
+    except KeyError:
+        raise errors.UnsupportedFormatError(content_type)