From c0bf477ecb855f880ea1fcbe5e40aceb573bd287 Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Thu, 13 Nov 2025 18:16:51 +0100 Subject: [PATCH 01/11] Debug --- minio/api.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/minio/api.py b/minio/api.py index 4c583140..4f005298 100644 --- a/minio/api.py +++ b/minio/api.py @@ -3234,10 +3234,17 @@ def put_object( pool = ThreadPool(num_parallel_uploads) pool.start_parallel() - headers = HTTPHeaderDict( + part_headers = HTTPHeaderDict( sse.headers() if isinstance(sse, SseCustomerKey) else None, ) - headers.extend(checksum_headers) + part_headers.extend(checksum_headers) + + # Explicitly filter out CreateMultipartUpload-only headers that should not be in UploadPart + headers = HTTPHeaderDict({ + k: v for k, v in part_headers.items() + if not k.lower() in ("x-amz-storage-class",) + }) + if num_parallel_uploads > 1: kwargs = { "bucket_name": bucket_name, From de532ea5de2cd18902e0bee50c1df53819d511e8 Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Thu, 13 Nov 2025 19:37:47 +0100 Subject: [PATCH 02/11] Updates --- minio/api.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/minio/api.py b/minio/api.py index 4f005298..da6df63a 100644 --- a/minio/api.py +++ b/minio/api.py @@ -3221,7 +3221,6 @@ def put_object( ) if not upload_id: - headers.extend(checksum_headers) upload_id = self._create_multipart_upload( bucket_name=bucket_name, object_name=object_name, @@ -3234,16 +3233,10 @@ def put_object( pool = ThreadPool(num_parallel_uploads) pool.start_parallel() - part_headers = HTTPHeaderDict( + headers = HTTPHeaderDict( sse.headers() if isinstance(sse, SseCustomerKey) else None, ) - part_headers.extend(checksum_headers) - - # Explicitly filter out CreateMultipartUpload-only headers that should not be in UploadPart - headers = HTTPHeaderDict({ - k: v for k, v in part_headers.items() - if not k.lower() in ("x-amz-storage-class",) - }) + headers.extend(checksum_headers) if 
num_parallel_uploads > 1: kwargs = { From e49e93b93b59e476c233099cfbc2946a208c72a1 Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Thu, 13 Nov 2025 19:38:18 +0100 Subject: [PATCH 03/11] Remove NL --- minio/api.py | 1 - 1 file changed, 1 deletion(-) diff --git a/minio/api.py b/minio/api.py index da6df63a..c78f5478 100644 --- a/minio/api.py +++ b/minio/api.py @@ -3237,7 +3237,6 @@ def put_object( sse.headers() if isinstance(sse, SseCustomerKey) else None, ) headers.extend(checksum_headers) - if num_parallel_uploads > 1: kwargs = { "bucket_name": bucket_name, From 82a173805e63bdc1d09d7d26b91f5d0dae10f23b Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 14 Nov 2025 14:17:12 +0100 Subject: [PATCH 04/11] Update --- minio/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/minio/api.py b/minio/api.py index c78f5478..6d801375 100644 --- a/minio/api.py +++ b/minio/api.py @@ -3221,6 +3221,9 @@ def put_object( ) if not upload_id: + # Add only algorithm header to CreateMultipartUpload, not checksum values + headers.extend({k: v for k, v in checksum_headers.items() + if k == "x-amz-sdk-checksum-algorithm"}) upload_id = self._create_multipart_upload( bucket_name=bucket_name, object_name=object_name, From 47b1dbc65e53dd78de7dc4a8ca488cb1cf8f5e42 Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 14 Nov 2025 15:01:16 +0100 Subject: [PATCH 05/11] Attempt --- minio/api.py | 37 ++++++++++++++++++++++++++++--------- minio/datatypes.py | 4 ++++ 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/minio/api.py b/minio/api.py index 6d801375..4fb12ef9 100644 --- a/minio/api.py +++ b/minio/api.py @@ -2832,6 +2832,14 @@ def _complete_multipart_upload( tag = SubElement(element, "Part") SubElement(tag, "PartNumber", str(part.part_number)) SubElement(tag, "ETag", '"' + part.etag + '"') + if part.checksum_crc32: + SubElement(tag, "ChecksumCRC32", part.checksum_crc32) + elif part.checksum_crc32c: + SubElement(tag, "ChecksumCRC32C", part.checksum_crc32c) + elif 
part.checksum_sha1: + SubElement(tag, "ChecksumSHA1", part.checksum_sha1) + elif part.checksum_sha256: + SubElement(tag, "ChecksumSHA256", part.checksum_sha256) body = getbytes(element) headers = HTTPHeaderDict( { @@ -2921,7 +2929,7 @@ def _upload_part( region: Optional[str] = None, extra_headers: Optional[HTTPHeaderDict] = None, extra_query_params: Optional[HTTPQueryDict] = None, - ) -> str: + ) -> ObjectWriteResult: """Execute UploadPart S3 API.""" query_params = HTTPQueryDict({ "partNumber": str(part_number), @@ -2937,7 +2945,7 @@ def _upload_part( extra_headers=extra_headers, extra_query_params=extra_query_params, ) - return cast(str, result.etag) + return result def _upload_part_task(self, kwargs): """Upload_part task for ThreadPool.""" @@ -3221,9 +3229,6 @@ def put_object( ) if not upload_id: - # Add only algorithm header to CreateMultipartUpload, not checksum values - headers.extend({k: v for k, v in checksum_headers.items() - if k == "x-amz-sdk-checksum-algorithm"}) upload_id = self._create_multipart_upload( bucket_name=bucket_name, object_name=object_name, @@ -3253,7 +3258,7 @@ def put_object( self._upload_part_task, kwargs, ) else: - etag = self._upload_part( + result = self._upload_part( bucket_name=bucket_name, object_name=object_name, data=part_data, @@ -3261,14 +3266,28 @@ def put_object( upload_id=upload_id, part_number=part_number, ) - parts.append(Part(part_number, etag)) + parts.append(Part( + part_number=part_number, + etag=result.etag, + checksum_crc32=result.checksum_crc32, + checksum_crc32c=result.checksum_crc32c, + checksum_sha1=result.checksum_sha1, + checksum_sha256=result.checksum_sha256, + )) if pool: result = pool.result() parts = [Part(0, "")] * part_count while not result.empty(): - part_number, etag = result.get() - parts[part_number - 1] = Part(part_number, etag) + part_number, upload_result = result.get() + parts[part_number - 1] = Part( + part_number=part_number, + etag=upload_result.etag, + 
checksum_crc32=upload_result.checksum_crc32, + checksum_crc32c=upload_result.checksum_crc32c, + checksum_sha1=upload_result.checksum_sha1, + checksum_sha256=upload_result.checksum_sha256, + ) upload_result = self._complete_multipart_upload( bucket_name=bucket_name, diff --git a/minio/datatypes.py b/minio/datatypes.py index 2e645d37..6684df0f 100644 --- a/minio/datatypes.py +++ b/minio/datatypes.py @@ -284,6 +284,10 @@ class Part: etag: str last_modified: Optional[datetime] = None size: Optional[int] = None + checksum_crc32: Optional[str] = None + checksum_crc32c: Optional[str] = None + checksum_sha1: Optional[str] = None + checksum_sha256: Optional[str] = None @classmethod def fromxml(cls: Type[C], element: ET.Element) -> C: From a526c1aad3a1c8aa1b99db20cc4f7d7ee53efbf0 Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 14 Nov 2025 15:15:48 +0100 Subject: [PATCH 06/11] Fixes --- minio/api.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/minio/api.py b/minio/api.py index 4fb12ef9..8245ad3e 100644 --- a/minio/api.py +++ b/minio/api.py @@ -3229,6 +3229,7 @@ def put_object( ) if not upload_id: + headers.extend(checksum_headers) upload_id = self._create_multipart_upload( bucket_name=bucket_name, object_name=object_name, @@ -3276,10 +3277,10 @@ def put_object( )) if pool: - result = pool.result() + result_queue = pool.result() parts = [Part(0, "")] * part_count - while not result.empty(): - part_number, upload_result = result.get() + while not result_queue.empty(): + part_number, upload_result = result_queue.get() parts[part_number - 1] = Part( part_number=part_number, etag=upload_result.etag, From 288556ea177212e4e0d04c76a8af8e0d6249371c Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 14 Nov 2025 15:40:23 +0100 Subject: [PATCH 07/11] Updates --- minio/api.py | 2 +- minio/checksum.py | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/minio/api.py b/minio/api.py index 8245ad3e..95eec6c4 100644 --- 
a/minio/api.py +++ b/minio/api.py @@ -3229,7 +3229,7 @@ def put_object( ) if not upload_id: - headers.extend(checksum_headers) + headers.extend(make_headers(hashers, add_content_sha256, add_sha256_checksum, algorithm_only=True)) upload_id = self._create_multipart_upload( bucket_name=bucket_name, object_name=object_name, diff --git a/minio/checksum.py b/minio/checksum.py index 94af7e1e..e86b33fb 100644 --- a/minio/checksum.py +++ b/minio/checksum.py @@ -402,9 +402,18 @@ def reset_hashers(hashers: Optional[Dict[Algorithm, "Hasher"]]): def make_headers( hashers: Optional[Dict[Algorithm, "Hasher"]], add_content_sha256: bool, - add_sha256_checksum: bool + add_sha256_checksum: bool, + algorithm_only: bool = False ) -> Dict[str, str]: - """Makes headers for hashers.""" + """Makes headers for hashers. + + Args: + hashers: Dictionary of algorithm to hasher instances + add_content_sha256: Whether to add x-amz-content-sha256 header + add_sha256_checksum: Whether to add SHA256 checksum header + algorithm_only: If True, only include algorithm declaration header, + not checksum value headers + """ headers = {} if hashers: for algo, hasher in hashers.items(): @@ -415,5 +424,6 @@ def make_headers( if not add_sha256_checksum: continue headers["x-amz-sdk-checksum-algorithm"] = str(algo) - headers[algo.header()] = base64_string(sum_bytes) + if not algorithm_only: + headers[algo.header()] = base64_string(sum_bytes) return headers From 968031f57675c9de81fda3397f6761c7fda05dea Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 14 Nov 2025 15:42:29 +0100 Subject: [PATCH 08/11] Fix --- minio/api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/minio/api.py b/minio/api.py index 95eec6c4..10d42341 100644 --- a/minio/api.py +++ b/minio/api.py @@ -3229,7 +3229,10 @@ def put_object( ) if not upload_id: - headers.extend(make_headers(hashers, add_content_sha256, add_sha256_checksum, algorithm_only=True)) + headers.extend(make_headers( + hashers, add_content_sha256, 
add_sha256_checksum, + algorithm_only=True, + )) upload_id = self._create_multipart_upload( bucket_name=bucket_name, object_name=object_name, From f84be368495431fbd936ce2f17c85428ca7a3f9e Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 14 Nov 2025 16:01:46 +0100 Subject: [PATCH 09/11] Add functional test --- tests/functional/tests.py | 107 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/tests/functional/tests.py b/tests/functional/tests.py index 8d7f667c..48ed5619 100644 --- a/tests/functional/tests.py +++ b/tests/functional/tests.py @@ -42,6 +42,7 @@ from urllib3._collections import HTTPHeaderDict from minio import Minio +from minio.checksum import Algorithm from minio.commonconfig import ENABLED, REPLACE, CopySource, SnowballObject from minio.datatypes import PostPolicy from minio.deleteobjects import DeleteObject @@ -908,6 +909,111 @@ def test_negative_put_object_with_path_segment( # pylint: disable=invalid-name _client.remove_bucket(bucket_name=bucket_name) +def test_put_object_multipart_with_checksum( # pylint: disable=invalid-name + log_entry): + """Test put_object() multipart upload with checksum validation. 
+ + This test validates the AWS S3 compliant checksum implementation for + multipart uploads: + - CreateMultipartUpload receives algorithm header only (not values) + - UploadPart includes checksum value headers + - CompleteMultipartUpload includes checksums in XML body + """ + + # Get a unique bucket_name and object_name + bucket_name = _gen_bucket_name() + object_name = f"{uuid4()}-checksum" + object_name_sha256 = None # Initialize for cleanup + # Use 6 MB to trigger multipart upload (> 5 MB threshold) + length = 6 * MB + + log_entry["args"] = { + "bucket_name": bucket_name, + "object_name": object_name, + "length": length, + "data": "LimitedRandomReader(6 * MB)", + "checksum": "Algorithm.CRC32C", + } + + try: + _client.make_bucket(bucket_name=bucket_name) + + # Upload with CRC32C checksum - triggers multipart upload + reader = LimitedRandomReader(length) + result = _client.put_object( + bucket_name=bucket_name, + object_name=object_name, + data=reader, + length=length, + checksum=Algorithm.CRC32C, + ) + + # Verify upload succeeded and returned valid result + if not result.etag: + raise ValueError("Upload did not return valid ETag") + + # Verify ETag indicates multipart upload (contains dash and part count) + if '-' not in result.etag: + raise ValueError( + f"Expected multipart ETag (with dash), got: {result.etag}") + + # Stat the object to verify it exists and has correct size + st_obj = _client.stat_object( + bucket_name=bucket_name, + object_name=object_name, + ) + + if st_obj.size != length: + raise ValueError( + f"Size mismatch: expected {length}, got {st_obj.size}") + + # Test with SHA256 checksum algorithm + object_name_sha256 = f"{uuid4()}-checksum-sha256" + log_entry["args"]["object_name"] = object_name_sha256 + log_entry["args"]["checksum"] = "Algorithm.SHA256" + + reader = LimitedRandomReader(length) + result = _client.put_object( + bucket_name=bucket_name, + object_name=object_name_sha256, + data=reader, + length=length, + checksum=Algorithm.SHA256, + 
) + + if not result.etag: + raise ValueError("Upload with SHA256 did not return valid ETag") + + if '-' not in result.etag: + raise ValueError( + f"Expected multipart ETag for SHA256, got: {result.etag}") + + st_obj = _client.stat_object( + bucket_name=bucket_name, + object_name=object_name_sha256, + ) + + if st_obj.size != length: + raise ValueError( + f"Size mismatch: expected {length}, got {st_obj.size}") + + finally: + try: + _client.remove_object(bucket_name=bucket_name, object_name=object_name) + except: # pylint: disable=bare-except + pass + if object_name_sha256: + try: + _client.remove_object( + bucket_name=bucket_name, object_name=object_name_sha256) + except: # pylint: disable=bare-except + pass + try: + _client.remove_bucket(bucket_name=bucket_name) + except: # pylint: disable=bare-except + pass + + def _test_stat_object(log_entry, sse=None, version_check=False): """Test stat_object().""" @@ -2393,6 +2499,7 @@ def main(): test_copy_object_unmodified_since: None, test_put_object: {"sse": ssec} if ssec else None, test_negative_put_object_with_path_segment: None, + test_put_object_multipart_with_checksum: None, test_stat_object: {"sse": ssec} if ssec else None, test_stat_object_version: {"sse": ssec} if ssec else None, test_get_object: {"sse": ssec} if ssec else None, From 371a384ff31cc72db3d44bf61725c1091b315f99 Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 14 Nov 2025 16:06:45 +0100 Subject: [PATCH 10/11] Fixes --- tests/functional/tests.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/functional/tests.py b/tests/functional/tests.py index 48ed5619..8e5456df 100644 --- a/tests/functional/tests.py +++ b/tests/functional/tests.py @@ -999,13 +999,16 @@ def test_put_object_multipart_with_checksum( # pylint: disable=invalid-name finally: try: - _client.remove_object(bucket_name=bucket_name, object_name=object_name) + _client.remove_object( + bucket_name=bucket_name, object_name=object_name) except: # pylint: 
disable=bare-except pass if object_name_sha256: try: _client.remove_object( - bucket_name=bucket_name, object_name=object_name_sha256) + bucket_name=bucket_name, + object_name=object_name_sha256, + ) except: # pylint: disable=bare-except pass try: From 1ea7ba41d27e44fd1c3ce9a3407b5b3b1dc597a7 Mon Sep 17 00:00:00 2001 From: Raul-Mircea Date: Fri, 21 Nov 2025 15:36:31 +0200 Subject: [PATCH 11/11] Fix for SSE-C --- minio/api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/minio/api.py b/minio/api.py index 10d42341..fbc9d557 100644 --- a/minio/api.py +++ b/minio/api.py @@ -3298,6 +3298,9 @@ def put_object( object_name=object_name, upload_id=cast(str, upload_id), parts=parts, + extra_headers=HTTPHeaderDict( + sse.headers() if isinstance(sse, SseCustomerKey) else None + ), ) return ObjectWriteResult.new( headers=upload_result.headers,