Skip to content

Commit c9e68aa

Browse files
weirongw23-msft and msyyc
authored and committed
[Storage] Decompression for Binary Response in Download APIs (#43587)
1 parent 2566bf8 commit c9e68aa

23 files changed

+484
-4
lines changed

sdk/storage/azure-storage-blob/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ APIs, which specifies the full path to start listing paths from.
1111
- Added support for the keyword `user_delegation_oid` to `generate_blob_sas` and `generate_container_sas`, which
1212
specifies the Entra ID of the user that is authorized to use the generated SAS URL.
1313
- Added support for `UseDevelopmentStorage=true;` as a valid connection string for Azurite.
14+
- Added the ability to skip auto decompression on `BlobClient.download_blob` via the `decompress` keyword.
1415

1516
## 12.27.1 (2025-10-29)
1617

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,8 @@ def download_blob(
732732
function(current: int, total: int) where current is the number of bytes transferred
733733
so far, and total is the total size of the download.
734734
:paramtype progress_hook: Callable[[int, int], None]
735+
:keyword bool decompress: If True, any compressed content, identified by the Content-Encoding header, will be
736+
decompressed automatically before being returned. Default value is True.
735737
:keyword int timeout:
736738
Sets the server-side timeout for the operation in seconds. For more details see
737739
https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations.

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
210210
max_concurrency: int = 1,
211211
encoding: str,
212212
progress_hook: Optional[Callable[[int, int], None]] = None,
213+
decompress: Optional[bool] = None,
213214
timeout: Optional[int] = None,
214215
**kwargs: Any
215216
) -> StorageStreamDownloader[str]: ...
@@ -231,6 +232,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
231232
max_concurrency: int = 1,
232233
encoding: None = None,
233234
progress_hook: Optional[Callable[[int, int], None]] = None,
235+
decompress: Optional[bool] = None,
234236
timeout: Optional[int] = None,
235237
**kwargs: Any
236238
) -> StorageStreamDownloader[bytes]: ...
@@ -252,6 +254,7 @@ class BlobClient(StorageAccountHostsMixin, StorageEncryptionMixin):
252254
max_concurrency: int = 1,
253255
encoding: Optional[str] = None,
254256
progress_hook: Optional[Callable[[int, int], None]] = None,
257+
decompress: Optional[bool] = None,
255258
timeout: Optional[int] = None,
256259
**kwargs: Any
257260
) -> Union[StorageStreamDownloader[str], StorageStreamDownloader[bytes]]: ...

sdk/storage/azure-storage-blob/azure/storage/blob/_download.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,10 @@ def __len__(self):
422422
def _get_encryption_data_request(self) -> None:
423423
# Save current request cls
424424
download_cls = self._request_options.pop('cls', None)
425+
426+
# Temporarily removing this for the get properties request
427+
decompress = self._request_options.pop('decompress', None)
428+
425429
# Adjust cls for get_properties
426430
self._request_options['cls'] = deserialize_blob_properties
427431

@@ -434,6 +438,10 @@ def _get_encryption_data_request(self) -> None:
434438
# Restore cls for download
435439
self._request_options['cls'] = download_cls
436440

441+
# Decompression does not work with client-side encryption
442+
if decompress is not None:
443+
self._request_options['decompress'] = decompress
444+
437445
@property
438446
def _download_complete(self):
439447
if is_encryption_v2(self._encryption_data):

sdk/storage/azure-storage-blob/azure/storage/blob/aio/_blob_client_async.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,8 @@ async def download_blob(
745745
function(current: int, total: int) where current is the number of bytes transferred
746746
so far, and total is the total size of the download.
747747
:paramtype progress_hook: Callable[[int, int], Awaitable[None]]
748+
:keyword bool decompress: If True, any compressed content, identified by the Content-Encoding header, will be
749+
decompressed automatically before being returned. Default value is True.
748750
:keyword int timeout:
749751
Sets the server-side timeout for the operation in seconds. For more details see
750752
https://learn.microsoft.com/rest/api/storageservices/setting-timeouts-for-blob-service-operations.

sdk/storage/azure-storage-blob/azure/storage/blob/aio/_blob_client_async.pyi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ class BlobClient( # type: ignore[misc]
212212
max_concurrency: int = 1,
213213
encoding: str,
214214
progress_hook: Optional[Callable[[int, int], Awaitable[None]]] = None,
215+
decompress: Optional[bool] = None,
215216
timeout: Optional[int] = None,
216217
**kwargs: Any
217218
) -> StorageStreamDownloader[str]: ...
@@ -233,6 +234,7 @@ class BlobClient( # type: ignore[misc]
233234
max_concurrency: int = 1,
234235
encoding: None = None,
235236
progress_hook: Optional[Callable[[int, int], Awaitable[None]]] = None,
237+
decompress: Optional[bool] = None,
236238
timeout: Optional[int] = None,
237239
**kwargs: Any
238240
) -> StorageStreamDownloader[bytes]: ...
@@ -254,6 +256,7 @@ class BlobClient( # type: ignore[misc]
254256
max_concurrency: int = 1,
255257
encoding: Optional[str] = None,
256258
progress_hook: Optional[Callable[[int, int], Awaitable[None]]] = None,
259+
decompress: Optional[bool] = None,
257260
timeout: Optional[int] = None,
258261
**kwargs: Any
259262
) -> Union[StorageStreamDownloader[str], StorageStreamDownloader[bytes]]: ...

sdk/storage/azure-storage-blob/azure/storage/blob/aio/_download_async.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,10 @@ def __len__(self):
292292
async def _get_encryption_data_request(self) -> None:
293293
# Save current request cls
294294
download_cls = self._request_options.pop('cls', None)
295+
296+
# Temporarily removing this for the get properties request
297+
decompress = self._request_options.pop('decompress', None)
298+
295299
# Adjust cls for get_properties
296300
self._request_options['cls'] = deserialize_blob_properties
297301

@@ -304,6 +308,10 @@ async def _get_encryption_data_request(self) -> None:
304308
# Restore cls for download
305309
self._request_options['cls'] = download_cls
306310

311+
# Decompression does not work with client-side encryption
312+
if decompress is not None:
313+
self._request_options['decompress'] = decompress
314+
307315
async def _setup(self) -> None:
308316
if self._encryption_options.get("key") is not None or self._encryption_options.get("resolver") is not None:
309317
await self._get_encryption_data_request()

sdk/storage/azure-storage-blob/tests/test_blob_encryption_v2.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import pytest
1515
from azure.core import MatchConditions
1616
from azure.core.exceptions import HttpResponseError
17-
from azure.storage.blob import BlobServiceClient, BlobType
17+
from azure.storage.blob import BlobServiceClient, BlobType, ContentSettings
1818
from azure.storage.blob._encryption import (
1919
_dict_to_encryption_data,
2020
_validate_and_unwrap_cek,
@@ -196,6 +196,26 @@ def test_encryption_kek(self, **kwargs):
196196
# Assert
197197
assert content == data
198198

199+
@pytest.mark.live_test_only
200+
@BlobPreparer()
201+
def test_decompression_with_encryption(self, **kwargs):
202+
storage_account_name = kwargs.pop("storage_account_name")
203+
storage_account_key = kwargs.pop("storage_account_key")
204+
205+
self._setup(storage_account_name, storage_account_key)
206+
kek = KeyWrapper('key1')
207+
self.enable_encryption_v2(kek)
208+
209+
blob = self.bsc.get_blob_client(self.container_name, self._get_blob_reference())
210+
compressed_data = b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xcaH\xcd\xc9\xc9WH+\xca\xcfUH\xaf\xca,\x00\x00\x00\x00\xff\xff\x03\x00d\xaa\x8e\xb5\x0f\x00\x00\x00'
211+
content_settings = ContentSettings(content_encoding='gzip')
212+
213+
# Act / Assert
214+
blob.upload_blob(data=compressed_data, overwrite=True, content_settings=content_settings)
215+
216+
result = blob.download_blob(decompress=False).readall()
217+
assert result == compressed_data
218+
199219
@pytest.mark.live_test_only
200220
@BlobPreparer()
201221
def test_encryption_kek_rsa(self, **kwargs):

sdk/storage/azure-storage-blob/tests/test_blob_encryption_v2_async.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import pytest
1515
from azure.core import MatchConditions
1616
from azure.core.exceptions import HttpResponseError
17-
from azure.storage.blob import BlobType
17+
from azure.storage.blob import BlobType, ContentSettings
1818
from azure.storage.blob.aio import BlobServiceClient
1919
from azure.storage.blob._encryption import (
2020
_dict_to_encryption_data,
@@ -199,6 +199,26 @@ async def test_encryption_kek(self, **kwargs):
199199
# Assert
200200
assert content == data
201201

202+
@pytest.mark.live_test_only
203+
@BlobPreparer()
204+
async def test_decompression_with_encryption(self, **kwargs):
205+
storage_account_name = kwargs.pop("storage_account_name")
206+
storage_account_key = kwargs.pop("storage_account_key")
207+
208+
await self._setup(storage_account_name, storage_account_key)
209+
kek = KeyWrapper('key1')
210+
self.enable_encryption_v2(kek)
211+
212+
blob = self.bsc.get_blob_client(self.container_name, self._get_blob_reference())
213+
compressed_data = b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xcaH\xcd\xc9\xc9WH+\xca\xcfUH\xaf\xca,\x00\x00\x00\x00\xff\xff\x03\x00d\xaa\x8e\xb5\x0f\x00\x00\x00'
214+
content_settings = ContentSettings(content_encoding='gzip')
215+
216+
# Act / Assert
217+
await blob.upload_blob(data=compressed_data, overwrite=True, content_settings=content_settings)
218+
219+
result = await (await blob.download_blob(decompress=False)).readall()
220+
assert result == compressed_data
221+
202222
@pytest.mark.live_test_only
203223
@BlobPreparer()
204224
async def test_encryption_kek_rsa(self, **kwargs):

sdk/storage/azure-storage-blob/tests/test_common_blob.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3658,4 +3658,53 @@ def test_blob_user_delegation_oid(self, **kwargs):
36583658

36593659
return variables
36603660

3661+
@pytest.mark.live_test_only
3662+
@BlobPreparer()
3663+
def test_download_blob_decompress(self, **kwargs):
3664+
storage_account_name = kwargs.pop("storage_account_name")
3665+
storage_account_key = kwargs.pop("storage_account_key")
3666+
3667+
# Arrange
3668+
self._setup(storage_account_name, storage_account_key)
3669+
blob_name = self._get_blob_reference()
3670+
blob = self.bsc.get_blob_client(self.container_name, blob_name)
3671+
compressed_data = b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xcaH\xcd\xc9\xc9WH+\xca\xcfUH\xaf\xca,\x00\x00\x00\x00\xff\xff\x03\x00d\xaa\x8e\xb5\x0f\x00\x00\x00'
3672+
decompressed_data = b"hello from gzip"
3673+
content_settings = ContentSettings(content_encoding='gzip')
3674+
3675+
# Act / Assert
3676+
blob.upload_blob(data=compressed_data, overwrite=True, content_settings=content_settings)
3677+
3678+
result = blob.download_blob(decompress=True).readall()
3679+
assert result == decompressed_data
3680+
3681+
result = blob.download_blob(decompress=False).readall()
3682+
assert result == compressed_data
3683+
3684+
@pytest.mark.live_test_only
3685+
@BlobPreparer()
3686+
def test_download_blob_no_decompress_chunks(self, **kwargs):
3687+
storage_account_name = kwargs.pop("storage_account_name")
3688+
storage_account_key = kwargs.pop("storage_account_key")
3689+
3690+
# Arrange
3691+
self._setup(storage_account_name, storage_account_key)
3692+
blob_name = self._get_blob_reference()
3693+
blob = BlobClient(
3694+
account_url=self.account_url(storage_account_name, "blob"),
3695+
container_name=self.container_name,
3696+
blob_name = blob_name,
3697+
credential=storage_account_key,
3698+
max_chunk_get_size=4,
3699+
max_single_get_size=4,
3700+
)
3701+
compressed_data = b'\x1f\x8b\x08\x00\x00\x00\x00\x00\x00\xff\xcaH\xcd\xc9\xc9WH+\xca\xcfUH\xaf\xca,\x00\x00\x00\x00\xff\xff\x03\x00d\xaa\x8e\xb5\x0f\x00\x00\x00'
3702+
content_settings = ContentSettings(content_encoding='gzip')
3703+
3704+
# Act / Assert
3705+
blob.upload_blob(data=compressed_data, overwrite=True, content_settings=content_settings)
3706+
3707+
result = blob.download_blob(decompress=False).readall()
3708+
assert result == compressed_data
3709+
36613710
# ------------------------------------------------------------------------------

0 commit comments

Comments
 (0)