Skip to content

Commit f7d9fe4

Browse files
committed
Consolidate presigned URL handling into storage arch
1 parent 586f089 commit f7d9fe4

File tree

5 files changed

+129
-114
lines changed

5 files changed

+129
-114
lines changed

contentcuration/contentcuration/tests/utils/test_storage.py

Lines changed: 21 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,15 @@
77
import requests
88
from django.core.files.storage import FileSystemStorage
99
from django.test import TestCase
10-
from django_s3_storage.storage import S3Storage
1110
from mock import MagicMock
1211

1312
from ..base import StudioTestCase
1413
from contentcuration.models import generate_object_storage_name
15-
from contentcuration.utils.storage.common import _get_gcs_presigned_put_url
1614
from contentcuration.utils.storage.common import determine_content_type
1715
from contentcuration.utils.storage.common import get_presigned_upload_url
1816
from contentcuration.utils.storage.common import UnknownStorageBackendError
19-
# The modules we'll test
17+
from contentcuration.utils.storage.dev import Storage as DevStorage
18+
from contentcuration.utils.storage.gcs import GoogleCloudStorage
2019

2120

2221
class MimeTypesTestCase(TestCase):
@@ -77,7 +76,10 @@ def test_raises_error(self):
7776
"""
7877
with pytest.raises(UnknownStorageBackendError):
7978
get_presigned_upload_url(
80-
"nice", "err", 5, 0, storage=self.STORAGE,
79+
"nice",
80+
"err",
81+
5,
82+
storage=self.STORAGE,
8183
)
8284

8385

@@ -90,7 +92,9 @@ class GoogleCloudStoragePresignedURLUnitTestCase(TestCase):
9092
"""
9193

9294
def setUp(self):
95+
super().setUp()
9396
self.client = MagicMock()
97+
self.storage = GoogleCloudStorage(self.client, "fake")
9498
self.generate_signed_url_method = (
9599
self.client.get_bucket.return_value.blob.return_value.generate_signed_url
96100
)
@@ -102,19 +106,15 @@ def test_that_generate_signed_url_is_called(self):
102106
"""
103107
Check that we even call blob.generate_signed_url in the first place.
104108
"""
105-
bucket = "fake"
106-
_get_gcs_presigned_put_url(self.client, bucket, "/object.jpg", "aBc", 0, 0)
109+
get_presigned_upload_url("/object.jpg", "aBc", 0, storage=self.storage)
107110
self.generate_signed_url_method.assert_called_once()
108111

109112
def test_that_we_return_a_string(self):
110113
"""
111114
Check that get_presigned_upload_url returns the upload URL as a string.
112115
"""
113-
bucket = "fake"
114-
ret = _get_gcs_presigned_put_url(
115-
self.client, bucket, "/object.jpg", "aBc", 0, 0
116-
)
117-
assert isinstance(ret, str)
116+
ret = get_presigned_upload_url("/object.jpg", "aBc", 0, storage=self.storage)
117+
assert isinstance(ret["uploadURL"], str)
118118

119119
def test_generate_signed_url_called_with_required_arguments(self):
120120
"""
@@ -132,11 +132,9 @@ def test_generate_signed_url_called_with_required_arguments(self):
132132
bucket_name = "fake"
133133
filepath = "object.jpg"
134134
lifetime = 20 # seconds
135-
mimetype = "doesntmatter"
135+
mimetype = "image/jpeg"
136136

137-
_get_gcs_presigned_put_url(
138-
self.client, bucket_name, filepath, content_md5, lifetime, mimetype
139-
)
137+
get_presigned_upload_url(filepath, content_md5, lifetime, storage=self.storage)
140138

141139
# assert that we're creating the right object
142140
self.client.get_bucket.assert_called_once_with(bucket_name)
@@ -148,8 +146,8 @@ def test_generate_signed_url_called_with_required_arguments(self):
148146
self.generate_signed_url_method.assert_called_once_with(
149147
method=method,
150148
content_md5=content_md5,
151-
expiration=lifetime_timedelta,
152149
content_type=mimetype,
150+
expiration=lifetime_timedelta,
153151
)
154152

155153

@@ -158,11 +156,9 @@ class S3StoragePresignedURLUnitTestCase(StudioTestCase):
158156
Test cases for generating presigned URLs for S3 storage, i.e. Minio.
159157
"""
160158

161-
STORAGE = S3Storage()
162-
163159
def setUp(self):
164-
self.client = MagicMock()
165160
super().setUp()
161+
self.storage = DevStorage()
166162

167163
def test_returns_string_if_inputs_are_valid(self):
168164
"""
@@ -171,9 +167,7 @@ def test_returns_string_if_inputs_are_valid(self):
171167
"""
172168

173169
# use a real connection here as a sanity check
174-
ret = get_presigned_upload_url(
175-
"a/b/abc.jpg", "aBc", 10, 1, storage=self.STORAGE, client=None
176-
)
170+
ret = get_presigned_upload_url("a/b/abc.jpg", "aBc", 10, storage=self.storage)
177171
url = ret["uploadURL"]
178172

179173
assert isinstance(url, str)
@@ -187,12 +181,14 @@ def test_can_upload_file_to_presigned_url(self):
187181
# S3 expects a base64-encoded MD5 checksum
188182
md5 = hashlib.md5(file_contents)
189183
md5_checksum = md5.hexdigest()
190-
md5_checksum_base64 = codecs.encode(codecs.decode(md5_checksum, "hex"), "base64").decode()
184+
md5_checksum_base64 = codecs.encode(
185+
codecs.decode(md5_checksum, "hex"), "base64"
186+
).decode()
191187

192188
filename = "blahfile.jpg"
193189
filepath = generate_object_storage_name(md5_checksum, filename)
194190

195-
ret = get_presigned_upload_url(filepath, md5_checksum_base64, 1000, len(file_contents))
191+
ret = get_presigned_upload_url(filepath, md5_checksum_base64, 1000)
196192
url = ret["uploadURL"]
197193
content_type = ret["mimetype"]
198194

@@ -201,6 +197,6 @@ def test_can_upload_file_to_presigned_url(self):
201197
data=file,
202198
headers={
203199
"Content-Type": content_type,
204-
}
200+
},
205201
)
206202
resp.raise_for_status()

contentcuration/contentcuration/utils/storage/base.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@ def get_client(self):
1414
"""
1515
return None
1616

17+
def get_presigned_put_url(
18+
self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream"
19+
):
20+
"""
21+
Creates a pre-signed URL for uploading files.
22+
23+
:param filepath: A string representing the destination file path inside the bucket
24+
:param md5sum: A MD5 checksum of the file to be uploaded
25+
:param lifetime_sec: The lifetime of the URL in seconds
26+
:param mimetype: The content type of the file to be uploaded
27+
:return: A pre-signed URL for uploading the file
28+
"""
29+
raise NotImplementedError("Subclasses must implement this method")
30+
1731

1832
class CompositeStorage(Storage):
1933
def __init__(self):
@@ -40,7 +54,7 @@ def _get_readable_backend(self, name):
4054
def get_client(self):
4155
return self._get_writeable_backend().get_client()
4256

43-
def open(self, name, mode='rb'):
57+
def open(self, name, mode="rb"):
4458
return self._get_readable_backend(name).open(name, mode)
4559

4660
def save(self, name, content, max_length=None):
@@ -74,3 +88,10 @@ def get_created_time(self, name):
7488

7589
def get_modified_time(self, name):
7690
return self._get_readable_backend(name).get_modified_time(name)
91+
92+
def get_presigned_put_url(
93+
self, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream"
94+
):
95+
return self._get_writeable_backend().get_presigned_put_url(
96+
filepath, md5sum, lifetime_sec, mimetype=mimetype
97+
)
contentcuration/contentcuration/utils/storage/common.py

Lines changed: 8 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
11
import mimetypes
22
import os
3-
from datetime import timedelta
43

5-
from django.conf import settings
64
from django.core.files.storage import default_storage
7-
from django_s3_storage.storage import S3Storage
85

96
from .base import CompositeStorage
107
from .base import Storage
11-
from .gcs import CompositeGCS
12-
from .gcs import GoogleCloudStorage
138

149

1510
# Do this to ensure that we infer mimetypes for files properly, specifically
@@ -38,9 +33,10 @@ def determine_content_type(filename):
3833

3934

4035
def get_presigned_upload_url(
41-
filepath, md5sum_b64, lifetime_sec, content_length, storage=default_storage, client=None
36+
filepath, md5sum_b64, lifetime_sec, storage=default_storage
4237
):
43-
"""Return a presigned URL that can modify the given filepath through a PUT
38+
"""
39+
Return a presigned URL that can modify the given filepath through a PUT
4440
request. Performing a PUT request on the returned URL changes the object's
4541
contents with the contents of your PUT request.
4642
@@ -49,92 +45,22 @@ def get_presigned_upload_url(
4945
have to set a Content-MD5 HTTP header matching this md5sum once it
5046
initiates the upload.
5147
:param: lifetime_sec: the lifetime of the generated upload url, in seconds.
52-
:param: content_length: the size of the content, in bytes.
53-
:param: client: the storage client that will be used to generate the presigned URL.
54-
This must have an API that's similar to either the GCS client or the boto3 client.
5548
5649
:returns: a dictionary containing 2 keys:
5750
mimetype: the mimetype that will be required to send as part of the file upload's mimetype header
5851
uploadURL: the URL to upload the file to.
5952
6053
:raises: :class:`UnknownStorageBackendError`: If the storage backend is not S3 or GCS.
6154
"""
62-
63-
# Aron: note that content_length is not used right now because
64-
# both storage types are having difficulties enforcing it.
65-
6655
mimetype = determine_content_type(filepath)
67-
bucket = settings.AWS_S3_BUCKET_NAME
68-
69-
if isinstance(storage, Storage):
70-
client = client or storage.get_client()
7156

72-
if isinstance(storage, (GoogleCloudStorage, CompositeGCS)):
73-
upload_url = _get_gcs_presigned_put_url(client, bucket, filepath, md5sum_b64, lifetime_sec, mimetype=mimetype)
74-
elif isinstance(storage, (S3Storage, CompositeStorage)):
75-
upload_url = _get_s3_presigned_put_url(client, bucket, filepath, md5sum_b64, lifetime_sec)
57+
if isinstance(storage, (Storage, CompositeStorage)):
58+
upload_url = storage.get_presigned_put_url(
59+
filepath, md5sum_b64, lifetime_sec, mimetype=mimetype
60+
)
7661
else:
7762
raise UnknownStorageBackendError(
7863
"Please ensure your storage backend is either Google Cloud Storage or S3 Storage!"
7964
)
8065

81-
return {
82-
"mimetype": mimetype,
83-
"uploadURL": upload_url
84-
}
85-
86-
87-
def _get_gcs_presigned_put_url(gcs_client, bucket, filepath, md5sum, lifetime_sec, mimetype="application/octet-stream"):
88-
bucket_obj = gcs_client.get_bucket(bucket)
89-
blob_obj = bucket_obj.blob(filepath)
90-
91-
# ensure the md5sum doesn't have any whitespace, including newlines.
92-
# We should do the same whitespace stripping as well on any client that actually
93-
# uses the returned presigned url.
94-
md5sum_stripped = md5sum.strip()
95-
96-
# convert the lifetime to a timedelta, so gcloud library will interpret the lifetime
97-
# as the seconds from right now. If we use an absolute integer, it's the number of seconds
98-
# from unix time
99-
lifetime_timedelta = timedelta(seconds=lifetime_sec)
100-
101-
url = blob_obj.generate_signed_url(
102-
method="PUT",
103-
content_md5=md5sum_stripped,
104-
content_type=mimetype,
105-
expiration=lifetime_timedelta,
106-
)
107-
108-
return url
109-
110-
111-
def _get_s3_presigned_put_url(s3_client, bucket, filepath, md5sum, lifetime_sec):
112-
"""
113-
Creates a pre-signed URL for S3-like backends, e.g. Minio.
114-
115-
Note that since our production object storage backend is GCS, we do not enforce or require
116-
any Content-MD5 value.
117-
118-
:param: s3_client: an initialized S3 client. We will use this to create the presigned PUT url.
119-
:param: bucket: the bucket where the user can PUT their object.
120-
:param: filepath: the file path inside the bucket that the user can PUT their object.
121-
:param: md5sum: the base64-encoded MD5sum of the object the user is planning to PUT.
122-
This is ignored for this function and added solely to maintain API compatibility with other
123-
private presigned URL functions.
124-
:param: lifetime_sec: how long before the presigned URL expires, in seconds.
125-
"""
126-
# S3's PUT Object parameters:
127-
# https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
128-
method = "put_object"
129-
fields = {
130-
"Bucket": bucket,
131-
"Key": filepath,
132-
}
133-
134-
response = s3_client.generate_presigned_url(
135-
ClientMethod=method,
136-
Params=fields,
137-
ExpiresIn=lifetime_sec,
138-
)
139-
140-
return response
66+
return {"mimetype": mimetype, "uploadURL": upload_url}

contentcuration/contentcuration/utils/storage/dev.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from django.conf import settings
12
from django_s3_storage.storage import S3Storage
23
from google.cloud.storage import Client
34

@@ -9,13 +10,44 @@
910
class Storage(S3Storage, BaseStorage):
1011
def get_client(self):
1112
"""
12-
:rtype: object
13+
:rtype: botocore.client.BaseClient
1314
"""
1415
return self.s3_connection
1516

17+
def get_presigned_put_url(self, filepath, md5sum, lifetime_sec, mimetype=None):
18+
"""
19+
Creates a pre-signed URL for development storage backends
20+
21+
Note that since our production object storage backend is GCS, we do not enforce or require
22+
any Content-MD5 value.
23+
24+
:param: filepath: the file path inside the bucket that the user can PUT their object.
25+
:param: md5sum: the base64-encoded MD5sum of the object the user is planning to PUT.
26+
This is ignored for this function and added solely to maintain API compatibility with other
27+
private presigned URL functions.
28+
:param: lifetime_sec: how long before the presigned URL expires, in seconds.
29+
:param: mimetype: the content type of the file to be uploaded
30+
:return: A pre-signed URL for uploading the file
31+
"""
32+
# S3's PUT Object parameters:
33+
# https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
34+
method = "put_object"
35+
fields = {
36+
"Bucket": settings.AWS_S3_BUCKET_NAME,
37+
"Key": filepath,
38+
}
39+
40+
return self.get_client().generate_presigned_url(
41+
ClientMethod=method,
42+
Params=fields,
43+
ExpiresIn=lifetime_sec,
44+
)
45+
1646

1747
class CompositeStorage(BaseCompositeStorage):
1848
def __init__(self):
1949
super(CompositeStorage, self).__init__()
2050
self.backends.append(Storage())
21-
self.backends.append(GoogleCloudStorage(Client.create_anonymous_client(), "studio-content"))
51+
self.backends.append(
52+
GoogleCloudStorage(Client.create_anonymous_client(), "studio-content")
53+
)

0 commit comments

Comments
 (0)