@@ -29,6 +29,7 @@ def __init__(self, connection_string, logger=None):
2929 .split (AWSLambdaDataSource .CONNECTION_STRING_GROUP_SEPARATOR )
3030 )
3131 self .aws_lambda_client = boto3 .client ("lambda" )
32+ self .aws_s3_client = boto3 .client ("s3" )
3233
3334 @staticmethod
3435 def can_handle_connection_string (connection_string ):
@@ -43,8 +44,9 @@ def get_connection_string_prefix():
4344 return AWSLambdaDataSource .CONNECTION_STRING_PREFIX
4445
4546 def get_table_info (self , table_config , last_known_sync_version ):
46- column_names , last_sync_version , sync_version , full_refresh_required , data_changed_since_last_sync \
47- = self .__get_table_info (table_config , last_known_sync_version )
47+ column_names , last_sync_version , sync_version , full_refresh_required , data_changed_since_last_sync = self .__get_table_info (
48+ table_config , last_known_sync_version
49+ )
4850 columns_in_database = column_names
4951 change_tracking_info = ChangeTrackingInfo (
5052 last_sync_version = last_sync_version ,
@@ -91,11 +93,13 @@ def __get_table_info(self, table_config, last_known_sync_version):
9193
9294 result = self .__invoke_lambda (pay_load )
9395
94- return result ["ColumnNames" ], \
95- result ["LastSyncVersion" ], \
96- result ["CurrentSyncVersion" ], \
97- result ["FullRefreshRequired" ], \
98- result ["DataChangedSinceLastSync" ]
96+ return (
97+ result ["ColumnNames" ],
98+ result ["LastSyncVersion" ],
99+ result ["CurrentSyncVersion" ],
100+ result ["FullRefreshRequired" ],
101+ result ["DataChangedSinceLastSync" ],
102+ )
99103
100104 def __get_table_data (
101105 self ,
@@ -116,23 +120,31 @@ def __get_table_data(
116120 "BatchSize" : batch_config ["size" ],
117121 "LastSyncVersion" : change_tracking_info .last_sync_version ,
118122 "FullRefresh" : full_refresh ,
119- "ColumnNames" : list (map (lambda cfg : cfg ['source_name' ], columns_config )),
123+ "ColumnNames" : list (
124+ map (lambda cfg : cfg ["source_name" ], columns_config )
125+ ),
120126 "PrimaryKeyColumnNames" : table_config ["primary_keys" ],
121127 "LastBatchPrimaryKeys" : [
122- {"Key" : k , "Value" : v } for k , v in batch_key_tracker .bookmarks .items ()
128+ {"Key" : k , "Value" : v }
129+ for k , v in batch_key_tracker .bookmarks .items ()
123130 ],
124131 },
125132 }
126133
127134 result = self .__invoke_lambda (pay_load )
135+ command_result = self .aws_s3_client .get_object (
136+ Bucket = result ["DataBucketName" ], Key = result ["DataKey" ]
137+ )
128138
129- return result ["ColumnNames" ], result ["Data" ]
139+ data = json .loads (command_result ["Body" ].read ())
140+
141+ return result ["ColumnNames" ], data
130142
# Wrap raw row data in a pandas DataFrame, using `column_names` as the column labels.
# NOTE(review): the annotations `[[]]` and `[]` are list literals, not real types;
# presumably they mean "list of rows" and "list of column names" - confirm with callers.
131143 def __get_data_frame (self , data : [[]], column_names : []):
132144 return pandas .DataFrame (data = data , columns = column_names )
133145
134146 def __invoke_lambda (self , pay_load ):
135- self .logger .debug (' \n Request being sent to Lambda:' )
147+ self .logger .debug (" \n Request being sent to Lambda:" )
136148 self .logger .debug (pay_load )
137149
138150 lambda_response = self .aws_lambda_client .invoke (
@@ -142,24 +154,28 @@ def __invoke_lambda(self, pay_load):
142154 Payload = json .dumps (pay_load ).encode (),
143155 )
144156
145- response_status_code = int (lambda_response [' StatusCode' ])
157+ response_status_code = int (lambda_response [" StatusCode" ])
146158 response_function_error = lambda_response .get ("FunctionError" )
147- self .logger .debug (' \n Response received from Lambda:' )
159+ self .logger .debug (" \n Response received from Lambda:" )
148160 self .logger .debug (f'Response - StatusCode = "{ response_status_code } "' )
149161 self .logger .debug (f'Response - FunctionError = "{ response_function_error } "' )
150162
151- response_payload = json .loads (lambda_response [' Payload' ].read ())
163+ response_payload = json .loads (lambda_response [" Payload" ].read ())
152164
153165 if response_status_code != 200 or response_function_error :
154- self .logger .error (F'Error in response from aws lambda { self .connection_data ["function" ]} ' )
155- self .logger .error (f'Response - Status Code = { response_status_code } ' )
156- self .logger .error (f'Response - Error Function = { response_function_error } ' )
157- self .logger .error (f'Response - Error Details:' )
166+ self .logger .error (
167+ f'Error in response from aws lambda { self .connection_data ["function" ]} '
168+ )
169+ self .logger .error (f"Response - Status Code = { response_status_code } " )
170+ self .logger .error (f"Response - Error Function = { response_function_error } " )
171+ self .logger .error (f"Response - Error Details:" )
158172 # the below is risky as it may contain actual data if this line is reached in case of a successful result
159173 # however, the same Payload field is used to return actual error details in case of real errors
160174 # i.e. StatusCode is 200 (since AWS could invoke the lambda)
161175 # BUT the lambda barfed with an error and therefore the FunctionError would not be None
162176 self .logger .error (response_payload )
163- raise Exception ('Error received when invoking AWS Lambda. See logs for further details.' )
177+ raise Exception (
178+ "Error received when invoking AWS Lambda. See logs for further details."
179+ )
164180
165181 return response_payload
0 commit comments