1010from boto3 import client as boto_client
1111from botocore .exceptions import ClientError
1212
13+ from gitingest .utils .logging_config import get_logger , log_with_extra
14+
15+ # Initialize logger for this module
16+ logger = get_logger (__name__ )
17+
1318
class S3UploadError(Exception):
    """Raised when uploading content to S3 fails."""
@@ -131,6 +136,17 @@ def generate_s3_file_path(
def create_s3_client() -> boto_client:  # type: ignore[name-defined]
    """Create and return an S3 client with configuration from environment.

    Returns
    -------
    boto_client
        A boto3 S3 client built from the settings returned by ``get_s3_config()``.

    """
    config = get_s3_config()

    # Log client creation with the configuration, redacting credential
    # material. The original redacted only the key id/secret; a session
    # token (if present in the config) is equally sensitive, so it is
    # excluded from the log as well.
    sensitive_keys = frozenset({"aws_access_key_id", "aws_secret_access_key", "aws_session_token"})
    log_config = {k: v for k, v in config.items() if k not in sensitive_keys}
    log_with_extra(
        logger,
        "debug",
        "Creating S3 client",
        s3_config=log_config,
        has_credentials=bool(config.get("aws_access_key_id")),
    )

    return boto_client("s3", **config)
135151
136152
def upload_to_s3(content: str, s3_file_path: str, ingest_id: UUID) -> str:
    """Upload text content to S3 and return a public URL for the object.

    Parameters
    ----------
    content : str
        Text to upload; stored with a ``text/plain`` content type.
    s3_file_path : str
        Object key under which the content is stored in the bucket.
    ingest_id : UUID
        Identifier attached to the object as an S3 tag (``ingest_id=<uuid>``).

    Returns
    -------
    str
        Public URL for the uploaded object (alias host, custom endpoint,
        or the standard ``https://<bucket>.s3.<region>.amazonaws.com`` form).

    Raises
    ------
    ValueError
        If S3 is not enabled.
    S3UploadError
        If the S3 ``put_object`` call fails.

    """
    # NOTE(review): guard reconstructed from the visible error path — confirm
    # it matches the original `is_s3_enabled()` check.
    if not is_s3_enabled():
        msg = "S3 is not enabled"
        raise ValueError(msg)

    s3_client = create_s3_client()
    bucket_name = get_s3_bucket_name()

    # Log upload attempt before touching the network so failures are traceable.
    log_with_extra(
        logger,
        "debug",
        "Starting S3 upload",
        bucket_name=bucket_name,
        s3_file_path=s3_file_path,
        ingest_id=str(ingest_id),
        content_size=len(content),
    )

    try:
        # Upload the content with ingest_id as tag so the object can later be
        # found by get_s3_url_for_ingest_id().
        s3_client.put_object(
            Bucket=bucket_name,
            Key=s3_file_path,
            Body=content.encode("utf-8"),
            ContentType="text/plain",
            Tagging=f"ingest_id={ingest_id!s}",
        )
    except ClientError as e:
        # Log upload failure with the AWS error code for diagnosis, then
        # re-raise as the module's domain exception, chaining the cause.
        log_with_extra(
            logger,
            "error",
            "S3 upload failed",
            bucket_name=bucket_name,
            s3_file_path=s3_file_path,
            ingest_id=str(ingest_id),
            error_code=e.response.get("Error", {}).get("Code"),
            error_message=str(e),
        )
        msg = f"Failed to upload to S3: {e}"
        raise S3UploadError(msg) from e

    # Generate the public URL. The config lookup is hoisted into a single
    # call here — the previous version called get_s3_config() twice in the
    # fallback branch.
    alias_host = get_s3_alias_host()
    if alias_host:
        # Use alias host if configured
        public_url = f"{alias_host.rstrip('/')}/{s3_file_path}"
    else:
        config = get_s3_config()
        endpoint = config.get("endpoint_url")
        if endpoint:
            # Fallback to direct S3 URL on a custom endpoint
            public_url = f"{endpoint.rstrip('/')}/{bucket_name}/{s3_file_path}"
        else:
            # Standard AWS virtual-hosted-style URL
            public_url = f"https://{bucket_name}.s3.{config['region_name']}.amazonaws.com/{s3_file_path}"

    # Log successful upload
    log_with_extra(
        logger,
        "debug",
        "S3 upload completed successfully",
        bucket_name=bucket_name,
        s3_file_path=s3_file_path,
        ingest_id=str(ingest_id),
        public_url=public_url,
    )

    return public_url
197249
198250def _build_s3_url (key : str ) -> str :
def get_s3_url_for_ingest_id(ingest_id: UUID) -> str | None:
    """Return the S3 URL of the object tagged with the given ingest ID.

    Scans objects under the ``ingest/`` prefix and returns the URL of the
    first object whose tags match ``ingest_id``. Returns ``None`` when S3 is
    disabled, no object matches, or the lookup fails with a client error
    (best-effort: failures are logged, not raised).
    """
    if not is_s3_enabled():
        logger.debug("S3 not enabled, skipping URL lookup for ingest_id: %s", ingest_id)
        return None

    log_with_extra(
        logger,
        "debug",
        "Starting S3 URL lookup for ingest ID",
        ingest_id=str(ingest_id),
    )

    try:
        s3_client = create_s3_client()
        bucket_name = get_s3_bucket_name()

        # NOTE(review): paginator setup reconstructed from the visible
        # paginate() kwargs — confirm against the original source.
        paginator = s3_client.get_paginator("list_objects_v2")
        page_iterator = paginator.paginate(
            Bucket=bucket_name,
            Prefix="ingest/",
        )

        objects_checked = 0
        for page in page_iterator:
            # Pages with no matching objects carry no "Contents" key.
            for obj in page.get("Contents", []):
                key = obj["Key"]
                objects_checked += 1
                if not _check_object_tags(s3_client, bucket_name, key, ingest_id):
                    continue
                s3_url = _build_s3_url(key)
                log_with_extra(
                    logger,
                    "debug",
                    "Found S3 object for ingest ID",
                    ingest_id=str(ingest_id),
                    s3_key=key,
                    s3_url=s3_url,
                    objects_checked=objects_checked,
                )
                return s3_url

        log_with_extra(
            logger,
            "debug",
            "No S3 object found for ingest ID",
            ingest_id=str(ingest_id),
            objects_checked=objects_checked,
        )

    except ClientError as e:
        log_with_extra(
            logger,
            "error",
            "Error during S3 URL lookup",
            ingest_id=str(ingest_id),
            error_code=e.response.get("Error", {}).get("Code"),
            error_message=str(e),
        )

    return None
0 commit comments