@@ -128,36 +128,127 @@ async def download_ingest(
128128
129129 """
130130 logger = logging .getLogger (__name__ )
131+
132+ logger .info ("Download request received" , extra = {
133+ "ingest_id" : str (ingest_id ),
134+ "s3_enabled" : is_s3_enabled ()
135+ })
136+
131137 # Check if S3 is enabled and file exists in S3
132- logger .info (f"Checking if S3 is enabled and file exists in S3 for ingest ID: { ingest_id } " )
133138 if is_s3_enabled ():
134- logger .info (f"S3 is enabled, checking if file exists in S3 for ingest ID: { ingest_id } " )
135- s3_url = get_s3_url_for_ingest_id (ingest_id )
136- if s3_url :
137- logger .info (f"File exists in S3, redirecting to S3 URL: { s3_url } " )
138- return RedirectResponse (url = s3_url , status_code = 302 )
139+ logger .info ("S3 is enabled, attempting S3 URL lookup" , extra = {"ingest_id" : str (ingest_id )})
140+
141+ try :
142+ s3_url = get_s3_url_for_ingest_id (ingest_id )
143+ if s3_url :
144+ logger .info ("File found in S3, redirecting" , extra = {
145+ "ingest_id" : str (ingest_id ),
146+ "s3_url" : s3_url ,
147+ "redirect_status" : 302
148+ })
149+ return RedirectResponse (url = s3_url , status_code = 302 )
150+ else :
151+ logger .info ("File not found in S3, falling back to local file" , extra = {
152+ "ingest_id" : str (ingest_id )
153+ })
154+ except Exception as s3_err :
155+ logger .error ("Error during S3 URL lookup, falling back to local file" , extra = {
156+ "ingest_id" : str (ingest_id ),
157+ "error_type" : type (s3_err ).__name__ ,
158+ "error_message" : str (s3_err )
159+ })
160+ else :
161+ logger .info ("S3 is disabled, serving local file" , extra = {"ingest_id" : str (ingest_id )})
139162
140163 # Fall back to local file serving
164+ logger .info ("Attempting local file serving" , extra = {"ingest_id" : str (ingest_id )})
165+
141166 # Normalize and validate the directory path
142167 directory = (TMP_BASE_PATH / str (ingest_id )).resolve ()
168+
169+ logger .debug ("Local directory path resolved" , extra = {
170+ "ingest_id" : str (ingest_id ),
171+ "directory_path" : str (directory ),
172+ "tmp_base_path" : str (TMP_BASE_PATH .resolve ())
173+ })
174+
143175 if not str (directory ).startswith (str (TMP_BASE_PATH .resolve ())):
176+ logger .error ("Invalid ingest ID - path traversal attempt" , extra = {
177+ "ingest_id" : str (ingest_id ),
178+ "directory_path" : str (directory ),
179+ "tmp_base_path" : str (TMP_BASE_PATH .resolve ())
180+ })
144181 raise HTTPException (status_code = status .HTTP_403_FORBIDDEN , detail = f"Invalid ingest ID: { ingest_id !r} " )
145182
146183 if not directory .is_dir ():
184+ logger .error ("Digest directory not found" , extra = {
185+ "ingest_id" : str (ingest_id ),
186+ "directory_path" : str (directory ),
187+ "directory_exists" : directory .exists (),
188+ "is_directory" : directory .is_dir () if directory .exists () else False
189+ })
147190 raise HTTPException (status_code = status .HTTP_404_NOT_FOUND , detail = f"Digest { ingest_id !r} not found" )
148191
149192 try :
193+ # List all txt files for debugging
194+ txt_files = list (directory .glob ("*.txt" ))
195+ logger .debug ("Found txt files in directory" , extra = {
196+ "ingest_id" : str (ingest_id ),
197+ "directory_path" : str (directory ),
198+ "txt_files_count" : len (txt_files ),
199+ "txt_files" : [f .name for f in txt_files ]
200+ })
201+
150202 first_txt_file = next (directory .glob ("*.txt" ))
203+
204+ logger .info ("Selected txt file for download" , extra = {
205+ "ingest_id" : str (ingest_id ),
206+ "selected_file" : first_txt_file .name ,
207+ "file_path" : str (first_txt_file ),
208+ "file_size" : first_txt_file .stat ().st_size if first_txt_file .exists () else "unknown"
209+ })
210+
151211 except StopIteration as exc :
212+ # List all files in directory for debugging
213+ all_files = list (directory .glob ("*" ))
214+ logger .error ("No txt file found in digest directory" , extra = {
215+ "ingest_id" : str (ingest_id ),
216+ "directory_path" : str (directory ),
217+ "all_files_count" : len (all_files ),
218+ "all_files" : [f .name for f in all_files ],
219+ "s3_enabled" : is_s3_enabled ()
220+ })
152221 raise HTTPException (
153222 status_code = status .HTTP_404_NOT_FOUND ,
154223 detail = f"No .txt file found for digest { ingest_id !r} , s3_enabled: { is_s3_enabled ()} "
155224 ) from exc
156225
157226 try :
227+ logger .info ("Serving local file" , extra = {
228+ "ingest_id" : str (ingest_id ),
229+ "file_name" : first_txt_file .name ,
230+ "file_path" : str (first_txt_file ),
231+ "media_type" : "text/plain"
232+ })
158233 return FileResponse (path = first_txt_file , media_type = "text/plain" , filename = first_txt_file .name )
159234 except PermissionError as exc :
235+ logger .error ("Permission denied accessing file" , extra = {
236+ "ingest_id" : str (ingest_id ),
237+ "file_path" : str (first_txt_file ),
238+ "error_message" : str (exc )
239+ })
160240 raise HTTPException (
161241 status_code = status .HTTP_403_FORBIDDEN ,
162242 detail = f"Permission denied for { first_txt_file } " ,
163243 ) from exc
244+ except Exception as exc :
245+ logger .error ("Unexpected error serving local file" , extra = {
246+ "ingest_id" : str (ingest_id ),
247+ "file_path" : str (first_txt_file ),
248+ "error_type" : type (exc ).__name__ ,
249+ "error_message" : str (exc )
250+ })
251+ raise HTTPException (
252+ status_code = status .HTTP_500_INTERNAL_SERVER_ERROR ,
253+ detail = f"Error serving file for digest { ingest_id !r} " ,
254+ ) from exc
0 commit comments