Skip to content

Commit c4ee8b3

Browse files
committed
idk
1 parent e696407 commit c4ee8b3

File tree

2 files changed

+221
-57
lines changed

2 files changed

+221
-57
lines changed

src/server/routers/ingest.py

Lines changed: 97 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,36 +128,127 @@ async def download_ingest(
128128
129129
"""
130130
logger = logging.getLogger(__name__)
131+
132+
logger.info("Download request received", extra={
133+
"ingest_id": str(ingest_id),
134+
"s3_enabled": is_s3_enabled()
135+
})
136+
131137
# Check if S3 is enabled and file exists in S3
132-
logger.info(f"Checking if S3 is enabled and file exists in S3 for ingest ID: {ingest_id}")
133138
if is_s3_enabled():
134-
logger.info(f"S3 is enabled, checking if file exists in S3 for ingest ID: {ingest_id}")
135-
s3_url = get_s3_url_for_ingest_id(ingest_id)
136-
if s3_url:
137-
logger.info(f"File exists in S3, redirecting to S3 URL: {s3_url}")
138-
return RedirectResponse(url=s3_url, status_code=302)
139+
logger.info("S3 is enabled, attempting S3 URL lookup", extra={"ingest_id": str(ingest_id)})
140+
141+
try:
142+
s3_url = get_s3_url_for_ingest_id(ingest_id)
143+
if s3_url:
144+
logger.info("File found in S3, redirecting", extra={
145+
"ingest_id": str(ingest_id),
146+
"s3_url": s3_url,
147+
"redirect_status": 302
148+
})
149+
return RedirectResponse(url=s3_url, status_code=302)
150+
else:
151+
logger.info("File not found in S3, falling back to local file", extra={
152+
"ingest_id": str(ingest_id)
153+
})
154+
except Exception as s3_err:
155+
logger.error("Error during S3 URL lookup, falling back to local file", extra={
156+
"ingest_id": str(ingest_id),
157+
"error_type": type(s3_err).__name__,
158+
"error_message": str(s3_err)
159+
})
160+
else:
161+
logger.info("S3 is disabled, serving local file", extra={"ingest_id": str(ingest_id)})
139162

140163
# Fall back to local file serving
164+
logger.info("Attempting local file serving", extra={"ingest_id": str(ingest_id)})
165+
141166
# Normalize and validate the directory path
142167
directory = (TMP_BASE_PATH / str(ingest_id)).resolve()
168+
169+
logger.debug("Local directory path resolved", extra={
170+
"ingest_id": str(ingest_id),
171+
"directory_path": str(directory),
172+
"tmp_base_path": str(TMP_BASE_PATH.resolve())
173+
})
174+
143175
if not str(directory).startswith(str(TMP_BASE_PATH.resolve())):
176+
logger.error("Invalid ingest ID - path traversal attempt", extra={
177+
"ingest_id": str(ingest_id),
178+
"directory_path": str(directory),
179+
"tmp_base_path": str(TMP_BASE_PATH.resolve())
180+
})
144181
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail=f"Invalid ingest ID: {ingest_id!r}")
145182

146183
if not directory.is_dir():
184+
logger.error("Digest directory not found", extra={
185+
"ingest_id": str(ingest_id),
186+
"directory_path": str(directory),
187+
"directory_exists": directory.exists(),
188+
"is_directory": directory.is_dir() if directory.exists() else False
189+
})
147190
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Digest {ingest_id!r} not found")
148191

149192
try:
193+
# List all txt files for debugging
194+
txt_files = list(directory.glob("*.txt"))
195+
logger.debug("Found txt files in directory", extra={
196+
"ingest_id": str(ingest_id),
197+
"directory_path": str(directory),
198+
"txt_files_count": len(txt_files),
199+
"txt_files": [f.name for f in txt_files]
200+
})
201+
150202
first_txt_file = next(directory.glob("*.txt"))
203+
204+
logger.info("Selected txt file for download", extra={
205+
"ingest_id": str(ingest_id),
206+
"selected_file": first_txt_file.name,
207+
"file_path": str(first_txt_file),
208+
"file_size": first_txt_file.stat().st_size if first_txt_file.exists() else "unknown"
209+
})
210+
151211
except StopIteration as exc:
212+
# List all files in directory for debugging
213+
all_files = list(directory.glob("*"))
214+
logger.error("No txt file found in digest directory", extra={
215+
"ingest_id": str(ingest_id),
216+
"directory_path": str(directory),
217+
"all_files_count": len(all_files),
218+
"all_files": [f.name for f in all_files],
219+
"s3_enabled": is_s3_enabled()
220+
})
152221
raise HTTPException(
153222
status_code=status.HTTP_404_NOT_FOUND,
154223
detail=f"No .txt file found for digest {ingest_id!r}, s3_enabled: {is_s3_enabled()}"
155224
) from exc
156225

157226
try:
227+
logger.info("Serving local file", extra={
228+
"ingest_id": str(ingest_id),
229+
"file_name": first_txt_file.name,
230+
"file_path": str(first_txt_file),
231+
"media_type": "text/plain"
232+
})
158233
return FileResponse(path=first_txt_file, media_type="text/plain", filename=first_txt_file.name)
159234
except PermissionError as exc:
235+
logger.error("Permission denied accessing file", extra={
236+
"ingest_id": str(ingest_id),
237+
"file_path": str(first_txt_file),
238+
"error_message": str(exc)
239+
})
160240
raise HTTPException(
161241
status_code=status.HTTP_403_FORBIDDEN,
162242
detail=f"Permission denied for {first_txt_file}",
163243
) from exc
244+
except Exception as exc:
245+
logger.error("Unexpected error serving local file", extra={
246+
"ingest_id": str(ingest_id),
247+
"file_path": str(first_txt_file),
248+
"error_type": type(exc).__name__,
249+
"error_message": str(exc)
250+
})
251+
raise HTTPException(
252+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
253+
detail=f"Error serving file for digest {ingest_id!r}",
254+
) from exc

0 commit comments

Comments
 (0)