From 691f5b659fd54f1a7f31b0589b6f5a87248a9dd0 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 8 Apr 2026 10:37:22 +0200 Subject: [PATCH 01/11] Add logging to user authentication --- src/routers/dependencies.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py index aeca95a..53cb93f 100644 --- a/src/routers/dependencies.py +++ b/src/routers/dependencies.py @@ -2,6 +2,7 @@ from typing import Annotated from fastapi import Depends +from loguru import logger from pydantic import BaseModel from sqlalchemy.ext.asyncio import AsyncConnection @@ -30,8 +31,16 @@ async def fetch_user( return None user = await User.fetch(api_key, user_data) + mask_length = 28 + masked_key = "*" * mask_length + api_key[mask_length:] if user: + logger.info( + "User {identifier} authenticated in with api key {api_key}.", + identifier=user.user_id, + api_key=masked_key, + ) return user + logger.info("Authentication failed.", api_key=masked_key) msg = "Invalid API key provided." raise AuthenticationFailedError(msg) @@ -40,6 +49,7 @@ def fetch_user_or_raise( user: Annotated[User | None, Depends(fetch_user)] = None, ) -> User: if user is None: + logger.info("Unauthenticated user tried to access endpoint that requires authentication.") msg = "No API key provided." raise AuthenticationRequiredError(msg) return user From 175f402414b7407fa873b0299f116f3113015d5e Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 8 Apr 2026 12:18:45 +0200 Subject: [PATCH 02/11] Make sure enqueue is turned on by default to make non-blocking calls --- src/core/logging.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/logging.py b/src/core/logging.py index d36df1e..28d2a2a 100644 --- a/src/core/logging.py +++ b/src/core/logging.py @@ -20,7 +20,11 @@ def setup_log_sinks(configuration_file: Path | None = None) -> None: sink = sink_configuration.pop("sink") if sink == "sys.stderr": sink = sys.stderr - logger.add(sink, serialize=True, **sink_configuration) + # Logs the additionally provided data as JSON. + sink_configuration.setdefault("serialize", True) + # Decouples log calls from I/O and makes it multiprocessing safe. + sink_configuration.setdefault("enqueue", True) + logger.add(sink, **sink_configuration) async def add_request_context_to_log( From f55b798cad56c9b47d6142d8aedee5938f374806 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 8 Apr 2026 12:38:02 +0200 Subject: [PATCH 03/11] Add middleware for logging response times --- src/core/logging.py | 28 ++++++++++++++++++++++++++++ src/main.py | 8 +++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/core/logging.py b/src/core/logging.py index 28d2a2a..cce9e70 100644 --- a/src/core/logging.py +++ b/src/core/logging.py @@ -1,6 +1,7 @@ """Utility functions for logging.""" import sys +import time import uuid from collections.abc import Awaitable, Callable from pathlib import Path @@ -37,6 +38,33 @@ async def add_request_context_to_log( return await call_next(request) +async def log_request_duration( + request: Request, + call_next: Callable[[Request], Awaitable[Response]], +) -> Response: + """Log the process and wallclock time for each call. + + Reported times cannot be attributed solely to processing the request. + As multiple requests can be handled concurrently in the same process, + process time may be spent on other requests as well. The same goes for + wallclock time, which is additionally influenced by e.g., context switches. + """ + start_mono_ns = time.monotonic_ns() + start_process_ns = time.process_time_ns() + response: Response = await call_next(request) + + duration_mono_ns = time.monotonic_ns() - start_mono_ns + duration_process_ns = time.process_time_ns() - start_process_ns + logger.info( + "Request took {mono_ms} ms wallclock time (process time {process_ms} ms)", + mono_ms=int(duration_mono_ns / 1_000_000), + process_ms=int(duration_process_ns / 1_000_000), + wallclock_time_ns=duration_mono_ns, + process_time_ns=duration_process_ns, + ) + return response + + async def request_response_logger( request: Request, call_next: Callable[[Request], Awaitable[Response]], diff --git a/src/main.py b/src/main.py index 696c09b..a681877 100644 --- a/src/main.py +++ b/src/main.py @@ -10,7 +10,12 @@ from config import load_configuration from core.errors import ProblemDetailError, problem_detail_exception_handler -from core.logging import add_request_context_to_log, request_response_logger, setup_log_sinks +from core.logging import ( + add_request_context_to_log, + log_request_duration, + request_response_logger, + setup_log_sinks, +) from database.setup import close_databases from routers.mldcat_ap.dataset import router as mldcat_ap_router from routers.openml.datasets import router as datasets_router @@ -72,6 +77,7 @@ def create_api(configuration_file: Path | None = None) -> FastAPI: # Order matters! Each added middleware wraps the previous, creating a stack. # See also: https://fastapi.tiangolo.com/tutorial/middleware/#multiple-middleware-execution-order app.middleware("http")(request_response_logger) + app.middleware("http")(log_request_duration) app.middleware("http")(add_request_context_to_log) app.add_exception_handler(ProblemDetailError, problem_detail_exception_handler) # type: ignore[arg-type] From 2bb4a1dc7dcb525898ed1569c3540e85644acfc2 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 8 Apr 2026 13:22:46 +0200 Subject: [PATCH 04/11] Add user id to logging context --- src/routers/dependencies.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py index 53cb93f..376f101 100644 --- a/src/routers/dependencies.py +++ b/src/routers/dependencies.py @@ -26,23 +26,25 @@ async def userdb_connection() -> AsyncGenerator[AsyncConnection, None]: async def fetch_user( api_key: APIKey | None = None, user_data: Annotated[AsyncConnection | None, Depends(userdb_connection)] = None, -) -> User | None: +) -> AsyncGenerator[User | None]: if not (api_key and user_data): - return None + yield None + return user = await User.fetch(api_key, user_data) - mask_length = 28 - masked_key = "*" * mask_length + api_key[mask_length:] - if user: - logger.info( - "User {identifier} authenticated in with api key {api_key}.", - identifier=user.user_id, - api_key=masked_key, - ) - return user - logger.info("Authentication failed.", api_key=masked_key) - msg = "Invalid API key provided." - raise AuthenticationFailedError(msg) + masked_key = api_key[-4:] + if not user: + logger.info("Authentication failed.", api_key=masked_key) + msg = "Invalid API key provided." + raise AuthenticationFailedError(msg) + + logger.info( + "User {identifier} authenticated in with api key ending in '{api_key}'.", + identifier=user.user_id, + api_key=masked_key, + ) + with logger.contextualize(user_id=user.user_id): + yield user def fetch_user_or_raise( From 7135e251d6fc696dafbf4da1cade2af65acbb953 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 8 Apr 2026 13:49:34 +0200 Subject: [PATCH 05/11] Add logging to state mutations --- src/routers/openml/datasets.py | 8 ++++++++ src/routers/openml/setups.py | 8 ++++++++ src/routers/openml/study.py | 15 +++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index 18547ee..51d26d2 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -5,6 +5,7 @@ from typing import Annotated, Any, Literal, NamedTuple from fastapi import APIRouter, Body, Depends +from loguru import logger from sqlalchemy import bindparam, text from sqlalchemy.engine import Row from sqlalchemy.ext.asyncio import AsyncConnection @@ -61,6 +62,7 @@ async def tag_dataset( raise TagAlreadyExistsError(msg) await database.datasets.tag(data_id, tag, user_id=user.user_id, connection=expdb_db) + logger.info("Dataset {dataset_id} tagged '{tag}'.", dataset_id=data_id, tag=tag) return { "data_tag": {"id": str(data_id), "tag": [*tags, tag]}, } @@ -375,6 +377,12 @@ async def update_dataset_status( msg = f"Unknown status transition: {current_status} -> {status}" raise InternalError(msg) + logger.info( + "Dataset {dataset_id} changed from {previous} to {current}", + dataset_id=dataset_id, + previous=current_status.status if current_status else DatasetStatus.IN_PREPARATION, + current=status, + ) return {"dataset_id": dataset_id, "status": status} diff --git a/src/routers/openml/setups.py b/src/routers/openml/setups.py index b6d937e..89abb24 100644 --- a/src/routers/openml/setups.py +++ b/src/routers/openml/setups.py @@ -4,6 +4,7 @@ from typing import Annotated from fastapi import APIRouter, Body, Depends, Path +from loguru import logger from sqlalchemy.ext.asyncio import AsyncConnection import database.setups @@ -65,6 +66,7 @@ async def tag_setup( raise TagAlreadyExistsError(msg) await database.setups.tag(setup_id, tag, user.user_id, expdb_db) + logger.info("Setup {setup_id} tagged '{tag}'.", setup_id=setup_id, tag=tag) all_tags = [t.tag for t in setup_tags] + [tag] return {"setup_tag": {"id": str(setup_id), "tag": all_tags}} @@ -94,9 +96,15 @@ async def untag_setup( msg = ( f"You may not remove tag {tag!r} of setup {setup_id} because it was not created by you." ) + logger.warning( + "User attempted to remove tag '{tag}' from setup {setup_id}.", + setup_id=setup_id, + tag=tag, + ) raise TagNotOwnedError(msg) await database.setups.untag(setup_id, matched_tag_row.tag, expdb_db) + logger.info("Setup {setup_id} had tag '{tag}' removed.", setup_id=setup_id, tag=tag) remaining_tags = [ t.tag for t in setup_tags if t.tag.casefold() != matched_tag_row.tag.casefold() ] diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py index 3b3ca2a..14dbfbc 100644 --- a/src/routers/openml/study.py +++ b/src/routers/openml/study.py @@ -1,6 +1,7 @@ from typing import Annotated, Literal from fastapi import APIRouter, Body, Depends +from loguru import logger from pydantic import BaseModel from sqlalchemy.engine import Row from sqlalchemy.ext.asyncio import AsyncConnection @@ -73,6 +74,11 @@ async def attach_to_study( # PHP lets *anyone* edit *any* study. We're not going to do that. if study.creator != user.user_id and not await user.is_admin(): msg = f"Study {study_id} can only be edited by its creator." + logger.warning( + "User attempted to attach entities to study they do not own.", + study_id=study_id, + entity_ids=entity_ids, + ) raise StudyNotEditableError(msg) if study.status != StudyStatus.IN_PREPARATION: msg = f"Study {study_id} can only be edited while in preparation." @@ -93,6 +99,11 @@ async def attach_to_study( except ValueError as e: msg = str(e) raise StudyConflictError(msg) from e + logger.info( + "User {user_id} attached entities to study {study_id}.", + study_id=study_id, + entity_ids=entity_ids, + ) return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_) @@ -124,6 +135,10 @@ async def create_study( user=user, expdb=expdb, ) + logger.info( + "User {user_id} created study {study_id}.", + study_id=study_id, + ) # Make sure that invalid fields raise an error (e.g., "task_ids") return {"study_id": study_id} From c6af844364a672d7315eb1c07a7f3b09f38ae461 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 8 Apr 2026 15:15:55 +0200 Subject: [PATCH 06/11] Fix dependency tests --- src/routers/dependencies.py | 2 +- src/routers/openml/study.py | 2 ++ tests/dependencies/fetch_user_test.py | 11 ++++++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py index 376f101..e22dae4 100644 --- a/src/routers/dependencies.py +++ b/src/routers/dependencies.py @@ -26,7 +26,7 @@ async def userdb_connection() -> AsyncGenerator[AsyncConnection, None]: async def fetch_user( api_key: APIKey | None = None, user_data: Annotated[AsyncConnection | None, Depends(userdb_connection)] = None, -) -> AsyncGenerator[User | None]: +) -> AsyncGenerator[User | None, None]: if not (api_key and user_data): yield None return diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py index 14dbfbc..a8c95e7 100644 --- a/src/routers/openml/study.py +++ b/src/routers/openml/study.py @@ -103,6 +103,7 @@ async def attach_to_study( "User {user_id} attached entities to study {study_id}.", study_id=study_id, entity_ids=entity_ids, + user_id=user.user_id, ) return AttachDetachResponse(study_id=study_id, main_entity_type=study.type_) @@ -138,6 +139,7 @@ async def create_study( logger.info( "User {user_id} created study {study_id}.", study_id=study_id, + user_id=user.user_id, ) # Make sure that invalid fields raise an error (e.g., "task_ids") return {"study_id": study_id} diff --git a/tests/dependencies/fetch_user_test.py b/tests/dependencies/fetch_user_test.py index f6c31c4..faae3d5 100644 --- a/tests/dependencies/fetch_user_test.py +++ b/tests/dependencies/fetch_user_test.py @@ -1,3 +1,5 @@ +from contextlib import aclosing + import pytest from sqlalchemy.ext.asyncio import AsyncConnection @@ -16,19 +18,22 @@ ], ) async def test_fetch_user(api_key: str, user: User, user_test: AsyncConnection) -> None: - db_user = await fetch_user(api_key, user_data=user_test) + async with aclosing(fetch_user(api_key, user_data=user_test)) as agen: + db_user = await anext(agen) assert isinstance(db_user, User) assert user.user_id == db_user.user_id assert set(await user.get_groups()) == set(await db_user.get_groups()) async def test_fetch_user_no_key_no_user() -> None: - assert await fetch_user(api_key=None) is None + async with aclosing(fetch_user(api_key=None)) as agen: + assert await anext(agen) is None async def test_fetch_user_invalid_key_raises(user_test: AsyncConnection) -> None: with pytest.raises(AuthenticationFailedError): - await fetch_user(api_key=ApiKey.INVALID, user_data=user_test) + async with aclosing(fetch_user(api_key=ApiKey.INVALID, user_data=user_test)) as agen: + await anext(agen) async def test_fetch_user_or_raise_raises_if_no_user() -> None: From af4290a550eb7fa3dd63f6a74b7813df61d1a8af Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Wed, 8 Apr 2026 17:52:21 +0200 Subject: [PATCH 07/11] Process feedback --- src/core/logging.py | 7 ++++++- src/routers/dependencies.py | 2 +- src/routers/openml/study.py | 3 ++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/core/logging.py b/src/core/logging.py index cce9e70..b35270d 100644 --- a/src/core/logging.py +++ b/src/core/logging.py @@ -34,7 +34,11 @@ async def add_request_context_to_log( ) -> Response: """Add a unique request id to each log call.""" identifier = uuid.uuid4().hex - with logger.contextualize(request_id=identifier): + with logger.contextualize( + request_id=identifier, + method=request.method, + path=request.url.path, + ): return await call_next(request) @@ -61,6 +65,7 @@ async def log_request_duration( process_ms=int(duration_process_ns / 1_000_000), wallclock_time_ns=duration_mono_ns, process_time_ns=duration_process_ns, + status=response.status_code, ) return response diff --git a/src/routers/dependencies.py b/src/routers/dependencies.py index e22dae4..c5623fb 100644 --- a/src/routers/dependencies.py +++ b/src/routers/dependencies.py @@ -39,7 +39,7 @@ async def fetch_user( raise AuthenticationFailedError(msg) logger.info( - "User {identifier} authenticated in with api key ending in '{api_key}'.", + "User {identifier} authenticated with api key ending in '{api_key}'.", identifier=user.user_id, api_key=masked_key, ) diff --git a/src/routers/openml/study.py b/src/routers/openml/study.py index a8c95e7..56c670c 100644 --- a/src/routers/openml/study.py +++ b/src/routers/openml/study.py @@ -75,9 +75,10 @@ async def attach_to_study( if study.creator != user.user_id and not await user.is_admin(): msg = f"Study {study_id} can only be edited by its creator." logger.warning( - "User attempted to attach entities to study they do not own.", + "User {user_id} attempted to attach entities to study they do not own.", study_id=study_id, entity_ids=entity_ids, + user_id=user.user_id, ) raise StudyNotEditableError(msg) if study.status != StudyStatus.IN_PREPARATION: From 0f8cc87ed846012d635f434d279fedee8f5a2ea0 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 9 Apr 2026 10:35:06 +0200 Subject: [PATCH 08/11] Call logger.complete on shutdown --- src/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.py b/src/main.py index a681877..6f2d1b3 100644 --- a/src/main.py +++ b/src/main.py @@ -34,6 +34,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # noqa: ARG001 """Manage application lifespan - startup and shutdown events.""" yield + logger.complete() await close_databases() From 29e224077d8c4470e32fbd9fab87e21e638a2b73 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 9 Apr 2026 10:57:46 +0200 Subject: [PATCH 09/11] Use asgi-lifespan to ensure the lifespan events are used in test --- pyproject.toml | 1 + tests/conftest.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 596d5b2..2c53b9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,6 +34,7 @@ dev = [ "pytest-mock", "pytest-asyncio", "httpx", + "asgi-lifespan", "hypothesis", "deepdiff", "pytest-xdist", diff --git a/tests/conftest.py b/tests/conftest.py index 163e4e7..6ae24db 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ import pytest from _pytest.config import Config from _pytest.nodes import Item +from asgi_lifespan import LifespanManager from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine @@ -85,11 +86,14 @@ async def override_userdb() -> AsyncIterator[AsyncConnection]: app.dependency_overrides[expdb_connection] = override_expdb app.dependency_overrides[userdb_connection] = override_userdb - async with httpx.AsyncClient( - transport=httpx.ASGITransport(app=app), - base_url="http://test", - follow_redirects=True, - ) as client: + async with ( + LifespanManager(app) as manager, + httpx.AsyncClient( + transport=httpx.ASGITransport(app=manager.app), + base_url="http://test", + follow_redirects=True, + ) as client, + ): yield client From bd5995a6f21bec05defe2c50338c077fa92cbf9f Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 9 Apr 2026 15:12:30 +0200 Subject: [PATCH 10/11] setup/teardown once per session we will restructure the use of Lifespan and test client creation in a separate PR --- src/main.py | 2 +- tests/conftest.py | 26 +++++++++++++++----------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/main.py b/src/main.py index 6f2d1b3..9f04647 100644 --- a/src/main.py +++ b/src/main.py @@ -31,7 +31,7 @@ @asynccontextmanager -async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]: # noqa: ARG001 +async def lifespan(app: FastAPI | None) -> AsyncGenerator[None, None]: # noqa: ARG001 """Manage application lifespan - startup and shutdown events.""" yield logger.complete() diff --git a/tests/conftest.py b/tests/conftest.py index 6ae24db..483206e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ import contextlib import json -from collections.abc import AsyncIterator, Iterable, Iterator +from collections.abc import AsyncGenerator, AsyncIterator, Iterable, Iterator from pathlib import Path from typing import Any, NamedTuple @@ -9,12 +9,11 @@ import pytest from _pytest.config import Config from _pytest.nodes import Item -from asgi_lifespan import LifespanManager from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine from database.setup import expdb_database, user_database -from main import create_api +from main import create_api, lifespan from routers.dependencies import expdb_connection, userdb_connection PHP_API_URL = "http://php-api:80/api/v1/json" @@ -52,6 +51,12 @@ async def temporary_records( await connection.commit() +@pytest.fixture(autouse=True, scope="session") +async def one_lifespan() -> AsyncGenerator[None, None]: + async with lifespan(app=None): + yield + + @pytest.fixture async def expdb_test() -> AsyncIterator[AsyncConnection]: async with automatic_rollback(expdb_database()) as connection: @@ -86,14 +91,13 @@ async def override_userdb() -> AsyncIterator[AsyncConnection]: app.dependency_overrides[expdb_connection] = override_expdb app.dependency_overrides[userdb_connection] = override_userdb - async with ( - LifespanManager(app) as manager, - httpx.AsyncClient( - transport=httpx.ASGITransport(app=manager.app), - base_url="http://test", - follow_redirects=True, - ) as client, - ): + # We do not use the Lifespan manager for now because our auto-use fixture + # `one_lifespan` will do setup and teardown at a session scope level instead. + async with httpx.AsyncClient( + transport=httpx.ASGITransport(app=app), + base_url="http://test", + follow_redirects=True, + ) as client: yield client From e6d45927cd0e53329ebd8315ac6562d60b5845a6 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Thu, 9 Apr 2026 15:14:37 +0200 Subject: [PATCH 11/11] await teardown of logger and database together --- src/main.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main.py b/src/main.py index 9f04647..93ad1ac 100644 --- a/src/main.py +++ b/src/main.py @@ -1,4 +1,5 @@ import argparse +import asyncio import sys from collections.abc import AsyncGenerator from contextlib import asynccontextmanager @@ -34,8 +35,10 @@ async def lifespan(app: FastAPI | None) -> AsyncGenerator[None, None]: # noqa: ARG001 """Manage application lifespan - startup and shutdown events.""" yield - logger.complete() - await close_databases() + asyncio.gather( + logger.complete(), + close_databases(), + ) def _parse_args() -> argparse.Namespace: