From 3879a05891f505085948b46bf164c67888a58772 Mon Sep 17 00:00:00 2001 From: Mert Toslali Date: Mon, 10 Nov 2025 15:22:17 -0500 Subject: [PATCH 1/2] Add vllm id to the response --- src/guidellm/backends/response_handlers.py | 1 + src/guidellm/schemas/response.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/src/guidellm/backends/response_handlers.py b/src/guidellm/backends/response_handlers.py index f1e83bd8..6d2e6973 100644 --- a/src/guidellm/backends/response_handlers.py +++ b/src/guidellm/backends/response_handlers.py @@ -145,6 +145,7 @@ def compile_non_streaming( request_args=str( request.arguments.model_dump() if request.arguments else None ), + response_id=response.get("id"), # use vLLM ID if available text=text, input_metrics=input_metrics, output_metrics=output_metrics, diff --git a/src/guidellm/schemas/response.py b/src/guidellm/schemas/response.py index a02ae8ba..63b0097a 100644 --- a/src/guidellm/schemas/response.py +++ b/src/guidellm/schemas/response.py @@ -41,6 +41,10 @@ class GenerationResponse(StandardBaseModel): request_id: str = Field( description="Unique identifier matching the original GenerationRequest." ) + response_id: str | None = Field( + default=None, + description="Unique identifier matching the original vLLM Response ID." + ) request_args: str | None = Field( description="Arguments passed to the backend for request processing." 
) From 1527e5450aae675e96b6b14b6d2c6e81f58c295a Mon Sep 17 00:00:00 2001 From: Mert Toslali Date: Thu, 13 Nov 2025 11:50:34 -0500 Subject: [PATCH 2/2] Propagate response_id to GenerativeRequestStats (rebased onto main) --- src/guidellm/schemas/request_stats.py | 3 +++ src/guidellm/schemas/response.py | 1 + 2 files changed, 4 insertions(+) diff --git a/src/guidellm/schemas/request_stats.py b/src/guidellm/schemas/request_stats.py index c3c7dee7..72b6db42 100644 --- a/src/guidellm/schemas/request_stats.py +++ b/src/guidellm/schemas/request_stats.py @@ -48,6 +48,9 @@ class GenerativeRequestStats(StandardBaseDict): request_type: GenerativeRequestType | str = Field( description="Type of generative request (text_completion or chat_completion)" ) + response_id: str | None = Field( + default=None, description="vLLM-assigned unique request ID" + ) request_args: str | None = Field( default=None, description="Backend arguments used for this request" ) diff --git a/src/guidellm/schemas/response.py b/src/guidellm/schemas/response.py index 63b0097a..ee971964 100644 --- a/src/guidellm/schemas/response.py +++ b/src/guidellm/schemas/response.py @@ -112,6 +112,7 @@ def compile_stats( return GenerativeRequestStats( request_id=self.request_id, + response_id=self.response_id, request_type=request.request_type, request_args=str( request.arguments.model_dump() if request.arguments else {}