Skip to content

Commit ab6f2eb

Browse files
committed
Fixes for non-streaming request pathways
Signed-off-by: Mark Kurtz <mark.kurtz@neuralmagic.com>
1 parent ccced0d commit ab6f2eb

File tree

4 files changed

+16
-30
lines changed

4 files changed

+16
-30
lines changed

src/guidellm/backends/openai.py

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@
1818
import httpx
1919

2020
from guidellm.backends.backend import Backend
21-
from guidellm.backends.response_handlers import (
22-
GenerationResponseHandler,
23-
GenerationResponseHandlerFactory,
24-
)
21+
from guidellm.backends.response_handlers import GenerationResponseHandlerFactory
2522
from guidellm.schemas import GenerationRequest, GenerationResponse, RequestInfo
2623

2724
__all__ = ["OpenAIHTTPBackend"]
@@ -244,8 +241,8 @@ async def resolve( # type: ignore[override]
244241
)
245242
request_json = request.arguments.body if not request_files else None
246243
request_data = request.arguments.body if request_files else None
247-
response_handler = self._resolve_response_handler(
248-
request_type=request.request_type
244+
response_handler = GenerationResponseHandlerFactory.create(
245+
request.request_type, handler_overrides=self.response_handlers
249246
)
250247

251248
if not request.arguments.stream:
@@ -335,20 +332,3 @@ def _resolve_validate_kwargs(
335332
validate_kwargs["method"] = "GET"
336333

337334
return validate_kwargs
338-
339-
def _resolve_response_handler(self, request_type: str) -> GenerationResponseHandler:
340-
if (
341-
self.response_handlers is not None
342-
and (handler := self.response_handlers.get(request_type)) is not None
343-
):
344-
return handler
345-
346-
handler_class = GenerationResponseHandlerFactory.get_registered_object(
347-
request_type
348-
)
349-
if handler_class is None:
350-
raise ValueError(
351-
f"No response handler registered for request type '{request_type}'"
352-
)
353-
354-
return handler_class()

src/guidellm/backends/response_handlers.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@ def compile_non_streaming(
136136
:return: Standardized GenerationResponse with extracted text and metrics
137137
"""
138138
choices, usage = self.extract_choices_and_usage(response)
139-
text = choices[0].get("text", "") if choices else ""
139+
choice = choices[0] if choices else {}
140+
text = choice.get("text", "")
140141
input_metrics, output_metrics = self.extract_metrics(usage, text)
141142

142143
return GenerationResponse(
@@ -164,8 +165,9 @@ def add_streaming_line(self, line: str) -> int | None:
164165

165166
updated = False
166167
choices, usage = self.extract_choices_and_usage(data)
168+
choice = choices[0] if choices else {}
167169

168-
if choices and (text := choices[0].get("text")):
170+
if choices and (text := choice.get("text")):
169171
self.streaming_texts.append(text)
170172
updated = True
171173

@@ -295,8 +297,8 @@ def compile_non_streaming(
295297
:return: Standardized GenerationResponse with extracted content and metrics
296298
"""
297299
choices, usage = self.extract_choices_and_usage(response)
298-
choice = choices[0] if choices else {}
299-
text = choice.get("content", "")
300+
choice: dict[str, dict] = choices[0] if choices else {}
301+
text = choice.get("message", {}).get("content", "")
300302
input_metrics, output_metrics = self.extract_metrics(usage, text)
301303

302304
return GenerationResponse(
@@ -324,8 +326,9 @@ def add_streaming_line(self, line: str) -> int | None:
324326

325327
updated = False
326328
choices, usage = self.extract_choices_and_usage(data)
329+
choice: dict[str, dict] = choices[0] if choices else {}
327330

328-
if choices and (content := choices[0].get("delta", {}).get("content")):
331+
if choices and (content := choice.get("delta", {}).get("content")):
329332
self.streaming_texts.append(content)
330333
updated = True
331334

src/guidellm/benchmark/schemas/generative/entrypoints.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def get_default(cls: type[BenchmarkGenerativeTextArgs], field: str) -> Any:
200200
default="generative_column_mapper",
201201
description="Column mapping preprocessor for dataset fields",
202202
)
203-
data_request_formatter: RequestFormatter | dict[str, str] | str = Field(
203+
data_request_formatter: RequestFormatter | dict[str, Any] | str = Field(
204204
default="chat_completions",
205205
description="Request formatting preprocessor or template name",
206206
validation_alias=AliasChoices(

src/guidellm/schemas/request_stats.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,10 @@ def time_per_output_token_ms(self) -> float | None:
167167
"""
168168
if (
169169
(start := self.info.timings.request_start) is None
170-
or (last_token := self.last_token_iteration) is None
170+
or (
171+
(last_token := self.last_token_iteration or self.request_end_time)
172+
is None
173+
)
171174
or (output_tokens := self.output_tokens) is None
172175
or output_tokens == 0
173176
):

0 commit comments

Comments (0)