diff --git a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md index 681de50b3aeb..b3416a30ec64 100644 --- a/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md +++ b/sdk/evaluation/azure-ai-evaluation/CHANGELOG.md @@ -27,6 +27,7 @@ - Fixed `_get_metric_result` prefix matching where shorter metric names (e.g., `xpia`) could match before longer, more-specific ones (e.g., `xpia_manipulated_content`). Now sorts by length descending for correct longest-prefix matching. - Fixed non-dict `_properties` values from evaluators causing downstream issues. Values that are not dicts are now logged and dropped gracefully. - Fixed filename length error in `_inline_image` by catching OSError/ValueError during local path resolution and fall back to returning a text chunk instead of throwing. +- Fixed `format_llm_response` raising `UnboundLocalError` when `inputs` was not provided by ensuring `sample_input` is always initialized. ### Other Changes diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_legacy/prompty/_utils.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_legacy/prompty/_utils.py index 2a5ed070862a..b13201165085 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_legacy/prompty/_utils.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/_legacy/prompty/_utils.py @@ -609,6 +609,7 @@ async def format_stream(llm_response: AsyncStream[ChatCompletionChunk]) -> Async ) sample_output = json.dumps(sample_output_list) input_str = f"{json.dumps(inputs)}" if inputs else "" + sample_input = "" if inputs and len(inputs) > 0: sample_input_json = [] msg = ChatCompletionUserMessageParam( diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_prompty_format_llm_response.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_prompty_format_llm_response.py new file mode 100644 index 000000000000..b15386281a18 --- /dev/null +++ 
b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_prompty_format_llm_response.py
@@ -0,0 +1,52 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+import asyncio
+from types import SimpleNamespace
+
+import pytest
+
+from azure.ai.evaluation._legacy.prompty._utils import format_llm_response
+
+
+class _FakeResponse:  # hand-built fake passed as `response` to format_llm_response below
+    def __init__(self):
+        self.usage = SimpleNamespace(prompt_tokens=1, completion_tokens=2, total_tokens=3)
+        self.choices = [  # exactly one choice, finished with "stop"
+            SimpleNamespace(
+                finish_reason="stop",
+                message=SimpleNamespace(role="assistant", content="test-output"),
+            )
+        ]
+        self.model = "test-model"
+
+    def model_dump(self):  # plain-dict view holding the same single assistant message
+        return {
+            "choices": [
+                {
+                    "message": {
+                        "role": "assistant",
+                        "content": "test-output",
+                    }
+                }
+            ]
+        }
+
+
+@pytest.mark.unittest
+def test_format_llm_response_with_no_inputs_sets_empty_sample_input():
+    response = _FakeResponse()
+
+    result = asyncio.run(  # drive the coroutine to completion from this synchronous test
+        format_llm_response(
+            response=response,
+            is_first_choice=True,
+            response_format={"type": "text"},
+            outputs=None,
+            inputs=None,  # case under test: no inputs supplied
+        )
+    )
+
+    assert result["llm_output"] == "test-output"  # content of the fake's single choice
+    assert result["sample_input"] == ""  # with no inputs, sample_input must be the empty string