From 62656c2ff85aa3919a1a36bc3a513922df5410db Mon Sep 17 00:00:00 2001
From: Jason Dai
Date: Wed, 1 Apr 2026 17:37:20 -0700
Subject: [PATCH] chore: GenAI Client(evals) - update validation and warning messages for intermediate_events

PiperOrigin-RevId: 893195249
---
 NOTE(review): line breaks and indentation in this patch were reconstructed
 from a whitespace-collapsed copy. Added/removed lines follow Python syntax;
 the indentation of pre-existing context lines is presumed -- confirm against
 the target tree (or apply with `git apply --ignore-whitespace`) before use.

 tests/unit/vertexai/genai/test_evals.py   | 62 ++++++++++++++++++++++-
 vertexai/_genai/_evals_metric_handlers.py | 24 ++++++---
 2 files changed, 78 insertions(+), 8 deletions(-)

diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index 5d944a7c4c..5ca67a448a 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -2013,6 +2013,23 @@ def test_has_tool_call_mixed_events(self):
         ]
         assert _evals_metric_handlers._has_tool_call(events)
 
+    def test_has_tool_call_with_agent_event(self):
+        events = [
+            vertexai_genai_types.evals.AgentEvent(
+                author="model",
+                content=genai_types.Content(
+                    parts=[
+                        genai_types.Part(
+                            function_call=genai_types.FunctionCall(
+                                name="search", args={}
+                            )
+                        )
+                    ]
+                ),
+            )
+        ]
+        assert _evals_metric_handlers._has_tool_call(events)
+
 
 @pytest.mark.usefixtures("google_auth_mock")
 class TestRunAgentInternal:
@@ -4892,10 +4909,53 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
         handler._build_request_payload(eval_case, response_index=0)
         mock_warning.assert_called_once_with(
             "Metric 'tool_use_quality_v1' requires tool usage in "
-            "'intermediate_events', but no tool usage was found for case %s.",
+            "'intermediate_events' or 'agent_data', but no tool usage was found for case %s.",
             "case-no-tool-call",
         )
 
+    @mock.patch.object(_evals_metric_handlers.logger, "warning")
+    def test_build_request_payload_tool_use_quality_v1_with_agent_data_tool_call(
+        self, mock_warning, mock_api_client_fixture
+    ):
+        """Tests that PredefinedMetricHandler does not warn if tool call is in agent_data."""
+        metric = vertexai_genai_types.Metric(name="tool_use_quality_v1")
+        handler = _evals_metric_handlers.PredefinedMetricHandler(
+            module=evals.Evals(api_client_=mock_api_client_fixture), metric=metric
+        )
+        eval_case = vertexai_genai_types.EvalCase(
+            eval_case_id="case-with-agent-data-tool-call",
+            prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]),
+            responses=[
+                vertexai_genai_types.ResponseCandidate(
+                    response=genai_types.Content(parts=[genai_types.Part(text="Hi")])
+                )
+            ],
+            agent_data=vertexai_genai_types.evals.AgentData(
+                turns=[
+                    vertexai_genai_types.evals.ConversationTurn(
+                        turn_index=0,
+                        turn_id="turn_0",
+                        events=[
+                            vertexai_genai_types.evals.AgentEvent(
+                                author="model",
+                                content=genai_types.Content(
+                                    parts=[
+                                        genai_types.Part(
+                                            function_call=genai_types.FunctionCall(
+                                                name="search", args={}
+                                            )
+                                        )
+                                    ]
+                                ),
+                            )
+                        ],
+                    )
+                ]
+            ),
+        )
+        handler._build_request_payload(eval_case, response_index=0)
+        mock_warning.assert_not_called()
+
 
 @pytest.mark.usefixtures("google_auth_mock")
 class TestRunAdkUserSimulation:
diff --git a/vertexai/_genai/_evals_metric_handlers.py b/vertexai/_genai/_evals_metric_handlers.py
index ea8dc11735..9d72bafc86 100644
--- a/vertexai/_genai/_evals_metric_handlers.py
+++ b/vertexai/_genai/_evals_metric_handlers.py
@@ -42,12 +42,12 @@
 T = TypeVar("T", types.Metric, types.MetricSource, types.LLMMetric)
 
 
-def _has_tool_call(intermediate_events: Optional[list[types.evals.Event]]) -> bool:
-    """Checks if any event in intermediate_events has a function call."""
-    if not intermediate_events:
+def _has_tool_call(events: Optional[list[Any]]) -> bool:
+    """Checks if any event in events has a function call."""
+    if not events:
         return False
-    for event in intermediate_events:
-        if event.content and event.content.parts:
+    for event in events:
+        if getattr(event, "content", None) and getattr(event.content, "parts", None):
             for part in event.content.parts:
                 if hasattr(part, "function_call") and part.function_call:
                     return True
@@ -922,10 +922,20 @@ def _build_request_payload(
             )
 
         if self.metric.name == "tool_use_quality_v1":
-            if not _has_tool_call(eval_case.intermediate_events):
+            has_tool_call = _has_tool_call(eval_case.intermediate_events)
+
+            # Check agent_data for tool calls if intermediate_events is empty
+            agent_data = getattr(eval_case, "agent_data", None)
+            if not has_tool_call and agent_data:
+                for turn in agent_data.turns or []:
+                    if _has_tool_call(turn.events):
+                        has_tool_call = True
+                        break
+
+            if not has_tool_call:
                 logger.warning(
                     "Metric 'tool_use_quality_v1' requires tool usage in "
-                    "'intermediate_events', but no tool usage was found for case %s.",
+                    "'intermediate_events' or 'agent_data', but no tool usage was found for case %s.",
                     eval_case.eval_case_id,
                 )
 