Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 61 additions & 1 deletion tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2013,6 +2013,23 @@ def test_has_tool_call_mixed_events(self):
]
assert _evals_metric_handlers._has_tool_call(events)

def test_has_tool_call_with_agent_event(self):
    """Tests that _has_tool_call detects a function call inside an AgentEvent."""
    # Build the event from the inside out: call -> part -> content -> event.
    search_call = genai_types.FunctionCall(name="search", args={})
    model_content = genai_types.Content(
        parts=[genai_types.Part(function_call=search_call)]
    )
    agent_event = vertexai_genai_types.evals.AgentEvent(
        author="model",
        content=model_content,
    )

    assert _evals_metric_handlers._has_tool_call([agent_event])


@pytest.mark.usefixtures("google_auth_mock")
class TestRunAgentInternal:
Expand Down Expand Up @@ -4892,10 +4909,53 @@ def test_tool_use_quality_metric_no_tool_call_logs_warning(
handler._build_request_payload(eval_case, response_index=0)
mock_warning.assert_called_once_with(
"Metric 'tool_use_quality_v1' requires tool usage in "
"'intermediate_events', but no tool usage was found for case %s.",
"'intermediate_events' or 'agent_data', but no tool usage was found for case %s.",
"case-no-tool-call",
)

@mock.patch.object(_evals_metric_handlers.logger, "warning")
def test_build_request_payload_tool_use_quality_v1_with_agent_data_tool_call(
    self, mock_warning, mock_api_client_fixture
):
    """Tests that no warning is logged when the tool call lives in agent_data.

    The eval case has no intermediate_events; its only function call is
    inside an AgentEvent of the single conversation turn in agent_data.
    """
    handler = _evals_metric_handlers.PredefinedMetricHandler(
        module=evals.Evals(api_client_=mock_api_client_fixture),
        metric=vertexai_genai_types.Metric(name="tool_use_quality_v1"),
    )

    # A model-authored event whose only part is a function call.
    tool_call_event = vertexai_genai_types.evals.AgentEvent(
        author="model",
        content=genai_types.Content(
            parts=[
                genai_types.Part(
                    function_call=genai_types.FunctionCall(name="search", args={})
                )
            ]
        ),
    )
    single_turn = vertexai_genai_types.evals.ConversationTurn(
        turn_index=0,
        turn_id="turn_0",
        events=[tool_call_event],
    )
    candidate = vertexai_genai_types.ResponseCandidate(
        response=genai_types.Content(parts=[genai_types.Part(text="Hi")])
    )
    eval_case = vertexai_genai_types.EvalCase(
        eval_case_id="case-with-agent-data-tool-call",
        prompt=genai_types.Content(parts=[genai_types.Part(text="Hello")]),
        responses=[candidate],
        agent_data=vertexai_genai_types.evals.AgentData(turns=[single_turn]),
    )

    handler._build_request_payload(eval_case, response_index=0)

    mock_warning.assert_not_called()


@pytest.mark.usefixtures("google_auth_mock")
class TestRunAdkUserSimulation:
Expand Down
24 changes: 17 additions & 7 deletions vertexai/_genai/_evals_metric_handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,12 @@
T = TypeVar("T", types.Metric, types.MetricSource, types.LLMMetric)


def _has_tool_call(intermediate_events: Optional[list[types.evals.Event]]) -> bool:
"""Checks if any event in intermediate_events has a function call."""
if not intermediate_events:
def _has_tool_call(events: Optional[list[Any]]) -> bool:
"""Checks if any event in events has a function call."""
if not events:
return False
for event in intermediate_events:
if event.content and event.content.parts:
for event in events:
if getattr(event, "content", None) and getattr(event.content, "parts", None):
for part in event.content.parts:
if hasattr(part, "function_call") and part.function_call:
return True
Expand Down Expand Up @@ -922,10 +922,20 @@ def _build_request_payload(
)

if self.metric.name == "tool_use_quality_v1":
if not _has_tool_call(eval_case.intermediate_events):
has_tool_call = _has_tool_call(eval_case.intermediate_events)

# Check agent_data for tool calls if intermediate_events is empty
agent_data = getattr(eval_case, "agent_data", None)
if not has_tool_call and agent_data:
for turn in agent_data.turns or []:
if _has_tool_call(turn.events):
has_tool_call = True
break

if not has_tool_call:
logger.warning(
"Metric 'tool_use_quality_v1' requires tool usage in "
"'intermediate_events', but no tool usage was found for case %s.",
"'intermediate_events' or 'agent_data', but no tool usage was found for case %s.",
eval_case.eval_case_id,
)

Expand Down
Loading