diff --git a/agentplatform/_genai/_evals_metric_loaders.py b/agentplatform/_genai/_evals_metric_loaders.py index bb8940174a..e64b151f0d 100644 --- a/agentplatform/_genai/_evals_metric_loaders.py +++ b/agentplatform/_genai/_evals_metric_loaders.py @@ -277,27 +277,27 @@ def __getattr__( @property def GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("GENERAL_QUALITY") + return self.__getattr__("GENERAL_QUALITY", version="v1") @property def TEXT_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("TEXT_QUALITY") + return self.__getattr__("TEXT_QUALITY", version="v1") @property def INSTRUCTION_FOLLOWING(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("INSTRUCTION_FOLLOWING") + return self.__getattr__("INSTRUCTION_FOLLOWING", version="v1") @property def SAFETY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("SAFETY") + return self.__getattr__("SAFETY", version="v1") @property def MULTI_TURN_GENERAL_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("MULTI_TURN_GENERAL_QUALITY") + return self.__getattr__("MULTI_TURN_GENERAL_QUALITY", version="v1") @property def MULTI_TURN_TEXT_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("MULTI_TURN_TEXT_QUALITY") + return self.__getattr__("MULTI_TURN_TEXT_QUALITY", version="v1") @property def MULTI_TURN_TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric: @@ -317,43 +317,43 @@ def FINAL_RESPONSE_MATCH(self) -> LazyLoadedPrebuiltMetric: @property def FINAL_RESPONSE_REFERENCE_FREE(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("FINAL_RESPONSE_REFERENCE_FREE") + return self.__getattr__("FINAL_RESPONSE_REFERENCE_FREE", version="v1") @property def COHERENCE(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("COHERENCE") + return self.__getattr__("COHERENCE", version="v1") @property def FLUENCY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("FLUENCY") + return self.__getattr__("FLUENCY", version="v1") @property def VERBOSITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("VERBOSITY") + return self.__getattr__("VERBOSITY", version="v1") @property def SUMMARIZATION_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("SUMMARIZATION_QUALITY") + return self.__getattr__("SUMMARIZATION_QUALITY", version="v1") @property def QUESTION_ANSWERING_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("QUESTION_ANSWERING_QUALITY") + return self.__getattr__("QUESTION_ANSWERING_QUALITY", version="v1") @property def MULTI_TURN_CHAT_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("MULTI_TURN_CHAT_QUALITY") + return self.__getattr__("MULTI_TURN_CHAT_QUALITY", version="v1") @property def MULTI_TURN_SAFETY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("MULTI_TURN_SAFETY") + return self.__getattr__("MULTI_TURN_SAFETY", version="v1") @property def FINAL_RESPONSE_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("FINAL_RESPONSE_QUALITY") + return self.__getattr__("FINAL_RESPONSE_QUALITY", version="v1") @property def HALLUCINATION(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("HALLUCINATION") + return self.__getattr__("HALLUCINATION", version="v1") @property def GROUNDING(self) -> LazyLoadedPrebuiltMetric: # pylint: disable=invalid-name @@ -374,15 +374,15 @@ def GROUNDEDNESS(self) -> LazyLoadedPrebuiltMetric: # pylint: disable=invalid-n @property def TOOL_USE_QUALITY(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("TOOL_USE_QUALITY") + return self.__getattr__("TOOL_USE_QUALITY", version="v1") @property def GECKO_TEXT2IMAGE(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("GECKO_TEXT2IMAGE") + return self.__getattr__("GECKO_TEXT2IMAGE", version="v1") @property def GECKO_TEXT2VIDEO(self) -> LazyLoadedPrebuiltMetric: - return self.__getattr__("GECKO_TEXT2VIDEO") + return self.__getattr__("GECKO_TEXT2VIDEO", version="v1") PrebuiltMetric = PrebuiltMetricLoader() diff --git a/tests/unit/agentplatform/genai/test_evals.py b/tests/unit/agentplatform/genai/test_evals.py index 4d1ee2bce6..33f63a1a88 100644 --- a/tests/unit/agentplatform/genai/test_evals.py +++ b/tests/unit/agentplatform/genai/test_evals.py @@ -123,6 +123,7 @@ def mock_api_client_fixture(): @pytest.fixture def mock_eval_dependencies(mock_api_client_fixture): + _evals_metric_loaders.LazyLoadedPrebuiltMetric._cache.clear() # fmt: off with ( mock.patch("google.cloud.storage.Client") as mock_storage_client, @@ -6386,6 +6387,49 @@ def test_groundedness_resolve_returns_grounding_v1_metric(self): assert resolved.name == "grounding_v1" +class TestPrebuiltMetricLoaderVersionPinning: + """Verifies explicit version pinning for all RubricMetric properties.""" + + @pytest.mark.parametrize( + "prop_name,expected_spec", + [ + ("GENERAL_QUALITY", "general_quality_v1"), + ("TEXT_QUALITY", "text_quality_v1"), + ("INSTRUCTION_FOLLOWING", "instruction_following_v1"), + ("SAFETY", "safety_v1"), + ("MULTI_TURN_GENERAL_QUALITY", "multi_turn_general_quality_v1"), + ("MULTI_TURN_TEXT_QUALITY", "multi_turn_text_quality_v1"), + ("FINAL_RESPONSE_REFERENCE_FREE", "final_response_reference_free_v1"), + ("FINAL_RESPONSE_QUALITY", "final_response_quality_v1"), + ("HALLUCINATION", "hallucination_v1"), + ("TOOL_USE_QUALITY", "tool_use_quality_v1"), + ("GECKO_TEXT2IMAGE", "gecko_text2image_v1"), + ("GECKO_TEXT2VIDEO", "gecko_text2video_v1"), + ], + ) + def test_predefined_property_pins_to_v1(self, prop_name, expected_spec): + lazy_metric = getattr(agentplatform_genai_types.RubricMetric, prop_name) + assert lazy_metric.version == "v1" + assert lazy_metric._get_api_metric_spec_name() == expected_spec + + @pytest.mark.parametrize( + "prop_name", + [ + "COHERENCE", + "FLUENCY", + "VERBOSITY", + "SUMMARIZATION_QUALITY", + "QUESTION_ANSWERING_QUALITY", + "MULTI_TURN_CHAT_QUALITY", + "MULTI_TURN_SAFETY", + ], + ) + def test_gcs_backed_property_pins_to_v1(self, prop_name): + lazy_metric = getattr(agentplatform_genai_types.RubricMetric, prop_name) + assert lazy_metric.version == "v1" + assert lazy_metric._get_api_metric_spec_name() is None + + class TestMergeResponseDatasets: """Unit tests for the merge_response_datasets_into_canonical_format function.""" diff --git a/tests/unit/vertexai/test_evaluation.py b/tests/unit/vertexai/test_evaluation.py index 8f37cf1ca3..1e0b0f7d56 100644 --- a/tests/unit/vertexai/test_evaluation.py +++ b/tests/unit/vertexai/test_evaluation.py @@ -1882,9 +1882,9 @@ def test_runnable_trajectory_eval_with_runnable_inference(self, api_transport): "trajectory_exact_match/score", ] ) - assert list( + assert sorted( test_result.metrics_table["trajectory_exact_match/score"].to_list() - ) == [1.0, 0.0] + ) == [0.0, 1.0] @pytest.mark.parametrize("api_transport", ["grpc", "rest"]) def test_pointwise_autorater_request_config_enabled(self, api_transport):